aboutsummaryrefslogtreecommitdiffhomepage
path: root/Pipes/Text
diff options
context:
space:
mode:
authormichaelt <what_is_it_to_do_anything@yahoo.com>2014-02-16 12:19:41 -0500
committermichaelt <what_is_it_to_do_anything@yahoo.com>2014-02-16 12:19:41 -0500
commitfafcbeb516fda29cae18b61f84cc79b3e688f79c (patch)
treead916a67eced3e0e2c3f37aa882b200516f1cb7e /Pipes/Text
parent3f76b550da195af30d32f611d55b33e0651cd0e8 (diff)
downloadtext-pipes-fafcbeb516fda29cae18b61f84cc79b3e688f79c.tar.gz
text-pipes-fafcbeb516fda29cae18b61f84cc79b3e688f79c.tar.zst
text-pipes-fafcbeb516fda29cae18b61f84cc79b3e688f79c.zip
encoding documentation beginning to improve
Diffstat (limited to 'Pipes/Text')
-rw-r--r--Pipes/Text/Encoding.hs73
1 files changed, 56 insertions, 17 deletions
diff --git a/Pipes/Text/Encoding.hs b/Pipes/Text/Encoding.hs
index 21269cf..e07c47e 100644
--- a/Pipes/Text/Encoding.hs
+++ b/Pipes/Text/Encoding.hs
@@ -1,24 +1,31 @@
1
2{-# LANGUAGE RankNTypes, BangPatterns #-} 1{-# LANGUAGE RankNTypes, BangPatterns #-}
3-- | 2-- |
4 3
5-- This module uses the stream decoding functions from the text-stream-decoding package 4-- This module uses the stream decoding functions from the text-stream-decoding package
6-- to define pipes decoding functions and lenses. 5-- to define decoding functions and lenses.
7 6
8module Pipes.Text.Encoding 7module Pipes.Text.Encoding
9 ( Codec 8 (
9 -- * Lens type
10 -- $lenses
11 Codec
12 -- * Standard lenses for viewing Text in ByteString
13 -- $codecs
10 , utf8 14 , utf8
11 , utf8Pure 15 , utf8Pure
12 , utf16LE 16 , utf16LE
13 , utf16BE 17 , utf16BE
14 , utf32LE 18 , utf32LE
15 , utf32BE 19 , utf32BE
20 -- * Non-lens decoding functions
16 , decodeUtf8 21 , decodeUtf8
17 , decodeUtf8Pure 22 , decodeUtf8Pure
18 , decodeUtf16LE 23 , decodeUtf16LE
19 , decodeUtf16BE 24 , decodeUtf16BE
20 , decodeUtf32LE 25 , decodeUtf32LE
21 , decodeUtf32BE 26 , decodeUtf32BE
27 -- * Functions for latin and ascii text
28 -- $ascii
22 , encodeAscii 29 , encodeAscii
23 , decodeAscii 30 , decodeAscii
24 , encodeIso8859_1 31 , encodeIso8859_1
@@ -38,13 +45,22 @@ import Control.Monad (join)
38import Data.Word (Word8) 45import Data.Word (Word8)
39import Pipes 46import Pipes
40 47
48
41type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a) 49type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
42 50
43{- | A 'Codec' is just an improper lens into a byte stream that is expected to contain text. 51{- $lenses
44 They are named in accordance with the expected encoding, 'utf8', 'utf16LE' etc. 52 The 'Codec' type is just an aliased standard Prelude type. It just specializes
45 The stream of text they 'see' in a bytestream ends by returning the original byte stream 53 the @Lens'@ type synonym used by the standard lens libraries, @lens@ and
46 beginning at the point of failure, or the empty bytestream with its return value. 54 @lens-family@. You use them with
47 -} 55 the @view@ or @(^.)@ and @zoom@ functions from those libraries.
56
57 Each codec lens looks into a byte stream that is understood to contain text.
58 The stream of text it 'sees' in the stream of bytes begins at its head; it ends
59 by reverting to (returning) the original byte stream
60 beginning at the point of decoding failure. Where there is no decoding failure,
61 it returns an empty byte stream with its return value.
62 -}
63
48type Codec 64type Codec
49 = forall m r 65 = forall m r
50 . Monad m 66 . Monad m
@@ -66,6 +82,9 @@ decodeStream = loop where
66 p') 82 p')
67{-# INLINABLE decodeStream#-} 83{-# INLINABLE decodeStream#-}
68 84
85
86
87
69decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r) 88decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
70decodeUtf8 = decodeStream streamUtf8 89decodeUtf8 = decodeStream streamUtf8
71{-# INLINE decodeUtf8 #-} 90{-# INLINE decodeUtf8 #-}
@@ -97,10 +116,28 @@ mkCodec :: (forall r m . Monad m =>
97mkCodec dec enc = \k p0 -> fmap (\p -> join (for p (yield . enc))) (k (dec p0)) 116mkCodec dec enc = \k p0 -> fmap (\p -> join (for p (yield . enc))) (k (dec p0))
98 117
99 118
100{- | An improper lens into a byte stream expected to be UTF-8 encoded; the associated 119{- $codecs
101 text stream ends by returning the original bytestream beginning at the point of failure, 120
102 or the empty bytestring for a well-encoded text. 121 The particular \'Codec\' lenses are named in accordance with the expected encoding, 'utf8', 'utf16LE' etc.
103 -} 122
123> view utf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r)
124> Bytes.stdin ^. utf8 :: Producer Text IO (Producer ByteString IO r)
125
126 @zoom@ converts a Text parser into a ByteString parser:
127
128> zoom utf8 drawChar :: Monad m => StateT (Producer ByteString m r) m (Maybe Char)
129>
130> withNextByte :: Parser ByteString m (Maybe Char, Maybe Word8)
131> withNextByte = do char_ <- zoom utf8 Text.drawChar
132> byte_ <- Bytes.peekByte
133> return (char_, byte_)
134
135 @withNextByte@ will return the first valid Char in a ByteString,
136 and the first byte of the next character, if they exist. Because
137 we \'draw\' one and \'peek\' at the other, the parser as a whole only
138 advances one Char's length along the bytestring.
139
140 -}
104 141
105utf8 :: Codec 142utf8 :: Codec
106utf8 = mkCodec decodeUtf8 TE.encodeUtf8 143utf8 = mkCodec decodeUtf8 TE.encodeUtf8
@@ -121,14 +158,16 @@ utf32BE :: Codec
121utf32BE = mkCodec decodeUtf32BE TE.encodeUtf32BE 158utf32BE = mkCodec decodeUtf32BE TE.encodeUtf32BE
122 159
123 160
124{- | ascii and latin encodings only use a small number of the characters 'Text' 161{- $ascii
125 recognizes; thus we cannot use the pipes 'Lens' style to work with them. 162 ascii and latin encodings only use a small number of the characters 'Text'
163 recognizes; thus we cannot use the pipes @Lens@ style to work with them.
126 Rather we simply define functions each way. 164 Rather we simply define functions each way.
127
128 'encodeAscii' : Reduce as much of your stream of 'Text' actually is ascii to a byte stream,
129 returning the rest of the 'Text' at the first non-ascii 'Char'
130-} 165-}
131 166
167
168-- 'encodeAscii' reduces as much of your stream of 'Text' as actually is ascii to a byte stream,
169-- returning the rest of the 'Text' at the first non-ascii 'Char'
170
132encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r) 171encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
133encodeAscii = go where 172encodeAscii = go where
134 go p = do e <- lift (next p) 173 go p = do e <- lift (next p)