aboutsummaryrefslogtreecommitdiffhomepage
path: root/Pipes/Text/Encoding.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Pipes/Text/Encoding.hs')
-rw-r--r--Pipes/Text/Encoding.hs122
1 files changed, 106 insertions, 16 deletions
diff --git a/Pipes/Text/Encoding.hs b/Pipes/Text/Encoding.hs
index e07c47e..a1a0113 100644
--- a/Pipes/Text/Encoding.hs
+++ b/Pipes/Text/Encoding.hs
@@ -1,16 +1,17 @@
1{-# LANGUAGE RankNTypes, BangPatterns #-} 1{-# LANGUAGE RankNTypes, BangPatterns #-}
2-- |
3 2
4-- This module uses the stream decoding functions from the text-stream-decoding package 3-- | This module uses the stream decoding functions from Michael Snoyman's new
5-- to define decoding functions and lenses. 4-- <http://hackage.haskell.org/package/text-stream-decode text-stream-decode>
5-- package to define decoding functions and lenses.
6 6
7module Pipes.Text.Encoding 7module Pipes.Text.Encoding
8 ( 8 (
9 -- * Lens type 9 -- * The Lens or Codec type
10 -- $lenses 10 -- $lenses
11 Codec 11 Codec
12 -- * Standard lenses for viewing Text in ByteString 12 -- * Viewing the Text in a ByteString
13 -- $codecs 13 -- $codecs
14 , decode
14 , utf8 15 , utf8
15 , utf8Pure 16 , utf8Pure
16 , utf16LE 17 , utf16LE
@@ -18,12 +19,20 @@ module Pipes.Text.Encoding
18 , utf32LE 19 , utf32LE
19 , utf32BE 20 , utf32BE
20 -- * Non-lens decoding functions 21 -- * Non-lens decoding functions
22 -- $decoders
21 , decodeUtf8 23 , decodeUtf8
22 , decodeUtf8Pure 24 , decodeUtf8Pure
23 , decodeUtf16LE 25 , decodeUtf16LE
24 , decodeUtf16BE 26 , decodeUtf16BE
25 , decodeUtf32LE 27 , decodeUtf32LE
26 , decodeUtf32BE 28 , decodeUtf32BE
29 -- * Re-encoding functions
30 -- $encoders
31 , encodeUtf8
32 , encodeUtf16LE
33 , encodeUtf16BE
34 , encodeUtf32LE
35 , encodeUtf32BE
27 -- * Functions for latin and ascii text 36 -- * Functions for latin and ascii text
28 -- $ascii 37 -- $ascii
29 , encodeAscii 38 , encodeAscii
@@ -33,6 +42,7 @@ module Pipes.Text.Encoding
33 ) 42 )
34 where 43 where
35 44
45import Data.Functor.Constant (Constant(..))
36import Data.Char (ord) 46import Data.Char (ord)
37import Data.ByteString as B 47import Data.ByteString as B
38import Data.ByteString (ByteString) 48import Data.ByteString (ByteString)
@@ -49,16 +59,16 @@ import Pipes
49type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a) 59type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
50 60
51{- $lenses 61{- $lenses
52 The 'Codec' type is just an aliased standard Prelude type. It just specializes 62 The 'Codec' type is a simple specialization of
53 the @Lens'@ type synonymn used by the standard lens libraries, @lens@ and 63 the @Lens'@ type synonym used by the standard lens libraries,
54 @lens-families@ . You use them with 64 <http://hackage.haskell.org/package/lens lens> and
55 the @view@ or @(^.)@ and @zoom@ functions from those libraries. 65 <http://hackage.haskell.org/package/lens-family lens-family>. That type,
56 66
57 Each codec lens looks into a byte stream that is understood to contain text. 67> type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
58 The stream of text it 'sees' in the stream of bytes begins at its head; it ends 68
59 by reverting to (returning) the original byte stream 69 is just an alias for an ordinary Prelude type. Thus you use any codec with
60 beginning at the point of decoding failure. Where there is no decoding failure, 70 the @view@ / @(^.)@ and @zoom@ functions from those libraries.
61 it returns an empty byte stream with its return value. 71
62 -} 72 -}
63 73
64type Codec 74type Codec
@@ -66,6 +76,17 @@ type Codec
66 . Monad m 76 . Monad m
67 => Lens' (Producer ByteString m r) 77 => Lens' (Producer ByteString m r)
68 (Producer Text m (Producer ByteString m r)) 78 (Producer Text m (Producer ByteString m r))
79
80{- | 'decode' is just the ordinary @view@ or @(^.)@ of the lens libraries;
81 exported here for convenience
82
83> decode utf8 p = decodeUtf8 p = view utf8 p = p ^. utf8
84
85-}
86
87decode :: ((b -> Constant b b) -> (a -> Constant b a)) -> a -> b
88decode codec a = getConstant (codec Constant a)
89
69 90
70decodeStream :: Monad m 91decodeStream :: Monad m
71 => (B.ByteString -> DecodeResult) 92 => (B.ByteString -> DecodeResult)
@@ -82,7 +103,20 @@ decodeStream = loop where
82 p') 103 p')
83{-# INLINABLE decodeStream#-} 104{-# INLINABLE decodeStream#-}
84 105
106{- $decoders
107 These are functions with the simple type:
108
109> decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
110
111 Thus in general
112
113> decodeUtf8 = view utf8
114> decodeUtf16LE = view utf16LE
85 115
116 and so forth, but these forms
117 may be more convenient (and give better type errors!) where lenses are
118 not desired.
119-}
86 120
87 121
88decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r) 122decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
@@ -109,6 +143,34 @@ decodeUtf32BE :: Monad m => Producer ByteString m r -> Producer Text m (Producer
109decodeUtf32BE = decodeStream streamUtf32BE 143decodeUtf32BE = decodeStream streamUtf32BE
110{-# INLINE decodeUtf32BE #-} 144{-# INLINE decodeUtf32BE #-}
111 145
146
147{- $encoders
148 These are simply defined
149
150> encodeUtf8 = yield . TE.encodeUtf8
151
152 They are intended for use with 'for'
153
154> for Text.stdin encodeUtf8 :: Producer ByteString IO ()
155
156 which would have the effect of
157
158> Text.stdin >-> Pipes.Prelude.map (TE.encodeUtf8)
159
160 using the encoding functions from Data.Text.Encoding
161-}
162
163encodeUtf8 :: Monad m => Text -> Producer ByteString m ()
164encodeUtf8 = yield . TE.encodeUtf8
165encodeUtf16LE :: Monad m => Text -> Producer ByteString m ()
166encodeUtf16LE = yield . TE.encodeUtf16LE
167encodeUtf16BE :: Monad m => Text -> Producer ByteString m ()
168encodeUtf16BE = yield . TE.encodeUtf16BE
169encodeUtf32LE :: Monad m => Text -> Producer ByteString m ()
170encodeUtf32LE = yield . TE.encodeUtf32LE
171encodeUtf32BE :: Monad m => Text -> Producer ByteString m ()
172encodeUtf32BE = yield . TE.encodeUtf32BE
173
112mkCodec :: (forall r m . Monad m => 174mkCodec :: (forall r m . Monad m =>
113 Producer ByteString m r -> Producer Text m (Producer ByteString m r )) 175 Producer ByteString m r -> Producer Text m (Producer ByteString m r ))
114 -> (Text -> ByteString) 176 -> (Text -> ByteString)
@@ -118,11 +180,39 @@ mkCodec dec enc = \k p0 -> fmap (\p -> join (for p (yield . enc))) (k (dec p0))
118 180
119{- $codecs 181{- $codecs
120 182
121 The particular \'Codec\' lenses are named in accordance with the expected encoding, 'utf8', 'utf16LE' etc. 183 Each codec/lens looks into a byte stream that is supposed to contain text.
184 The particular \'Codec\' lenses are named in accordance with the expected
185 encoding, 'utf8', 'utf16LE' etc. @view@ / @(^.)@ -- here also called 'decode' --
186 turns a Codec into a function:
122 187
123> view utf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r) 188> view utf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r)
189> decode utf8 Bytes.stdin :: Producer Text IO (Producer ByteString IO r)
124> Bytes.stdin ^. utf8 :: Producer Text IO (Producer ByteString IO r) 190> Bytes.stdin ^. utf8 :: Producer Text IO (Producer ByteString IO r)
125 191
192 Uses of a codec with @view@ / @(^.)@ / 'decode' can always be replaced by the specialized
193 decoding functions exported here, e.g.
194
195> decodeUtf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r)
196> decodeUtf8 Bytes.stdin :: Producer Text IO (Producer ByteString IO r)
197
198 The stream of text a @Codec@ \'sees\' in the stream of bytes begins at its head.
199 At any point of decoding failure, the stream of text ends and reverts to (returns)
200 the original byte stream. Thus if the first bytes are already
201 un-decodable, the whole ByteString producer will be returned, i.e.
202
203> view utf8 bytestream
204
205 will just come to the same as
206
207> return bytestream
208
209 Where there is no decoding failure, the return value of the text stream will be
210 an empty byte stream followed by its own return value. In all cases you must
211 deal with the fact that it is a ByteString producer that is returned, even if
212 it can be thrown away with @Control.Monad.void@
213
214> void (Bytes.stdin ^. utf8) :: Producer Text IO ()
215
126 @zoom@ converts a Text parser into a ByteString parser: 216 @zoom@ converts a Text parser into a ByteString parser:
127 217
128> zoom utf8 drawChar :: Monad m => StateT (Producer ByteString m r) m (Maybe Char) 218> zoom utf8 drawChar :: Monad m => StateT (Producer ByteString m r) m (Maybe Char)
@@ -165,7 +255,7 @@ utf32BE = mkCodec decodeUtf32BE TE.encodeUtf32BE
165-} 255-}
166 256
167 257
168-- 'encodeAscii' reduces as much of your stream of 'Text' actually is ascii to a byte stream, 258-- | 'encodeAscii' reduces as much of your stream of 'Text' as actually is ascii to a byte stream,
169-- returning the rest of the 'Text' at the first non-ascii 'Char' 259-- returning the rest of the 'Text' at the first non-ascii 'Char'
170 260
171encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r) 261encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)