aboutsummaryrefslogtreecommitdiffhomepage
path: root/Pipes/Text
diff options
context:
space:
mode:
Diffstat (limited to 'Pipes/Text')
-rw-r--r--Pipes/Text/Encoding.hs122
-rw-r--r--Pipes/Text/IO.hs104
2 files changed, 183 insertions, 43 deletions
diff --git a/Pipes/Text/Encoding.hs b/Pipes/Text/Encoding.hs
index e07c47e..a1a0113 100644
--- a/Pipes/Text/Encoding.hs
+++ b/Pipes/Text/Encoding.hs
@@ -1,16 +1,17 @@
1{-# LANGUAGE RankNTypes, BangPatterns #-} 1{-# LANGUAGE RankNTypes, BangPatterns #-}
2-- |
3 2
4-- This module uses the stream decoding functions from the text-stream-decoding package 3-- | This module uses the stream decoding functions from Michael Snoyman's new
5-- to define decoding functions and lenses. 4-- <http://hackage.haskell.org/package/text-stream-decode text-stream-decode>
5-- package to define decoding functions and lenses.
6 6
7module Pipes.Text.Encoding 7module Pipes.Text.Encoding
8 ( 8 (
9 -- * Lens type 9 -- * The Lens or Codec type
10 -- $lenses 10 -- $lenses
11 Codec 11 Codec
12 -- * Standard lenses for viewing Text in ByteString 12 -- * Viewing the Text in a ByteString
13 -- $codecs 13 -- $codecs
14 , decode
14 , utf8 15 , utf8
15 , utf8Pure 16 , utf8Pure
16 , utf16LE 17 , utf16LE
@@ -18,12 +19,20 @@ module Pipes.Text.Encoding
18 , utf32LE 19 , utf32LE
19 , utf32BE 20 , utf32BE
20 -- * Non-lens decoding functions 21 -- * Non-lens decoding functions
22 -- $decoders
21 , decodeUtf8 23 , decodeUtf8
22 , decodeUtf8Pure 24 , decodeUtf8Pure
23 , decodeUtf16LE 25 , decodeUtf16LE
24 , decodeUtf16BE 26 , decodeUtf16BE
25 , decodeUtf32LE 27 , decodeUtf32LE
26 , decodeUtf32BE 28 , decodeUtf32BE
29 -- * Re-encoding functions
30 -- $encoders
31 , encodeUtf8
32 , encodeUtf16LE
33 , encodeUtf16BE
34 , encodeUtf32LE
35 , encodeUtf32BE
27 -- * Functions for latin and ascii text 36 -- * Functions for latin and ascii text
28 -- $ascii 37 -- $ascii
29 , encodeAscii 38 , encodeAscii
@@ -33,6 +42,7 @@ module Pipes.Text.Encoding
33 ) 42 )
34 where 43 where
35 44
45import Data.Functor.Constant (Constant(..))
36import Data.Char (ord) 46import Data.Char (ord)
37import Data.ByteString as B 47import Data.ByteString as B
38import Data.ByteString (ByteString) 48import Data.ByteString (ByteString)
@@ -49,16 +59,16 @@ import Pipes
49type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a) 59type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
50 60
51{- $lenses 61{- $lenses
52 The 'Codec' type is just an aliased standard Prelude type. It just specializes 62 The 'Codec' type is a simple specializion of
53 the @Lens'@ type synonymn used by the standard lens libraries, @lens@ and 63 the @Lens'@ type synonymn used by the standard lens libraries,
54 @lens-families@ . You use them with 64 <http://hackage.haskell.org/package/lens lens> and
55 the @view@ or @(^.)@ and @zoom@ functions from those libraries. 65 <http://hackage.haskell.org/package/lens-family lens-family>. That type,
56 66
57 Each codec lens looks into a byte stream that is understood to contain text. 67> type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
58 The stream of text it 'sees' in the stream of bytes begins at its head; it ends 68
59 by reverting to (returning) the original byte stream 69 is just an alias for an ordinary Prelude type. Thus you use any codec with
60 beginning at the point of decoding failure. Where there is no decoding failure, 70 the @view@ / @(^.)@ and @zoom@ functions from those libraries.
61 it returns an empty byte stream with its return value. 71
62 -} 72 -}
63 73
64type Codec 74type Codec
@@ -66,6 +76,17 @@ type Codec
66 . Monad m 76 . Monad m
67 => Lens' (Producer ByteString m r) 77 => Lens' (Producer ByteString m r)
68 (Producer Text m (Producer ByteString m r)) 78 (Producer Text m (Producer ByteString m r))
79
80{- | 'decode' is just the ordinary @view@ or @(^.)@ of the lens libraries;
81 exported here for convience
82
83> decode utf8 p = decodeUtf8 p = view utf8 p = p ^. utf
84
85-}
86
87decode :: ((b -> Constant b b) -> (a -> Constant b a)) -> a -> b
88decode codec a = getConstant (codec Constant a)
89
69 90
70decodeStream :: Monad m 91decodeStream :: Monad m
71 => (B.ByteString -> DecodeResult) 92 => (B.ByteString -> DecodeResult)
@@ -82,7 +103,20 @@ decodeStream = loop where
82 p') 103 p')
83{-# INLINABLE decodeStream#-} 104{-# INLINABLE decodeStream#-}
84 105
106{- $decoders
107 These are functions with the simple type:
108
109> decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
110
111 Thus in general
112
113> decodeUtf8 = view utf8
114> decodeUtf16LE = view utf16LE
85 115
116 and so forth, but these forms
117 may be more convenient (and give better type errors!) where lenses are
118 not desired.
119-}
86 120
87 121
88decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r) 122decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
@@ -109,6 +143,34 @@ decodeUtf32BE :: Monad m => Producer ByteString m r -> Producer Text m (Producer
109decodeUtf32BE = decodeStream streamUtf32BE 143decodeUtf32BE = decodeStream streamUtf32BE
110{-# INLINE decodeUtf32BE #-} 144{-# INLINE decodeUtf32BE #-}
111 145
146
147{- $encoders
148 These are simply defined
149
150> encodeUtf8 = yield . TE.encodeUtf8
151
152 They are intended for use with 'for'
153
154> for Text.stdin encodeUtf8 :: Producer ByteString IO ()
155
156 which would have the effect of
157
158> Text.stdin >-> Pipes.Prelude.map (TE.encodeUtf8)
159
160 using the encoding functions from Data.Text.Encoding
161-}
162
163encodeUtf8 :: Monad m => Text -> Producer ByteString m ()
164encodeUtf8 = yield . TE.encodeUtf8
165encodeUtf16LE :: Monad m => Text -> Producer ByteString m ()
166encodeUtf16LE = yield . TE.encodeUtf16LE
167encodeUtf16BE :: Monad m => Text -> Producer ByteString m ()
168encodeUtf16BE = yield . TE.encodeUtf16BE
169encodeUtf32LE :: Monad m => Text -> Producer ByteString m ()
170encodeUtf32LE = yield . TE.encodeUtf32LE
171encodeUtf32BE :: Monad m => Text -> Producer ByteString m ()
172encodeUtf32BE = yield . TE.encodeUtf32BE
173
112mkCodec :: (forall r m . Monad m => 174mkCodec :: (forall r m . Monad m =>
113 Producer ByteString m r -> Producer Text m (Producer ByteString m r )) 175 Producer ByteString m r -> Producer Text m (Producer ByteString m r ))
114 -> (Text -> ByteString) 176 -> (Text -> ByteString)
@@ -118,11 +180,39 @@ mkCodec dec enc = \k p0 -> fmap (\p -> join (for p (yield . enc))) (k (dec p0))
118 180
119{- $codecs 181{- $codecs
120 182
121 The particular \'Codec\' lenses are named in accordance with the expected encoding, 'utf8', 'utf16LE' etc. 183 Each codec/lens looks into a byte stream that is supposed to contain text.
184 The particular \'Codec\' lenses are named in accordance with the expected
185 encoding, 'utf8', 'utf16LE' etc. @view@ / @(^.)@ -- here also called 'decode' --
186 turns a Codec into a function:
122 187
123> view utf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r) 188> view utf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r)
189> decode utf8 Byte.stdin :: Producer Text IO (Producer ByteString IO r)
124> Bytes.stdin ^. utf8 :: Producer Text IO (Producer ByteString IO r) 190> Bytes.stdin ^. utf8 :: Producer Text IO (Producer ByteString IO r)
125 191
192 Uses of a codec with @view@ / @(^.)@ / 'decode' can always be replaced by the specialized
193 decoding functions exported here, e.g.
194
195> decodeUtf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r)
196> decodeUtf8 Byte.stdin :: Producer Text IO (Producer ByteString IO r)
197
198 The stream of text a @Codec@ \'sees\' in the stream of bytes begins at its head.
199 At any point of decoding failure, the stream of text ends and reverts to (returns)
200 the original byte stream. Thus if the first bytes are already
201 un-decodable, the whole ByteString producer will be returned, i.e.
202
203> view utf8 bytestream
204
205 will just come to the same as
206
207> return bytestream
208
209 Where there is no decoding failure, the return value of the text stream will be
210 an empty byte stream followed by its own return value. In all cases you must
211 deal with the fact that it is a ByteString producer that is returned, even if
212 it can be thrown away with @Control.Monad.void@
213
214> void (Bytes.stdin ^. utf8) :: Producer Text IO ()
215
126 @zoom@ converts a Text parser into a ByteString parser: 216 @zoom@ converts a Text parser into a ByteString parser:
127 217
128> zoom utf8 drawChar :: Monad m => StateT (Producer ByteString m r) m (Maybe Char) 218> zoom utf8 drawChar :: Monad m => StateT (Producer ByteString m r) m (Maybe Char)
@@ -165,7 +255,7 @@ utf32BE = mkCodec decodeUtf32BE TE.encodeUtf32BE
165-} 255-}
166 256
167 257
168-- 'encodeAscii' reduces as much of your stream of 'Text' actually is ascii to a byte stream, 258-- | 'encodeAscii' reduces as much of your stream of 'Text' actually is ascii to a byte stream,
169-- returning the rest of the 'Text' at the first non-ascii 'Char' 259-- returning the rest of the 'Text' at the first non-ascii 'Char'
170 260
171encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r) 261encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
diff --git a/Pipes/Text/IO.hs b/Pipes/Text/IO.hs
index 92500c3..45a1467 100644
--- a/Pipes/Text/IO.hs
+++ b/Pipes/Text/IO.hs
@@ -1,34 +1,14 @@
1{-#LANGUAGE RankNTypes#-} 1{-#LANGUAGE RankNTypes#-}
2-- | The operations exported here are a convenience, like the similar operations in 2
3-- @Data.Text.IO@ , or rather, @Data.Text.Lazy.IO@, since @Producer Text m r@ is
4-- 'effectful text' and something like the pipes equivalent of lazy Text.
5--
6-- * Like the functions in @Data.Text.IO@, they attempt to work with the system encoding.
7--
8-- * Like the functions in @Data.Text.IO@, they are slower than ByteString operations. Where
9-- you know what encoding you are working with, use @Pipes.ByteString@ and @Pipes.Text.Encoding@ instead,
10-- e.g. @view utf8 Bytes.stdin@ instead of @Text.stdin@
11--
12-- * Like the functions in @Data.Text.IO@ , they use Text exceptions.
13--
14-- Something like
15--
16-- > view utf8 . Bytes.fromHandle :: Handle -> Producer Text IO (Producer ByteString m ())
17--
18-- yields a stream of Text, and follows
19-- standard pipes protocols by reverting to (i.e. returning) the underlying byte stream
20-- upon reaching any decoding error. (See especially the pipes-binary package.)
21--
22-- By contrast, something like
23--
24-- > Text.fromHandle :: Handle -> Producer Text IO ()
25--
26-- supplies a stream of text returning '()', which is convenient for many tasks,
27-- but violates the pipes @pipes-binary@ approach to decoding errors and
28-- throws an exception of the kind characteristic of the @text@ library instead.
29 3
30module Pipes.Text.IO 4module Pipes.Text.IO
31 ( 5 (
6 -- * Text IO
7 -- $textio
8
9 -- * Caveats
10 -- $caveats
11
32 -- * Producers 12 -- * Producers
33 fromHandle 13 fromHandle
34 , stdin 14 , stdin
@@ -52,6 +32,76 @@ import qualified Pipes.Safe as Safe
52import Pipes.Safe (MonadSafe(..), Base(..)) 32import Pipes.Safe (MonadSafe(..), Base(..))
53import Prelude hiding (readFile, writeFile) 33import Prelude hiding (readFile, writeFile)
54 34
35{- $textio
36 Where pipes IO replaces lazy IO, @Producer Text m r@ replaces lazy 'Text'.
37 This module exports some convenient functions for producing and consuming
38 pipes 'Text' in IO, with caveats described below. The main points are as in
39 <https://hackage.haskell.org/package/pipes-bytestring-1.0.0/docs/Pipes-ByteString.html @Pipes.ByteString@>
40
41 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
42
43 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
44 example, the following program copies a document from one file to another:
45
46> import Pipes
47> import qualified Pipes.Text as Text
48> import qualified Pipes.Text.IO as Text
49> import System.IO
50>
51> main =
52> withFile "inFile.txt" ReadMode $ \hIn ->
53> withFile "outFile.txt" WriteMode $ \hOut ->
54> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
55
56To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
57
58> import Pipes
59> import qualified Pipes.Text as Text
60> import qualified Pipes.Text.IO as Text
61> import Pipes.Safe
62>
63> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
64
65 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
66 and 'stdout' pipes, as with the following \"echo\" program:
67
68> main = runEffect $ Text.stdin >-> Text.stdout
69
70-}
71
72
73{- $caveats
74
75 The operations exported here are a convenience, like the similar operations in
76 @Data.Text.IO@ (or rather, @Data.Text.Lazy.IO@, since, again, @Producer Text m r@ is
77 'effectful text' and something like the pipes equivalent of lazy Text.)
78
79 * Like the functions in @Data.Text.IO@, they attempt to work with the system encoding.
80
81 * Like the functions in @Data.Text.IO@, they are slower than ByteString operations. Where
82 you know what encoding you are working with, use @Pipes.ByteString@ and @Pipes.Text.Encoding@ instead,
83 e.g. @view utf8 Bytes.stdin@ instead of @Text.stdin@
84
85 * Like the functions in @Data.Text.IO@ , they use Text exceptions.
86
87 Something like
88
89> view utf8 . Bytes.fromHandle :: Handle -> Producer Text IO (Producer ByteString m ())
90
91 yields a stream of Text, and follows
92 standard pipes protocols by reverting to (i.e. returning) the underlying byte stream
93 upon reaching any decoding error. (See especially the pipes-binary package.)
94
95 By contrast, something like
96
97> Text.fromHandle :: Handle -> Producer Text IO ()
98
99 supplies a stream of text returning '()', which is convenient for many tasks,
100 but violates the pipes @pipes-binary@ approach to decoding errors and
101 throws an exception of the kind characteristic of the @text@ library instead.
102
103
104-}
55 105
56{-| Convert a 'IO.Handle' into a text stream using a text size 106{-| Convert a 'IO.Handle' into a text stream using a text size
57 determined by the good sense of the text library. Note with the remarks 107 determined by the good sense of the text library. Note with the remarks