diff options
Diffstat (limited to 'Pipes/Text.hs')
-rw-r--r-- | Pipes/Text.hs | 269 |
1 files changed, 34 insertions, 235 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs index 4b2d2b0..8221c01 100644 --- a/Pipes/Text.hs +++ b/Pipes/Text.hs | |||
@@ -10,7 +10,8 @@ | |||
10 | example, the following program copies a document from one file to another: | 10 | example, the following program copies a document from one file to another: |
11 | 11 | ||
12 | > import Pipes | 12 | > import Pipes |
13 | > import qualified Data.Text.Pipes as Text | 13 | > import qualified Pipes.Text as Text |
14 | > import qualified Pipes.Text.IO as Text | ||
14 | > import System.IO | 15 | > import System.IO |
15 | > | 16 | > |
16 | > main = | 17 | > main = |
@@ -21,7 +22,8 @@ | |||
21 | To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe): | 22 | To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe): |
22 | 23 | ||
23 | > import Pipes | 24 | > import Pipes |
24 | > import qualified Data.Text.Pipes as Text | 25 | > import qualified Pipes.Text as Text |
26 | > import qualified Pipes.Text.IO as Text | ||
25 | > import Pipes.Safe | 27 | > import Pipes.Safe |
26 | > | 28 | > |
27 | > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt" | 29 | > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt" |
@@ -61,14 +63,14 @@ To stream from files, the following is perhaps more Prelude-like (note that it u | |||
61 | module Pipes.Text ( | 63 | module Pipes.Text ( |
62 | -- * Producers | 64 | -- * Producers |
63 | fromLazy | 65 | fromLazy |
64 | , stdin | 66 | -- , stdin |
65 | , fromHandle | 67 | -- , fromHandle |
66 | , readFile | 68 | -- , readFile |
67 | 69 | ||
68 | -- * Consumers | 70 | -- * Consumers |
69 | , stdout | 71 | -- , stdout |
70 | , toHandle | 72 | -- , toHandle |
71 | , writeFile | 73 | -- , writeFile |
72 | 74 | ||
73 | -- * Pipes | 75 | -- * Pipes |
74 | , map | 76 | , map |
@@ -79,7 +81,7 @@ module Pipes.Text ( | |||
79 | , dropWhile | 81 | , dropWhile |
80 | , filter | 82 | , filter |
81 | , scan | 83 | , scan |
82 | , encodeUtf8 | 84 | -- , encodeUtf8 |
83 | , pack | 85 | , pack |
84 | , unpack | 86 | , unpack |
85 | , toCaseFold | 87 | , toCaseFold |
@@ -120,22 +122,22 @@ module Pipes.Text ( | |||
120 | , word | 122 | , word |
121 | , line | 123 | , line |
122 | 124 | ||
123 | -- * Decoding Lenses | 125 | -- -- * Decoding Lenses |
124 | , decodeUtf8 | 126 | -- , decodeUtf8 |
125 | , codec | 127 | -- , codec |
126 | 128 | -- | |
127 | -- * Codecs | 129 | -- -- * Codecs |
128 | , utf8 | 130 | -- , utf8 |
129 | , utf16_le | 131 | -- , utf16_le |
130 | , utf16_be | 132 | -- , utf16_be |
131 | , utf32_le | 133 | -- , utf32_le |
132 | , utf32_be | 134 | -- , utf32_be |
133 | 135 | -- | |
134 | -- * Other Decoding/Encoding Functions | 136 | -- -- * Other Decoding/Encoding Functions |
135 | , decodeIso8859_1 | 137 | -- , decodeIso8859_1 |
136 | , decodeAscii | 138 | -- , decodeAscii |
137 | , encodeIso8859_1 | 139 | -- , encodeIso8859_1 |
138 | , encodeAscii | 140 | -- , encodeAscii |
139 | 141 | ||
140 | -- * FreeT Splitters | 142 | -- * FreeT Splitters |
141 | , chunksOf | 143 | , chunksOf |
@@ -157,11 +159,9 @@ module Pipes.Text ( | |||
157 | 159 | ||
158 | -- * Re-exports | 160 | -- * Re-exports |
159 | -- $reexports | 161 | -- $reexports |
160 | , Decoding(..) | 162 | -- , DecodeResult(..) |
161 | , streamDecodeUtf8 | 163 | -- , Codec |
162 | , decodeSomeUtf8 | 164 | -- , TextException(..) |
163 | , Codec(..) | ||
164 | , TextException(..) | ||
165 | , module Data.ByteString | 165 | , module Data.ByteString |
166 | , module Data.Text | 166 | , module Data.Text |
167 | , module Data.Profunctor | 167 | , module Data.Profunctor |
@@ -170,7 +170,6 @@ module Pipes.Text ( | |||
170 | , module Pipes.Group | 170 | , module Pipes.Group |
171 | ) where | 171 | ) where |
172 | 172 | ||
173 | import Control.Exception (throwIO, try) | ||
174 | import Control.Applicative ((<*)) | 173 | import Control.Applicative ((<*)) |
175 | import Control.Monad (liftM, unless, join) | 174 | import Control.Monad (liftM, unless, join) |
176 | import Control.Monad.Trans.State.Strict (StateT(..), modify) | 175 | import Control.Monad.Trans.State.Strict (StateT(..), modify) |
@@ -193,24 +192,20 @@ import Data.Functor.Identity (Identity) | |||
193 | import Data.Profunctor (Profunctor) | 192 | import Data.Profunctor (Profunctor) |
194 | import qualified Data.Profunctor | 193 | import qualified Data.Profunctor |
195 | import qualified Data.List as List | 194 | import qualified Data.List as List |
196 | import Foreign.C.Error (Errno(Errno), ePIPE) | ||
197 | import qualified GHC.IO.Exception as G | ||
198 | import Pipes | 195 | import Pipes |
199 | import qualified Pipes.ByteString as PB | 196 | import qualified Pipes.ByteString as PB |
200 | import qualified Pipes.Text.Internal as PI | 197 | -- import Pipes.Text.Decoding |
201 | import Pipes.Text.Internal | ||
202 | import Pipes.Core (respond, Server') | 198 | import Pipes.Core (respond, Server') |
203 | import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..)) | 199 | import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..)) |
204 | import qualified Pipes.Group as PG | 200 | import qualified Pipes.Group as PG |
205 | import qualified Pipes.Parse as PP | 201 | import qualified Pipes.Parse as PP |
206 | import Pipes.Parse (Parser) | 202 | import Pipes.Parse (Parser) |
207 | import qualified Pipes.Safe.Prelude as Safe | 203 | |
208 | import qualified Pipes.Safe as Safe | ||
209 | import Pipes.Safe (MonadSafe(..), Base(..)) | ||
210 | import qualified Pipes.Prelude as P | 204 | import qualified Pipes.Prelude as P |
211 | import qualified System.IO as IO | 205 | import qualified System.IO as IO |
212 | import Data.Char (isSpace) | 206 | import Data.Char (isSpace) |
213 | import Data.Word (Word8) | 207 | import Data.Word (Word8) |
208 | import Data.Text.StreamDecoding | ||
214 | 209 | ||
215 | import Prelude hiding ( | 210 | import Prelude hiding ( |
216 | all, | 211 | all, |
@@ -246,78 +241,6 @@ fromLazy :: (Monad m) => TL.Text -> Producer' Text m () | |||
246 | fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) | 241 | fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) |
247 | {-# INLINE fromLazy #-} | 242 | {-# INLINE fromLazy #-} |
248 | 243 | ||
249 | -- | Stream text from 'stdin' | ||
250 | stdin :: MonadIO m => Producer Text m () | ||
251 | stdin = fromHandle IO.stdin | ||
252 | {-# INLINE stdin #-} | ||
253 | |||
254 | {-| Convert a 'IO.Handle' into a text stream using a text size | ||
255 | determined by the good sense of the text library; note that this | ||
256 | is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@ | ||
257 | but uses the system encoding and has other `Data.Text.IO` features | ||
258 | -} | ||
259 | |||
260 | fromHandle :: MonadIO m => IO.Handle -> Producer Text m () | ||
261 | fromHandle h = go where | ||
262 | go = do txt <- liftIO (T.hGetChunk h) | ||
263 | unless (T.null txt) ( do yield txt | ||
264 | go ) | ||
265 | {-# INLINABLE fromHandle#-} | ||
266 | |||
267 | |||
268 | {-| Stream text from a file in the simple fashion of @Data.Text.IO@ | ||
269 | |||
270 | >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout | ||
271 | MAIN = PUTSTRLN "HELLO WORLD" | ||
272 | -} | ||
273 | |||
274 | readFile :: MonadSafe m => FilePath -> Producer Text m () | ||
275 | readFile file = Safe.withFile file IO.ReadMode fromHandle | ||
276 | {-# INLINE readFile #-} | ||
277 | |||
278 | |||
279 | {-| Stream text to 'stdout' | ||
280 | |||
281 | Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe. | ||
282 | |||
283 | Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@ | ||
284 | instead of @(source >-> stdout)@ . | ||
285 | -} | ||
286 | stdout :: MonadIO m => Consumer' Text m () | ||
287 | stdout = go | ||
288 | where | ||
289 | go = do | ||
290 | txt <- await | ||
291 | x <- liftIO $ try (T.putStr txt) | ||
292 | case x of | ||
293 | Left (G.IOError { G.ioe_type = G.ResourceVanished | ||
294 | , G.ioe_errno = Just ioe }) | ||
295 | | Errno ioe == ePIPE | ||
296 | -> return () | ||
297 | Left e -> liftIO (throwIO e) | ||
298 | Right () -> go | ||
299 | {-# INLINABLE stdout #-} | ||
300 | |||
301 | |||
302 | {-| Convert a text stream into a 'Handle' | ||
303 | |||
304 | Note: again, for best performance, where possible use | ||
305 | @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@. | ||
306 | -} | ||
307 | toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r | ||
308 | toHandle h = for cat (liftIO . T.hPutStr h) | ||
309 | {-# INLINABLE toHandle #-} | ||
310 | |||
311 | {-# RULES "p >-> toHandle h" forall p h . | ||
312 | p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt)) | ||
313 | #-} | ||
314 | |||
315 | |||
316 | -- | Stream text into a file. Uses @pipes-safe@. | ||
317 | writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m () | ||
318 | writeFile file = Safe.withFile file IO.WriteMode toHandle | ||
319 | {-# INLINE writeFile #-} | ||
320 | |||
321 | 244 | ||
322 | type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a) | 245 | type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a) |
323 | 246 | ||
@@ -690,28 +613,6 @@ isEndOfChars = do | |||
690 | {-# INLINABLE isEndOfChars #-} | 613 | {-# INLINABLE isEndOfChars #-} |
691 | 614 | ||
692 | 615 | ||
693 | {- | An improper lens into a stream of 'ByteString' expected to be UTF-8 encoded; the associated | ||
694 | stream of Text ends by returning a stream of ByteStrings beginning at the point of failure. | ||
695 | -} | ||
696 | |||
697 | decodeUtf8 :: Monad m => Lens' (Producer ByteString m r) | ||
698 | (Producer Text m (Producer ByteString m r)) | ||
699 | decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8))) | ||
700 | (k (go B.empty PI.streamDecodeUtf8 p0)) where | ||
701 | go !carry dec0 p = do | ||
702 | x <- lift (next p) | ||
703 | case x of Left r -> return (if B.null carry | ||
704 | then return r -- all bytestring input was consumed | ||
705 | else (do yield carry -- a potentially valid fragment remains | ||
706 | return r)) | ||
707 | |||
708 | Right (chunk, p') -> case dec0 chunk of | ||
709 | PI.Some text carry2 dec -> do yield text | ||
710 | go carry2 dec p' | ||
711 | PI.Other text bs -> do yield text | ||
712 | return (do yield bs -- an invalid blob remains | ||
713 | p') | ||
714 | {-# INLINABLE decodeUtf8 #-} | ||
715 | 616 | ||
716 | 617 | ||
717 | -- | Splits a 'Producer' after the given number of characters | 618 | -- | Splits a 'Producer' after the given number of characters |
@@ -1057,106 +958,4 @@ unwords = intercalate (yield $ T.singleton ' ') | |||
1057 | @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym. | 958 | @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym. |
1058 | -} | 959 | -} |
1059 | 960 | ||
1060 | {- | Use a 'Codec' as a pipes-style 'Lens' into a byte stream; the available 'Codec' s are | 961 | |
1061 | 'utf8', 'utf16_le', 'utf16_be', 'utf32_le', 'utf32_be' . The 'Codec' concept and the | ||
1062 | individual 'Codec' definitions follow the enumerator and conduit libraries. | ||
1063 | |||
1064 | Utf8 is handled differently in this library -- without the use of 'unsafePerformIO' &co | ||
1065 | to catch 'Text' exceptions; but the same 'mypipe ^. codec utf8' interface can be used. | ||
1066 | 'mypipe ^. decodeUtf8' should be the same, but has a somewhat more direct and thus perhaps | ||
1067 | better implementation. | ||
1068 | |||
1069 | -} | ||
1070 | codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r)) | ||
1071 | codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc))) | ||
1072 | (k (decoder (dec B.empty) p0) ) where | ||
1073 | decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r) | ||
1074 | decoder !d p0 = case d of | ||
1075 | PI.Other txt bad -> do yield txt | ||
1076 | return (do yield bad | ||
1077 | p0) | ||
1078 | PI.Some txt extra dec -> do yield txt | ||
1079 | x <- lift (next p0) | ||
1080 | case x of Left r -> return (do yield extra | ||
1081 | return r) | ||
1082 | Right (chunk,p1) -> decoder (dec chunk) p1 | ||
1083 | |||
1084 | {- | ascii and latin encodings only represent a small fragment of 'Text'; thus we cannot | ||
1085 | use the pipes 'Lens' style to work with them. Rather we simply define functions | ||
1086 | each way. | ||
1087 | |||
1088 | 'encodeAscii' : Reduce as much of your stream of 'Text' actually is ascii to a byte stream, | ||
1089 | returning the rest of the 'Text' at the first non-ascii 'Char' | ||
1090 | -} | ||
1091 | encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r) | ||
1092 | encodeAscii = go where | ||
1093 | go p = do echunk <- lift (next p) | ||
1094 | case echunk of | ||
1095 | Left r -> return (return r) | ||
1096 | Right (chunk, p') -> | ||
1097 | if T.null chunk | ||
1098 | then go p' | ||
1099 | else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk | ||
1100 | in do yield (B8.pack (T.unpack safe)) | ||
1101 | if T.null unsafe | ||
1102 | then go p' | ||
1103 | else return $ do yield unsafe | ||
1104 | p' | ||
1105 | {- | Reduce as much of your stream of 'Text' actually is iso8859 or latin1 to a byte stream, | ||
1106 | returning the rest of the 'Text' upon hitting any non-latin 'Char' | ||
1107 | -} | ||
1108 | encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r) | ||
1109 | encodeIso8859_1 = go where | ||
1110 | go p = do etxt <- lift (next p) | ||
1111 | case etxt of | ||
1112 | Left r -> return (return r) | ||
1113 | Right (txt, p') -> | ||
1114 | if T.null txt | ||
1115 | then go p' | ||
1116 | else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt | ||
1117 | in do yield (B8.pack (T.unpack safe)) | ||
1118 | if T.null unsafe | ||
1119 | then go p' | ||
1120 | else return $ do yield unsafe | ||
1121 | p' | ||
1122 | |||
1123 | {- | Reduce a byte stream to a corresponding stream of ascii chars, returning the | ||
1124 | unused 'ByteString' upon hitting an un-ascii byte. | ||
1125 | -} | ||
1126 | decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r) | ||
1127 | decodeAscii = go where | ||
1128 | go p = do echunk <- lift (next p) | ||
1129 | case echunk of | ||
1130 | Left r -> return (return r) | ||
1131 | Right (chunk, p') -> | ||
1132 | if B.null chunk | ||
1133 | then go p' | ||
1134 | else let (safe, unsafe) = B.span (<= 0x7F) chunk | ||
1135 | in do yield (T.pack (B8.unpack safe)) | ||
1136 | if B.null unsafe | ||
1137 | then go p' | ||
1138 | else return $ do yield unsafe | ||
1139 | p' | ||
1140 | |||
1141 | {- | Reduce a byte stream to a corresponding stream of ascii chars, returning the | ||
1142 | unused 'ByteString' upon hitting the rare un-latinizable byte. | ||
1143 | -} | ||
1144 | decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r) | ||
1145 | decodeIso8859_1 = go where | ||
1146 | go p = do echunk <- lift (next p) | ||
1147 | case echunk of | ||
1148 | Left r -> return (return r) | ||
1149 | Right (chunk, p') -> | ||
1150 | if B.null chunk | ||
1151 | then go p' | ||
1152 | else let (safe, unsafe) = B.span (<= 0xFF) chunk | ||
1153 | in do yield (T.pack (B8.unpack safe)) | ||
1154 | if B.null unsafe | ||
1155 | then go p' | ||
1156 | else return $ do yield unsafe | ||
1157 | p' | ||
1158 | |||
1159 | |||
1160 | |||
1161 | |||
1162 | |||