]>
Commit | Line | Data |
---|---|---|
1677dc12 | 1 | {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-} |
64e03122 | 2 | #if __GLASGOW_HASKELL__ >= 702 |
3 | {-# LANGUAGE Trustworthy #-} | |
4 | #endif | |
13a43263 | 5 | {-| This module provides @pipes@ utilities for \"text streams\", which are |
64e03122 | 6 | streams of 'Text' chunks. The individual chunks are uniformly @strict@, but |
7 | a 'Producer' can be converted to and from lazy 'Text's, though this is generally | |
8 | unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'. | |
9 | An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to. | |
91727d11 | 10 | |
63ea9ffd | 11 | To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For |
31f41a5d | 12 | example, the following program copies a document from one file to another: |
91727d11 | 13 | |
14 | > import Pipes | |
31f41a5d | 15 | > import qualified Data.Text.Pipes as Text |
91727d11 | 16 | > import System.IO |
17 | > | |
18 | > main = | |
19 | > withFile "inFile.txt" ReadMode $ \hIn -> | |
20 | > withFile "outFile.txt" WriteMode $ \hOut -> | |
31f41a5d | 21 | > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut |
22 | ||
23 | To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe): | |
91727d11 | 24 | |
13a43263 | 25 | > import Pipes |
31f41a5d | 26 | > import qualified Data.Text.Pipes as Text |
13a43263 | 27 | > import Pipes.Safe |
28 | > | |
31f41a5d | 29 | > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt" |
13a43263 | 30 | |
91727d11 | 31 | You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin' |
31f41a5d | 32 | and 'stdout' proxies, as with the following \"echo\" program: |
91727d11 | 33 | |
31f41a5d | 34 | > main = runEffect $ Text.stdin >-> Text.stdout |
91727d11 | 35 | |
36 | You can also translate pure lazy 'TL.Text's to and from proxies: | |
37 | ||
31f41a5d | 38 | > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout |
91727d11 | 39 | |
40 | In addition, this module provides many functions equivalent to lazy | |
31f41a5d | 41 | 'Text' functions so that you can transform or fold text streams. For |
91727d11 | 42 | example, to stream only the first three lines of 'stdin' to 'stdout' you |
31f41a5d | 43 | might write: |
91727d11 | 44 | |
45 | > import Pipes | |
31f41a5d | 46 | > import qualified Pipes.Text as Text |
47 | > import qualified Pipes.Parse as Parse | |
91727d11 | 48 | > |
31f41a5d | 49 | > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout |
91727d11 | 50 | > where |
31f41a5d | 51 | > takeLines n = Text.unlines . Parse.takeFree n . Text.lines |
91727d11 | 52 | |
31f41a5d | 53 | The above program will never bring more than one chunk of text (~ 32 KB) into |
91727d11 | 54 | memory, no matter how long the lines are. |
55 | ||
56 | Note that functions in this library are designed to operate on streams that | |
31f41a5d | 57 | are insensitive to text boundaries. This means that they may freely split |
64e03122 | 58 | text into smaller texts, /discard empty texts/. However, apart from the |
59 | special case of 'concatMap', they will /never concatenate texts/ in order | |
60 | to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'. | |
91727d11 | 61 | -} |
62 | ||
7faef8bc | 63 | module Pipes.Text ( |
91727d11 | 64 | -- * Producers |
1677dc12 | 65 | fromLazy |
66 | , stdin | |
67 | , fromHandle | |
68 | , readFile | |
69 | , stdinLn | |
91727d11 | 70 | |
71 | -- * Consumers | |
1677dc12 | 72 | , stdout |
73 | , stdoutLn | |
74 | , toHandle | |
75 | , writeFile | |
91727d11 | 76 | |
77 | -- * Pipes | |
1677dc12 | 78 | , map |
79 | , concatMap | |
80 | , take | |
81 | , drop | |
82 | , takeWhile | |
83 | , dropWhile | |
84 | , filter | |
85 | , scan | |
86 | , encodeUtf8 | |
87 | , pack | |
88 | , unpack | |
89 | , toCaseFold | |
90 | , toLower | |
91 | , toUpper | |
92 | , stripStart | |
91727d11 | 93 | |
94 | -- * Folds | |
1677dc12 | 95 | , toLazy |
96 | , toLazyM | |
97 | , foldChars | |
98 | , head | |
99 | , last | |
100 | , null | |
101 | , length | |
102 | , any | |
103 | , all | |
104 | , maximum | |
105 | , minimum | |
106 | , find | |
107 | , index | |
108 | , count | |
109 | ||
110 | -- * Primitive Character Parsers | |
111 | -- $parse | |
112 | , nextChar | |
113 | , drawChar | |
114 | , unDrawChar | |
115 | , peekChar | |
9e9bb0ce | 116 | , isEndOfChars |
1677dc12 | 117 | |
118 | -- * Parsing Lenses | |
9e9bb0ce | 119 | , splitAt |
1677dc12 | 120 | , span |
121 | , break | |
122 | , groupBy | |
123 | , group | |
9e9bb0ce | 124 | , word |
125 | , line | |
90189414 | 126 | |
127 | -- * Decoding Lenses | |
1677dc12 | 128 | , decodeUtf8 |
90189414 | 129 | , codec |
130 | ||
131 | -- * Codecs | |
132 | , utf8 | |
133 | , utf16_le | |
134 | , utf16_be | |
135 | , utf32_le | |
136 | , utf32_be | |
137 | ||
138 | -- * Other Decoding/Encoding Functions | |
139 | , decodeIso8859_1 | |
140 | , decodeAscii | |
141 | , encodeIso8859_1 | |
142 | , encodeAscii | |
1677dc12 | 143 | |
144 | -- * FreeT Splitters | |
145 | , chunksOf | |
146 | , splitsWith | |
0f8c6f1b | 147 | , splits |
1677dc12 | 148 | -- , groupsBy |
149 | -- , groups | |
150 | , lines | |
151 | , words | |
152 | ||
91727d11 | 153 | -- * Transformations |
1677dc12 | 154 | , intersperse |
9e9bb0ce | 155 | , packChars |
31f41a5d | 156 | |
91727d11 | 157 | -- * Joiners |
1677dc12 | 158 | , intercalate |
159 | , unlines | |
160 | , unwords | |
9e9bb0ce | 161 | |
1677dc12 | 162 | -- * Re-exports |
91727d11 | 163 | -- $reexports |
1677dc12 | 164 | , module Data.ByteString |
165 | , module Data.Text | |
166 | , module Data.Profunctor | |
167 | , module Data.Word | |
168 | , module Pipes.Parse | |
7ed76745 | 169 | , module Pipes.Group |
409759e8 | 170 | , module Pipes.Text.Internal.Codec |
91727d11 | 171 | ) where |
172 | ||
173 | import Control.Exception (throwIO, try) | |
0f8c6f1b | 174 | import Control.Applicative ((<*)) |
64e03122 | 175 | import Control.Monad (liftM, unless, join) |
9e9bb0ce | 176 | import Control.Monad.Trans.State.Strict (StateT(..), modify) |
ca6f90a0 | 177 | import Data.Monoid ((<>)) |
91727d11 | 178 | import qualified Data.Text as T |
179 | import qualified Data.Text.IO as T | |
31f41a5d | 180 | import qualified Data.Text.Encoding as TE |
63ea9ffd | 181 | import qualified Data.Text.Encoding.Error as TE |
91727d11 | 182 | import Data.Text (Text) |
183 | import qualified Data.Text.Lazy as TL | |
184 | import qualified Data.Text.Lazy.IO as TL | |
185 | import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize) | |
186 | import Data.ByteString.Unsafe (unsafeTake, unsafeDrop) | |
31f41a5d | 187 | import Data.ByteString (ByteString) |
188 | import qualified Data.ByteString as B | |
90189414 | 189 | import qualified Data.ByteString.Char8 as B8 |
cf10d6f1 | 190 | import Data.Char (ord, isSpace) |
1677dc12 | 191 | import Data.Functor.Constant (Constant(Constant, getConstant)) |
91727d11 | 192 | import Data.Functor.Identity (Identity) |
1677dc12 | 193 | import Data.Profunctor (Profunctor) |
194 | import qualified Data.Profunctor | |
91727d11 | 195 | import qualified Data.List as List |
196 | import Foreign.C.Error (Errno(Errno), ePIPE) | |
197 | import qualified GHC.IO.Exception as G | |
198 | import Pipes | |
5e3f5409 | 199 | import qualified Pipes.ByteString as PB |
409759e8 | 200 | import qualified Pipes.Text.Internal.Decoding as PE |
201 | import Pipes.Text.Internal.Codec | |
91727d11 | 202 | import Pipes.Core (respond, Server') |
7fc48f7c | 203 | import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..)) |
7ed76745 | 204 | import qualified Pipes.Group as PG |
91727d11 | 205 | import qualified Pipes.Parse as PP |
7ed76745 | 206 | import Pipes.Parse (Parser) |
91727d11 | 207 | import qualified Pipes.Safe.Prelude as Safe |
208 | import qualified Pipes.Safe as Safe | |
209 | import Pipes.Safe (MonadSafe(..), Base(..)) | |
210 | import qualified Pipes.Prelude as P | |
211 | import qualified System.IO as IO | |
212 | import Data.Char (isSpace) | |
63ea9ffd | 213 | import Data.Word (Word8) |
1677dc12 | 214 | |
91727d11 | 215 | import Prelude hiding ( |
216 | all, | |
217 | any, | |
218 | break, | |
219 | concat, | |
220 | concatMap, | |
221 | drop, | |
222 | dropWhile, | |
223 | elem, | |
224 | filter, | |
225 | head, | |
226 | last, | |
227 | lines, | |
228 | length, | |
229 | map, | |
230 | maximum, | |
231 | minimum, | |
232 | notElem, | |
233 | null, | |
234 | readFile, | |
235 | span, | |
236 | splitAt, | |
237 | take, | |
238 | takeWhile, | |
239 | unlines, | |
240 | unwords, | |
241 | words, | |
242 | writeFile ) | |
243 | ||
244 | -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's | |
245 | fromLazy :: (Monad m) => TL.Text -> Producer' Text m () | |
246 | fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) | |
ca6f90a0 | 247 | {-# INLINE fromLazy #-} |
91727d11 | 248 | |
62e8521c | 249 | -- | Stream text from 'stdin' |
ca6f90a0 | 250 | stdin :: MonadIO m => Producer Text m () |
91727d11 | 251 | stdin = fromHandle IO.stdin |
ca6f90a0 | 252 | {-# INLINE stdin #-} |
91727d11 | 253 | |
31f41a5d | 254 | {-| Convert a 'IO.Handle' into a text stream using a text size |
ca6f90a0 | 255 | determined by the good sense of the text library; note that this |
256 | is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@ | |
257 | but uses the system encoding and has other `Data.Text.IO` features | |
31f41a5d | 258 | -} |
259 | ||
ca6f90a0 | 260 | fromHandle :: MonadIO m => IO.Handle -> Producer Text m () |
261 | fromHandle h = go where | |
262 | go = do txt <- liftIO (T.hGetChunk h) | |
4cbc92cc | 263 | unless (T.null txt) ( do yield txt |
264 | go ) | |
91727d11 | 265 | {-# INLINABLE fromHandle#-} |
ca6f90a0 | 266 | |
267 | ||
268 | {-| Stream text from a file in the simple fashion of @Data.Text.IO@ | |
6f6f9974 | 269 | |
31f41a5d | 270 | >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout |
271 | MAIN = PUTSTRLN "HELLO WORLD" | |
6f6f9974 | 272 | -} |
273 | ||
ca6f90a0 | 274 | readFile :: MonadSafe m => FilePath -> Producer Text m () |
91727d11 | 275 | readFile file = Safe.withFile file IO.ReadMode fromHandle |
ca6f90a0 | 276 | {-# INLINE readFile #-} |
91727d11 | 277 | |
4cbc92cc | 278 | {-| Crudely stream lines of input from stdin in the style of Pipes.Prelude. |
279 | This is for testing in ghci etc.; obviously it will be unsound if used to recieve | |
280 | the contents of immense files with few newlines. | |
31f41a5d | 281 | |
282 | >>> let safely = runSafeT . runEffect | |
283 | >>> safely $ for Text.stdinLn (lift . lift . print . T.length) | |
284 | hello | |
285 | 5 | |
286 | world | |
287 | 5 | |
288 | ||
289 | -} | |
91727d11 | 290 | stdinLn :: MonadIO m => Producer' Text m () |
31f41a5d | 291 | stdinLn = go where |
91727d11 | 292 | go = do |
293 | eof <- liftIO (IO.hIsEOF IO.stdin) | |
294 | unless eof $ do | |
295 | txt <- liftIO (T.hGetLine IO.stdin) | |
296 | yield txt | |
297 | go | |
ca6f90a0 | 298 | {-# INLINABLE stdinLn #-} |
91727d11 | 299 | |
31f41a5d | 300 | {-| Stream text to 'stdout' |
91727d11 | 301 | |
302 | Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe. | |
303 | ||
4cbc92cc | 304 | Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@ |
305 | instead of @(source >-> stdout)@ . | |
91727d11 | 306 | -} |
307 | stdout :: MonadIO m => Consumer' Text m () | |
308 | stdout = go | |
309 | where | |
310 | go = do | |
311 | txt <- await | |
312 | x <- liftIO $ try (T.putStr txt) | |
313 | case x of | |
314 | Left (G.IOError { G.ioe_type = G.ResourceVanished | |
315 | , G.ioe_errno = Just ioe }) | |
316 | | Errno ioe == ePIPE | |
317 | -> return () | |
318 | Left e -> liftIO (throwIO e) | |
319 | Right () -> go | |
320 | {-# INLINABLE stdout #-} | |
321 | ||
322 | stdoutLn :: (MonadIO m) => Consumer' Text m () | |
323 | stdoutLn = go | |
324 | where | |
325 | go = do | |
326 | str <- await | |
327 | x <- liftIO $ try (T.putStrLn str) | |
328 | case x of | |
329 | Left (G.IOError { G.ioe_type = G.ResourceVanished | |
330 | , G.ioe_errno = Just ioe }) | |
331 | | Errno ioe == ePIPE | |
332 | -> return () | |
333 | Left e -> liftIO (throwIO e) | |
334 | Right () -> go | |
335 | {-# INLINABLE stdoutLn #-} | |
336 | ||
31f41a5d | 337 | {-| Convert a text stream into a 'Handle' |
91727d11 | 338 | |
31f41a5d | 339 | Note: again, for best performance, where possible use |
340 | @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@. | |
91727d11 | 341 | -} |
342 | toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r | |
343 | toHandle h = for cat (liftIO . T.hPutStr h) | |
344 | {-# INLINABLE toHandle #-} | |
345 | ||
d4732515 | 346 | {-# RULES "p >-> toHandle h" forall p h . |
ff38b9f0 | 347 | p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt)) |
d4732515 | 348 | #-} |
349 | ||
350 | ||
31f41a5d | 351 | -- | Stream text into a file. Uses @pipes-safe@. |
ca6f90a0 | 352 | writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m () |
91727d11 | 353 | writeFile file = Safe.withFile file IO.WriteMode toHandle |
ca6f90a0 | 354 | {-# INLINE writeFile #-} |
91727d11 | 355 | |
1677dc12 | 356 | |
357 | type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a) | |
358 | ||
359 | type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a) | |
360 | ||
361 | (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b | |
362 | a ^. lens = getConstant (lens Constant a) | |
363 | ||
364 | ||
91727d11 | 365 | -- | Apply a transformation to each 'Char' in the stream |
366 | map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r | |
367 | map f = P.map (T.map f) | |
368 | {-# INLINABLE map #-} | |
369 | ||
ff38b9f0 | 370 | {-# RULES "p >-> map f" forall p f . |
371 | p >-> map f = for p (\txt -> yield (T.map f txt)) | |
372 | #-} | |
373 | ||
31f41a5d | 374 | -- | Map a function over the characters of a text stream and concatenate the results |
91727d11 | 375 | concatMap |
376 | :: (Monad m) => (Char -> Text) -> Pipe Text Text m r | |
377 | concatMap f = P.map (T.concatMap f) | |
378 | {-# INLINABLE concatMap #-} | |
379 | ||
ff38b9f0 | 380 | {-# RULES "p >-> concatMap f" forall p f . |
381 | p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt)) | |
382 | #-} | |
7faef8bc | 383 | |
384 | -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8 | |
a02a69ad | 385 | -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex |
386 | -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@ | |
7faef8bc | 387 | encodeUtf8 :: Monad m => Pipe Text ByteString m r |
388 | encodeUtf8 = P.map TE.encodeUtf8 | |
389 | {-# INLINEABLE encodeUtf8 #-} | |
390 | ||
ff38b9f0 | 391 | {-# RULES "p >-> encodeUtf8" forall p . |
392 | p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt)) | |
393 | #-} | |
394 | ||
c0343bc9 | 395 | -- | Transform a Pipe of 'String's into one of 'Text' chunks |
7faef8bc | 396 | pack :: Monad m => Pipe String Text m r |
397 | pack = P.map T.pack | |
398 | {-# INLINEABLE pack #-} | |
399 | ||
ff38b9f0 | 400 | {-# RULES "p >-> pack" forall p . |
401 | p >-> pack = for p (\txt -> yield (T.pack txt)) | |
402 | #-} | |
403 | ||
404 | -- | Transform a Pipes of 'Text' chunks into one of 'String's | |
7faef8bc | 405 | unpack :: Monad m => Pipe Text String m r |
d4732515 | 406 | unpack = for cat (\t -> yield (T.unpack t)) |
7faef8bc | 407 | {-# INLINEABLE unpack #-} |
408 | ||
ff38b9f0 | 409 | {-# RULES "p >-> unpack" forall p . |
410 | p >-> unpack = for p (\txt -> yield (T.unpack txt)) | |
411 | #-} | |
d4732515 | 412 | |
b0d86a59 | 413 | -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities, |
414 | -- here acting as 'Text' pipes, rather as they would on a lazy text | |
7faef8bc | 415 | toCaseFold :: Monad m => Pipe Text Text m () |
416 | toCaseFold = P.map T.toCaseFold | |
417 | {-# INLINEABLE toCaseFold #-} | |
418 | ||
ff38b9f0 | 419 | {-# RULES "p >-> toCaseFold" forall p . |
420 | p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt)) | |
421 | #-} | |
422 | ||
423 | ||
c0343bc9 | 424 | -- | lowercase incoming 'Text' |
7faef8bc | 425 | toLower :: Monad m => Pipe Text Text m () |
426 | toLower = P.map T.toLower | |
427 | {-# INLINEABLE toLower #-} | |
428 | ||
ff38b9f0 | 429 | {-# RULES "p >-> toLower" forall p . |
430 | p >-> toLower = for p (\txt -> yield (T.toLower txt)) | |
431 | #-} | |
432 | ||
c0343bc9 | 433 | -- | uppercase incoming 'Text' |
7faef8bc | 434 | toUpper :: Monad m => Pipe Text Text m () |
435 | toUpper = P.map T.toUpper | |
436 | {-# INLINEABLE toUpper #-} | |
437 | ||
ff38b9f0 | 438 | {-# RULES "p >-> toUpper" forall p . |
439 | p >-> toUpper = for p (\txt -> yield (T.toUpper txt)) | |
440 | #-} | |
441 | ||
c0343bc9 | 442 | -- | Remove leading white space from an incoming succession of 'Text's |
7faef8bc | 443 | stripStart :: Monad m => Pipe Text Text m r |
444 | stripStart = do | |
445 | chunk <- await | |
446 | let text = T.stripStart chunk | |
447 | if T.null text | |
448 | then stripStart | |
b0d86a59 | 449 | else do yield text |
450 | cat | |
7faef8bc | 451 | {-# INLINEABLE stripStart #-} |
452 | ||
31f41a5d | 453 | -- | @(take n)@ only allows @n@ individual characters to pass; |
454 | -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass. | |
91727d11 | 455 | take :: (Monad m, Integral a) => a -> Pipe Text Text m () |
456 | take n0 = go n0 where | |
457 | go n | |
458 | | n <= 0 = return () | |
459 | | otherwise = do | |
31f41a5d | 460 | txt <- await |
461 | let len = fromIntegral (T.length txt) | |
91727d11 | 462 | if (len > n) |
31f41a5d | 463 | then yield (T.take (fromIntegral n) txt) |
91727d11 | 464 | else do |
31f41a5d | 465 | yield txt |
91727d11 | 466 | go (n - len) |
467 | {-# INLINABLE take #-} | |
468 | ||
31f41a5d | 469 | -- | @(drop n)@ drops the first @n@ characters |
91727d11 | 470 | drop :: (Monad m, Integral a) => a -> Pipe Text Text m r |
471 | drop n0 = go n0 where | |
472 | go n | |
473 | | n <= 0 = cat | |
474 | | otherwise = do | |
31f41a5d | 475 | txt <- await |
476 | let len = fromIntegral (T.length txt) | |
91727d11 | 477 | if (len >= n) |
478 | then do | |
31f41a5d | 479 | yield (T.drop (fromIntegral n) txt) |
91727d11 | 480 | cat |
481 | else go (n - len) | |
482 | {-# INLINABLE drop #-} | |
483 | ||
31f41a5d | 484 | -- | Take characters until they fail the predicate |
91727d11 | 485 | takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m () |
486 | takeWhile predicate = go | |
487 | where | |
488 | go = do | |
31f41a5d | 489 | txt <- await |
490 | let (prefix, suffix) = T.span predicate txt | |
91727d11 | 491 | if (T.null suffix) |
492 | then do | |
31f41a5d | 493 | yield txt |
91727d11 | 494 | go |
495 | else yield prefix | |
496 | {-# INLINABLE takeWhile #-} | |
497 | ||
31f41a5d | 498 | -- | Drop characters until they fail the predicate |
91727d11 | 499 | dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r |
500 | dropWhile predicate = go where | |
501 | go = do | |
31f41a5d | 502 | txt <- await |
503 | case T.findIndex (not . predicate) txt of | |
91727d11 | 504 | Nothing -> go |
505 | Just i -> do | |
31f41a5d | 506 | yield (T.drop i txt) |
91727d11 | 507 | cat |
508 | {-# INLINABLE dropWhile #-} | |
509 | ||
510 | -- | Only allows 'Char's to pass if they satisfy the predicate | |
511 | filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r | |
512 | filter predicate = P.map (T.filter predicate) | |
513 | {-# INLINABLE filter #-} | |
514 | ||
ff38b9f0 | 515 | {-# RULES "p >-> filter q" forall p q . |
516 | p >-> filter q = for p (\txt -> yield (T.filter q txt)) | |
517 | #-} | |
518 | ||
31f41a5d | 519 | -- | Strict left scan over the characters |
91727d11 | 520 | scan |
521 | :: (Monad m) | |
522 | => (Char -> Char -> Char) -> Char -> Pipe Text Text m r | |
523 | scan step begin = go begin | |
524 | where | |
31f41a5d | 525 | go c = do |
526 | txt <- await | |
527 | let txt' = T.scanl step c txt | |
528 | c' = T.last txt' | |
529 | yield txt' | |
530 | go c' | |
91727d11 | 531 | {-# INLINABLE scan #-} |
532 | ||
533 | {-| Fold a pure 'Producer' of strict 'Text's into a lazy | |
534 | 'TL.Text' | |
535 | -} | |
536 | toLazy :: Producer Text Identity () -> TL.Text | |
537 | toLazy = TL.fromChunks . P.toList | |
538 | {-# INLINABLE toLazy #-} | |
539 | ||
540 | {-| Fold an effectful 'Producer' of strict 'Text's into a lazy | |
541 | 'TL.Text' | |
542 | ||
543 | Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for | |
544 | simple testing purposes. Idiomatic @pipes@ style consumes the chunks | |
545 | immediately as they are generated instead of loading them all into memory. | |
546 | -} | |
547 | toLazyM :: (Monad m) => Producer Text m () -> m TL.Text | |
548 | toLazyM = liftM TL.fromChunks . P.toListM | |
549 | {-# INLINABLE toLazyM #-} | |
550 | ||
31f41a5d | 551 | -- | Reduce the text stream using a strict left fold over characters |
64e03122 | 552 | foldChars |
91727d11 | 553 | :: Monad m |
554 | => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r | |
64e03122 | 555 | foldChars step begin done = P.fold (T.foldl' step) begin done |
1677dc12 | 556 | {-# INLINABLE foldChars #-} |
91727d11 | 557 | |
558 | -- | Retrieve the first 'Char' | |
559 | head :: (Monad m) => Producer Text m () -> m (Maybe Char) | |
560 | head = go | |
561 | where | |
562 | go p = do | |
563 | x <- nextChar p | |
564 | case x of | |
565 | Left _ -> return Nothing | |
31f41a5d | 566 | Right (c, _) -> return (Just c) |
91727d11 | 567 | {-# INLINABLE head #-} |
568 | ||
569 | -- | Retrieve the last 'Char' | |
570 | last :: (Monad m) => Producer Text m () -> m (Maybe Char) | |
571 | last = go Nothing | |
572 | where | |
573 | go r p = do | |
574 | x <- next p | |
575 | case x of | |
576 | Left () -> return r | |
31f41a5d | 577 | Right (txt, p') -> |
578 | if (T.null txt) | |
91727d11 | 579 | then go r p' |
31f41a5d | 580 | else go (Just $ T.last txt) p' |
91727d11 | 581 | {-# INLINABLE last #-} |
582 | ||
583 | -- | Determine if the stream is empty | |
584 | null :: (Monad m) => Producer Text m () -> m Bool | |
585 | null = P.all T.null | |
586 | {-# INLINABLE null #-} | |
587 | ||
62e8521c | 588 | -- | Count the number of characters in the stream |
91727d11 | 589 | length :: (Monad m, Num n) => Producer Text m () -> m n |
31f41a5d | 590 | length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id |
91727d11 | 591 | {-# INLINABLE length #-} |
592 | ||
593 | -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate | |
594 | any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool | |
595 | any predicate = P.any (T.any predicate) | |
596 | {-# INLINABLE any #-} | |
597 | ||
598 | -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate | |
599 | all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool | |
600 | all predicate = P.all (T.all predicate) | |
601 | {-# INLINABLE all #-} | |
602 | ||
62e8521c | 603 | -- | Return the maximum 'Char' within a text stream |
91727d11 | 604 | maximum :: (Monad m) => Producer Text m () -> m (Maybe Char) |
605 | maximum = P.fold step Nothing id | |
606 | where | |
31f41a5d | 607 | step mc txt = |
608 | if (T.null txt) | |
609 | then mc | |
610 | else Just $ case mc of | |
611 | Nothing -> T.maximum txt | |
612 | Just c -> max c (T.maximum txt) | |
91727d11 | 613 | {-# INLINABLE maximum #-} |
614 | ||
62e8521c | 615 | -- | Return the minimum 'Char' within a text stream (surely very useful!) |
91727d11 | 616 | minimum :: (Monad m) => Producer Text m () -> m (Maybe Char) |
617 | minimum = P.fold step Nothing id | |
618 | where | |
31f41a5d | 619 | step mc txt = |
620 | if (T.null txt) | |
621 | then mc | |
622 | else case mc of | |
623 | Nothing -> Just (T.minimum txt) | |
624 | Just c -> Just (min c (T.minimum txt)) | |
91727d11 | 625 | {-# INLINABLE minimum #-} |
626 | ||
1677dc12 | 627 | |
91727d11 | 628 | -- | Find the first element in the stream that matches the predicate |
629 | find | |
630 | :: (Monad m) | |
631 | => (Char -> Bool) -> Producer Text m () -> m (Maybe Char) | |
632 | find predicate p = head (p >-> filter predicate) | |
633 | {-# INLINABLE find #-} | |
634 | ||
62e8521c | 635 | -- | Index into a text stream |
91727d11 | 636 | index |
637 | :: (Monad m, Integral a) | |
638 | => a-> Producer Text m () -> m (Maybe Char) | |
639 | index n p = head (p >-> drop n) | |
640 | {-# INLINABLE index #-} | |
641 | ||
63ea9ffd | 642 | |
31f41a5d | 643 | -- | Store a tally of how many segments match the given 'Text' |
644 | count :: (Monad m, Num n) => Text -> Producer Text m () -> m n | |
645 | count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) | |
646 | {-# INLINABLE count #-} | |
647 | ||
9e9bb0ce | 648 | |
649 | {-| Consume the first character from a stream of 'Text' | |
650 | ||
651 | 'next' either fails with a 'Left' if the 'Producer' has no more characters or | |
652 | succeeds with a 'Right' providing the next character and the remainder of the | |
653 | 'Producer'. | |
654 | -} | |
655 | nextChar | |
656 | :: (Monad m) | |
657 | => Producer Text m r | |
658 | -> m (Either r (Char, Producer Text m r)) | |
659 | nextChar = go | |
660 | where | |
661 | go p = do | |
662 | x <- next p | |
663 | case x of | |
664 | Left r -> return (Left r) | |
665 | Right (txt, p') -> case (T.uncons txt) of | |
666 | Nothing -> go p' | |
667 | Just (c, txt') -> return (Right (c, yield txt' >> p')) | |
668 | {-# INLINABLE nextChar #-} | |
669 | ||
670 | {-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the | |
671 | 'Producer' is empty | |
672 | -} | |
673 | drawChar :: (Monad m) => Parser Text m (Maybe Char) | |
674 | drawChar = do | |
675 | x <- PP.draw | |
676 | case x of | |
677 | Nothing -> return Nothing | |
678 | Just txt -> case (T.uncons txt) of | |
679 | Nothing -> drawChar | |
680 | Just (c, txt') -> do | |
681 | PP.unDraw txt' | |
682 | return (Just c) | |
683 | {-# INLINABLE drawChar #-} | |
684 | ||
685 | -- | Push back a 'Char' onto the underlying 'Producer' | |
686 | unDrawChar :: (Monad m) => Char -> Parser Text m () | |
687 | unDrawChar c = modify (yield (T.singleton c) >>) | |
688 | {-# INLINABLE unDrawChar #-} | |
689 | ||
690 | {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to | |
691 | push the 'Char' back | |
692 | ||
693 | > peekChar = do | |
694 | > x <- drawChar | |
695 | > case x of | |
696 | > Left _ -> return () | |
697 | > Right c -> unDrawChar c | |
698 | > return x | |
699 | -} | |
700 | peekChar :: (Monad m) => Parser Text m (Maybe Char) | |
701 | peekChar = do | |
702 | x <- drawChar | |
703 | case x of | |
704 | Nothing -> return () | |
705 | Just c -> unDrawChar c | |
706 | return x | |
707 | {-# INLINABLE peekChar #-} | |
708 | ||
709 | {-| Check if the underlying 'Producer' has no more characters | |
710 | ||
711 | Note that this will skip over empty 'Text' chunks, unlike | |
712 | 'PP.isEndOfInput' from @pipes-parse@, which would consider | |
713 | an empty 'Text' a valid bit of input. | |
714 | ||
715 | > isEndOfChars = liftM isLeft peekChar | |
716 | -} | |
717 | isEndOfChars :: (Monad m) => Parser Text m Bool | |
718 | isEndOfChars = do | |
719 | x <- peekChar | |
720 | return (case x of | |
721 | Nothing -> True | |
722 | Just _-> False ) | |
723 | {-# INLINABLE isEndOfChars #-} | |
724 | ||
725 | ||
4cbc92cc | 726 | -- | An improper lens into a stream of 'ByteString' expected to be UTF-8 encoded; the associated |
727 | -- stream of Text ends by returning a stream of ByteStrings beginning at the point of failure. | |
ca6f90a0 | 728 | |
9e9bb0ce | 729 | decodeUtf8 :: Monad m => Lens' (Producer ByteString m r) |
730 | (Producer Text m (Producer ByteString m r)) | |
731 | decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8))) | |
732 | (k (go B.empty PE.streamDecodeUtf8 p0)) where | |
ca6f90a0 | 733 | go !carry dec0 p = do |
734 | x <- lift (next p) | |
4cbc92cc | 735 | case x of Left r -> return (if B.null carry |
736 | then return r -- all bytestring input was consumed | |
737 | else (do yield carry -- a potentially valid fragment remains | |
738 | return r)) | |
ca6f90a0 | 739 | |
740 | Right (chunk, p') -> case dec0 chunk of | |
741 | PE.Some text carry2 dec -> do yield text | |
742 | go carry2 dec p' | |
743 | PE.Other text bs -> do yield text | |
744 | return (do yield bs -- an invalid blob remains | |
745 | p') | |
746 | {-# INLINABLE decodeUtf8 #-} | |
747 | ||
31f41a5d | 748 | |
749 | -- | Splits a 'Producer' after the given number of characters | |
91727d11 | 750 | splitAt |
751 | :: (Monad m, Integral n) | |
752 | => n | |
9e9bb0ce | 753 | -> Lens' (Producer Text m r) |
754 | (Producer Text m (Producer Text m r)) | |
755 | splitAt n0 k p0 = fmap join (k (go n0 p0)) | |
91727d11 | 756 | where |
757 | go 0 p = return p | |
758 | go n p = do | |
759 | x <- lift (next p) | |
760 | case x of | |
761 | Left r -> return (return r) | |
31f41a5d | 762 | Right (txt, p') -> do |
763 | let len = fromIntegral (T.length txt) | |
91727d11 | 764 | if (len <= n) |
765 | then do | |
31f41a5d | 766 | yield txt |
91727d11 | 767 | go (n - len) p' |
768 | else do | |
31f41a5d | 769 | let (prefix, suffix) = T.splitAt (fromIntegral n) txt |
91727d11 | 770 | yield prefix |
771 | return (yield suffix >> p') | |
772 | {-# INLINABLE splitAt #-} | |
773 | ||
91727d11 | 774 | |
31f41a5d | 775 | {-| Split a text stream in two, where the first text stream is the longest |
776 | consecutive group of text that satisfy the predicate | |
91727d11 | 777 | -} |
778 | span | |
779 | :: (Monad m) | |
780 | => (Char -> Bool) | |
9e9bb0ce | 781 | -> Lens' (Producer Text m r) |
782 | (Producer Text m (Producer Text m r)) | |
783 | span predicate k p0 = fmap join (k (go p0)) | |
91727d11 | 784 | where |
785 | go p = do | |
786 | x <- lift (next p) | |
787 | case x of | |
788 | Left r -> return (return r) | |
31f41a5d | 789 | Right (txt, p') -> do |
790 | let (prefix, suffix) = T.span predicate txt | |
91727d11 | 791 | if (T.null suffix) |
792 | then do | |
31f41a5d | 793 | yield txt |
91727d11 | 794 | go p' |
795 | else do | |
796 | yield prefix | |
797 | return (yield suffix >> p') | |
798 | {-# INLINABLE span #-} | |
799 | ||
62e8521c | 800 | {-| Split a text stream in two, where the first text stream is the longest |
801 | consecutive group of characters that don't satisfy the predicate | |
91727d11 | 802 | -} |
803 | break | |
804 | :: (Monad m) | |
805 | => (Char -> Bool) | |
9e9bb0ce | 806 | -> Lens' (Producer Text m r) |
807 | (Producer Text m (Producer Text m r)) | |
91727d11 | 808 | break predicate = span (not . predicate) |
809 | {-# INLINABLE break #-} | |
810 | ||
9e9bb0ce | 811 | {-| Improper lens that splits after the first group of equivalent Chars, as |
812 | defined by the given equivalence relation | |
813 | -} | |
814 | groupBy | |
815 | :: (Monad m) | |
816 | => (Char -> Char -> Bool) | |
817 | -> Lens' (Producer Text m r) | |
818 | (Producer Text m (Producer Text m r)) | |
819 | groupBy equals k p0 = fmap join (k ((go p0))) where | |
820 | go p = do | |
821 | x <- lift (next p) | |
822 | case x of | |
823 | Left r -> return (return r) | |
824 | Right (txt, p') -> case T.uncons txt of | |
825 | Nothing -> go p' | |
826 | Just (c, _) -> (yield txt >> p') ^. span (equals c) | |
827 | {-# INLINABLE groupBy #-} | |
828 | ||
829 | -- | Improper lens that splits after the first succession of identical 'Char' s | |
830 | group :: Monad m | |
831 | => Lens' (Producer Text m r) | |
832 | (Producer Text m (Producer Text m r)) | |
833 | group = groupBy (==) | |
834 | {-# INLINABLE group #-} | |
835 | ||
836 | {-| Improper lens that splits a 'Producer' after the first word | |
837 | ||
838 | Unlike 'words', this does not drop leading whitespace | |
839 | -} | |
840 | word :: (Monad m) | |
841 | => Lens' (Producer Text m r) | |
842 | (Producer Text m (Producer Text m r)) | |
843 | word k p0 = fmap join (k (to p0)) | |
844 | where | |
845 | to p = do | |
846 | p' <- p^.span isSpace | |
847 | p'^.break isSpace | |
848 | {-# INLINABLE word #-} | |
849 | ||
850 | ||
851 | line :: (Monad m) | |
852 | => Lens' (Producer Text m r) | |
853 | (Producer Text m (Producer Text m r)) | |
854 | line = break (== '\n') | |
855 | ||
856 | {-# INLINABLE line #-} | |
857 | ||
858 | ||
859 | -- | Intersperse a 'Char' in between the characters of stream of 'Text' | |
860 | intersperse | |
861 | :: (Monad m) => Char -> Producer Text m r -> Producer Text m r | |
862 | intersperse c = go0 | |
863 | where | |
864 | go0 p = do | |
865 | x <- lift (next p) | |
866 | case x of | |
867 | Left r -> return r | |
868 | Right (txt, p') -> do | |
869 | yield (T.intersperse c txt) | |
870 | go1 p' | |
871 | go1 p = do | |
872 | x <- lift (next p) | |
873 | case x of | |
874 | Left r -> return r | |
875 | Right (txt, p') -> do | |
876 | yield (T.singleton c) | |
877 | yield (T.intersperse c txt) | |
878 | go1 p' | |
879 | {-# INLINABLE intersperse #-} | |
880 | ||
881 | ||
882 | ||
883 | -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's | |
884 | packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x) | |
885 | packChars = Data.Profunctor.dimap to (fmap from) | |
886 | where | |
887 | -- to :: Monad m => Producer Char m x -> Producer Text m x | |
7ed76745 | 888 | to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize) |
9e9bb0ce | 889 | |
890 | step diffAs c = diffAs . (c:) | |
891 | ||
892 | done diffAs = T.pack (diffAs []) | |
893 | ||
894 | -- from :: Monad m => Producer Text m x -> Producer Char m x | |
895 | from p = for p (each . T.unpack) | |
896 | {-# INLINABLE packChars #-} | |
897 | ||
0f8c6f1b | 898 | |
899 | -- | Split a text stream into 'FreeT'-delimited text streams of fixed size | |
900 | chunksOf | |
901 | :: (Monad m, Integral n) | |
902 | => n -> Lens' (Producer Text m r) | |
903 | (FreeT (Producer Text m) m r) | |
904 | chunksOf n k p0 = fmap concats (k (FreeT (go p0))) | |
905 | where | |
906 | go p = do | |
907 | x <- next p | |
908 | return $ case x of | |
7ed76745 | 909 | Left r -> Pure r |
910 | Right (txt, p') -> Free $ do | |
0f8c6f1b | 911 | p'' <- (yield txt >> p') ^. splitAt n |
7ed76745 | 912 | return $ FreeT (go p'') |
0f8c6f1b | 913 | {-# INLINABLE chunksOf #-} |
914 | ||
915 | ||
62e8521c | 916 | {-| Split a text stream into sub-streams delimited by characters that satisfy the |
91727d11 | 917 | predicate |
918 | -} | |
1677dc12 | 919 | splitsWith |
91727d11 | 920 | :: (Monad m) |
921 | => (Char -> Bool) | |
922 | -> Producer Text m r | |
7ed76745 | 923 | -> FreeT (Producer Text m) m r |
924 | splitsWith predicate p0 = FreeT (go0 p0) | |
91727d11 | 925 | where |
926 | go0 p = do | |
927 | x <- next p | |
928 | case x of | |
7ed76745 | 929 | Left r -> return (Pure r) |
31f41a5d | 930 | Right (txt, p') -> |
931 | if (T.null txt) | |
91727d11 | 932 | then go0 p' |
7ed76745 | 933 | else return $ Free $ do |
9e9bb0ce | 934 | p'' <- (yield txt >> p') ^. span (not . predicate) |
7ed76745 | 935 | return $ FreeT (go1 p'') |
91727d11 | 936 | go1 p = do |
937 | x <- nextChar p | |
938 | return $ case x of | |
7ed76745 | 939 | Left r -> Pure r |
940 | Right (_, p') -> Free $ do | |
9e9bb0ce | 941 | p'' <- p' ^. span (not . predicate) |
7ed76745 | 942 | return $ FreeT (go1 p'') |
1677dc12 | 943 | {-# INLINABLE splitsWith #-} |
91727d11 | 944 | |
31f41a5d | 945 | -- | Split a text stream using the given 'Char' as the delimiter |
0f8c6f1b | 946 | splits :: (Monad m) |
91727d11 | 947 | => Char |
0f8c6f1b | 948 | -> Lens' (Producer Text m r) |
949 | (FreeT (Producer Text m) m r) | |
950 | splits c k p = | |
7ed76745 | 951 | fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p)) |
0f8c6f1b | 952 | {-# INLINABLE splits #-} |
953 | ||
954 | {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the | |
955 | given equivalence relation | |
956 | -} | |
957 | groupsBy | |
958 | :: Monad m | |
959 | => (Char -> Char -> Bool) | |
960 | -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x) | |
7ed76745 | 961 | groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where |
0f8c6f1b | 962 | go p = do x <- next p |
7ed76745 | 963 | case x of Left r -> return (Pure r) |
0f8c6f1b | 964 | Right (bs, p') -> case T.uncons bs of |
965 | Nothing -> go p' | |
7ed76745 | 966 | Just (c, _) -> do return $ Free $ do |
0f8c6f1b | 967 | p'' <- (yield bs >> p')^.span (equals c) |
7ed76745 | 968 | return $ FreeT (go p'') |
0f8c6f1b | 969 | {-# INLINABLE groupsBy #-} |
970 | ||
971 | ||
972 | -- | Like 'groupsBy', where the equality predicate is ('==') | |
973 | groups | |
974 | :: Monad m | |
975 | => Lens' (Producer Text m x) (FreeT (Producer Text m) m x) | |
976 | groups = groupsBy (==) | |
977 | {-# INLINABLE groups #-} | |
978 | ||
91727d11 | 979 | |
91727d11 | 980 | |
62e8521c | 981 | {-| Split a text stream into 'FreeT'-delimited lines |
91727d11 | 982 | -} |
983 | lines | |
0f8c6f1b | 984 | :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r) |
985 | lines = Data.Profunctor.dimap _lines (fmap _unlines) | |
91727d11 | 986 | where |
7ed76745 | 987 | _lines p0 = FreeT (go0 p0) |
0f8c6f1b | 988 | where |
989 | go0 p = do | |
990 | x <- next p | |
991 | case x of | |
7ed76745 | 992 | Left r -> return (Pure r) |
0f8c6f1b | 993 | Right (txt, p') -> |
994 | if (T.null txt) | |
995 | then go0 p' | |
7ed76745 | 996 | else return $ Free $ go1 (yield txt >> p') |
0f8c6f1b | 997 | go1 p = do |
998 | p' <- p ^. break ('\n' ==) | |
7ed76745 | 999 | return $ FreeT $ do |
0f8c6f1b | 1000 | x <- nextChar p' |
1001 | case x of | |
7ed76745 | 1002 | Left r -> return $ Pure r |
0f8c6f1b | 1003 | Right (_, p'') -> go0 p'' |
1004 | -- _unlines | |
1005 | -- :: Monad m | |
1006 | -- => FreeT (Producer Text m) m x -> Producer Text m x | |
7fc48f7c | 1007 | _unlines = concats . PG.maps (<* yield (T.singleton '\n')) |
1008 | ||
0f8c6f1b | 1009 | |
91727d11 | 1010 | {-# INLINABLE lines #-} |
91727d11 | 1011 | |
31f41a5d | 1012 | |
1013 | ||
1014 | -- | Split a text stream into 'FreeT'-delimited words | |
91727d11 | 1015 | words |
0f8c6f1b | 1016 | :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r) |
1017 | words = Data.Profunctor.dimap go (fmap _unwords) | |
91727d11 | 1018 | where |
7ed76745 | 1019 | go p = FreeT $ do |
cf10d6f1 | 1020 | x <- next (p >-> dropWhile isSpace) |
1021 | return $ case x of | |
7ed76745 | 1022 | Left r -> Pure r |
1023 | Right (bs, p') -> Free $ do | |
9e9bb0ce | 1024 | p'' <- (yield bs >> p') ^. break isSpace |
cf10d6f1 | 1025 | return (go p'') |
7ed76745 | 1026 | _unwords = PG.intercalates (yield $ T.singleton ' ') |
0f8c6f1b | 1027 | |
91727d11 | 1028 | {-# INLINABLE words #-} |
1029 | ||
cf10d6f1 | 1030 | |
31f41a5d | 1031 | {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after |
1032 | interspersing a text stream in between them | |
91727d11 | 1033 | -} |
1034 | intercalate | |
1035 | :: (Monad m) | |
1036 | => Producer Text m () | |
1037 | -> FreeT (Producer Text m) m r | |
1038 | -> Producer Text m r | |
1039 | intercalate p0 = go0 | |
1040 | where | |
1041 | go0 f = do | |
7ed76745 | 1042 | x <- lift (runFreeT f) |
91727d11 | 1043 | case x of |
7ed76745 | 1044 | Pure r -> return r |
1045 | Free p -> do | |
91727d11 | 1046 | f' <- p |
1047 | go1 f' | |
1048 | go1 f = do | |
7ed76745 | 1049 | x <- lift (runFreeT f) |
91727d11 | 1050 | case x of |
7ed76745 | 1051 | Pure r -> return r |
1052 | Free p -> do | |
91727d11 | 1053 | p0 |
1054 | f' <- p | |
1055 | go1 f' | |
1056 | {-# INLINABLE intercalate #-} | |
1057 | ||
62e8521c | 1058 | {-| Join 'FreeT'-delimited lines into a text stream |
91727d11 | 1059 | -} |
1060 | unlines | |
1061 | :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r | |
1062 | unlines = go | |
1063 | where | |
1064 | go f = do | |
7ed76745 | 1065 | x <- lift (runFreeT f) |
91727d11 | 1066 | case x of |
7ed76745 | 1067 | Pure r -> return r |
1068 | Free p -> do | |
91727d11 | 1069 | f' <- p |
1070 | yield $ T.singleton '\n' | |
1071 | go f' | |
1072 | {-# INLINABLE unlines #-} | |
1073 | ||
31f41a5d | 1074 | {-| Join 'FreeT'-delimited words into a text stream |
91727d11 | 1075 | -} |
1076 | unwords | |
1077 | :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r | |
7fc48f7c | 1078 | unwords = intercalate (yield $ T.singleton ' ') |
91727d11 | 1079 | {-# INLINABLE unwords #-} |
1080 | ||
1081 | {- $parse | |
31f41a5d | 1082 | The following parsing utilities are single-character analogs of the ones found |
1083 | @pipes-parse@. | |
91727d11 | 1084 | -} |
1085 | ||
91727d11 | 1086 | {- $reexports |
91727d11 | 1087 | |
1088 | @Data.Text@ re-exports the 'Text' type. | |
1089 | ||
0f8c6f1b | 1090 | @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym. |
64e03122 | 1091 | -} |
1092 | ||
90189414 | 1093 | codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r)) |
1094 | codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc))) | |
1095 | (k (decoder (dec B.empty) p0) ) where | |
1096 | decoder :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r) | |
1097 | decoder !d p0 = case d of | |
1098 | PE.Other txt bad -> do yield txt | |
1099 | return (do yield bad | |
1100 | p0) | |
1101 | PE.Some txt extra dec -> do yield txt | |
1102 | x <- lift (next p0) | |
1103 | case x of Left r -> return (do yield extra | |
1104 | return r) | |
1105 | Right (chunk,p1) -> decoder (dec chunk) p1 | |
1106 | ||
1107 | -- decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8))) | |
1108 | -- (k (go B.empty PE.streamDecodeUtf8 p0)) where | |
1109 | ||
1110 | encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r) | |
1111 | encodeAscii = go where | |
1112 | go p = do echunk <- lift (next p) | |
1113 | case echunk of | |
1114 | Left r -> return (return r) | |
1115 | Right (chunk, p') -> | |
1116 | if T.null chunk | |
1117 | then go p' | |
1118 | else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk | |
1119 | in do yield (B8.pack (T.unpack safe)) | |
1120 | if T.null unsafe | |
1121 | then go p' | |
1122 | else return $ do yield unsafe | |
1123 | p' | |
1124 | ||
1125 | encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r) | |
1126 | encodeIso8859_1 = go where | |
1127 | go p = do etxt <- lift (next p) | |
1128 | case etxt of | |
1129 | Left r -> return (return r) | |
1130 | Right (txt, p') -> | |
1131 | if T.null txt | |
1132 | then go p' | |
1133 | else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt | |
1134 | in do yield (B8.pack (T.unpack safe)) | |
1135 | if T.null unsafe | |
1136 | then go p' | |
1137 | else return $ do yield unsafe | |
1138 | p' | |
1139 | ||
1140 | decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r) | |
1141 | decodeAscii = go where | |
1142 | go p = do echunk <- lift (next p) | |
1143 | case echunk of | |
1144 | Left r -> return (return r) | |
1145 | Right (chunk, p') -> | |
1146 | if B.null chunk | |
1147 | then go p' | |
1148 | else let (safe, unsafe) = B.span (<= 0x7F) chunk | |
1149 | in do yield (T.pack (B8.unpack safe)) | |
1150 | if B.null unsafe | |
1151 | then go p' | |
1152 | else return $ do yield unsafe | |
1153 | p' | |
1154 | ||
1155 | ||
1156 | decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r) | |
1157 | decodeIso8859_1 = go where | |
1158 | go p = do echunk <- lift (next p) | |
1159 | case echunk of | |
1160 | Left r -> return (return r) | |
1161 | Right (chunk, p') -> | |
1162 | if B.null chunk | |
1163 | then go p' | |
1164 | else let (safe, unsafe) = B.span (<= 0xFF) chunk | |
1165 | in do yield (T.pack (B8.unpack safe)) | |
1166 | if B.null unsafe | |
1167 | then go p' | |
1168 | else return $ do yield unsafe | |
1169 | p' | |
1170 | ||
1171 | ||
1172 | ||
1173 | {- | |
1174 | ascii :: Codec | |
1175 | ascii = Codec name enc (toDecoding dec) where | |
1176 | name = T.pack "ASCII" | |
1177 | enc text = (bytes, extra) where | |
1178 | (safe, unsafe) = T.span (\c -> ord c <= 0x7F) text | |
1179 | bytes = B8.pack (T.unpack safe) | |
1180 | extra = if T.null unsafe | |
1181 | then Nothing | |
1182 | else Just (EncodeException ascii (T.head unsafe), unsafe) | |
1183 | ||
1184 | dec bytes = (text, extra) where | |
1185 | (safe, unsafe) = B.span (<= 0x7F) bytes | |
1186 | text = T.pack (B8.unpack safe) | |
1187 | extra = if B.null unsafe | |
1188 | then Right B.empty | |
1189 | else Left (DecodeException ascii (B.head unsafe), unsafe) | |
1190 | ||
1191 | iso8859_1 :: Codec | |
1192 | iso8859_1 = Codec name enc (toDecoding dec) where | |
1193 | name = T.pack "ISO-8859-1" | |
1194 | enc text = (bytes, extra) where | |
1195 | (safe, unsafe) = T.span (\c -> ord c <= 0xFF) text | |
1196 | bytes = B8.pack (T.unpack safe) | |
1197 | extra = if T.null unsafe | |
1198 | then Nothing | |
1199 | else Just (EncodeException iso8859_1 (T.head unsafe), unsafe) | |
1200 | ||
1201 | dec bytes = (T.pack (B8.unpack bytes), Right B.empty) | |
1202 | -} | |
1203 |