]>
Commit | Line | Data |
---|---|---|
ca6f90a0 | 1 | {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-} |
64e03122 | 2 | #if __GLASGOW_HASKELL__ >= 702 |
3 | {-# LANGUAGE Trustworthy #-} | |
4 | #endif | |
13a43263 | 5 | {-| This module provides @pipes@ utilities for \"text streams\", which are |
64e03122 | 6 | streams of 'Text' chunks. The individual chunks are uniformly @strict@, but |
7 | a 'Producer' can be converted to and from lazy 'Text's, though this is generally | |
8 | unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'. | |
9 | An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to. | |
91727d11 | 10 | |
63ea9ffd | 11 | To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For |
31f41a5d | 12 | example, the following program copies a document from one file to another: |
91727d11 | 13 | |
14 | > import Pipes | |
31f41a5d | 15 | > import qualified Data.Text.Pipes as Text |
91727d11 | 16 | > import System.IO |
17 | > | |
18 | > main = | |
19 | > withFile "inFile.txt" ReadMode $ \hIn -> | |
20 | > withFile "outFile.txt" WriteMode $ \hOut -> | |
31f41a5d | 21 | > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut |
22 | ||
23 | To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe): | |
91727d11 | 24 | |
13a43263 | 25 | > import Pipes |
31f41a5d | 26 | > import qualified Data.Text.Pipes as Text |
13a43263 | 27 | > import Pipes.Safe |
28 | > | |
31f41a5d | 29 | > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt" |
13a43263 | 30 | |
91727d11 | 31 | You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin' |
31f41a5d | 32 | and 'stdout' proxies, as with the following \"echo\" program: |
91727d11 | 33 | |
31f41a5d | 34 | > main = runEffect $ Text.stdin >-> Text.stdout |
91727d11 | 35 | |
36 | You can also translate pure lazy 'TL.Text's to and from proxies: | |
37 | ||
31f41a5d | 38 | > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout |
91727d11 | 39 | |
40 | In addition, this module provides many functions equivalent to lazy | |
31f41a5d | 41 | 'Text' functions so that you can transform or fold text streams. For |
91727d11 | 42 | example, to stream only the first three lines of 'stdin' to 'stdout' you |
31f41a5d | 43 | might write: |
91727d11 | 44 | |
45 | > import Pipes | |
31f41a5d | 46 | > import qualified Pipes.Text as Text |
47 | > import qualified Pipes.Parse as Parse | |
91727d11 | 48 | > |
31f41a5d | 49 | > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout |
91727d11 | 50 | > where |
31f41a5d | 51 | > takeLines n = Text.unlines . Parse.takeFree n . Text.lines |
91727d11 | 52 | |
31f41a5d | 53 | The above program will never bring more than one chunk of text (~ 32 KB) into |
91727d11 | 54 | memory, no matter how long the lines are. |
55 | ||
56 | Note that functions in this library are designed to operate on streams that | |
31f41a5d | 57 | are insensitive to text boundaries. This means that they may freely split |
64e03122 | 58 | text into smaller texts, /discard empty texts/. However, apart from the |
59 | special case of 'concatMap', they will /never concatenate texts/ in order | |
60 | to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'. | |
91727d11 | 61 | -} |
62 | ||
7faef8bc | 63 | module Pipes.Text ( |
91727d11 | 64 | -- * Producers |
65 | fromLazy, | |
66 | stdin, | |
67 | fromHandle, | |
68 | readFile, | |
69 | stdinLn, | |
91727d11 | 70 | |
71 | -- * Consumers | |
72 | stdout, | |
73 | stdoutLn, | |
74 | toHandle, | |
75 | writeFile, | |
76 | ||
77 | -- * Pipes | |
78 | map, | |
79 | concatMap, | |
80 | take, | |
81 | drop, | |
82 | takeWhile, | |
83 | dropWhile, | |
84 | filter, | |
91727d11 | 85 | scan, |
7faef8bc | 86 | encodeUtf8, |
87 | pack, | |
88 | unpack, | |
1d2434b5 | 89 | toCaseFold, |
90 | toLower, | |
91 | toUpper, | |
7faef8bc | 92 | stripStart, |
91727d11 | 93 | |
94 | -- * Folds | |
95 | toLazy, | |
96 | toLazyM, | |
64e03122 | 97 | foldChars, |
91727d11 | 98 | head, |
99 | last, | |
100 | null, | |
101 | length, | |
102 | any, | |
103 | all, | |
104 | maximum, | |
105 | minimum, | |
91727d11 | 106 | find, |
107 | index, | |
31f41a5d | 108 | count, |
91727d11 | 109 | |
110 | -- * Splitters | |
111 | splitAt, | |
112 | chunksOf, | |
113 | span, | |
114 | break, | |
115 | splitWith, | |
116 | split, | |
117 | groupBy, | |
118 | group, | |
119 | lines, | |
120 | words, | |
31f41a5d | 121 | decodeUtf8, |
64e03122 | 122 | decode, |
91727d11 | 123 | -- * Transformations |
124 | intersperse, | |
31f41a5d | 125 | |
91727d11 | 126 | -- * Joiners |
127 | intercalate, | |
128 | unlines, | |
129 | unwords, | |
130 | ||
31f41a5d | 131 | -- * Character Parsers |
91727d11 | 132 | -- $parse |
31f41a5d | 133 | nextChar, |
134 | drawChar, | |
135 | unDrawChar, | |
136 | peekChar, | |
137 | isEndOfChars, | |
91727d11 | 138 | |
139 | -- * Re-exports | |
140 | -- $reexports | |
141 | module Data.Text, | |
91727d11 | 142 | module Pipes.Parse |
143 | ) where | |
144 | ||
145 | import Control.Exception (throwIO, try) | |
64e03122 | 146 | import Control.Monad (liftM, unless, join) |
acc6868f | 147 | import Control.Monad.Trans.State.Strict (StateT(..)) |
ca6f90a0 | 148 | import Data.Monoid ((<>)) |
91727d11 | 149 | import qualified Data.Text as T |
150 | import qualified Data.Text.IO as T | |
31f41a5d | 151 | import qualified Data.Text.Encoding as TE |
63ea9ffd | 152 | import qualified Data.Text.Encoding.Error as TE |
91727d11 | 153 | import Data.Text (Text) |
154 | import qualified Data.Text.Lazy as TL | |
155 | import qualified Data.Text.Lazy.IO as TL | |
156 | import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize) | |
157 | import Data.ByteString.Unsafe (unsafeTake, unsafeDrop) | |
31f41a5d | 158 | import Data.ByteString (ByteString) |
159 | import qualified Data.ByteString as B | |
cf10d6f1 | 160 | import Data.Char (ord, isSpace) |
91727d11 | 161 | import Data.Functor.Identity (Identity) |
162 | import qualified Data.List as List | |
163 | import Foreign.C.Error (Errno(Errno), ePIPE) | |
164 | import qualified GHC.IO.Exception as G | |
165 | import Pipes | |
5e3f5409 | 166 | import qualified Pipes.ByteString as PB |
64e03122 | 167 | import qualified Pipes.ByteString as PBP |
ca6f90a0 | 168 | import qualified Pipes.Text.Internal as PE |
64e03122 | 169 | import Pipes.Text.Internal (Codec(..)) |
c0343bc9 | 170 | import Pipes.Text.Parse ( |
31f41a5d | 171 | nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) |
91727d11 | 172 | import Pipes.Core (respond, Server') |
173 | import qualified Pipes.Parse as PP | |
64e03122 | 174 | import Pipes.Parse ( FreeT) |
91727d11 | 175 | import qualified Pipes.Safe.Prelude as Safe |
176 | import qualified Pipes.Safe as Safe | |
177 | import Pipes.Safe (MonadSafe(..), Base(..)) | |
178 | import qualified Pipes.Prelude as P | |
179 | import qualified System.IO as IO | |
180 | import Data.Char (isSpace) | |
63ea9ffd | 181 | import Data.Word (Word8) |
91727d11 | 182 | import Prelude hiding ( |
183 | all, | |
184 | any, | |
185 | break, | |
186 | concat, | |
187 | concatMap, | |
188 | drop, | |
189 | dropWhile, | |
190 | elem, | |
191 | filter, | |
192 | head, | |
193 | last, | |
194 | lines, | |
195 | length, | |
196 | map, | |
197 | maximum, | |
198 | minimum, | |
199 | notElem, | |
200 | null, | |
201 | readFile, | |
202 | span, | |
203 | splitAt, | |
204 | take, | |
205 | takeWhile, | |
206 | unlines, | |
207 | unwords, | |
208 | words, | |
209 | writeFile ) | |
210 | ||
211 | -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's | |
212 | fromLazy :: (Monad m) => TL.Text -> Producer' Text m () | |
213 | fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) | |
ca6f90a0 | 214 | {-# INLINE fromLazy #-} |
91727d11 | 215 | |
62e8521c | 216 | -- | Stream text from 'stdin' |
ca6f90a0 | 217 | stdin :: MonadIO m => Producer Text m () |
91727d11 | 218 | stdin = fromHandle IO.stdin |
ca6f90a0 | 219 | {-# INLINE stdin #-} |
91727d11 | 220 | |
31f41a5d | 221 | {-| Convert a 'IO.Handle' into a text stream using a text size |
ca6f90a0 | 222 | determined by the good sense of the text library; note that this |
223 | is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@ | |
224 | but uses the system encoding and has other `Data.Text.IO` features | |
31f41a5d | 225 | -} |
226 | ||
ca6f90a0 | 227 | fromHandle :: MonadIO m => IO.Handle -> Producer Text m () |
228 | fromHandle h = go where | |
229 | go = do txt <- liftIO (T.hGetChunk h) | |
230 | unless (T.null txt) $ do yield txt | |
231 | go | |
91727d11 | 232 | {-# INLINABLE fromHandle#-} |
ca6f90a0 | 233 | |
234 | ||
235 | {-| Stream text from a file in the simple fashion of @Data.Text.IO@ | |
6f6f9974 | 236 | |
31f41a5d | 237 | >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout |
238 | MAIN = PUTSTRLN "HELLO WORLD" | |
6f6f9974 | 239 | -} |
240 | ||
ca6f90a0 | 241 | readFile :: MonadSafe m => FilePath -> Producer Text m () |
91727d11 | 242 | readFile file = Safe.withFile file IO.ReadMode fromHandle |
ca6f90a0 | 243 | {-# INLINE readFile #-} |
91727d11 | 244 | |
31f41a5d | 245 | {-| Stream lines of text from stdin (for testing in ghci etc.) |
246 | ||
247 | >>> let safely = runSafeT . runEffect | |
248 | >>> safely $ for Text.stdinLn (lift . lift . print . T.length) | |
249 | hello | |
250 | 5 | |
251 | world | |
252 | 5 | |
253 | ||
254 | -} | |
91727d11 | 255 | stdinLn :: MonadIO m => Producer' Text m () |
31f41a5d | 256 | stdinLn = go where |
91727d11 | 257 | go = do |
258 | eof <- liftIO (IO.hIsEOF IO.stdin) | |
259 | unless eof $ do | |
260 | txt <- liftIO (T.hGetLine IO.stdin) | |
261 | yield txt | |
262 | go | |
ca6f90a0 | 263 | {-# INLINABLE stdinLn #-} |
91727d11 | 264 | |
31f41a5d | 265 | {-| Stream text to 'stdout' |
91727d11 | 266 | |
267 | Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe. | |
268 | ||
269 | Note: For best performance, use @(for source (liftIO . putStr))@ instead of | |
31f41a5d | 270 | @(source >-> stdout)@ in suitable cases. |
91727d11 | 271 | -} |
272 | stdout :: MonadIO m => Consumer' Text m () | |
273 | stdout = go | |
274 | where | |
275 | go = do | |
276 | txt <- await | |
277 | x <- liftIO $ try (T.putStr txt) | |
278 | case x of | |
279 | Left (G.IOError { G.ioe_type = G.ResourceVanished | |
280 | , G.ioe_errno = Just ioe }) | |
281 | | Errno ioe == ePIPE | |
282 | -> return () | |
283 | Left e -> liftIO (throwIO e) | |
284 | Right () -> go | |
285 | {-# INLINABLE stdout #-} | |
286 | ||
287 | stdoutLn :: (MonadIO m) => Consumer' Text m () | |
288 | stdoutLn = go | |
289 | where | |
290 | go = do | |
291 | str <- await | |
292 | x <- liftIO $ try (T.putStrLn str) | |
293 | case x of | |
294 | Left (G.IOError { G.ioe_type = G.ResourceVanished | |
295 | , G.ioe_errno = Just ioe }) | |
296 | | Errno ioe == ePIPE | |
297 | -> return () | |
298 | Left e -> liftIO (throwIO e) | |
299 | Right () -> go | |
300 | {-# INLINABLE stdoutLn #-} | |
301 | ||
31f41a5d | 302 | {-| Convert a text stream into a 'Handle' |
91727d11 | 303 | |
31f41a5d | 304 | Note: again, for best performance, where possible use |
305 | @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@. | |
91727d11 | 306 | -} |
307 | toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r | |
308 | toHandle h = for cat (liftIO . T.hPutStr h) | |
309 | {-# INLINABLE toHandle #-} | |
310 | ||
d4732515 | 311 | {-# RULES "p >-> toHandle h" forall p h . |
ff38b9f0 | 312 | p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt)) |
d4732515 | 313 | #-} |
314 | ||
315 | ||
31f41a5d | 316 | -- | Stream text into a file. Uses @pipes-safe@. |
ca6f90a0 | 317 | writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m () |
91727d11 | 318 | writeFile file = Safe.withFile file IO.WriteMode toHandle |
ca6f90a0 | 319 | {-# INLINE writeFile #-} |
91727d11 | 320 | |
321 | -- | Apply a transformation to each 'Char' in the stream | |
322 | map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r | |
323 | map f = P.map (T.map f) | |
324 | {-# INLINABLE map #-} | |
325 | ||
ff38b9f0 | 326 | {-# RULES "p >-> map f" forall p f . |
327 | p >-> map f = for p (\txt -> yield (T.map f txt)) | |
328 | #-} | |
329 | ||
31f41a5d | 330 | -- | Map a function over the characters of a text stream and concatenate the results |
91727d11 | 331 | concatMap |
332 | :: (Monad m) => (Char -> Text) -> Pipe Text Text m r | |
333 | concatMap f = P.map (T.concatMap f) | |
334 | {-# INLINABLE concatMap #-} | |
335 | ||
ff38b9f0 | 336 | {-# RULES "p >-> concatMap f" forall p f . |
337 | p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt)) | |
338 | #-} | |
7faef8bc | 339 | |
340 | -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8 | |
a02a69ad | 341 | -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex |
342 | -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@ | |
7faef8bc | 343 | encodeUtf8 :: Monad m => Pipe Text ByteString m r |
344 | encodeUtf8 = P.map TE.encodeUtf8 | |
345 | {-# INLINEABLE encodeUtf8 #-} | |
346 | ||
ff38b9f0 | 347 | {-# RULES "p >-> encodeUtf8" forall p . |
348 | p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt)) | |
349 | #-} | |
350 | ||
c0343bc9 | 351 | -- | Transform a Pipe of 'String's into one of 'Text' chunks |
7faef8bc | 352 | pack :: Monad m => Pipe String Text m r |
353 | pack = P.map T.pack | |
354 | {-# INLINEABLE pack #-} | |
355 | ||
ff38b9f0 | 356 | {-# RULES "p >-> pack" forall p . |
357 | p >-> pack = for p (\txt -> yield (T.pack txt)) | |
358 | #-} | |
359 | ||
360 | -- | Transform a Pipes of 'Text' chunks into one of 'String's | |
7faef8bc | 361 | unpack :: Monad m => Pipe Text String m r |
d4732515 | 362 | unpack = for cat (\t -> yield (T.unpack t)) |
7faef8bc | 363 | {-# INLINEABLE unpack #-} |
364 | ||
ff38b9f0 | 365 | {-# RULES "p >-> unpack" forall p . |
366 | p >-> unpack = for p (\txt -> yield (T.unpack txt)) | |
367 | #-} | |
d4732515 | 368 | |
c0343bc9 | 369 | -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility, |
7faef8bc | 370 | -- here acting on a 'Text' pipe, rather as they would on a lazy text |
371 | toCaseFold :: Monad m => Pipe Text Text m () | |
372 | toCaseFold = P.map T.toCaseFold | |
373 | {-# INLINEABLE toCaseFold #-} | |
374 | ||
ff38b9f0 | 375 | {-# RULES "p >-> toCaseFold" forall p . |
376 | p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt)) | |
377 | #-} | |
378 | ||
379 | ||
c0343bc9 | 380 | -- | lowercase incoming 'Text' |
7faef8bc | 381 | toLower :: Monad m => Pipe Text Text m () |
382 | toLower = P.map T.toLower | |
383 | {-# INLINEABLE toLower #-} | |
384 | ||
ff38b9f0 | 385 | {-# RULES "p >-> toLower" forall p . |
386 | p >-> toLower = for p (\txt -> yield (T.toLower txt)) | |
387 | #-} | |
388 | ||
c0343bc9 | 389 | -- | uppercase incoming 'Text' |
7faef8bc | 390 | toUpper :: Monad m => Pipe Text Text m () |
391 | toUpper = P.map T.toUpper | |
392 | {-# INLINEABLE toUpper #-} | |
393 | ||
ff38b9f0 | 394 | {-# RULES "p >-> toUpper" forall p . |
395 | p >-> toUpper = for p (\txt -> yield (T.toUpper txt)) | |
396 | #-} | |
397 | ||
c0343bc9 | 398 | -- | Remove leading white space from an incoming succession of 'Text's |
7faef8bc | 399 | stripStart :: Monad m => Pipe Text Text m r |
400 | stripStart = do | |
401 | chunk <- await | |
402 | let text = T.stripStart chunk | |
403 | if T.null text | |
404 | then stripStart | |
405 | else cat | |
406 | {-# INLINEABLE stripStart #-} | |
407 | ||
31f41a5d | 408 | -- | @(take n)@ only allows @n@ individual characters to pass; |
409 | -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass. | |
91727d11 | 410 | take :: (Monad m, Integral a) => a -> Pipe Text Text m () |
411 | take n0 = go n0 where | |
412 | go n | |
413 | | n <= 0 = return () | |
414 | | otherwise = do | |
31f41a5d | 415 | txt <- await |
416 | let len = fromIntegral (T.length txt) | |
91727d11 | 417 | if (len > n) |
31f41a5d | 418 | then yield (T.take (fromIntegral n) txt) |
91727d11 | 419 | else do |
31f41a5d | 420 | yield txt |
91727d11 | 421 | go (n - len) |
422 | {-# INLINABLE take #-} | |
423 | ||
31f41a5d | 424 | -- | @(drop n)@ drops the first @n@ characters |
91727d11 | 425 | drop :: (Monad m, Integral a) => a -> Pipe Text Text m r |
426 | drop n0 = go n0 where | |
427 | go n | |
428 | | n <= 0 = cat | |
429 | | otherwise = do | |
31f41a5d | 430 | txt <- await |
431 | let len = fromIntegral (T.length txt) | |
91727d11 | 432 | if (len >= n) |
433 | then do | |
31f41a5d | 434 | yield (T.drop (fromIntegral n) txt) |
91727d11 | 435 | cat |
436 | else go (n - len) | |
437 | {-# INLINABLE drop #-} | |
438 | ||
31f41a5d | 439 | -- | Take characters until they fail the predicate |
91727d11 | 440 | takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m () |
441 | takeWhile predicate = go | |
442 | where | |
443 | go = do | |
31f41a5d | 444 | txt <- await |
445 | let (prefix, suffix) = T.span predicate txt | |
91727d11 | 446 | if (T.null suffix) |
447 | then do | |
31f41a5d | 448 | yield txt |
91727d11 | 449 | go |
450 | else yield prefix | |
451 | {-# INLINABLE takeWhile #-} | |
452 | ||
31f41a5d | 453 | -- | Drop characters until they fail the predicate |
91727d11 | 454 | dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r |
455 | dropWhile predicate = go where | |
456 | go = do | |
31f41a5d | 457 | txt <- await |
458 | case T.findIndex (not . predicate) txt of | |
91727d11 | 459 | Nothing -> go |
460 | Just i -> do | |
31f41a5d | 461 | yield (T.drop i txt) |
91727d11 | 462 | cat |
463 | {-# INLINABLE dropWhile #-} | |
464 | ||
465 | -- | Only allows 'Char's to pass if they satisfy the predicate | |
466 | filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r | |
467 | filter predicate = P.map (T.filter predicate) | |
468 | {-# INLINABLE filter #-} | |
469 | ||
ff38b9f0 | 470 | {-# RULES "p >-> filter q" forall p q . |
471 | p >-> filter q = for p (\txt -> yield (T.filter q txt)) | |
472 | #-} | |
473 | ||
31f41a5d | 474 | -- | Strict left scan over the characters |
91727d11 | 475 | scan |
476 | :: (Monad m) | |
477 | => (Char -> Char -> Char) -> Char -> Pipe Text Text m r | |
478 | scan step begin = go begin | |
479 | where | |
31f41a5d | 480 | go c = do |
481 | txt <- await | |
482 | let txt' = T.scanl step c txt | |
483 | c' = T.last txt' | |
484 | yield txt' | |
485 | go c' | |
91727d11 | 486 | {-# INLINABLE scan #-} |
487 | ||
488 | {-| Fold a pure 'Producer' of strict 'Text's into a lazy | |
489 | 'TL.Text' | |
490 | -} | |
491 | toLazy :: Producer Text Identity () -> TL.Text | |
492 | toLazy = TL.fromChunks . P.toList | |
493 | {-# INLINABLE toLazy #-} | |
494 | ||
495 | {-| Fold an effectful 'Producer' of strict 'Text's into a lazy | |
496 | 'TL.Text' | |
497 | ||
498 | Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for | |
499 | simple testing purposes. Idiomatic @pipes@ style consumes the chunks | |
500 | immediately as they are generated instead of loading them all into memory. | |
501 | -} | |
502 | toLazyM :: (Monad m) => Producer Text m () -> m TL.Text | |
503 | toLazyM = liftM TL.fromChunks . P.toListM | |
504 | {-# INLINABLE toLazyM #-} | |
505 | ||
31f41a5d | 506 | -- | Reduce the text stream using a strict left fold over characters |
64e03122 | 507 | foldChars |
91727d11 | 508 | :: Monad m |
509 | => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r | |
64e03122 | 510 | foldChars step begin done = P.fold (T.foldl' step) begin done |
91727d11 | 511 | {-# INLINABLE fold #-} |
512 | ||
513 | -- | Retrieve the first 'Char' | |
514 | head :: (Monad m) => Producer Text m () -> m (Maybe Char) | |
515 | head = go | |
516 | where | |
517 | go p = do | |
518 | x <- nextChar p | |
519 | case x of | |
520 | Left _ -> return Nothing | |
31f41a5d | 521 | Right (c, _) -> return (Just c) |
91727d11 | 522 | {-# INLINABLE head #-} |
523 | ||
524 | -- | Retrieve the last 'Char' | |
525 | last :: (Monad m) => Producer Text m () -> m (Maybe Char) | |
526 | last = go Nothing | |
527 | where | |
528 | go r p = do | |
529 | x <- next p | |
530 | case x of | |
531 | Left () -> return r | |
31f41a5d | 532 | Right (txt, p') -> |
533 | if (T.null txt) | |
91727d11 | 534 | then go r p' |
31f41a5d | 535 | else go (Just $ T.last txt) p' |
91727d11 | 536 | {-# INLINABLE last #-} |
537 | ||
538 | -- | Determine if the stream is empty | |
539 | null :: (Monad m) => Producer Text m () -> m Bool | |
540 | null = P.all T.null | |
541 | {-# INLINABLE null #-} | |
542 | ||
62e8521c | 543 | -- | Count the number of characters in the stream |
91727d11 | 544 | length :: (Monad m, Num n) => Producer Text m () -> m n |
31f41a5d | 545 | length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id |
91727d11 | 546 | {-# INLINABLE length #-} |
547 | ||
548 | -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate | |
549 | any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool | |
550 | any predicate = P.any (T.any predicate) | |
551 | {-# INLINABLE any #-} | |
552 | ||
553 | -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate | |
554 | all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool | |
555 | all predicate = P.all (T.all predicate) | |
556 | {-# INLINABLE all #-} | |
557 | ||
62e8521c | 558 | -- | Return the maximum 'Char' within a text stream |
91727d11 | 559 | maximum :: (Monad m) => Producer Text m () -> m (Maybe Char) |
560 | maximum = P.fold step Nothing id | |
561 | where | |
31f41a5d | 562 | step mc txt = |
563 | if (T.null txt) | |
564 | then mc | |
565 | else Just $ case mc of | |
566 | Nothing -> T.maximum txt | |
567 | Just c -> max c (T.maximum txt) | |
91727d11 | 568 | {-# INLINABLE maximum #-} |
569 | ||
62e8521c | 570 | -- | Return the minimum 'Char' within a text stream (surely very useful!) |
91727d11 | 571 | minimum :: (Monad m) => Producer Text m () -> m (Maybe Char) |
572 | minimum = P.fold step Nothing id | |
573 | where | |
31f41a5d | 574 | step mc txt = |
575 | if (T.null txt) | |
576 | then mc | |
577 | else case mc of | |
578 | Nothing -> Just (T.minimum txt) | |
579 | Just c -> Just (min c (T.minimum txt)) | |
91727d11 | 580 | {-# INLINABLE minimum #-} |
581 | ||
91727d11 | 582 | -- | Find the first element in the stream that matches the predicate |
583 | find | |
584 | :: (Monad m) | |
585 | => (Char -> Bool) -> Producer Text m () -> m (Maybe Char) | |
586 | find predicate p = head (p >-> filter predicate) | |
587 | {-# INLINABLE find #-} | |
588 | ||
62e8521c | 589 | -- | Index into a text stream |
91727d11 | 590 | index |
591 | :: (Monad m, Integral a) | |
592 | => a-> Producer Text m () -> m (Maybe Char) | |
593 | index n p = head (p >-> drop n) | |
594 | {-# INLINABLE index #-} | |
595 | ||
63ea9ffd | 596 | |
31f41a5d | 597 | -- | Store a tally of how many segments match the given 'Text' |
598 | count :: (Monad m, Num n) => Text -> Producer Text m () -> m n | |
599 | count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) | |
600 | {-# INLINABLE count #-} | |
601 | ||
ca6f90a0 | 602 | -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text |
603 | -- returning a Pipe of ByteStrings that begins at the point of failure. | |
604 | ||
605 | decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r) | |
606 | decodeUtf8 = go B.empty PE.streamDecodeUtf8 where | |
607 | go !carry dec0 p = do | |
608 | x <- lift (next p) | |
609 | case x of Left r -> if B.null carry | |
610 | then return (return r) -- all bytestrinput was consumed | |
611 | else return (do yield carry -- a potentially valid fragment remains | |
612 | return r) | |
613 | ||
614 | Right (chunk, p') -> case dec0 chunk of | |
615 | PE.Some text carry2 dec -> do yield text | |
616 | go carry2 dec p' | |
617 | PE.Other text bs -> do yield text | |
618 | return (do yield bs -- an invalid blob remains | |
619 | p') | |
620 | {-# INLINABLE decodeUtf8 #-} | |
621 | ||
31f41a5d | 622 | |
623 | -- | Splits a 'Producer' after the given number of characters | |
91727d11 | 624 | splitAt |
625 | :: (Monad m, Integral n) | |
626 | => n | |
627 | -> Producer Text m r | |
628 | -> Producer' Text m (Producer Text m r) | |
629 | splitAt = go | |
630 | where | |
631 | go 0 p = return p | |
632 | go n p = do | |
633 | x <- lift (next p) | |
634 | case x of | |
635 | Left r -> return (return r) | |
31f41a5d | 636 | Right (txt, p') -> do |
637 | let len = fromIntegral (T.length txt) | |
91727d11 | 638 | if (len <= n) |
639 | then do | |
31f41a5d | 640 | yield txt |
91727d11 | 641 | go (n - len) p' |
642 | else do | |
31f41a5d | 643 | let (prefix, suffix) = T.splitAt (fromIntegral n) txt |
91727d11 | 644 | yield prefix |
645 | return (yield suffix >> p') | |
646 | {-# INLINABLE splitAt #-} | |
647 | ||
31f41a5d | 648 | -- | Split a text stream into 'FreeT'-delimited text streams of fixed size |
91727d11 | 649 | chunksOf |
650 | :: (Monad m, Integral n) | |
651 | => n -> Producer Text m r -> FreeT (Producer Text m) m r | |
652 | chunksOf n p0 = PP.FreeT (go p0) | |
653 | where | |
654 | go p = do | |
655 | x <- next p | |
656 | return $ case x of | |
657 | Left r -> PP.Pure r | |
31f41a5d | 658 | Right (txt, p') -> PP.Free $ do |
659 | p'' <- splitAt n (yield txt >> p') | |
91727d11 | 660 | return $ PP.FreeT (go p'') |
661 | {-# INLINABLE chunksOf #-} | |
662 | ||
31f41a5d | 663 | {-| Split a text stream in two, where the first text stream is the longest |
664 | consecutive group of text that satisfy the predicate | |
91727d11 | 665 | -} |
666 | span | |
667 | :: (Monad m) | |
668 | => (Char -> Bool) | |
669 | -> Producer Text m r | |
670 | -> Producer' Text m (Producer Text m r) | |
671 | span predicate = go | |
672 | where | |
673 | go p = do | |
674 | x <- lift (next p) | |
675 | case x of | |
676 | Left r -> return (return r) | |
31f41a5d | 677 | Right (txt, p') -> do |
678 | let (prefix, suffix) = T.span predicate txt | |
91727d11 | 679 | if (T.null suffix) |
680 | then do | |
31f41a5d | 681 | yield txt |
91727d11 | 682 | go p' |
683 | else do | |
684 | yield prefix | |
685 | return (yield suffix >> p') | |
686 | {-# INLINABLE span #-} | |
687 | ||
62e8521c | 688 | {-| Split a text stream in two, where the first text stream is the longest |
689 | consecutive group of characters that don't satisfy the predicate | |
91727d11 | 690 | -} |
691 | break | |
692 | :: (Monad m) | |
693 | => (Char -> Bool) | |
694 | -> Producer Text m r | |
695 | -> Producer Text m (Producer Text m r) | |
696 | break predicate = span (not . predicate) | |
697 | {-# INLINABLE break #-} | |
698 | ||
62e8521c | 699 | {-| Split a text stream into sub-streams delimited by characters that satisfy the |
91727d11 | 700 | predicate |
701 | -} | |
702 | splitWith | |
703 | :: (Monad m) | |
704 | => (Char -> Bool) | |
705 | -> Producer Text m r | |
706 | -> PP.FreeT (Producer Text m) m r | |
707 | splitWith predicate p0 = PP.FreeT (go0 p0) | |
708 | where | |
709 | go0 p = do | |
710 | x <- next p | |
711 | case x of | |
712 | Left r -> return (PP.Pure r) | |
31f41a5d | 713 | Right (txt, p') -> |
714 | if (T.null txt) | |
91727d11 | 715 | then go0 p' |
716 | else return $ PP.Free $ do | |
31f41a5d | 717 | p'' <- span (not . predicate) (yield txt >> p') |
91727d11 | 718 | return $ PP.FreeT (go1 p'') |
719 | go1 p = do | |
720 | x <- nextChar p | |
721 | return $ case x of | |
722 | Left r -> PP.Pure r | |
723 | Right (_, p') -> PP.Free $ do | |
724 | p'' <- span (not . predicate) p' | |
725 | return $ PP.FreeT (go1 p'') | |
726 | {-# INLINABLE splitWith #-} | |
727 | ||
31f41a5d | 728 | -- | Split a text stream using the given 'Char' as the delimiter |
91727d11 | 729 | split :: (Monad m) |
730 | => Char | |
731 | -> Producer Text m r | |
732 | -> FreeT (Producer Text m) m r | |
31f41a5d | 733 | split c = splitWith (c ==) |
91727d11 | 734 | {-# INLINABLE split #-} |
735 | ||
62e8521c | 736 | {-| Group a text stream into 'FreeT'-delimited text streams using the supplied |
91727d11 | 737 | equality predicate |
738 | -} | |
739 | groupBy | |
740 | :: (Monad m) | |
741 | => (Char -> Char -> Bool) | |
742 | -> Producer Text m r | |
743 | -> FreeT (Producer Text m) m r | |
744 | groupBy equal p0 = PP.FreeT (go p0) | |
745 | where | |
746 | go p = do | |
747 | x <- next p | |
748 | case x of | |
749 | Left r -> return (PP.Pure r) | |
31f41a5d | 750 | Right (txt, p') -> case (T.uncons txt) of |
91727d11 | 751 | Nothing -> go p' |
31f41a5d | 752 | Just (c, _) -> do |
91727d11 | 753 | return $ PP.Free $ do |
31f41a5d | 754 | p'' <- span (equal c) (yield txt >> p') |
91727d11 | 755 | return $ PP.FreeT (go p'') |
756 | {-# INLINABLE groupBy #-} | |
757 | ||
62e8521c | 758 | -- | Group a text stream into 'FreeT'-delimited text streams of identical characters |
91727d11 | 759 | group |
760 | :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r | |
761 | group = groupBy (==) | |
762 | {-# INLINABLE group #-} | |
763 | ||
62e8521c | 764 | {-| Split a text stream into 'FreeT'-delimited lines |
91727d11 | 765 | -} |
766 | lines | |
767 | :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r | |
768 | lines p0 = PP.FreeT (go0 p0) | |
769 | where | |
770 | go0 p = do | |
771 | x <- next p | |
772 | case x of | |
773 | Left r -> return (PP.Pure r) | |
31f41a5d | 774 | Right (txt, p') -> |
775 | if (T.null txt) | |
91727d11 | 776 | then go0 p' |
31f41a5d | 777 | else return $ PP.Free $ go1 (yield txt >> p') |
91727d11 | 778 | go1 p = do |
779 | p' <- break ('\n' ==) p | |
b4d21c02 | 780 | return $ PP.FreeT $ do |
781 | x <- nextChar p' | |
782 | case x of | |
783 | Left r -> return $ PP.Pure r | |
784 | Right (_, p'') -> go0 p'' | |
91727d11 | 785 | {-# INLINABLE lines #-} |
91727d11 | 786 | |
31f41a5d | 787 | |
788 | ||
789 | -- | Split a text stream into 'FreeT'-delimited words | |
91727d11 | 790 | words |
791 | :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r | |
cf10d6f1 | 792 | words = go |
91727d11 | 793 | where |
cf10d6f1 | 794 | go p = PP.FreeT $ do |
795 | x <- next (p >-> dropWhile isSpace) | |
796 | return $ case x of | |
797 | Left r -> PP.Pure r | |
798 | Right (bs, p') -> PP.Free $ do | |
799 | p'' <- break isSpace (yield bs >> p') | |
800 | return (go p'') | |
91727d11 | 801 | {-# INLINABLE words #-} |
802 | ||
cf10d6f1 | 803 | |
62e8521c | 804 | -- | Intersperse a 'Char' in between the characters of the text stream |
91727d11 | 805 | intersperse |
806 | :: (Monad m) => Char -> Producer Text m r -> Producer Text m r | |
31f41a5d | 807 | intersperse c = go0 |
91727d11 | 808 | where |
809 | go0 p = do | |
810 | x <- lift (next p) | |
811 | case x of | |
812 | Left r -> return r | |
31f41a5d | 813 | Right (txt, p') -> do |
814 | yield (T.intersperse c txt) | |
91727d11 | 815 | go1 p' |
816 | go1 p = do | |
817 | x <- lift (next p) | |
818 | case x of | |
819 | Left r -> return r | |
31f41a5d | 820 | Right (txt, p') -> do |
821 | yield (T.singleton c) | |
822 | yield (T.intersperse c txt) | |
91727d11 | 823 | go1 p' |
824 | {-# INLINABLE intersperse #-} | |
825 | ||
31f41a5d | 826 | {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after |
827 | interspersing a text stream in between them | |
91727d11 | 828 | -} |
829 | intercalate | |
830 | :: (Monad m) | |
831 | => Producer Text m () | |
832 | -> FreeT (Producer Text m) m r | |
833 | -> Producer Text m r | |
834 | intercalate p0 = go0 | |
835 | where | |
836 | go0 f = do | |
837 | x <- lift (PP.runFreeT f) | |
838 | case x of | |
839 | PP.Pure r -> return r | |
840 | PP.Free p -> do | |
841 | f' <- p | |
842 | go1 f' | |
843 | go1 f = do | |
844 | x <- lift (PP.runFreeT f) | |
845 | case x of | |
846 | PP.Pure r -> return r | |
847 | PP.Free p -> do | |
848 | p0 | |
849 | f' <- p | |
850 | go1 f' | |
851 | {-# INLINABLE intercalate #-} | |
852 | ||
62e8521c | 853 | {-| Join 'FreeT'-delimited lines into a text stream |
91727d11 | 854 | -} |
855 | unlines | |
856 | :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r | |
857 | unlines = go | |
858 | where | |
859 | go f = do | |
860 | x <- lift (PP.runFreeT f) | |
861 | case x of | |
862 | PP.Pure r -> return r | |
863 | PP.Free p -> do | |
864 | f' <- p | |
865 | yield $ T.singleton '\n' | |
866 | go f' | |
867 | {-# INLINABLE unlines #-} | |
868 | ||
31f41a5d | 869 | {-| Join 'FreeT'-delimited words into a text stream |
91727d11 | 870 | -} |
871 | unwords | |
872 | :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r | |
873 | unwords = intercalate (yield $ T.pack " ") | |
874 | {-# INLINABLE unwords #-} | |
875 | ||
876 | {- $parse | |
31f41a5d | 877 | The following parsing utilities are single-character analogs of the ones found |
878 | @pipes-parse@. | |
91727d11 | 879 | -} |
880 | ||
91727d11 | 881 | {- $reexports |
31f41a5d | 882 | @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'. |
91727d11 | 883 | |
884 | @Data.Text@ re-exports the 'Text' type. | |
885 | ||
91727d11 | 886 | @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type). |
64e03122 | 887 | -} |
888 | ||
889 | ||
890 | ||
891 | decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r) | |
892 | -- decode codec = go B.empty where | |
893 | -- go extra p0 = | |
894 | -- do x <- lift (next p0) | |
895 | -- case x of Right (chunk, p) -> | |
896 | -- do let (text, stuff) = codecDecode codec (B.append extra chunk) | |
897 | -- yield text | |
898 | -- case stuff of Right extra' -> go extra' p | |
899 | -- Left (exc,bs) -> do yield text | |
900 | -- return (do yield bs | |
901 | -- p) | |
902 | -- Left r -> return (do yield extra | |
903 | -- return r) | |
904 | ||
905 | decode d p0 = case d of | |
906 | PE.Other txt bad -> do yield txt | |
907 | return (do yield bad | |
908 | p0) | |
909 | PE.Some txt extra dec -> do yield txt | |
910 | x <- lift (next p0) | |
911 | case x of Left r -> return (do yield extra | |
912 | return r) | |
913 | Right (chunk,p1) -> decode (dec chunk) p1 | |
914 | ||
915 | -- go !carry dec0 p = do | |
916 | -- x <- lift (next p) | |
917 | -- case x of Left r -> if B.null carry | |
918 | -- then return (return r) -- all bytestrinput was consumed | |
919 | -- else return (do yield carry -- a potentially valid fragment remains | |
920 | -- return r) | |
921 | -- | |
922 | -- Right (chunk, p') -> case dec0 chunk of | |
923 | -- PE.Some text carry2 dec -> do yield text | |
924 | -- go carry2 dec p' | |
925 | -- PE.Other text bs -> do yield text | |
926 | -- return (do yield bs -- an invalid blob remains | |
927 | -- p') | |
928 | -- {-# INLINABLE decodeUtf8 #-} |