]> git.immae.eu Git - github/fretlink/text-pipes.git/blob - Pipes/Text.hs
renamed fold foldChars and began updating documentation
[github/fretlink/text-pipes.git] / Pipes / Text.hs
1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-}
2 #if __GLASGOW_HASKELL__ >= 702
3 {-# LANGUAGE Trustworthy #-}
4 #endif
5 {-| This module provides @pipes@ utilities for \"text streams\", which are
6 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
7 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
8 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
9 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
10
11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
12 example, the following program copies a document from one file to another:
13
14 > import Pipes
15 > import qualified Data.Text.Pipes as Text
16 > import System.IO
17 >
18 > main =
19 > withFile "inFile.txt" ReadMode $ \hIn ->
20 > withFile "outFile.txt" WriteMode $ \hOut ->
21 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22
23 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
24
25 > import Pipes
26 > import qualified Data.Text.Pipes as Text
27 > import Pipes.Safe
28 >
29 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
30
31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
32 and 'stdout' proxies, as with the following \"echo\" program:
33
34 > main = runEffect $ Text.stdin >-> Text.stdout
35
36 You can also translate pure lazy 'TL.Text's to and from proxies:
37
38 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
39
40 In addition, this module provides many functions equivalent to lazy
41 'Text' functions so that you can transform or fold text streams. For
42 example, to stream only the first three lines of 'stdin' to 'stdout' you
43 might write:
44
45 > import Pipes
46 > import qualified Pipes.Text as Text
47 > import qualified Pipes.Parse as Parse
48 >
49 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
50 > where
51 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
52
53 The above program will never bring more than one chunk of text (~ 32 KB) into
54 memory, no matter how long the lines are.
55
56 Note that functions in this library are designed to operate on streams that
57 are insensitive to text boundaries. This means that they may freely split
58 text into smaller texts, /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
61 -}
62
63 module Pipes.Text (
64 -- * Producers
65 fromLazy,
66 stdin,
67 fromHandle,
68 readFile,
69 stdinLn,
70
71 -- * Consumers
72 stdout,
73 stdoutLn,
74 toHandle,
75 writeFile,
76
77 -- * Pipes
78 map,
79 concatMap,
80 take,
81 drop,
82 takeWhile,
83 dropWhile,
84 filter,
85 scan,
86 encodeUtf8,
87 pack,
88 unpack,
89 toCaseFold,
90 toLower,
91 toUpper,
92 stripStart,
93
94 -- * Folds
95 toLazy,
96 toLazyM,
97 foldChars,
98 head,
99 last,
100 null,
101 length,
102 any,
103 all,
104 maximum,
105 minimum,
106 find,
107 index,
108 count,
109
110 -- * Splitters
111 splitAt,
112 chunksOf,
113 span,
114 break,
115 splitWith,
116 split,
117 groupBy,
118 group,
119 lines,
120 words,
121 decodeUtf8,
122 decode,
123 -- * Transformations
124 intersperse,
125
126 -- * Joiners
127 intercalate,
128 unlines,
129 unwords,
130
131 -- * Character Parsers
132 -- $parse
133 nextChar,
134 drawChar,
135 unDrawChar,
136 peekChar,
137 isEndOfChars,
138
139 -- * Re-exports
140 -- $reexports
141 module Data.Text,
142 module Pipes.Parse
143 ) where
144
145 import Control.Exception (throwIO, try)
146 import Control.Monad (liftM, unless, join)
147 import Control.Monad.Trans.State.Strict (StateT(..))
148 import Data.Monoid ((<>))
149 import qualified Data.Text as T
150 import qualified Data.Text.IO as T
151 import qualified Data.Text.Encoding as TE
152 import qualified Data.Text.Encoding.Error as TE
153 import Data.Text (Text)
154 import qualified Data.Text.Lazy as TL
155 import qualified Data.Text.Lazy.IO as TL
156 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
157 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
158 import Data.ByteString (ByteString)
159 import qualified Data.ByteString as B
160 import Data.Char (ord, isSpace)
161 import Data.Functor.Identity (Identity)
162 import qualified Data.List as List
163 import Foreign.C.Error (Errno(Errno), ePIPE)
164 import qualified GHC.IO.Exception as G
165 import Pipes
166 import qualified Pipes.ByteString as PB
167 import qualified Pipes.ByteString as PBP
168 import qualified Pipes.Text.Internal as PE
169 import Pipes.Text.Internal (Codec(..))
170 import Pipes.Text.Parse (
171 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
172 import Pipes.Core (respond, Server')
173 import qualified Pipes.Parse as PP
174 import Pipes.Parse ( FreeT)
175 import qualified Pipes.Safe.Prelude as Safe
176 import qualified Pipes.Safe as Safe
177 import Pipes.Safe (MonadSafe(..), Base(..))
178 import qualified Pipes.Prelude as P
179 import qualified System.IO as IO
180 import Data.Char (isSpace)
181 import Data.Word (Word8)
182 import Prelude hiding (
183 all,
184 any,
185 break,
186 concat,
187 concatMap,
188 drop,
189 dropWhile,
190 elem,
191 filter,
192 head,
193 last,
194 lines,
195 length,
196 map,
197 maximum,
198 minimum,
199 notElem,
200 null,
201 readFile,
202 span,
203 splitAt,
204 take,
205 takeWhile,
206 unlines,
207 unwords,
208 words,
209 writeFile )
210
211 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
212 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
213 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
214 {-# INLINE fromLazy #-}
215
216 -- | Stream text from 'stdin'
217 stdin :: MonadIO m => Producer Text m ()
218 stdin = fromHandle IO.stdin
219 {-# INLINE stdin #-}
220
221 {-| Convert a 'IO.Handle' into a text stream using a text size
222 determined by the good sense of the text library; note that this
223 is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
224 but uses the system encoding and has other `Data.Text.IO` features
225 -}
226
227 fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
228 fromHandle h = go where
229 go = do txt <- liftIO (T.hGetChunk h)
230 unless (T.null txt) $ do yield txt
231 go
232 {-# INLINABLE fromHandle#-}
233
234
235 {-| Stream text from a file in the simple fashion of @Data.Text.IO@
236
237 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
238 MAIN = PUTSTRLN "HELLO WORLD"
239 -}
240
241 readFile :: MonadSafe m => FilePath -> Producer Text m ()
242 readFile file = Safe.withFile file IO.ReadMode fromHandle
243 {-# INLINE readFile #-}
244
245 {-| Stream lines of text from stdin (for testing in ghci etc.)
246
247 >>> let safely = runSafeT . runEffect
248 >>> safely $ for Text.stdinLn (lift . lift . print . T.length)
249 hello
250 5
251 world
252 5
253
254 -}
255 stdinLn :: MonadIO m => Producer' Text m ()
256 stdinLn = go where
257 go = do
258 eof <- liftIO (IO.hIsEOF IO.stdin)
259 unless eof $ do
260 txt <- liftIO (T.hGetLine IO.stdin)
261 yield txt
262 go
263 {-# INLINABLE stdinLn #-}
264
265 {-| Stream text to 'stdout'
266
267 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
268
269 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
270 @(source >-> stdout)@ in suitable cases.
271 -}
272 stdout :: MonadIO m => Consumer' Text m ()
273 stdout = go
274 where
275 go = do
276 txt <- await
277 x <- liftIO $ try (T.putStr txt)
278 case x of
279 Left (G.IOError { G.ioe_type = G.ResourceVanished
280 , G.ioe_errno = Just ioe })
281 | Errno ioe == ePIPE
282 -> return ()
283 Left e -> liftIO (throwIO e)
284 Right () -> go
285 {-# INLINABLE stdout #-}
286
287 stdoutLn :: (MonadIO m) => Consumer' Text m ()
288 stdoutLn = go
289 where
290 go = do
291 str <- await
292 x <- liftIO $ try (T.putStrLn str)
293 case x of
294 Left (G.IOError { G.ioe_type = G.ResourceVanished
295 , G.ioe_errno = Just ioe })
296 | Errno ioe == ePIPE
297 -> return ()
298 Left e -> liftIO (throwIO e)
299 Right () -> go
300 {-# INLINABLE stdoutLn #-}
301
302 {-| Convert a text stream into a 'Handle'
303
304 Note: again, for best performance, where possible use
305 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
306 -}
307 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
308 toHandle h = for cat (liftIO . T.hPutStr h)
309 {-# INLINABLE toHandle #-}
310
311 {-# RULES "p >-> toHandle h" forall p h .
312 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
313 #-}
314
315
316 -- | Stream text into a file. Uses @pipes-safe@.
317 writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
318 writeFile file = Safe.withFile file IO.WriteMode toHandle
319 {-# INLINE writeFile #-}
320
321 -- | Apply a transformation to each 'Char' in the stream
322 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
323 map f = P.map (T.map f)
324 {-# INLINABLE map #-}
325
326 {-# RULES "p >-> map f" forall p f .
327 p >-> map f = for p (\txt -> yield (T.map f txt))
328 #-}
329
330 -- | Map a function over the characters of a text stream and concatenate the results
331 concatMap
332 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
333 concatMap f = P.map (T.concatMap f)
334 {-# INLINABLE concatMap #-}
335
336 {-# RULES "p >-> concatMap f" forall p f .
337 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
338 #-}
339
340 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
341 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
342 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
343 encodeUtf8 :: Monad m => Pipe Text ByteString m r
344 encodeUtf8 = P.map TE.encodeUtf8
345 {-# INLINEABLE encodeUtf8 #-}
346
347 {-# RULES "p >-> encodeUtf8" forall p .
348 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
349 #-}
350
351 -- | Transform a Pipe of 'String's into one of 'Text' chunks
352 pack :: Monad m => Pipe String Text m r
353 pack = P.map T.pack
354 {-# INLINEABLE pack #-}
355
356 {-# RULES "p >-> pack" forall p .
357 p >-> pack = for p (\txt -> yield (T.pack txt))
358 #-}
359
360 -- | Transform a Pipes of 'Text' chunks into one of 'String's
361 unpack :: Monad m => Pipe Text String m r
362 unpack = for cat (\t -> yield (T.unpack t))
363 {-# INLINEABLE unpack #-}
364
365 {-# RULES "p >-> unpack" forall p .
366 p >-> unpack = for p (\txt -> yield (T.unpack txt))
367 #-}
368
369 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
370 -- here acting on a 'Text' pipe, rather as they would on a lazy text
371 toCaseFold :: Monad m => Pipe Text Text m ()
372 toCaseFold = P.map T.toCaseFold
373 {-# INLINEABLE toCaseFold #-}
374
375 {-# RULES "p >-> toCaseFold" forall p .
376 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
377 #-}
378
379
380 -- | lowercase incoming 'Text'
381 toLower :: Monad m => Pipe Text Text m ()
382 toLower = P.map T.toLower
383 {-# INLINEABLE toLower #-}
384
385 {-# RULES "p >-> toLower" forall p .
386 p >-> toLower = for p (\txt -> yield (T.toLower txt))
387 #-}
388
389 -- | uppercase incoming 'Text'
390 toUpper :: Monad m => Pipe Text Text m ()
391 toUpper = P.map T.toUpper
392 {-# INLINEABLE toUpper #-}
393
394 {-# RULES "p >-> toUpper" forall p .
395 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
396 #-}
397
398 -- | Remove leading white space from an incoming succession of 'Text's
399 stripStart :: Monad m => Pipe Text Text m r
400 stripStart = do
401 chunk <- await
402 let text = T.stripStart chunk
403 if T.null text
404 then stripStart
405 else cat
406 {-# INLINEABLE stripStart #-}
407
408 -- | @(take n)@ only allows @n@ individual characters to pass;
409 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
410 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
411 take n0 = go n0 where
412 go n
413 | n <= 0 = return ()
414 | otherwise = do
415 txt <- await
416 let len = fromIntegral (T.length txt)
417 if (len > n)
418 then yield (T.take (fromIntegral n) txt)
419 else do
420 yield txt
421 go (n - len)
422 {-# INLINABLE take #-}
423
424 -- | @(drop n)@ drops the first @n@ characters
425 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
426 drop n0 = go n0 where
427 go n
428 | n <= 0 = cat
429 | otherwise = do
430 txt <- await
431 let len = fromIntegral (T.length txt)
432 if (len >= n)
433 then do
434 yield (T.drop (fromIntegral n) txt)
435 cat
436 else go (n - len)
437 {-# INLINABLE drop #-}
438
439 -- | Take characters until they fail the predicate
440 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
441 takeWhile predicate = go
442 where
443 go = do
444 txt <- await
445 let (prefix, suffix) = T.span predicate txt
446 if (T.null suffix)
447 then do
448 yield txt
449 go
450 else yield prefix
451 {-# INLINABLE takeWhile #-}
452
453 -- | Drop characters until they fail the predicate
454 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
455 dropWhile predicate = go where
456 go = do
457 txt <- await
458 case T.findIndex (not . predicate) txt of
459 Nothing -> go
460 Just i -> do
461 yield (T.drop i txt)
462 cat
463 {-# INLINABLE dropWhile #-}
464
465 -- | Only allows 'Char's to pass if they satisfy the predicate
466 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
467 filter predicate = P.map (T.filter predicate)
468 {-# INLINABLE filter #-}
469
470 {-# RULES "p >-> filter q" forall p q .
471 p >-> filter q = for p (\txt -> yield (T.filter q txt))
472 #-}
473
474 -- | Strict left scan over the characters
475 scan
476 :: (Monad m)
477 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
478 scan step begin = go begin
479 where
480 go c = do
481 txt <- await
482 let txt' = T.scanl step c txt
483 c' = T.last txt'
484 yield txt'
485 go c'
486 {-# INLINABLE scan #-}
487
488 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
489 'TL.Text'
490 -}
491 toLazy :: Producer Text Identity () -> TL.Text
492 toLazy = TL.fromChunks . P.toList
493 {-# INLINABLE toLazy #-}
494
495 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
496 'TL.Text'
497
498 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
499 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
500 immediately as they are generated instead of loading them all into memory.
501 -}
502 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
503 toLazyM = liftM TL.fromChunks . P.toListM
504 {-# INLINABLE toLazyM #-}
505
506 -- | Reduce the text stream using a strict left fold over characters
507 foldChars
508 :: Monad m
509 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
510 foldChars step begin done = P.fold (T.foldl' step) begin done
511 {-# INLINABLE fold #-}
512
513 -- | Retrieve the first 'Char'
514 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
515 head = go
516 where
517 go p = do
518 x <- nextChar p
519 case x of
520 Left _ -> return Nothing
521 Right (c, _) -> return (Just c)
522 {-# INLINABLE head #-}
523
524 -- | Retrieve the last 'Char'
525 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
526 last = go Nothing
527 where
528 go r p = do
529 x <- next p
530 case x of
531 Left () -> return r
532 Right (txt, p') ->
533 if (T.null txt)
534 then go r p'
535 else go (Just $ T.last txt) p'
536 {-# INLINABLE last #-}
537
538 -- | Determine if the stream is empty
539 null :: (Monad m) => Producer Text m () -> m Bool
540 null = P.all T.null
541 {-# INLINABLE null #-}
542
543 -- | Count the number of characters in the stream
544 length :: (Monad m, Num n) => Producer Text m () -> m n
545 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
546 {-# INLINABLE length #-}
547
548 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
549 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
550 any predicate = P.any (T.any predicate)
551 {-# INLINABLE any #-}
552
553 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
554 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
555 all predicate = P.all (T.all predicate)
556 {-# INLINABLE all #-}
557
558 -- | Return the maximum 'Char' within a text stream
559 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
560 maximum = P.fold step Nothing id
561 where
562 step mc txt =
563 if (T.null txt)
564 then mc
565 else Just $ case mc of
566 Nothing -> T.maximum txt
567 Just c -> max c (T.maximum txt)
568 {-# INLINABLE maximum #-}
569
570 -- | Return the minimum 'Char' within a text stream (surely very useful!)
571 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
572 minimum = P.fold step Nothing id
573 where
574 step mc txt =
575 if (T.null txt)
576 then mc
577 else case mc of
578 Nothing -> Just (T.minimum txt)
579 Just c -> Just (min c (T.minimum txt))
580 {-# INLINABLE minimum #-}
581
582 -- | Find the first element in the stream that matches the predicate
583 find
584 :: (Monad m)
585 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
586 find predicate p = head (p >-> filter predicate)
587 {-# INLINABLE find #-}
588
589 -- | Index into a text stream
590 index
591 :: (Monad m, Integral a)
592 => a-> Producer Text m () -> m (Maybe Char)
593 index n p = head (p >-> drop n)
594 {-# INLINABLE index #-}
595
596
597 -- | Store a tally of how many segments match the given 'Text'
598 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
599 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
600 {-# INLINABLE count #-}
601
602 -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text
603 -- returning a Pipe of ByteStrings that begins at the point of failure.
604
605 decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
606 decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
607 go !carry dec0 p = do
608 x <- lift (next p)
609 case x of Left r -> if B.null carry
610 then return (return r) -- all bytestrinput was consumed
611 else return (do yield carry -- a potentially valid fragment remains
612 return r)
613
614 Right (chunk, p') -> case dec0 chunk of
615 PE.Some text carry2 dec -> do yield text
616 go carry2 dec p'
617 PE.Other text bs -> do yield text
618 return (do yield bs -- an invalid blob remains
619 p')
620 {-# INLINABLE decodeUtf8 #-}
621
622
623 -- | Splits a 'Producer' after the given number of characters
624 splitAt
625 :: (Monad m, Integral n)
626 => n
627 -> Producer Text m r
628 -> Producer' Text m (Producer Text m r)
629 splitAt = go
630 where
631 go 0 p = return p
632 go n p = do
633 x <- lift (next p)
634 case x of
635 Left r -> return (return r)
636 Right (txt, p') -> do
637 let len = fromIntegral (T.length txt)
638 if (len <= n)
639 then do
640 yield txt
641 go (n - len) p'
642 else do
643 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
644 yield prefix
645 return (yield suffix >> p')
646 {-# INLINABLE splitAt #-}
647
648 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
649 chunksOf
650 :: (Monad m, Integral n)
651 => n -> Producer Text m r -> FreeT (Producer Text m) m r
652 chunksOf n p0 = PP.FreeT (go p0)
653 where
654 go p = do
655 x <- next p
656 return $ case x of
657 Left r -> PP.Pure r
658 Right (txt, p') -> PP.Free $ do
659 p'' <- splitAt n (yield txt >> p')
660 return $ PP.FreeT (go p'')
661 {-# INLINABLE chunksOf #-}
662
663 {-| Split a text stream in two, where the first text stream is the longest
664 consecutive group of text that satisfy the predicate
665 -}
666 span
667 :: (Monad m)
668 => (Char -> Bool)
669 -> Producer Text m r
670 -> Producer' Text m (Producer Text m r)
671 span predicate = go
672 where
673 go p = do
674 x <- lift (next p)
675 case x of
676 Left r -> return (return r)
677 Right (txt, p') -> do
678 let (prefix, suffix) = T.span predicate txt
679 if (T.null suffix)
680 then do
681 yield txt
682 go p'
683 else do
684 yield prefix
685 return (yield suffix >> p')
686 {-# INLINABLE span #-}
687
688 {-| Split a text stream in two, where the first text stream is the longest
689 consecutive group of characters that don't satisfy the predicate
690 -}
691 break
692 :: (Monad m)
693 => (Char -> Bool)
694 -> Producer Text m r
695 -> Producer Text m (Producer Text m r)
696 break predicate = span (not . predicate)
697 {-# INLINABLE break #-}
698
699 {-| Split a text stream into sub-streams delimited by characters that satisfy the
700 predicate
701 -}
702 splitWith
703 :: (Monad m)
704 => (Char -> Bool)
705 -> Producer Text m r
706 -> PP.FreeT (Producer Text m) m r
707 splitWith predicate p0 = PP.FreeT (go0 p0)
708 where
709 go0 p = do
710 x <- next p
711 case x of
712 Left r -> return (PP.Pure r)
713 Right (txt, p') ->
714 if (T.null txt)
715 then go0 p'
716 else return $ PP.Free $ do
717 p'' <- span (not . predicate) (yield txt >> p')
718 return $ PP.FreeT (go1 p'')
719 go1 p = do
720 x <- nextChar p
721 return $ case x of
722 Left r -> PP.Pure r
723 Right (_, p') -> PP.Free $ do
724 p'' <- span (not . predicate) p'
725 return $ PP.FreeT (go1 p'')
726 {-# INLINABLE splitWith #-}
727
728 -- | Split a text stream using the given 'Char' as the delimiter
729 split :: (Monad m)
730 => Char
731 -> Producer Text m r
732 -> FreeT (Producer Text m) m r
733 split c = splitWith (c ==)
734 {-# INLINABLE split #-}
735
736 {-| Group a text stream into 'FreeT'-delimited text streams using the supplied
737 equality predicate
738 -}
739 groupBy
740 :: (Monad m)
741 => (Char -> Char -> Bool)
742 -> Producer Text m r
743 -> FreeT (Producer Text m) m r
744 groupBy equal p0 = PP.FreeT (go p0)
745 where
746 go p = do
747 x <- next p
748 case x of
749 Left r -> return (PP.Pure r)
750 Right (txt, p') -> case (T.uncons txt) of
751 Nothing -> go p'
752 Just (c, _) -> do
753 return $ PP.Free $ do
754 p'' <- span (equal c) (yield txt >> p')
755 return $ PP.FreeT (go p'')
756 {-# INLINABLE groupBy #-}
757
758 -- | Group a text stream into 'FreeT'-delimited text streams of identical characters
759 group
760 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
761 group = groupBy (==)
762 {-# INLINABLE group #-}
763
764 {-| Split a text stream into 'FreeT'-delimited lines
765 -}
766 lines
767 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
768 lines p0 = PP.FreeT (go0 p0)
769 where
770 go0 p = do
771 x <- next p
772 case x of
773 Left r -> return (PP.Pure r)
774 Right (txt, p') ->
775 if (T.null txt)
776 then go0 p'
777 else return $ PP.Free $ go1 (yield txt >> p')
778 go1 p = do
779 p' <- break ('\n' ==) p
780 return $ PP.FreeT $ do
781 x <- nextChar p'
782 case x of
783 Left r -> return $ PP.Pure r
784 Right (_, p'') -> go0 p''
785 {-# INLINABLE lines #-}
786
787
788
789 -- | Split a text stream into 'FreeT'-delimited words
790 words
791 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
792 words = go
793 where
794 go p = PP.FreeT $ do
795 x <- next (p >-> dropWhile isSpace)
796 return $ case x of
797 Left r -> PP.Pure r
798 Right (bs, p') -> PP.Free $ do
799 p'' <- break isSpace (yield bs >> p')
800 return (go p'')
801 {-# INLINABLE words #-}
802
803
804 -- | Intersperse a 'Char' in between the characters of the text stream
805 intersperse
806 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
807 intersperse c = go0
808 where
809 go0 p = do
810 x <- lift (next p)
811 case x of
812 Left r -> return r
813 Right (txt, p') -> do
814 yield (T.intersperse c txt)
815 go1 p'
816 go1 p = do
817 x <- lift (next p)
818 case x of
819 Left r -> return r
820 Right (txt, p') -> do
821 yield (T.singleton c)
822 yield (T.intersperse c txt)
823 go1 p'
824 {-# INLINABLE intersperse #-}
825
826 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
827 interspersing a text stream in between them
828 -}
829 intercalate
830 :: (Monad m)
831 => Producer Text m ()
832 -> FreeT (Producer Text m) m r
833 -> Producer Text m r
834 intercalate p0 = go0
835 where
836 go0 f = do
837 x <- lift (PP.runFreeT f)
838 case x of
839 PP.Pure r -> return r
840 PP.Free p -> do
841 f' <- p
842 go1 f'
843 go1 f = do
844 x <- lift (PP.runFreeT f)
845 case x of
846 PP.Pure r -> return r
847 PP.Free p -> do
848 p0
849 f' <- p
850 go1 f'
851 {-# INLINABLE intercalate #-}
852
853 {-| Join 'FreeT'-delimited lines into a text stream
854 -}
855 unlines
856 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
857 unlines = go
858 where
859 go f = do
860 x <- lift (PP.runFreeT f)
861 case x of
862 PP.Pure r -> return r
863 PP.Free p -> do
864 f' <- p
865 yield $ T.singleton '\n'
866 go f'
867 {-# INLINABLE unlines #-}
868
869 {-| Join 'FreeT'-delimited words into a text stream
870 -}
871 unwords
872 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
873 unwords = intercalate (yield $ T.pack " ")
874 {-# INLINABLE unwords #-}
875
876 {- $parse
877 The following parsing utilities are single-character analogs of the ones found
878 @pipes-parse@.
879 -}
880
881 {- $reexports
882 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
883
884 @Data.Text@ re-exports the 'Text' type.
885
886 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
887 -}
888
889
890
891 decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
892 -- decode codec = go B.empty where
893 -- go extra p0 =
894 -- do x <- lift (next p0)
895 -- case x of Right (chunk, p) ->
896 -- do let (text, stuff) = codecDecode codec (B.append extra chunk)
897 -- yield text
898 -- case stuff of Right extra' -> go extra' p
899 -- Left (exc,bs) -> do yield text
900 -- return (do yield bs
901 -- p)
902 -- Left r -> return (do yield extra
903 -- return r)
904
905 decode d p0 = case d of
906 PE.Other txt bad -> do yield txt
907 return (do yield bad
908 p0)
909 PE.Some txt extra dec -> do yield txt
910 x <- lift (next p0)
911 case x of Left r -> return (do yield extra
912 return r)
913 Right (chunk,p1) -> decode (dec chunk) p1
914
915 -- go !carry dec0 p = do
916 -- x <- lift (next p)
917 -- case x of Left r -> if B.null carry
918 -- then return (return r) -- all bytestrinput was consumed
919 -- else return (do yield carry -- a potentially valid fragment remains
920 -- return r)
921 --
922 -- Right (chunk, p') -> case dec0 chunk of
923 -- PE.Some text carry2 dec -> do yield text
924 -- go carry2 dec p'
925 -- PE.Other text bs -> do yield text
926 -- return (do yield bs -- an invalid blob remains
927 -- p')
928 -- {-# INLINABLE decodeUtf8 #-}