]> git.immae.eu Git - github/fretlink/text-pipes.git/blob - Pipes/Text.hs
8221c01ff160336f29009ee67ccdb904d9bc8bd3
[github/fretlink/text-pipes.git] / Pipes / Text.hs
1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
6 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
7 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
8
9 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
10 example, the following program copies a document from one file to another:
11
12 > import Pipes
13 > import qualified Pipes.Text as Text
14 > import qualified Pipes.Text.IO as Text
15 > import System.IO
16 >
17 > main =
18 > withFile "inFile.txt" ReadMode $ \hIn ->
19 > withFile "outFile.txt" WriteMode $ \hOut ->
20 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
21
22 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
23
24 > import Pipes
25 > import qualified Pipes.Text as Text
26 > import qualified Pipes.Text.IO as Text
27 > import Pipes.Safe
28 >
29 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
30
31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
32 and 'stdout' pipes, as with the following \"echo\" program:
33
34 > main = runEffect $ Text.stdin >-> Text.stdout
35
36 You can also translate pure lazy 'TL.Text's to and from pipes:
37
38 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
39
40 In addition, this module provides many functions equivalent to lazy
41 'Text' functions so that you can transform or fold text streams. For
42 example, to stream only the first three lines of 'stdin' to 'stdout' you
43 might write:
44
45 > import Pipes
46 > import qualified Pipes.Text as Text
47 > import qualified Pipes.Parse as Parse
48 >
49 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
50 > where
51 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
52
53 The above program will never bring more than one chunk of text (~ 32 KB) into
54 memory, no matter how long the lines are.
55
56 Note that functions in this library are designed to operate on streams that
57 are insensitive to text boundaries. This means that they may freely split
58 text into smaller texts, /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
61 -}
62
63 module Pipes.Text (
64 -- * Producers
65 fromLazy
66 -- , stdin
67 -- , fromHandle
68 -- , readFile
69
70 -- * Consumers
71 -- , stdout
72 -- , toHandle
73 -- , writeFile
74
75 -- * Pipes
76 , map
77 , concatMap
78 , take
79 , drop
80 , takeWhile
81 , dropWhile
82 , filter
83 , scan
84 -- , encodeUtf8
85 , pack
86 , unpack
87 , toCaseFold
88 , toLower
89 , toUpper
90 , stripStart
91
92 -- * Folds
93 , toLazy
94 , toLazyM
95 , foldChars
96 , head
97 , last
98 , null
99 , length
100 , any
101 , all
102 , maximum
103 , minimum
104 , find
105 , index
106 , count
107
108 -- * Primitive Character Parsers
109 -- $parse
110 , nextChar
111 , drawChar
112 , unDrawChar
113 , peekChar
114 , isEndOfChars
115
116 -- * Parsing Lenses
117 , splitAt
118 , span
119 , break
120 , groupBy
121 , group
122 , word
123 , line
124
125 -- -- * Decoding Lenses
126 -- , decodeUtf8
127 -- , codec
128 --
129 -- -- * Codecs
130 -- , utf8
131 -- , utf16_le
132 -- , utf16_be
133 -- , utf32_le
134 -- , utf32_be
135 --
136 -- -- * Other Decoding/Encoding Functions
137 -- , decodeIso8859_1
138 -- , decodeAscii
139 -- , encodeIso8859_1
140 -- , encodeAscii
141
142 -- * FreeT Splitters
143 , chunksOf
144 , splitsWith
145 , splits
146 -- , groupsBy
147 -- , groups
148 , lines
149 , words
150
151 -- * Transformations
152 , intersperse
153 , packChars
154
155 -- * Joiners
156 , intercalate
157 , unlines
158 , unwords
159
160 -- * Re-exports
161 -- $reexports
162 -- , DecodeResult(..)
163 -- , Codec
164 -- , TextException(..)
165 , module Data.ByteString
166 , module Data.Text
167 , module Data.Profunctor
168 , module Data.Word
169 , module Pipes.Parse
170 , module Pipes.Group
171 ) where
172
173 import Control.Applicative ((<*))
174 import Control.Monad (liftM, unless, join)
175 import Control.Monad.Trans.State.Strict (StateT(..), modify)
176 import Data.Monoid ((<>))
177 import qualified Data.Text as T
178 import qualified Data.Text.IO as T
179 import qualified Data.Text.Encoding as TE
180 import qualified Data.Text.Encoding.Error as TE
181 import Data.Text (Text)
182 import qualified Data.Text.Lazy as TL
183 import qualified Data.Text.Lazy.IO as TL
184 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
185 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
186 import Data.ByteString (ByteString)
187 import qualified Data.ByteString as B
188 import qualified Data.ByteString.Char8 as B8
189 import Data.Char (ord, isSpace)
190 import Data.Functor.Constant (Constant(Constant, getConstant))
191 import Data.Functor.Identity (Identity)
192 import Data.Profunctor (Profunctor)
193 import qualified Data.Profunctor
194 import qualified Data.List as List
195 import Pipes
196 import qualified Pipes.ByteString as PB
197 -- import Pipes.Text.Decoding
198 import Pipes.Core (respond, Server')
199 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
200 import qualified Pipes.Group as PG
201 import qualified Pipes.Parse as PP
202 import Pipes.Parse (Parser)
203
204 import qualified Pipes.Prelude as P
205 import qualified System.IO as IO
206 import Data.Char (isSpace)
207 import Data.Word (Word8)
208 import Data.Text.StreamDecoding
209
210 import Prelude hiding (
211 all,
212 any,
213 break,
214 concat,
215 concatMap,
216 drop,
217 dropWhile,
218 elem,
219 filter,
220 head,
221 last,
222 lines,
223 length,
224 map,
225 maximum,
226 minimum,
227 notElem,
228 null,
229 readFile,
230 span,
231 splitAt,
232 take,
233 takeWhile,
234 unlines,
235 unwords,
236 words,
237 writeFile )
238
239 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
240 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
241 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
242 {-# INLINE fromLazy #-}
243
244
245 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
246
247 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
248
249 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
250 a ^. lens = getConstant (lens Constant a)
251
252
253 -- | Apply a transformation to each 'Char' in the stream
254 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
255 map f = P.map (T.map f)
256 {-# INLINABLE map #-}
257
258 {-# RULES "p >-> map f" forall p f .
259 p >-> map f = for p (\txt -> yield (T.map f txt))
260 #-}
261
262 -- | Map a function over the characters of a text stream and concatenate the results
263 concatMap
264 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
265 concatMap f = P.map (T.concatMap f)
266 {-# INLINABLE concatMap #-}
267
268 {-# RULES "p >-> concatMap f" forall p f .
269 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
270 #-}
271
272 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
273 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
274 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
275 encodeUtf8 :: Monad m => Pipe Text ByteString m r
276 encodeUtf8 = P.map TE.encodeUtf8
277 {-# INLINEABLE encodeUtf8 #-}
278
279 {-# RULES "p >-> encodeUtf8" forall p .
280 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
281 #-}
282
283 -- | Transform a Pipe of 'String's into one of 'Text' chunks
284 pack :: Monad m => Pipe String Text m r
285 pack = P.map T.pack
286 {-# INLINEABLE pack #-}
287
288 {-# RULES "p >-> pack" forall p .
289 p >-> pack = for p (\txt -> yield (T.pack txt))
290 #-}
291
292 -- | Transform a Pipes of 'Text' chunks into one of 'String's
293 unpack :: Monad m => Pipe Text String m r
294 unpack = for cat (\t -> yield (T.unpack t))
295 {-# INLINEABLE unpack #-}
296
297 {-# RULES "p >-> unpack" forall p .
298 p >-> unpack = for p (\txt -> yield (T.unpack txt))
299 #-}
300
301 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
302 -- here acting as 'Text' pipes, rather as they would on a lazy text
303 toCaseFold :: Monad m => Pipe Text Text m ()
304 toCaseFold = P.map T.toCaseFold
305 {-# INLINEABLE toCaseFold #-}
306
307 {-# RULES "p >-> toCaseFold" forall p .
308 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
309 #-}
310
311
312 -- | lowercase incoming 'Text'
313 toLower :: Monad m => Pipe Text Text m ()
314 toLower = P.map T.toLower
315 {-# INLINEABLE toLower #-}
316
317 {-# RULES "p >-> toLower" forall p .
318 p >-> toLower = for p (\txt -> yield (T.toLower txt))
319 #-}
320
321 -- | uppercase incoming 'Text'
322 toUpper :: Monad m => Pipe Text Text m ()
323 toUpper = P.map T.toUpper
324 {-# INLINEABLE toUpper #-}
325
326 {-# RULES "p >-> toUpper" forall p .
327 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
328 #-}
329
330 -- | Remove leading white space from an incoming succession of 'Text's
331 stripStart :: Monad m => Pipe Text Text m r
332 stripStart = do
333 chunk <- await
334 let text = T.stripStart chunk
335 if T.null text
336 then stripStart
337 else do yield text
338 cat
339 {-# INLINEABLE stripStart #-}
340
341 -- | @(take n)@ only allows @n@ individual characters to pass;
342 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
343 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
344 take n0 = go n0 where
345 go n
346 | n <= 0 = return ()
347 | otherwise = do
348 txt <- await
349 let len = fromIntegral (T.length txt)
350 if (len > n)
351 then yield (T.take (fromIntegral n) txt)
352 else do
353 yield txt
354 go (n - len)
355 {-# INLINABLE take #-}
356
357 -- | @(drop n)@ drops the first @n@ characters
358 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
359 drop n0 = go n0 where
360 go n
361 | n <= 0 = cat
362 | otherwise = do
363 txt <- await
364 let len = fromIntegral (T.length txt)
365 if (len >= n)
366 then do
367 yield (T.drop (fromIntegral n) txt)
368 cat
369 else go (n - len)
370 {-# INLINABLE drop #-}
371
372 -- | Take characters until they fail the predicate
373 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
374 takeWhile predicate = go
375 where
376 go = do
377 txt <- await
378 let (prefix, suffix) = T.span predicate txt
379 if (T.null suffix)
380 then do
381 yield txt
382 go
383 else yield prefix
384 {-# INLINABLE takeWhile #-}
385
386 -- | Drop characters until they fail the predicate
387 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
388 dropWhile predicate = go where
389 go = do
390 txt <- await
391 case T.findIndex (not . predicate) txt of
392 Nothing -> go
393 Just i -> do
394 yield (T.drop i txt)
395 cat
396 {-# INLINABLE dropWhile #-}
397
398 -- | Only allows 'Char's to pass if they satisfy the predicate
399 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
400 filter predicate = P.map (T.filter predicate)
401 {-# INLINABLE filter #-}
402
403 {-# RULES "p >-> filter q" forall p q .
404 p >-> filter q = for p (\txt -> yield (T.filter q txt))
405 #-}
406
407 -- | Strict left scan over the characters
408 scan
409 :: (Monad m)
410 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
411 scan step begin = do
412 yield (T.singleton begin)
413 go begin
414 where
415 go c = do
416 txt <- await
417 let txt' = T.scanl step c txt
418 c' = T.last txt'
419 yield (T.tail txt')
420 go c'
421 {-# INLINABLE scan #-}
422
423 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
424 'TL.Text'
425 -}
426 toLazy :: Producer Text Identity () -> TL.Text
427 toLazy = TL.fromChunks . P.toList
428 {-# INLINABLE toLazy #-}
429
430 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
431 'TL.Text'
432
433 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
434 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
435 immediately as they are generated instead of loading them all into memory.
436 -}
437 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
438 toLazyM = liftM TL.fromChunks . P.toListM
439 {-# INLINABLE toLazyM #-}
440
441 -- | Reduce the text stream using a strict left fold over characters
442 foldChars
443 :: Monad m
444 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
445 foldChars step begin done = P.fold (T.foldl' step) begin done
446 {-# INLINABLE foldChars #-}
447
448 -- | Retrieve the first 'Char'
449 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
450 head = go
451 where
452 go p = do
453 x <- nextChar p
454 case x of
455 Left _ -> return Nothing
456 Right (c, _) -> return (Just c)
457 {-# INLINABLE head #-}
458
459 -- | Retrieve the last 'Char'
460 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
461 last = go Nothing
462 where
463 go r p = do
464 x <- next p
465 case x of
466 Left () -> return r
467 Right (txt, p') ->
468 if (T.null txt)
469 then go r p'
470 else go (Just $ T.last txt) p'
471 {-# INLINABLE last #-}
472
473 -- | Determine if the stream is empty
474 null :: (Monad m) => Producer Text m () -> m Bool
475 null = P.all T.null
476 {-# INLINABLE null #-}
477
478 -- | Count the number of characters in the stream
479 length :: (Monad m, Num n) => Producer Text m () -> m n
480 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
481 {-# INLINABLE length #-}
482
483 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
484 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
485 any predicate = P.any (T.any predicate)
486 {-# INLINABLE any #-}
487
488 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
489 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
490 all predicate = P.all (T.all predicate)
491 {-# INLINABLE all #-}
492
493 -- | Return the maximum 'Char' within a text stream
494 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
495 maximum = P.fold step Nothing id
496 where
497 step mc txt =
498 if (T.null txt)
499 then mc
500 else Just $ case mc of
501 Nothing -> T.maximum txt
502 Just c -> max c (T.maximum txt)
503 {-# INLINABLE maximum #-}
504
505 -- | Return the minimum 'Char' within a text stream (surely very useful!)
506 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
507 minimum = P.fold step Nothing id
508 where
509 step mc txt =
510 if (T.null txt)
511 then mc
512 else case mc of
513 Nothing -> Just (T.minimum txt)
514 Just c -> Just (min c (T.minimum txt))
515 {-# INLINABLE minimum #-}
516
517
518 -- | Find the first element in the stream that matches the predicate
519 find
520 :: (Monad m)
521 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
522 find predicate p = head (p >-> filter predicate)
523 {-# INLINABLE find #-}
524
525 -- | Index into a text stream
526 index
527 :: (Monad m, Integral a)
528 => a-> Producer Text m () -> m (Maybe Char)
529 index n p = head (p >-> drop n)
530 {-# INLINABLE index #-}
531
532
533 -- | Store a tally of how many segments match the given 'Text'
534 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
535 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
536 {-# INLINABLE count #-}
537
538
539 {-| Consume the first character from a stream of 'Text'
540
541 'next' either fails with a 'Left' if the 'Producer' has no more characters or
542 succeeds with a 'Right' providing the next character and the remainder of the
543 'Producer'.
544 -}
545 nextChar
546 :: (Monad m)
547 => Producer Text m r
548 -> m (Either r (Char, Producer Text m r))
549 nextChar = go
550 where
551 go p = do
552 x <- next p
553 case x of
554 Left r -> return (Left r)
555 Right (txt, p') -> case (T.uncons txt) of
556 Nothing -> go p'
557 Just (c, txt') -> return (Right (c, yield txt' >> p'))
558 {-# INLINABLE nextChar #-}
559
560 {-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
561 'Producer' is empty
562 -}
563 drawChar :: (Monad m) => Parser Text m (Maybe Char)
564 drawChar = do
565 x <- PP.draw
566 case x of
567 Nothing -> return Nothing
568 Just txt -> case (T.uncons txt) of
569 Nothing -> drawChar
570 Just (c, txt') -> do
571 PP.unDraw txt'
572 return (Just c)
573 {-# INLINABLE drawChar #-}
574
575 -- | Push back a 'Char' onto the underlying 'Producer'
576 unDrawChar :: (Monad m) => Char -> Parser Text m ()
577 unDrawChar c = modify (yield (T.singleton c) >>)
578 {-# INLINABLE unDrawChar #-}
579
580 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
581 push the 'Char' back
582
583 > peekChar = do
584 > x <- drawChar
585 > case x of
586 > Left _ -> return ()
587 > Right c -> unDrawChar c
588 > return x
589 -}
590 peekChar :: (Monad m) => Parser Text m (Maybe Char)
591 peekChar = do
592 x <- drawChar
593 case x of
594 Nothing -> return ()
595 Just c -> unDrawChar c
596 return x
597 {-# INLINABLE peekChar #-}
598
599 {-| Check if the underlying 'Producer' has no more characters
600
601 Note that this will skip over empty 'Text' chunks, unlike
602 'PP.isEndOfInput' from @pipes-parse@, which would consider
603 an empty 'Text' a valid bit of input.
604
605 > isEndOfChars = liftM isLeft peekChar
606 -}
607 isEndOfChars :: (Monad m) => Parser Text m Bool
608 isEndOfChars = do
609 x <- peekChar
610 return (case x of
611 Nothing -> True
612 Just _-> False )
613 {-# INLINABLE isEndOfChars #-}
614
615
616
617
618 -- | Splits a 'Producer' after the given number of characters
619 splitAt
620 :: (Monad m, Integral n)
621 => n
622 -> Lens' (Producer Text m r)
623 (Producer Text m (Producer Text m r))
624 splitAt n0 k p0 = fmap join (k (go n0 p0))
625 where
626 go 0 p = return p
627 go n p = do
628 x <- lift (next p)
629 case x of
630 Left r -> return (return r)
631 Right (txt, p') -> do
632 let len = fromIntegral (T.length txt)
633 if (len <= n)
634 then do
635 yield txt
636 go (n - len) p'
637 else do
638 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
639 yield prefix
640 return (yield suffix >> p')
641 {-# INLINABLE splitAt #-}
642
643
644 {-| Split a text stream in two, where the first text stream is the longest
645 consecutive group of text that satisfy the predicate
646 -}
647 span
648 :: (Monad m)
649 => (Char -> Bool)
650 -> Lens' (Producer Text m r)
651 (Producer Text m (Producer Text m r))
652 span predicate k p0 = fmap join (k (go p0))
653 where
654 go p = do
655 x <- lift (next p)
656 case x of
657 Left r -> return (return r)
658 Right (txt, p') -> do
659 let (prefix, suffix) = T.span predicate txt
660 if (T.null suffix)
661 then do
662 yield txt
663 go p'
664 else do
665 yield prefix
666 return (yield suffix >> p')
667 {-# INLINABLE span #-}
668
669 {-| Split a text stream in two, where the first text stream is the longest
670 consecutive group of characters that don't satisfy the predicate
671 -}
672 break
673 :: (Monad m)
674 => (Char -> Bool)
675 -> Lens' (Producer Text m r)
676 (Producer Text m (Producer Text m r))
677 break predicate = span (not . predicate)
678 {-# INLINABLE break #-}
679
680 {-| Improper lens that splits after the first group of equivalent Chars, as
681 defined by the given equivalence relation
682 -}
683 groupBy
684 :: (Monad m)
685 => (Char -> Char -> Bool)
686 -> Lens' (Producer Text m r)
687 (Producer Text m (Producer Text m r))
688 groupBy equals k p0 = fmap join (k ((go p0))) where
689 go p = do
690 x <- lift (next p)
691 case x of
692 Left r -> return (return r)
693 Right (txt, p') -> case T.uncons txt of
694 Nothing -> go p'
695 Just (c, _) -> (yield txt >> p') ^. span (equals c)
696 {-# INLINABLE groupBy #-}
697
698 -- | Improper lens that splits after the first succession of identical 'Char' s
699 group :: Monad m
700 => Lens' (Producer Text m r)
701 (Producer Text m (Producer Text m r))
702 group = groupBy (==)
703 {-# INLINABLE group #-}
704
705 {-| Improper lens that splits a 'Producer' after the first word
706
707 Unlike 'words', this does not drop leading whitespace
708 -}
709 word :: (Monad m)
710 => Lens' (Producer Text m r)
711 (Producer Text m (Producer Text m r))
712 word k p0 = fmap join (k (to p0))
713 where
714 to p = do
715 p' <- p^.span isSpace
716 p'^.break isSpace
717 {-# INLINABLE word #-}
718
719
720 line :: (Monad m)
721 => Lens' (Producer Text m r)
722 (Producer Text m (Producer Text m r))
723 line = break (== '\n')
724
725 {-# INLINABLE line #-}
726
727
728 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
729 intersperse
730 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
731 intersperse c = go0
732 where
733 go0 p = do
734 x <- lift (next p)
735 case x of
736 Left r -> return r
737 Right (txt, p') -> do
738 yield (T.intersperse c txt)
739 go1 p'
740 go1 p = do
741 x <- lift (next p)
742 case x of
743 Left r -> return r
744 Right (txt, p') -> do
745 yield (T.singleton c)
746 yield (T.intersperse c txt)
747 go1 p'
748 {-# INLINABLE intersperse #-}
749
750
751
752 -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
753 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
754 packChars = Data.Profunctor.dimap to (fmap from)
755 where
756 -- to :: Monad m => Producer Char m x -> Producer Text m x
757 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
758
759 step diffAs c = diffAs . (c:)
760
761 done diffAs = T.pack (diffAs [])
762
763 -- from :: Monad m => Producer Text m x -> Producer Char m x
764 from p = for p (each . T.unpack)
765 {-# INLINABLE packChars #-}
766
767
768 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
769 chunksOf
770 :: (Monad m, Integral n)
771 => n -> Lens' (Producer Text m r)
772 (FreeT (Producer Text m) m r)
773 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
774 where
775 go p = do
776 x <- next p
777 return $ case x of
778 Left r -> Pure r
779 Right (txt, p') -> Free $ do
780 p'' <- (yield txt >> p') ^. splitAt n
781 return $ FreeT (go p'')
782 {-# INLINABLE chunksOf #-}
783
784
785 {-| Split a text stream into sub-streams delimited by characters that satisfy the
786 predicate
787 -}
788 splitsWith
789 :: (Monad m)
790 => (Char -> Bool)
791 -> Producer Text m r
792 -> FreeT (Producer Text m) m r
793 splitsWith predicate p0 = FreeT (go0 p0)
794 where
795 go0 p = do
796 x <- next p
797 case x of
798 Left r -> return (Pure r)
799 Right (txt, p') ->
800 if (T.null txt)
801 then go0 p'
802 else return $ Free $ do
803 p'' <- (yield txt >> p') ^. span (not . predicate)
804 return $ FreeT (go1 p'')
805 go1 p = do
806 x <- nextChar p
807 return $ case x of
808 Left r -> Pure r
809 Right (_, p') -> Free $ do
810 p'' <- p' ^. span (not . predicate)
811 return $ FreeT (go1 p'')
812 {-# INLINABLE splitsWith #-}
813
814 -- | Split a text stream using the given 'Char' as the delimiter
815 splits :: (Monad m)
816 => Char
817 -> Lens' (Producer Text m r)
818 (FreeT (Producer Text m) m r)
819 splits c k p =
820 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
821 {-# INLINABLE splits #-}
822
823 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
824 given equivalence relation
825 -}
826 groupsBy
827 :: Monad m
828 => (Char -> Char -> Bool)
829 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
830 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
831 go p = do x <- next p
832 case x of Left r -> return (Pure r)
833 Right (bs, p') -> case T.uncons bs of
834 Nothing -> go p'
835 Just (c, _) -> do return $ Free $ do
836 p'' <- (yield bs >> p')^.span (equals c)
837 return $ FreeT (go p'')
838 {-# INLINABLE groupsBy #-}
839
840
841 -- | Like 'groupsBy', where the equality predicate is ('==')
842 groups
843 :: Monad m
844 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
845 groups = groupsBy (==)
846 {-# INLINABLE groups #-}
847
848
849
850 {-| Split a text stream into 'FreeT'-delimited lines
851 -}
852 lines
853 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
854 lines = Data.Profunctor.dimap _lines (fmap _unlines)
855 where
856 _lines p0 = FreeT (go0 p0)
857 where
858 go0 p = do
859 x <- next p
860 case x of
861 Left r -> return (Pure r)
862 Right (txt, p') ->
863 if (T.null txt)
864 then go0 p'
865 else return $ Free $ go1 (yield txt >> p')
866 go1 p = do
867 p' <- p ^. break ('\n' ==)
868 return $ FreeT $ do
869 x <- nextChar p'
870 case x of
871 Left r -> return $ Pure r
872 Right (_, p'') -> go0 p''
873 -- _unlines
874 -- :: Monad m
875 -- => FreeT (Producer Text m) m x -> Producer Text m x
876 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
877
878
879 {-# INLINABLE lines #-}
880
881
882 -- | Split a text stream into 'FreeT'-delimited words
883 words
884 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
885 words = Data.Profunctor.dimap go (fmap _unwords)
886 where
887 go p = FreeT $ do
888 x <- next (p >-> dropWhile isSpace)
889 return $ case x of
890 Left r -> Pure r
891 Right (bs, p') -> Free $ do
892 p'' <- (yield bs >> p') ^. break isSpace
893 return (go p'')
894 _unwords = PG.intercalates (yield $ T.singleton ' ')
895
896 {-# INLINABLE words #-}
897
898
899 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
900 interspersing a text stream in between them
901 -}
902 intercalate
903 :: (Monad m)
904 => Producer Text m ()
905 -> FreeT (Producer Text m) m r
906 -> Producer Text m r
907 intercalate p0 = go0
908 where
909 go0 f = do
910 x <- lift (runFreeT f)
911 case x of
912 Pure r -> return r
913 Free p -> do
914 f' <- p
915 go1 f'
916 go1 f = do
917 x <- lift (runFreeT f)
918 case x of
919 Pure r -> return r
920 Free p -> do
921 p0
922 f' <- p
923 go1 f'
924 {-# INLINABLE intercalate #-}
925
926 {-| Join 'FreeT'-delimited lines into a text stream
927 -}
928 unlines
929 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
930 unlines = go
931 where
932 go f = do
933 x <- lift (runFreeT f)
934 case x of
935 Pure r -> return r
936 Free p -> do
937 f' <- p
938 yield $ T.singleton '\n'
939 go f'
940 {-# INLINABLE unlines #-}
941
942 {-| Join 'FreeT'-delimited words into a text stream
943 -}
944 unwords
945 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
946 unwords = intercalate (yield $ T.singleton ' ')
947 {-# INLINABLE unwords #-}
948
949 {- $parse
950 The following parsing utilities are single-character analogs of the ones found
951 @pipes-parse@.
952 -}
953
954 {- $reexports
955
956 @Data.Text@ re-exports the 'Text' type.
957
958 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
959 -}
960
961