1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
6 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
7 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
9 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
10 example, the following program copies a document from one file to another:
13 > import qualified Pipes.Text as Text
14 > import qualified Pipes.Text.IO as Text
18 > withFile "inFile.txt" ReadMode $ \hIn ->
19 > withFile "outFile.txt" WriteMode $ \hOut ->
20 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
25 > import qualified Pipes.Text as Text
26 > import qualified Pipes.Text.IO as Text
29 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
32 and 'stdout' pipes, as with the following \"echo\" program:
34 > main = runEffect $ Text.stdin >-> Text.stdout
36 You can also translate pure lazy 'TL.Text's to and from pipes:
38 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
40 In addition, this module provides many functions equivalent to lazy
41 'Text' functions so that you can transform or fold text streams. For
42 example, to stream only the first three lines of 'stdin' to 'stdout' you
46 > import qualified Pipes.Text as Text
47 > import qualified Pipes.Parse as Parse
49 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
51 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
53 The above program will never bring more than one chunk of text (~ 32 KB) into
54 memory, no matter how long the lines are.
56 Note that functions in this library are designed to operate on streams that
57 are insensitive to text boundaries. This means that they may freely split
58 text into smaller texts, /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
108 -- * Primitive Character Parsers
125 -- -- * Decoding Lenses
136 -- -- * Other Decoding/Encoding Functions
162 -- , DecodeResult(..)
164 -- , TextException(..)
165 , module Data.ByteString
167 , module Data.Profunctor
172 import Control.Applicative ((<*))
173 import Control.Monad (liftM, join)
174 import Control.Monad.Trans.State.Strict (StateT(..), modify)
175 import qualified Data.Text as T
176 import Data.Text (Text)
177 import qualified Data.Text.Lazy as TL
178 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
179 import Data.ByteString (ByteString)
180 import Data.Functor.Constant (Constant(Constant, getConstant))
181 import Data.Functor.Identity (Identity)
182 import Data.Profunctor (Profunctor)
183 import qualified Data.Profunctor
185 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
186 import qualified Pipes.Group as PG
187 import qualified Pipes.Parse as PP
188 import Pipes.Parse (Parser)
189 import qualified Pipes.Prelude as P
190 import Data.Char (isSpace)
192 import Prelude hiding (
221 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
222 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
223 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
224 {-# INLINE fromLazy #-}
227 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
229 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
231 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
232 a ^. lens = getConstant (lens Constant a)
235 -- | Apply a transformation to each 'Char' in the stream
236 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
237 map f = P.map (T.map f)
238 {-# INLINABLE map #-}
240 {-# RULES "p >-> map f" forall p f .
241 p >-> map f = for p (\txt -> yield (T.map f txt))
244 -- | Map a function over the characters of a text stream and concatenate the results
246 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
247 concatMap f = P.map (T.concatMap f)
248 {-# INLINABLE concatMap #-}
250 {-# RULES "p >-> concatMap f" forall p f .
251 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
254 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
255 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
256 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
257 -- encodeUtf8 :: Monad m => Pipe Text ByteString m r
258 -- encodeUtf8 = P.map TE.encodeUtf8
259 -- {-# INLINEABLE encodeUtf8 #-}
261 -- {-# RULES "p >-> encodeUtf8" forall p .
262 -- p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
265 -- | Transform a Pipe of 'String's into one of 'Text' chunks
266 pack :: Monad m => Pipe String Text m r
268 {-# INLINEABLE pack #-}
270 {-# RULES "p >-> pack" forall p .
271 p >-> pack = for p (\txt -> yield (T.pack txt))
274 -- | Transform a Pipes of 'Text' chunks into one of 'String's
275 unpack :: Monad m => Pipe Text String m r
276 unpack = for cat (\t -> yield (T.unpack t))
277 {-# INLINEABLE unpack #-}
279 {-# RULES "p >-> unpack" forall p .
280 p >-> unpack = for p (\txt -> yield (T.unpack txt))
283 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
284 -- here acting as 'Text' pipes, rather as they would on a lazy text
285 toCaseFold :: Monad m => Pipe Text Text m ()
286 toCaseFold = P.map T.toCaseFold
287 {-# INLINEABLE toCaseFold #-}
289 {-# RULES "p >-> toCaseFold" forall p .
290 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
294 -- | lowercase incoming 'Text'
295 toLower :: Monad m => Pipe Text Text m ()
296 toLower = P.map T.toLower
297 {-# INLINEABLE toLower #-}
299 {-# RULES "p >-> toLower" forall p .
300 p >-> toLower = for p (\txt -> yield (T.toLower txt))
303 -- | uppercase incoming 'Text'
304 toUpper :: Monad m => Pipe Text Text m ()
305 toUpper = P.map T.toUpper
306 {-# INLINEABLE toUpper #-}
308 {-# RULES "p >-> toUpper" forall p .
309 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
312 -- | Remove leading white space from an incoming succession of 'Text's
313 stripStart :: Monad m => Pipe Text Text m r
316 let text = T.stripStart chunk
321 {-# INLINEABLE stripStart #-}
323 -- | @(take n)@ only allows @n@ individual characters to pass;
324 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
325 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
326 take n0 = go n0 where
331 let len = fromIntegral (T.length txt)
333 then yield (T.take (fromIntegral n) txt)
337 {-# INLINABLE take #-}
339 -- | @(drop n)@ drops the first @n@ characters
340 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
341 drop n0 = go n0 where
346 let len = fromIntegral (T.length txt)
349 yield (T.drop (fromIntegral n) txt)
352 {-# INLINABLE drop #-}
354 -- | Take characters until they fail the predicate
355 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
356 takeWhile predicate = go
360 let (prefix, suffix) = T.span predicate txt
366 {-# INLINABLE takeWhile #-}
368 -- | Drop characters until they fail the predicate
369 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
370 dropWhile predicate = go where
373 case T.findIndex (not . predicate) txt of
378 {-# INLINABLE dropWhile #-}
380 -- | Only allows 'Char's to pass if they satisfy the predicate
381 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
382 filter predicate = P.map (T.filter predicate)
383 {-# INLINABLE filter #-}
385 {-# RULES "p >-> filter q" forall p q .
386 p >-> filter q = for p (\txt -> yield (T.filter q txt))
389 -- | Strict left scan over the characters
392 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
394 yield (T.singleton begin)
399 let txt' = T.scanl step c txt
403 {-# INLINABLE scan #-}
405 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
408 toLazy :: Producer Text Identity () -> TL.Text
409 toLazy = TL.fromChunks . P.toList
410 {-# INLINABLE toLazy #-}
412 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
415 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
416 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
417 immediately as they are generated instead of loading them all into memory.
419 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
420 toLazyM = liftM TL.fromChunks . P.toListM
421 {-# INLINABLE toLazyM #-}
423 -- | Reduce the text stream using a strict left fold over characters
426 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
427 foldChars step begin done = P.fold (T.foldl' step) begin done
428 {-# INLINABLE foldChars #-}
430 -- | Retrieve the first 'Char'
431 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
437 Left _ -> return Nothing
438 Right (c, _) -> return (Just c)
439 {-# INLINABLE head #-}
441 -- | Retrieve the last 'Char'
442 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
452 else go (Just $ T.last txt) p'
453 {-# INLINABLE last #-}
455 -- | Determine if the stream is empty
456 null :: (Monad m) => Producer Text m () -> m Bool
458 {-# INLINABLE null #-}
460 -- | Count the number of characters in the stream
461 length :: (Monad m, Num n) => Producer Text m () -> m n
462 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
463 {-# INLINABLE length #-}
465 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
466 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
467 any predicate = P.any (T.any predicate)
468 {-# INLINABLE any #-}
470 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
471 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
472 all predicate = P.all (T.all predicate)
473 {-# INLINABLE all #-}
475 -- | Return the maximum 'Char' within a text stream
476 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
477 maximum = P.fold step Nothing id
482 else Just $ case mc of
483 Nothing -> T.maximum txt
484 Just c -> max c (T.maximum txt)
485 {-# INLINABLE maximum #-}
487 -- | Return the minimum 'Char' within a text stream (surely very useful!)
488 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
489 minimum = P.fold step Nothing id
495 Nothing -> Just (T.minimum txt)
496 Just c -> Just (min c (T.minimum txt))
497 {-# INLINABLE minimum #-}
500 -- | Find the first element in the stream that matches the predicate
503 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
504 find predicate p = head (p >-> filter predicate)
505 {-# INLINABLE find #-}
507 -- | Index into a text stream
509 :: (Monad m, Integral a)
510 => a-> Producer Text m () -> m (Maybe Char)
511 index n p = head (p >-> drop n)
512 {-# INLINABLE index #-}
515 -- | Store a tally of how many segments match the given 'Text'
516 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
517 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
518 {-# INLINABLE count #-}
521 {-| Consume the first character from a stream of 'Text'
523 'next' either fails with a 'Left' if the 'Producer' has no more characters or
524 succeeds with a 'Right' providing the next character and the remainder of the
530 -> m (Either r (Char, Producer Text m r))
536 Left r -> return (Left r)
537 Right (txt, p') -> case (T.uncons txt) of
539 Just (c, txt') -> return (Right (c, yield txt' >> p'))
540 {-# INLINABLE nextChar #-}
542 {-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
545 drawChar :: (Monad m) => Parser Text m (Maybe Char)
549 Nothing -> return Nothing
550 Just txt -> case (T.uncons txt) of
555 {-# INLINABLE drawChar #-}
557 -- | Push back a 'Char' onto the underlying 'Producer'
558 unDrawChar :: (Monad m) => Char -> Parser Text m ()
559 unDrawChar c = modify (yield (T.singleton c) >>)
560 {-# INLINABLE unDrawChar #-}
562 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
568 > Left _ -> return ()
569 > Right c -> unDrawChar c
572 peekChar :: (Monad m) => Parser Text m (Maybe Char)
577 Just c -> unDrawChar c
579 {-# INLINABLE peekChar #-}
581 {-| Check if the underlying 'Producer' has no more characters
583 Note that this will skip over empty 'Text' chunks, unlike
584 'PP.isEndOfInput' from @pipes-parse@, which would consider
585 an empty 'Text' a valid bit of input.
587 > isEndOfChars = liftM isLeft peekChar
589 isEndOfChars :: (Monad m) => Parser Text m Bool
595 {-# INLINABLE isEndOfChars #-}
600 -- | Splits a 'Producer' after the given number of characters
602 :: (Monad m, Integral n)
604 -> Lens' (Producer Text m r)
605 (Producer Text m (Producer Text m r))
606 splitAt n0 k p0 = fmap join (k (go n0 p0))
612 Left r -> return (return r)
613 Right (txt, p') -> do
614 let len = fromIntegral (T.length txt)
620 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
622 return (yield suffix >> p')
623 {-# INLINABLE splitAt #-}
626 {-| Split a text stream in two, where the first text stream is the longest
627 consecutive group of text that satisfy the predicate
632 -> Lens' (Producer Text m r)
633 (Producer Text m (Producer Text m r))
634 span predicate k p0 = fmap join (k (go p0))
639 Left r -> return (return r)
640 Right (txt, p') -> do
641 let (prefix, suffix) = T.span predicate txt
648 return (yield suffix >> p')
649 {-# INLINABLE span #-}
651 {-| Split a text stream in two, where the first text stream is the longest
652 consecutive group of characters that don't satisfy the predicate
657 -> Lens' (Producer Text m r)
658 (Producer Text m (Producer Text m r))
659 break predicate = span (not . predicate)
660 {-# INLINABLE break #-}
662 {-| Improper lens that splits after the first group of equivalent Chars, as
663 defined by the given equivalence relation
667 => (Char -> Char -> Bool)
668 -> Lens' (Producer Text m r)
669 (Producer Text m (Producer Text m r))
670 groupBy equals k p0 = fmap join (k ((go p0))) where
674 Left r -> return (return r)
675 Right (txt, p') -> case T.uncons txt of
677 Just (c, _) -> (yield txt >> p') ^. span (equals c)
678 {-# INLINABLE groupBy #-}
680 -- | Improper lens that splits after the first succession of identical 'Char' s
682 => Lens' (Producer Text m r)
683 (Producer Text m (Producer Text m r))
685 {-# INLINABLE group #-}
687 {-| Improper lens that splits a 'Producer' after the first word
689 Unlike 'words', this does not drop leading whitespace
692 => Lens' (Producer Text m r)
693 (Producer Text m (Producer Text m r))
694 word k p0 = fmap join (k (to p0))
697 p' <- p^.span isSpace
699 {-# INLINABLE word #-}
703 => Lens' (Producer Text m r)
704 (Producer Text m (Producer Text m r))
705 line = break (== '\n')
707 {-# INLINABLE line #-}
710 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
712 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
719 Right (txt, p') -> do
720 yield (T.intersperse c txt)
726 Right (txt, p') -> do
727 yield (T.singleton c)
728 yield (T.intersperse c txt)
730 {-# INLINABLE intersperse #-}
734 -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
735 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
736 packChars = Data.Profunctor.dimap to (fmap from)
738 -- to :: Monad m => Producer Char m x -> Producer Text m x
739 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
741 step diffAs c = diffAs . (c:)
743 done diffAs = T.pack (diffAs [])
745 -- from :: Monad m => Producer Text m x -> Producer Char m x
746 from p = for p (each . T.unpack)
747 {-# INLINABLE packChars #-}
750 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
752 :: (Monad m, Integral n)
753 => n -> Lens' (Producer Text m r)
754 (FreeT (Producer Text m) m r)
755 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
761 Right (txt, p') -> Free $ do
762 p'' <- (yield txt >> p') ^. splitAt n
763 return $ FreeT (go p'')
764 {-# INLINABLE chunksOf #-}
767 {-| Split a text stream into sub-streams delimited by characters that satisfy the
774 -> FreeT (Producer Text m) m r
775 splitsWith predicate p0 = FreeT (go0 p0)
780 Left r -> return (Pure r)
784 else return $ Free $ do
785 p'' <- (yield txt >> p') ^. span (not . predicate)
786 return $ FreeT (go1 p'')
791 Right (_, p') -> Free $ do
792 p'' <- p' ^. span (not . predicate)
793 return $ FreeT (go1 p'')
794 {-# INLINABLE splitsWith #-}
796 -- | Split a text stream using the given 'Char' as the delimiter
799 -> Lens' (Producer Text m r)
800 (FreeT (Producer Text m) m r)
802 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
803 {-# INLINABLE splits #-}
805 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
806 given equivalence relation
810 => (Char -> Char -> Bool)
811 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
812 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
813 go p = do x <- next p
814 case x of Left r -> return (Pure r)
815 Right (bs, p') -> case T.uncons bs of
817 Just (c, _) -> do return $ Free $ do
818 p'' <- (yield bs >> p')^.span (equals c)
819 return $ FreeT (go p'')
820 {-# INLINABLE groupsBy #-}
823 -- | Like 'groupsBy', where the equality predicate is ('==')
826 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
827 groups = groupsBy (==)
828 {-# INLINABLE groups #-}
832 {-| Split a text stream into 'FreeT'-delimited lines
835 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
836 lines = Data.Profunctor.dimap _lines (fmap _unlines)
838 _lines p0 = FreeT (go0 p0)
843 Left r -> return (Pure r)
847 else return $ Free $ go1 (yield txt >> p')
849 p' <- p ^. break ('\n' ==)
853 Left r -> return $ Pure r
854 Right (_, p'') -> go0 p''
857 -- => FreeT (Producer Text m) m x -> Producer Text m x
858 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
861 {-# INLINABLE lines #-}
864 -- | Split a text stream into 'FreeT'-delimited words
866 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
867 words = Data.Profunctor.dimap go (fmap _unwords)
870 x <- next (p >-> dropWhile isSpace)
873 Right (bs, p') -> Free $ do
874 p'' <- (yield bs >> p') ^. break isSpace
876 _unwords = PG.intercalates (yield $ T.singleton ' ')
878 {-# INLINABLE words #-}
881 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
882 interspersing a text stream in between them
886 => Producer Text m ()
887 -> FreeT (Producer Text m) m r
892 x <- lift (runFreeT f)
899 x <- lift (runFreeT f)
906 {-# INLINABLE intercalate #-}
908 {-| Join 'FreeT'-delimited lines into a text stream
911 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
915 x <- lift (runFreeT f)
920 yield $ T.singleton '\n'
922 {-# INLINABLE unlines #-}
924 {-| Join 'FreeT'-delimited words into a text stream
927 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
928 unwords = intercalate (yield $ T.singleton ' ')
929 {-# INLINABLE unwords #-}
932 The following parsing utilities are single-character analogs of the ones found
938 @Data.Text@ re-exports the 'Text' type.
940 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.