1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
6 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
7 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
9 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
10 example, the following program copies a document from one file to another:
13 > import qualified Pipes.Text as Text
14 > import qualified Pipes.Text.IO as Text
18 > withFile "inFile.txt" ReadMode $ \hIn ->
19 > withFile "outFile.txt" WriteMode $ \hOut ->
20 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
25 > import qualified Pipes.Text as Text
26 > import qualified Pipes.Text.IO as Text
29 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
32 and 'stdout' pipes, as with the following \"echo\" program:
34 > main = runEffect $ Text.stdin >-> Text.stdout
36 You can also translate pure lazy 'TL.Text's to and from pipes:
38 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
40 In addition, this module provides many functions equivalent to lazy
41 'Text' functions so that you can transform or fold text streams. For
42 example, to stream only the first three lines of 'stdin' to 'stdout' you
46 > import qualified Pipes.Text as Text
47 > import qualified Pipes.Parse as Parse
49 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
51 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
53 The above program will never bring more than one chunk of text (~ 32 KB) into
54 memory, no matter how long the lines are.
56 Note that functions in this library are designed to operate on streams that
57 are insensitive to text boundaries. This means that they may freely split
58 text into smaller texts, /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
99 -- * Primitive Character Parsers
136 , module Data.ByteString
138 , module Data.Profunctor
143 import Control.Applicative ((<*))
144 import Control.Monad (liftM, join)
145 import Control.Monad.Trans.State.Strict (StateT(..), modify)
146 import qualified Data.Text as T
147 import Data.Text (Text)
148 import qualified Data.Text.Lazy as TL
149 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
150 import Data.ByteString (ByteString)
151 import Data.Functor.Constant (Constant(Constant, getConstant))
152 import Data.Functor.Identity (Identity)
153 import Data.Profunctor (Profunctor)
154 import qualified Data.Profunctor
156 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
157 import qualified Pipes.Group as PG
158 import qualified Pipes.Parse as PP
159 import Pipes.Parse (Parser)
160 import qualified Pipes.Prelude as P
161 import Data.Char (isSpace)
162 import Data.Word (Word8)
164 import Prelude hiding (
193 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
194 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
195 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
196 {-# INLINE fromLazy #-}
199 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
201 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
203 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
204 a ^. lens = getConstant (lens Constant a)
207 -- | Apply a transformation to each 'Char' in the stream
208 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
209 map f = P.map (T.map f)
210 {-# INLINABLE map #-}
212 {-# RULES "p >-> map f" forall p f .
213 p >-> map f = for p (\txt -> yield (T.map f txt))
216 -- | Map a function over the characters of a text stream and concatenate the results
218 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
219 concatMap f = P.map (T.concatMap f)
220 {-# INLINABLE concatMap #-}
222 {-# RULES "p >-> concatMap f" forall p f .
223 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
227 -- | Transform a Pipe of 'String's into one of 'Text' chunks
228 pack :: Monad m => Pipe String Text m r
230 {-# INLINEABLE pack #-}
232 {-# RULES "p >-> pack" forall p .
233 p >-> pack = for p (\txt -> yield (T.pack txt))
236 -- | Transform a Pipes of 'Text' chunks into one of 'String's
237 unpack :: Monad m => Pipe Text String m r
238 unpack = for cat (\t -> yield (T.unpack t))
239 {-# INLINEABLE unpack #-}
241 {-# RULES "p >-> unpack" forall p .
242 p >-> unpack = for p (\txt -> yield (T.unpack txt))
245 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
246 -- here acting as 'Text' pipes, rather as they would on a lazy text
247 toCaseFold :: Monad m => Pipe Text Text m ()
248 toCaseFold = P.map T.toCaseFold
249 {-# INLINEABLE toCaseFold #-}
251 {-# RULES "p >-> toCaseFold" forall p .
252 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
256 -- | lowercase incoming 'Text'
257 toLower :: Monad m => Pipe Text Text m ()
258 toLower = P.map T.toLower
259 {-# INLINEABLE toLower #-}
261 {-# RULES "p >-> toLower" forall p .
262 p >-> toLower = for p (\txt -> yield (T.toLower txt))
265 -- | uppercase incoming 'Text'
266 toUpper :: Monad m => Pipe Text Text m ()
267 toUpper = P.map T.toUpper
268 {-# INLINEABLE toUpper #-}
270 {-# RULES "p >-> toUpper" forall p .
271 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
274 -- | Remove leading white space from an incoming succession of 'Text's
275 stripStart :: Monad m => Pipe Text Text m r
278 let text = T.stripStart chunk
283 {-# INLINEABLE stripStart #-}
285 -- | @(take n)@ only allows @n@ individual characters to pass;
286 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
287 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
288 take n0 = go n0 where
293 let len = fromIntegral (T.length txt)
295 then yield (T.take (fromIntegral n) txt)
299 {-# INLINABLE take #-}
301 -- | @(drop n)@ drops the first @n@ characters
302 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
303 drop n0 = go n0 where
308 let len = fromIntegral (T.length txt)
311 yield (T.drop (fromIntegral n) txt)
314 {-# INLINABLE drop #-}
316 -- | Take characters until they fail the predicate
317 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
318 takeWhile predicate = go
322 let (prefix, suffix) = T.span predicate txt
328 {-# INLINABLE takeWhile #-}
330 -- | Drop characters until they fail the predicate
331 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
332 dropWhile predicate = go where
335 case T.findIndex (not . predicate) txt of
340 {-# INLINABLE dropWhile #-}
342 -- | Only allows 'Char's to pass if they satisfy the predicate
343 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
344 filter predicate = P.map (T.filter predicate)
345 {-# INLINABLE filter #-}
347 {-# RULES "p >-> filter q" forall p q .
348 p >-> filter q = for p (\txt -> yield (T.filter q txt))
351 -- | Strict left scan over the characters
354 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
356 yield (T.singleton begin)
361 let txt' = T.scanl step c txt
365 {-# INLINABLE scan #-}
367 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
370 toLazy :: Producer Text Identity () -> TL.Text
371 toLazy = TL.fromChunks . P.toList
372 {-# INLINABLE toLazy #-}
374 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
377 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
378 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
379 immediately as they are generated instead of loading them all into memory.
381 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
382 toLazyM = liftM TL.fromChunks . P.toListM
383 {-# INLINABLE toLazyM #-}
385 -- | Reduce the text stream using a strict left fold over characters
388 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
389 foldChars step begin done = P.fold (T.foldl' step) begin done
390 {-# INLINABLE foldChars #-}
392 -- | Retrieve the first 'Char'
393 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
399 Left _ -> return Nothing
400 Right (c, _) -> return (Just c)
401 {-# INLINABLE head #-}
403 -- | Retrieve the last 'Char'
404 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
414 else go (Just $ T.last txt) p'
415 {-# INLINABLE last #-}
417 -- | Determine if the stream is empty
418 null :: (Monad m) => Producer Text m () -> m Bool
420 {-# INLINABLE null #-}
422 -- | Count the number of characters in the stream
423 length :: (Monad m, Num n) => Producer Text m () -> m n
424 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
425 {-# INLINABLE length #-}
427 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
428 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
429 any predicate = P.any (T.any predicate)
430 {-# INLINABLE any #-}
432 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
433 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
434 all predicate = P.all (T.all predicate)
435 {-# INLINABLE all #-}
437 -- | Return the maximum 'Char' within a text stream
438 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
439 maximum = P.fold step Nothing id
444 else Just $ case mc of
445 Nothing -> T.maximum txt
446 Just c -> max c (T.maximum txt)
447 {-# INLINABLE maximum #-}
449 -- | Return the minimum 'Char' within a text stream (surely very useful!)
450 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
451 minimum = P.fold step Nothing id
457 Nothing -> Just (T.minimum txt)
458 Just c -> Just (min c (T.minimum txt))
459 {-# INLINABLE minimum #-}
461 -- | Find the first element in the stream that matches the predicate
464 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
465 find predicate p = head (p >-> filter predicate)
466 {-# INLINABLE find #-}
468 -- | Index into a text stream
470 :: (Monad m, Integral a)
471 => a-> Producer Text m () -> m (Maybe Char)
472 index n p = head (p >-> drop n)
473 {-# INLINABLE index #-}
476 -- | Store a tally of how many segments match the given 'Text'
477 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
478 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
479 {-# INLINABLE count #-}
482 -- | Consume the first character from a stream of 'Text'
484 -- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
485 -- succeeds with a 'Right' providing the next character and the remainder of the
491 -> m (Either r (Char, Producer Text m r))
497 Left r -> return (Left r)
498 Right (txt, p') -> case (T.uncons txt) of
500 Just (c, txt') -> return (Right (c, yield txt' >> p'))
501 {-# INLINABLE nextChar #-}
503 -- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
505 drawChar :: (Monad m) => Parser Text m (Maybe Char)
509 Nothing -> return Nothing
510 Just txt -> case (T.uncons txt) of
515 {-# INLINABLE drawChar #-}
517 -- | Push back a 'Char' onto the underlying 'Producer'
518 unDrawChar :: (Monad m) => Char -> Parser Text m ()
519 unDrawChar c = modify (yield (T.singleton c) >>)
520 {-# INLINABLE unDrawChar #-}
522 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
528 > Left _ -> return ()
529 > Right c -> unDrawChar c
534 peekChar :: (Monad m) => Parser Text m (Maybe Char)
539 Just c -> unDrawChar c
541 {-# INLINABLE peekChar #-}
543 {-| Check if the underlying 'Producer' has no more characters
545 Note that this will skip over empty 'Text' chunks, unlike
546 'PP.isEndOfInput' from @pipes-parse@, which would consider
547 an empty 'Text' a valid bit of input.
549 > isEndOfChars = liftM isLeft peekChar
551 isEndOfChars :: (Monad m) => Parser Text m Bool
557 {-# INLINABLE isEndOfChars #-}
560 -- | Splits a 'Producer' after the given number of characters
562 :: (Monad m, Integral n)
564 -> Lens' (Producer Text m r)
565 (Producer Text m (Producer Text m r))
566 splitAt n0 k p0 = fmap join (k (go n0 p0))
572 Left r -> return (return r)
573 Right (txt, p') -> do
574 let len = fromIntegral (T.length txt)
580 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
582 return (yield suffix >> p')
583 {-# INLINABLE splitAt #-}
586 -- | Split a text stream in two, producing the longest
587 -- consecutive group of characters that satisfies the predicate
588 -- and returning the rest
593 -> Lens' (Producer Text m r)
594 (Producer Text m (Producer Text m r))
595 span predicate k p0 = fmap join (k (go p0))
600 Left r -> return (return r)
601 Right (txt, p') -> do
602 let (prefix, suffix) = T.span predicate txt
609 return (yield suffix >> p')
610 {-# INLINABLE span #-}
612 {-| Split a text stream in two, producing the longest
613 consecutive group of characters that don't satisfy the predicate
618 -> Lens' (Producer Text m r)
619 (Producer Text m (Producer Text m r))
620 break predicate = span (not . predicate)
621 {-# INLINABLE break #-}
623 {-| Improper lens that splits after the first group of equivalent Chars, as
624 defined by the given equivalence relation
628 => (Char -> Char -> Bool)
629 -> Lens' (Producer Text m r)
630 (Producer Text m (Producer Text m r))
631 groupBy equals k p0 = fmap join (k ((go p0))) where
635 Left r -> return (return r)
636 Right (txt, p') -> case T.uncons txt of
638 Just (c, _) -> (yield txt >> p') ^. span (equals c)
639 {-# INLINABLE groupBy #-}
641 -- | Improper lens that splits after the first succession of identical 'Char' s
643 => Lens' (Producer Text m r)
644 (Producer Text m (Producer Text m r))
646 {-# INLINABLE group #-}
648 {-| Improper lens that splits a 'Producer' after the first word
650 Unlike 'words', this does not drop leading whitespace
653 => Lens' (Producer Text m r)
654 (Producer Text m (Producer Text m r))
655 word k p0 = fmap join (k (to p0))
658 p' <- p^.span isSpace
660 {-# INLINABLE word #-}
664 => Lens' (Producer Text m r)
665 (Producer Text m (Producer Text m r))
666 line = break (== '\n')
668 {-# INLINABLE line #-}
671 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
673 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
680 Right (txt, p') -> do
681 yield (T.intersperse c txt)
687 Right (txt, p') -> do
688 yield (T.singleton c)
689 yield (T.intersperse c txt)
691 {-# INLINABLE intersperse #-}
695 -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
696 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
697 packChars = Data.Profunctor.dimap to (fmap from)
699 -- to :: Monad m => Producer Char m x -> Producer Text m x
700 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
702 step diffAs c = diffAs . (c:)
704 done diffAs = T.pack (diffAs [])
706 -- from :: Monad m => Producer Text m x -> Producer Char m x
707 from p = for p (each . T.unpack)
708 {-# INLINABLE packChars #-}
711 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
713 :: (Monad m, Integral n)
714 => n -> Lens' (Producer Text m r)
715 (FreeT (Producer Text m) m r)
716 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
722 Right (txt, p') -> Free $ do
723 p'' <- (yield txt >> p') ^. splitAt n
724 return $ FreeT (go p'')
725 {-# INLINABLE chunksOf #-}
728 {-| Split a text stream into sub-streams delimited by characters that satisfy the
735 -> FreeT (Producer Text m) m r
736 splitsWith predicate p0 = FreeT (go0 p0)
741 Left r -> return (Pure r)
745 else return $ Free $ do
746 p'' <- (yield txt >> p') ^. span (not . predicate)
747 return $ FreeT (go1 p'')
752 Right (_, p') -> Free $ do
753 p'' <- p' ^. span (not . predicate)
754 return $ FreeT (go1 p'')
755 {-# INLINABLE splitsWith #-}
757 -- | Split a text stream using the given 'Char' as the delimiter
760 -> Lens' (Producer Text m r)
761 (FreeT (Producer Text m) m r)
763 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
764 {-# INLINABLE splits #-}
766 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
767 given equivalence relation
771 => (Char -> Char -> Bool)
772 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
773 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
774 go p = do x <- next p
775 case x of Left r -> return (Pure r)
776 Right (bs, p') -> case T.uncons bs of
778 Just (c, _) -> do return $ Free $ do
779 p'' <- (yield bs >> p')^.span (equals c)
780 return $ FreeT (go p'')
781 {-# INLINABLE groupsBy #-}
784 -- | Like 'groupsBy', where the equality predicate is ('==')
787 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
788 groups = groupsBy (==)
789 {-# INLINABLE groups #-}
793 {-| Split a text stream into 'FreeT'-delimited lines
796 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
797 lines = Data.Profunctor.dimap _lines (fmap _unlines)
799 _lines p0 = FreeT (go0 p0)
804 Left r -> return (Pure r)
808 else return $ Free $ go1 (yield txt >> p')
810 p' <- p ^. break ('\n' ==)
814 Left r -> return $ Pure r
815 Right (_, p'') -> go0 p''
818 -- => FreeT (Producer Text m) m x -> Producer Text m x
819 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
822 {-# INLINABLE lines #-}
825 -- | Split a text stream into 'FreeT'-delimited words
827 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
828 words = Data.Profunctor.dimap go (fmap _unwords)
831 x <- next (p >-> dropWhile isSpace)
834 Right (bs, p') -> Free $ do
835 p'' <- (yield bs >> p') ^. break isSpace
837 _unwords = PG.intercalates (yield $ T.singleton ' ')
839 {-# INLINABLE words #-}
842 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
843 interspersing a text stream in between them
847 => Producer Text m ()
848 -> FreeT (Producer Text m) m r
853 x <- lift (runFreeT f)
860 x <- lift (runFreeT f)
867 {-# INLINABLE intercalate #-}
869 {-| Join 'FreeT'-delimited lines into a text stream
872 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
876 x <- lift (runFreeT f)
881 yield $ T.singleton '\n'
883 {-# INLINABLE unlines #-}
885 {-| Join 'FreeT'-delimited words into a text stream
888 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
889 unwords = intercalate (yield $ T.singleton ' ')
890 {-# INLINABLE unwords #-}
895 @Data.Text@ re-exports the 'Text' type.
897 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.