1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
3 {-| This package provides @pipes@ utilities for \'text streams\', which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, and you
5 will generally want @Data.Text@ in scope. But the type @Producer Text m r@ is
6 in some ways the pipes equivalent of the lazy @Text@ type.
8 This module provides many functions equivalent in one way or another to
9 the 'pure' functions in
10 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
11 They transform, divide, group and fold text streams. Though @Producer Text m r@
12 is \'effectful\' Text, functions
13 in this module are \'pure\' in the sense that they are uniformly monad-independent.
14 Simple IO operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
15 operations are in @Data.Text.Lazy.IO@. Interoperation with @ByteString@
16 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
18 The Text type exported by @Data.Text.Lazy@ is basically '[Text]'. The implementation
19 is arranged so that the individual strict 'Text' chunks are kept to a reasonable size;
20 the user is not aware of the divisions between the connected 'Text' chunks.
21 So also here: the functions in this module are designed to operate on streams that
22 are insensitive to text boundaries. This means that they may freely split
23 text into smaller texts and /discard empty texts/. However, the objective is
24 that they should /never concatenate texts/ in order to provide strict upper
25 bounds on memory usage.
27 For example, to stream only the first three lines of 'stdin' to 'stdout' you
31 > import qualified Pipes.Text as Text
32 > import qualified Pipes.Text.IO as Text
36 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
38 > takeLines n = Text.unlines . takes' n . view Text.lines
40 > -- takeLines n = over Text.lines (takes' n)
42 The above program will never bring more than one chunk of text (~ 32 KB) into
43 memory, no matter how long the lines are.
45 As this example shows, one superficial difference from @Data.Text.Lazy@
46 is that many of the operations, like 'lines',
47 are \'lensified\'; this has a number of advantages where it is possible, in particular
48 it facilitates their use with 'Parser's of Text (in the general
49 <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
51 Each such expression, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
52 intuitively corresponding function when used with @view@ or @(^.)@.
54 A more important difference the example reveals is in the types closely associated with
55 the central type, @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@
56 we find functions like
58 > splitAt :: Int -> Text -> (Text, Text)
59 > lines :: Int -> Text -> [Text]
60 > chunksOf :: Int -> Text -> [Text]
62 which relate a Text with a pair or list of Texts. The corresponding functions here (taking
63 account of \'lensification\') are
65 > view . splitAt :: (Monad m, Integral n)
66 > => n -> Producer Text m r -> Producer Text.Text m (Producer Text.Text m r)
67 > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
68 > view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
70 In the type @Producer Text m (Producer Text m r)@ the second
71 element of the \'pair\' of of \'effectful Texts\' cannot simply be retrieved
72 with 'snd'. This is an \'effectful\' pair, and one must work through the effects
73 of the first element to arrive at the second. Similarly in @FreeT (Producer Text m) m r@,
74 which corresponds with @[Text]@, on cannot simply drop 10 Producers and take the others;
75 we can only get to the ones we want to take by working through their predecessors.
77 Some of the types may be more readable if you imagine that we have introduced
80 > type Text m r = Producer T.Text m r
81 > type Texts m r = FreeT (Producer T.Text m) m r
83 Then we would think of the types above as
85 > view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
86 > view lines :: (Monad m) => Text m r -> Texts m r
87 > view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
89 which brings one closer to the types of the similar functions in @Data.Text.Lazy@
129 -- * Primitive Character Parsers
165 , module Data.ByteString
167 , module Data.Profunctor
172 import Control.Applicative ((<*))
173 import Control.Monad (liftM, join)
174 import Control.Monad.Trans.State.Strict (StateT(..), modify)
175 import qualified Data.Text as T
176 import Data.Text (Text)
177 import qualified Data.Text.Lazy as TL
178 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
179 import Data.ByteString (ByteString)
180 import Data.Functor.Constant (Constant(Constant, getConstant))
181 import Data.Functor.Identity (Identity)
182 import Data.Profunctor (Profunctor)
183 import qualified Data.Profunctor
185 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
186 import qualified Pipes.Group as PG
187 import qualified Pipes.Parse as PP
188 import Pipes.Parse (Parser)
189 import qualified Pipes.Prelude as P
190 import Data.Char (isSpace)
191 import Data.Word (Word8)
193 import Prelude hiding (
222 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
223 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
224 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
225 {-# INLINE fromLazy #-}
228 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
230 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
232 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
233 a ^. lens = getConstant (lens Constant a)
236 -- | Apply a transformation to each 'Char' in the stream
237 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
238 map f = P.map (T.map f)
239 {-# INLINABLE map #-}
241 {-# RULES "p >-> map f" forall p f .
242 p >-> map f = for p (\txt -> yield (T.map f txt))
245 -- | Map a function over the characters of a text stream and concatenate the results
247 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
248 concatMap f = P.map (T.concatMap f)
249 {-# INLINABLE concatMap #-}
251 {-# RULES "p >-> concatMap f" forall p f .
252 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
256 -- | Transform a Pipe of 'String's into one of 'Text' chunks
257 pack :: Monad m => Pipe String Text m r
259 {-# INLINEABLE pack #-}
261 {-# RULES "p >-> pack" forall p .
262 p >-> pack = for p (\txt -> yield (T.pack txt))
265 -- | Transform a Pipes of 'Text' chunks into one of 'String's
266 unpack :: Monad m => Pipe Text String m r
267 unpack = for cat (\t -> yield (T.unpack t))
268 {-# INLINEABLE unpack #-}
270 {-# RULES "p >-> unpack" forall p .
271 p >-> unpack = for p (\txt -> yield (T.unpack txt))
274 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
275 -- here acting as 'Text' pipes, rather as they would on a lazy text
276 toCaseFold :: Monad m => Pipe Text Text m ()
277 toCaseFold = P.map T.toCaseFold
278 {-# INLINEABLE toCaseFold #-}
280 {-# RULES "p >-> toCaseFold" forall p .
281 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
285 -- | lowercase incoming 'Text'
286 toLower :: Monad m => Pipe Text Text m ()
287 toLower = P.map T.toLower
288 {-# INLINEABLE toLower #-}
290 {-# RULES "p >-> toLower" forall p .
291 p >-> toLower = for p (\txt -> yield (T.toLower txt))
294 -- | uppercase incoming 'Text'
295 toUpper :: Monad m => Pipe Text Text m ()
296 toUpper = P.map T.toUpper
297 {-# INLINEABLE toUpper #-}
299 {-# RULES "p >-> toUpper" forall p .
300 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
303 -- | Remove leading white space from an incoming succession of 'Text's
304 stripStart :: Monad m => Pipe Text Text m r
307 let text = T.stripStart chunk
312 {-# INLINEABLE stripStart #-}
314 -- | @(take n)@ only allows @n@ individual characters to pass;
315 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
316 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
317 take n0 = go n0 where
322 let len = fromIntegral (T.length txt)
324 then yield (T.take (fromIntegral n) txt)
328 {-# INLINABLE take #-}
330 -- | @(drop n)@ drops the first @n@ characters
331 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
332 drop n0 = go n0 where
337 let len = fromIntegral (T.length txt)
340 yield (T.drop (fromIntegral n) txt)
343 {-# INLINABLE drop #-}
345 -- | Take characters until they fail the predicate
346 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
347 takeWhile predicate = go
351 let (prefix, suffix) = T.span predicate txt
357 {-# INLINABLE takeWhile #-}
359 -- | Drop characters until they fail the predicate
360 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
361 dropWhile predicate = go where
364 case T.findIndex (not . predicate) txt of
369 {-# INLINABLE dropWhile #-}
371 -- | Only allows 'Char's to pass if they satisfy the predicate
372 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
373 filter predicate = P.map (T.filter predicate)
374 {-# INLINABLE filter #-}
376 {-# RULES "p >-> filter q" forall p q .
377 p >-> filter q = for p (\txt -> yield (T.filter q txt))
380 -- | Strict left scan over the characters
383 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
385 yield (T.singleton begin)
390 let txt' = T.scanl step c txt
394 {-# INLINABLE scan #-}
396 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
399 toLazy :: Producer Text Identity () -> TL.Text
400 toLazy = TL.fromChunks . P.toList
401 {-# INLINABLE toLazy #-}
403 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
406 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
407 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
408 immediately as they are generated instead of loading them all into memory.
410 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
411 toLazyM = liftM TL.fromChunks . P.toListM
412 {-# INLINABLE toLazyM #-}
414 -- | Reduce the text stream using a strict left fold over characters
417 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
418 foldChars step begin done = P.fold (T.foldl' step) begin done
419 {-# INLINABLE foldChars #-}
421 -- | Retrieve the first 'Char'
422 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
428 Left _ -> return Nothing
429 Right (c, _) -> return (Just c)
430 {-# INLINABLE head #-}
432 -- | Retrieve the last 'Char'
433 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
443 else go (Just $ T.last txt) p'
444 {-# INLINABLE last #-}
446 -- | Determine if the stream is empty
447 null :: (Monad m) => Producer Text m () -> m Bool
449 {-# INLINABLE null #-}
451 -- | Count the number of characters in the stream
452 length :: (Monad m, Num n) => Producer Text m () -> m n
453 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
454 {-# INLINABLE length #-}
456 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
457 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
458 any predicate = P.any (T.any predicate)
459 {-# INLINABLE any #-}
461 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
462 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
463 all predicate = P.all (T.all predicate)
464 {-# INLINABLE all #-}
466 -- | Return the maximum 'Char' within a text stream
467 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
468 maximum = P.fold step Nothing id
473 else Just $ case mc of
474 Nothing -> T.maximum txt
475 Just c -> max c (T.maximum txt)
476 {-# INLINABLE maximum #-}
478 -- | Return the minimum 'Char' within a text stream (surely very useful!)
479 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
480 minimum = P.fold step Nothing id
486 Nothing -> Just (T.minimum txt)
487 Just c -> Just (min c (T.minimum txt))
488 {-# INLINABLE minimum #-}
490 -- | Find the first element in the stream that matches the predicate
493 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
494 find predicate p = head (p >-> filter predicate)
495 {-# INLINABLE find #-}
497 -- | Index into a text stream
499 :: (Monad m, Integral a)
500 => a-> Producer Text m () -> m (Maybe Char)
501 index n p = head (p >-> drop n)
502 {-# INLINABLE index #-}
505 -- | Store a tally of how many segments match the given 'Text'
506 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
507 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
508 {-# INLINABLE count #-}
511 -- | Consume the first character from a stream of 'Text'
513 -- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
514 -- succeeds with a 'Right' providing the next character and the remainder of the
520 -> m (Either r (Char, Producer Text m r))
526 Left r -> return (Left r)
527 Right (txt, p') -> case (T.uncons txt) of
529 Just (c, txt') -> return (Right (c, yield txt' >> p'))
530 {-# INLINABLE nextChar #-}
532 -- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
534 drawChar :: (Monad m) => Parser Text m (Maybe Char)
538 Nothing -> return Nothing
539 Just txt -> case (T.uncons txt) of
544 {-# INLINABLE drawChar #-}
546 -- | Push back a 'Char' onto the underlying 'Producer'
547 unDrawChar :: (Monad m) => Char -> Parser Text m ()
548 unDrawChar c = modify (yield (T.singleton c) >>)
549 {-# INLINABLE unDrawChar #-}
551 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
557 > Left _ -> return ()
558 > Right c -> unDrawChar c
563 peekChar :: (Monad m) => Parser Text m (Maybe Char)
568 Just c -> unDrawChar c
570 {-# INLINABLE peekChar #-}
572 {-| Check if the underlying 'Producer' has no more characters
574 Note that this will skip over empty 'Text' chunks, unlike
575 'PP.isEndOfInput' from @pipes-parse@, which would consider
576 an empty 'Text' a valid bit of input.
578 > isEndOfChars = liftM isLeft peekChar
580 isEndOfChars :: (Monad m) => Parser Text m Bool
586 {-# INLINABLE isEndOfChars #-}
589 -- | Splits a 'Producer' after the given number of characters
591 :: (Monad m, Integral n)
593 -> Lens' (Producer Text m r)
594 (Producer Text m (Producer Text m r))
595 splitAt n0 k p0 = fmap join (k (go n0 p0))
601 Left r -> return (return r)
602 Right (txt, p') -> do
603 let len = fromIntegral (T.length txt)
609 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
611 return (yield suffix >> p')
612 {-# INLINABLE splitAt #-}
615 -- | Split a text stream in two, producing the longest
616 -- consecutive group of characters that satisfies the predicate
617 -- and returning the rest
622 -> Lens' (Producer Text m r)
623 (Producer Text m (Producer Text m r))
624 span predicate k p0 = fmap join (k (go p0))
629 Left r -> return (return r)
630 Right (txt, p') -> do
631 let (prefix, suffix) = T.span predicate txt
638 return (yield suffix >> p')
639 {-# INLINABLE span #-}
641 {-| Split a text stream in two, producing the longest
642 consecutive group of characters that don't satisfy the predicate
647 -> Lens' (Producer Text m r)
648 (Producer Text m (Producer Text m r))
649 break predicate = span (not . predicate)
650 {-# INLINABLE break #-}
652 {-| Improper lens that splits after the first group of equivalent Chars, as
653 defined by the given equivalence relation
657 => (Char -> Char -> Bool)
658 -> Lens' (Producer Text m r)
659 (Producer Text m (Producer Text m r))
660 groupBy equals k p0 = fmap join (k ((go p0))) where
664 Left r -> return (return r)
665 Right (txt, p') -> case T.uncons txt of
667 Just (c, _) -> (yield txt >> p') ^. span (equals c)
668 {-# INLINABLE groupBy #-}
670 -- | Improper lens that splits after the first succession of identical 'Char' s
672 => Lens' (Producer Text m r)
673 (Producer Text m (Producer Text m r))
675 {-# INLINABLE group #-}
677 {-| Improper lens that splits a 'Producer' after the first word
679 Unlike 'words', this does not drop leading whitespace
682 => Lens' (Producer Text m r)
683 (Producer Text m (Producer Text m r))
684 word k p0 = fmap join (k (to p0))
687 p' <- p^.span isSpace
689 {-# INLINABLE word #-}
693 => Lens' (Producer Text m r)
694 (Producer Text m (Producer Text m r))
695 line = break (== '\n')
697 {-# INLINABLE line #-}
700 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
702 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
709 Right (txt, p') -> do
710 yield (T.intersperse c txt)
716 Right (txt, p') -> do
717 yield (T.singleton c)
718 yield (T.intersperse c txt)
720 {-# INLINABLE intersperse #-}
724 -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
725 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
726 packChars = Data.Profunctor.dimap to (fmap from)
728 -- to :: Monad m => Producer Char m x -> Producer Text m x
729 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
731 step diffAs c = diffAs . (c:)
733 done diffAs = T.pack (diffAs [])
735 -- from :: Monad m => Producer Text m x -> Producer Char m x
736 from p = for p (each . T.unpack)
737 {-# INLINABLE packChars #-}
740 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
742 :: (Monad m, Integral n)
743 => n -> Lens' (Producer Text m r)
744 (FreeT (Producer Text m) m r)
745 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
751 Right (txt, p') -> Free $ do
752 p'' <- (yield txt >> p') ^. splitAt n
753 return $ FreeT (go p'')
754 {-# INLINABLE chunksOf #-}
757 {-| Split a text stream into sub-streams delimited by characters that satisfy the
764 -> FreeT (Producer Text m) m r
765 splitsWith predicate p0 = FreeT (go0 p0)
770 Left r -> return (Pure r)
774 else return $ Free $ do
775 p'' <- (yield txt >> p') ^. span (not . predicate)
776 return $ FreeT (go1 p'')
781 Right (_, p') -> Free $ do
782 p'' <- p' ^. span (not . predicate)
783 return $ FreeT (go1 p'')
784 {-# INLINABLE splitsWith #-}
786 -- | Split a text stream using the given 'Char' as the delimiter
789 -> Lens' (Producer Text m r)
790 (FreeT (Producer Text m) m r)
792 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
793 {-# INLINABLE splits #-}
795 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
796 given equivalence relation
800 => (Char -> Char -> Bool)
801 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
802 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
803 go p = do x <- next p
804 case x of Left r -> return (Pure r)
805 Right (bs, p') -> case T.uncons bs of
807 Just (c, _) -> do return $ Free $ do
808 p'' <- (yield bs >> p')^.span (equals c)
809 return $ FreeT (go p'')
810 {-# INLINABLE groupsBy #-}
813 -- | Like 'groupsBy', where the equality predicate is ('==')
816 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
817 groups = groupsBy (==)
818 {-# INLINABLE groups #-}
822 {-| Split a text stream into 'FreeT'-delimited lines
825 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
826 lines = Data.Profunctor.dimap _lines (fmap _unlines)
828 _lines p0 = FreeT (go0 p0)
833 Left r -> return (Pure r)
837 else return $ Free $ go1 (yield txt >> p')
839 p' <- p ^. break ('\n' ==)
843 Left r -> return $ Pure r
844 Right (_, p'') -> go0 p''
847 -- => FreeT (Producer Text m) m x -> Producer Text m x
848 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
851 {-# INLINABLE lines #-}
854 -- | Split a text stream into 'FreeT'-delimited words
856 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
857 words = Data.Profunctor.dimap go (fmap _unwords)
860 x <- next (p >-> dropWhile isSpace)
863 Right (bs, p') -> Free $ do
864 p'' <- (yield bs >> p') ^. break isSpace
866 _unwords = PG.intercalates (yield $ T.singleton ' ')
868 {-# INLINABLE words #-}
871 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
872 interspersing a text stream in between them
876 => Producer Text m ()
877 -> FreeT (Producer Text m) m r
882 x <- lift (runFreeT f)
889 x <- lift (runFreeT f)
896 {-# INLINABLE intercalate #-}
898 {-| Join 'FreeT'-delimited lines into a text stream
901 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
905 x <- lift (runFreeT f)
910 yield $ T.singleton '\n'
912 {-# INLINABLE unlines #-}
914 {-| Join 'FreeT'-delimited words into a text stream
917 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
918 unwords = intercalate (yield $ T.singleton ' ')
919 {-# INLINABLE unwords #-}
924 @Data.Text@ re-exports the 'Text' type.
926 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.