1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
3 {-| This package provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, and you
5 will generally want @Data.Text@ in scope. But the type @Producer Text m r@ is
6 in many ways the pipes equivalent of lazy @Text@ .
8 This module provides many functions equivalent in one way or another to
9 the 'pure' functions in
10 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
11 They transform, divide, group and fold text streams. The functions
12 in this module are \'pure\' in the sense that they are uniformly monad-independent.
13 Simple IO operations are defined in
14 @Pipes.Text.IO@ -- as lazy IO @Text@ operations are in @Data.Text.Lazy.IO@ Interoperation
15 with @ByteString@ is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
17 The Text type exported by @Data.Text.Lazy@ is similar to '[Text]'
18 where the individual chunks are kept to a reasonable size; the user is not
19 aware of the divisions between the connected (strict) 'Text' chunks.
20 Similarly, functions in this module are designed to operate on streams that
21 are insensitive to text boundaries. This means that they may freely split
22 text into smaller texts, /discard empty texts/. However, the objective is that they should
23 /never concatenate texts/ in order to provide strict upper bounds on memory usage.
25 One difference from @Data.Text.Lazy@ is that many of the operations are 'lensified';
26 this has a number of advantages where it is possible, in particular it facilitate
27 their use with pipes-style 'Parser's of Text.
28 For example, to stream only the first three lines of 'stdin' to 'stdout' you
32 > import qualified Pipes.Text as Text
33 > import qualified Pipes.Parse as Parse
35 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
37 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
39 The above program will never bring more than one chunk of text (~ 32 KB) into
40 memory, no matter how long the lines are.
79 -- * Primitive Character Parsers
115 , module Data.ByteString
117 , module Data.Profunctor
122 import Control.Applicative ((<*))
123 import Control.Monad (liftM, join)
124 import Control.Monad.Trans.State.Strict (StateT(..), modify)
125 import qualified Data.Text as T
126 import Data.Text (Text)
127 import qualified Data.Text.Lazy as TL
128 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
129 import Data.ByteString (ByteString)
130 import Data.Functor.Constant (Constant(Constant, getConstant))
131 import Data.Functor.Identity (Identity)
132 import Data.Profunctor (Profunctor)
133 import qualified Data.Profunctor
135 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
136 import qualified Pipes.Group as PG
137 import qualified Pipes.Parse as PP
138 import Pipes.Parse (Parser)
139 import qualified Pipes.Prelude as P
140 import Data.Char (isSpace)
141 import Data.Word (Word8)
143 import Prelude hiding (
172 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
173 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
174 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
175 {-# INLINE fromLazy #-}
178 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
180 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
182 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
183 a ^. lens = getConstant (lens Constant a)
186 -- | Apply a transformation to each 'Char' in the stream
187 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
188 map f = P.map (T.map f)
189 {-# INLINABLE map #-}
191 {-# RULES "p >-> map f" forall p f .
192 p >-> map f = for p (\txt -> yield (T.map f txt))
195 -- | Map a function over the characters of a text stream and concatenate the results
197 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
198 concatMap f = P.map (T.concatMap f)
199 {-# INLINABLE concatMap #-}
201 {-# RULES "p >-> concatMap f" forall p f .
202 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
206 -- | Transform a Pipe of 'String's into one of 'Text' chunks
207 pack :: Monad m => Pipe String Text m r
209 {-# INLINEABLE pack #-}
211 {-# RULES "p >-> pack" forall p .
212 p >-> pack = for p (\txt -> yield (T.pack txt))
215 -- | Transform a Pipes of 'Text' chunks into one of 'String's
216 unpack :: Monad m => Pipe Text String m r
217 unpack = for cat (\t -> yield (T.unpack t))
218 {-# INLINEABLE unpack #-}
220 {-# RULES "p >-> unpack" forall p .
221 p >-> unpack = for p (\txt -> yield (T.unpack txt))
224 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
225 -- here acting as 'Text' pipes, rather as they would on a lazy text
226 toCaseFold :: Monad m => Pipe Text Text m ()
227 toCaseFold = P.map T.toCaseFold
228 {-# INLINEABLE toCaseFold #-}
230 {-# RULES "p >-> toCaseFold" forall p .
231 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
235 -- | lowercase incoming 'Text'
236 toLower :: Monad m => Pipe Text Text m ()
237 toLower = P.map T.toLower
238 {-# INLINEABLE toLower #-}
240 {-# RULES "p >-> toLower" forall p .
241 p >-> toLower = for p (\txt -> yield (T.toLower txt))
244 -- | uppercase incoming 'Text'
245 toUpper :: Monad m => Pipe Text Text m ()
246 toUpper = P.map T.toUpper
247 {-# INLINEABLE toUpper #-}
249 {-# RULES "p >-> toUpper" forall p .
250 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
253 -- | Remove leading white space from an incoming succession of 'Text's
254 stripStart :: Monad m => Pipe Text Text m r
257 let text = T.stripStart chunk
262 {-# INLINEABLE stripStart #-}
264 -- | @(take n)@ only allows @n@ individual characters to pass;
265 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
266 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
267 take n0 = go n0 where
272 let len = fromIntegral (T.length txt)
274 then yield (T.take (fromIntegral n) txt)
278 {-# INLINABLE take #-}
280 -- | @(drop n)@ drops the first @n@ characters
281 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
282 drop n0 = go n0 where
287 let len = fromIntegral (T.length txt)
290 yield (T.drop (fromIntegral n) txt)
293 {-# INLINABLE drop #-}
295 -- | Take characters until they fail the predicate
296 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
297 takeWhile predicate = go
301 let (prefix, suffix) = T.span predicate txt
307 {-# INLINABLE takeWhile #-}
309 -- | Drop characters until they fail the predicate
310 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
311 dropWhile predicate = go where
314 case T.findIndex (not . predicate) txt of
319 {-# INLINABLE dropWhile #-}
321 -- | Only allows 'Char's to pass if they satisfy the predicate
322 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
323 filter predicate = P.map (T.filter predicate)
324 {-# INLINABLE filter #-}
326 {-# RULES "p >-> filter q" forall p q .
327 p >-> filter q = for p (\txt -> yield (T.filter q txt))
330 -- | Strict left scan over the characters
333 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
335 yield (T.singleton begin)
340 let txt' = T.scanl step c txt
344 {-# INLINABLE scan #-}
346 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
349 toLazy :: Producer Text Identity () -> TL.Text
350 toLazy = TL.fromChunks . P.toList
351 {-# INLINABLE toLazy #-}
353 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
356 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
357 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
358 immediately as they are generated instead of loading them all into memory.
360 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
361 toLazyM = liftM TL.fromChunks . P.toListM
362 {-# INLINABLE toLazyM #-}
364 -- | Reduce the text stream using a strict left fold over characters
367 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
368 foldChars step begin done = P.fold (T.foldl' step) begin done
369 {-# INLINABLE foldChars #-}
371 -- | Retrieve the first 'Char'
372 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
378 Left _ -> return Nothing
379 Right (c, _) -> return (Just c)
380 {-# INLINABLE head #-}
382 -- | Retrieve the last 'Char'
383 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
393 else go (Just $ T.last txt) p'
394 {-# INLINABLE last #-}
396 -- | Determine if the stream is empty
397 null :: (Monad m) => Producer Text m () -> m Bool
399 {-# INLINABLE null #-}
401 -- | Count the number of characters in the stream
402 length :: (Monad m, Num n) => Producer Text m () -> m n
403 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
404 {-# INLINABLE length #-}
406 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
407 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
408 any predicate = P.any (T.any predicate)
409 {-# INLINABLE any #-}
411 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
412 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
413 all predicate = P.all (T.all predicate)
414 {-# INLINABLE all #-}
416 -- | Return the maximum 'Char' within a text stream
417 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
418 maximum = P.fold step Nothing id
423 else Just $ case mc of
424 Nothing -> T.maximum txt
425 Just c -> max c (T.maximum txt)
426 {-# INLINABLE maximum #-}
428 -- | Return the minimum 'Char' within a text stream (surely very useful!)
429 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
430 minimum = P.fold step Nothing id
436 Nothing -> Just (T.minimum txt)
437 Just c -> Just (min c (T.minimum txt))
438 {-# INLINABLE minimum #-}
440 -- | Find the first element in the stream that matches the predicate
443 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
444 find predicate p = head (p >-> filter predicate)
445 {-# INLINABLE find #-}
447 -- | Index into a text stream
449 :: (Monad m, Integral a)
450 => a-> Producer Text m () -> m (Maybe Char)
451 index n p = head (p >-> drop n)
452 {-# INLINABLE index #-}
455 -- | Store a tally of how many segments match the given 'Text'
456 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
457 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
458 {-# INLINABLE count #-}
461 -- | Consume the first character from a stream of 'Text'
463 -- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
464 -- succeeds with a 'Right' providing the next character and the remainder of the
470 -> m (Either r (Char, Producer Text m r))
476 Left r -> return (Left r)
477 Right (txt, p') -> case (T.uncons txt) of
479 Just (c, txt') -> return (Right (c, yield txt' >> p'))
480 {-# INLINABLE nextChar #-}
482 -- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
484 drawChar :: (Monad m) => Parser Text m (Maybe Char)
488 Nothing -> return Nothing
489 Just txt -> case (T.uncons txt) of
494 {-# INLINABLE drawChar #-}
496 -- | Push back a 'Char' onto the underlying 'Producer'
497 unDrawChar :: (Monad m) => Char -> Parser Text m ()
498 unDrawChar c = modify (yield (T.singleton c) >>)
499 {-# INLINABLE unDrawChar #-}
501 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
507 > Left _ -> return ()
508 > Right c -> unDrawChar c
513 peekChar :: (Monad m) => Parser Text m (Maybe Char)
518 Just c -> unDrawChar c
520 {-# INLINABLE peekChar #-}
522 {-| Check if the underlying 'Producer' has no more characters
524 Note that this will skip over empty 'Text' chunks, unlike
525 'PP.isEndOfInput' from @pipes-parse@, which would consider
526 an empty 'Text' a valid bit of input.
528 > isEndOfChars = liftM isLeft peekChar
530 isEndOfChars :: (Monad m) => Parser Text m Bool
536 {-# INLINABLE isEndOfChars #-}
539 -- | Splits a 'Producer' after the given number of characters
541 :: (Monad m, Integral n)
543 -> Lens' (Producer Text m r)
544 (Producer Text m (Producer Text m r))
545 splitAt n0 k p0 = fmap join (k (go n0 p0))
551 Left r -> return (return r)
552 Right (txt, p') -> do
553 let len = fromIntegral (T.length txt)
559 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
561 return (yield suffix >> p')
562 {-# INLINABLE splitAt #-}
565 -- | Split a text stream in two, producing the longest
566 -- consecutive group of characters that satisfies the predicate
567 -- and returning the rest
572 -> Lens' (Producer Text m r)
573 (Producer Text m (Producer Text m r))
574 span predicate k p0 = fmap join (k (go p0))
579 Left r -> return (return r)
580 Right (txt, p') -> do
581 let (prefix, suffix) = T.span predicate txt
588 return (yield suffix >> p')
589 {-# INLINABLE span #-}
591 {-| Split a text stream in two, producing the longest
592 consecutive group of characters that don't satisfy the predicate
597 -> Lens' (Producer Text m r)
598 (Producer Text m (Producer Text m r))
599 break predicate = span (not . predicate)
600 {-# INLINABLE break #-}
602 {-| Improper lens that splits after the first group of equivalent Chars, as
603 defined by the given equivalence relation
607 => (Char -> Char -> Bool)
608 -> Lens' (Producer Text m r)
609 (Producer Text m (Producer Text m r))
610 groupBy equals k p0 = fmap join (k ((go p0))) where
614 Left r -> return (return r)
615 Right (txt, p') -> case T.uncons txt of
617 Just (c, _) -> (yield txt >> p') ^. span (equals c)
618 {-# INLINABLE groupBy #-}
620 -- | Improper lens that splits after the first succession of identical 'Char' s
622 => Lens' (Producer Text m r)
623 (Producer Text m (Producer Text m r))
625 {-# INLINABLE group #-}
627 {-| Improper lens that splits a 'Producer' after the first word
629 Unlike 'words', this does not drop leading whitespace
632 => Lens' (Producer Text m r)
633 (Producer Text m (Producer Text m r))
634 word k p0 = fmap join (k (to p0))
637 p' <- p^.span isSpace
639 {-# INLINABLE word #-}
643 => Lens' (Producer Text m r)
644 (Producer Text m (Producer Text m r))
645 line = break (== '\n')
647 {-# INLINABLE line #-}
650 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
652 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
659 Right (txt, p') -> do
660 yield (T.intersperse c txt)
666 Right (txt, p') -> do
667 yield (T.singleton c)
668 yield (T.intersperse c txt)
670 {-# INLINABLE intersperse #-}
674 -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
675 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
676 packChars = Data.Profunctor.dimap to (fmap from)
678 -- to :: Monad m => Producer Char m x -> Producer Text m x
679 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
681 step diffAs c = diffAs . (c:)
683 done diffAs = T.pack (diffAs [])
685 -- from :: Monad m => Producer Text m x -> Producer Char m x
686 from p = for p (each . T.unpack)
687 {-# INLINABLE packChars #-}
690 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
692 :: (Monad m, Integral n)
693 => n -> Lens' (Producer Text m r)
694 (FreeT (Producer Text m) m r)
695 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
701 Right (txt, p') -> Free $ do
702 p'' <- (yield txt >> p') ^. splitAt n
703 return $ FreeT (go p'')
704 {-# INLINABLE chunksOf #-}
707 {-| Split a text stream into sub-streams delimited by characters that satisfy the
714 -> FreeT (Producer Text m) m r
715 splitsWith predicate p0 = FreeT (go0 p0)
720 Left r -> return (Pure r)
724 else return $ Free $ do
725 p'' <- (yield txt >> p') ^. span (not . predicate)
726 return $ FreeT (go1 p'')
731 Right (_, p') -> Free $ do
732 p'' <- p' ^. span (not . predicate)
733 return $ FreeT (go1 p'')
734 {-# INLINABLE splitsWith #-}
736 -- | Split a text stream using the given 'Char' as the delimiter
739 -> Lens' (Producer Text m r)
740 (FreeT (Producer Text m) m r)
742 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
743 {-# INLINABLE splits #-}
745 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
746 given equivalence relation
750 => (Char -> Char -> Bool)
751 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
752 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
753 go p = do x <- next p
754 case x of Left r -> return (Pure r)
755 Right (bs, p') -> case T.uncons bs of
757 Just (c, _) -> do return $ Free $ do
758 p'' <- (yield bs >> p')^.span (equals c)
759 return $ FreeT (go p'')
760 {-# INLINABLE groupsBy #-}
763 -- | Like 'groupsBy', where the equality predicate is ('==')
766 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
767 groups = groupsBy (==)
768 {-# INLINABLE groups #-}
772 {-| Split a text stream into 'FreeT'-delimited lines
775 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
776 lines = Data.Profunctor.dimap _lines (fmap _unlines)
778 _lines p0 = FreeT (go0 p0)
783 Left r -> return (Pure r)
787 else return $ Free $ go1 (yield txt >> p')
789 p' <- p ^. break ('\n' ==)
793 Left r -> return $ Pure r
794 Right (_, p'') -> go0 p''
797 -- => FreeT (Producer Text m) m x -> Producer Text m x
798 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
801 {-# INLINABLE lines #-}
804 -- | Split a text stream into 'FreeT'-delimited words
806 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
807 words = Data.Profunctor.dimap go (fmap _unwords)
810 x <- next (p >-> dropWhile isSpace)
813 Right (bs, p') -> Free $ do
814 p'' <- (yield bs >> p') ^. break isSpace
816 _unwords = PG.intercalates (yield $ T.singleton ' ')
818 {-# INLINABLE words #-}
821 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
822 interspersing a text stream in between them
826 => Producer Text m ()
827 -> FreeT (Producer Text m) m r
832 x <- lift (runFreeT f)
839 x <- lift (runFreeT f)
846 {-# INLINABLE intercalate #-}
848 {-| Join 'FreeT'-delimited lines into a text stream
851 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
855 x <- lift (runFreeT f)
860 yield $ T.singleton '\n'
862 {-# INLINABLE unlines #-}
864 {-| Join 'FreeT'-delimited words into a text stream
867 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
868 unwords = intercalate (yield $ T.singleton ' ')
869 {-# INLINABLE unwords #-}
874 @Data.Text@ re-exports the 'Text' type.
876 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.