1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
6 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
7 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
9 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
10 example, the following program copies a document from one file to another:
13 > import qualified Pipes.Text as Text
14 > import qualified Pipes.Text.IO as Text
18 > withFile "inFile.txt" ReadMode $ \hIn ->
19 > withFile "outFile.txt" WriteMode $ \hOut ->
20 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
25 > import qualified Pipes.Text as Text
26 > import qualified Pipes.Text.IO as Text
29 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
32 and 'stdout' pipes, as with the following \"echo\" program:
34 > main = runEffect $ Text.stdin >-> Text.stdout
36 You can also translate pure lazy 'TL.Text's to and from pipes:
38 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
40 In addition, this module provides many functions equivalent to lazy
41 'Text' functions so that you can transform or fold text streams. For
42 example, to stream only the first three lines of 'stdin' to 'stdout' you
46 > import qualified Pipes.Text as Text
47 > import qualified Pipes.Parse as Parse
49 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
51 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
53 The above program will never bring more than one chunk of text (~ 32 KB) into
54 memory, no matter how long the lines are.
56 Note that functions in this library are designed to operate on streams that
57 are insensitive to text boundaries. This means that they may freely split
58 text into smaller texts, /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
108 -- * Primitive Character Parsers
125 -- -- * Decoding Lenses
136 -- -- * Other Decoding/Encoding Functions
162 -- , DecodeResult(..)
164 -- , TextException(..)
165 , module Data.ByteString
167 , module Data.Profunctor
173 import Control.Applicative ((<*))
174 import Control.Monad (liftM, unless, join)
175 import Control.Monad.Trans.State.Strict (StateT(..), modify)
176 import Data.Monoid ((<>))
177 import qualified Data.Text as T
178 import qualified Data.Text.IO as T
179 import qualified Data.Text.Encoding as TE
180 import qualified Data.Text.Encoding.Error as TE
181 import Data.Text (Text)
182 import qualified Data.Text.Lazy as TL
183 import qualified Data.Text.Lazy.IO as TL
184 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
185 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
186 import Data.ByteString (ByteString)
187 import qualified Data.ByteString as B
188 import qualified Data.ByteString.Char8 as B8
189 import Data.Char (ord, isSpace)
190 import Data.Functor.Constant (Constant(Constant, getConstant))
191 import Data.Functor.Identity (Identity)
192 import Data.Profunctor (Profunctor)
193 import qualified Data.Profunctor
194 import qualified Data.List as List
196 import qualified Pipes.ByteString as PB
197 -- import Pipes.Text.Decoding
198 import Pipes.Core (respond, Server')
199 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
200 import qualified Pipes.Group as PG
201 import qualified Pipes.Parse as PP
202 import Pipes.Parse (Parser)
204 import qualified Pipes.Prelude as P
205 import qualified System.IO as IO
206 import Data.Char (isSpace)
207 import Data.Word (Word8)
208 import Data.Text.StreamDecoding
210 import Prelude hiding (
239 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
240 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
241 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
242 {-# INLINE fromLazy #-}
245 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
247 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
249 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
250 a ^. lens = getConstant (lens Constant a)
253 -- | Apply a transformation to each 'Char' in the stream
254 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
255 map f = P.map (T.map f)
256 {-# INLINABLE map #-}
258 {-# RULES "p >-> map f" forall p f .
259 p >-> map f = for p (\txt -> yield (T.map f txt))
262 -- | Map a function over the characters of a text stream and concatenate the results
264 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
265 concatMap f = P.map (T.concatMap f)
266 {-# INLINABLE concatMap #-}
268 {-# RULES "p >-> concatMap f" forall p f .
269 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
272 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
273 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
274 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
275 encodeUtf8 :: Monad m => Pipe Text ByteString m r
276 encodeUtf8 = P.map TE.encodeUtf8
277 {-# INLINEABLE encodeUtf8 #-}
279 {-# RULES "p >-> encodeUtf8" forall p .
280 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
283 -- | Transform a Pipe of 'String's into one of 'Text' chunks
284 pack :: Monad m => Pipe String Text m r
286 {-# INLINEABLE pack #-}
288 {-# RULES "p >-> pack" forall p .
289 p >-> pack = for p (\txt -> yield (T.pack txt))
292 -- | Transform a Pipes of 'Text' chunks into one of 'String's
293 unpack :: Monad m => Pipe Text String m r
294 unpack = for cat (\t -> yield (T.unpack t))
295 {-# INLINEABLE unpack #-}
297 {-# RULES "p >-> unpack" forall p .
298 p >-> unpack = for p (\txt -> yield (T.unpack txt))
301 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
302 -- here acting as 'Text' pipes, rather as they would on a lazy text
303 toCaseFold :: Monad m => Pipe Text Text m ()
304 toCaseFold = P.map T.toCaseFold
305 {-# INLINEABLE toCaseFold #-}
307 {-# RULES "p >-> toCaseFold" forall p .
308 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
312 -- | lowercase incoming 'Text'
313 toLower :: Monad m => Pipe Text Text m ()
314 toLower = P.map T.toLower
315 {-# INLINEABLE toLower #-}
317 {-# RULES "p >-> toLower" forall p .
318 p >-> toLower = for p (\txt -> yield (T.toLower txt))
321 -- | uppercase incoming 'Text'
322 toUpper :: Monad m => Pipe Text Text m ()
323 toUpper = P.map T.toUpper
324 {-# INLINEABLE toUpper #-}
326 {-# RULES "p >-> toUpper" forall p .
327 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
330 -- | Remove leading white space from an incoming succession of 'Text's
331 stripStart :: Monad m => Pipe Text Text m r
334 let text = T.stripStart chunk
339 {-# INLINEABLE stripStart #-}
341 -- | @(take n)@ only allows @n@ individual characters to pass;
342 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
343 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
344 take n0 = go n0 where
349 let len = fromIntegral (T.length txt)
351 then yield (T.take (fromIntegral n) txt)
355 {-# INLINABLE take #-}
357 -- | @(drop n)@ drops the first @n@ characters
358 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
359 drop n0 = go n0 where
364 let len = fromIntegral (T.length txt)
367 yield (T.drop (fromIntegral n) txt)
370 {-# INLINABLE drop #-}
372 -- | Take characters until they fail the predicate
373 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
374 takeWhile predicate = go
378 let (prefix, suffix) = T.span predicate txt
384 {-# INLINABLE takeWhile #-}
386 -- | Drop characters until they fail the predicate
387 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
388 dropWhile predicate = go where
391 case T.findIndex (not . predicate) txt of
396 {-# INLINABLE dropWhile #-}
398 -- | Only allows 'Char's to pass if they satisfy the predicate
399 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
400 filter predicate = P.map (T.filter predicate)
401 {-# INLINABLE filter #-}
403 {-# RULES "p >-> filter q" forall p q .
404 p >-> filter q = for p (\txt -> yield (T.filter q txt))
407 -- | Strict left scan over the characters
410 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
412 yield (T.singleton begin)
417 let txt' = T.scanl step c txt
421 {-# INLINABLE scan #-}
423 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
426 toLazy :: Producer Text Identity () -> TL.Text
427 toLazy = TL.fromChunks . P.toList
428 {-# INLINABLE toLazy #-}
430 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
433 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
434 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
435 immediately as they are generated instead of loading them all into memory.
437 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
438 toLazyM = liftM TL.fromChunks . P.toListM
439 {-# INLINABLE toLazyM #-}
441 -- | Reduce the text stream using a strict left fold over characters
444 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
445 foldChars step begin done = P.fold (T.foldl' step) begin done
446 {-# INLINABLE foldChars #-}
448 -- | Retrieve the first 'Char'
449 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
455 Left _ -> return Nothing
456 Right (c, _) -> return (Just c)
457 {-# INLINABLE head #-}
459 -- | Retrieve the last 'Char'
460 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
470 else go (Just $ T.last txt) p'
471 {-# INLINABLE last #-}
473 -- | Determine if the stream is empty
474 null :: (Monad m) => Producer Text m () -> m Bool
476 {-# INLINABLE null #-}
478 -- | Count the number of characters in the stream
479 length :: (Monad m, Num n) => Producer Text m () -> m n
480 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
481 {-# INLINABLE length #-}
483 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
484 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
485 any predicate = P.any (T.any predicate)
486 {-# INLINABLE any #-}
488 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
489 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
490 all predicate = P.all (T.all predicate)
491 {-# INLINABLE all #-}
493 -- | Return the maximum 'Char' within a text stream
494 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
495 maximum = P.fold step Nothing id
500 else Just $ case mc of
501 Nothing -> T.maximum txt
502 Just c -> max c (T.maximum txt)
503 {-# INLINABLE maximum #-}
505 -- | Return the minimum 'Char' within a text stream (surely very useful!)
506 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
507 minimum = P.fold step Nothing id
513 Nothing -> Just (T.minimum txt)
514 Just c -> Just (min c (T.minimum txt))
515 {-# INLINABLE minimum #-}
518 -- | Find the first element in the stream that matches the predicate
521 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
522 find predicate p = head (p >-> filter predicate)
523 {-# INLINABLE find #-}
525 -- | Index into a text stream
527 :: (Monad m, Integral a)
528 => a-> Producer Text m () -> m (Maybe Char)
529 index n p = head (p >-> drop n)
530 {-# INLINABLE index #-}
533 -- | Store a tally of how many segments match the given 'Text'
534 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
535 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
536 {-# INLINABLE count #-}
539 {-| Consume the first character from a stream of 'Text'
541 'next' either fails with a 'Left' if the 'Producer' has no more characters or
542 succeeds with a 'Right' providing the next character and the remainder of the
548 -> m (Either r (Char, Producer Text m r))
554 Left r -> return (Left r)
555 Right (txt, p') -> case (T.uncons txt) of
557 Just (c, txt') -> return (Right (c, yield txt' >> p'))
558 {-# INLINABLE nextChar #-}
560 {-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
563 drawChar :: (Monad m) => Parser Text m (Maybe Char)
567 Nothing -> return Nothing
568 Just txt -> case (T.uncons txt) of
573 {-# INLINABLE drawChar #-}
575 -- | Push back a 'Char' onto the underlying 'Producer'
576 unDrawChar :: (Monad m) => Char -> Parser Text m ()
577 unDrawChar c = modify (yield (T.singleton c) >>)
578 {-# INLINABLE unDrawChar #-}
580 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
586 > Left _ -> return ()
587 > Right c -> unDrawChar c
590 peekChar :: (Monad m) => Parser Text m (Maybe Char)
595 Just c -> unDrawChar c
597 {-# INLINABLE peekChar #-}
599 {-| Check if the underlying 'Producer' has no more characters
601 Note that this will skip over empty 'Text' chunks, unlike
602 'PP.isEndOfInput' from @pipes-parse@, which would consider
603 an empty 'Text' a valid bit of input.
605 > isEndOfChars = liftM isLeft peekChar
607 isEndOfChars :: (Monad m) => Parser Text m Bool
613 {-# INLINABLE isEndOfChars #-}
618 -- | Splits a 'Producer' after the given number of characters
620 :: (Monad m, Integral n)
622 -> Lens' (Producer Text m r)
623 (Producer Text m (Producer Text m r))
624 splitAt n0 k p0 = fmap join (k (go n0 p0))
630 Left r -> return (return r)
631 Right (txt, p') -> do
632 let len = fromIntegral (T.length txt)
638 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
640 return (yield suffix >> p')
641 {-# INLINABLE splitAt #-}
644 {-| Split a text stream in two, where the first text stream is the longest
645 consecutive group of text that satisfy the predicate
650 -> Lens' (Producer Text m r)
651 (Producer Text m (Producer Text m r))
652 span predicate k p0 = fmap join (k (go p0))
657 Left r -> return (return r)
658 Right (txt, p') -> do
659 let (prefix, suffix) = T.span predicate txt
666 return (yield suffix >> p')
667 {-# INLINABLE span #-}
669 {-| Split a text stream in two, where the first text stream is the longest
670 consecutive group of characters that don't satisfy the predicate
675 -> Lens' (Producer Text m r)
676 (Producer Text m (Producer Text m r))
677 break predicate = span (not . predicate)
678 {-# INLINABLE break #-}
680 {-| Improper lens that splits after the first group of equivalent Chars, as
681 defined by the given equivalence relation
685 => (Char -> Char -> Bool)
686 -> Lens' (Producer Text m r)
687 (Producer Text m (Producer Text m r))
688 groupBy equals k p0 = fmap join (k ((go p0))) where
692 Left r -> return (return r)
693 Right (txt, p') -> case T.uncons txt of
695 Just (c, _) -> (yield txt >> p') ^. span (equals c)
696 {-# INLINABLE groupBy #-}
698 -- | Improper lens that splits after the first succession of identical 'Char' s
700 => Lens' (Producer Text m r)
701 (Producer Text m (Producer Text m r))
703 {-# INLINABLE group #-}
705 {-| Improper lens that splits a 'Producer' after the first word
707 Unlike 'words', this does not drop leading whitespace
710 => Lens' (Producer Text m r)
711 (Producer Text m (Producer Text m r))
712 word k p0 = fmap join (k (to p0))
715 p' <- p^.span isSpace
717 {-# INLINABLE word #-}
721 => Lens' (Producer Text m r)
722 (Producer Text m (Producer Text m r))
723 line = break (== '\n')
725 {-# INLINABLE line #-}
728 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
730 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
737 Right (txt, p') -> do
738 yield (T.intersperse c txt)
744 Right (txt, p') -> do
745 yield (T.singleton c)
746 yield (T.intersperse c txt)
748 {-# INLINABLE intersperse #-}
752 -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
753 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
754 packChars = Data.Profunctor.dimap to (fmap from)
756 -- to :: Monad m => Producer Char m x -> Producer Text m x
757 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
759 step diffAs c = diffAs . (c:)
761 done diffAs = T.pack (diffAs [])
763 -- from :: Monad m => Producer Text m x -> Producer Char m x
764 from p = for p (each . T.unpack)
765 {-# INLINABLE packChars #-}
768 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
770 :: (Monad m, Integral n)
771 => n -> Lens' (Producer Text m r)
772 (FreeT (Producer Text m) m r)
773 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
779 Right (txt, p') -> Free $ do
780 p'' <- (yield txt >> p') ^. splitAt n
781 return $ FreeT (go p'')
782 {-# INLINABLE chunksOf #-}
785 {-| Split a text stream into sub-streams delimited by characters that satisfy the
792 -> FreeT (Producer Text m) m r
793 splitsWith predicate p0 = FreeT (go0 p0)
798 Left r -> return (Pure r)
802 else return $ Free $ do
803 p'' <- (yield txt >> p') ^. span (not . predicate)
804 return $ FreeT (go1 p'')
809 Right (_, p') -> Free $ do
810 p'' <- p' ^. span (not . predicate)
811 return $ FreeT (go1 p'')
812 {-# INLINABLE splitsWith #-}
814 -- | Split a text stream using the given 'Char' as the delimiter
817 -> Lens' (Producer Text m r)
818 (FreeT (Producer Text m) m r)
820 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
821 {-# INLINABLE splits #-}
823 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
824 given equivalence relation
828 => (Char -> Char -> Bool)
829 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
830 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
831 go p = do x <- next p
832 case x of Left r -> return (Pure r)
833 Right (bs, p') -> case T.uncons bs of
835 Just (c, _) -> do return $ Free $ do
836 p'' <- (yield bs >> p')^.span (equals c)
837 return $ FreeT (go p'')
838 {-# INLINABLE groupsBy #-}
841 -- | Like 'groupsBy', where the equality predicate is ('==')
844 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
845 groups = groupsBy (==)
846 {-# INLINABLE groups #-}
850 {-| Split a text stream into 'FreeT'-delimited lines
853 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
854 lines = Data.Profunctor.dimap _lines (fmap _unlines)
856 _lines p0 = FreeT (go0 p0)
861 Left r -> return (Pure r)
865 else return $ Free $ go1 (yield txt >> p')
867 p' <- p ^. break ('\n' ==)
871 Left r -> return $ Pure r
872 Right (_, p'') -> go0 p''
875 -- => FreeT (Producer Text m) m x -> Producer Text m x
876 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
879 {-# INLINABLE lines #-}
882 -- | Split a text stream into 'FreeT'-delimited words
884 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
885 words = Data.Profunctor.dimap go (fmap _unwords)
888 x <- next (p >-> dropWhile isSpace)
891 Right (bs, p') -> Free $ do
892 p'' <- (yield bs >> p') ^. break isSpace
894 _unwords = PG.intercalates (yield $ T.singleton ' ')
896 {-# INLINABLE words #-}
899 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
900 interspersing a text stream in between them
904 => Producer Text m ()
905 -> FreeT (Producer Text m) m r
910 x <- lift (runFreeT f)
917 x <- lift (runFreeT f)
924 {-# INLINABLE intercalate #-}
926 {-| Join 'FreeT'-delimited lines into a text stream
929 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
933 x <- lift (runFreeT f)
938 yield $ T.singleton '\n'
940 {-# INLINABLE unlines #-}
942 {-| Join 'FreeT'-delimited words into a text stream
945 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
946 unwords = intercalate (yield $ T.singleton ' ')
947 {-# INLINABLE unwords #-}
950 The following parsing utilities are single-character analogs of the ones found
956 @Data.Text@ re-exports the 'Text' type.
958 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.