1 {-# LANGUAGE RankNTypes, TypeFamilies, CPP #-}
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
6 be associated with a 'Producer' or 'Consumer' according as it is read or written to.
8 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
9 example, the following program copies a document from one file to another:
12 > import qualified Data.Text.Pipes as Text
16 > withFile "inFile.txt" ReadMode $ \hIn ->
17 > withFile "outFile.txt" WriteMode $ \hOut ->
18 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
20 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
23 > import qualified Data.Text.Pipes as Text
26 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
28 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
29 and 'stdout' proxies, as with the following \"echo\" program:
31 > main = runEffect $ Text.stdin >-> Text.stdout
33 You can also translate pure lazy 'TL.Text's to and from proxies:
35 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
37 In addition, this module provides many functions equivalent to lazy
38 'Text' functions so that you can transform or fold text streams. For
39 example, to stream only the first three lines of 'stdin' to 'stdout' you
43 > import qualified Pipes.Text as Text
44 > import qualified Pipes.Parse as Parse
46 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
48 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
50 The above program will never bring more than one chunk of text (~ 32 KB) into
51 memory, no matter how long the lines are.
53 Note that functions in this library are designed to operate on streams that
54 are insensitive to text boundaries. This means that they may freely split
55 text into smaller texts and /discard empty texts/. However, they will
56 /never concatenate texts/ in order to provide strict upper bounds on memory
84 #if MIN_VERSION_text(0,11,4)
122 #if MIN_VERSION_text(0,11,4)
134 -- * Character Parsers
148 import Control.Exception (throwIO, try)
149 import Control.Monad (liftM, unless)
150 import Control.Monad.Trans.State.Strict (StateT(..))
151 import qualified Data.Text as T
152 import qualified Data.Text.IO as T
153 import qualified Data.Text.Encoding as TE
154 import qualified Data.Text.Encoding.Error as TE
155 import Data.Text (Text)
156 import qualified Data.Text.Lazy as TL
157 import qualified Data.Text.Lazy.IO as TL
158 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
159 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
160 import Data.ByteString (ByteString)
161 import qualified Data.ByteString as B
162 import Data.Char (ord, isSpace)
163 import Data.Functor.Identity (Identity)
164 import qualified Data.List as List
165 import Foreign.C.Error (Errno(Errno), ePIPE)
166 import qualified GHC.IO.Exception as G
168 import qualified Pipes.ByteString as PB
169 import qualified Pipes.ByteString.Parse as PBP
170 import Pipes.Text.Parse (
171 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
172 import Pipes.Core (respond, Server')
173 import qualified Pipes.Parse as PP
174 import Pipes.Parse (input, concat, FreeT)
175 import qualified Pipes.Safe.Prelude as Safe
176 import qualified Pipes.Safe as Safe
177 import Pipes.Safe (MonadSafe(..), Base(..))
178 import qualified Pipes.Prelude as P
179 import qualified System.IO as IO
180 import Data.Char (isSpace)
181 import Data.Word (Word8)
182 import Prelude hiding (
211 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
212 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
213 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
214 {-# INLINABLE fromLazy #-}
216 -- | Stream text from 'stdin'
217 stdin :: MonadIO m => Producer' Text m ()
218 stdin = fromHandle IO.stdin
219 {-# INLINABLE stdin #-}
221 {-| Convert a 'IO.Handle' into a text stream using a text size
222 determined by the good sense of the text library.
226 fromHandle :: MonadIO m => IO.Handle -> Producer' Text m ()
227 #if MIN_VERSION_text(0,11,4)
228 fromHandle h = PB.fromHandle h >-> pipeDecodeUtf8
229 {-# INLINABLE fromHandle#-}
230 -- bytestring fromHandle + streamDecodeUtf8 is 3 times as fast as
231 -- the dedicated Text IO function 'hGetChunk' ;
232 -- this way "runEffect $ PT.fromHandle hIn >-> PT.toHandle hOut"
233 -- runs the same as the conduit equivalent, only slightly slower
234 -- than "runEffect $ PB.fromHandle hIn >-> PB.toHandle hOut"
237 fromHandle h = go where
238 go = do txt <- liftIO (T.hGetChunk h)
239 unless (T.null txt) $ do yield txt
241 {-# INLINABLE fromHandle#-}
243 {-| Stream text from a file using Pipes.Safe
245 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
246 MAIN = PUTSTRLN "HELLO WORLD"
249 readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m ()
250 readFile file = Safe.withFile file IO.ReadMode fromHandle
251 {-# INLINABLE readFile #-}
253 {-| Stream lines of text from stdin (for testing in ghci etc.)
255 >>> let safely = runSafeT . runEffect
256 >>> safely $ for Text.stdinLn (lift . lift . print . T.length)
263 stdinLn :: MonadIO m => Producer' Text m ()
266 eof <- liftIO (IO.hIsEOF IO.stdin)
268 txt <- liftIO (T.hGetLine IO.stdin)
273 {-| Stream text to 'stdout'
275 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
277 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
278 @(source >-> stdout)@ in suitable cases.
280 stdout :: MonadIO m => Consumer' Text m ()
285 x <- liftIO $ try (T.putStr txt)
287 Left (G.IOError { G.ioe_type = G.ResourceVanished
288 , G.ioe_errno = Just ioe })
291 Left e -> liftIO (throwIO e)
293 {-# INLINABLE stdout #-}
295 stdoutLn :: (MonadIO m) => Consumer' Text m ()
300 x <- liftIO $ try (T.putStrLn str)
302 Left (G.IOError { G.ioe_type = G.ResourceVanished
303 , G.ioe_errno = Just ioe })
306 Left e -> liftIO (throwIO e)
308 {-# INLINABLE stdoutLn #-}
310 {-| Convert a text stream into a 'Handle'
312 Note: again, for best performance, where possible use
313 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
315 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
316 toHandle h = for cat (liftIO . T.hPutStr h)
317 {-# INLINABLE toHandle #-}
319 {-# RULES "p >-> toHandle h" forall p h .
320 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
324 -- | Stream text into a file. Uses @pipes-safe@.
325 writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m ()
326 writeFile file = Safe.withFile file IO.WriteMode toHandle
328 -- | Apply a transformation to each 'Char' in the stream
329 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
330 map f = P.map (T.map f)
331 {-# INLINABLE map #-}
333 {-# RULES "p >-> map f" forall p f .
334 p >-> map f = for p (\txt -> yield (T.map f txt))
337 -- | Map a function over the characters of a text stream and concatenate the results
339 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
340 concatMap f = P.map (T.concatMap f)
341 {-# INLINABLE concatMap #-}
343 {-# RULES "p >-> concatMap f" forall p f .
344 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
347 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
348 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
349 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
350 encodeUtf8 :: Monad m => Pipe Text ByteString m r
351 encodeUtf8 = P.map TE.encodeUtf8
352 {-# INLINEABLE encodeUtf8 #-}
354 {-# RULES "p >-> encodeUtf8" forall p .
355 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
358 -- | Transform a Pipe of 'String's into one of 'Text' chunks
359 pack :: Monad m => Pipe String Text m r
361 {-# INLINEABLE pack #-}
363 {-# RULES "p >-> pack" forall p .
364 p >-> pack = for p (\txt -> yield (T.pack txt))
367 -- | Transform a Pipes of 'Text' chunks into one of 'String's
368 unpack :: Monad m => Pipe Text String m r
369 unpack = for cat (\t -> yield (T.unpack t))
370 {-# INLINEABLE unpack #-}
372 {-# RULES "p >-> unpack" forall p .
373 p >-> unpack = for p (\txt -> yield (T.unpack txt))
376 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
377 -- here acting on a 'Text' pipe, rather as they would on a lazy text
378 toCaseFold :: Monad m => Pipe Text Text m ()
379 toCaseFold = P.map T.toCaseFold
380 {-# INLINEABLE toCaseFold #-}
382 {-# RULES "p >-> toCaseFold" forall p .
383 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
387 -- | lowercase incoming 'Text'
388 toLower :: Monad m => Pipe Text Text m ()
389 toLower = P.map T.toLower
390 {-# INLINEABLE toLower #-}
392 {-# RULES "p >-> toLower" forall p .
393 p >-> toLower = for p (\txt -> yield (T.toLower txt))
396 -- | uppercase incoming 'Text'
397 toUpper :: Monad m => Pipe Text Text m ()
398 toUpper = P.map T.toUpper
399 {-# INLINEABLE toUpper #-}
401 {-# RULES "p >-> toUpper" forall p .
402 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
405 -- | Remove leading white space from an incoming succession of 'Text's
406 stripStart :: Monad m => Pipe Text Text m r
409 let text = T.stripStart chunk
413 {-# INLINEABLE stripStart #-}
415 -- | @(take n)@ only allows @n@ individual characters to pass;
416 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
417 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
418 take n0 = go n0 where
423 let len = fromIntegral (T.length txt)
425 then yield (T.take (fromIntegral n) txt)
429 {-# INLINABLE take #-}
431 -- | @(drop n)@ drops the first @n@ characters
432 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
433 drop n0 = go n0 where
438 let len = fromIntegral (T.length txt)
441 yield (T.drop (fromIntegral n) txt)
444 {-# INLINABLE drop #-}
446 -- | Take characters until they fail the predicate
447 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
448 takeWhile predicate = go
452 let (prefix, suffix) = T.span predicate txt
458 {-# INLINABLE takeWhile #-}
460 -- | Drop characters until they fail the predicate
461 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
462 dropWhile predicate = go where
465 case T.findIndex (not . predicate) txt of
470 {-# INLINABLE dropWhile #-}
472 -- | Only allows 'Char's to pass if they satisfy the predicate
473 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
474 filter predicate = P.map (T.filter predicate)
475 {-# INLINABLE filter #-}
477 {-# RULES "p >-> filter q" forall p q .
478 p >-> filter q = for p (\txt -> yield (T.filter q txt))
481 -- | Strict left scan over the characters
484 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
485 scan step begin = go begin
489 let txt' = T.scanl step c txt
493 {-# INLINABLE scan #-}
495 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
498 toLazy :: Producer Text Identity () -> TL.Text
499 toLazy = TL.fromChunks . P.toList
500 {-# INLINABLE toLazy #-}
502 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
505 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
506 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
507 immediately as they are generated instead of loading them all into memory.
509 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
510 toLazyM = liftM TL.fromChunks . P.toListM
511 {-# INLINABLE toLazyM #-}
513 -- | Reduce the text stream using a strict left fold over characters
516 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
517 fold step begin done = P.fold (T.foldl' step) begin done
518 {-# INLINABLE fold #-}
520 -- | Retrieve the first 'Char'
521 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
527 Left _ -> return Nothing
528 Right (c, _) -> return (Just c)
529 {-# INLINABLE head #-}
531 -- | Retrieve the last 'Char'
532 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
542 else go (Just $ T.last txt) p'
543 {-# INLINABLE last #-}
545 -- | Determine if the stream is empty
546 null :: (Monad m) => Producer Text m () -> m Bool
548 {-# INLINABLE null #-}
550 -- | Count the number of characters in the stream
551 length :: (Monad m, Num n) => Producer Text m () -> m n
552 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
553 {-# INLINABLE length #-}
555 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
556 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
557 any predicate = P.any (T.any predicate)
558 {-# INLINABLE any #-}
560 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
561 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
562 all predicate = P.all (T.all predicate)
563 {-# INLINABLE all #-}
565 -- | Return the maximum 'Char' within a text stream
566 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
567 maximum = P.fold step Nothing id
572 else Just $ case mc of
573 Nothing -> T.maximum txt
574 Just c -> max c (T.maximum txt)
575 {-# INLINABLE maximum #-}
577 -- | Return the minimum 'Char' within a text stream (surely very useful!)
578 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
579 minimum = P.fold step Nothing id
585 Nothing -> Just (T.minimum txt)
586 Just c -> Just (min c (T.minimum txt))
587 {-# INLINABLE minimum #-}
589 -- | Find the first element in the stream that matches the predicate
592 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
593 find predicate p = head (p >-> filter predicate)
594 {-# INLINABLE find #-}
596 -- | Index into a text stream
598 :: (Monad m, Integral a)
599 => a-> Producer Text m () -> m (Maybe Char)
600 index n p = head (p >-> drop n)
601 {-# INLINABLE index #-}
604 -- | Store a tally of how many segments match the given 'Text'
605 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
606 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
607 {-# INLINABLE count #-}
609 #if MIN_VERSION_text(0,11,4)
610 -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
611 -- into a Pipe of Text
614 => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
615 decodeUtf8 = go TE.streamDecodeUtf8
619 Left r -> return (return r)
620 Right (chunk, p') -> do
621 let TE.Some text l dec' = dec chunk
629 {-# INLINEABLE decodeUtf8 #-}
631 -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
632 -- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@
633 -- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\"
637 -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
638 decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
642 Left r -> return (return r)
643 Right (chunk, p') -> do
644 let TE.Some text l dec' = dec chunk
652 {-# INLINEABLE decodeUtf8With #-}
654 -- | A simple pipe from 'ByteString' to 'Text'; a decoding error will arise
655 -- with any chunk that contains a sequence of bytes that is unreadable. Otherwise
656 -- only few bytes will only be moved from one chunk to the next before decoding.
657 pipeDecodeUtf8 :: Monad m => Pipe ByteString Text m r
658 pipeDecodeUtf8 = go TE.streamDecodeUtf8
659 where go dec = do chunk <- await
661 TE.Some text l dec' -> do yield text
663 {-# INLINEABLE pipeDecodeUtf8 #-}
665 -- | A simple pipe from 'ByteString' to 'Text' using a replacement function.
669 -> Pipe ByteString Text m r
670 pipeDecodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
671 where go dec = do chunk <- await
673 TE.Some text l dec' -> do yield text
675 {-# INLINEABLE pipeDecodeUtf8With #-}
678 -- | Splits a 'Producer' after the given number of characters
680 :: (Monad m, Integral n)
683 -> Producer' Text m (Producer Text m r)
690 Left r -> return (return r)
691 Right (txt, p') -> do
692 let len = fromIntegral (T.length txt)
698 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
700 return (yield suffix >> p')
701 {-# INLINABLE splitAt #-}
703 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
705 :: (Monad m, Integral n)
706 => n -> Producer Text m r -> FreeT (Producer Text m) m r
707 chunksOf n p0 = PP.FreeT (go p0)
713 Right (txt, p') -> PP.Free $ do
714 p'' <- splitAt n (yield txt >> p')
715 return $ PP.FreeT (go p'')
716 {-# INLINABLE chunksOf #-}
718 {-| Split a text stream in two, where the first text stream is the longest
719 consecutive group of text that satisfy the predicate
725 -> Producer' Text m (Producer Text m r)
731 Left r -> return (return r)
732 Right (txt, p') -> do
733 let (prefix, suffix) = T.span predicate txt
740 return (yield suffix >> p')
741 {-# INLINABLE span #-}
743 {-| Split a text stream in two, where the first text stream is the longest
744 consecutive group of characters that don't satisfy the predicate
750 -> Producer Text m (Producer Text m r)
751 break predicate = span (not . predicate)
752 {-# INLINABLE break #-}
754 {-| Split a text stream into sub-streams delimited by characters that satisfy the
761 -> PP.FreeT (Producer Text m) m r
762 splitWith predicate p0 = PP.FreeT (go0 p0)
767 Left r -> return (PP.Pure r)
771 else return $ PP.Free $ do
772 p'' <- span (not . predicate) (yield txt >> p')
773 return $ PP.FreeT (go1 p'')
778 Right (_, p') -> PP.Free $ do
779 p'' <- span (not . predicate) p'
780 return $ PP.FreeT (go1 p'')
781 {-# INLINABLE splitWith #-}
783 -- | Split a text stream using the given 'Char' as the delimiter
787 -> FreeT (Producer Text m) m r
788 split c = splitWith (c ==)
789 {-# INLINABLE split #-}
791 {-| Group a text stream into 'FreeT'-delimited text streams using the supplied
796 => (Char -> Char -> Bool)
798 -> FreeT (Producer Text m) m r
799 groupBy equal p0 = PP.FreeT (go p0)
804 Left r -> return (PP.Pure r)
805 Right (txt, p') -> case (T.uncons txt) of
808 return $ PP.Free $ do
809 p'' <- span (equal c) (yield txt >> p')
810 return $ PP.FreeT (go p'')
811 {-# INLINABLE groupBy #-}
813 -- | Group a text stream into 'FreeT'-delimited text streams of identical characters
815 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
817 {-# INLINABLE group #-}
819 {-| Split a text stream into 'FreeT'-delimited lines
822 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
823 lines p0 = PP.FreeT (go0 p0)
828 Left r -> return (PP.Pure r)
832 else return $ PP.Free $ go1 (yield txt >> p')
834 p' <- break ('\n' ==) p
835 return $ PP.FreeT $ do
838 Left r -> return $ PP.Pure r
839 Right (_, p'') -> go0 p''
840 {-# INLINABLE lines #-}
844 -- | Split a text stream into 'FreeT'-delimited words
846 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
850 x <- next (p >-> dropWhile isSpace)
853 Right (bs, p') -> PP.Free $ do
854 p'' <- break isSpace (yield bs >> p')
856 {-# INLINABLE words #-}
859 -- | Intersperse a 'Char' in between the characters of the text stream
861 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
868 Right (txt, p') -> do
869 yield (T.intersperse c txt)
875 Right (txt, p') -> do
876 yield (T.singleton c)
877 yield (T.intersperse c txt)
879 {-# INLINABLE intersperse #-}
881 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
882 interspersing a text stream in between them
886 => Producer Text m ()
887 -> FreeT (Producer Text m) m r
892 x <- lift (PP.runFreeT f)
894 PP.Pure r -> return r
899 x <- lift (PP.runFreeT f)
901 PP.Pure r -> return r
906 {-# INLINABLE intercalate #-}
908 {-| Join 'FreeT'-delimited lines into a text stream
911 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
915 x <- lift (PP.runFreeT f)
917 PP.Pure r -> return r
920 yield $ T.singleton '\n'
922 {-# INLINABLE unlines #-}
924 {-| Join 'FreeT'-delimited words into a text stream
927 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
928 unwords = intercalate (yield $ T.pack " ")
929 {-# INLINABLE unwords #-}
932 The following parsing utilities are single-character analogs of the ones found
937 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
939 @Data.Text@ re-exports the 'Text' type.
941 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).