1 {-# LANGUAGE RankNTypes, TypeFamilies, CPP #-}
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
6 be associated with a 'Producer' or 'Consumer' according as it is read or written to.
8 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
9 example, the following program copies a document from one file to another:
12 > import qualified Data.Text.Pipes as Text
16 > withFile "inFile.txt" ReadMode $ \hIn ->
17 > withFile "outFile.txt" WriteMode $ \hOut ->
18 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
20 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
23 > import qualified Data.Text.Pipes as Text
26 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
28 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
29 and 'stdout' proxies, as with the following \"echo\" program:
31 > main = runEffect $ Text.stdin >-> Text.stdout
33 You can also translate pure lazy 'TL.Text's to and from proxies:
35 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
37 In addition, this module provides many functions equivalent to lazy
38 'Text' functions so that you can transform or fold text streams. For
39 example, to stream only the first three lines of 'stdin' to 'stdout' you
43 > import qualified Pipes.Text as Text
44 > import qualified Pipes.Parse as Parse
46 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
48 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
50 The above program will never bring more than one chunk of text (~ 32 KB) into
51 memory, no matter how long the lines are.
53 Note that functions in this library are designed to operate on streams that
54 are insensitive to text boundaries. This means that they may freely split
55 text into smaller texts and /discard empty texts/. However, they will
56 /never concatenate texts/ in order to provide strict upper bounds on memory
84 #if MIN_VERSION_text(0,11,4)
122 #if MIN_VERSION_text(0,11,4)
134 -- * Character Parsers
148 import Control.Exception (throwIO, try)
149 import Control.Monad (liftM, unless)
150 import Control.Monad.Trans.State.Strict (StateT(..))
151 import qualified Data.Text as T
152 import qualified Data.Text.IO as T
153 import qualified Data.Text.Encoding as TE
154 import qualified Data.Text.Encoding.Error as TE
155 import Data.Text (Text)
156 import qualified Data.Text.Lazy as TL
157 import qualified Data.Text.Lazy.IO as TL
158 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
159 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
160 import Data.ByteString (ByteString)
161 import qualified Data.ByteString as B
162 import Data.Char (ord, isSpace)
163 import Data.Functor.Identity (Identity)
164 import qualified Data.List as List
165 import Foreign.C.Error (Errno(Errno), ePIPE)
166 import qualified GHC.IO.Exception as G
168 import qualified Pipes.ByteString as PB
169 import qualified Pipes.ByteString.Parse as PBP
170 import Pipes.Text.Parse (
171 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
172 import Pipes.Core (respond, Server')
173 import qualified Pipes.Parse as PP
174 import Pipes.Parse (input, concat, FreeT)
175 import qualified Pipes.Safe.Prelude as Safe
176 import qualified Pipes.Safe as Safe
177 import Pipes.Safe (MonadSafe(..), Base(..))
178 import qualified Pipes.Prelude as P
179 import qualified System.IO as IO
180 import Data.Char (isSpace)
181 import Data.Word (Word8)
182 import Prelude hiding (
211 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
212 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
213 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
214 {-# INLINABLE fromLazy #-}
216 -- | Stream text from 'stdin'
217 stdin :: MonadIO m => Producer' Text m ()
218 stdin = fromHandle IO.stdin
219 {-# INLINABLE stdin #-}
221 {-| Convert a 'IO.Handle' into a text stream using a text size
222 determined by the good sense of the text library.
226 fromHandle :: MonadIO m => IO.Handle -> Producer' Text m ()
227 #if MIN_VERSION_text(0,11,4)
228 fromHandle h = go TE.streamDecodeUtf8 where
229 act = B.hGetSome h defaultChunkSize
230 go dec = do chunk <- liftIO act
232 TE.Some text _ dec' -> do yield text
233 unless (B.null chunk) (go dec')
234 {-# INLINE fromHandle#-}
235 -- bytestring fromHandle + streamDecodeUtf8 is 3 times as fast as
236 -- the dedicated Text IO function 'hGetChunk' ;
237 -- this way "runEffect $ PT.fromHandle hIn >-> PT.toHandle hOut"
238 -- runs the same as the conduit equivalent, only slightly slower
239 -- than "runEffect $ PB.fromHandle hIn >-> PB.toHandle hOut"
241 fromHandle h = go where
242 go = do txt <- liftIO (T.hGetChunk h)
243 unless (T.null txt) $ do yield txt
245 {-# INLINABLE fromHandle#-}
247 {-| Stream text from a file using Pipes.Safe
249 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
250 MAIN = PUTSTRLN "HELLO WORLD"
253 readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m ()
254 readFile file = Safe.withFile file IO.ReadMode fromHandle
255 {-# INLINABLE readFile #-}
257 {-| Stream lines of text from stdin (for testing in ghci etc.)
259 >>> let safely = runSafeT . runEffect
260 >>> safely $ for Text.stdinLn (lift . lift . print . T.length)
267 stdinLn :: MonadIO m => Producer' Text m ()
270 eof <- liftIO (IO.hIsEOF IO.stdin)
272 txt <- liftIO (T.hGetLine IO.stdin)
277 {-| Stream text to 'stdout'
279 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
281 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
282 @(source >-> stdout)@ in suitable cases.
284 stdout :: MonadIO m => Consumer' Text m ()
289 x <- liftIO $ try (T.putStr txt)
291 Left (G.IOError { G.ioe_type = G.ResourceVanished
292 , G.ioe_errno = Just ioe })
295 Left e -> liftIO (throwIO e)
297 {-# INLINABLE stdout #-}
299 stdoutLn :: (MonadIO m) => Consumer' Text m ()
304 x <- liftIO $ try (T.putStrLn str)
306 Left (G.IOError { G.ioe_type = G.ResourceVanished
307 , G.ioe_errno = Just ioe })
310 Left e -> liftIO (throwIO e)
312 {-# INLINABLE stdoutLn #-}
314 {-| Convert a text stream into a 'Handle'
316 Note: again, for best performance, where possible use
317 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
319 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
320 toHandle h = for cat (liftIO . T.hPutStr h)
321 {-# INLINABLE toHandle #-}
323 {-# RULES "p >-> toHandle h" forall p h .
324 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
328 -- | Stream text into a file. Uses @pipes-safe@.
329 writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m ()
330 writeFile file = Safe.withFile file IO.WriteMode toHandle
332 -- | Apply a transformation to each 'Char' in the stream
333 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
334 map f = P.map (T.map f)
335 {-# INLINABLE map #-}
337 {-# RULES "p >-> map f" forall p f .
338 p >-> map f = for p (\txt -> yield (T.map f txt))
341 -- | Map a function over the characters of a text stream and concatenate the results
343 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
344 concatMap f = P.map (T.concatMap f)
345 {-# INLINABLE concatMap #-}
347 {-# RULES "p >-> concatMap f" forall p f .
348 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
351 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
352 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
353 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
354 encodeUtf8 :: Monad m => Pipe Text ByteString m r
355 encodeUtf8 = P.map TE.encodeUtf8
356 {-# INLINEABLE encodeUtf8 #-}
358 {-# RULES "p >-> encodeUtf8" forall p .
359 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
362 -- | Transform a Pipe of 'String's into one of 'Text' chunks
363 pack :: Monad m => Pipe String Text m r
365 {-# INLINEABLE pack #-}
367 {-# RULES "p >-> pack" forall p .
368 p >-> pack = for p (\txt -> yield (T.pack txt))
371 -- | Transform a Pipes of 'Text' chunks into one of 'String's
372 unpack :: Monad m => Pipe Text String m r
373 unpack = for cat (\t -> yield (T.unpack t))
374 {-# INLINEABLE unpack #-}
376 {-# RULES "p >-> unpack" forall p .
377 p >-> unpack = for p (\txt -> yield (T.unpack txt))
380 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
381 -- here acting on a 'Text' pipe, rather as they would on a lazy text
382 toCaseFold :: Monad m => Pipe Text Text m ()
383 toCaseFold = P.map T.toCaseFold
384 {-# INLINEABLE toCaseFold #-}
386 {-# RULES "p >-> toCaseFold" forall p .
387 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
391 -- | lowercase incoming 'Text'
392 toLower :: Monad m => Pipe Text Text m ()
393 toLower = P.map T.toLower
394 {-# INLINEABLE toLower #-}
396 {-# RULES "p >-> toLower" forall p .
397 p >-> toLower = for p (\txt -> yield (T.toLower txt))
400 -- | uppercase incoming 'Text'
401 toUpper :: Monad m => Pipe Text Text m ()
402 toUpper = P.map T.toUpper
403 {-# INLINEABLE toUpper #-}
405 {-# RULES "p >-> toUpper" forall p .
406 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
409 -- | Remove leading white space from an incoming succession of 'Text's
410 stripStart :: Monad m => Pipe Text Text m r
413 let text = T.stripStart chunk
417 {-# INLINEABLE stripStart #-}
419 -- | @(take n)@ only allows @n@ individual characters to pass;
420 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
421 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
422 take n0 = go n0 where
427 let len = fromIntegral (T.length txt)
429 then yield (T.take (fromIntegral n) txt)
433 {-# INLINABLE take #-}
435 -- | @(drop n)@ drops the first @n@ characters
436 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
437 drop n0 = go n0 where
442 let len = fromIntegral (T.length txt)
445 yield (T.drop (fromIntegral n) txt)
448 {-# INLINABLE drop #-}
450 -- | Take characters until they fail the predicate
451 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
452 takeWhile predicate = go
456 let (prefix, suffix) = T.span predicate txt
462 {-# INLINABLE takeWhile #-}
464 -- | Drop characters until they fail the predicate
465 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
466 dropWhile predicate = go where
469 case T.findIndex (not . predicate) txt of
474 {-# INLINABLE dropWhile #-}
476 -- | Only allows 'Char's to pass if they satisfy the predicate
477 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
478 filter predicate = P.map (T.filter predicate)
479 {-# INLINABLE filter #-}
481 {-# RULES "p >-> filter q" forall p q .
482 p >-> filter q = for p (\txt -> yield (T.filter q txt))
485 -- | Strict left scan over the characters
488 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
489 scan step begin = go begin
493 let txt' = T.scanl step c txt
497 {-# INLINABLE scan #-}
499 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
502 toLazy :: Producer Text Identity () -> TL.Text
503 toLazy = TL.fromChunks . P.toList
504 {-# INLINABLE toLazy #-}
506 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
509 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
510 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
511 immediately as they are generated instead of loading them all into memory.
513 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
514 toLazyM = liftM TL.fromChunks . P.toListM
515 {-# INLINABLE toLazyM #-}
517 -- | Reduce the text stream using a strict left fold over characters
520 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
521 fold step begin done = P.fold (T.foldl' step) begin done
522 {-# INLINABLE fold #-}
524 -- | Retrieve the first 'Char'
525 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
531 Left _ -> return Nothing
532 Right (c, _) -> return (Just c)
533 {-# INLINABLE head #-}
535 -- | Retrieve the last 'Char'
536 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
546 else go (Just $ T.last txt) p'
547 {-# INLINABLE last #-}
549 -- | Determine if the stream is empty
550 null :: (Monad m) => Producer Text m () -> m Bool
552 {-# INLINABLE null #-}
554 -- | Count the number of characters in the stream
555 length :: (Monad m, Num n) => Producer Text m () -> m n
556 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
557 {-# INLINABLE length #-}
559 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
560 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
561 any predicate = P.any (T.any predicate)
562 {-# INLINABLE any #-}
564 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
565 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
566 all predicate = P.all (T.all predicate)
567 {-# INLINABLE all #-}
569 -- | Return the maximum 'Char' within a text stream
570 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
571 maximum = P.fold step Nothing id
576 else Just $ case mc of
577 Nothing -> T.maximum txt
578 Just c -> max c (T.maximum txt)
579 {-# INLINABLE maximum #-}
581 -- | Return the minimum 'Char' within a text stream (surely very useful!)
582 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
583 minimum = P.fold step Nothing id
589 Nothing -> Just (T.minimum txt)
590 Just c -> Just (min c (T.minimum txt))
591 {-# INLINABLE minimum #-}
593 -- | Find the first element in the stream that matches the predicate
596 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
597 find predicate p = head (p >-> filter predicate)
598 {-# INLINABLE find #-}
600 -- | Index into a text stream
602 :: (Monad m, Integral a)
603 => a-> Producer Text m () -> m (Maybe Char)
604 index n p = head (p >-> drop n)
605 {-# INLINABLE index #-}
608 -- | Store a tally of how many segments match the given 'Text'
609 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
610 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
611 {-# INLINABLE count #-}
613 #if MIN_VERSION_text(0,11,4)
614 -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
615 -- into a Pipe of Text
618 => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
619 decodeUtf8 = go TE.streamDecodeUtf8
623 Left r -> return (return r)
624 Right (chunk, p') -> do
625 let TE.Some text l dec' = dec chunk
633 {-# INLINEABLE decodeUtf8 #-}
635 -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
636 -- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@
637 -- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\"
641 -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
642 decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
646 Left r -> return (return r)
647 Right (chunk, p') -> do
648 let TE.Some text l dec' = dec chunk
656 {-# INLINEABLE decodeUtf8With #-}
658 -- | A simple pipe from 'ByteString' to 'Text'; a decoding error will arise
659 -- with any chunk that contains a sequence of bytes that is unreadable. Otherwise
660 -- only few bytes will only be moved from one chunk to the next before decoding.
661 pipeDecodeUtf8 :: Monad m => Pipe ByteString Text m r
662 pipeDecodeUtf8 = go TE.streamDecodeUtf8
663 where go dec = do chunk <- await
665 TE.Some text l dec' -> do yield text
667 {-# INLINEABLE pipeDecodeUtf8 #-}
669 -- | A simple pipe from 'ByteString' to 'Text' using a replacement function.
673 -> Pipe ByteString Text m r
674 pipeDecodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
675 where go dec = do chunk <- await
677 TE.Some text l dec' -> do yield text
679 {-# INLINEABLE pipeDecodeUtf8With #-}
682 -- | Splits a 'Producer' after the given number of characters
684 :: (Monad m, Integral n)
687 -> Producer' Text m (Producer Text m r)
694 Left r -> return (return r)
695 Right (txt, p') -> do
696 let len = fromIntegral (T.length txt)
702 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
704 return (yield suffix >> p')
705 {-# INLINABLE splitAt #-}
707 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
709 :: (Monad m, Integral n)
710 => n -> Producer Text m r -> FreeT (Producer Text m) m r
711 chunksOf n p0 = PP.FreeT (go p0)
717 Right (txt, p') -> PP.Free $ do
718 p'' <- splitAt n (yield txt >> p')
719 return $ PP.FreeT (go p'')
720 {-# INLINABLE chunksOf #-}
722 {-| Split a text stream in two, where the first text stream is the longest
723 consecutive group of text that satisfy the predicate
729 -> Producer' Text m (Producer Text m r)
735 Left r -> return (return r)
736 Right (txt, p') -> do
737 let (prefix, suffix) = T.span predicate txt
744 return (yield suffix >> p')
745 {-# INLINABLE span #-}
747 {-| Split a text stream in two, where the first text stream is the longest
748 consecutive group of characters that don't satisfy the predicate
754 -> Producer Text m (Producer Text m r)
755 break predicate = span (not . predicate)
756 {-# INLINABLE break #-}
758 {-| Split a text stream into sub-streams delimited by characters that satisfy the
765 -> PP.FreeT (Producer Text m) m r
766 splitWith predicate p0 = PP.FreeT (go0 p0)
771 Left r -> return (PP.Pure r)
775 else return $ PP.Free $ do
776 p'' <- span (not . predicate) (yield txt >> p')
777 return $ PP.FreeT (go1 p'')
782 Right (_, p') -> PP.Free $ do
783 p'' <- span (not . predicate) p'
784 return $ PP.FreeT (go1 p'')
785 {-# INLINABLE splitWith #-}
787 -- | Split a text stream using the given 'Char' as the delimiter
791 -> FreeT (Producer Text m) m r
792 split c = splitWith (c ==)
793 {-# INLINABLE split #-}
795 {-| Group a text stream into 'FreeT'-delimited text streams using the supplied
800 => (Char -> Char -> Bool)
802 -> FreeT (Producer Text m) m r
803 groupBy equal p0 = PP.FreeT (go p0)
808 Left r -> return (PP.Pure r)
809 Right (txt, p') -> case (T.uncons txt) of
812 return $ PP.Free $ do
813 p'' <- span (equal c) (yield txt >> p')
814 return $ PP.FreeT (go p'')
815 {-# INLINABLE groupBy #-}
817 -- | Group a text stream into 'FreeT'-delimited text streams of identical characters
819 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
821 {-# INLINABLE group #-}
823 {-| Split a text stream into 'FreeT'-delimited lines
826 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
827 lines p0 = PP.FreeT (go0 p0)
832 Left r -> return (PP.Pure r)
836 else return $ PP.Free $ go1 (yield txt >> p')
838 p' <- break ('\n' ==) p
839 return $ PP.FreeT $ do
842 Left r -> return $ PP.Pure r
843 Right (_, p'') -> go0 p''
844 {-# INLINABLE lines #-}
848 -- | Split a text stream into 'FreeT'-delimited words
850 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
854 x <- next (p >-> dropWhile isSpace)
857 Right (bs, p') -> PP.Free $ do
858 p'' <- break isSpace (yield bs >> p')
860 {-# INLINABLE words #-}
863 -- | Intersperse a 'Char' in between the characters of the text stream
865 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
872 Right (txt, p') -> do
873 yield (T.intersperse c txt)
879 Right (txt, p') -> do
880 yield (T.singleton c)
881 yield (T.intersperse c txt)
883 {-# INLINABLE intersperse #-}
885 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
886 interspersing a text stream in between them
890 => Producer Text m ()
891 -> FreeT (Producer Text m) m r
896 x <- lift (PP.runFreeT f)
898 PP.Pure r -> return r
903 x <- lift (PP.runFreeT f)
905 PP.Pure r -> return r
910 {-# INLINABLE intercalate #-}
912 {-| Join 'FreeT'-delimited lines into a text stream
915 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
919 x <- lift (PP.runFreeT f)
921 PP.Pure r -> return r
924 yield $ T.singleton '\n'
926 {-# INLINABLE unlines #-}
928 {-| Join 'FreeT'-delimited words into a text stream
931 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
932 unwords = intercalate (yield $ T.pack " ")
933 {-# INLINABLE unwords #-}
936 The following parsing utilities are single-character analogs of the ones found
941 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
943 @Data.Text@ re-exports the 'Text' type.
945 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).