1 {-# LANGUAGE RankNTypes, TypeFamilies #-}
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of strict 'Text' chunks. Use text streams to interact
5 with both 'IO.Handle's and lazy 'Text's.
7 To stream to or from 'IO.Handle's, use 'fromHandle' or 'toHandle'. For
8 example, the following program copies data from one file to another:
11 > import qualified Data.Text.Pipes as P
15 > withFile "inFile.txt" ReadMode $ \hIn ->
16 > withFile "outFile.txt" WriteMode $ \hOut ->
17 > runEffect $ P.fromHandle hIn >-> P.toHandle hOut
19 The following is more Prelude-like and uses "Pipes.Safe":
22 > import qualified Data.Text.Pipes as P
25 > main = runSafeT $ runEffect $ P.readFile "inFile.txt" >-> P.writeFile "outFile.txt"
28 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
29 and 'stdout' proxies, like in the following \"echo\" program:
31 > main = runEffect $ P.stdin >-> P.stdout
33 You can also translate pure lazy 'TL.Text's to and from proxies:
35 > import qualified Data.Text.Lazy as TL
37 > main = runEffect $ P.fromLazy (TL.pack "Hello, world!\n") >-> P.stdout
39 In addition, this module provides many functions equivalent to lazy
40 'Text' functions so that you can transform or fold text streams. For
41 example, to stream only the first three lines of 'stdin' to 'stdout' you
45 > import qualified Data.Text.Pipes as PT
46 > import qualified Pipes.Parse as PP
48 > main = runEffect $ takeLines 3 PT.stdin >-> PT.stdout
50 > takeLines n = PT.unlines . PP.takeFree n . PT.lines
52 The above program will never bring more than one chunk (~ 32 KB) into
53 memory, no matter how long the lines are.
55 Note that functions in this library are designed to operate on streams that
56 are insensitive to chunk boundaries. This means that they may freely split
57 chunks into smaller chunks and /discard empty chunks/. However, they will
58 /never concatenate chunks/ in order to provide strict upper bounds on memory
62 module Data.Text.Pipes (
134 -- * Low-level Parsers
150 import Control.Exception (throwIO, try)
151 import Control.Monad (liftM, unless)
152 import Control.Monad.Trans.State.Strict (StateT)
153 import qualified Data.Text as T
154 import qualified Data.Text.IO as T
155 import Data.Text (Text)
156 import qualified Data.Text.Lazy as TL
157 import qualified Data.Text.Lazy.IO as TL
158 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
159 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
160 import Data.Char (ord)
161 import Data.Functor.Identity (Identity)
162 import qualified Data.List as List
163 import Foreign.C.Error (Errno(Errno), ePIPE)
164 import qualified GHC.IO.Exception as G
166 import qualified Pipes.ByteString.Parse as PBP
167 import Pipes.ByteString.Parse (
168 nextByte, drawByte, unDrawByte, peekByte, isEndOfBytes )
169 import Pipes.Core (respond, Server')
170 import qualified Pipes.Parse as PP
171 import Pipes.Parse (input, concat, FreeT)
172 import qualified Pipes.Safe.Prelude as Safe
173 import qualified Pipes.Safe as Safe
174 import Pipes.Safe (MonadSafe(..), Base(..))
175 import qualified Pipes.Prelude as P
176 import qualified System.IO as IO
177 import Data.Char (isSpace)
178 import Prelude hiding (
207 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
--
-- The strict chunks of the lazy text are 'yield'ed one by one, in order,
-- without being concatenated.
208 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
-- Right fold over the chunks: emit the current chunk, then the rest.
209 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
210 {-# INLINABLE fromLazy #-}
212 -- | Stream text from 'IO.stdin' as strict 'Text' chunks
--
-- This is 'fromHandle' applied to 'IO.stdin'.
213 stdin :: MonadIO m => Producer' Text m ()
214 stdin = fromHandle IO.stdin
215 {-# INLINABLE stdin #-}
217 -- | Convert a 'IO.Handle' into a text stream using a chunk size
218 -- determined by the good sense of the text library.
--
-- Chunks are read with 'T.hGetChunk'. An empty chunk is never yielded;
-- reading one ends the stream.
219 fromHandle :: MonadIO m => IO.Handle -> Producer' Text m ()
220 fromHandle h = go where
221 go = do txt <- liftIO (T.hGetChunk h)
-- Only non-empty chunks are passed downstream.
222 unless (T.null txt) $ do yield txt
224 {-# INLINABLE fromHandle#-}
226 {-| Stream strict 'Text' chunks from a file, opened via "Pipes.Safe"
228 >>> runSafeT $ runEffect $ readFile "README.md" >-> map toUpper >-> hoist lift stdout
231 TEXT PIPES, SOMEHOW TO BE FUSED WITH `PIPES-TEXT`.
236 readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m ()
-- 'Safe.withFile' opens the file in 'IO.ReadMode' and hands the handle to
-- 'fromHandle'.
237 readFile file = Safe.withFile file IO.ReadMode fromHandle
238 {-# INLINABLE readFile #-}
-- | Stream text from 'IO.stdin' one line at a time
--
-- The loop checks 'IO.hIsEOF' on 'IO.stdin' and reads a line with
-- 'T.hGetLine'.
240 stdinLn :: MonadIO m => Producer' Text m ()
244 eof <- liftIO (IO.hIsEOF IO.stdin)
246 txt <- liftIO (T.hGetLine IO.stdin)
250 {-| Convert a handle into a byte stream using a fixed chunk size
252 'hGet' waits until exactly the requested number of bytes are available for
255 -- hGet :: MonadIO m => Int -> IO.Handle -> Producer' Text m ()
256 -- hGet size h = go where
258 -- eof <- liftIO (IO.hIsEOF h)
262 -- bs <- liftIO (T.hGet h size)
265 -- {-# INLINABLE hGet #-}
267 {-| Like 'hGetSome', except you can vary the maximum chunk size for each request
269 -- hGetSomeN :: MonadIO m => IO.Handle -> Int -> Server' Int Text m ()
270 -- hGetSomeN h = go where
272 -- eof <- liftIO (IO.hIsEOF h)
276 -- bs <- liftIO (T.hGetSome h size)
277 -- size2 <- respond bs
279 -- {-# INLINABLE hGetSomeN #-}
281 -- -- | Like 'hGet', except you can vary the chunk size for each request
282 -- hGetN :: MonadIO m => IO.Handle -> Int -> Server' Int Text m ()
283 -- hGetN h = go where
285 -- eof <- liftIO (IO.hIsEOF h)
289 -- bs <- liftIO (T.hGet h size)
290 -- size2 <- respond bs
292 -- {-# INLINABLE hGetN #-}
294 {-| Stream text to 'stdout'
296 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
298 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
299 @(source >-> stdout)@.
301 stdout :: MonadIO m => Consumer' Text m ()
306 x <- liftIO $ try (T.putStr txt)
308 Left (G.IOError { G.ioe_type = G.ResourceVanished
309 , G.ioe_errno = Just ioe })
-- Any other I/O exception is rethrown unchanged.
312 Left e -> liftIO (throwIO e)
314 {-# INLINABLE stdout #-}
-- | Write each received 'Text' to 'stdout' followed by a newline
--
-- The write ('T.putStrLn') is wrapped in 'try'. A 'G.ResourceVanished'
-- 'G.IOError' carrying an errno is matched as its own case; every other
-- exception is rethrown with 'throwIO'.
316 stdoutLn :: (MonadIO m) => Consumer' Text m ()
321 x <- liftIO $ try (T.putStrLn str)
323 Left (G.IOError { G.ioe_type = G.ResourceVanished
324 , G.ioe_errno = Just ioe })
-- Any other I/O exception is rethrown unchanged.
327 Left e -> liftIO (throwIO e)
329 {-# INLINABLE stdoutLn #-}
331 {-| Write a text stream to a 'IO.Handle'
333 Note: For best performance, use @(for source (liftIO . hPutStr handle))@
334 instead of @(source >-> toHandle handle)@.
336 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
-- Every chunk received from upstream is written with 'T.hPutStr'.
337 toHandle h = for cat (liftIO . T.hPutStr h)
338 {-# INLINABLE toHandle #-}
-- | Stream text to a file using "Pipes.Safe"
--
-- The file is opened in 'IO.WriteMode' via 'Safe.withFile' and every chunk
-- received from upstream is written to it with 'toHandle'.
340 writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m ()
341 writeFile file = Safe.withFile file IO.WriteMode toHandle
{-# INLINABLE writeFile #-}
343 -- | Apply a transformation to each 'Char' in the stream
--
-- Works chunk by chunk ('T.map' under 'P.map'), so every incoming chunk
-- produces exactly one outgoing chunk.
344 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
345 map f = P.map (T.map f)
346 {-# INLINABLE map #-}
348 -- | Map a function over the 'Char's of the text stream and concatenate the results
--
-- The function is applied chunk by chunk with 'T.concatMap'; a chunk whose
-- results are all empty is still passed downstream as an empty chunk.
350 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
351 concatMap f = P.map (T.concatMap f)
352 {-# INLINABLE concatMap #-}
354 -- | @(take n)@ only allows the first @n@ 'Char's to pass
--
-- Chunk sizes are measured with 'T.length', i.e. in 'Char's rather than bytes.
355 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
356 take n0 = go n0 where
361 let len = fromIntegral (T.length bs)
363 then yield (T.take (fromIntegral n) bs)
367 {-# INLINABLE take #-}
369 -- | @(drop n)@ drops the first @n@ 'Char's
--
-- Chunk sizes are measured with 'T.length', i.e. in 'Char's rather than bytes.
370 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
371 drop n0 = go n0 where
376 let len = fromIntegral (T.length bs)
379 yield (T.drop (fromIntegral n) bs)
382 {-# INLINABLE drop #-}
384 -- | Take 'Char's until they fail the predicate
--
-- Each chunk is divided with 'T.span' on the predicate.
385 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
386 takeWhile predicate = go
390 let (prefix, suffix) = T.span predicate bs
396 {-# INLINABLE takeWhile #-}
398 -- | Drop 'Char's until they fail the predicate
--
-- The first 'Char' failing the predicate is located in each chunk with
-- 'T.findIndex'.
399 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
400 dropWhile predicate = go where
403 case T.findIndex (not . predicate) bs of
408 {-# INLINABLE dropWhile #-}
410 -- | Only allows 'Char's to pass if they satisfy the predicate
--
-- Filtering is done chunk by chunk with 'T.filter'; a chunk in which no
-- 'Char' satisfies the predicate is still passed downstream as an empty chunk.
411 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
412 filter predicate = P.map (T.filter predicate)
413 {-# INLINABLE filter #-}
415 -- | Stream all indices whose elements match the given 'Char'
416 -- elemIndices :: (Monad m, Num n) => Char -> Pipe Text n m r
417 -- elemIndices w8 = findIndices (w8 ==)
418 -- {-# INLINABLE elemIndices #-}
420 -- | Stream all indices whose elements satisfy the given predicate
421 -- findIndices :: (Monad m, Num n) => (Char -> Bool) -> Pipe Text n m r
422 -- findIndices predicate = go 0
426 -- each $ List.map (\i -> n + fromIntegral i) (T.findIndices predicate bs)
427 -- go $! n + fromIntegral (T.length bs)
428 -- {-# INLINABLE findIndices #-}
430 -- | Strict left scan over the 'Char's
--
-- Each chunk is scanned with 'T.scanl'.
433 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
434 scan step begin = go begin
438 let bs' = T.scanl step w8 bs
442 {-# INLINABLE scan #-}
444 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
447 toLazy :: Producer Text Identity () -> TL.Text
-- Collect every chunk with 'P.toList' and assemble them with 'TL.fromChunks'.
448 toLazy = TL.fromChunks . P.toList
449 {-# INLINABLE toLazy #-}
451 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
454 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
455 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
456 immediately as they are generated instead of loading them all into memory.
458 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
-- Gather all chunks with 'P.toListM', then build the lazy text from them
-- with 'TL.fromChunks'.
459 toLazyM = liftM TL.fromChunks . P.toListM
460 {-# INLINABLE toLazyM #-}
462 -- | Reduce the stream of 'Char's using a strict left fold
--
-- @step@ is applied to every 'Char' of every chunk (via 'T.foldl''),
-- threading the accumulator that starts at @begin@; @done@ converts the
-- final accumulator into the result.
465 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
466 fold step begin done = P.fold (\x bs -> T.foldl' step x bs) begin done
467 {-# INLINABLE fold #-}
469 -- | Retrieve the first 'Char' of the stream, or 'Nothing' if there is none
470 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
476 Left _ -> return Nothing
-- Only the first 'Char' is returned; the remainder is discarded.
477 Right (w8, _) -> return (Just w8)
478 {-# INLINABLE head #-}
480 -- | Retrieve the last 'Char' of the stream
481 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
491 else go (Just $ T.last bs) p'
492 -- TODO: Change this to 'unsafeLast' when bytestring-0.10.2.0
493 -- becomes more widespread
-- NOTE(review): this TODO mentions bytestring although 'T.last' here comes
-- from "Data.Text"; it looks inherited from a ByteString version of this
-- code - confirm whether it still applies.
494 {-# INLINABLE last #-}
496 -- | Determine if the stream is empty
497 null :: (Monad m) => Producer Text m () -> m Bool
499 {-# INLINABLE null #-}
501 -- | Count the number of 'Char's in the stream
--
-- Sums 'T.length' over every chunk, so the count is in 'Char's, not bytes.
502 length :: (Monad m, Num n) => Producer Text m () -> m n
503 length = P.fold (\n bs -> n + fromIntegral (T.length bs)) 0 id
504 {-# INLINABLE length #-}
506 -- | Fold that returns whether any received 'Char' satisfies the predicate
507 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
-- A chunk matches when 'T.any' finds a satisfying 'Char' in it.
508 any predicate = P.any (T.any predicate)
509 {-# INLINABLE any #-}
511 -- | Fold that returns whether all received 'Char's satisfy the predicate
512 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
-- A chunk passes when 'T.all' holds for every 'Char' in it.
513 all predicate = P.all (T.all predicate)
514 {-# INLINABLE all #-}
516 -- | Return the maximum 'Char' within a text stream
--
-- The fold starts from 'Nothing' and combines the running result with each
-- chunk's 'T.maximum' using 'max'.
517 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
518 maximum = P.fold step Nothing id
523 else Just $ case mw8 of
524 Nothing -> T.maximum bs
525 Just w8 -> max w8 (T.maximum bs)
526 {-# INLINABLE maximum #-}
528 -- | Return the minimum 'Char' within a text stream
--
-- The fold starts from 'Nothing' and combines the running result with each
-- chunk's 'T.minimum' using 'min'.
529 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
530 minimum = P.fold step Nothing id
536 Nothing -> Just (T.minimum bs)
537 Just w8 -> Just (min w8 (T.minimum bs))
538 {-# INLINABLE minimum #-}
540 -- | Determine whether any element in the byte stream matches the given 'Char'
541 -- elem :: (Monad m) => Char -> Producer Text m () -> m Bool
542 -- elem w8 = P.any (T.elem w8)
543 -- {-# INLINABLE elem #-}
545 -- {-| Determine whether all elements in the byte stream do not match the given
548 -- notElem :: (Monad m) => Char -> Producer Text m () -> m Bool
549 -- notElem w8 = P.all (T.notElem w8)
550 -- {-# INLINABLE notElem #-}
552 -- | Find the first 'Char' in the stream that matches the predicate
555 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
-- Filter the stream by the predicate, then take the 'head' of what remains.
556 find predicate p = head (p >-> filter predicate)
557 {-# INLINABLE find #-}
559 -- | Index into a text stream
--
-- Returns the 'Char' at zero-based position @n@, counted in 'Char's.
561 :: (Monad m, Integral a)
562 => a-> Producer Text m () -> m (Maybe Char)
-- 'drop' the first @n@ 'Char's, then take the 'head' of the rest.
563 index n p = head (p >-> drop n)
564 {-# INLINABLE index #-}
566 -- | Find the index of an element that matches the given 'Char'
568 -- :: (Monad m, Num n) => Char -> Producer Text m () -> m (Maybe n)
569 -- elemIndex w8 = findIndex (w8 ==)
570 -- {-# INLINABLE elemIndex #-}
572 -- | Store the first index of an element that satisfies the predicate
574 -- :: (Monad m, Num n)
575 -- => (Char -> Bool) -> Producer Text m () -> m (Maybe n)
576 -- findIndex predicate p = P.head (p >-> findIndices predicate)
577 -- {-# INLINABLE findIndex #-}
579 -- -- | Store a tally of how many elements match the given 'Char'
580 -- count :: (Monad m, Num n) => Char -> Producer Text m () -> m n
581 -- count w8 p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count w8))
582 -- {-# INLINABLE count #-}
584 -- | Splits a 'Producer' after the given number of 'Char's
--
-- The returned 'Producer' is the remainder of the stream. A chunk that
-- straddles the boundary is cut with 'T.splitAt' and its suffix is put back
-- in front of the remaining producer.
586 :: (Monad m, Integral n)
589 -> Producer' Text m (Producer Text m r)
596 Left r -> return (return r)
598 let len = fromIntegral (T.length bs)
604 let (prefix, suffix) = T.splitAt (fromIntegral n) bs
606 return (yield suffix >> p')
607 {-# INLINABLE splitAt #-}
609 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
--
-- Each group is produced by applying 'splitAt' @n@ to what remains of the
-- stream, so sizes are counted in 'Char's.
611 :: (Monad m, Integral n)
612 => n -> Producer Text m r -> FreeT (Producer Text m) m r
613 chunksOf n p0 = PP.FreeT (go p0)
619 Right (bs, p') -> PP.Free $ do
-- Put the chunk back in front of the stream and cut off the next group.
620 p'' <- splitAt n (yield bs >> p')
621 return $ PP.FreeT (go p'')
622 {-# INLINABLE chunksOf #-}
624 {-| Split a text stream in two, where the first text stream is the longest
625 consecutive group of 'Char's that satisfy the predicate
631 -> Producer' Text m (Producer Text m r)
637 Left r -> return (return r)
639 let (prefix, suffix) = T.span predicate bs
-- The unconsumed suffix is put back in front of the remaining producer.
646 return (yield suffix >> p')
647 {-# INLINABLE span #-}
649 {-| Split a text stream in two, where the first text stream is the longest
650 consecutive group of 'Char's that don't satisfy the predicate
656 -> Producer Text m (Producer Text m r)
-- 'break' is 'span' with the predicate negated.
657 break predicate = span (not . predicate)
658 {-# INLINABLE break #-}
660 {-| Split a text stream into sub-streams delimited by 'Char's that satisfy the
667 -> PP.FreeT (Producer Text m) m r
668 splitWith predicate p0 = PP.FreeT (go0 p0)
673 Left r -> return (PP.Pure r)
677 else return $ PP.Free $ do
-- A sub-stream runs up to the next 'Char' that satisfies the predicate.
678 p'' <- span (not . predicate) (yield bs >> p')
679 return $ PP.FreeT (go1 p'')
684 Right (_, p') -> PP.Free $ do
685 p'' <- span (not . predicate) p'
686 return $ PP.FreeT (go1 p'')
687 {-# INLINABLE splitWith #-}
689 -- | Split a text stream using the given 'Char' as the delimiter
693 -> FreeT (Producer Text m) m r
-- 'splitWith' specialised to equality with the delimiter.
694 split w8 = splitWith (w8 ==)
695 {-# INLINABLE split #-}
697 {-| Group a text stream into 'FreeT'-delimited text streams using the supplied
702 => (Char -> Char -> Bool)
704 -> FreeT (Producer Text m) m r
705 groupBy equal p0 = PP.FreeT (go p0)
710 Left r -> return (PP.Pure r)
711 Right (bs, p') -> case (T.uncons bs) of
714 return $ PP.Free $ do
-- The group extends while 'Char's are @equal@ to @w8@ (presumably the first
-- 'Char' of the chunk obtained from 'T.uncons' - confirm against the
-- binding site).
715 p'' <- span (equal w8) (yield bs >> p')
716 return $ PP.FreeT (go p'')
717 {-# INLINABLE groupBy #-}
719 -- | Group a text stream into 'FreeT'-delimited text streams of identical 'Char's
721 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
723 {-# INLINABLE group #-}
725 {-| Split a text stream into 'FreeT'-delimited lines
727 Note: This function is purely for demonstration purposes since it assumes a
728 particular encoding. You should prefer the 'Data.Text.Text' equivalent of
729 this function from the upcoming @pipes-text@ library.
732 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
733 lines p0 = PP.FreeT (go0 p0)
738 Left r -> return (PP.Pure r)
742 else return $ PP.Free $ go1 (yield bs >> p')
-- A line runs up to the next @'\\n'@.
744 p' <- break ('\n' ==) p
745 return $ PP.FreeT (go2 p')
750 Right (_, p') -> PP.Free (go1 p')
751 {-# INLINABLE lines #-}
753 {-| Split a text stream into 'FreeT'-delimited words
755 Note: This function is purely for demonstration purposes since it assumes a
756 particular encoding. You should prefer the 'Data.Text.Text' equivalent of
757 this function from the upcoming @pipes-text@ library.
760 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
-- Split on 'isSpace', then pass the groups through the local @removeEmpty@.
761 words p0 = removeEmpty (splitWith isSpace p0)
763 removeEmpty f = PP.FreeT $ do
766 PP.Pure r -> return (PP.Pure r)
770 Left f' -> PP.runFreeT (removeEmpty f')
771 Right (bs, p') -> return $ PP.Free $ do
774 return (removeEmpty f')
775 {-# INLINABLE words #-}
777 -- | Intersperse a 'Char' in between the 'Char's of the text stream
--
-- Each chunk is passed through 'T.intersperse'; the loop also yields a lone
-- separator chunk ('T.singleton') ahead of an interspersed chunk, presumably
-- to separate successive chunks.
779 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
787 yield (T.intersperse w8 bs)
794 yield (T.singleton w8)
795 yield (T.intersperse w8 bs)
797 {-# INLINABLE intersperse #-}
799 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
800 interspersing a text stream in between them
804 => Producer Text m ()
805 -> FreeT (Producer Text m) m r
810 x <- lift (PP.runFreeT f)
-- No groups remain: finish with the final return value.
812 PP.Pure r -> return r
817 x <- lift (PP.runFreeT f)
819 PP.Pure r -> return r
824 {-# INLINABLE intercalate #-}
826 {-| Join 'FreeT'-delimited lines into a text stream
828 Note: This function is purely for demonstration purposes since it assumes a
829 particular encoding. You should prefer the 'Data.Text.Text' equivalent of
830 this function from the upcoming @pipes-text@ library.
833 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
837 x <- lift (PP.runFreeT f)
839 PP.Pure r -> return r
-- A single @'\\n'@ chunk is emitted as the line terminator.
842 yield $ T.singleton '\n'
844 {-# INLINABLE unlines #-}
846 {-| Join 'FreeT'-delimited words into a text stream
848 Note: This function is purely for demonstration purposes since it assumes a
849 particular encoding. You should prefer the 'Data.Text.Text' equivalent of
850 this function from the upcoming @pipes-text@ library.
853 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
-- Words are joined by placing a single-space chunk between them with
-- 'intercalate'.
854 unwords = intercalate (yield $ T.pack " ")
855 {-# INLINABLE unwords #-}
858 The following parsing utilities are single-'Char' analogs of the ones found
862 {-| Take 'Char's until they fail the predicate
864 Unlike 'takeWhile', this 'PP.unDraw's unused 'Char's
869 -- -> Pipe Text Text (StateT (Producer Text m r) m) ()
870 -- takeWhile' = PBP.takeWhile
871 -- {-# INLINABLE takeWhile' #-}
872 -- {-# DEPRECATED takeWhile' "Use Pipes.Text.Parse.takeWhile instead" #-}
875 "Pipes.Text.Parse" re-exports 'nextByte', 'drawByte', 'unDrawByte',
876 'peekByte', and 'isEndOfBytes'.
878 @Data.Text@ re-exports the 'Text' type.
880 @Data.Char@ re-exports the 'Char' type.
882 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).