1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-}
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
6 be associated with a 'Producer' or 'Consumer' according as it is read or written to.
8 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
9 example, the following program copies a document from one file to another:
12 > import qualified Data.Text.Pipes as Text
16 > withFile "inFile.txt" ReadMode $ \hIn ->
17 > withFile "outFile.txt" WriteMode $ \hOut ->
18 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
20 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
23 > import qualified Data.Text.Pipes as Text
26 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
28 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
29 and 'stdout' proxies, as with the following \"echo\" program:
31 > main = runEffect $ Text.stdin >-> Text.stdout
33 You can also translate pure lazy 'TL.Text's to and from proxies:
35 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
37 In addition, this module provides many functions equivalent to lazy
38 'Text' functions so that you can transform or fold text streams. For
39 example, to stream only the first three lines of 'stdin' to 'stdout' you
43 > import qualified Pipes.Text as Text
44 > import qualified Pipes.Parse as Parse
46 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
48 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
50 The above program will never bring more than one chunk of text (~ 32 KB) into
51 memory, no matter how long the lines are.
53 Note that functions in this library are designed to operate on streams that
54 are insensitive to text boundaries. This means that they may freely split
55 text into smaller texts and /discard empty texts/. However, they will
56 /never concatenate texts/ in order to provide strict upper bounds on memory
127 -- * Character Parsers
141 import Control.Exception (throwIO, try)
142 import Control.Monad (liftM, unless)
143 import Control.Monad.Trans.State.Strict (StateT(..))
144 import Data.Monoid ((<>))
145 import qualified Data.Text as T
146 import qualified Data.Text.IO as T
147 import qualified Data.Text.Encoding as TE
148 import qualified Data.Text.Encoding.Error as TE
149 import Data.Text (Text)
150 import qualified Data.Text.Lazy as TL
151 import qualified Data.Text.Lazy.IO as TL
152 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
153 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
154 import Data.ByteString (ByteString)
155 import qualified Data.ByteString as B
156 import Data.Char (ord, isSpace)
157 import Data.Functor.Identity (Identity)
158 import qualified Data.List as List
159 import Foreign.C.Error (Errno(Errno), ePIPE)
160 import qualified GHC.IO.Exception as G
162 import qualified Pipes.ByteString as PB
163 import qualified Pipes.ByteString.Parse as PBP
164 import qualified Pipes.Text.Internal as PE
165 import Pipes.Text.Parse (
166 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
167 import Pipes.Core (respond, Server')
168 import qualified Pipes.Parse as PP
169 import Pipes.Parse (input, concat, FreeT)
170 import qualified Pipes.Safe.Prelude as Safe
171 import qualified Pipes.Safe as Safe
172 import Pipes.Safe (MonadSafe(..), Base(..))
173 import qualified Pipes.Prelude as P
174 import qualified System.IO as IO
175 import Data.Char (isSpace)
176 import Data.Word (Word8)
177 import Prelude hiding (
206 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
207 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
208 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
209 {-# INLINE fromLazy #-}
211 -- | Stream text from 'stdin'
212 stdin :: MonadIO m => Producer Text m ()
213 stdin = fromHandle IO.stdin
216 {-| Convert a 'IO.Handle' into a text stream using a text size
217 determined by the good sense of the text library; note that this
218 is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
219 but uses the system encoding and has other `Data.Text.IO` features
222 fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
223 fromHandle h = go where
224 go = do txt <- liftIO (T.hGetChunk h)
225 unless (T.null txt) $ do yield txt
227 {-# INLINABLE fromHandle#-}
230 {-| Stream text from a file in the simple fashion of @Data.Text.IO@
232 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
233 MAIN = PUTSTRLN "HELLO WORLD"
236 readFile :: MonadSafe m => FilePath -> Producer Text m ()
237 readFile file = Safe.withFile file IO.ReadMode fromHandle
238 {-# INLINE readFile #-}
240 {-| Stream lines of text from stdin (for testing in ghci etc.)
242 >>> let safely = runSafeT . runEffect
243 >>> safely $ for Text.stdinLn (lift . lift . print . T.length)
250 stdinLn :: MonadIO m => Producer' Text m ()
253 eof <- liftIO (IO.hIsEOF IO.stdin)
255 txt <- liftIO (T.hGetLine IO.stdin)
258 {-# INLINABLE stdinLn #-}
260 {-| Stream text to 'stdout'
262 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
264 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
265 @(source >-> stdout)@ in suitable cases.
267 stdout :: MonadIO m => Consumer' Text m ()
272 x <- liftIO $ try (T.putStr txt)
274 Left (G.IOError { G.ioe_type = G.ResourceVanished
275 , G.ioe_errno = Just ioe })
278 Left e -> liftIO (throwIO e)
280 {-# INLINABLE stdout #-}
282 stdoutLn :: (MonadIO m) => Consumer' Text m ()
287 x <- liftIO $ try (T.putStrLn str)
289 Left (G.IOError { G.ioe_type = G.ResourceVanished
290 , G.ioe_errno = Just ioe })
293 Left e -> liftIO (throwIO e)
295 {-# INLINABLE stdoutLn #-}
297 {-| Convert a text stream into a 'Handle'
299 Note: again, for best performance, where possible use
300 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
302 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
303 toHandle h = for cat (liftIO . T.hPutStr h)
304 {-# INLINABLE toHandle #-}
306 {-# RULES "p >-> toHandle h" forall p h .
307 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
311 -- | Stream text into a file. Uses @pipes-safe@.
312 writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
313 writeFile file = Safe.withFile file IO.WriteMode toHandle
314 {-# INLINE writeFile #-}
316 -- | Apply a transformation to each 'Char' in the stream
317 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
318 map f = P.map (T.map f)
319 {-# INLINABLE map #-}
321 {-# RULES "p >-> map f" forall p f .
322 p >-> map f = for p (\txt -> yield (T.map f txt))
325 -- | Map a function over the characters of a text stream and concatenate the results
327 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
328 concatMap f = P.map (T.concatMap f)
329 {-# INLINABLE concatMap #-}
331 {-# RULES "p >-> concatMap f" forall p f .
332 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
335 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
336 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
337 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
338 encodeUtf8 :: Monad m => Pipe Text ByteString m r
339 encodeUtf8 = P.map TE.encodeUtf8
340 {-# INLINEABLE encodeUtf8 #-}
342 {-# RULES "p >-> encodeUtf8" forall p .
343 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
346 -- | Transform a Pipe of 'String's into one of 'Text' chunks
347 pack :: Monad m => Pipe String Text m r
349 {-# INLINEABLE pack #-}
351 {-# RULES "p >-> pack" forall p .
352 p >-> pack = for p (\txt -> yield (T.pack txt))
355 -- | Transform a Pipes of 'Text' chunks into one of 'String's
356 unpack :: Monad m => Pipe Text String m r
357 unpack = for cat (\t -> yield (T.unpack t))
358 {-# INLINEABLE unpack #-}
360 {-# RULES "p >-> unpack" forall p .
361 p >-> unpack = for p (\txt -> yield (T.unpack txt))
364 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
365 -- here acting on a 'Text' pipe, rather as they would on a lazy text
366 toCaseFold :: Monad m => Pipe Text Text m ()
367 toCaseFold = P.map T.toCaseFold
368 {-# INLINEABLE toCaseFold #-}
370 {-# RULES "p >-> toCaseFold" forall p .
371 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
375 -- | lowercase incoming 'Text'
376 toLower :: Monad m => Pipe Text Text m ()
377 toLower = P.map T.toLower
378 {-# INLINEABLE toLower #-}
380 {-# RULES "p >-> toLower" forall p .
381 p >-> toLower = for p (\txt -> yield (T.toLower txt))
384 -- | uppercase incoming 'Text'
385 toUpper :: Monad m => Pipe Text Text m ()
386 toUpper = P.map T.toUpper
387 {-# INLINEABLE toUpper #-}
389 {-# RULES "p >-> toUpper" forall p .
390 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
393 -- | Remove leading white space from an incoming succession of 'Text's
394 stripStart :: Monad m => Pipe Text Text m r
397 let text = T.stripStart chunk
401 {-# INLINEABLE stripStart #-}
403 -- | @(take n)@ only allows @n@ individual characters to pass;
404 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
405 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
406 take n0 = go n0 where
411 let len = fromIntegral (T.length txt)
413 then yield (T.take (fromIntegral n) txt)
417 {-# INLINABLE take #-}
419 -- | @(drop n)@ drops the first @n@ characters
420 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
421 drop n0 = go n0 where
426 let len = fromIntegral (T.length txt)
429 yield (T.drop (fromIntegral n) txt)
432 {-# INLINABLE drop #-}
434 -- | Take characters until they fail the predicate
435 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
436 takeWhile predicate = go
440 let (prefix, suffix) = T.span predicate txt
446 {-# INLINABLE takeWhile #-}
448 -- | Drop characters until they fail the predicate
449 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
450 dropWhile predicate = go where
453 case T.findIndex (not . predicate) txt of
458 {-# INLINABLE dropWhile #-}
460 -- | Only allows 'Char's to pass if they satisfy the predicate
461 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
462 filter predicate = P.map (T.filter predicate)
463 {-# INLINABLE filter #-}
465 {-# RULES "p >-> filter q" forall p q .
466 p >-> filter q = for p (\txt -> yield (T.filter q txt))
469 -- | Strict left scan over the characters
472 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
473 scan step begin = go begin
477 let txt' = T.scanl step c txt
481 {-# INLINABLE scan #-}
483 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
486 toLazy :: Producer Text Identity () -> TL.Text
487 toLazy = TL.fromChunks . P.toList
488 {-# INLINABLE toLazy #-}
490 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
493 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
494 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
495 immediately as they are generated instead of loading them all into memory.
497 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
498 toLazyM = liftM TL.fromChunks . P.toListM
499 {-# INLINABLE toLazyM #-}
501 -- | Reduce the text stream using a strict left fold over characters
504 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
505 fold step begin done = P.fold (T.foldl' step) begin done
506 {-# INLINABLE fold #-}
508 -- | Retrieve the first 'Char'
509 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
515 Left _ -> return Nothing
516 Right (c, _) -> return (Just c)
517 {-# INLINABLE head #-}
519 -- | Retrieve the last 'Char'
520 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
530 else go (Just $ T.last txt) p'
531 {-# INLINABLE last #-}
533 -- | Determine if the stream is empty
534 null :: (Monad m) => Producer Text m () -> m Bool
536 {-# INLINABLE null #-}
538 -- | Count the number of characters in the stream
539 length :: (Monad m, Num n) => Producer Text m () -> m n
540 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
541 {-# INLINABLE length #-}
543 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
544 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
545 any predicate = P.any (T.any predicate)
546 {-# INLINABLE any #-}
548 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
549 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
550 all predicate = P.all (T.all predicate)
551 {-# INLINABLE all #-}
553 -- | Return the maximum 'Char' within a text stream
554 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
555 maximum = P.fold step Nothing id
560 else Just $ case mc of
561 Nothing -> T.maximum txt
562 Just c -> max c (T.maximum txt)
563 {-# INLINABLE maximum #-}
565 -- | Return the minimum 'Char' within a text stream (surely very useful!)
566 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
567 minimum = P.fold step Nothing id
573 Nothing -> Just (T.minimum txt)
574 Just c -> Just (min c (T.minimum txt))
575 {-# INLINABLE minimum #-}
577 -- | Find the first element in the stream that matches the predicate
580 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
581 find predicate p = head (p >-> filter predicate)
582 {-# INLINABLE find #-}
584 -- | Index into a text stream
586 :: (Monad m, Integral a)
587 => a-> Producer Text m () -> m (Maybe Char)
588 index n p = head (p >-> drop n)
589 {-# INLINABLE index #-}
592 -- | Store a tally of how many segments match the given 'Text'
593 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
594 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
595 {-# INLINABLE count #-}
597 -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text
598 -- returning a Pipe of ByteStrings that begins at the point of failure.
600 decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
601 decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
602 go !carry dec0 p = do
604 case x of Left r -> if B.null carry
605 then return (return r) -- all bytestrinput was consumed
606 else return (do yield carry -- a potentially valid fragment remains
609 Right (chunk, p') -> case dec0 chunk of
610 PE.Some text carry2 dec -> do yield text
612 PE.Other text bs -> do yield text
613 return (do yield bs -- an invalid blob remains
615 {-# INLINABLE decodeUtf8 #-}
618 -- | Splits a 'Producer' after the given number of characters
620 :: (Monad m, Integral n)
623 -> Producer' Text m (Producer Text m r)
630 Left r -> return (return r)
631 Right (txt, p') -> do
632 let len = fromIntegral (T.length txt)
638 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
640 return (yield suffix >> p')
641 {-# INLINABLE splitAt #-}
643 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
645 :: (Monad m, Integral n)
646 => n -> Producer Text m r -> FreeT (Producer Text m) m r
647 chunksOf n p0 = PP.FreeT (go p0)
653 Right (txt, p') -> PP.Free $ do
654 p'' <- splitAt n (yield txt >> p')
655 return $ PP.FreeT (go p'')
656 {-# INLINABLE chunksOf #-}
658 {-| Split a text stream in two, where the first text stream is the longest
659 consecutive group of text that satisfy the predicate
665 -> Producer' Text m (Producer Text m r)
671 Left r -> return (return r)
672 Right (txt, p') -> do
673 let (prefix, suffix) = T.span predicate txt
680 return (yield suffix >> p')
681 {-# INLINABLE span #-}
683 {-| Split a text stream in two, where the first text stream is the longest
684 consecutive group of characters that don't satisfy the predicate
690 -> Producer Text m (Producer Text m r)
691 break predicate = span (not . predicate)
692 {-# INLINABLE break #-}
694 {-| Split a text stream into sub-streams delimited by characters that satisfy the
701 -> PP.FreeT (Producer Text m) m r
702 splitWith predicate p0 = PP.FreeT (go0 p0)
707 Left r -> return (PP.Pure r)
711 else return $ PP.Free $ do
712 p'' <- span (not . predicate) (yield txt >> p')
713 return $ PP.FreeT (go1 p'')
718 Right (_, p') -> PP.Free $ do
719 p'' <- span (not . predicate) p'
720 return $ PP.FreeT (go1 p'')
721 {-# INLINABLE splitWith #-}
723 -- | Split a text stream using the given 'Char' as the delimiter
727 -> FreeT (Producer Text m) m r
728 split c = splitWith (c ==)
729 {-# INLINABLE split #-}
731 {-| Group a text stream into 'FreeT'-delimited text streams using the supplied
736 => (Char -> Char -> Bool)
738 -> FreeT (Producer Text m) m r
739 groupBy equal p0 = PP.FreeT (go p0)
744 Left r -> return (PP.Pure r)
745 Right (txt, p') -> case (T.uncons txt) of
748 return $ PP.Free $ do
749 p'' <- span (equal c) (yield txt >> p')
750 return $ PP.FreeT (go p'')
751 {-# INLINABLE groupBy #-}
753 -- | Group a text stream into 'FreeT'-delimited text streams of identical characters
755 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
757 {-# INLINABLE group #-}
759 {-| Split a text stream into 'FreeT'-delimited lines
762 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
763 lines p0 = PP.FreeT (go0 p0)
768 Left r -> return (PP.Pure r)
772 else return $ PP.Free $ go1 (yield txt >> p')
774 p' <- break ('\n' ==) p
775 return $ PP.FreeT $ do
778 Left r -> return $ PP.Pure r
779 Right (_, p'') -> go0 p''
780 {-# INLINABLE lines #-}
784 -- | Split a text stream into 'FreeT'-delimited words
786 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
790 x <- next (p >-> dropWhile isSpace)
793 Right (bs, p') -> PP.Free $ do
794 p'' <- break isSpace (yield bs >> p')
796 {-# INLINABLE words #-}
799 -- | Intersperse a 'Char' in between the characters of the text stream
801 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
808 Right (txt, p') -> do
809 yield (T.intersperse c txt)
815 Right (txt, p') -> do
816 yield (T.singleton c)
817 yield (T.intersperse c txt)
819 {-# INLINABLE intersperse #-}
821 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
822 interspersing a text stream in between them
826 => Producer Text m ()
827 -> FreeT (Producer Text m) m r
832 x <- lift (PP.runFreeT f)
834 PP.Pure r -> return r
839 x <- lift (PP.runFreeT f)
841 PP.Pure r -> return r
846 {-# INLINABLE intercalate #-}
848 {-| Join 'FreeT'-delimited lines into a text stream
851 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
855 x <- lift (PP.runFreeT f)
857 PP.Pure r -> return r
860 yield $ T.singleton '\n'
862 {-# INLINABLE unlines #-}
864 {-| Join 'FreeT'-delimited words into a text stream
867 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
868 unwords = intercalate (yield $ T.pack " ")
869 {-# INLINABLE unwords #-}
872 The following parsing utilities are single-character analogs of the ones found
877 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
879 @Data.Text@ re-exports the 'Text' type.
881 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).