1 {-# LANGUAGE RankNTypes, TypeFamilies, CPP #-}
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
6 be associated with a 'Producer' or 'Consumer' according as it is read or written to.
8 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
9 example, the following program copies a document from one file to another:
12 > import qualified Data.Text.Pipes as Text
16 > withFile "inFile.txt" ReadMode $ \hIn ->
17 > withFile "outFile.txt" WriteMode $ \hOut ->
18 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
20 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
23 > import qualified Data.Text.Pipes as Text
26 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
28 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
29 and 'stdout' proxies, as with the following \"echo\" program:
31 > main = runEffect $ Text.stdin >-> Text.stdout
33 You can also translate pure lazy 'TL.Text's to and from proxies:
35 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
37 In addition, this module provides many functions equivalent to lazy
38 'Text' functions so that you can transform or fold text streams. For
39 example, to stream only the first three lines of 'stdin' to 'stdout' you
43 > import qualified Pipes.Text as Text
44 > import qualified Pipes.Parse as Parse
46 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
48 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
50 The above program will never bring more than one chunk of text (~ 32 KB) into
51 memory, no matter how long the lines are.
53 Note that functions in this library are designed to operate on streams that
54 are insensitive to text boundaries. This means that they may freely split
55 text into smaller texts and /discard empty texts/. However, they will
56 /never concatenate texts/ in order to provide strict upper bounds on memory
118 #if MIN_VERSION_text(0,11,4)
130 -- * Character Parsers
144 import Control.Exception (throwIO, try)
145 import Control.Monad (liftM, unless)
146 import Control.Monad.Trans.State.Strict (StateT(..))
147 import qualified Data.Text as T
148 import qualified Data.Text.IO as T
149 import qualified Data.Text.Encoding as TE
150 import qualified Data.Text.Encoding.Error as TE
151 import Data.Text (Text)
152 import qualified Data.Text.Lazy as TL
153 import qualified Data.Text.Lazy.IO as TL
154 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
155 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
156 import Data.ByteString (ByteString)
157 import qualified Data.ByteString as B
158 import Data.Char (ord)
159 import Data.Functor.Identity (Identity)
160 import qualified Data.List as List
161 import Foreign.C.Error (Errno(Errno), ePIPE)
162 import qualified GHC.IO.Exception as G
164 import qualified Pipes.ByteString.Parse as PBP
165 import Pipes.Text.Parse (
166 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
167 import Pipes.Core (respond, Server')
168 import qualified Pipes.Parse as PP
169 import Pipes.Parse (input, concat, FreeT)
170 import qualified Pipes.Safe.Prelude as Safe
171 import qualified Pipes.Safe as Safe
172 import Pipes.Safe (MonadSafe(..), Base(..))
173 import qualified Pipes.Prelude as P
174 import qualified System.IO as IO
175 import Data.Char (isSpace)
176 import Data.Word (Word8)
177 import Prelude hiding (
206 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
207 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
208 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
209 {-# INLINABLE fromLazy #-}
211 -- | Stream text from 'stdin'
212 stdin :: MonadIO m => Producer' Text m ()
213 stdin = fromHandle IO.stdin
214 {-# INLINABLE stdin #-}
216 {-| Convert a 'IO.Handle' into a text stream using a text size
217 determined by the good sense of the text library.
221 fromHandle :: MonadIO m => IO.Handle -> Producer' Text m ()
222 fromHandle h = go where
223 go = do txt <- liftIO (T.hGetChunk h)
224 unless (T.null txt) $ do yield txt
226 {-# INLINABLE fromHandle#-}
228 {-| Stream text from a file using Pipes.Safe
230 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
231 MAIN = PUTSTRLN "HELLO WORLD"
234 readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m ()
235 readFile file = Safe.withFile file IO.ReadMode fromHandle
236 {-# INLINABLE readFile #-}
238 {-| Stream lines of text from stdin (for testing in ghci etc.)
240 >>> let safely = runSafeT . runEffect
241 >>> safely $ for Text.stdinLn (lift . lift . print . T.length)
248 stdinLn :: MonadIO m => Producer' Text m ()
251 eof <- liftIO (IO.hIsEOF IO.stdin)
253 txt <- liftIO (T.hGetLine IO.stdin)
258 {-| Stream text to 'stdout'
260 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
262 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
263 @(source >-> stdout)@ in suitable cases.
265 stdout :: MonadIO m => Consumer' Text m ()
270 x <- liftIO $ try (T.putStr txt)
272 Left (G.IOError { G.ioe_type = G.ResourceVanished
273 , G.ioe_errno = Just ioe })
276 Left e -> liftIO (throwIO e)
278 {-# INLINABLE stdout #-}
280 stdoutLn :: (MonadIO m) => Consumer' Text m ()
285 x <- liftIO $ try (T.putStrLn str)
287 Left (G.IOError { G.ioe_type = G.ResourceVanished
288 , G.ioe_errno = Just ioe })
291 Left e -> liftIO (throwIO e)
293 {-# INLINABLE stdoutLn #-}
295 {-| Convert a text stream into a 'Handle'
297 Note: again, for best performance, where possible use
298 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
300 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
301 toHandle h = for cat (liftIO . T.hPutStr h)
302 {-# INLINABLE toHandle #-}
304 -- | Stream text into a file. Uses @pipes-safe@.
305 writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m ()
306 writeFile file = Safe.withFile file IO.WriteMode toHandle
308 -- | Apply a transformation to each 'Char' in the stream
309 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
310 map f = P.map (T.map f)
311 {-# INLINABLE map #-}
313 -- | Map a function over the characters of a text stream and concatenate the results
315 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
316 concatMap f = P.map (T.concatMap f)
317 {-# INLINABLE concatMap #-}
320 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
322 encodeUtf8 :: Monad m => Pipe Text ByteString m r
323 encodeUtf8 = P.map TE.encodeUtf8
324 {-# INLINEABLE encodeUtf8 #-}
326 -- | Transform a Pipe of 'String's into one of 'Text' chunks
327 pack :: Monad m => Pipe String Text m r
329 {-# INLINEABLE pack #-}
331 -- | Transforma a Pipes of 'Text' chunks into one of 'String's
332 unpack :: Monad m => Pipe Text String m r
333 unpack = P.map T.unpack
334 {-# INLINEABLE unpack #-}
336 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
337 -- here acting on a 'Text' pipe, rather as they would on a lazy text
338 toCaseFold :: Monad m => Pipe Text Text m ()
339 toCaseFold = P.map T.toCaseFold
340 {-# INLINEABLE toCaseFold #-}
342 -- | lowercase incoming 'Text'
343 toLower :: Monad m => Pipe Text Text m ()
344 toLower = P.map T.toLower
345 {-# INLINEABLE toLower #-}
347 -- | uppercase incoming 'Text'
348 toUpper :: Monad m => Pipe Text Text m ()
349 toUpper = P.map T.toUpper
350 {-# INLINEABLE toUpper #-}
352 -- | Remove leading white space from an incoming succession of 'Text's
353 stripStart :: Monad m => Pipe Text Text m r
356 let text = T.stripStart chunk
360 {-# INLINEABLE stripStart #-}
362 -- | @(take n)@ only allows @n@ individual characters to pass;
363 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
364 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
365 take n0 = go n0 where
370 let len = fromIntegral (T.length txt)
372 then yield (T.take (fromIntegral n) txt)
376 {-# INLINABLE take #-}
378 -- | @(drop n)@ drops the first @n@ characters
379 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
380 drop n0 = go n0 where
385 let len = fromIntegral (T.length txt)
388 yield (T.drop (fromIntegral n) txt)
391 {-# INLINABLE drop #-}
393 -- | Take characters until they fail the predicate
394 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
395 takeWhile predicate = go
399 let (prefix, suffix) = T.span predicate txt
405 {-# INLINABLE takeWhile #-}
407 -- | Drop characters until they fail the predicate
408 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
409 dropWhile predicate = go where
412 case T.findIndex (not . predicate) txt of
417 {-# INLINABLE dropWhile #-}
419 -- | Only allows 'Char's to pass if they satisfy the predicate
420 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
421 filter predicate = P.map (T.filter predicate)
422 {-# INLINABLE filter #-}
425 -- | Strict left scan over the characters
428 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
429 scan step begin = go begin
433 let txt' = T.scanl step c txt
437 {-# INLINABLE scan #-}
439 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
442 toLazy :: Producer Text Identity () -> TL.Text
443 toLazy = TL.fromChunks . P.toList
444 {-# INLINABLE toLazy #-}
446 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
449 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
450 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
451 immediately as they are generated instead of loading them all into memory.
453 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
454 toLazyM = liftM TL.fromChunks . P.toListM
455 {-# INLINABLE toLazyM #-}
457 -- | Reduce the text stream using a strict left fold over characters
460 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
461 fold step begin done = P.fold (T.foldl' step) begin done
462 {-# INLINABLE fold #-}
464 -- | Retrieve the first 'Char'
465 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
471 Left _ -> return Nothing
472 Right (c, _) -> return (Just c)
473 {-# INLINABLE head #-}
475 -- | Retrieve the last 'Char'
476 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
486 else go (Just $ T.last txt) p'
487 {-# INLINABLE last #-}
489 -- | Determine if the stream is empty
490 null :: (Monad m) => Producer Text m () -> m Bool
492 {-# INLINABLE null #-}
494 -- | Count the number of characters in the stream
495 length :: (Monad m, Num n) => Producer Text m () -> m n
496 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
497 {-# INLINABLE length #-}
499 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
500 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
501 any predicate = P.any (T.any predicate)
502 {-# INLINABLE any #-}
504 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
505 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
506 all predicate = P.all (T.all predicate)
507 {-# INLINABLE all #-}
509 -- | Return the maximum 'Char' within a text stream
510 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
511 maximum = P.fold step Nothing id
516 else Just $ case mc of
517 Nothing -> T.maximum txt
518 Just c -> max c (T.maximum txt)
519 {-# INLINABLE maximum #-}
521 -- | Return the minimum 'Char' within a text stream (surely very useful!)
522 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
523 minimum = P.fold step Nothing id
529 Nothing -> Just (T.minimum txt)
530 Just c -> Just (min c (T.minimum txt))
531 {-# INLINABLE minimum #-}
533 -- | Find the first element in the stream that matches the predicate
536 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
537 find predicate p = head (p >-> filter predicate)
538 {-# INLINABLE find #-}
540 -- | Index into a text stream
542 :: (Monad m, Integral a)
543 => a-> Producer Text m () -> m (Maybe Char)
544 index n p = head (p >-> drop n)
545 {-# INLINABLE index #-}
548 -- | Store a tally of how many segments match the given 'Text'
549 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
550 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
551 {-# INLINABLE count #-}
553 #if MIN_VERSION_text(0,11,4)
554 -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
555 -- into a Pipe of Text
558 => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
559 decodeUtf8 = go TE.streamDecodeUtf8
563 Left r -> return (return r)
564 Right (chunk, p') -> do
565 let TE.Some text l dec' = dec chunk
573 {-# INLINEABLE decodeUtf8 #-}
575 -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
576 -- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@
577 -- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\"
581 -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
582 decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
586 Left r -> return (return r)
587 Right (chunk, p') -> do
588 let TE.Some text l dec' = dec chunk
596 {-# INLINEABLE decodeUtf8With #-}
599 -- | Splits a 'Producer' after the given number of characters
601 :: (Monad m, Integral n)
604 -> Producer' Text m (Producer Text m r)
611 Left r -> return (return r)
612 Right (txt, p') -> do
613 let len = fromIntegral (T.length txt)
619 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
621 return (yield suffix >> p')
622 {-# INLINABLE splitAt #-}
624 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
626 :: (Monad m, Integral n)
627 => n -> Producer Text m r -> FreeT (Producer Text m) m r
628 chunksOf n p0 = PP.FreeT (go p0)
634 Right (txt, p') -> PP.Free $ do
635 p'' <- splitAt n (yield txt >> p')
636 return $ PP.FreeT (go p'')
637 {-# INLINABLE chunksOf #-}
639 {-| Split a text stream in two, where the first text stream is the longest
640 consecutive group of text that satisfy the predicate
646 -> Producer' Text m (Producer Text m r)
652 Left r -> return (return r)
653 Right (txt, p') -> do
654 let (prefix, suffix) = T.span predicate txt
661 return (yield suffix >> p')
662 {-# INLINABLE span #-}
664 {-| Split a text stream in two, where the first text stream is the longest
665 consecutive group of characters that don't satisfy the predicate
671 -> Producer Text m (Producer Text m r)
672 break predicate = span (not . predicate)
673 {-# INLINABLE break #-}
675 {-| Split a text stream into sub-streams delimited by characters that satisfy the
682 -> PP.FreeT (Producer Text m) m r
683 splitWith predicate p0 = PP.FreeT (go0 p0)
688 Left r -> return (PP.Pure r)
692 else return $ PP.Free $ do
693 p'' <- span (not . predicate) (yield txt >> p')
694 return $ PP.FreeT (go1 p'')
699 Right (_, p') -> PP.Free $ do
700 p'' <- span (not . predicate) p'
701 return $ PP.FreeT (go1 p'')
702 {-# INLINABLE splitWith #-}
704 -- | Split a text stream using the given 'Char' as the delimiter
708 -> FreeT (Producer Text m) m r
709 split c = splitWith (c ==)
710 {-# INLINABLE split #-}
712 {-| Group a text stream into 'FreeT'-delimited text streams using the supplied
717 => (Char -> Char -> Bool)
719 -> FreeT (Producer Text m) m r
720 groupBy equal p0 = PP.FreeT (go p0)
725 Left r -> return (PP.Pure r)
726 Right (txt, p') -> case (T.uncons txt) of
729 return $ PP.Free $ do
730 p'' <- span (equal c) (yield txt >> p')
731 return $ PP.FreeT (go p'')
732 {-# INLINABLE groupBy #-}
734 -- | Group a text stream into 'FreeT'-delimited text streams of identical characters
736 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
738 {-# INLINABLE group #-}
740 {-| Split a text stream into 'FreeT'-delimited lines
743 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
744 lines p0 = PP.FreeT (go0 p0)
749 Left r -> return (PP.Pure r)
753 else return $ PP.Free $ go1 (yield txt >> p')
755 p' <- break ('\n' ==) p
756 return $ PP.FreeT (go2 p')
761 Right (_, p') -> PP.Free (go1 p')
762 {-# INLINABLE lines #-}
766 -- | Split a text stream into 'FreeT'-delimited words
768 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
769 words p0 = removeEmpty (splitWith isSpace p0)
771 removeEmpty f = PP.FreeT $ do
774 PP.Pure r -> return (PP.Pure r)
779 Left f' -> PP.runFreeT (removeEmpty f')
783 else return $ PP.Free $ do
786 return (removeEmpty f')
787 {-# INLINABLE words #-}
789 -- | Intersperse a 'Char' in between the characters of the text stream
791 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
798 Right (txt, p') -> do
799 yield (T.intersperse c txt)
805 Right (txt, p') -> do
806 yield (T.singleton c)
807 yield (T.intersperse c txt)
809 {-# INLINABLE intersperse #-}
811 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
812 interspersing a text stream in between them
816 => Producer Text m ()
817 -> FreeT (Producer Text m) m r
822 x <- lift (PP.runFreeT f)
824 PP.Pure r -> return r
829 x <- lift (PP.runFreeT f)
831 PP.Pure r -> return r
836 {-# INLINABLE intercalate #-}
838 {-| Join 'FreeT'-delimited lines into a text stream
841 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
845 x <- lift (PP.runFreeT f)
847 PP.Pure r -> return r
850 yield $ T.singleton '\n'
852 {-# INLINABLE unlines #-}
854 {-| Join 'FreeT'-delimited words into a text stream
857 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
858 unwords = intercalate (yield $ T.pack " ")
859 {-# INLINABLE unwords #-}
862 The following parsing utilities are single-character analogs of the ones found
867 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
869 @Data.Text@ re-exports the 'Text' type.
871 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).