1 {-# LANGUAGE RankNTypes, TypeFamilies, CPP #-}
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
can interact with lazy 'Text's and 'IO.Handle's.
7 To stream to or from 'IO.Handle's, use 'fromHandle' or 'toHandle'. For
8 example, the following program copies a document from one file to another:
> import qualified Pipes.Text as Text
15 > withFile "inFile.txt" ReadMode $ \hIn ->
16 > withFile "outFile.txt" WriteMode $ \hOut ->
17 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
19 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
> import qualified Pipes.Text as Text
25 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
27 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
28 and 'stdout' proxies, as with the following \"echo\" program:
30 > main = runEffect $ Text.stdin >-> Text.stdout
32 You can also translate pure lazy 'TL.Text's to and from proxies:
34 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
36 In addition, this module provides many functions equivalent to lazy
37 'Text' functions so that you can transform or fold text streams. For
38 example, to stream only the first three lines of 'stdin' to 'stdout' you
42 > import qualified Pipes.Text as Text
43 > import qualified Pipes.Parse as Parse
45 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
47 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
49 The above program will never bring more than one chunk of text (~ 32 KB) into
50 memory, no matter how long the lines are.
52 Note that functions in this library are designed to operate on streams that
53 are insensitive to text boundaries. This means that they may freely split
54 text into smaller texts and /discard empty texts/. However, they will
55 /never concatenate texts/ in order to provide strict upper bounds on memory
116 #if MIN_VERSION_text(0,11,4)
127 -- * Character Parsers
141 import Control.Exception (throwIO, try)
142 import Control.Monad (liftM, unless)
143 import Control.Monad.Trans.State.Strict (StateT)
144 import qualified Data.Text as T
145 import qualified Data.Text.IO as T
146 import qualified Data.Text.Encoding as TE
147 import Data.Text (Text)
148 import qualified Data.Text.Lazy as TL
149 import qualified Data.Text.Lazy.IO as TL
150 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
151 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
152 import Data.ByteString (ByteString)
153 import qualified Data.ByteString as B
154 import Data.Char (ord)
155 import Data.Functor.Identity (Identity)
156 import qualified Data.List as List
157 import Foreign.C.Error (Errno(Errno), ePIPE)
158 import qualified GHC.IO.Exception as G
160 import qualified Pipes.ByteString.Parse as PBP
161 import Pipes.Text.Parse (
162 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
163 import Pipes.Core (respond, Server')
164 import qualified Pipes.Parse as PP
165 import Pipes.Parse (input, concat, FreeT)
166 import qualified Pipes.Safe.Prelude as Safe
167 import qualified Pipes.Safe as Safe
168 import Pipes.Safe (MonadSafe(..), Base(..))
169 import qualified Pipes.Prelude as P
170 import qualified System.IO as IO
171 import Data.Char (isSpace)
172 import Prelude hiding (
-- | Turn a lazy 'TL.Text' into a 'Producer' that yields each of its strict
-- 'Text' chunks, in order, and then returns.
fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
fromLazy = foldrChunks emit (return ())
  where
    -- Yield the current chunk, then carry on with the rest of the stream.
    emit chunk rest = yield chunk >> rest
{-# INLINABLE fromLazy #-}
-- | A 'Producer' of 'Text' chunks read from standard input.
--
-- This is 'fromHandle' applied to 'IO.stdin'.
stdin :: MonadIO m => Producer' Text m ()
stdin = fromHandle IO.stdin
{-# INLINABLE stdin #-}
211 {-| Convert a 'IO.Handle' into a text stream using a text size
212 determined by the good sense of the text library.
216 fromHandle :: MonadIO m => IO.Handle -> Producer' Text m ()
217 fromHandle h = go where
218 go = do txt <- liftIO (T.hGetChunk h)
219 unless (T.null txt) $ do yield txt
221 {-# INLINABLE fromHandle#-}
223 {-| Stream text from a file using Pipes.Safe
225 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
226 MAIN = PUTSTRLN "HELLO WORLD"
readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m ()
readFile path =
    -- 'Safe.withFile' opens the file in 'IO.ReadMode' and passes the handle
    -- to the continuation, which streams its contents with 'fromHandle'.
    Safe.withFile path IO.ReadMode (\handle -> fromHandle handle)
{-# INLINABLE readFile #-}
233 {-| Stream lines of text from stdin (for testing in ghci etc.)
235 >>> let safely = runSafeT . runEffect
236 >>> safely $ for Text.stdinLn (lift . lift . print . T.length)
243 stdinLn :: MonadIO m => Producer' Text m ()
246 eof <- liftIO (IO.hIsEOF IO.stdin)
248 txt <- liftIO (T.hGetLine IO.stdin)
253 {-| Stream text to 'stdout'
255 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
257 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
258 @(source >-> stdout)@ in suitable cases.
260 stdout :: MonadIO m => Consumer' Text m ()
265 x <- liftIO $ try (T.putStr txt)
267 Left (G.IOError { G.ioe_type = G.ResourceVanished
268 , G.ioe_errno = Just ioe })
271 Left e -> liftIO (throwIO e)
273 {-# INLINABLE stdout #-}
275 stdoutLn :: (MonadIO m) => Consumer' Text m ()
280 x <- liftIO $ try (T.putStrLn str)
282 Left (G.IOError { G.ioe_type = G.ResourceVanished
283 , G.ioe_errno = Just ioe })
286 Left e -> liftIO (throwIO e)
288 {-# INLINABLE stdoutLn #-}
290 {-| Convert a text stream into a 'Handle'
292 Note: again, for best performance, where possible use
293 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
toHandle handle =
    -- For every chunk received from upstream, write it to the handle with
    -- 'T.hPutStr'.
    for cat (\chunk -> liftIO (T.hPutStr handle chunk))
{-# INLINABLE toHandle #-}
-- | Stream text into a file. Uses @pipes-safe@.
--
-- The file is opened in 'IO.WriteMode' via 'Safe.withFile', and every chunk
-- received from upstream is written to it with 'toHandle'.
writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m ()
writeFile path =
    Safe.withFile path IO.WriteMode (\handle -> toHandle handle)
{-# INLINABLE writeFile #-}
-- | Transform every 'Char' flowing through the stream with the given
-- function. Each incoming chunk is mapped independently with 'T.map'.
map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
map charFn = P.map (\chunk -> T.map charFn chunk)
{-# INLINABLE map #-}
308 -- | Map a function over the characters of a text stream and concatenate the results
310 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
311 concatMap f = P.map (T.concatMap f)
312 {-# INLINABLE concatMap #-}
-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
-- encoding. Each incoming chunk is encoded on its own with 'TE.encodeUtf8'.
encodeUtf8 :: Monad m => Pipe Text ByteString m r
encodeUtf8 = P.map (\chunk -> TE.encodeUtf8 chunk)
{-# INLINABLE encodeUtf8 #-}
321 -- | Transform a Pipe of 'String's into one of 'Text' chunks
322 pack :: Monad m => Pipe String Text m r
324 {-# INLINEABLE pack #-}
-- | Transform a Pipe of 'Text' chunks into one of 'String's. Every incoming
-- chunk is converted with 'T.unpack' and passed downstream.
unpack :: Monad m => Pipe Text String m r
unpack = P.map (\chunk -> T.unpack chunk)
{-# INLINABLE unpack #-}
-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text'
-- utilities, here acting on a 'Text' pipe, rather as they would on a lazy text.
--
-- 'toCaseFold' applies 'T.toCaseFold' to every incoming chunk. The pipe never
-- terminates on its own, so its return type is left polymorphic, like
-- 'P.map'.
toCaseFold :: Monad m => Pipe Text Text m r
toCaseFold = P.map T.toCaseFold
{-# INLINABLE toCaseFold #-}
-- | Lowercase incoming 'Text'.
--
-- Applies 'T.toLower' to every incoming chunk. The pipe never terminates on
-- its own, so its return type is left polymorphic, like 'P.map'.
toLower :: Monad m => Pipe Text Text m r
toLower = P.map T.toLower
{-# INLINABLE toLower #-}
-- | Uppercase incoming 'Text'.
--
-- Applies 'T.toUpper' to every incoming chunk. The pipe never terminates on
-- its own, so its return type is left polymorphic, like 'P.map'.
toUpper :: Monad m => Pipe Text Text m r
toUpper = P.map T.toUpper
{-# INLINABLE toUpper #-}
347 -- | Remove leading white space from an incoming succession of 'Text's
348 stripStart :: Monad m => Pipe Text Text m r
351 let text = T.stripStart chunk
355 {-# INLINEABLE stripStart #-}
357 -- | @(take n)@ only allows @n@ individual characters to pass;
358 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
359 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
360 take n0 = go n0 where
365 let len = fromIntegral (T.length txt)
367 then yield (T.take (fromIntegral n) txt)
371 {-# INLINABLE take #-}
373 -- | @(drop n)@ drops the first @n@ characters
374 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
375 drop n0 = go n0 where
380 let len = fromIntegral (T.length txt)
383 yield (T.drop (fromIntegral n) txt)
386 {-# INLINABLE drop #-}
388 -- | Take characters until they fail the predicate
389 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
390 takeWhile predicate = go
394 let (prefix, suffix) = T.span predicate txt
400 {-# INLINABLE takeWhile #-}
402 -- | Drop characters until they fail the predicate
403 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
404 dropWhile predicate = go where
407 case T.findIndex (not . predicate) txt of
412 {-# INLINABLE dropWhile #-}
-- | Let through only the 'Char's that satisfy the predicate. Each incoming
-- chunk is replaced by its filtered version (via 'T.filter').
filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
filter keep = P.map (\chunk -> T.filter keep chunk)
{-# INLINABLE filter #-}
420 -- | Strict left scan over the characters
423 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
424 scan step begin = go begin
428 let txt' = T.scanl step c txt
432 {-# INLINABLE scan #-}
434 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
toLazy :: Producer Text Identity () -> TL.Text
toLazy producer =
    -- Collect the chunks of the pure producer into a list and reassemble
    -- them as a lazy 'TL.Text'.
    TL.fromChunks (P.toList producer)
{-# INLINABLE toLazy #-}
441 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
444 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
445 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
446 immediately as they are generated instead of loading them all into memory.
toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
toLazyM producer = do
    -- Run the producer to completion, gathering every chunk it yields.
    chunks <- P.toListM producer
    return (TL.fromChunks chunks)
{-# INLINABLE toLazyM #-}
452 -- | Reduce the text stream using a strict left fold over characters
455 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
456 fold step begin done = P.fold (T.foldl' step) begin done
457 {-# INLINABLE fold #-}
459 -- | Retrieve the first 'Char'
460 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
466 Left _ -> return Nothing
467 Right (c, _) -> return (Just c)
468 {-# INLINABLE head #-}
470 -- | Retrieve the last 'Char'
471 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
481 else go (Just $ T.last txt) p'
482 {-# INLINABLE last #-}
484 -- | Determine if the stream is empty
485 null :: (Monad m) => Producer Text m () -> m Bool
487 {-# INLINABLE null #-}
-- | Total number of characters in the stream, obtained by summing the
-- 'T.length' of every chunk.
length :: (Monad m, Num n) => Producer Text m () -> m n
length = P.fold addChunk 0 id
  where
    -- Add the character count of one chunk to the running total.
    addChunk total chunk = total + fromIntegral (T.length chunk)
{-# INLINABLE length #-}
-- | Fold that reports whether at least one 'Char' in the stream satisfies
-- the predicate. Each chunk is tested with 'T.any'.
any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
any predicate = P.any (\chunk -> T.any predicate chunk)
{-# INLINABLE any #-}
-- | Fold that reports whether every 'Char' in the stream satisfies the
-- predicate. Each chunk is tested with 'T.all'.
all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
all predicate = P.all (\chunk -> T.all predicate chunk)
{-# INLINABLE all #-}
504 -- | Return the maximum 'Char' within a text stream
505 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
506 maximum = P.fold step Nothing id
511 else Just $ case mc of
512 Nothing -> T.maximum txt
513 Just c -> max c (T.maximum txt)
514 {-# INLINABLE maximum #-}
516 -- | Return the minimum 'Char' within a text stream (surely very useful!)
517 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
518 minimum = P.fold step Nothing id
524 Nothing -> Just (T.minimum txt)
525 Just c -> Just (min c (T.minimum txt))
526 {-# INLINABLE minimum #-}
528 -- | Find the first element in the stream that matches the predicate
531 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
532 find predicate p = head (p >-> filter predicate)
533 {-# INLINABLE find #-}
535 -- | Index into a text stream
537 :: (Monad m, Integral a)
538 => a-> Producer Text m () -> m (Maybe Char)
539 index n p = head (p >-> drop n)
540 {-# INLINABLE index #-}
542 -- | Find the index of an element that matches the given 'Char'
544 -- :: (Monad m, Num n) => Char -> Producer Text m () -> m (Maybe n)
545 -- elemIndex w8 = findIndex (w8 ==)
546 -- {-# INLINABLE elemIndex #-}
548 -- | Store the first index of an element that satisfies the predicate
550 -- :: (Monad m, Num n)
551 -- => (Char -> Bool) -> Producer Text m () -> m (Maybe n)
552 -- findIndex predicate p = P.head (p >-> findIndices predicate)
553 -- {-# INLINABLE findIndex #-}
-- | Tally how many times the given 'Text' occurs in the stream.
--
-- The occurrences are counted chunk by chunk with 'T.count' and the
-- per-chunk results are summed, so an occurrence that straddles the boundary
-- between two chunks is not counted.
count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
count needle producer = P.fold (+) 0 id perChunkCounts
  where
    -- The stream of per-chunk occurrence counts.
    perChunkCounts =
        producer >-> P.map (\chunk -> fromIntegral (T.count needle chunk))
{-# INLINABLE count #-}
560 #if MIN_VERSION_text(0,11,4)
561 -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
562 -- into a Pipe of Text
565 => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
566 decodeUtf8 = go TE.streamDecodeUtf8
570 Left r -> return (return r)
571 Right (chunk, p') -> do
572 let TE.Some text l dec' = dec chunk
580 {-# INLINEABLE decodeUtf8 #-}
583 -- | Splits a 'Producer' after the given number of characters
585 :: (Monad m, Integral n)
588 -> Producer' Text m (Producer Text m r)
595 Left r -> return (return r)
596 Right (txt, p') -> do
597 let len = fromIntegral (T.length txt)
603 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
605 return (yield suffix >> p')
606 {-# INLINABLE splitAt #-}
608 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
610 :: (Monad m, Integral n)
611 => n -> Producer Text m r -> FreeT (Producer Text m) m r
612 chunksOf n p0 = PP.FreeT (go p0)
618 Right (txt, p') -> PP.Free $ do
619 p'' <- splitAt n (yield txt >> p')
620 return $ PP.FreeT (go p'')
621 {-# INLINABLE chunksOf #-}
623 {-| Split a text stream in two, where the first text stream is the longest
consecutive group of characters that satisfy the predicate
630 -> Producer' Text m (Producer Text m r)
636 Left r -> return (return r)
637 Right (txt, p') -> do
638 let (prefix, suffix) = T.span predicate txt
645 return (yield suffix >> p')
646 {-# INLINABLE span #-}
648 {-| Split a text stream in two, where the first text stream is the longest
649 consecutive group of characters that don't satisfy the predicate
655 -> Producer Text m (Producer Text m r)
656 break predicate = span (not . predicate)
657 {-# INLINABLE break #-}
659 {-| Split a text stream into sub-streams delimited by characters that satisfy the
666 -> PP.FreeT (Producer Text m) m r
667 splitWith predicate p0 = PP.FreeT (go0 p0)
672 Left r -> return (PP.Pure r)
676 else return $ PP.Free $ do
677 p'' <- span (not . predicate) (yield txt >> p')
678 return $ PP.FreeT (go1 p'')
683 Right (_, p') -> PP.Free $ do
684 p'' <- span (not . predicate) p'
685 return $ PP.FreeT (go1 p'')
686 {-# INLINABLE splitWith #-}
688 -- | Split a text stream using the given 'Char' as the delimiter
692 -> FreeT (Producer Text m) m r
693 split c = splitWith (c ==)
694 {-# INLINABLE split #-}
696 {-| Group a text stream into 'FreeT'-delimited text streams using the supplied
701 => (Char -> Char -> Bool)
703 -> FreeT (Producer Text m) m r
704 groupBy equal p0 = PP.FreeT (go p0)
709 Left r -> return (PP.Pure r)
710 Right (txt, p') -> case (T.uncons txt) of
713 return $ PP.Free $ do
714 p'' <- span (equal c) (yield txt >> p')
715 return $ PP.FreeT (go p'')
716 {-# INLINABLE groupBy #-}
718 -- | Group a text stream into 'FreeT'-delimited text streams of identical characters
720 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
722 {-# INLINABLE group #-}
724 {-| Split a text stream into 'FreeT'-delimited lines
727 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
728 lines p0 = PP.FreeT (go0 p0)
733 Left r -> return (PP.Pure r)
737 else return $ PP.Free $ go1 (yield txt >> p')
739 p' <- break ('\n' ==) p
740 return $ PP.FreeT (go2 p')
745 Right (_, p') -> PP.Free (go1 p')
746 {-# INLINABLE lines #-}
750 -- | Split a text stream into 'FreeT'-delimited words
752 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
753 words p0 = removeEmpty (splitWith isSpace p0)
755 removeEmpty f = PP.FreeT $ do
758 PP.Pure r -> return (PP.Pure r)
762 Left f' -> PP.runFreeT (removeEmpty f')
763 Right (bs, p') -> return $ PP.Free $ do
766 return (removeEmpty f')
767 {-# INLINABLE words #-}
769 -- | Intersperse a 'Char' in between the characters of the text stream
771 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
778 Right (txt, p') -> do
779 yield (T.intersperse c txt)
785 Right (txt, p') -> do
786 yield (T.singleton c)
787 yield (T.intersperse c txt)
789 {-# INLINABLE intersperse #-}
791 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
792 interspersing a text stream in between them
796 => Producer Text m ()
797 -> FreeT (Producer Text m) m r
802 x <- lift (PP.runFreeT f)
804 PP.Pure r -> return r
809 x <- lift (PP.runFreeT f)
811 PP.Pure r -> return r
816 {-# INLINABLE intercalate #-}
818 {-| Join 'FreeT'-delimited lines into a text stream
821 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
825 x <- lift (PP.runFreeT f)
827 PP.Pure r -> return r
830 yield $ T.singleton '\n'
832 {-# INLINABLE unlines #-}
834 {-| Join 'FreeT'-delimited words into a text stream
837 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
838 unwords = intercalate (yield $ T.pack " ")
839 {-# INLINABLE unwords #-}
842 The following parsing utilities are single-character analogs of the ones found
847 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
849 @Data.Text@ re-exports the 'Text' type.
851 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).