1 {-# LANGUAGE RankNTypes, TypeFamilies, NoMonomorphismRestriction #-}
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
6 be associated with a 'Producer' or 'Consumer' according as it is read or written to.
8 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
9 example, the following program copies a document from one file to another:
12 > import qualified Data.Text.Pipes as Text
16 > withFile "inFile.txt" ReadMode $ \hIn ->
17 > withFile "outFile.txt" WriteMode $ \hOut ->
18 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
20 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
23 > import qualified Data.Text.Pipes as Text
26 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
28 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
29 and 'stdout' proxies, as with the following \"echo\" program:
31 > main = runEffect $ Text.stdin >-> Text.stdout
33 You can also translate pure lazy 'TL.Text's to and from proxies:
35 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
37 In addition, this module provides many functions equivalent to lazy
38 'Text' functions so that you can transform or fold text streams. For
39 example, to stream only the first three lines of 'stdin' to 'stdout' you
43 > import qualified Pipes.Text as Text
44 > import qualified Pipes.Parse as Parse
46 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
48 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
50 The above program will never bring more than one chunk of text (~ 32 KB) into
51 memory, no matter how long the lines are.
53 Note that functions in this library are designed to operate on streams that
54 are insensitive to text boundaries. This means that they may freely split
55 text into smaller texts and /discard empty texts/. However, they will
56 /never concatenate texts/ in order to provide strict upper bounds on memory
127 -- * Character Parsers
141 import Control.Exception (throwIO, try)
142 import Control.Monad (liftM, unless)
143 import Control.Monad.Trans.State.Strict (StateT(..))
144 import Data.Monoid ((<>))
145 import qualified Data.Text as T
146 import qualified Data.Text.IO as T
147 import qualified Data.Text.Encoding as TE
148 import qualified Data.Text.Encoding.Error as TE
149 import Data.Text (Text)
150 import qualified Data.Text.Lazy as TL
151 import qualified Data.Text.Lazy.IO as TL
152 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
153 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
154 import Data.ByteString (ByteString)
155 import qualified Data.ByteString as B
156 import Data.Char (ord, isSpace)
157 import Data.Functor.Identity (Identity)
158 import qualified Data.List as List
159 import Foreign.C.Error (Errno(Errno), ePIPE)
160 import qualified GHC.IO.Exception as G
162 import qualified Pipes.ByteString as PB
163 import qualified Pipes.ByteString.Parse as PBP
164 import qualified Pipes.Text.Internal as PE
165 import Pipes.Text.Parse (
166 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
167 import Pipes.Core (respond, Server')
168 import qualified Pipes.Parse as PP
169 import Pipes.Parse (input, concat, FreeT)
170 import qualified Pipes.Safe.Prelude as Safe
171 import qualified Pipes.Safe as Safe
172 import Pipes.Safe (MonadSafe(..), Base(..))
173 import qualified Pipes.Prelude as P
174 import qualified System.IO as IO
175 import Data.Char (isSpace)
176 import Data.Word (Word8)
177 import Prelude hiding (
206 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
207 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
208 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
209 {-# INLINABLE fromLazy #-}
211 -- | Stream text from 'stdin'
212 stdin :: MonadIO m => Producer Text m (Producer ByteString m ())
213 stdin = fromHandle IO.stdin
214 {-# INLINABLE stdin #-}
216 {-| Convert a 'IO.Handle' into a text stream using a text size
217 determined by the good sense of the text library.
220 fromHandle :: MonadIO m => IO.Handle -> Producer Text m (Producer ByteString m ())
221 fromHandle h = decodeUtf8 (PB.fromHandle h)
222 {-# INLINE fromHandle#-}
224 {-| Stream text from a file using Pipes.Safe
226 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
227 MAIN = PUTSTRLN "HELLO WORLD"
230 readFile :: (MonadSafe m) => FilePath -> Producer Text m (Producer ByteString m ())
231 readFile file = Safe.withFile file IO.ReadMode fromHandle
232 {-# INLINABLE readFile #-}
234 {-| Stream lines of text from stdin (for testing in ghci etc.)
236 >>> let safely = runSafeT . runEffect
237 >>> safely $ for Text.stdinLn (lift . lift . print . T.length)
244 stdinLn :: MonadIO m => Producer' Text m ()
247 eof <- liftIO (IO.hIsEOF IO.stdin)
249 txt <- liftIO (T.hGetLine IO.stdin)
254 {-| Stream text to 'stdout'
256 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
258 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
259 @(source >-> stdout)@ in suitable cases.
261 stdout :: MonadIO m => Consumer' Text m ()
266 x <- liftIO $ try (T.putStr txt)
268 Left (G.IOError { G.ioe_type = G.ResourceVanished
269 , G.ioe_errno = Just ioe })
272 Left e -> liftIO (throwIO e)
274 {-# INLINABLE stdout #-}
276 stdoutLn :: (MonadIO m) => Consumer' Text m ()
281 x <- liftIO $ try (T.putStrLn str)
283 Left (G.IOError { G.ioe_type = G.ResourceVanished
284 , G.ioe_errno = Just ioe })
287 Left e -> liftIO (throwIO e)
289 {-# INLINABLE stdoutLn #-}
291 {-| Convert a text stream into a 'Handle'
293 Note: again, for best performance, where possible use
294 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
296 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
297 toHandle h = for cat (liftIO . T.hPutStr h)
298 {-# INLINABLE toHandle #-}
300 {-# RULES "p >-> toHandle h" forall p h .
301 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
305 -- | Stream text into a file. Uses @pipes-safe@.
306 writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
307 writeFile file = Safe.withFile file IO.WriteMode toHandle
309 -- | Apply a transformation to each 'Char' in the stream
310 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
311 map f = P.map (T.map f)
312 {-# INLINABLE map #-}
314 {-# RULES "p >-> map f" forall p f .
315 p >-> map f = for p (\txt -> yield (T.map f txt))
318 -- | Map a function over the characters of a text stream and concatenate the results
320 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
321 concatMap f = P.map (T.concatMap f)
322 {-# INLINABLE concatMap #-}
324 {-# RULES "p >-> concatMap f" forall p f .
325 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
328 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
329 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
330 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
331 encodeUtf8 :: Monad m => Pipe Text ByteString m r
332 encodeUtf8 = P.map TE.encodeUtf8
333 {-# INLINEABLE encodeUtf8 #-}
335 {-# RULES "p >-> encodeUtf8" forall p .
336 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
339 -- | Transform a Pipe of 'String's into one of 'Text' chunks
340 pack :: Monad m => Pipe String Text m r
342 {-# INLINEABLE pack #-}
344 {-# RULES "p >-> pack" forall p .
345 p >-> pack = for p (\txt -> yield (T.pack txt))
348 -- | Transform a Pipes of 'Text' chunks into one of 'String's
349 unpack :: Monad m => Pipe Text String m r
350 unpack = for cat (\t -> yield (T.unpack t))
351 {-# INLINEABLE unpack #-}
353 {-# RULES "p >-> unpack" forall p .
354 p >-> unpack = for p (\txt -> yield (T.unpack txt))
357 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
358 -- here acting on a 'Text' pipe, rather as they would on a lazy text
359 toCaseFold :: Monad m => Pipe Text Text m ()
360 toCaseFold = P.map T.toCaseFold
361 {-# INLINEABLE toCaseFold #-}
363 {-# RULES "p >-> toCaseFold" forall p .
364 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
368 -- | lowercase incoming 'Text'
369 toLower :: Monad m => Pipe Text Text m ()
370 toLower = P.map T.toLower
371 {-# INLINEABLE toLower #-}
373 {-# RULES "p >-> toLower" forall p .
374 p >-> toLower = for p (\txt -> yield (T.toLower txt))
377 -- | uppercase incoming 'Text'
378 toUpper :: Monad m => Pipe Text Text m ()
379 toUpper = P.map T.toUpper
380 {-# INLINEABLE toUpper #-}
382 {-# RULES "p >-> toUpper" forall p .
383 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
386 -- | Remove leading white space from an incoming succession of 'Text's
387 stripStart :: Monad m => Pipe Text Text m r
390 let text = T.stripStart chunk
394 {-# INLINEABLE stripStart #-}
396 -- | @(take n)@ only allows @n@ individual characters to pass;
397 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
398 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
399 take n0 = go n0 where
404 let len = fromIntegral (T.length txt)
406 then yield (T.take (fromIntegral n) txt)
410 {-# INLINABLE take #-}
412 -- | @(drop n)@ drops the first @n@ characters
413 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
414 drop n0 = go n0 where
419 let len = fromIntegral (T.length txt)
422 yield (T.drop (fromIntegral n) txt)
425 {-# INLINABLE drop #-}
427 -- | Take characters until they fail the predicate
428 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
429 takeWhile predicate = go
433 let (prefix, suffix) = T.span predicate txt
439 {-# INLINABLE takeWhile #-}
441 -- | Drop characters until they fail the predicate
442 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
443 dropWhile predicate = go where
446 case T.findIndex (not . predicate) txt of
451 {-# INLINABLE dropWhile #-}
453 -- | Only allows 'Char's to pass if they satisfy the predicate
454 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
455 filter predicate = P.map (T.filter predicate)
456 {-# INLINABLE filter #-}
458 {-# RULES "p >-> filter q" forall p q .
459 p >-> filter q = for p (\txt -> yield (T.filter q txt))
462 -- | Strict left scan over the characters
465 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
466 scan step begin = go begin
470 let txt' = T.scanl step c txt
474 {-# INLINABLE scan #-}
476 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
479 toLazy :: Producer Text Identity () -> TL.Text
480 toLazy = TL.fromChunks . P.toList
481 {-# INLINABLE toLazy #-}
483 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
486 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
487 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
488 immediately as they are generated instead of loading them all into memory.
490 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
491 toLazyM = liftM TL.fromChunks . P.toListM
492 {-# INLINABLE toLazyM #-}
494 -- | Reduce the text stream using a strict left fold over characters
497 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
498 fold step begin done = P.fold (T.foldl' step) begin done
499 {-# INLINABLE fold #-}
501 -- | Retrieve the first 'Char'
502 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
508 Left _ -> return Nothing
509 Right (c, _) -> return (Just c)
510 {-# INLINABLE head #-}
512 -- | Retrieve the last 'Char'
513 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
523 else go (Just $ T.last txt) p'
524 {-# INLINABLE last #-}
526 -- | Determine if the stream is empty
527 null :: (Monad m) => Producer Text m () -> m Bool
529 {-# INLINABLE null #-}
531 -- | Count the number of characters in the stream
532 length :: (Monad m, Num n) => Producer Text m () -> m n
533 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
534 {-# INLINABLE length #-}
536 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
537 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
538 any predicate = P.any (T.any predicate)
539 {-# INLINABLE any #-}
541 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
542 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
543 all predicate = P.all (T.all predicate)
544 {-# INLINABLE all #-}
546 -- | Return the maximum 'Char' within a text stream
547 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
548 maximum = P.fold step Nothing id
553 else Just $ case mc of
554 Nothing -> T.maximum txt
555 Just c -> max c (T.maximum txt)
556 {-# INLINABLE maximum #-}
558 -- | Return the minimum 'Char' within a text stream (surely very useful!)
559 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
560 minimum = P.fold step Nothing id
566 Nothing -> Just (T.minimum txt)
567 Just c -> Just (min c (T.minimum txt))
568 {-# INLINABLE minimum #-}
570 -- | Find the first element in the stream that matches the predicate
573 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
574 find predicate p = head (p >-> filter predicate)
575 {-# INLINABLE find #-}
577 -- | Index into a text stream
579 :: (Monad m, Integral a)
580 => a-> Producer Text m () -> m (Maybe Char)
581 index n p = head (p >-> drop n)
582 {-# INLINABLE index #-}
585 -- | Store a tally of how many segments match the given 'Text'
586 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
587 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
588 {-# INLINABLE count #-}
590 -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
591 -- into a Pipe of Text
593 decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
594 decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
597 case x of Left r -> return (do yield carry
599 Right (chunk, p') -> case dec0 chunk of
600 PE.Some text carry2 dec -> do yield text
602 PE.Other text bs -> do yield text
606 -- | Splits a 'Producer' after the given number of characters
608 :: (Monad m, Integral n)
611 -> Producer' Text m (Producer Text m r)
618 Left r -> return (return r)
619 Right (txt, p') -> do
620 let len = fromIntegral (T.length txt)
626 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
628 return (yield suffix >> p')
629 {-# INLINABLE splitAt #-}
631 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
633 :: (Monad m, Integral n)
634 => n -> Producer Text m r -> FreeT (Producer Text m) m r
635 chunksOf n p0 = PP.FreeT (go p0)
641 Right (txt, p') -> PP.Free $ do
642 p'' <- splitAt n (yield txt >> p')
643 return $ PP.FreeT (go p'')
644 {-# INLINABLE chunksOf #-}
646 {-| Split a text stream in two, where the first text stream is the longest
647 consecutive group of text that satisfy the predicate
653 -> Producer' Text m (Producer Text m r)
659 Left r -> return (return r)
660 Right (txt, p') -> do
661 let (prefix, suffix) = T.span predicate txt
668 return (yield suffix >> p')
669 {-# INLINABLE span #-}
671 {-| Split a text stream in two, where the first text stream is the longest
672 consecutive group of characters that don't satisfy the predicate
678 -> Producer Text m (Producer Text m r)
679 break predicate = span (not . predicate)
680 {-# INLINABLE break #-}
682 {-| Split a text stream into sub-streams delimited by characters that satisfy the
689 -> PP.FreeT (Producer Text m) m r
690 splitWith predicate p0 = PP.FreeT (go0 p0)
695 Left r -> return (PP.Pure r)
699 else return $ PP.Free $ do
700 p'' <- span (not . predicate) (yield txt >> p')
701 return $ PP.FreeT (go1 p'')
706 Right (_, p') -> PP.Free $ do
707 p'' <- span (not . predicate) p'
708 return $ PP.FreeT (go1 p'')
709 {-# INLINABLE splitWith #-}
711 -- | Split a text stream using the given 'Char' as the delimiter
715 -> FreeT (Producer Text m) m r
716 split c = splitWith (c ==)
717 {-# INLINABLE split #-}
719 {-| Group a text stream into 'FreeT'-delimited text streams using the supplied
724 => (Char -> Char -> Bool)
726 -> FreeT (Producer Text m) m r
727 groupBy equal p0 = PP.FreeT (go p0)
732 Left r -> return (PP.Pure r)
733 Right (txt, p') -> case (T.uncons txt) of
736 return $ PP.Free $ do
737 p'' <- span (equal c) (yield txt >> p')
738 return $ PP.FreeT (go p'')
739 {-# INLINABLE groupBy #-}
741 -- | Group a text stream into 'FreeT'-delimited text streams of identical characters
743 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
745 {-# INLINABLE group #-}
747 {-| Split a text stream into 'FreeT'-delimited lines
750 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
751 lines p0 = PP.FreeT (go0 p0)
756 Left r -> return (PP.Pure r)
760 else return $ PP.Free $ go1 (yield txt >> p')
762 p' <- break ('\n' ==) p
763 return $ PP.FreeT $ do
766 Left r -> return $ PP.Pure r
767 Right (_, p'') -> go0 p''
768 {-# INLINABLE lines #-}
772 -- | Split a text stream into 'FreeT'-delimited words
774 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
778 x <- next (p >-> dropWhile isSpace)
781 Right (bs, p') -> PP.Free $ do
782 p'' <- break isSpace (yield bs >> p')
784 {-# INLINABLE words #-}
787 -- | Intersperse a 'Char' in between the characters of the text stream
789 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
796 Right (txt, p') -> do
797 yield (T.intersperse c txt)
803 Right (txt, p') -> do
804 yield (T.singleton c)
805 yield (T.intersperse c txt)
807 {-# INLINABLE intersperse #-}
809 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
810 interspersing a text stream in between them
814 => Producer Text m ()
815 -> FreeT (Producer Text m) m r
820 x <- lift (PP.runFreeT f)
822 PP.Pure r -> return r
827 x <- lift (PP.runFreeT f)
829 PP.Pure r -> return r
834 {-# INLINABLE intercalate #-}
836 {-| Join 'FreeT'-delimited lines into a text stream
839 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
843 x <- lift (PP.runFreeT f)
845 PP.Pure r -> return r
848 yield $ T.singleton '\n'
850 {-# INLINABLE unlines #-}
852 {-| Join 'FreeT'-delimited words into a text stream
855 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
856 unwords = intercalate (yield $ T.pack " ")
857 {-# INLINABLE unwords #-}
860 The following parsing utilities are single-character analogs of the ones found
865 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
867 @Data.Text@ re-exports the 'Text' type.
869 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).