1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-}
2 #if __GLASGOW_HASKELL__ >= 702
3 {-# LANGUAGE Trustworthy #-}
5 {-| This module provides @pipes@ utilities for \"text streams\", which are
6 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
7 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
8 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' depending on whether it is read from or written to.
11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
12 example, the following program copies a document from one file to another:
15 > import qualified Data.Text.Pipes as Text
19 > withFile "inFile.txt" ReadMode $ \hIn ->
20 > withFile "outFile.txt" WriteMode $ \hOut ->
21 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
23 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
26 > import qualified Data.Text.Pipes as Text
29 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
32 and 'stdout' proxies, as with the following \"echo\" program:
34 > main = runEffect $ Text.stdin >-> Text.stdout
36 You can also translate pure lazy 'TL.Text's to and from proxies:
38 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
40 In addition, this module provides many functions equivalent to lazy
41 'Text' functions so that you can transform or fold text streams. For
42 example, to stream only the first three lines of 'stdin' to 'stdout' you
46 > import qualified Pipes.Text as Text
47 > import qualified Pipes.Parse as Parse
49 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
51 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
53 The above program will never bring more than one chunk of text (~ 32 KB) into
54 memory, no matter how long the lines are.
56 Note that functions in this library are designed to operate on streams that
57 are insensitive to text boundaries. This means that they may freely split
text into smaller texts and /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
to provide strict upper bounds on memory usage.
131 -- * Character Parsers
145 import Control.Exception (throwIO, try)
146 import Control.Monad (liftM, unless, join)
147 import Control.Monad.Trans.State.Strict (StateT(..))
148 import Data.Monoid ((<>))
149 import qualified Data.Text as T
150 import qualified Data.Text.IO as T
151 import qualified Data.Text.Encoding as TE
152 import qualified Data.Text.Encoding.Error as TE
153 import Data.Text (Text)
154 import qualified Data.Text.Lazy as TL
155 import qualified Data.Text.Lazy.IO as TL
156 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
157 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
158 import Data.ByteString (ByteString)
159 import qualified Data.ByteString as B
160 import Data.Char (ord, isSpace)
161 import Data.Functor.Identity (Identity)
162 import qualified Data.List as List
163 import Foreign.C.Error (Errno(Errno), ePIPE)
164 import qualified GHC.IO.Exception as G
166 import qualified Pipes.ByteString as PB
167 import qualified Pipes.ByteString as PBP
168 import qualified Pipes.Text.Internal as PE
169 import Pipes.Text.Internal (Codec(..))
170 import Pipes.Text.Parse (
171 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
172 import Pipes.Core (respond, Server')
173 import qualified Pipes.Parse as PP
174 import Pipes.Parse ( FreeT)
175 import qualified Pipes.Safe.Prelude as Safe
176 import qualified Pipes.Safe as Safe
177 import Pipes.Safe (MonadSafe(..), Base(..))
178 import qualified Pipes.Prelude as P
179 import qualified System.IO as IO
180 import Data.Char (isSpace)
181 import Data.Word (Word8)
182 import Prelude hiding (
-- | Turn a lazy 'TL.Text' into a 'Producer' that emits its strict chunks
--   one at a time, in order.
fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
fromLazy lazyText = foldrChunks (\chunk rest -> yield chunk >> rest) (return ()) lazyText
{-# INLINE fromLazy #-}
-- | Stream text from 'stdin', chunk by chunk, by delegating to 'fromHandle'
--   applied to 'IO.stdin'.
stdin :: MonadIO m => Producer Text m ()
stdin = fromHandle IO.stdin
{-| Convert a 'IO.Handle' into a text stream using a text size
    determined by the good sense of the text library; note that this
    is distinctly slower than @decodeUtf8 (Pipes.ByteString.fromHandle h)@
    but uses the system encoding and has other `Data.Text.IO` features
-}
fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
fromHandle h = go where
      go = do txt <- liftIO (T.hGetChunk h)
              unless (T.null txt) $ do yield txt
                                       -- NOTE(review): the loop continuation appears
                                       -- truncated in this revision; a recursive call
                                       -- to 'go' is expected here so reading continues
                                       -- until 'T.hGetChunk' returns empty -- confirm
                                       -- against version control.
{-# INLINABLE fromHandle#-}
{-| Stream the contents of a file as 'Text' chunks, in the simple fashion
    of @Data.Text.IO@.  The handle is managed by @pipes-safe@.

>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
MAIN = PUTSTRLN "HELLO WORLD"
-}
readFile :: MonadSafe m => FilePath -> Producer Text m ()
readFile path = Safe.withFile path IO.ReadMode (\handle -> fromHandle handle)
{-# INLINE readFile #-}
{-| Stream lines of text from stdin (for testing in ghci etc.)

>>> let safely = runSafeT . runEffect
>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
-}
stdinLn :: MonadIO m => Producer' Text m ()
-- NOTE(review): the loop skeleton appears truncated in this revision (the
-- 'go' wiring, the EOF guard, the 'yield' of each line and the recursion are
-- not visible); the surviving fragments test 'IO.hIsEOF' and read one line
-- with 'T.hGetLine' -- confirm against version control.
    eof <- liftIO (IO.hIsEOF IO.stdin)
        txt <- liftIO (T.hGetLine IO.stdin)
{-# INLINABLE stdinLn #-}
{-| Stream text to 'stdout'

    Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.

    Note: For best performance, use @(for source (liftIO . putStr))@ instead of
    @(source >-> stdout)@ in suitable cases.
-}
stdout :: MonadIO m => Consumer' Text m ()
-- NOTE(review): the consuming loop appears truncated in this revision; the
-- visible fragments write each chunk with 'T.putStr' under 'try', pattern
-- match on 'ResourceVanished' (apparently to stop quietly on EPIPE -- the
-- 'ePIPE' comparison is not visible, confirm) and rethrow any other IOError.
    x  <- liftIO $ try (T.putStr txt)
    Left (G.IOError { G.ioe_type  = G.ResourceVanished
                    , G.ioe_errno = Just ioe })
    Left e  -> liftIO (throwIO e)
{-# INLINABLE stdout #-}
-- | Stream text to 'stdout', writing each incoming chunk with 'T.putStrLn'
-- (i.e. appending a newline per chunk).
-- NOTE(review): the loop appears truncated in this revision; like 'stdout'
-- the visible fragments match 'ResourceVanished' (presumably to stop quietly
-- on EPIPE -- confirm) and rethrow any other IOError.
stdoutLn :: (MonadIO m) => Consumer' Text m ()
    x <- liftIO $ try (T.putStrLn str)
    Left (G.IOError { G.ioe_type  = G.ResourceVanished
                    , G.ioe_errno = Just ioe })
    Left e  -> liftIO (throwIO e)
{-# INLINABLE stdoutLn #-}
{-| Feed a text stream into an 'IO.Handle', writing each chunk as it arrives.

    Note: again, for best performance, where possible use
    @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
-}
toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
toHandle h = for cat (\chunk -> liftIO (T.hPutStr h chunk))
{-# INLINABLE toHandle #-}
311 {-# RULES "p >-> toHandle h" forall p h .
312 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
-- | Consume a text stream and write it to the named file, opening and
--   closing the handle via @pipes-safe@.
writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
writeFile path = Safe.withFile path IO.WriteMode (\handle -> toHandle handle)
{-# INLINE writeFile #-}
-- | Apply a character-level transformation to every chunk flowing through
--   the pipe (equivalent to @P.map (T.map f)@; cf. the rewrite rule below).
map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
map f = for cat (\chunk -> yield (T.map f chunk))
{-# INLINABLE map #-}
326 {-# RULES "p >-> map f" forall p f .
327 p >-> map f = for p (\txt -> yield (T.map f txt))
-- | Map a function over the characters of a text stream and concatenate the results
-- NOTE(review): the binding-name line of this signature ('concatMap') is not
-- visible in this revision -- the signature head appears truncated; confirm
-- against version control.
    :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
concatMap f = P.map (T.concatMap f)
{-# INLINABLE concatMap #-}
336 {-# RULES "p >-> concatMap f" forall p f .
337 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
-- encoding.  Behaves as @Pipes.Prelude.map TE.encodeUtf8@, so more complex
-- encoding pipes can easily be constructed with the functions in
-- @Data.Text.Encoding@.
encodeUtf8 :: Monad m => Pipe Text ByteString m r
encodeUtf8 = for cat (yield . TE.encodeUtf8)
{-# INLINEABLE encodeUtf8 #-}
347 {-# RULES "p >-> encodeUtf8" forall p .
348 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
-- | Transform a Pipe of 'String's into one of 'Text' chunks
pack :: Monad m => Pipe String Text m r
-- NOTE(review): the defining equation is missing from this revision
-- (presumably @pack = P.map T.pack@, cf. the rewrite rule below) --
-- confirm against version control.
{-# INLINEABLE pack #-}
356 {-# RULES "p >-> pack" forall p .
357 p >-> pack = for p (\txt -> yield (T.pack txt))
-- | Transform a Pipe of 'Text' chunks into one of 'String's
--   (equivalent to @for cat (yield . T.unpack)@; cf. the rewrite rule below).
unpack :: Monad m => Pipe Text String m r
unpack = P.map T.unpack
{-# INLINEABLE unpack #-}
365 {-# RULES "p >-> unpack" forall p .
366 p >-> unpack = for p (\txt -> yield (T.unpack txt))
-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text'
-- utilities, here acting on a 'Text' pipe, rather as they would on a lazy text.
--
-- Case-folds each chunk with 'T.toCaseFold'.  The return type is generalized
-- from @m ()@ to @m r@ for consistency with 'map', 'filter' and the other
-- chunkwise pipes in this module: the pipe never returns on its own, so the
-- polymorphic type is sound and strictly more composable (backward-compatible
-- for all existing callers).
toCaseFold :: Monad m => Pipe Text Text m r
toCaseFold = P.map T.toCaseFold
{-# INLINEABLE toCaseFold #-}
375 {-# RULES "p >-> toCaseFold" forall p .
376 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
-- | Lowercase incoming 'Text' chunkwise with 'T.toLower'.  Return type
-- generalized from @m ()@ to @m r@ (backward-compatible; matches 'map',
-- 'filter' and friends -- the pipe never terminates on its own).
toLower :: Monad m => Pipe Text Text m r
toLower = P.map T.toLower
{-# INLINEABLE toLower #-}
385 {-# RULES "p >-> toLower" forall p .
386 p >-> toLower = for p (\txt -> yield (T.toLower txt))
-- | Uppercase incoming 'Text' chunkwise with 'T.toUpper'.  Return type
-- generalized from @m ()@ to @m r@ (backward-compatible; matches 'map',
-- 'filter' and friends -- the pipe never terminates on its own).
toUpper :: Monad m => Pipe Text Text m r
toUpper = P.map T.toUpper
{-# INLINEABLE toUpper #-}
394 {-# RULES "p >-> toUpper" forall p .
395 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
-- | Remove leading white space from an incoming succession of 'Text's
stripStart :: Monad m => Pipe Text Text m r
-- NOTE(review): the body is truncated in this revision; only the binding that
-- strips one chunk with 'T.stripStart' survives -- confirm against VCS.
    let text = T.stripStart chunk
{-# INLINEABLE stripStart #-}
-- | @(take n)@ only allows @n@ individual characters to pass;
--  contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
take n0 = go n0 where
    -- NOTE(review): the loop around these fragments (awaiting a chunk,
    -- comparing 'len' to the remaining budget, recursing with the budget
    -- decremented) is truncated in this revision -- confirm against VCS.
        let len = fromIntegral (T.length txt)
            then yield (T.take (fromIntegral n) txt)
{-# INLINABLE take #-}
-- | @(drop n)@ drops the first @n@ characters
drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
drop n0 = go n0 where
    -- NOTE(review): the loop around these fragments (awaiting a chunk,
    -- comparing 'len' to the remaining count, forwarding the rest) is
    -- truncated in this revision -- confirm against VCS.
        let len = fromIntegral (T.length txt)
            yield (T.drop (fromIntegral n) txt)
{-# INLINABLE drop #-}
-- | Take characters until they fail the predicate
takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
takeWhile predicate = go
    -- NOTE(review): the loop is truncated in this revision; the surviving
    -- fragment splits each chunk with 'T.span' at the first failing
    -- character -- confirm against VCS.
        let (prefix, suffix) = T.span predicate txt
{-# INLINABLE takeWhile #-}
-- | Drop characters until they fail the predicate
dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
dropWhile predicate = go where
    -- NOTE(review): the loop is truncated in this revision; the surviving
    -- fragment locates the first character failing the predicate with
    -- 'T.findIndex' -- confirm against VCS.
        case T.findIndex (not . predicate) txt of
{-# INLINABLE dropWhile #-}
-- | Pass through only the 'Char's that satisfy the predicate, filtering
--   each chunk in place (equivalent to @P.map (T.filter predicate)@; cf.
--   the rewrite rule below).
filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
filter keep = for cat (yield . T.filter keep)
{-# INLINABLE filter #-}
470 {-# RULES "p >-> filter q" forall p q .
471 p >-> filter q = for p (\txt -> yield (T.filter q txt))
-- | Strict left scan over the characters
-- NOTE(review): the binding-name line of this signature ('scan') and the
-- body's loop (threading the last scanned character between chunks via
-- 'T.scanl') are truncated in this revision -- confirm against VCS.
    => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
scan step begin = go begin
        let txt' = T.scanl step c txt
{-# INLINABLE scan #-}
{-| Fold a pure 'Producer' of strict 'Text's into a single lazy 'TL.Text'.
-}
toLazy :: Producer Text Identity () -> TL.Text
toLazy p = TL.fromChunks (P.toList p)
{-# INLINABLE toLazy #-}
{-| Fold an effectful 'Producer' of strict 'Text's into a single lazy
    'TL.Text'.

    Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
    simple testing purposes.  Idiomatic @pipes@ style consumes the chunks
    immediately as they are generated instead of loading them all into memory.
-}
toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
toLazyM p = liftM TL.fromChunks (P.toListM p)
{-# INLINABLE toLazyM #-}
-- | Reduce the text stream using a strict left fold over characters.
--   The fold runs 'T.foldl'' within each chunk and 'P.fold' across chunks,
--   so no chunk is ever retained.
foldChars
    :: Monad m
    => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
foldChars step begin done = P.fold (T.foldl' step) begin done
-- Fixed: the pragma previously named 'fold', which is not a binding in this
-- module -- GHC rejects an INLINABLE pragma that lacks an accompanying binding.
{-# INLINABLE foldChars #-}
-- | Retrieve the first 'Char'
head :: (Monad m) => Producer Text m () -> m (Maybe Char)
-- NOTE(review): the equation head and the scrutinee (presumably
-- @nextChar p@) are truncated in this revision -- confirm against VCS.
        Left _ -> return Nothing
        Right (c, _) -> return (Just c)
{-# INLINABLE head #-}
-- | Retrieve the last 'Char'
last :: (Monad m) => Producer Text m () -> m (Maybe Char)
-- NOTE(review): the loop is truncated in this revision; the surviving
-- fragment carries the last character of each non-empty chunk forward --
-- confirm against VCS.
            else go (Just $ T.last txt) p'
{-# INLINABLE last #-}
-- | Determine if the stream is empty
null :: (Monad m) => Producer Text m () -> m Bool
-- NOTE(review): the defining equation is missing from this revision
-- (presumably @null = P.all T.null@) -- confirm against VCS.
{-# INLINABLE null #-}
-- | Count the number of characters in the stream by summing the length of
--   each chunk into a strict left fold.
length :: (Monad m, Num n) => Producer Text m () -> m n
length = P.fold step 0 id
  where
    step acc chunk = acc + fromIntegral (T.length chunk)
{-# INLINABLE length #-}
-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate;
--   short-circuits as soon as one chunk contains a satisfying character.
any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
any predicate = P.any chunkHasMatch
  where
    chunkHasMatch = T.any predicate
{-# INLINABLE any #-}
-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate;
--   short-circuits as soon as one chunk contains a failing character.
all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
all predicate = P.all wholeChunkMatches
  where
    wholeChunkMatches = T.all predicate
{-# INLINABLE all #-}
-- | Return the maximum 'Char' within a text stream
maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
maximum = P.fold step Nothing id
    -- NOTE(review): the head of 'step' (its equation and the empty-chunk
    -- guard) is truncated in this revision -- confirm against VCS.
            else Just $ case mc of
                Nothing -> T.maximum txt
                Just c -> max c (T.maximum txt)
{-# INLINABLE maximum #-}
-- | Return the minimum 'Char' within a text stream (surely very useful!)
minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
minimum = P.fold step Nothing id
    -- NOTE(review): the head of 'step' (its equation and the empty-chunk
    -- guard) is truncated in this revision -- confirm against VCS.
                Nothing -> Just (T.minimum txt)
                Just c -> Just (min c (T.minimum txt))
{-# INLINABLE minimum #-}
-- | Find the first element in the stream that matches the predicate
-- NOTE(review): the binding-name line of this signature ('find') is not
-- visible in this revision -- confirm against VCS.
    => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
find predicate p = head (p >-> filter predicate)
{-# INLINABLE find #-}
-- | Index into a text stream
-- NOTE(review): the binding-name line of this signature ('index') is not
-- visible in this revision -- confirm against VCS.
    :: (Monad m, Integral a)
    => a-> Producer Text m () -> m (Maybe Char)
index n p = head (p >-> drop n)
{-# INLINABLE index #-}
-- | Store a tally of how many segments match the given 'Text': each chunk's
--   'T.count' is summed into a strict fold (equivalent to the original
--   @p >-> P.map g@ formulation, per the pipes map law).
count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
count needle p = P.fold (+) 0 id (for p (yield . fromIntegral . T.count needle))
{-# INLINABLE count #-}
-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text
-- returning a Pipe of ByteStrings that begins at the point of failure.

decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
  -- NOTE(review): the body appears truncated in this revision; the draw from
  -- the upstream producer (presumably @x <- lift (next p)@) and the recursive
  -- continuations after each branch are not visible -- confirm against VCS.
  go !carry dec0 p = do
      case x of Left r -> if B.null carry
                  then return (return r)       -- all bytestring input was consumed
                  else return (do yield carry  -- a potentially valid fragment remains
                Right (chunk, p') -> case dec0 chunk of
                  PE.Some text carry2 dec -> do yield text
                  PE.Other text bs -> do yield text
                       return (do yield bs     -- an invalid blob remains
{-# INLINABLE decodeUtf8 #-}
-- | Splits a 'Producer' after the given number of characters
-- NOTE(review): the binding-name line ('splitAt'), parts of the signature and
-- the loop skeleton are truncated in this revision; the surviving fragments
-- split the boundary chunk with 'T.splitAt' and return the suffix re-yielded
-- onto the remainder -- confirm against VCS.
    :: (Monad m, Integral n)
    -> Producer' Text m (Producer Text m r)
        Left r -> return (return r)
        Right (txt, p') -> do
            let len = fromIntegral (T.length txt)
                let (prefix, suffix) = T.splitAt (fromIntegral n) txt
                return (yield suffix >> p')
{-# INLINABLE splitAt #-}
-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
-- NOTE(review): the binding-name line ('chunksOf') and the 'Left'/empty cases
-- of the loop are truncated in this revision; each layer delegates to
-- 'splitAt' -- confirm against VCS.
    :: (Monad m, Integral n)
    => n -> Producer Text m r -> FreeT (Producer Text m) m r
chunksOf n p0 = PP.FreeT (go p0)
            Right (txt, p') -> PP.Free $ do
                p'' <- splitAt n (yield txt >> p')
                return $ PP.FreeT (go p'')
{-# INLINABLE chunksOf #-}
{-| Split a text stream in two, where the first text stream is the longest
    consecutive group of text that satisfy the predicate
-}
-- NOTE(review): the binding-name line ('span'), the signature head and parts
-- of the loop are truncated in this revision; the surviving fragments split
-- the boundary chunk with 'T.span' -- confirm against VCS.
    -> Producer' Text m (Producer Text m r)
        Left r -> return (return r)
        Right (txt, p') -> do
            let (prefix, suffix) = T.span predicate txt
                return (yield suffix >> p')
{-# INLINABLE span #-}
{-| Split a text stream in two, where the first text stream is the longest
    consecutive group of characters that don't satisfy the predicate
-}
-- NOTE(review): the signature head is truncated in this revision; only the
-- result-type line survives -- confirm against VCS.
    -> Producer Text m (Producer Text m r)
break predicate = span (not . predicate)
{-# INLINABLE break #-}
{-| Split a text stream into sub-streams delimited by characters that satisfy the
    predicate
-}
-- NOTE(review): the signature head and parts of the 'go0'/'go1' loops are
-- truncated in this revision; each layer takes the longest non-matching run
-- via @span (not . predicate)@ -- confirm against VCS.
    -> PP.FreeT (Producer Text m) m r
splitWith predicate p0 = PP.FreeT (go0 p0)
        Left r -> return (PP.Pure r)
            else return $ PP.Free $ do
                p'' <- span (not . predicate) (yield txt >> p')
                return $ PP.FreeT (go1 p'')
        Right (_, p') -> PP.Free $ do
            p'' <- span (not . predicate) p'
            return $ PP.FreeT (go1 p'')
{-# INLINABLE splitWith #-}
-- | Split a text stream using the given 'Char' as the delimiter
-- NOTE(review): the binding-name line ('split') and the signature head are
-- truncated in this revision -- confirm against VCS.
    -> FreeT (Producer Text m) m r
split c = splitWith (c ==)
{-# INLINABLE split #-}
{-| Group a text stream into 'FreeT'-delimited text streams using the supplied
    equality predicate
-}
-- NOTE(review): parts of the signature and the 'T.uncons' branches are
-- truncated in this revision; each group is the longest run equal (under
-- @equal c@) to the first character seen -- confirm against VCS.
    => (Char -> Char -> Bool)
    -> FreeT (Producer Text m) m r
groupBy equal p0 = PP.FreeT (go p0)
        Left r -> return (PP.Pure r)
        Right (txt, p') -> case (T.uncons txt) of
            return $ PP.Free $ do
                p'' <- span (equal c) (yield txt >> p')
                return $ PP.FreeT (go p'')
{-# INLINABLE groupBy #-}
-- | Group a text stream into 'FreeT'-delimited text streams of identical characters
-- NOTE(review): the binding-name line and defining equation are missing from
-- this revision (presumably @group = groupBy (==)@) -- confirm against VCS.
    :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
{-# INLINABLE group #-}
{-| Split a text stream into 'FreeT'-delimited lines
-}
-- NOTE(review): the binding-name line of the signature and parts of the
-- 'go0'/'go1' loops are truncated in this revision; each line is the run up
-- to the next newline (@break ('\n' ==)@), with the delimiter itself dropped
-- before recursing -- confirm against VCS.
    :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
lines p0 = PP.FreeT (go0 p0)
        Left r -> return (PP.Pure r)
            else return $ PP.Free $ go1 (yield txt >> p')
        p' <- break ('\n' ==) p
        return $ PP.FreeT $ do
            Left r -> return $ PP.Pure r
            Right (_, p'') -> go0 p''
{-# INLINABLE lines #-}
-- | Split a text stream into 'FreeT'-delimited words
-- NOTE(review): the binding-name line ('words') and parts of the loop are
-- truncated in this revision; the visible fragments drop leading whitespace
-- and then 'break isSpace' to delimit each word -- confirm against VCS.
    :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
    x <- next (p >-> dropWhile isSpace)
    Right (bs, p') -> PP.Free $ do
        p'' <- break isSpace (yield bs >> p')
{-# INLINABLE words #-}
-- | Intersperse a 'Char' in between the characters of the text stream
-- NOTE(review): the binding-name line ('intersperse') and the loop skeleton
-- are truncated in this revision; the visible fragments intersperse within
-- each chunk via 'T.intersperse' and emit a single separator between chunks
-- -- confirm against VCS.
    :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
        Right (txt, p') -> do
            yield (T.intersperse c txt)
        Right (txt, p') -> do
            yield (T.singleton c)
            yield (T.intersperse c txt)
{-# INLINABLE intersperse #-}
{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
    interspersing a text stream in between them
-}
-- NOTE(review): the signature head and both loop bodies are truncated in this
-- revision; only the 'runFreeT' scrutinies and their 'Pure' cases are visible
-- -- confirm against VCS.
    => Producer Text m ()
    -> FreeT (Producer Text m) m r
        x <- lift (PP.runFreeT f)
            PP.Pure r -> return r
        x <- lift (PP.runFreeT f)
            PP.Pure r -> return r
{-# INLINABLE intercalate #-}
{-| Join 'FreeT'-delimited lines into a text stream
-}
-- NOTE(review): the loop is truncated in this revision; the visible fragments
-- run each inner producer and yield a newline ('T.singleton' @'\n'@) after it
-- -- confirm against VCS.
    :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
        x <- lift (PP.runFreeT f)
            PP.Pure r -> return r
            yield $ T.singleton '\n'
{-# INLINABLE unlines #-}
{-| Join 'FreeT'-delimited words into a text stream, yielding a single space
    between consecutive word streams (via 'intercalate').
-}
unwords :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
unwords wordStreams = intercalate (yield (T.pack " ")) wordStreams
{-# INLINABLE unwords #-}
877 The following parsing utilities are single-character analogs of the ones found
882 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
884 @Data.Text@ re-exports the 'Text' type.
886 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
-- | Resume decoding from an explicit 'PE.Decoding' state: emit decoded 'Text'
-- downstream and, on invalid input or end of stream, return a producer of the
-- residual 'ByteString's.
-- NOTE(review): both the commented-out draft below and the live equation are
-- partially truncated in this revision (e.g. the draw @x <- lift (next p0)@
-- in the 'PE.Some' branch is not visible) -- confirm against VCS.
decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
-- decode codec = go B.empty where
--   do x <- lift (next p0)
--      case x of Right (chunk, p) ->
--                  do let (text, stuff) = codecDecode codec (B.append extra chunk)
--                     case stuff of Right extra' -> go extra' p
--                                   Left (exc,bs) -> do yield text
--                                                       return (do yield bs
--               Left r -> return (do yield extra
decode d p0 = case d of
  PE.Other txt bad -> do yield txt
  PE.Some txt extra dec -> do yield txt
      case x of Left r -> return (do yield extra
                Right (chunk,p1) -> decode (dec chunk) p1
915 -- go !carry dec0 p = do
916 -- x <- lift (next p)
917 -- case x of Left r -> if B.null carry
918 -- then return (return r) -- all bytestrinput was consumed
919 -- else return (do yield carry -- a potentially valid fragment remains
922 -- Right (chunk, p') -> case dec0 chunk of
923 -- PE.Some text carry2 dec -> do yield text
925 -- PE.Other text bs -> do yield text
926 -- return (do yield bs -- an invalid blob remains
928 -- {-# INLINABLE decodeUtf8 #-}