1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
6 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
7 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
9 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
10 example, the following program copies a document from one file to another:
13 > import qualified Data.Text.Pipes as Text
17 > withFile "inFile.txt" ReadMode $ \hIn ->
18 > withFile "outFile.txt" WriteMode $ \hOut ->
19 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
21 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
24 > import qualified Data.Text.Pipes as Text
27 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
29 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
30 and 'stdout' pipes, as with the following \"echo\" program:
32 > main = runEffect $ Text.stdin >-> Text.stdout
34 You can also translate pure lazy 'TL.Text's to and from pipes:
36 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
38 In addition, this module provides many functions equivalent to lazy
39 'Text' functions so that you can transform or fold text streams. For
40 example, to stream only the first three lines of 'stdin' to 'stdout' you
44 > import qualified Pipes.Text as Text
45 > import qualified Pipes.Parse as Parse
47 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
49 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
51 The above program will never bring more than one chunk of text (~ 32 KB) into
52 memory, no matter how long the lines are.
54 Note that functions in this library are designed to operate on streams that
55 are insensitive to text boundaries. This means that they may freely split
text into smaller texts and /discard empty texts/. However, apart from the
special case of 'concatMap', they will /never concatenate texts/, in order
to provide strict upper bounds on memory usage.
106 -- * Primitive Character Parsers
134 -- * Other Decoding/Encoding Functions
165 , module Data.ByteString
167 , module Data.Profunctor
173 import Control.Exception (throwIO, try)
174 import Control.Applicative ((<*))
175 import Control.Monad (liftM, unless, join)
176 import Control.Monad.Trans.State.Strict (StateT(..), modify)
177 import Data.Monoid ((<>))
178 import qualified Data.Text as T
179 import qualified Data.Text.IO as T
180 import qualified Data.Text.Encoding as TE
181 import qualified Data.Text.Encoding.Error as TE
182 import Data.Text (Text)
183 import qualified Data.Text.Lazy as TL
184 import qualified Data.Text.Lazy.IO as TL
185 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
186 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
187 import Data.ByteString (ByteString)
188 import qualified Data.ByteString as B
189 import qualified Data.ByteString.Char8 as B8
190 import Data.Char (ord, isSpace)
191 import Data.Functor.Constant (Constant(Constant, getConstant))
192 import Data.Functor.Identity (Identity)
193 import Data.Profunctor (Profunctor)
194 import qualified Data.Profunctor
195 import qualified Data.List as List
196 import Foreign.C.Error (Errno(Errno), ePIPE)
197 import qualified GHC.IO.Exception as G
199 import qualified Pipes.ByteString as PB
200 import qualified Pipes.Text.Internal as PI
201 import Pipes.Text.Internal
202 import Pipes.Core (respond, Server')
203 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
204 import qualified Pipes.Group as PG
205 import qualified Pipes.Parse as PP
206 import Pipes.Parse (Parser)
207 import qualified Pipes.Safe.Prelude as Safe
208 import qualified Pipes.Safe as Safe
209 import Pipes.Safe (MonadSafe(..), Base(..))
210 import qualified Pipes.Prelude as P
211 import qualified System.IO as IO
212 import Data.Char (isSpace)
213 import Data.Word (Word8)
215 import Prelude hiding (
-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text' chunks
fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
fromLazy lazyTxt = foldrChunks step (return ()) lazyTxt
  where
    -- Emit each strict chunk, then continue with the rest of the fold.
    step chunk rest = yield chunk >> rest
{-# INLINE fromLazy #-}
-- | Stream text from 'stdin'; a simple alias for @'fromHandle' 'IO.stdin'@,
-- so it inherits 'fromHandle'\'s chunking and system-encoding behavior.
stdin :: MonadIO m => Producer Text m ()
stdin = fromHandle IO.stdin
{-| Convert an 'IO.Handle' into a text stream using a chunk size
    determined by the good sense of the text library; note that this
    is distinctly slower than @decodeUtf8 (Pipes.ByteString.fromHandle h)@
    but uses the system encoding and has other @Data.Text.IO@ features
-}
-- Reads chunks with 'T.hGetChunk' until an empty chunk signals end of input.
-- (The listing was truncated here: the recursive call and closing paren of
-- the 'unless' block were missing, leaving the definition unfinished.)
fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
fromHandle h = go where
    go = do txt <- liftIO (T.hGetChunk h)
            unless (T.null txt) ( do yield txt
                                     go )
{-# INLINABLE fromHandle #-}
{-| Stream text from a file in the simple fashion of @Data.Text.IO@

>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
MAIN = PUTSTRLN "HELLO WORLD"
-}
-- Uses @pipes-safe@'s 'Safe.withFile' so the handle is released even if the
-- downstream pipeline terminates early or throws.
readFile :: MonadSafe m => FilePath -> Producer Text m ()
readFile file = Safe.withFile file IO.ReadMode fromHandle
{-# INLINE readFile #-}
{-| Stream text to 'stdout'

    Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.

    Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@
    instead of @(source >-> stdout)@ .
-}
stdout :: MonadIO m => Consumer' Text m ()
stdout = go
  where
    go = do
        txt <- await
        x   <- liftIO $ try (T.putStr txt)
        case x of
            -- A broken pipe (EPIPE / ResourceVanished) ends the consumer
            -- quietly instead of raising, matching Unix tool conventions.
            Left (G.IOError { G.ioe_type  = G.ResourceVanished
                            , G.ioe_errno = Just ioe })
                 | Errno ioe == ePIPE
                     -> return ()
            -- Any other IO error is re-thrown.
            Left  e  -> liftIO (throwIO e)
            Right () -> go
{-# INLINABLE stdout #-}
{-| Convert a text stream into a 'Handle'

    Note: again, for best performance, where possible use
    @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
-}
toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
toHandle h = for cat (liftIO . T.hPutStr h)
{-# INLINABLE toHandle #-}

-- Rewrite the composed pipeline into a direct 'for' loop, avoiding the
-- overhead of '>->'.  (The pragma was missing its closing @#-}@.)
{-# RULES "p >-> toHandle h" forall p h .
        p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
  #-}
-- | Stream text into a file. Uses @pipes-safe@ so the handle is closed
-- promptly even on early termination.
writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
writeFile path = Safe.withFile path IO.WriteMode toHandle
{-# INLINE writeFile #-}
-- | A van Laarhoven lens (same shape as @lens@'s @Lens'@), kept local to
-- avoid a dependency on the @lens@ package.
type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
-- | A van Laarhoven isomorphism (@Profunctor@ + @Functor@ form), local
-- stand-in for @lens@'s @Iso'@.
type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
-- | View the focus of a lens: instantiating the functor at 'Constant'
-- discards the "rebuild" half, leaving just the focused value.
(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
a ^. lens = getConstant (lens Constant a)
-- | Apply a transformation to each 'Char' in the stream
map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
map f = P.map (T.map f)
{-# INLINABLE map #-}

-- Fuse '>->' with 'map' into a single 'for' loop.  (Closing @#-}@ restored.)
{-# RULES "p >-> map f" forall p f .
        p >-> map f = for p (\txt -> yield (T.map f txt))
  #-}
-- | Map a function over the characters of a text stream and concatenate the results
concatMap
    :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
concatMap f = P.map (T.concatMap f)
{-# INLINABLE concatMap #-}

-- Fusion rule; the name line of the signature and the pragma's closing
-- @#-}@ were missing from the truncated listing.
{-# RULES "p >-> concatMap f" forall p f .
        p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
  #-}
-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
encodeUtf8 :: Monad m => Pipe Text ByteString m r
encodeUtf8 = P.map TE.encodeUtf8
{-# INLINEABLE encodeUtf8 #-}

-- Fusion rule (closing @#-}@ restored).
{-# RULES "p >-> encodeUtf8" forall p .
        p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
  #-}
-- | Transform a Pipe of 'String's into one of 'Text' chunks
pack :: Monad m => Pipe String Text m r
-- The binding itself was missing from the truncated listing; it mirrors
-- the sibling pipes ('map', 'encodeUtf8') built on 'P.map'.
pack = P.map T.pack
{-# INLINEABLE pack #-}

-- Fusion rule (closing @#-}@ restored).
{-# RULES "p >-> pack" forall p .
        p >-> pack = for p (\txt -> yield (T.pack txt))
  #-}
-- | Transform a Pipe of 'Text' chunks into one of 'String's
unpack :: Monad m => Pipe Text String m r
unpack = for cat (\t -> yield (T.unpack t))
{-# INLINEABLE unpack #-}

-- Fusion rule (closing @#-}@ restored).
{-# RULES "p >-> unpack" forall p .
        p >-> unpack = for p (\txt -> yield (T.unpack txt))
  #-}
-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
-- here acting as 'Text' pipes, rather as they would on a lazy text
--
-- The return type is generalized from @()@ to a free @r@ for consistency
-- with the other chunk-wise pipes in this module ('map', 'filter', …);
-- this is backward-compatible since @r@ may be instantiated to @()@.
toCaseFold :: Monad m => Pipe Text Text m r
toCaseFold = P.map T.toCaseFold
{-# INLINEABLE toCaseFold #-}

-- Fusion rule (closing @#-}@ restored).
{-# RULES "p >-> toCaseFold" forall p .
        p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
  #-}
-- | lowercase incoming 'Text'
--
-- Return type generalized from @()@ to @r@ (backward-compatible, matches
-- the sibling chunk-wise pipes).
toLower :: Monad m => Pipe Text Text m r
toLower = P.map T.toLower
{-# INLINEABLE toLower #-}

-- Fusion rule (closing @#-}@ restored).
{-# RULES "p >-> toLower" forall p .
        p >-> toLower = for p (\txt -> yield (T.toLower txt))
  #-}
-- | uppercase incoming 'Text'
--
-- Return type generalized from @()@ to @r@ (backward-compatible, matches
-- the sibling chunk-wise pipes).
toUpper :: Monad m => Pipe Text Text m r
toUpper = P.map T.toUpper
{-# INLINEABLE toUpper #-}

-- Fusion rule (closing @#-}@ restored).
{-# RULES "p >-> toUpper" forall p .
        p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
  #-}
-- | Remove leading white space from an incoming succession of 'Text's
--
-- Keeps stripping successive chunks until one is non-empty after
-- stripping, then passes the remainder of the stream through unchanged.
-- (Body restored: the listing dropped the 'await' and the branch lines.)
stripStart :: Monad m => Pipe Text Text m r
stripStart = do
    chunk <- await
    let text = T.stripStart chunk
    if T.null text
      then stripStart          -- chunk was all whitespace; keep stripping
      else do yield text
              cat              -- leading space gone; forward the rest as-is
{-# INLINEABLE stripStart #-}
-- | @(take n)@ only allows @n@ individual characters to pass;
-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
--
-- (Truncated body restored around the surviving 'len' / 'T.take' lines.)
take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
take n0 = go n0 where
    go n
        | n <= 0    = return ()
        | otherwise = do
            txt <- await
            let len = fromIntegral (T.length txt)
            if (len > n)
                -- chunk straddles the boundary: yield only a prefix
                then yield (T.take (fromIntegral n) txt)
                else do yield txt
                        go (n - len)
{-# INLINABLE take #-}
-- | @(drop n)@ drops the first @n@ characters
--
-- (Truncated body restored around the surviving 'len' / 'T.drop' lines.)
drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
drop n0 = go n0 where
    go n
        | n <= 0    = cat
        | otherwise = do
            txt <- await
            let len = fromIntegral (T.length txt)
            if (len >= n)
                -- boundary falls inside this chunk: keep its suffix
                then do yield (T.drop (fromIntegral n) txt)
                        cat
                else go (n - len)
{-# INLINABLE drop #-}
-- | Take characters until they fail the predicate
--
-- (Truncated body restored around the surviving 'T.span' line.)
takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
takeWhile predicate = go
  where
    go = do
        txt <- await
        let (prefix, suffix) = T.span predicate txt
        if (T.null suffix)
            -- whole chunk satisfied the predicate; keep going
            then do yield txt
                    go
            -- first failing char is inside this chunk; emit prefix and stop
            else yield prefix
{-# INLINABLE takeWhile #-}
-- | Drop characters until they fail the predicate
--
-- (Truncated body restored around the surviving 'T.findIndex' line.)
dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
dropWhile predicate = go where
    go = do
        txt <- await
        case T.findIndex (not . predicate) txt of
            -- entire chunk satisfies the predicate: discard and continue
            Nothing -> go
            -- first failing char at index i: keep from there on, then pass through
            Just i -> do
                yield (T.drop i txt)
                cat
{-# INLINABLE dropWhile #-}
-- | Only allows 'Char's to pass if they satisfy the predicate
filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
filter predicate = P.map (T.filter predicate)
{-# INLINABLE filter #-}

-- Fusion rule (closing @#-}@ restored).
{-# RULES "p >-> filter q" forall p q .
        p >-> filter q = for p (\txt -> yield (T.filter q txt))
  #-}
-- | Strict left scan over the characters
--
-- (Name line and loop body restored around the surviving 'T.scanl' line.)
scan
    :: (Monad m)
    => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
scan step begin = go begin
  where
    go c = do
        txt <- await
        let txt' = T.scanl step c txt
            -- T.scanl never returns empty (it includes the seed), so
            -- T.last is safe; c' seeds the scan of the next chunk.
            c'   = T.last txt'
        yield txt'
        go c'
{-# INLINABLE scan #-}
{-| Fold a pure 'Producer' of strict 'Text's into a lazy
    'TL.Text'
-}
toLazy :: Producer Text Identity () -> TL.Text
toLazy = TL.fromChunks . P.toList
{-# INLINABLE toLazy #-}
{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
    'TL.Text'

    Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
    simple testing purposes. Idiomatic @pipes@ style consumes the chunks
    immediately as they are generated instead of loading them all into memory.
-}
toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
toLazyM = liftM TL.fromChunks . P.toListM
{-# INLINABLE toLazyM #-}
-- | Reduce the text stream using a strict left fold over characters
--
-- (The signature's name line was missing from the truncated listing.)
foldChars
    :: Monad m
    => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
foldChars step begin done = P.fold (T.foldl' step) begin done
{-# INLINABLE foldChars #-}
-- | Retrieve the first 'Char'
--
-- (Body restored around the surviving case branches.)
head :: (Monad m) => Producer Text m () -> m (Maybe Char)
head = go
  where
    go p = do
        x <- nextChar p
        case x of
            Left  _      -> return Nothing
            Right (c, _) -> return (Just c)
{-# INLINABLE head #-}
-- | Retrieve the last 'Char'
--
-- Threads the most-recently-seen final character through the loop;
-- empty chunks are skipped so 'T.last' is only applied to non-empty text.
-- (Body restored around the surviving @go (Just $ T.last txt) p'@ line.)
last :: (Monad m) => Producer Text m () -> m (Maybe Char)
last = go Nothing
  where
    go r p = do
        x <- next p
        case x of
            Left () -> return r
            Right (txt, p') ->
                if (T.null txt)
                    then go r p'
                    else go (Just $ T.last txt) p'
{-# INLINABLE last #-}
-- | Determine if the stream is empty
--
-- A stream is empty when every chunk is empty.
-- (The binding itself was missing from the truncated listing.)
null :: (Monad m) => Producer Text m () -> m Bool
null = P.all T.null
{-# INLINABLE null #-}
-- | Count the number of characters in the stream
length :: (Monad m, Num n) => Producer Text m () -> m n
length = P.fold step 0 id
  where
    -- accumulate the per-chunk character counts
    step acc txt = acc + fromIntegral (T.length txt)
{-# INLINABLE length #-}
-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
any test p = P.any (T.any test) p
{-# INLINABLE any #-}
-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
all test p = P.all (T.all test) p
{-# INLINABLE all #-}
-- | Return the maximum 'Char' within a text stream
--
-- (Step function restored around the surviving case branches; empty
-- chunks are skipped so 'T.maximum' is never applied to empty text.)
maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
maximum = P.fold step Nothing id
  where
    step mc txt =
        if (T.null txt)
        then mc
        else Just $ case mc of
            Nothing -> T.maximum txt
            Just c -> max c (T.maximum txt)
{-# INLINABLE maximum #-}
-- | Return the minimum 'Char' within a text stream (surely very useful!)
--
-- (Step function restored around the surviving case branches; empty
-- chunks are skipped so 'T.minimum' is never applied to empty text.)
minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
minimum = P.fold step Nothing id
  where
    step mc txt =
        if (T.null txt)
        then mc
        else case mc of
            Nothing -> Just (T.minimum txt)
            Just c -> Just (min c (T.minimum txt))
{-# INLINABLE minimum #-}
-- | Find the first element in the stream that matches the predicate
--
-- (The signature's name line was missing from the truncated listing.)
find
    :: (Monad m)
    => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
find predicate p = head (p >-> filter predicate)
{-# INLINABLE find #-}
-- | Index into a text stream
--
-- (The signature's name line was missing from the truncated listing.)
index
    :: (Monad m, Integral a)
    => a -> Producer Text m () -> m (Maybe Char)
index n p = head (p >-> drop n)
{-# INLINABLE index #-}
-- | Store a tally of how many segments match the given 'Text'
count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
count needle p =
    P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count needle))
{-# INLINABLE count #-}
{-| Consume the first character from a stream of 'Text'

    'next' either fails with a 'Left' if the 'Producer' has no more characters or
    succeeds with a 'Right' providing the next character and the remainder of the
    'Producer'.
-}
nextChar
    :: (Monad m)
    => Producer Text m r
    -> m (Either r (Char, Producer Text m r))
nextChar = go
  where
    go p = do
        x <- next p
        case x of
            Left r -> return (Left r)
            Right (txt, p') -> case (T.uncons txt) of
                -- skip empty chunks rather than treating them as input
                Nothing -> go p'
                Just (c, txt') -> return (Right (c, yield txt' >> p'))
{-# INLINABLE nextChar #-}
{-| Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the
    'Producer' is empty
-}
-- (Doc fixed: the result is 'Maybe Char', not 'Either'; body restored
-- around the surviving case branches.)
drawChar :: (Monad m) => Parser Text m (Maybe Char)
drawChar = do
    x <- PP.draw
    case x of
        Nothing -> return Nothing
        Just txt -> case (T.uncons txt) of
            -- empty chunk: try again
            Nothing -> drawChar
            Just (c, txt') -> do
                PP.unDraw txt'   -- push back the unread remainder
                return (Just c)
{-# INLINABLE drawChar #-}
-- | Push back a 'Char' onto the underlying 'Producer'
unDrawChar :: (Monad m) => Char -> Parser Text m ()
unDrawChar c = modify (\p -> yield (T.singleton c) >> p)
{-# INLINABLE unDrawChar #-}
{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
    push the 'Char' back

> peekChar = do
>     x <- drawChar
>     case x of
>         Nothing -> return ()
>         Just c  -> unDrawChar c
>     return x
-}
-- (Doc example fixed to use 'Maybe' — 'drawChar' returns @Maybe Char@,
-- not 'Either'; body restored.)
peekChar :: (Monad m) => Parser Text m (Maybe Char)
peekChar = do
    x <- drawChar
    case x of
        Nothing -> return ()
        Just c -> unDrawChar c
    return x
{-# INLINABLE peekChar #-}
{-| Check if the underlying 'Producer' has no more characters

    Note that this will skip over empty 'Text' chunks, unlike
    'PP.isEndOfInput' from @pipes-parse@, which would consider
    an empty 'Text' a valid bit of input.

> isEndOfChars = liftM isNothing peekChar
-}
-- (Doc equation fixed: 'peekChar' returns 'Maybe', so the test is
-- 'isNothing', not 'isLeft'; body restored.)
isEndOfChars :: (Monad m) => Parser Text m Bool
isEndOfChars = do
    x <- peekChar
    case x of
        Nothing -> return True
        Just _ -> return False
{-# INLINABLE isEndOfChars #-}
{- | An improper lens into a stream of 'ByteString' expected to be UTF-8 encoded; the associated
     stream of Text ends by returning a stream of ByteStrings beginning at the point of failure.
-}
decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
                               (Producer Text m (Producer ByteString m r))
decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
                       (k (go B.empty PI.streamDecodeUtf8 p0)) where
    -- 'carry' holds bytes that may be the prefix of a valid multi-byte
    -- codepoint; 'dec0' is the current incremental decoder continuation.
    -- (The @x <- lift (next p)@ line was lost in the truncated listing.)
    go !carry dec0 p = do
        x <- lift (next p)
        case x of Left r -> return (if B.null carry
                      then return r -- all bytestring input was consumed
                      else (do yield carry -- a potentially valid fragment remains
                               return r))
                  Right (chunk, p') -> case dec0 chunk of
                      PI.Some text carry2 dec -> do yield text
                                                    go carry2 dec p'
                      PI.Other text bs -> do yield text
                                             return (do yield bs -- an invalid blob remains
                                                        p')
{-# INLINABLE decodeUtf8 #-}
-- | Splits a 'Producer' after the given number of characters
--
-- (Name line and loop restored around the surviving case/let lines.)
splitAt
    :: (Monad m, Integral n)
    => n
    -> Lens' (Producer Text m r)
             (Producer Text m (Producer Text m r))
splitAt n0 k p0 = fmap join (k (go n0 p0))
  where
    go 0 p = return p
    go n p = do
        x <- lift (next p)
        case x of
            Left r -> return (return r)
            Right (txt, p') -> do
                let len = fromIntegral (T.length txt)
                if (len <= n)
                    -- whole chunk fits in the first part
                    then do
                        yield txt
                        go (n - len) p'
                    -- split inside this chunk
                    else do
                        let (prefix, suffix) = T.splitAt (fromIntegral n) txt
                        yield prefix
                        return (yield suffix >> p')
{-# INLINABLE splitAt #-}
{-| Split a text stream in two, where the first text stream is the longest
    consecutive group of text that satisfy the predicate
-}
-- (Name line and loop restored around the surviving 'T.span' lines.)
span
    :: (Monad m)
    => (Char -> Bool)
    -> Lens' (Producer Text m r)
             (Producer Text m (Producer Text m r))
span predicate k p0 = fmap join (k (go p0))
  where
    go p = do
        x <- lift (next p)
        case x of
            Left r -> return (return r)
            Right (txt, p') -> do
                let (prefix, suffix) = T.span predicate txt
                if (T.null suffix)
                    -- whole chunk satisfies the predicate; continue
                    then do
                        yield txt
                        go p'
                    -- boundary is inside this chunk
                    else do
                        yield prefix
                        return (yield suffix >> p')
{-# INLINABLE span #-}
{-| Split a text stream in two, where the first text stream is the longest
    consecutive group of characters that don't satisfy the predicate
-}
-- (The signature's name line was missing from the truncated listing.)
break
    :: (Monad m)
    => (Char -> Bool)
    -> Lens' (Producer Text m r)
             (Producer Text m (Producer Text m r))
break predicate = span (not . predicate)
{-# INLINABLE break #-}
{-| Improper lens that splits after the first group of equivalent Chars, as
    defined by the given equivalence relation
-}
-- (Signature name line and loop restored around the surviving case lines.)
groupBy
    :: (Monad m)
    => (Char -> Char -> Bool)
    -> Lens' (Producer Text m r)
             (Producer Text m (Producer Text m r))
groupBy equals k p0 = fmap join (k ((go p0))) where
    go p = do
        x <- lift (next p)
        case x of
            Left r -> return (return r)
            Right (txt, p') -> case T.uncons txt of
                -- skip empty chunks
                Nothing -> go p'
                -- group = maximal run equivalent to the first char
                Just (c, _) -> (yield txt >> p') ^. span (equals c)
{-# INLINABLE groupBy #-}
-- | Improper lens that splits after the first succession of identical 'Char' s
--
-- (Signature head and binding restored; it is 'groupBy' at ('==').)
group :: Monad m
      => Lens' (Producer Text m r)
               (Producer Text m (Producer Text m r))
group = groupBy (==)
{-# INLINABLE group #-}
{-| Improper lens that splits a 'Producer' after the first word

    Unlike 'words', this does not drop leading whitespace
-}
-- (Signature head and the 'break' step restored around the surviving
-- @p^.span isSpace@ line.)
word :: (Monad m)
     => Lens' (Producer Text m r)
              (Producer Text m (Producer Text m r))
word k p0 = fmap join (k (to p0))
  where
    to p = do
        -- pass through leading whitespace, then take up to the next space
        p' <- p^.span isSpace
        p'^.break isSpace
{-# INLINABLE word #-}
-- | Improper lens that splits a 'Producer' after the first line
-- (i.e. before the first @'\n'@).
-- (The signature's name line was missing from the truncated listing.)
line :: (Monad m)
     => Lens' (Producer Text m r)
              (Producer Text m (Producer Text m r))
line = break (== '\n')
{-# INLINABLE line #-}
-- | Intersperse a 'Char' in between the characters of stream of 'Text'
--
-- (Name line and loop scaffolding restored around the surviving yields:
-- @go0@ handles the first chunk, @go1@ prepends the separator before
-- every subsequent chunk so chunk boundaries are also separated.)
intersperse
    :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
intersperse c = go0
  where
    go0 p = do
        x <- lift (next p)
        case x of
            Left r -> return r
            Right (txt, p') -> do
                yield (T.intersperse c txt)
                go1 p'
    go1 p = do
        x <- lift (next p)
        case x of
            Left r -> return r
            Right (txt, p') -> do
                yield (T.singleton c)
                yield (T.intersperse c txt)
                go1 p'
{-# INLINABLE intersperse #-}
-- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer'
-- of 'Text' chunks
-- (Doc fixed: it previously described 'ByteString's and 'Word8's — a
-- copy-paste from @Pipes.ByteString@ — while the type is Char/Text.)
packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
packChars = Data.Profunctor.dimap to (fmap from)
  where
    -- to :: Monad m => Producer Char m x -> Producer Text m x
    -- Collect chars into difference lists per fixed-size group, then pack.
    to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
    step diffAs c = diffAs . (c:)
    done diffAs = T.pack (diffAs [])
    -- from :: Monad m => Producer Text m x -> Producer Char m x
    from p = for p (each . T.unpack)
{-# INLINABLE packChars #-}
-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
--
-- (Name line and loop head restored around the surviving 'splitAt' step.)
chunksOf
    :: (Monad m, Integral n)
    => n -> Lens' (Producer Text m r)
                  (FreeT (Producer Text m) m r)
chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
  where
    go p = do
        x <- next p
        return $ case x of
            Left r -> Pure r
            Right (txt, p') -> Free $ do
                p'' <- (yield txt >> p') ^. splitAt n
                return $ FreeT (go p'')
{-# INLINABLE chunksOf #-}
{-| Split a text stream into sub-streams delimited by characters that satisfy the
    predicate
-}
-- (Signature and loop scaffolding restored around the surviving 'span'
-- steps: @go0@ skips empty chunks before the first segment, @go1@ drops
-- the delimiter character between segments.)
splitsWith
    :: (Monad m)
    => (Char -> Bool)
    -> Producer Text m r
    -> FreeT (Producer Text m) m r
splitsWith predicate p0 = FreeT (go0 p0)
  where
    go0 p = do
        x <- next p
        case x of
            Left r -> return (Pure r)
            Right (txt, p') ->
                if (T.null txt)
                then go0 p'
                else return $ Free $ do
                    p'' <- (yield txt >> p') ^. span (not . predicate)
                    return $ FreeT (go1 p'')
    go1 p = do
        x <- nextChar p
        return $ case x of
            Left r -> Pure r
            Right (_, p') -> Free $ do
                p'' <- p' ^. span (not . predicate)
                return $ FreeT (go1 p'')
{-# INLINABLE splitsWith #-}
-- | Split a text stream using the given 'Char' as the delimiter
--
-- (The name/argument line of the binding was missing from the listing.)
splits :: (Monad m)
       => Char
       -> Lens' (Producer Text m r)
                (FreeT (Producer Text m) m r)
splits c k p =
    fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
{-# INLINABLE splits #-}
{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
    given equivalence relation
-}
-- (Signature name line and the empty-chunk branch restored.)
groupsBy
    :: Monad m
    => (Char -> Char -> Bool)
    -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
    go p = do x <- next p
              case x of Left r -> return (Pure r)
                        Right (bs, p') -> case T.uncons bs of
                            -- skip empty chunks
                            Nothing -> go p'
                            Just (c, _) -> do return $ Free $ do
                                                p'' <- (yield bs >> p')^.span (equals c)
                                                return $ FreeT (go p'')
{-# INLINABLE groupsBy #-}
-- | Like 'groupsBy', where the equality predicate is ('==')
--
-- (The signature's name line was missing from the truncated listing.)
groups
    :: Monad m
    => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
groups = groupsBy (==)
{-# INLINABLE groups #-}
{-| Split a text stream into 'FreeT'-delimited lines
-}
-- (Scaffolding restored around the surviving branches: @go0@ skips empty
-- chunks and opens a line; @go1@ takes up to @'\n'@ and discards the
-- newline itself before starting the next line.)
lines
    :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
lines = Data.Profunctor.dimap _lines (fmap _unlines)
  where
    _lines p0 = FreeT (go0 p0)
      where
        go0 p = do
            x <- next p
            case x of
                Left r -> return (Pure r)
                Right (txt, p') ->
                    if (T.null txt)
                    then go0 p'
                    else return $ Free $ go1 (yield txt >> p')
        go1 p = do
            p' <- p ^. break ('\n' ==)
            return $ FreeT $ do
                x <- nextChar p'
                case x of
                    Left r -> return $ Pure r
                    Right (_, p'') -> go0 p''
    -- _unlines
    --     :: Monad m
    --     => FreeT (Producer Text m) m x -> Producer Text m x
    _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
{-# INLINABLE lines #-}
-- | Split a text stream into 'FreeT'-delimited words
--
-- (Loop scaffolding restored around the surviving lines: leading
-- whitespace is dropped before each word, then the word runs to the next
-- space via 'break'.)
words
    :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
words = Data.Profunctor.dimap go (fmap _unwords)
  where
    go p = FreeT $ do
        x <- next (p >-> dropWhile isSpace)
        return $ case x of
            Left r -> Pure r
            Right (bs, p') -> Free $ do
                p'' <- (yield bs >> p') ^. break isSpace
                return (go p'')
    _unwords = PG.intercalates (yield $ T.singleton ' ')
{-# INLINABLE words #-}
{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
    interspersing a text stream in between them
-}
-- (Loop scaffolding restored around the surviving @runFreeT@ lines:
-- @go0@ emits the first segment without a separator; @go1@ prefixes the
-- separator stream @p0@ before every later segment.)
intercalate
    :: (Monad m)
    => Producer Text m ()
    -> FreeT (Producer Text m) m r
    -> Producer Text m r
intercalate p0 = go0
  where
    go0 f = do
        x <- lift (runFreeT f)
        case x of
            Pure r -> return r
            Free p -> do
                f' <- p
                go1 f'
    go1 f = do
        x <- lift (runFreeT f)
        case x of
            Pure r -> return r
            Free p -> do
                p0
                f' <- p
                go1 f'
{-# INLINABLE intercalate #-}
{-| Join 'FreeT'-delimited lines into a text stream
-}
-- (Loop restored around the surviving @runFreeT@ / newline-yield lines:
-- each segment is emitted followed by a single @'\n'@.)
unlines
    :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
unlines = go
  where
    go f = do
        x <- lift (runFreeT f)
        case x of
            Pure r -> return r
            Free p -> do
                f' <- p
                yield $ T.singleton '\n'
                go f'
{-# INLINABLE unlines #-}
{-| Join 'FreeT'-delimited words into a text stream
-}
-- (The signature's name line was missing from the truncated listing.)
unwords
    :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
unwords = intercalate (yield $ T.singleton ' ')
{-# INLINABLE unwords #-}
The following parsing utilities are single-character analogs of the ones found in @Pipes.ByteString@.
1053 @Data.Text@ re-exports the 'Text' type.
1055 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
{- | Use a 'Codec' as a pipes-style 'Lens' into a byte stream; the available 'Codec' s are
     'utf8', 'utf16_le', 'utf16_be', 'utf32_le', 'utf32_be' . The 'Codec' concept and the
     individual 'Codec' definitions follow the enumerator and conduit libraries.

     Utf8 is handled differently in this library -- without the use of 'unsafePerformIO' &co
     to catch 'Text' exceptions; but the same @mypipe ^. codec utf8@ interface can be used.
     @mypipe ^. decodeUtf8@ should be the same, but has a somewhat more direct and thus perhaps
     better implementation.
-}
-- (Loop scaffolding restored around the surviving 'PI.Decoding' branches;
-- the lens re-encodes leftover 'Text' with the codec's encoder on the way
-- back, mirroring 'decodeUtf8'.)
codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
                                    (k (decoder (dec B.empty) p0) ) where
    decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
    decoder !d p = case d of
        PI.Other txt bad -> do yield txt
                               return (do yield bad -- an undecodable blob remains
                                          p)
        PI.Some txt extra dec' -> do yield txt
                                     x <- lift (next p)
                                     case x of Left r -> return (do yield extra
                                                                    return r)
                                               Right (chunk,p1) -> decoder (dec' chunk) p1
{-# INLINABLE codec #-}
{- | ascii and latin encodings only represent a small fragment of 'Text'; thus we cannot
     use the pipes 'Lens' style to work with them. Rather we simply define functions
     that return the unencodable remainder of the stream, if any.

     'encodeAscii' : Reduce as much of your stream of 'Text' actually is ascii to a byte stream,
     returning the rest of the 'Text' at the first non-ascii 'Char'
-}
-- (Branch scaffolding restored around the surviving 'T.span' lines.)
encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
encodeAscii = go where
  go p = do echunk <- lift (next p)
            case echunk of
              Left r -> return (return r)
              Right (chunk, p') ->
                 if T.null chunk
                   then go p'
                   else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
                        in do yield (B8.pack (T.unpack safe))
                              if T.null unsafe
                                then go p'
                                -- stop at the first non-ascii char, returning it
                                -- (and the rest of the stream) to the caller
                                else return $ do yield unsafe
                                                 p'
{- | Reduce as much of your stream of 'Text' actually is iso8859 or latin1 to a byte stream,
     returning the rest of the 'Text' upon hitting any non-latin 'Char'
-}
-- (Branch scaffolding restored around the surviving 'T.span' lines;
-- identical to 'encodeAscii' but with the 0xFF latin-1 bound.)
encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
encodeIso8859_1 = go where
  go p = do etxt <- lift (next p)
            case etxt of
              Left r -> return (return r)
              Right (txt, p') ->
                 if T.null txt
                   then go p'
                   else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
                        in do yield (B8.pack (T.unpack safe))
                              if T.null unsafe
                                then go p'
                                else return $ do yield unsafe
                                                 p'
{- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
     unused 'ByteString' upon hitting an un-ascii byte.
-}
-- (Branch scaffolding restored around the surviving 'B.span' lines.)
decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeAscii = go where
  go p = do echunk <- lift (next p)
            case echunk of
              Left r -> return (return r)
              Right (chunk, p') ->
                 if B.null chunk
                   then go p'
                   else let (safe, unsafe) = B.span (<= 0x7F) chunk
                        in do yield (T.pack (B8.unpack safe))
                              if B.null unsafe
                                then go p'
                                -- hand the non-ascii remainder back to the caller
                                else return $ do yield unsafe
                                                 p'
{- | Reduce a byte stream to a corresponding stream of latin-1 chars, returning the
     unused 'ByteString' upon hitting the rare un-latinizable byte.
-}
-- (Branch scaffolding restored around the surviving 'B.span' lines.
-- NOTE(review): every 'Word8' satisfies (<= 0xFF), so 'unsafe' is always
-- empty here — the original kept the same shape as 'decodeAscii' anyway.)
decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeIso8859_1 = go where
  go p = do echunk <- lift (next p)
            case echunk of
              Left r -> return (return r)
              Right (chunk, p') ->
                 if B.null chunk
                   then go p'
                   else let (safe, unsafe) = B.span (<= 0xFF) chunk
                        in do yield (T.pack (B8.unpack safe))
                              if B.null unsafe
                                then go p'
                                else return $ do yield unsafe
                                                 p'