1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
2 #if __GLASGOW_HASKELL__ >= 702
3 {-# LANGUAGE Trustworthy #-}
5 {-| This module provides @pipes@ utilities for \"text streams\", which are
6 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
7 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
8 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
9 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
12 example, the following program copies a document from one file to another:
15 > import qualified Data.Text.Pipes as Text
19 > withFile "inFile.txt" ReadMode $ \hIn ->
20 > withFile "outFile.txt" WriteMode $ \hOut ->
21 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
23 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
26 > import qualified Data.Text.Pipes as Text
29 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
32 and 'stdout' pipes, as with the following \"echo\" program:
34 > main = runEffect $ Text.stdin >-> Text.stdout
36 You can also translate pure lazy 'TL.Text's to and from pipes:
38 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
40 In addition, this module provides many functions equivalent to lazy
41 'Text' functions so that you can transform or fold text streams. For
42 example, to stream only the first three lines of 'stdin' to 'stdout' you
46 > import qualified Pipes.Text as Text
47 > import qualified Pipes.Parse as Parse
49 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
51 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
53 The above program will never bring more than one chunk of text (~ 32 KB) into
54 memory, no matter how long the lines are.
56 Note that functions in this library are designed to operate on streams that
57 are insensitive to text boundaries. This means that they may freely split
text into smaller texts and /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
to provide strict upper bounds on memory usage.
108 -- * Primitive Character Parsers
136 -- * Other Decoding/Encoding Functions
167 , module Data.ByteString
169 , module Data.Profunctor
175 import Control.Exception (throwIO, try)
176 import Control.Applicative ((<*))
177 import Control.Monad (liftM, unless, join)
178 import Control.Monad.Trans.State.Strict (StateT(..), modify)
179 import Data.Monoid ((<>))
180 import qualified Data.Text as T
181 import qualified Data.Text.IO as T
182 import qualified Data.Text.Encoding as TE
183 import qualified Data.Text.Encoding.Error as TE
184 import Data.Text (Text)
185 import qualified Data.Text.Lazy as TL
186 import qualified Data.Text.Lazy.IO as TL
187 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
188 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
189 import Data.ByteString (ByteString)
190 import qualified Data.ByteString as B
191 import qualified Data.ByteString.Char8 as B8
192 import Data.Char (ord, isSpace)
193 import Data.Functor.Constant (Constant(Constant, getConstant))
194 import Data.Functor.Identity (Identity)
195 import Data.Profunctor (Profunctor)
196 import qualified Data.Profunctor
197 import qualified Data.List as List
198 import Foreign.C.Error (Errno(Errno), ePIPE)
199 import qualified GHC.IO.Exception as G
201 import qualified Pipes.ByteString as PB
202 import qualified Pipes.Text.Internal as PI
203 import Pipes.Text.Internal
204 import Pipes.Core (respond, Server')
205 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
206 import qualified Pipes.Group as PG
207 import qualified Pipes.Parse as PP
208 import Pipes.Parse (Parser)
209 import qualified Pipes.Safe.Prelude as Safe
210 import qualified Pipes.Safe as Safe
211 import Pipes.Safe (MonadSafe(..), Base(..))
212 import qualified Pipes.Prelude as P
213 import qualified System.IO as IO
214 import Data.Char (isSpace)
215 import Data.Word (Word8)
217 import Prelude hiding (
-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's,
-- yielding one chunk per chunk of the lazy text.
fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
fromLazy lazyText = foldrChunks (\chunk rest -> yield chunk >> rest) (return ()) lazyText
{-# INLINE fromLazy #-}
-- | Stream text from 'stdin'
--
-- Delegates to 'fromHandle', so chunk boundaries are whatever
-- 'T.hGetChunk' returns for the standard-input handle.
stdin :: MonadIO m => Producer Text m ()
stdin = fromHandle IO.stdin
256 {-| Convert a 'IO.Handle' into a text stream using a text size
257 determined by the good sense of the text library; note that this
is distinctly slower than @decodeUtf8 (Pipes.ByteString.fromHandle h)@
259 but uses the system encoding and has other `Data.Text.IO` features
262 fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
263 fromHandle h = go where
264 go = do txt <- liftIO (T.hGetChunk h)
265 unless (T.null txt) ( do yield txt
267 {-# INLINABLE fromHandle#-}
{-| Stream text from a file in the simple fashion of @Data.Text.IO@

>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
MAIN = PUTSTRLN "HELLO WORLD"
-}
readFile :: MonadSafe m => FilePath -> Producer Text m ()
readFile path = Safe.withFile path IO.ReadMode fromHandle
{-# INLINE readFile #-}
281 {-| Stream text to 'stdout'
283 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
285 Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@
286 instead of @(source >-> stdout)@ .
288 stdout :: MonadIO m => Consumer' Text m ()
293 x <- liftIO $ try (T.putStr txt)
295 Left (G.IOError { G.ioe_type = G.ResourceVanished
296 , G.ioe_errno = Just ioe })
299 Left e -> liftIO (throwIO e)
301 {-# INLINABLE stdout #-}
{-| Convert a text stream into a 'Handle'

    Note: again, for best performance, where possible use
    @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
-}
toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
toHandle handle = for cat (\chunk -> liftIO (T.hPutStr handle chunk))
{-# INLINABLE toHandle #-}
313 {-# RULES "p >-> toHandle h" forall p h .
314 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
-- | Stream text into a file. Uses @pipes-safe@, so the handle is released
-- through 'Safe.withFile' even when the surrounding computation is cut short.
writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
writeFile path = Safe.withFile path IO.WriteMode toHandle
{-# INLINE writeFile #-}
-- Van Laarhoven lens with a monomorphic focus; the "improper lenses" in this
-- module ('splitAt', 'span', 'chunksOf', ...) are expressed in this shape.
type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)

-- Profunctor-encoded isomorphism (same shape as @Iso'@ from the @lens@
-- library); used by 'packChars', 'lines' and 'words'.
type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
-- | View the focus of a getter-style lens: run the lens with the 'Constant'
-- functor and unwrap the result (a minimal, local stand-in for @view@).
(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
x ^. l = getConstant (l Constant x)
-- | Apply a transformation to each 'Char' in the stream
map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
map f = for cat (\chunk -> yield (T.map f chunk))
{-# INLINABLE map #-}
337 {-# RULES "p >-> map f" forall p f .
338 p >-> map f = for p (\txt -> yield (T.map f txt))
341 -- | Map a function over the characters of a text stream and concatenate the results
343 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
344 concatMap f = P.map (T.concatMap f)
345 {-# INLINABLE concatMap #-}
347 {-# RULES "p >-> concatMap f" forall p f .
348 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
-- encoding; semantically @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@, so
-- more complex encoding pipes can easily be constructed with the functions in
-- @Data.Text.Encoding@
encodeUtf8 :: Monad m => Pipe Text ByteString m r
encodeUtf8 = for cat (yield . TE.encodeUtf8)
{-# INLINABLE encodeUtf8 #-}
358 {-# RULES "p >-> encodeUtf8" forall p .
359 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
362 -- | Transform a Pipe of 'String's into one of 'Text' chunks
363 pack :: Monad m => Pipe String Text m r
365 {-# INLINEABLE pack #-}
367 {-# RULES "p >-> pack" forall p .
368 p >-> pack = for p (\txt -> yield (T.pack txt))
-- | Transform a Pipe of 'Text' chunks into one of 'String's
unpack :: Monad m => Pipe Text String m r
unpack = P.map T.unpack
{-# INLINABLE unpack #-}
376 {-# RULES "p >-> unpack" forall p .
377 p >-> unpack = for p (\txt -> yield (T.unpack txt))
-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
-- here acting as 'Text' pipes, rather as they would on a lazy text
--
-- The return type is polymorphic (@m r@, generalizing the former @m ()@) for
-- consistency with 'map' and 'filter'; unit-typed callers still unify with it.
toCaseFold :: Monad m => Pipe Text Text m r
toCaseFold = P.map T.toCaseFold
{-# INLINABLE toCaseFold #-}
386 {-# RULES "p >-> toCaseFold" forall p .
387 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
-- | Lowercase incoming 'Text'
--
-- Return type generalized from @m ()@ to @m r@, matching 'map' and 'filter';
-- this is backward compatible since @r@ unifies with @()@.
toLower :: Monad m => Pipe Text Text m r
toLower = P.map T.toLower
{-# INLINABLE toLower #-}
396 {-# RULES "p >-> toLower" forall p .
397 p >-> toLower = for p (\txt -> yield (T.toLower txt))
-- | Uppercase incoming 'Text'
--
-- Return type generalized from @m ()@ to @m r@, matching 'map' and 'filter';
-- this is backward compatible since @r@ unifies with @()@.
toUpper :: Monad m => Pipe Text Text m r
toUpper = P.map T.toUpper
{-# INLINABLE toUpper #-}
405 {-# RULES "p >-> toUpper" forall p .
406 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
409 -- | Remove leading white space from an incoming succession of 'Text's
410 stripStart :: Monad m => Pipe Text Text m r
413 let text = T.stripStart chunk
418 {-# INLINEABLE stripStart #-}
420 -- | @(take n)@ only allows @n@ individual characters to pass;
421 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
422 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
423 take n0 = go n0 where
428 let len = fromIntegral (T.length txt)
430 then yield (T.take (fromIntegral n) txt)
434 {-# INLINABLE take #-}
436 -- | @(drop n)@ drops the first @n@ characters
437 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
438 drop n0 = go n0 where
443 let len = fromIntegral (T.length txt)
446 yield (T.drop (fromIntegral n) txt)
449 {-# INLINABLE drop #-}
451 -- | Take characters until they fail the predicate
452 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
453 takeWhile predicate = go
457 let (prefix, suffix) = T.span predicate txt
463 {-# INLINABLE takeWhile #-}
465 -- | Drop characters until they fail the predicate
466 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
467 dropWhile predicate = go where
470 case T.findIndex (not . predicate) txt of
475 {-# INLINABLE dropWhile #-}
-- | Only allows 'Char's to pass if they satisfy the predicate
filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
filter keep = for cat (\chunk -> yield (T.filter keep chunk))
{-# INLINABLE filter #-}
482 {-# RULES "p >-> filter q" forall p q .
483 p >-> filter q = for p (\txt -> yield (T.filter q txt))
486 -- | Strict left scan over the characters
489 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
490 scan step begin = go begin
494 let txt' = T.scanl step c txt
498 {-# INLINABLE scan #-}
{-| Fold a pure 'Producer' of strict 'Text's into a lazy 'TL.Text'
-}
toLazy :: Producer Text Identity () -> TL.Text
toLazy prod = TL.fromChunks (P.toList prod)
{-# INLINABLE toLazy #-}
{-| Fold an effectful 'Producer' of strict 'Text's into a lazy 'TL.Text'

    Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
    simple testing purposes. Idiomatic @pipes@ style consumes the chunks
    immediately as they are generated instead of loading them all into memory.
-}
toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
toLazyM prod = liftM TL.fromChunks (P.toListM prod)
{-# INLINABLE toLazyM #-}
518 -- | Reduce the text stream using a strict left fold over characters
521 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
522 foldChars step begin done = P.fold (T.foldl' step) begin done
523 {-# INLINABLE foldChars #-}
525 -- | Retrieve the first 'Char'
526 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
532 Left _ -> return Nothing
533 Right (c, _) -> return (Just c)
534 {-# INLINABLE head #-}
536 -- | Retrieve the last 'Char'
537 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
547 else go (Just $ T.last txt) p'
548 {-# INLINABLE last #-}
550 -- | Determine if the stream is empty
551 null :: (Monad m) => Producer Text m () -> m Bool
553 {-# INLINABLE null #-}
-- | Count the number of characters in the stream
length :: (Monad m, Num n) => Producer Text m () -> m n
length = P.fold step 0 id
  where
    -- accumulate the character count of each chunk
    step acc chunk = acc + fromIntegral (T.length chunk)
{-# INLINABLE length #-}
-- | Fold that returns whether any received 'Char' satisfies the predicate
any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
any test = P.any (\chunk -> T.any test chunk)
{-# INLINABLE any #-}
-- | Fold that returns whether all received 'Char's satisfy the predicate
all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
all test = P.all (\chunk -> T.all test chunk)
{-# INLINABLE all #-}
570 -- | Return the maximum 'Char' within a text stream
571 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
572 maximum = P.fold step Nothing id
577 else Just $ case mc of
578 Nothing -> T.maximum txt
579 Just c -> max c (T.maximum txt)
580 {-# INLINABLE maximum #-}
582 -- | Return the minimum 'Char' within a text stream (surely very useful!)
583 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
584 minimum = P.fold step Nothing id
590 Nothing -> Just (T.minimum txt)
591 Just c -> Just (min c (T.minimum txt))
592 {-# INLINABLE minimum #-}
595 -- | Find the first element in the stream that matches the predicate
598 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
599 find predicate p = head (p >-> filter predicate)
600 {-# INLINABLE find #-}
602 -- | Index into a text stream
604 :: (Monad m, Integral a)
605 => a-> Producer Text m () -> m (Maybe Char)
606 index n p = head (p >-> drop n)
607 {-# INLINABLE index #-}
-- | Store a tally of how many segments match the given 'Text'
count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
count needle prod = P.sum (prod >-> P.map (fromIntegral . T.count needle))
{-# INLINABLE count #-}
616 {-| Consume the first character from a stream of 'Text'
618 'next' either fails with a 'Left' if the 'Producer' has no more characters or
619 succeeds with a 'Right' providing the next character and the remainder of the
625 -> m (Either r (Char, Producer Text m r))
631 Left r -> return (Left r)
632 Right (txt, p') -> case (T.uncons txt) of
634 Just (c, txt') -> return (Right (c, yield txt' >> p'))
635 {-# INLINABLE nextChar #-}
637 {-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
640 drawChar :: (Monad m) => Parser Text m (Maybe Char)
644 Nothing -> return Nothing
645 Just txt -> case (T.uncons txt) of
650 {-# INLINABLE drawChar #-}
-- | Push back a 'Char' onto the underlying 'Producer' by prepending a
-- singleton chunk to the leftover stream held in the parser state.
unDrawChar :: (Monad m) => Char -> Parser Text m ()
unDrawChar c = modify (\leftovers -> yield (T.singleton c) >> leftovers)
{-# INLINABLE unDrawChar #-}
657 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
663 > Left _ -> return ()
664 > Right c -> unDrawChar c
667 peekChar :: (Monad m) => Parser Text m (Maybe Char)
672 Just c -> unDrawChar c
674 {-# INLINABLE peekChar #-}
676 {-| Check if the underlying 'Producer' has no more characters
678 Note that this will skip over empty 'Text' chunks, unlike
679 'PP.isEndOfInput' from @pipes-parse@, which would consider
680 an empty 'Text' a valid bit of input.
682 > isEndOfChars = liftM isLeft peekChar
684 isEndOfChars :: (Monad m) => Parser Text m Bool
690 {-# INLINABLE isEndOfChars #-}
693 {- | An improper lens into a stream of 'ByteString' expected to be UTF-8 encoded; the associated
694 stream of Text ends by returning a stream of ByteStrings beginning at the point of failure.
697 decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
698 (Producer Text m (Producer ByteString m r))
699 decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
700 (k (go B.empty PI.streamDecodeUtf8 p0)) where
701 go !carry dec0 p = do
703 case x of Left r -> return (if B.null carry
704 then return r -- all bytestring input was consumed
705 else (do yield carry -- a potentially valid fragment remains
708 Right (chunk, p') -> case dec0 chunk of
709 PI.Some text carry2 dec -> do yield text
711 PI.Other text bs -> do yield text
712 return (do yield bs -- an invalid blob remains
714 {-# INLINABLE decodeUtf8 #-}
717 -- | Splits a 'Producer' after the given number of characters
719 :: (Monad m, Integral n)
721 -> Lens' (Producer Text m r)
722 (Producer Text m (Producer Text m r))
723 splitAt n0 k p0 = fmap join (k (go n0 p0))
729 Left r -> return (return r)
730 Right (txt, p') -> do
731 let len = fromIntegral (T.length txt)
737 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
739 return (yield suffix >> p')
740 {-# INLINABLE splitAt #-}
743 {-| Split a text stream in two, where the first text stream is the longest
744 consecutive group of text that satisfy the predicate
749 -> Lens' (Producer Text m r)
750 (Producer Text m (Producer Text m r))
751 span predicate k p0 = fmap join (k (go p0))
756 Left r -> return (return r)
757 Right (txt, p') -> do
758 let (prefix, suffix) = T.span predicate txt
765 return (yield suffix >> p')
766 {-# INLINABLE span #-}
768 {-| Split a text stream in two, where the first text stream is the longest
769 consecutive group of characters that don't satisfy the predicate
774 -> Lens' (Producer Text m r)
775 (Producer Text m (Producer Text m r))
776 break predicate = span (not . predicate)
777 {-# INLINABLE break #-}
779 {-| Improper lens that splits after the first group of equivalent Chars, as
780 defined by the given equivalence relation
784 => (Char -> Char -> Bool)
785 -> Lens' (Producer Text m r)
786 (Producer Text m (Producer Text m r))
787 groupBy equals k p0 = fmap join (k ((go p0))) where
791 Left r -> return (return r)
792 Right (txt, p') -> case T.uncons txt of
794 Just (c, _) -> (yield txt >> p') ^. span (equals c)
795 {-# INLINABLE groupBy #-}
797 -- | Improper lens that splits after the first succession of identical 'Char' s
799 => Lens' (Producer Text m r)
800 (Producer Text m (Producer Text m r))
802 {-# INLINABLE group #-}
804 {-| Improper lens that splits a 'Producer' after the first word
806 Unlike 'words', this does not drop leading whitespace
809 => Lens' (Producer Text m r)
810 (Producer Text m (Producer Text m r))
811 word k p0 = fmap join (k (to p0))
814 p' <- p^.span isSpace
816 {-# INLINABLE word #-}
820 => Lens' (Producer Text m r)
821 (Producer Text m (Producer Text m r))
822 line = break (== '\n')
824 {-# INLINABLE line #-}
827 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
829 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
836 Right (txt, p') -> do
837 yield (T.intersperse c txt)
843 Right (txt, p') -> do
844 yield (T.singleton c)
845 yield (T.intersperse c txt)
847 {-# INLINABLE intersperse #-}
-- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
852 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
853 packChars = Data.Profunctor.dimap to (fmap from)
855 -- to :: Monad m => Producer Char m x -> Producer Text m x
856 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
858 step diffAs c = diffAs . (c:)
860 done diffAs = T.pack (diffAs [])
862 -- from :: Monad m => Producer Text m x -> Producer Char m x
863 from p = for p (each . T.unpack)
864 {-# INLINABLE packChars #-}
867 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
869 :: (Monad m, Integral n)
870 => n -> Lens' (Producer Text m r)
871 (FreeT (Producer Text m) m r)
872 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
878 Right (txt, p') -> Free $ do
879 p'' <- (yield txt >> p') ^. splitAt n
880 return $ FreeT (go p'')
881 {-# INLINABLE chunksOf #-}
884 {-| Split a text stream into sub-streams delimited by characters that satisfy the
891 -> FreeT (Producer Text m) m r
892 splitsWith predicate p0 = FreeT (go0 p0)
897 Left r -> return (Pure r)
901 else return $ Free $ do
902 p'' <- (yield txt >> p') ^. span (not . predicate)
903 return $ FreeT (go1 p'')
908 Right (_, p') -> Free $ do
909 p'' <- p' ^. span (not . predicate)
910 return $ FreeT (go1 p'')
911 {-# INLINABLE splitsWith #-}
913 -- | Split a text stream using the given 'Char' as the delimiter
916 -> Lens' (Producer Text m r)
917 (FreeT (Producer Text m) m r)
919 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
920 {-# INLINABLE splits #-}
922 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
923 given equivalence relation
927 => (Char -> Char -> Bool)
928 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
929 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
930 go p = do x <- next p
931 case x of Left r -> return (Pure r)
932 Right (bs, p') -> case T.uncons bs of
934 Just (c, _) -> do return $ Free $ do
935 p'' <- (yield bs >> p')^.span (equals c)
936 return $ FreeT (go p'')
937 {-# INLINABLE groupsBy #-}
940 -- | Like 'groupsBy', where the equality predicate is ('==')
943 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
944 groups = groupsBy (==)
945 {-# INLINABLE groups #-}
949 {-| Split a text stream into 'FreeT'-delimited lines
952 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
953 lines = Data.Profunctor.dimap _lines (fmap _unlines)
955 _lines p0 = FreeT (go0 p0)
960 Left r -> return (Pure r)
964 else return $ Free $ go1 (yield txt >> p')
966 p' <- p ^. break ('\n' ==)
970 Left r -> return $ Pure r
971 Right (_, p'') -> go0 p''
974 -- => FreeT (Producer Text m) m x -> Producer Text m x
975 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
978 {-# INLINABLE lines #-}
981 -- | Split a text stream into 'FreeT'-delimited words
983 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
984 words = Data.Profunctor.dimap go (fmap _unwords)
987 x <- next (p >-> dropWhile isSpace)
990 Right (bs, p') -> Free $ do
991 p'' <- (yield bs >> p') ^. break isSpace
993 _unwords = PG.intercalates (yield $ T.singleton ' ')
995 {-# INLINABLE words #-}
998 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
999 interspersing a text stream in between them
1003 => Producer Text m ()
1004 -> FreeT (Producer Text m) m r
1005 -> Producer Text m r
1006 intercalate p0 = go0
1009 x <- lift (runFreeT f)
1016 x <- lift (runFreeT f)
1023 {-# INLINABLE intercalate #-}
1025 {-| Join 'FreeT'-delimited lines into a text stream
1028 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1032 x <- lift (runFreeT f)
1037 yield $ T.singleton '\n'
1039 {-# INLINABLE unlines #-}
1041 {-| Join 'FreeT'-delimited words into a text stream
1044 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1045 unwords = intercalate (yield $ T.singleton ' ')
1046 {-# INLINABLE unwords #-}
1049 The following parsing utilities are single-character analogs of the ones found
1055 @Data.Text@ re-exports the 'Text' type.
1057 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
1060 {- | Use a 'Codec' as a pipes-style 'Lens' into a byte stream; the available 'Codec' s are
1061 'utf8', 'utf16_le', 'utf16_be', 'utf32_le', 'utf32_be' . The 'Codec' concept and the
1062 individual 'Codec' definitions follow the enumerator and conduit libraries.
1064 Utf8 is handled differently in this library -- without the use of 'unsafePerformIO' &co
1065 to catch 'Text' exceptions; but the same 'mypipe ^. codec utf8' interface can be used.
1066 'mypipe ^. decodeUtf8' should be the same, but has a somewhat more direct and thus perhaps
1067 better implementation.
1070 codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
1071 codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
1072 (k (decoder (dec B.empty) p0) ) where
1073 decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1074 decoder !d p0 = case d of
1075 PI.Other txt bad -> do yield txt
1076 return (do yield bad
1078 PI.Some txt extra dec -> do yield txt
1080 case x of Left r -> return (do yield extra
1082 Right (chunk,p1) -> decoder (dec chunk) p1
1084 {- | ascii and latin encodings only represent a small fragment of 'Text'; thus we cannot
1085 use the pipes 'Lens' style to work with them. Rather we simply define functions
1088 'encodeAscii' : Reduce as much of your stream of 'Text' actually is ascii to a byte stream,
1089 returning the rest of the 'Text' at the first non-ascii 'Char'
1091 encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1092 encodeAscii = go where
1093 go p = do echunk <- lift (next p)
1095 Left r -> return (return r)
1096 Right (chunk, p') ->
1099 else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
1100 in do yield (B8.pack (T.unpack safe))
1103 else return $ do yield unsafe
1105 {- | Reduce as much of your stream of 'Text' actually is iso8859 or latin1 to a byte stream,
1106 returning the rest of the 'Text' upon hitting any non-latin 'Char'
1108 encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1109 encodeIso8859_1 = go where
1110 go p = do etxt <- lift (next p)
1112 Left r -> return (return r)
1116 else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
1117 in do yield (B8.pack (T.unpack safe))
1120 else return $ do yield unsafe
1123 {- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
1124 unused 'ByteString' upon hitting an un-ascii byte.
1126 decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1127 decodeAscii = go where
1128 go p = do echunk <- lift (next p)
1130 Left r -> return (return r)
1131 Right (chunk, p') ->
1134 else let (safe, unsafe) = B.span (<= 0x7F) chunk
1135 in do yield (T.pack (B8.unpack safe))
1138 else return $ do yield unsafe
1141 {- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
1142 unused 'ByteString' upon hitting the rare un-latinizable byte.
1144 decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1145 decodeIso8859_1 = go where
1146 go p = do echunk <- lift (next p)
1148 Left r -> return (return r)
1149 Right (chunk, p') ->
1152 else let (safe, unsafe) = B.span (<= 0xFF) chunk
1153 in do yield (T.pack (B8.unpack safe))
1156 else return $ do yield unsafe