1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
3 {-| The module @Pipes.Text@ closely follows @Pipes.ByteString@ from
4 the @pipes-bytestring@ package. A draft tutorial can be found in
39 -- * Primitive Character Parsers
55 -- * Transforming Text and Character Streams
62 -- * FreeT Transformations
76 , module Data.ByteString
82 import Control.Applicative ((<*))
83 import Control.Monad (liftM, join)
84 import Control.Monad.Trans.State.Strict (StateT(..), modify)
85 import qualified Data.Text as T
86 import Data.Text (Text)
87 import qualified Data.Text.Lazy as TL
88 import Data.ByteString (ByteString)
89 import Data.Functor.Constant (Constant(Constant, getConstant))
90 import Data.Functor.Identity (Identity)
93 import Pipes.Group (folds, maps, concats, intercalates, FreeT(..), FreeF(..))
94 import qualified Pipes.Group as PG
95 import qualified Pipes.Parse as PP
96 import Pipes.Parse (Parser)
97 import qualified Pipes.Prelude as P
98 import Data.Char (isSpace)
99 import Data.Word (Word8)
100 import Foreign.Storable (sizeOf)
101 import Data.Bits (shiftL)
102 import Prelude hiding (
132 -- >>> :set -XOverloadedStrings
133 -- >>> import Data.Text (Text)
134 -- >>> import qualified Data.Text as T
135 -- >>> import qualified Data.Text.Lazy.IO as TL
136 -- >>> import Data.Char
138 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's. Producers in
139 -- IO can be found in 'Pipes.Text.IO' or in pipes-bytestring, employed with the
140 -- decoding lenses in 'Pipes.Text.Encoding'
141 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
142 fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
143 {-# INLINE fromLazy #-}
145 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
146 a ^. lens = getConstant (lens Constant a)
148 -- | Apply a transformation to each 'Char' in the stream
150 -- >>> let margaret = ["Margaret, are you grieving\nOver Golde","ngrove unleaving?":: Text]
151 -- >>> TL.putStrLn . toLazy $ each margaret >-> map Data.Char.toUpper
152 -- MARGARET, ARE YOU GRIEVING
153 -- OVER GOLDENGROVE UNLEAVING?
154 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
155 map f = P.map (T.map f)
156 {-# INLINABLE map #-}
158 -- | Map a function over the characters of a text stream and concatenate the results
161 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
162 concatMap f = P.map (T.concatMap f)
163 {-# INLINABLE concatMap #-}
165 -- | @(take n)@ only allows @n@ individual characters to pass;
166 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
167 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
168 take n0 = go n0 where
173 let len = fromIntegral (T.length txt)
175 then yield (T.take (fromIntegral n) txt)
179 {-# INLINABLE take #-}
181 -- | Take characters until they fail the predicate
182 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
183 takeWhile predicate = go
187 let (prefix, suffix) = T.span predicate txt
193 {-# INLINABLE takeWhile #-}
195 -- | Only allows 'Char's to pass if they satisfy the predicate
196 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
197 filter predicate = P.map (T.filter predicate)
198 {-# INLINABLE filter #-}
200 -- | Strict left scan over the characters
201 -- >>> let margaret = ["Margaret, are you grieving\nOver Golde","ngrove unleaving?":: Text]
202 -- >>> let title_caser a x = case a of ' ' -> Data.Char.toUpper x; _ -> x
203 -- >>> toLazy $ each margaret >-> scan title_caser ' '
204 -- " Margaret, Are You Grieving\nOver Goldengrove Unleaving?"
208 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
210 yield (T.singleton begin)
215 let txt' = T.scanl step c txt
219 {-# INLINABLE scan #-}
221 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
222 -- here acting as 'Text' pipes, rather as they would on a lazy text
223 toCaseFold :: Monad m => Pipe Text Text m r
224 toCaseFold = P.map T.toCaseFold
225 {-# INLINEABLE toCaseFold #-}
227 -- | lowercase incoming 'Text'
228 toLower :: Monad m => Pipe Text Text m r
229 toLower = P.map T.toLower
230 {-# INLINEABLE toLower #-}
232 -- | uppercase incoming 'Text'
233 toUpper :: Monad m => Pipe Text Text m r
234 toUpper = P.map T.toUpper
235 {-# INLINEABLE toUpper #-}
237 -- | Remove leading white space from an incoming succession of 'Text's
238 stripStart :: Monad m => Pipe Text Text m r
241 let text = T.stripStart chunk
246 {-# INLINEABLE stripStart #-}
248 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
251 toLazy :: Producer Text Identity () -> TL.Text
252 toLazy = TL.fromChunks . P.toList
253 {-# INLINABLE toLazy #-}
255 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
258 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
259 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
260 immediately as they are generated instead of loading them all into memory.
262 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
263 toLazyM = liftM TL.fromChunks . P.toListM
264 {-# INLINABLE toLazyM #-}
266 -- | Reduce the text stream using a strict left fold over characters
269 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
270 foldChars step begin done = P.fold (T.foldl' step) begin done
271 {-# INLINABLE foldChars #-}
274 -- | Retrieve the first 'Char'
275 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
281 Left _ -> return Nothing
282 Right (c, _) -> return (Just c)
283 {-# INLINABLE head #-}
285 -- | Retrieve the last 'Char'
286 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
296 else go (Just $ T.last txt) p'
297 {-# INLINABLE last #-}
299 -- | Determine if the stream is empty
300 null :: (Monad m) => Producer Text m () -> m Bool
302 {-# INLINABLE null #-}
304 -- | Count the number of characters in the stream
305 length :: (Monad m, Num n) => Producer Text m () -> m n
306 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
307 {-# INLINABLE length #-}
309 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
310 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
311 any predicate = P.any (T.any predicate)
312 {-# INLINABLE any #-}
314 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
315 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
316 all predicate = P.all (T.all predicate)
317 {-# INLINABLE all #-}
319 -- | Return the maximum 'Char' within a text stream
320 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
321 maximum = P.fold step Nothing id
326 else Just $ case mc of
327 Nothing -> T.maximum txt
328 Just c -> max c (T.maximum txt)
329 {-# INLINABLE maximum #-}
331 -- | Return the minimum 'Char' within a text stream (surely very useful!)
332 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
333 minimum = P.fold step Nothing id
339 Nothing -> Just (T.minimum txt)
340 Just c -> Just (min c (T.minimum txt))
341 {-# INLINABLE minimum #-}
343 -- | Find the first element in the stream that matches the predicate
346 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
347 find predicate p = head (p >-> filter predicate)
348 {-# INLINABLE find #-}
350 -- | Index into a text stream
352 :: (Monad m, Integral a)
353 => a-> Producer Text m () -> m (Maybe Char)
354 index n p = head (drop n p)
355 {-# INLINABLE index #-}
359 -- | Consume the first character from a stream of 'Text'
361 -- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
362 -- succeeds with a 'Right' providing the next character and the remainder of the
368 -> m (Either r (Char, Producer Text m r))
374 Left r -> return (Left r)
375 Right (txt, p') -> case (T.uncons txt) of
377 Just (c, txt') -> return (Right (c, yield txt' >> p'))
378 {-# INLINABLE nextChar #-}
380 -- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
382 drawChar :: (Monad m) => Parser Text m (Maybe Char)
386 Nothing -> return Nothing
387 Just txt -> case (T.uncons txt) of
392 {-# INLINABLE drawChar #-}
394 -- | Push back a 'Char' onto the underlying 'Producer'
395 unDrawChar :: (Monad m) => Char -> Parser Text m ()
396 unDrawChar c = modify (yield (T.singleton c) >>)
397 {-# INLINABLE unDrawChar #-}
399 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
405 > Left _ -> return ()
406 > Right c -> unDrawChar c
411 peekChar :: (Monad m) => Parser Text m (Maybe Char)
416 Just c -> unDrawChar c
418 {-# INLINABLE peekChar #-}
420 {-| Check if the underlying 'Producer' has no more characters
422 Note that this will skip over empty 'Text' chunks, unlike
423 'PP.isEndOfInput' from @pipes-parse@, which would consider
424 an empty 'Text' a valid bit of input.
426 > isEndOfChars = liftM isLeft peekChar
428 isEndOfChars :: (Monad m) => Parser Text m Bool
434 {-# INLINABLE isEndOfChars #-}
436 -- | Splits a 'Producer' after the given number of characters
438 :: (Monad m, Integral n)
440 -> Lens' (Producer Text m r)
441 (Producer Text m (Producer Text m r))
442 splitAt n0 k p0 = fmap join (k (go n0 p0))
448 Left r -> return (return r)
449 Right (txt, p') -> do
450 let len = fromIntegral (T.length txt)
456 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
458 return (yield suffix >> p')
459 {-# INLINABLE splitAt #-}
462 -- | Split a text stream in two, producing the longest
463 -- consecutive group of characters that satisfies the predicate
464 -- and returning the rest
469 -> Lens' (Producer Text m r)
470 (Producer Text m (Producer Text m r))
471 span predicate k p0 = fmap join (k (go p0))
476 Left r -> return (return r)
477 Right (txt, p') -> do
478 let (prefix, suffix) = T.span predicate txt
485 return (yield suffix >> p')
486 {-# INLINABLE span #-}
488 {-| Split a text stream in two, producing the longest
489 consecutive group of characters that don't satisfy the predicate
494 -> Lens' (Producer Text m r)
495 (Producer Text m (Producer Text m r))
496 break predicate = span (not . predicate)
497 {-# INLINABLE break #-}
499 {-| Improper lens that splits after the first group of equivalent Chars, as
500 defined by the given equivalence relation
504 => (Char -> Char -> Bool)
505 -> Lens' (Producer Text m r)
506 (Producer Text m (Producer Text m r))
507 groupBy equals k p0 = fmap join (k ((go p0))) where
511 Left r -> return (return r)
512 Right (txt, p') -> case T.uncons txt of
514 Just (c, _) -> (yield txt >> p') ^. span (equals c)
515 {-# INLINABLE groupBy #-}
517 -- | Improper lens that splits after the first succession of identical 'Char' s
519 => Lens' (Producer Text m r)
520 (Producer Text m (Producer Text m r))
522 {-# INLINABLE group #-}
524 {-| Improper lens that splits a 'Producer' after the first word
526 Unlike 'words', this does not drop leading whitespace
529 => Lens' (Producer Text m r)
530 (Producer Text m (Producer Text m r))
531 word k p0 = fmap join (k (to p0))
534 p' <- p^.span isSpace
536 {-# INLINABLE word #-}
539 => Lens' (Producer Text m r)
540 (Producer Text m (Producer Text m r))
541 line = break (== '\n')
542 {-# INLINABLE line #-}
544 -- | @(drop n)@ drops the first @n@ characters
545 drop :: (Monad m, Integral n)
546 => n -> Producer Text m r -> Producer Text m r
548 p' <- lift $ runEffect (for (p ^. splitAt n) discard)
550 {-# INLINABLE drop #-}
552 -- | Drop characters until they fail the predicate
553 dropWhile :: (Monad m)
554 => (Char -> Bool) -> Producer Text m r -> Producer Text m r
555 dropWhile predicate p = do
556 p' <- lift $ runEffect (for (p ^. span predicate) discard)
558 {-# INLINABLE dropWhile #-}
560 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
562 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
569 Right (txt, p') -> do
570 yield (T.intersperse c txt)
576 Right (txt, p') -> do
577 yield (T.singleton c)
578 yield (T.intersperse c txt)
580 {-# INLINABLE intersperse #-}
583 -- | Improper lens from unpacked 'Word8's to packaged 'ByteString's
584 pack :: Monad m => Lens' (Producer Char m r) (Producer Text m r)
585 pack k p = fmap _unpack (k (_pack p))
586 {-# INLINABLE pack #-}
588 -- | Improper lens from packed 'ByteString's to unpacked 'Word8's
589 unpack :: Monad m => Lens' (Producer Text m r) (Producer Char m r)
590 unpack k p = fmap _pack (k (_unpack p))
591 {-# INLINABLE unpack #-}
593 _pack :: Monad m => Producer Char m r -> Producer Text m r
594 _pack p = folds step id done (p^.PG.chunksOf defaultChunkSize)
596 step diffAs w8 = diffAs . (w8:)
598 done diffAs = T.pack (diffAs [])
599 {-# INLINABLE _pack #-}
601 _unpack :: Monad m => Producer Text m r -> Producer Char m r
602 _unpack p = for p (each . T.unpack)
603 {-# INLINABLE _unpack #-}
605 defaultChunkSize :: Int
606 defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
609 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
611 :: (Monad m, Integral n)
612 => n -> Lens' (Producer Text m r)
613 (FreeT (Producer Text m) m r)
614 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
620 Right (txt, p') -> Free $ do
621 p'' <- (yield txt >> p') ^. splitAt n
622 return $ FreeT (go p'')
623 {-# INLINABLE chunksOf #-}
626 {-| Split a text stream into sub-streams delimited by characters that satisfy the
632 -> Producer Text m r -> FreeT (Producer Text m) m r
633 splitsWith predicate p0 = FreeT (go0 p0)
638 Left r -> return (Pure r)
642 else return $ Free $ do
643 p'' <- (yield txt >> p') ^. span (not . predicate)
644 return $ FreeT (go1 p'')
649 Right (_, p') -> Free $ do
650 p'' <- p' ^. span (not . predicate)
651 return $ FreeT (go1 p'')
652 {-# INLINABLE splitsWith #-}
654 -- | Split a text stream using the given 'Char' as the delimiter
657 -> Lens' (Producer Text m r)
658 (FreeT (Producer Text m) m r)
660 fmap (intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
661 {-# INLINABLE splits #-}
663 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
664 given equivalence relation
668 => (Char -> Char -> Bool)
669 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
670 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
671 go p = do x <- next p
672 case x of Left r -> return (Pure r)
673 Right (bs, p') -> case T.uncons bs of
675 Just (c, _) -> do return $ Free $ do
676 p'' <- (yield bs >> p')^.span (equals c)
677 return $ FreeT (go p'')
678 {-# INLINABLE groupsBy #-}
681 -- | Like 'groupsBy', where the equality predicate is ('==')
684 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
685 groups = groupsBy (==)
686 {-# INLINABLE groups #-}
690 {-| Split a text stream into 'FreeT'-delimited lines
693 :: (Monad m) => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
694 lines k p = fmap _unlines (k (_lines p))
695 {-# INLINABLE lines #-}
699 => Lens' (FreeT (Producer Text m) m r) (Producer Text m r)
700 unlines k p = fmap _lines (k (_unlines p))
701 {-# INLINABLE unlines #-}
704 => Producer Text m r -> FreeT (Producer Text m) m r
705 _lines p0 = FreeT (go0 p0)
710 Left r -> return (Pure r)
714 else return $ Free $ go1 (yield txt >> p')
716 p' <- p ^. break ('\n' ==)
720 Left r -> return $ Pure r
721 Right (_, p'') -> go0 p''
722 {-# INLINABLE _lines #-}
725 => FreeT (Producer Text m) m r -> Producer Text m r
726 _unlines = concats . maps (<* yield (T.singleton '\n'))
727 {-# INLINABLE _unlines #-}
729 -- | Split a text stream into 'FreeT'-delimited words. Note that
730 -- roundtripping with e.g. @over words id@ eliminates extra space
731 -- characters as with @Prelude.unwords . Prelude.words@
733 :: (Monad m) => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
734 words k p = fmap _unwords (k (_words p))
735 {-# INLINABLE words #-}
739 => Lens' (FreeT (Producer Text m) m r) (Producer Text m r)
740 unwords k p = fmap _words (k (_unwords p))
741 {-# INLINABLE unwords #-}
743 _words :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
744 _words p = FreeT $ do
745 x <- next (dropWhile isSpace p)
748 Right (bs, p') -> Free $ do
749 p'' <- (yield bs >> p') ^. break isSpace
751 {-# INLINABLE _words #-}
753 _unwords :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
754 _unwords = intercalates (yield $ T.singleton ' ')
755 {-# INLINABLE _unwords #-}
758 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
759 interspersing a text stream in between them
763 => Producer Text m () -> FreeT (Producer Text m) m r -> Producer Text m r
767 x <- lift (runFreeT f)
774 x <- lift (runFreeT f)
781 {-# INLINABLE intercalate #-}
787 @Data.Text@ re-exports the 'Text' type.
789 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
793 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)