1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns#-}
3 {-| The module @Pipes.Text@ closely follows @Pipes.ByteString@ from
4 the @pipes-bytestring@ package. A draft tutorial can be found in
39 -- * Primitive Character Parsers
55 -- * Transforming Text and Character Streams
62 -- * FreeT Transformations
76 , module Data.ByteString
82 import Control.Applicative ((<*))
83 import Control.Monad (liftM, join)
84 import Data.Functor.Constant (Constant(..))
85 import Data.Functor.Identity (Identity)
86 import Control.Monad.Trans.State.Strict (modify)
88 import qualified Data.Text as T
89 import Data.Text (Text)
90 import qualified Data.Text.Lazy as TL
91 import Data.ByteString (ByteString)
92 import Data.Char (isSpace)
93 import Foreign.Storable (sizeOf)
94 import Data.Bits (shiftL)
97 import Pipes.Group (folds, maps, concats, intercalates, FreeT(..), FreeF(..))
98 import qualified Pipes.Group as PG
99 import qualified Pipes.Parse as PP
100 import Pipes.Parse (Parser)
101 import qualified Pipes.Prelude as P
105 import Prelude hiding (
135 -- >>> :set -XOverloadedStrings
136 -- >>> import Data.Text (Text)
137 -- >>> import qualified Data.Text as T
138 -- >>> import qualified Data.Text.Lazy.IO as TL
139 -- >>> import Data.Char
141 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's. Producers in
142 -- IO can be found in 'Pipes.Text.IO' or in pipes-bytestring, employed with the
143 -- decoding lenses in 'Pipes.Text.Encoding'
144 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
145 fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
146 {-# INLINE fromLazy #-}
148 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
149 a ^. lens = getConstant (lens Constant a)
151 -- | Apply a transformation to each 'Char' in the stream
153 -- >>> let margaret = ["Margaret, are you grieving\nOver Golde","ngrove unleaving?":: Text]
154 -- >>> TL.putStrLn . toLazy $ each margaret >-> map Data.Char.toUpper
155 -- MARGARET, ARE YOU GRIEVING
156 -- OVER GOLDENGROVE UNLEAVING?
157 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
158 map f = P.map (T.map f)
159 {-# INLINABLE map #-}
161 -- | Map a function over the characters of a text stream and concatenate the results
164 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
165 concatMap f = P.map (T.concatMap f)
166 {-# INLINABLE concatMap #-}
168 -- | @(take n)@ only allows @n@ individual characters to pass;
169 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
170 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
171 take n0 = go n0 where
176 let len = fromIntegral (T.length txt)
178 then yield (T.take (fromIntegral n) txt)
182 {-# INLINABLE take #-}
184 -- | Take characters until they fail the predicate
185 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
186 takeWhile predicate = go
190 let (prefix, suffix) = T.span predicate txt
196 {-# INLINABLE takeWhile #-}
198 -- | Only allows 'Char's to pass if they satisfy the predicate
199 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
200 filter predicate = P.map (T.filter predicate)
201 {-# INLINABLE filter #-}
203 -- | Strict left scan over the characters
204 -- >>> let margaret = ["Margaret, are you grieving\nOver Golde","ngrove unleaving?":: Text]
205 -- >>> let title_caser a x = case a of ' ' -> Data.Char.toUpper x; _ -> x
206 -- >>> toLazy $ each margaret >-> scan title_caser ' '
207 -- " Margaret, Are You Grieving\nOver Goldengrove Unleaving?"
211 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
213 yield (T.singleton begin)
218 let txt' = T.scanl step c txt
222 {-# INLINABLE scan #-}
224 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
225 -- here acting as 'Text' pipes, rather as they would on a lazy text
226 toCaseFold :: Monad m => Pipe Text Text m r
227 toCaseFold = P.map T.toCaseFold
228 {-# INLINEABLE toCaseFold #-}
230 -- | lowercase incoming 'Text'
231 toLower :: Monad m => Pipe Text Text m r
232 toLower = P.map T.toLower
233 {-# INLINEABLE toLower #-}
235 -- | uppercase incoming 'Text'
236 toUpper :: Monad m => Pipe Text Text m r
237 toUpper = P.map T.toUpper
238 {-# INLINEABLE toUpper #-}
240 -- | Remove leading white space from an incoming succession of 'Text's
241 stripStart :: Monad m => Pipe Text Text m r
244 let text = T.stripStart chunk
249 {-# INLINEABLE stripStart #-}
251 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
254 toLazy :: Producer Text Identity () -> TL.Text
255 toLazy = TL.fromChunks . P.toList
256 {-# INLINABLE toLazy #-}
258 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
261 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
262 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
263 immediately as they are generated instead of loading them all into memory.
265 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
266 toLazyM = liftM TL.fromChunks . P.toListM
267 {-# INLINABLE toLazyM #-}
269 -- | Reduce the text stream using a strict left fold over characters
272 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
273 foldChars step begin done = P.fold (T.foldl' step) begin done
274 {-# INLINABLE foldChars #-}
277 -- | Retrieve the first 'Char'
278 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
284 Left _ -> return Nothing
285 Right (c, _) -> return (Just c)
286 {-# INLINABLE head #-}
288 -- | Retrieve the last 'Char'
289 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
299 else go (Just $ T.last txt) p'
300 {-# INLINABLE last #-}
302 -- | Determine if the stream is empty
303 null :: (Monad m) => Producer Text m () -> m Bool
305 {-# INLINABLE null #-}
307 -- | Count the number of characters in the stream
308 length :: (Monad m, Num n) => Producer Text m () -> m n
309 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
310 {-# INLINABLE length #-}
312 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
313 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
314 any predicate = P.any (T.any predicate)
315 {-# INLINABLE any #-}
317 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
318 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
319 all predicate = P.all (T.all predicate)
320 {-# INLINABLE all #-}
322 -- | Return the maximum 'Char' within a text stream
323 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
324 maximum = P.fold step Nothing id
329 else Just $ case mc of
330 Nothing -> T.maximum txt
331 Just c -> max c (T.maximum txt)
332 {-# INLINABLE maximum #-}
334 -- | Return the minimum 'Char' within a text stream (surely very useful!)
335 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
336 minimum = P.fold step Nothing id
342 Nothing -> Just (T.minimum txt)
343 Just c -> Just (min c (T.minimum txt))
344 {-# INLINABLE minimum #-}
346 -- | Find the first element in the stream that matches the predicate
349 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
350 find predicate p = head (p >-> filter predicate)
351 {-# INLINABLE find #-}
353 -- | Index into a text stream
355 :: (Monad m, Integral a)
356 => a-> Producer Text m () -> m (Maybe Char)
357 index n p = head (drop n p)
358 {-# INLINABLE index #-}
362 -- | Consume the first character from a stream of 'Text'
364 -- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
365 -- succeeds with a 'Right' providing the next character and the remainder of the
371 -> m (Either r (Char, Producer Text m r))
377 Left r -> return (Left r)
378 Right (txt, p') -> case (T.uncons txt) of
380 Just (c, txt') -> return (Right (c, yield txt' >> p'))
381 {-# INLINABLE nextChar #-}
383 -- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
385 drawChar :: (Monad m) => Parser Text m (Maybe Char)
389 Nothing -> return Nothing
390 Just txt -> case (T.uncons txt) of
395 {-# INLINABLE drawChar #-}
397 -- | Push back a 'Char' onto the underlying 'Producer'
398 unDrawChar :: (Monad m) => Char -> Parser Text m ()
399 unDrawChar c = modify (yield (T.singleton c) >>)
400 {-# INLINABLE unDrawChar #-}
402 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
408 > Left _ -> return ()
409 > Right c -> unDrawChar c
414 peekChar :: (Monad m) => Parser Text m (Maybe Char)
419 Just c -> unDrawChar c
421 {-# INLINABLE peekChar #-}
423 {-| Check if the underlying 'Producer' has no more characters
425 Note that this will skip over empty 'Text' chunks, unlike
426 'PP.isEndOfInput' from @pipes-parse@, which would consider
427 an empty 'Text' a valid bit of input.
429 > isEndOfChars = liftM isLeft peekChar
431 isEndOfChars :: (Monad m) => Parser Text m Bool
437 {-# INLINABLE isEndOfChars #-}
439 -- | Splits a 'Producer' after the given number of characters
441 :: (Monad m, Integral n)
443 -> Lens' (Producer Text m r)
444 (Producer Text m (Producer Text m r))
445 splitAt n0 k p0 = fmap join (k (go n0 p0))
451 Left r -> return (return r)
452 Right (txt, p') -> do
453 let len = fromIntegral (T.length txt)
459 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
461 return (yield suffix >> p')
462 {-# INLINABLE splitAt #-}
465 -- | Split a text stream in two, producing the longest
466 -- consecutive group of characters that satisfies the predicate
467 -- and returning the rest
472 -> Lens' (Producer Text m r)
473 (Producer Text m (Producer Text m r))
474 span predicate k p0 = fmap join (k (go p0))
479 Left r -> return (return r)
480 Right (txt, p') -> do
481 let (prefix, suffix) = T.span predicate txt
488 return (yield suffix >> p')
489 {-# INLINABLE span #-}
491 {-| Split a text stream in two, producing the longest
492 consecutive group of characters that don't satisfy the predicate
497 -> Lens' (Producer Text m r)
498 (Producer Text m (Producer Text m r))
499 break predicate = span (not . predicate)
500 {-# INLINABLE break #-}
502 {-| Improper lens that splits after the first group of equivalent Chars, as
503 defined by the given equivalence relation
507 => (Char -> Char -> Bool)
508 -> Lens' (Producer Text m r)
509 (Producer Text m (Producer Text m r))
510 groupBy equals k p0 = fmap join (k ((go p0))) where
514 Left r -> return (return r)
515 Right (txt, p') -> case T.uncons txt of
517 Just (c, _) -> (yield txt >> p') ^. span (equals c)
518 {-# INLINABLE groupBy #-}
520 -- | Improper lens that splits after the first succession of identical 'Char' s
522 => Lens' (Producer Text m r)
523 (Producer Text m (Producer Text m r))
525 {-# INLINABLE group #-}
527 {-| Improper lens that splits a 'Producer' after the first word
529 Unlike 'words', this does not drop leading whitespace
532 => Lens' (Producer Text m r)
533 (Producer Text m (Producer Text m r))
534 word k p0 = fmap join (k (to p0))
537 p' <- p^.span isSpace
539 {-# INLINABLE word #-}
542 => Lens' (Producer Text m r)
543 (Producer Text m (Producer Text m r))
544 line = break (== '\n')
545 {-# INLINABLE line #-}
547 -- | @(drop n)@ drops the first @n@ characters
548 drop :: (Monad m, Integral n)
549 => n -> Producer Text m r -> Producer Text m r
551 p' <- lift $ runEffect (for (p ^. splitAt n) discard)
553 {-# INLINABLE drop #-}
555 -- | Drop characters until they fail the predicate
556 dropWhile :: (Monad m)
557 => (Char -> Bool) -> Producer Text m r -> Producer Text m r
558 dropWhile predicate p = do
559 p' <- lift $ runEffect (for (p ^. span predicate) discard)
561 {-# INLINABLE dropWhile #-}
563 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
565 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
572 Right (txt, p') -> do
573 yield (T.intersperse c txt)
579 Right (txt, p') -> do
580 yield (T.singleton c)
581 yield (T.intersperse c txt)
583 {-# INLINABLE intersperse #-}
586 -- | Improper lens from unpacked 'Word8's to packaged 'ByteString's
587 pack :: Monad m => Lens' (Producer Char m r) (Producer Text m r)
588 pack k p = fmap _unpack (k (_pack p))
589 {-# INLINABLE pack #-}
591 -- | Improper lens from packed 'ByteString's to unpacked 'Word8's
592 unpack :: Monad m => Lens' (Producer Text m r) (Producer Char m r)
593 unpack k p = fmap _pack (k (_unpack p))
594 {-# INLINABLE unpack #-}
596 _pack :: Monad m => Producer Char m r -> Producer Text m r
597 _pack p = folds step id done (p^.PG.chunksOf defaultChunkSize)
599 step diffAs w8 = diffAs . (w8:)
601 done diffAs = T.pack (diffAs [])
602 {-# INLINABLE _pack #-}
604 _unpack :: Monad m => Producer Text m r -> Producer Char m r
605 _unpack p = for p (each . T.unpack)
606 {-# INLINABLE _unpack #-}
608 defaultChunkSize :: Int
609 defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
612 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
614 :: (Monad m, Integral n)
615 => n -> Lens' (Producer Text m r)
616 (FreeT (Producer Text m) m r)
617 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
623 Right (txt, p') -> Free $ do
624 p'' <- (yield txt >> p') ^. splitAt n
625 return $ FreeT (go p'')
626 {-# INLINABLE chunksOf #-}
629 {-| Split a text stream into sub-streams delimited by characters that satisfy the
635 -> Producer Text m r -> FreeT (Producer Text m) m r
636 splitsWith predicate p0 = FreeT (go0 p0)
641 Left r -> return (Pure r)
645 else return $ Free $ do
646 p'' <- (yield txt >> p') ^. span (not . predicate)
647 return $ FreeT (go1 p'')
652 Right (_, p') -> Free $ do
653 p'' <- p' ^. span (not . predicate)
654 return $ FreeT (go1 p'')
655 {-# INLINABLE splitsWith #-}
657 -- | Split a text stream using the given 'Char' as the delimiter
660 -> Lens' (Producer Text m r)
661 (FreeT (Producer Text m) m r)
663 fmap (intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
664 {-# INLINABLE splits #-}
666 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
667 given equivalence relation
671 => (Char -> Char -> Bool)
672 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
673 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
674 go p = do x <- next p
675 case x of Left r -> return (Pure r)
676 Right (bs, p') -> case T.uncons bs of
678 Just (c, _) -> do return $ Free $ do
679 p'' <- (yield bs >> p')^.span (equals c)
680 return $ FreeT (go p'')
681 {-# INLINABLE groupsBy #-}
684 -- | Like 'groupsBy', where the equality predicate is ('==')
687 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
688 groups = groupsBy (==)
689 {-# INLINABLE groups #-}
693 {-| Split a text stream into 'FreeT'-delimited lines
696 :: (Monad m) => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
697 lines k p = fmap _unlines (k (_lines p))
698 {-# INLINABLE lines #-}
702 => Lens' (FreeT (Producer Text m) m r) (Producer Text m r)
703 unlines k p = fmap _lines (k (_unlines p))
704 {-# INLINABLE unlines #-}
707 => Producer Text m r -> FreeT (Producer Text m) m r
708 _lines p0 = FreeT (go0 p0)
713 Left r -> return (Pure r)
717 else return $ Free $ go1 (yield txt >> p')
719 p' <- p ^. break ('\n' ==)
723 Left r -> return $ Pure r
724 Right (_, p'') -> go0 p''
725 {-# INLINABLE _lines #-}
728 => FreeT (Producer Text m) m r -> Producer Text m r
729 _unlines = concats . maps (<* yield (T.singleton '\n'))
730 {-# INLINABLE _unlines #-}
732 -- | Split a text stream into 'FreeT'-delimited words. Note that
733 -- roundtripping with e.g. @over words id@ eliminates extra space
734 -- characters as with @Prelude.unwords . Prelude.words@
736 :: (Monad m) => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
737 words k p = fmap _unwords (k (_words p))
738 {-# INLINABLE words #-}
742 => Lens' (FreeT (Producer Text m) m r) (Producer Text m r)
743 unwords k p = fmap _words (k (_unwords p))
744 {-# INLINABLE unwords #-}
746 _words :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
747 _words p = FreeT $ do
748 x <- next (dropWhile isSpace p)
751 Right (bs, p') -> Free $ do
752 p'' <- (yield bs >> p') ^. break isSpace
754 {-# INLINABLE _words #-}
756 _unwords :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
757 _unwords = intercalates (yield $ T.singleton ' ')
758 {-# INLINABLE _unwords #-}
761 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
762 interspersing a text stream in between them
766 => Producer Text m () -> FreeT (Producer Text m) m r -> Producer Text m r
770 x <- lift (runFreeT f)
777 x <- lift (runFreeT f)
784 {-# INLINABLE intercalate #-}
790 @Data.Text@ re-exports the 'Text' type.
792 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
796 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)