1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Safe#-}
3 {-| The module @Pipes.Text@ closely follows @Pipes.ByteString@ from
4 the @pipes-bytestring@ package. A draft tutorial can be found in
39 -- * Primitive Character Parsers
55 -- * Transforming Text and Character Streams
62 -- * FreeT Transformations
76 , module Data.ByteString
82 import Control.Monad (liftM, join)
83 import Control.Monad.Trans.State.Strict (modify)
84 import qualified Data.Text as T
85 import Data.Text (Text)
86 import qualified Data.Text.Lazy as TL
87 import Data.ByteString (ByteString)
88 import Data.Functor.Constant (Constant(Constant, getConstant))
89 import Data.Functor.Identity (Identity)
92 import Pipes.Group (folds, maps, concats, intercalates, FreeT(..), FreeF(..))
93 import qualified Pipes.Group as PG
94 import qualified Pipes.Parse as PP
95 import Pipes.Parse (Parser)
96 import qualified Pipes.Prelude as P
97 import Data.Char (isSpace)
98 import Foreign.Storable (sizeOf)
99 import Data.Bits (shiftL)
100 import Prelude hiding (
130 -- >>> :set -XOverloadedStrings
131 -- >>> import Data.Text (Text)
132 -- >>> import qualified Data.Text as T
133 -- >>> import qualified Data.Text.Lazy.IO as TL
134 -- >>> import Data.Char
136 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's. Producers in
137 -- IO can be found in 'Pipes.Text.IO' or in pipes-bytestring, employed with the
138 -- decoding lenses in 'Pipes.Text.Encoding'
139 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
140 fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
141 {-# INLINE fromLazy #-}
143 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
144 a ^. lens = getConstant (lens Constant a)
146 -- | Apply a transformation to each 'Char' in the stream
148 -- >>> let margaret = ["Margaret, are you grieving\nOver Golde","ngrove unleaving?":: Text]
149 -- >>> TL.putStrLn . toLazy $ each margaret >-> map Data.Char.toUpper
150 -- MARGARET, ARE YOU GRIEVING
151 -- OVER GOLDENGROVE UNLEAVING?
152 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
153 map f = P.map (T.map f)
154 {-# INLINABLE map #-}
156 -- | Map a function over the characters of a text stream and concatenate the results
159 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
160 concatMap f = P.map (T.concatMap f)
161 {-# INLINABLE concatMap #-}
163 -- | @(take n)@ only allows @n@ individual characters to pass;
164 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
165 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
166 take n0 = go n0 where
171 let len = fromIntegral (T.length txt)
173 then yield (T.take (fromIntegral n) txt)
177 {-# INLINABLE take #-}
179 -- | Take characters until they fail the predicate
180 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
181 takeWhile predicate = go
185 let (prefix, suffix) = T.span predicate txt
191 {-# INLINABLE takeWhile #-}
193 -- | Only allows 'Char's to pass if they satisfy the predicate
194 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
195 filter predicate = P.map (T.filter predicate)
196 {-# INLINABLE filter #-}
198 -- | Strict left scan over the characters
199 -- >>> let margaret = ["Margaret, are you grieving\nOver Golde","ngrove unleaving?":: Text]
200 -- >>> let title_caser a x = case a of ' ' -> Data.Char.toUpper x; _ -> x
201 -- >>> toLazy $ each margaret >-> scan title_caser ' '
202 -- " Margaret, Are You Grieving\nOver Goldengrove Unleaving?"
206 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
208 yield (T.singleton begin)
213 let txt' = T.scanl step c txt
217 {-# INLINABLE scan #-}
219 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
220 -- here acting as 'Text' pipes, rather as they would on a lazy text
221 toCaseFold :: Monad m => Pipe Text Text m r
222 toCaseFold = P.map T.toCaseFold
223 {-# INLINEABLE toCaseFold #-}
225 -- | lowercase incoming 'Text'
226 toLower :: Monad m => Pipe Text Text m r
227 toLower = P.map T.toLower
228 {-# INLINEABLE toLower #-}
230 -- | uppercase incoming 'Text'
231 toUpper :: Monad m => Pipe Text Text m r
232 toUpper = P.map T.toUpper
233 {-# INLINEABLE toUpper #-}
235 -- | Remove leading white space from an incoming succession of 'Text's
236 stripStart :: Monad m => Pipe Text Text m r
239 let text = T.stripStart chunk
244 {-# INLINEABLE stripStart #-}
246 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
249 toLazy :: Producer Text Identity () -> TL.Text
250 toLazy = TL.fromChunks . P.toList
251 {-# INLINABLE toLazy #-}
253 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
256 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
257 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
258 immediately as they are generated instead of loading them all into memory.
260 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
261 toLazyM = liftM TL.fromChunks . P.toListM
262 {-# INLINABLE toLazyM #-}
264 -- | Reduce the text stream using a strict left fold over characters
267 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
268 foldChars step begin done = P.fold (T.foldl' step) begin done
269 {-# INLINABLE foldChars #-}
272 -- | Retrieve the first 'Char'
273 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
279 Left _ -> return Nothing
280 Right (c, _) -> return (Just c)
281 {-# INLINABLE head #-}
283 -- | Retrieve the last 'Char'
284 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
294 else go (Just $ T.last txt) p'
295 {-# INLINABLE last #-}
297 -- | Determine if the stream is empty
298 null :: (Monad m) => Producer Text m () -> m Bool
300 {-# INLINABLE null #-}
302 -- | Count the number of characters in the stream
303 length :: (Monad m, Num n) => Producer Text m () -> m n
304 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
305 {-# INLINABLE length #-}
307 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
308 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
309 any predicate = P.any (T.any predicate)
310 {-# INLINABLE any #-}
312 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
313 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
314 all predicate = P.all (T.all predicate)
315 {-# INLINABLE all #-}
317 -- | Return the maximum 'Char' within a text stream
318 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
319 maximum = P.fold step Nothing id
324 else Just $ case mc of
325 Nothing -> T.maximum txt
326 Just c -> max c (T.maximum txt)
327 {-# INLINABLE maximum #-}
329 -- | Return the minimum 'Char' within a text stream (surely very useful!)
330 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
331 minimum = P.fold step Nothing id
337 Nothing -> Just (T.minimum txt)
338 Just c -> Just (min c (T.minimum txt))
339 {-# INLINABLE minimum #-}
341 -- | Find the first element in the stream that matches the predicate
344 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
345 find predicate p = head (p >-> filter predicate)
346 {-# INLINABLE find #-}
348 -- | Index into a text stream
350 :: (Monad m, Integral a)
351 => a-> Producer Text m () -> m (Maybe Char)
352 index n p = head (drop n p)
353 {-# INLINABLE index #-}
357 -- | Consume the first character from a stream of 'Text'
359 -- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
360 -- succeeds with a 'Right' providing the next character and the remainder of the
366 -> m (Either r (Char, Producer Text m r))
372 Left r -> return (Left r)
373 Right (txt, p') -> case (T.uncons txt) of
375 Just (c, txt') -> return (Right (c, yield txt' >> p'))
376 {-# INLINABLE nextChar #-}
378 -- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
380 drawChar :: (Monad m) => Parser Text m (Maybe Char)
384 Nothing -> return Nothing
385 Just txt -> case (T.uncons txt) of
390 {-# INLINABLE drawChar #-}
392 -- | Push back a 'Char' onto the underlying 'Producer'
393 unDrawChar :: (Monad m) => Char -> Parser Text m ()
394 unDrawChar c = modify (yield (T.singleton c) >>)
395 {-# INLINABLE unDrawChar #-}
397 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
403 > Left _ -> return ()
404 > Right c -> unDrawChar c
409 peekChar :: (Monad m) => Parser Text m (Maybe Char)
414 Just c -> unDrawChar c
416 {-# INLINABLE peekChar #-}
418 {-| Check if the underlying 'Producer' has no more characters
420 Note that this will skip over empty 'Text' chunks, unlike
421 'PP.isEndOfInput' from @pipes-parse@, which would consider
422 an empty 'Text' a valid bit of input.
424 > isEndOfChars = liftM isLeft peekChar
426 isEndOfChars :: (Monad m) => Parser Text m Bool
432 {-# INLINABLE isEndOfChars #-}
434 -- | Splits a 'Producer' after the given number of characters
436 :: (Monad m, Integral n)
438 -> Lens' (Producer Text m r)
439 (Producer Text m (Producer Text m r))
440 splitAt n0 k p0 = fmap join (k (go n0 p0))
446 Left r -> return (return r)
447 Right (txt, p') -> do
448 let len = fromIntegral (T.length txt)
454 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
456 return (yield suffix >> p')
457 {-# INLINABLE splitAt #-}
460 -- | Split a text stream in two, producing the longest
461 -- consecutive group of characters that satisfies the predicate
462 -- and returning the rest
467 -> Lens' (Producer Text m r)
468 (Producer Text m (Producer Text m r))
469 span predicate k p0 = fmap join (k (go p0))
474 Left r -> return (return r)
475 Right (txt, p') -> do
476 let (prefix, suffix) = T.span predicate txt
483 return (yield suffix >> p')
484 {-# INLINABLE span #-}
486 {-| Split a text stream in two, producing the longest
487 consecutive group of characters that don't satisfy the predicate
492 -> Lens' (Producer Text m r)
493 (Producer Text m (Producer Text m r))
494 break predicate = span (not . predicate)
495 {-# INLINABLE break #-}
497 {-| Improper lens that splits after the first group of equivalent Chars, as
498 defined by the given equivalence relation
502 => (Char -> Char -> Bool)
503 -> Lens' (Producer Text m r)
504 (Producer Text m (Producer Text m r))
505 groupBy equals k p0 = fmap join (k ((go p0))) where
509 Left r -> return (return r)
510 Right (txt, p') -> case T.uncons txt of
512 Just (c, _) -> (yield txt >> p') ^. span (equals c)
513 {-# INLINABLE groupBy #-}
515 -- | Improper lens that splits after the first succession of identical 'Char' s
517 => Lens' (Producer Text m r)
518 (Producer Text m (Producer Text m r))
520 {-# INLINABLE group #-}
522 {-| Improper lens that splits a 'Producer' after the first word
524 Unlike 'words', this does not drop leading whitespace
527 => Lens' (Producer Text m r)
528 (Producer Text m (Producer Text m r))
529 word k p0 = fmap join (k (to p0))
532 p' <- p^.span isSpace
534 {-# INLINABLE word #-}
537 => Lens' (Producer Text m r)
538 (Producer Text m (Producer Text m r))
539 line = break (== '\n')
540 {-# INLINABLE line #-}
542 -- | @(drop n)@ drops the first @n@ characters
543 drop :: (Monad m, Integral n)
544 => n -> Producer Text m r -> Producer Text m r
546 p' <- lift $ runEffect (for (p ^. splitAt n) discard)
548 {-# INLINABLE drop #-}
550 -- | Drop characters until they fail the predicate
551 dropWhile :: (Monad m)
552 => (Char -> Bool) -> Producer Text m r -> Producer Text m r
553 dropWhile predicate p = do
554 p' <- lift $ runEffect (for (p ^. span predicate) discard)
556 {-# INLINABLE dropWhile #-}
558 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
560 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
567 Right (txt, p') -> do
568 yield (T.intersperse c txt)
574 Right (txt, p') -> do
575 yield (T.singleton c)
576 yield (T.intersperse c txt)
578 {-# INLINABLE intersperse #-}
581 -- | Improper lens from unpacked 'Word8's to packaged 'ByteString's
582 pack :: Monad m => Lens' (Producer Char m r) (Producer Text m r)
583 pack k p = fmap _unpack (k (_pack p))
584 {-# INLINABLE pack #-}
586 -- | Improper lens from packed 'ByteString's to unpacked 'Word8's
587 unpack :: Monad m => Lens' (Producer Text m r) (Producer Char m r)
588 unpack k p = fmap _pack (k (_unpack p))
589 {-# INLINABLE unpack #-}
591 _pack :: Monad m => Producer Char m r -> Producer Text m r
592 _pack p = folds step id done (p^.PG.chunksOf defaultChunkSize)
594 step diffAs w8 = diffAs . (w8:)
596 done diffAs = T.pack (diffAs [])
597 {-# INLINABLE _pack #-}
599 _unpack :: Monad m => Producer Text m r -> Producer Char m r
600 _unpack p = for p (each . T.unpack)
601 {-# INLINABLE _unpack #-}
603 defaultChunkSize :: Int
604 defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
607 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
609 :: (Monad m, Integral n)
610 => n -> Lens' (Producer Text m r)
611 (FreeT (Producer Text m) m r)
612 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
618 Right (txt, p') -> Free $ do
619 p'' <- (yield txt >> p') ^. splitAt n
620 return $ FreeT (go p'')
621 {-# INLINABLE chunksOf #-}
624 {-| Split a text stream into sub-streams delimited by characters that satisfy the
630 -> Producer Text m r -> FreeT (Producer Text m) m r
631 splitsWith predicate p0 = FreeT (go0 p0)
636 Left r -> return (Pure r)
640 else return $ Free $ do
641 p'' <- (yield txt >> p') ^. span (not . predicate)
642 return $ FreeT (go1 p'')
647 Right (_, p') -> Free $ do
648 p'' <- p' ^. span (not . predicate)
649 return $ FreeT (go1 p'')
650 {-# INLINABLE splitsWith #-}
652 -- | Split a text stream using the given 'Char' as the delimiter
655 -> Lens' (Producer Text m r)
656 (FreeT (Producer Text m) m r)
658 fmap (intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
659 {-# INLINABLE splits #-}
661 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
662 given equivalence relation
666 => (Char -> Char -> Bool)
667 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
668 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
669 go p = do x <- next p
670 case x of Left r -> return (Pure r)
671 Right (bs, p') -> case T.uncons bs of
673 Just (c, _) -> do return $ Free $ do
674 p'' <- (yield bs >> p')^.span (equals c)
675 return $ FreeT (go p'')
676 {-# INLINABLE groupsBy #-}
679 -- | Like 'groupsBy', where the equality predicate is ('==')
682 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
683 groups = groupsBy (==)
684 {-# INLINABLE groups #-}
688 {-| Split a text stream into 'FreeT'-delimited lines
691 :: (Monad m) => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
692 lines k p = fmap _unlines (k (_lines p))
693 {-# INLINABLE lines #-}
697 => Lens' (FreeT (Producer Text m) m r) (Producer Text m r)
698 unlines k p = fmap _lines (k (_unlines p))
699 {-# INLINABLE unlines #-}
702 => Producer Text m r -> FreeT (Producer Text m) m r
703 _lines p0 = FreeT (go0 p0)
708 Left r -> return (Pure r)
712 else return $ Free $ go1 (yield txt >> p')
714 p' <- p ^. break ('\n' ==)
718 Left r -> return $ Pure r
719 Right (_, p'') -> go0 p''
720 {-# INLINABLE _lines #-}
723 => FreeT (Producer Text m) m r -> Producer Text m r
724 _unlines = concats . maps (<* yield (T.singleton '\n'))
725 {-# INLINABLE _unlines #-}
727 -- | Split a text stream into 'FreeT'-delimited words. Note that
728 -- roundtripping with e.g. @over words id@ eliminates extra space
729 -- characters as with @Prelude.unwords . Prelude.words@
731 :: (Monad m) => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
732 words k p = fmap _unwords (k (_words p))
733 {-# INLINABLE words #-}
737 => Lens' (FreeT (Producer Text m) m r) (Producer Text m r)
738 unwords k p = fmap _words (k (_unwords p))
739 {-# INLINABLE unwords #-}
741 _words :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
742 _words p = FreeT $ do
743 x <- next (dropWhile isSpace p)
746 Right (bs, p') -> Free $ do
747 p'' <- (yield bs >> p') ^. break isSpace
749 {-# INLINABLE _words #-}
751 _unwords :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
752 _unwords = intercalates (yield $ T.singleton ' ')
753 {-# INLINABLE _unwords #-}
756 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
757 interspersing a text stream in between them
761 => Producer Text m () -> FreeT (Producer Text m) m r -> Producer Text m r
765 x <- lift (runFreeT f)
772 x <- lift (runFreeT f)
779 {-# INLINABLE intercalate #-}
785 @Data.Text@ re-exports the 'Text' type.
787 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
791 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)