1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
3 {-| The module @Pipes.Text@ closely follows @Pipes.ByteString@ from
4 the @pipes-bytestring@ package. A draft tutorial can be found in
39 -- * Primitive Character Parsers
55 -- * Transforming Text and Character Streams
62 -- * FreeT Transformations
76 , module Data.ByteString
82 import Control.Applicative ((<*))
83 import Control.Monad (liftM, join)
84 import Control.Monad.Trans.State.Strict (StateT(..), modify)
85 import qualified Data.Text as T
86 import Data.Text (Text)
87 import qualified Data.Text.Lazy as TL
88 import Data.ByteString (ByteString)
89 import Data.Functor.Constant (Constant(Constant, getConstant))
90 import Data.Functor.Identity (Identity)
93 import Pipes.Group (folds, maps, concats, intercalates, FreeT(..), FreeF(..))
94 import qualified Pipes.Group as PG
95 import qualified Pipes.Parse as PP
96 import Pipes.Parse (Parser)
97 import qualified Pipes.Prelude as P
98 import Data.Char (isSpace)
99 import Data.Word (Word8)
100 import Foreign.Storable (sizeOf)
101 import Data.Bits (shiftL)
102 import Prelude hiding (
132 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
133 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
134 fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
135 {-# INLINE fromLazy #-}
137 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
138 a ^. lens = getConstant (lens Constant a)
140 -- | Apply a transformation to each 'Char' in the stream
141 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
142 map f = P.map (T.map f)
143 {-# INLINABLE map #-}
145 -- | Map a function over the characters of a text stream and concatenate the results
147 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
148 concatMap f = P.map (T.concatMap f)
149 {-# INLINABLE concatMap #-}
151 -- | @(take n)@ only allows @n@ individual characters to pass;
152 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
153 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
154 take n0 = go n0 where
159 let len = fromIntegral (T.length txt)
161 then yield (T.take (fromIntegral n) txt)
165 {-# INLINABLE take #-}
167 -- | Take characters until they fail the predicate
168 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
169 takeWhile predicate = go
173 let (prefix, suffix) = T.span predicate txt
179 {-# INLINABLE takeWhile #-}
181 -- | Only allows 'Char's to pass if they satisfy the predicate
182 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
183 filter predicate = P.map (T.filter predicate)
184 {-# INLINABLE filter #-}
186 -- | Strict left scan over the characters
189 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
191 yield (T.singleton begin)
196 let txt' = T.scanl step c txt
200 {-# INLINABLE scan #-}
202 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
203 -- here acting as 'Text' pipes, rather as they would on a lazy text
204 toCaseFold :: Monad m => Pipe Text Text m r
205 toCaseFold = P.map T.toCaseFold
206 {-# INLINEABLE toCaseFold #-}
208 -- | lowercase incoming 'Text'
209 toLower :: Monad m => Pipe Text Text m r
210 toLower = P.map T.toLower
211 {-# INLINEABLE toLower #-}
213 -- | uppercase incoming 'Text'
214 toUpper :: Monad m => Pipe Text Text m r
215 toUpper = P.map T.toUpper
216 {-# INLINEABLE toUpper #-}
218 -- | Remove leading white space from an incoming succession of 'Text's
219 stripStart :: Monad m => Pipe Text Text m r
222 let text = T.stripStart chunk
227 {-# INLINEABLE stripStart #-}
229 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
232 toLazy :: Producer Text Identity () -> TL.Text
233 toLazy = TL.fromChunks . P.toList
234 {-# INLINABLE toLazy #-}
236 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
239 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
240 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
241 immediately as they are generated instead of loading them all into memory.
243 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
244 toLazyM = liftM TL.fromChunks . P.toListM
245 {-# INLINABLE toLazyM #-}
247 -- | Reduce the text stream using a strict left fold over characters
250 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
251 foldChars step begin done = P.fold (T.foldl' step) begin done
252 {-# INLINABLE foldChars #-}
255 -- | Retrieve the first 'Char'
256 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
262 Left _ -> return Nothing
263 Right (c, _) -> return (Just c)
264 {-# INLINABLE head #-}
266 -- | Retrieve the last 'Char'
267 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
277 else go (Just $ T.last txt) p'
278 {-# INLINABLE last #-}
280 -- | Determine if the stream is empty
281 null :: (Monad m) => Producer Text m () -> m Bool
283 {-# INLINABLE null #-}
285 -- | Count the number of characters in the stream
286 length :: (Monad m, Num n) => Producer Text m () -> m n
287 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
288 {-# INLINABLE length #-}
290 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
291 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
292 any predicate = P.any (T.any predicate)
293 {-# INLINABLE any #-}
295 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
296 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
297 all predicate = P.all (T.all predicate)
298 {-# INLINABLE all #-}
300 -- | Return the maximum 'Char' within a text stream
301 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
302 maximum = P.fold step Nothing id
307 else Just $ case mc of
308 Nothing -> T.maximum txt
309 Just c -> max c (T.maximum txt)
310 {-# INLINABLE maximum #-}
312 -- | Return the minimum 'Char' within a text stream (surely very useful!)
313 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
314 minimum = P.fold step Nothing id
320 Nothing -> Just (T.minimum txt)
321 Just c -> Just (min c (T.minimum txt))
322 {-# INLINABLE minimum #-}
324 -- | Find the first element in the stream that matches the predicate
327 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
328 find predicate p = head (p >-> filter predicate)
329 {-# INLINABLE find #-}
331 -- | Index into a text stream
333 :: (Monad m, Integral a)
334 => a-> Producer Text m () -> m (Maybe Char)
335 index n p = head (drop n p)
336 {-# INLINABLE index #-}
340 -- | Consume the first character from a stream of 'Text'
342 -- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
343 -- succeeds with a 'Right' providing the next character and the remainder of the
349 -> m (Either r (Char, Producer Text m r))
355 Left r -> return (Left r)
356 Right (txt, p') -> case (T.uncons txt) of
358 Just (c, txt') -> return (Right (c, yield txt' >> p'))
359 {-# INLINABLE nextChar #-}
361 -- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
363 drawChar :: (Monad m) => Parser Text m (Maybe Char)
367 Nothing -> return Nothing
368 Just txt -> case (T.uncons txt) of
373 {-# INLINABLE drawChar #-}
375 -- | Push back a 'Char' onto the underlying 'Producer'
376 unDrawChar :: (Monad m) => Char -> Parser Text m ()
377 unDrawChar c = modify (yield (T.singleton c) >>)
378 {-# INLINABLE unDrawChar #-}
380 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
386 > Left _ -> return ()
387 > Right c -> unDrawChar c
392 peekChar :: (Monad m) => Parser Text m (Maybe Char)
397 Just c -> unDrawChar c
399 {-# INLINABLE peekChar #-}
401 {-| Check if the underlying 'Producer' has no more characters
403 Note that this will skip over empty 'Text' chunks, unlike
404 'PP.isEndOfInput' from @pipes-parse@, which would consider
405 an empty 'Text' a valid bit of input.
407 > isEndOfChars = liftM isLeft peekChar
409 isEndOfChars :: (Monad m) => Parser Text m Bool
415 {-# INLINABLE isEndOfChars #-}
417 -- | Splits a 'Producer' after the given number of characters
419 :: (Monad m, Integral n)
421 -> Lens' (Producer Text m r)
422 (Producer Text m (Producer Text m r))
423 splitAt n0 k p0 = fmap join (k (go n0 p0))
429 Left r -> return (return r)
430 Right (txt, p') -> do
431 let len = fromIntegral (T.length txt)
437 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
439 return (yield suffix >> p')
440 {-# INLINABLE splitAt #-}
443 -- | Split a text stream in two, producing the longest
444 -- consecutive group of characters that satisfies the predicate
445 -- and returning the rest
450 -> Lens' (Producer Text m r)
451 (Producer Text m (Producer Text m r))
452 span predicate k p0 = fmap join (k (go p0))
457 Left r -> return (return r)
458 Right (txt, p') -> do
459 let (prefix, suffix) = T.span predicate txt
466 return (yield suffix >> p')
467 {-# INLINABLE span #-}
469 {-| Split a text stream in two, producing the longest
470 consecutive group of characters that don't satisfy the predicate
475 -> Lens' (Producer Text m r)
476 (Producer Text m (Producer Text m r))
477 break predicate = span (not . predicate)
478 {-# INLINABLE break #-}
480 {-| Improper lens that splits after the first group of equivalent Chars, as
481 defined by the given equivalence relation
485 => (Char -> Char -> Bool)
486 -> Lens' (Producer Text m r)
487 (Producer Text m (Producer Text m r))
488 groupBy equals k p0 = fmap join (k ((go p0))) where
492 Left r -> return (return r)
493 Right (txt, p') -> case T.uncons txt of
495 Just (c, _) -> (yield txt >> p') ^. span (equals c)
496 {-# INLINABLE groupBy #-}
498 -- | Improper lens that splits after the first succession of identical 'Char' s
500 => Lens' (Producer Text m r)
501 (Producer Text m (Producer Text m r))
503 {-# INLINABLE group #-}
505 {-| Improper lens that splits a 'Producer' after the first word
507 Unlike 'words', this does not drop leading whitespace
510 => Lens' (Producer Text m r)
511 (Producer Text m (Producer Text m r))
512 word k p0 = fmap join (k (to p0))
515 p' <- p^.span isSpace
517 {-# INLINABLE word #-}
520 => Lens' (Producer Text m r)
521 (Producer Text m (Producer Text m r))
522 line = break (== '\n')
523 {-# INLINABLE line #-}
525 -- | @(drop n)@ drops the first @n@ characters
526 drop :: (Monad m, Integral n)
527 => n -> Producer Text m r -> Producer Text m r
529 p' <- lift $ runEffect (for (p ^. splitAt n) discard)
531 {-# INLINABLE drop #-}
533 -- | Drop characters until they fail the predicate
534 dropWhile :: (Monad m)
535 => (Char -> Bool) -> Producer Text m r -> Producer Text m r
536 dropWhile predicate p = do
537 p' <- lift $ runEffect (for (p ^. span predicate) discard)
539 {-# INLINABLE dropWhile #-}
541 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
543 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
550 Right (txt, p') -> do
551 yield (T.intersperse c txt)
557 Right (txt, p') -> do
558 yield (T.singleton c)
559 yield (T.intersperse c txt)
561 {-# INLINABLE intersperse #-}
564 -- | Improper lens from unpacked 'Word8's to packaged 'ByteString's
565 pack :: Monad m => Lens' (Producer Char m r) (Producer Text m r)
566 pack k p = fmap _unpack (k (_pack p))
567 {-# INLINABLE pack #-}
569 -- | Improper lens from packed 'ByteString's to unpacked 'Word8's
570 unpack :: Monad m => Lens' (Producer Text m r) (Producer Char m r)
571 unpack k p = fmap _pack (k (_unpack p))
572 {-# INLINABLE unpack #-}
574 _pack :: Monad m => Producer Char m r -> Producer Text m r
575 _pack p = folds step id done (p^.PG.chunksOf defaultChunkSize)
577 step diffAs w8 = diffAs . (w8:)
579 done diffAs = T.pack (diffAs [])
580 {-# INLINABLE _pack #-}
582 _unpack :: Monad m => Producer Text m r -> Producer Char m r
583 _unpack p = for p (each . T.unpack)
584 {-# INLINABLE _unpack #-}
586 defaultChunkSize :: Int
587 defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
590 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
592 :: (Monad m, Integral n)
593 => n -> Lens' (Producer Text m r)
594 (FreeT (Producer Text m) m r)
595 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
601 Right (txt, p') -> Free $ do
602 p'' <- (yield txt >> p') ^. splitAt n
603 return $ FreeT (go p'')
604 {-# INLINABLE chunksOf #-}
607 {-| Split a text stream into sub-streams delimited by characters that satisfy the
613 -> Producer Text m r -> FreeT (Producer Text m) m r
614 splitsWith predicate p0 = FreeT (go0 p0)
619 Left r -> return (Pure r)
623 else return $ Free $ do
624 p'' <- (yield txt >> p') ^. span (not . predicate)
625 return $ FreeT (go1 p'')
630 Right (_, p') -> Free $ do
631 p'' <- p' ^. span (not . predicate)
632 return $ FreeT (go1 p'')
633 {-# INLINABLE splitsWith #-}
635 -- | Split a text stream using the given 'Char' as the delimiter
638 -> Lens' (Producer Text m r)
639 (FreeT (Producer Text m) m r)
641 fmap (intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
642 {-# INLINABLE splits #-}
644 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
645 given equivalence relation
649 => (Char -> Char -> Bool)
650 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
651 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
652 go p = do x <- next p
653 case x of Left r -> return (Pure r)
654 Right (bs, p') -> case T.uncons bs of
656 Just (c, _) -> do return $ Free $ do
657 p'' <- (yield bs >> p')^.span (equals c)
658 return $ FreeT (go p'')
659 {-# INLINABLE groupsBy #-}
662 -- | Like 'groupsBy', where the equality predicate is ('==')
665 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
666 groups = groupsBy (==)
667 {-# INLINABLE groups #-}
671 {-| Split a text stream into 'FreeT'-delimited lines
674 :: (Monad m) => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
675 lines k p = fmap _unlines (k (_lines p))
676 {-# INLINABLE lines #-}
680 => Lens' (FreeT (Producer Text m) m r) (Producer Text m r)
681 unlines k p = fmap _lines (k (_unlines p))
682 {-# INLINABLE unlines #-}
685 => Producer Text m r -> FreeT (Producer Text m) m r
686 _lines p0 = FreeT (go0 p0)
691 Left r -> return (Pure r)
695 else return $ Free $ go1 (yield txt >> p')
697 p' <- p ^. break ('\n' ==)
701 Left r -> return $ Pure r
702 Right (_, p'') -> go0 p''
703 {-# INLINABLE _lines #-}
706 => FreeT (Producer Text m) m r -> Producer Text m r
707 _unlines = concats . maps (<* yield (T.singleton '\n'))
708 {-# INLINABLE _unlines #-}
710 -- | Split a text stream into 'FreeT'-delimited words. Note that
711 -- roundtripping with e.g. @over words id@ eliminates extra space
712 -- characters as with @Prelude.unwords . Prelude.words@
714 :: (Monad m) => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
715 words k p = fmap _unwords (k (_words p))
716 {-# INLINABLE words #-}
720 => Lens' (FreeT (Producer Text m) m r) (Producer Text m r)
721 unwords k p = fmap _words (k (_unwords p))
722 {-# INLINABLE unwords #-}
724 _words :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
725 _words p = FreeT $ do
726 x <- next (dropWhile isSpace p)
729 Right (bs, p') -> Free $ do
730 p'' <- (yield bs >> p') ^. break isSpace
732 {-# INLINABLE _words #-}
734 _unwords :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
735 _unwords = intercalates (yield $ T.singleton ' ')
736 {-# INLINABLE _unwords #-}
739 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
740 interspersing a text stream in between them
744 => Producer Text m () -> FreeT (Producer Text m) m r -> Producer Text m r
748 x <- lift (runFreeT f)
755 x <- lift (runFreeT f)
762 {-# INLINABLE intercalate #-}
768 @Data.Text@ re-exports the 'Text' type.
770 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
774 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)