aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Pipes/Text.hs273
-rw-r--r--Pipes/Text/Parse.hs2
2 files changed, 201 insertions, 74 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs
index 4df2b5d..74576e8 100644
--- a/Pipes/Text.hs
+++ b/Pipes/Text.hs
@@ -113,16 +113,16 @@ module Pipes.Text (
113 , drawChar 113 , drawChar
114 , unDrawChar 114 , unDrawChar
115 , peekChar 115 , peekChar
116 , isEndOfChars, 116 , isEndOfChars
117 117
118 -- * Parsing Lenses 118 -- * Parsing Lenses
119 splitAt 119 , splitAt
120 , span 120 , span
121 , break 121 , break
122 , groupBy 122 , groupBy
123 , group 123 , group
124 -- , word 124 , word
125 -- , line 125 , line
126 , decodeUtf8 126 , decodeUtf8
127 , decode 127 , decode
128 128
@@ -138,12 +138,13 @@ module Pipes.Text (
138 138
139 -- * Transformations 139 -- * Transformations
140 , intersperse 140 , intersperse
141-- , packChars 141 , packChars
142 142
143 -- * Joiners 143 -- * Joiners
144 , intercalate 144 , intercalate
145 , unlines 145 , unlines
146 , unwords 146 , unwords
147
147 -- * Re-exports 148 -- * Re-exports
148 -- $reexports 149 -- $reexports
149 , module Data.ByteString 150 , module Data.ByteString
@@ -155,7 +156,7 @@ module Pipes.Text (
155 156
156import Control.Exception (throwIO, try) 157import Control.Exception (throwIO, try)
157import Control.Monad (liftM, unless, join) 158import Control.Monad (liftM, unless, join)
158import Control.Monad.Trans.State.Strict (StateT(..)) 159import Control.Monad.Trans.State.Strict (StateT(..), modify)
159import Data.Monoid ((<>)) 160import Data.Monoid ((<>))
160import qualified Data.Text as T 161import qualified Data.Text as T
161import qualified Data.Text.IO as T 162import qualified Data.Text.IO as T
@@ -180,10 +181,11 @@ import Pipes
180import qualified Pipes.ByteString as PB 181import qualified Pipes.ByteString as PB
181import qualified Pipes.Text.Internal as PE 182import qualified Pipes.Text.Internal as PE
182import Pipes.Text.Internal (Codec(..)) 183import Pipes.Text.Internal (Codec(..))
183import Pipes.Text.Parse (nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) 184-- import Pipes.Text.Parse (nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
185
184import Pipes.Core (respond, Server') 186import Pipes.Core (respond, Server')
185import qualified Pipes.Parse as PP 187import qualified Pipes.Parse as PP
186import Pipes.Parse (Parser, concats, intercalates, FreeT) 188import Pipes.Parse (Parser, concats, intercalates, FreeT(..))
187import qualified Pipes.Safe.Prelude as Safe 189import qualified Pipes.Safe.Prelude as Safe
188import qualified Pipes.Safe as Safe 190import qualified Pipes.Safe as Safe
189import Pipes.Safe (MonadSafe(..), Base(..)) 191import Pipes.Safe (MonadSafe(..), Base(..))
@@ -622,11 +624,94 @@ count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
622count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) 624count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
623{-# INLINABLE count #-} 625{-# INLINABLE count #-}
624 626
627
628{-| Consume the first character from a stream of 'Text'
629
630 'next' either fails with a 'Left' if the 'Producer' has no more characters or
631 succeeds with a 'Right' providing the next character and the remainder of the
632 'Producer'.
633-}
634nextChar
635 :: (Monad m)
636 => Producer Text m r
637 -> m (Either r (Char, Producer Text m r))
638nextChar = go
639 where
640 go p = do
641 x <- next p
642 case x of
643 Left r -> return (Left r)
644 Right (txt, p') -> case (T.uncons txt) of
645 Nothing -> go p'
646 Just (c, txt') -> return (Right (c, yield txt' >> p'))
647{-# INLINABLE nextChar #-}
648
649{-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
650 'Producer' is empty
651-}
652drawChar :: (Monad m) => Parser Text m (Maybe Char)
653drawChar = do
654 x <- PP.draw
655 case x of
656 Nothing -> return Nothing
657 Just txt -> case (T.uncons txt) of
658 Nothing -> drawChar
659 Just (c, txt') -> do
660 PP.unDraw txt'
661 return (Just c)
662{-# INLINABLE drawChar #-}
663
664-- | Push back a 'Char' onto the underlying 'Producer'
665unDrawChar :: (Monad m) => Char -> Parser Text m ()
666unDrawChar c = modify (yield (T.singleton c) >>)
667{-# INLINABLE unDrawChar #-}
668
669{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
670 push the 'Char' back
671
672> peekChar = do
673> x <- drawChar
674> case x of
675> Left _ -> return ()
676> Right c -> unDrawChar c
677> return x
678-}
679peekChar :: (Monad m) => Parser Text m (Maybe Char)
680peekChar = do
681 x <- drawChar
682 case x of
683 Nothing -> return ()
684 Just c -> unDrawChar c
685 return x
686{-# INLINABLE peekChar #-}
687
688{-| Check if the underlying 'Producer' has no more characters
689
690 Note that this will skip over empty 'Text' chunks, unlike
691 'PP.isEndOfInput' from @pipes-parse@, which would consider
692 an empty 'Text' a valid bit of input.
693
694> isEndOfChars = liftM isLeft peekChar
695-}
696isEndOfChars :: (Monad m) => Parser Text m Bool
697isEndOfChars = do
698 x <- peekChar
699 return (case x of
700 Nothing -> True
701 Just _-> False )
702{-# INLINABLE isEndOfChars #-}
703
704
705
706
707
625-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text 708-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text
626-- returning a Pipe of ByteStrings that begins at the point of failure. 709-- returning a Pipe of ByteStrings that begins at the point of failure.
627 710
628decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r) 711decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
629decodeUtf8 = go B.empty PE.streamDecodeUtf8 where 712 (Producer Text m (Producer ByteString m r))
713decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
714 (k (go B.empty PE.streamDecodeUtf8 p0)) where
630 go !carry dec0 p = do 715 go !carry dec0 p = do
631 x <- lift (next p) 716 x <- lift (next p)
632 case x of Left r -> if B.null carry 717 case x of Left r -> if B.null carry
@@ -647,9 +732,9 @@ decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
647splitAt 732splitAt
648 :: (Monad m, Integral n) 733 :: (Monad m, Integral n)
649 => n 734 => n
650 -> Producer Text m r 735 -> Lens' (Producer Text m r)
651 -> Producer' Text m (Producer Text m r) 736 (Producer Text m (Producer Text m r))
652splitAt = go 737splitAt n0 k p0 = fmap join (k (go n0 p0))
653 where 738 where
654 go 0 p = return p 739 go 0 p = return p
655 go n p = do 740 go n p = do
@@ -671,15 +756,16 @@ splitAt = go
671-- | Split a text stream into 'FreeT'-delimited text streams of fixed size 756-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
672chunksOf 757chunksOf
673 :: (Monad m, Integral n) 758 :: (Monad m, Integral n)
674 => n -> Producer Text m r -> FreeT (Producer Text m) m r 759 => n -> Lens' (Producer Text m r)
675chunksOf n p0 = PP.FreeT (go p0) 760 (FreeT (Producer Text m) m r)
761chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
676 where 762 where
677 go p = do 763 go p = do
678 x <- next p 764 x <- next p
679 return $ case x of 765 return $ case x of
680 Left r -> PP.Pure r 766 Left r -> PP.Pure r
681 Right (txt, p') -> PP.Free $ do 767 Right (txt, p') -> PP.Free $ do
682 p'' <- splitAt n (yield txt >> p') 768 p'' <- (yield txt >> p') ^. splitAt n
683 return $ PP.FreeT (go p'') 769 return $ PP.FreeT (go p'')
684{-# INLINABLE chunksOf #-} 770{-# INLINABLE chunksOf #-}
685 771
@@ -689,9 +775,9 @@ chunksOf n p0 = PP.FreeT (go p0)
689span 775span
690 :: (Monad m) 776 :: (Monad m)
691 => (Char -> Bool) 777 => (Char -> Bool)
692 -> Producer Text m r 778 -> Lens' (Producer Text m r)
693 -> Producer' Text m (Producer Text m r) 779 (Producer Text m (Producer Text m r))
694span predicate = go 780span predicate k p0 = fmap join (k (go p0))
695 where 781 where
696 go p = do 782 go p = do
697 x <- lift (next p) 783 x <- lift (next p)
@@ -714,11 +800,98 @@ span predicate = go
714break 800break
715 :: (Monad m) 801 :: (Monad m)
716 => (Char -> Bool) 802 => (Char -> Bool)
717 -> Producer Text m r 803 -> Lens' (Producer Text m r)
718 -> Producer Text m (Producer Text m r) 804 (Producer Text m (Producer Text m r))
719break predicate = span (not . predicate) 805break predicate = span (not . predicate)
720{-# INLINABLE break #-} 806{-# INLINABLE break #-}
721 807
808{-| Improper lens that splits after the first group of equivalent Chars, as
809 defined by the given equivalence relation
810-}
811groupBy
812 :: (Monad m)
813 => (Char -> Char -> Bool)
814 -> Lens' (Producer Text m r)
815 (Producer Text m (Producer Text m r))
816groupBy equals k p0 = fmap join (k ((go p0))) where
817 go p = do
818 x <- lift (next p)
819 case x of
820 Left r -> return (return r)
821 Right (txt, p') -> case T.uncons txt of
822 Nothing -> go p'
823 Just (c, _) -> (yield txt >> p') ^. span (equals c)
824{-# INLINABLE groupBy #-}
825
826-- | Improper lens that splits after the first succession of identical 'Char' s
827group :: Monad m
828 => Lens' (Producer Text m r)
829 (Producer Text m (Producer Text m r))
830group = groupBy (==)
831{-# INLINABLE group #-}
832
833{-| Improper lens that splits a 'Producer' after the first word
834
835 Unlike 'words', this does not drop leading whitespace
836-}
837word :: (Monad m)
838 => Lens' (Producer Text m r)
839 (Producer Text m (Producer Text m r))
840word k p0 = fmap join (k (to p0))
841 where
842 to p = do
843 p' <- p^.span isSpace
844 p'^.break isSpace
845{-# INLINABLE word #-}
846
847
848line :: (Monad m)
849 => Lens' (Producer Text m r)
850 (Producer Text m (Producer Text m r))
851line = break (== '\n')
852
853{-# INLINABLE line #-}
854
855
856-- | Intersperse a 'Char' in between the characters of stream of 'Text'
857intersperse
858 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
859intersperse c = go0
860 where
861 go0 p = do
862 x <- lift (next p)
863 case x of
864 Left r -> return r
865 Right (txt, p') -> do
866 yield (T.intersperse c txt)
867 go1 p'
868 go1 p = do
869 x <- lift (next p)
870 case x of
871 Left r -> return r
872 Right (txt, p') -> do
873 yield (T.singleton c)
874 yield (T.intersperse c txt)
875 go1 p'
876{-# INLINABLE intersperse #-}
877
878
879
880-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
881packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
882packChars = Data.Profunctor.dimap to (fmap from)
883 where
884 -- to :: Monad m => Producer Char m x -> Producer Text m x
885 to p = PP.folds step id done (p^.PP.chunksOf defaultChunkSize)
886
887 step diffAs c = diffAs . (c:)
888
889 done diffAs = T.pack (diffAs [])
890
891 -- from :: Monad m => Producer Text m x -> Producer Char m x
892 from p = for p (each . T.unpack)
893{-# INLINABLE packChars #-}
894
722{-| Split a text stream into sub-streams delimited by characters that satisfy the 895{-| Split a text stream into sub-streams delimited by characters that satisfy the
723 predicate 896 predicate
724-} 897-}
@@ -737,14 +910,14 @@ splitsWith predicate p0 = PP.FreeT (go0 p0)
737 if (T.null txt) 910 if (T.null txt)
738 then go0 p' 911 then go0 p'
739 else return $ PP.Free $ do 912 else return $ PP.Free $ do
740 p'' <- span (not . predicate) (yield txt >> p') 913 p'' <- (yield txt >> p') ^. span (not . predicate)
741 return $ PP.FreeT (go1 p'') 914 return $ PP.FreeT (go1 p'')
742 go1 p = do 915 go1 p = do
743 x <- nextChar p 916 x <- nextChar p
744 return $ case x of 917 return $ case x of
745 Left r -> PP.Pure r 918 Left r -> PP.Pure r
746 Right (_, p') -> PP.Free $ do 919 Right (_, p') -> PP.Free $ do
747 p'' <- span (not . predicate) p' 920 p'' <- p' ^. span (not . predicate)
748 return $ PP.FreeT (go1 p'') 921 return $ PP.FreeT (go1 p'')
749{-# INLINABLE splitsWith #-} 922{-# INLINABLE splitsWith #-}
750 923
@@ -756,33 +929,6 @@ split :: (Monad m)
756split c = splitsWith (c ==) 929split c = splitsWith (c ==)
757{-# INLINABLE split #-} 930{-# INLINABLE split #-}
758 931
759{-| Group a text stream into 'FreeT'-delimited text streams using the supplied
760 equality predicate
761-}
762groupBy
763 :: (Monad m)
764 => (Char -> Char -> Bool)
765 -> Producer Text m r
766 -> FreeT (Producer Text m) m r
767groupBy equal p0 = PP.FreeT (go p0)
768 where
769 go p = do
770 x <- next p
771 case x of
772 Left r -> return (PP.Pure r)
773 Right (txt, p') -> case (T.uncons txt) of
774 Nothing -> go p'
775 Just (c, _) -> do
776 return $ PP.Free $ do
777 p'' <- span (equal c) (yield txt >> p')
778 return $ PP.FreeT (go p'')
779{-# INLINABLE groupBy #-}
780
781-- | Group a text stream into 'FreeT'-delimited text streams of identical characters
782group
783 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
784group = groupBy (==)
785{-# INLINABLE group #-}
786 932
787{-| Split a text stream into 'FreeT'-delimited lines 933{-| Split a text stream into 'FreeT'-delimited lines
788-} 934-}
@@ -799,7 +945,7 @@ lines p0 = PP.FreeT (go0 p0)
799 then go0 p' 945 then go0 p'
800 else return $ PP.Free $ go1 (yield txt >> p') 946 else return $ PP.Free $ go1 (yield txt >> p')
801 go1 p = do 947 go1 p = do
802 p' <- break ('\n' ==) p 948 p' <- p ^. break ('\n' ==)
803 return $ PP.FreeT $ do 949 return $ PP.FreeT $ do
804 x <- nextChar p' 950 x <- nextChar p'
805 case x of 951 case x of
@@ -819,32 +965,13 @@ words = go
819 return $ case x of 965 return $ case x of
820 Left r -> PP.Pure r 966 Left r -> PP.Pure r
821 Right (bs, p') -> PP.Free $ do 967 Right (bs, p') -> PP.Free $ do
822 p'' <- break isSpace (yield bs >> p') 968 p'' <- (yield bs >> p') ^. break isSpace
823 return (go p'') 969 return (go p'')
824{-# INLINABLE words #-} 970{-# INLINABLE words #-}
825 971
826 972
827-- | Intersperse a 'Char' in between the characters of the text stream 973
828intersperse 974
829 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
830intersperse c = go0
831 where
832 go0 p = do
833 x <- lift (next p)
834 case x of
835 Left r -> return r
836 Right (txt, p') -> do
837 yield (T.intersperse c txt)
838 go1 p'
839 go1 p = do
840 x <- lift (next p)
841 case x of
842 Left r -> return r
843 Right (txt, p') -> do
844 yield (T.singleton c)
845 yield (T.intersperse c txt)
846 go1 p'
847{-# INLINABLE intersperse #-}
848 975
849{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after 976{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
850 interspersing a text stream in between them 977 interspersing a text stream in between them
diff --git a/Pipes/Text/Parse.hs b/Pipes/Text/Parse.hs
index 9cabaa6..317f85d 100644
--- a/Pipes/Text/Parse.hs
+++ b/Pipes/Text/Parse.hs
@@ -41,7 +41,7 @@ nextChar = go
41 Just (c, txt') -> return (Right (c, yield txt' >> p')) 41 Just (c, txt') -> return (Right (c, yield txt' >> p'))
42{-# INLINABLE nextChar #-} 42{-# INLINABLE nextChar #-}
43 43
44{-| Draw one 'Char' from the underlying 'Producer', returning 'Left' if the 44{-| Draw one 'Char' from the underlying 'Producer', returning 'Nothing' if the
45 'Producer' is empty 45 'Producer' is empty
46-} 46-}
47drawChar :: (Monad m) => StateT (Producer Text m r) m (Maybe Char) 47drawChar :: (Monad m) => StateT (Producer Text m r) m (Maybe Char)