diff options
-rw-r--r-- | Pipes/Text.hs | 273 | ||||
-rw-r--r-- | Pipes/Text/Parse.hs | 2 |
2 files changed, 201 insertions, 74 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs index 4df2b5d..74576e8 100644 --- a/Pipes/Text.hs +++ b/Pipes/Text.hs | |||
@@ -113,16 +113,16 @@ module Pipes.Text ( | |||
113 | , drawChar | 113 | , drawChar |
114 | , unDrawChar | 114 | , unDrawChar |
115 | , peekChar | 115 | , peekChar |
116 | , isEndOfChars, | 116 | , isEndOfChars |
117 | 117 | ||
118 | -- * Parsing Lenses | 118 | -- * Parsing Lenses |
119 | splitAt | 119 | , splitAt |
120 | , span | 120 | , span |
121 | , break | 121 | , break |
122 | , groupBy | 122 | , groupBy |
123 | , group | 123 | , group |
124 | -- , word | 124 | , word |
125 | -- , line | 125 | , line |
126 | , decodeUtf8 | 126 | , decodeUtf8 |
127 | , decode | 127 | , decode |
128 | 128 | ||
@@ -138,12 +138,13 @@ module Pipes.Text ( | |||
138 | 138 | ||
139 | -- * Transformations | 139 | -- * Transformations |
140 | , intersperse | 140 | , intersperse |
141 | -- , packChars | 141 | , packChars |
142 | 142 | ||
143 | -- * Joiners | 143 | -- * Joiners |
144 | , intercalate | 144 | , intercalate |
145 | , unlines | 145 | , unlines |
146 | , unwords | 146 | , unwords |
147 | |||
147 | -- * Re-exports | 148 | -- * Re-exports |
148 | -- $reexports | 149 | -- $reexports |
149 | , module Data.ByteString | 150 | , module Data.ByteString |
@@ -155,7 +156,7 @@ module Pipes.Text ( | |||
155 | 156 | ||
156 | import Control.Exception (throwIO, try) | 157 | import Control.Exception (throwIO, try) |
157 | import Control.Monad (liftM, unless, join) | 158 | import Control.Monad (liftM, unless, join) |
158 | import Control.Monad.Trans.State.Strict (StateT(..)) | 159 | import Control.Monad.Trans.State.Strict (StateT(..), modify) |
159 | import Data.Monoid ((<>)) | 160 | import Data.Monoid ((<>)) |
160 | import qualified Data.Text as T | 161 | import qualified Data.Text as T |
161 | import qualified Data.Text.IO as T | 162 | import qualified Data.Text.IO as T |
@@ -180,10 +181,11 @@ import Pipes | |||
180 | import qualified Pipes.ByteString as PB | 181 | import qualified Pipes.ByteString as PB |
181 | import qualified Pipes.Text.Internal as PE | 182 | import qualified Pipes.Text.Internal as PE |
182 | import Pipes.Text.Internal (Codec(..)) | 183 | import Pipes.Text.Internal (Codec(..)) |
183 | import Pipes.Text.Parse (nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) | 184 | -- import Pipes.Text.Parse (nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) |
185 | |||
184 | import Pipes.Core (respond, Server') | 186 | import Pipes.Core (respond, Server') |
185 | import qualified Pipes.Parse as PP | 187 | import qualified Pipes.Parse as PP |
186 | import Pipes.Parse (Parser, concats, intercalates, FreeT) | 188 | import Pipes.Parse (Parser, concats, intercalates, FreeT(..)) |
187 | import qualified Pipes.Safe.Prelude as Safe | 189 | import qualified Pipes.Safe.Prelude as Safe |
188 | import qualified Pipes.Safe as Safe | 190 | import qualified Pipes.Safe as Safe |
189 | import Pipes.Safe (MonadSafe(..), Base(..)) | 191 | import Pipes.Safe (MonadSafe(..), Base(..)) |
@@ -622,11 +624,94 @@ count :: (Monad m, Num n) => Text -> Producer Text m () -> m n | |||
622 | count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) | 624 | count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) |
623 | {-# INLINABLE count #-} | 625 | {-# INLINABLE count #-} |
624 | 626 | ||
627 | |||
628 | {-| Consume the first character from a stream of 'Text' | ||
629 | |||
630 | 'next' either fails with a 'Left' if the 'Producer' has no more characters or | ||
631 | succeeds with a 'Right' providing the next character and the remainder of the | ||
632 | 'Producer'. | ||
633 | -} | ||
634 | nextChar | ||
635 | :: (Monad m) | ||
636 | => Producer Text m r | ||
637 | -> m (Either r (Char, Producer Text m r)) | ||
638 | nextChar = go | ||
639 | where | ||
640 | go p = do | ||
641 | x <- next p | ||
642 | case x of | ||
643 | Left r -> return (Left r) | ||
644 | Right (txt, p') -> case (T.uncons txt) of | ||
645 | Nothing -> go p' | ||
646 | Just (c, txt') -> return (Right (c, yield txt' >> p')) | ||
647 | {-# INLINABLE nextChar #-} | ||
648 | |||
649 | {-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the | ||
650 | 'Producer' is empty | ||
651 | -} | ||
652 | drawChar :: (Monad m) => Parser Text m (Maybe Char) | ||
653 | drawChar = do | ||
654 | x <- PP.draw | ||
655 | case x of | ||
656 | Nothing -> return Nothing | ||
657 | Just txt -> case (T.uncons txt) of | ||
658 | Nothing -> drawChar | ||
659 | Just (c, txt') -> do | ||
660 | PP.unDraw txt' | ||
661 | return (Just c) | ||
662 | {-# INLINABLE drawChar #-} | ||
663 | |||
664 | -- | Push back a 'Char' onto the underlying 'Producer' | ||
665 | unDrawChar :: (Monad m) => Char -> Parser Text m () | ||
666 | unDrawChar c = modify (yield (T.singleton c) >>) | ||
667 | {-# INLINABLE unDrawChar #-} | ||
668 | |||
669 | {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to | ||
670 | push the 'Char' back | ||
671 | |||
672 | > peekChar = do | ||
673 | > x <- drawChar | ||
674 | > case x of | ||
675 | > Left _ -> return () | ||
676 | > Right c -> unDrawChar c | ||
677 | > return x | ||
678 | -} | ||
679 | peekChar :: (Monad m) => Parser Text m (Maybe Char) | ||
680 | peekChar = do | ||
681 | x <- drawChar | ||
682 | case x of | ||
683 | Nothing -> return () | ||
684 | Just c -> unDrawChar c | ||
685 | return x | ||
686 | {-# INLINABLE peekChar #-} | ||
687 | |||
688 | {-| Check if the underlying 'Producer' has no more characters | ||
689 | |||
690 | Note that this will skip over empty 'Text' chunks, unlike | ||
691 | 'PP.isEndOfInput' from @pipes-parse@, which would consider | ||
692 | an empty 'Text' a valid bit of input. | ||
693 | |||
694 | > isEndOfChars = liftM isLeft peekChar | ||
695 | -} | ||
696 | isEndOfChars :: (Monad m) => Parser Text m Bool | ||
697 | isEndOfChars = do | ||
698 | x <- peekChar | ||
699 | return (case x of | ||
700 | Nothing -> True | ||
701 | Just _-> False ) | ||
702 | {-# INLINABLE isEndOfChars #-} | ||
703 | |||
704 | |||
705 | |||
706 | |||
707 | |||
625 | -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text | 708 | -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text |
626 | -- returning a Pipe of ByteStrings that begins at the point of failure. | 709 | -- returning a Pipe of ByteStrings that begins at the point of failure. |
627 | 710 | ||
628 | decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r) | 711 | decodeUtf8 :: Monad m => Lens' (Producer ByteString m r) |
629 | decodeUtf8 = go B.empty PE.streamDecodeUtf8 where | 712 | (Producer Text m (Producer ByteString m r)) |
713 | decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8))) | ||
714 | (k (go B.empty PE.streamDecodeUtf8 p0)) where | ||
630 | go !carry dec0 p = do | 715 | go !carry dec0 p = do |
631 | x <- lift (next p) | 716 | x <- lift (next p) |
632 | case x of Left r -> if B.null carry | 717 | case x of Left r -> if B.null carry |
@@ -647,9 +732,9 @@ decodeUtf8 = go B.empty PE.streamDecodeUtf8 where | |||
647 | splitAt | 732 | splitAt |
648 | :: (Monad m, Integral n) | 733 | :: (Monad m, Integral n) |
649 | => n | 734 | => n |
650 | -> Producer Text m r | 735 | -> Lens' (Producer Text m r) |
651 | -> Producer' Text m (Producer Text m r) | 736 | (Producer Text m (Producer Text m r)) |
652 | splitAt = go | 737 | splitAt n0 k p0 = fmap join (k (go n0 p0)) |
653 | where | 738 | where |
654 | go 0 p = return p | 739 | go 0 p = return p |
655 | go n p = do | 740 | go n p = do |
@@ -671,15 +756,16 @@ splitAt = go | |||
671 | -- | Split a text stream into 'FreeT'-delimited text streams of fixed size | 756 | -- | Split a text stream into 'FreeT'-delimited text streams of fixed size |
672 | chunksOf | 757 | chunksOf |
673 | :: (Monad m, Integral n) | 758 | :: (Monad m, Integral n) |
674 | => n -> Producer Text m r -> FreeT (Producer Text m) m r | 759 | => n -> Lens' (Producer Text m r) |
675 | chunksOf n p0 = PP.FreeT (go p0) | 760 | (FreeT (Producer Text m) m r) |
761 | chunksOf n k p0 = fmap concats (k (FreeT (go p0))) | ||
676 | where | 762 | where |
677 | go p = do | 763 | go p = do |
678 | x <- next p | 764 | x <- next p |
679 | return $ case x of | 765 | return $ case x of |
680 | Left r -> PP.Pure r | 766 | Left r -> PP.Pure r |
681 | Right (txt, p') -> PP.Free $ do | 767 | Right (txt, p') -> PP.Free $ do |
682 | p'' <- splitAt n (yield txt >> p') | 768 | p'' <- (yield txt >> p') ^. splitAt n |
683 | return $ PP.FreeT (go p'') | 769 | return $ PP.FreeT (go p'') |
684 | {-# INLINABLE chunksOf #-} | 770 | {-# INLINABLE chunksOf #-} |
685 | 771 | ||
@@ -689,9 +775,9 @@ chunksOf n p0 = PP.FreeT (go p0) | |||
689 | span | 775 | span |
690 | :: (Monad m) | 776 | :: (Monad m) |
691 | => (Char -> Bool) | 777 | => (Char -> Bool) |
692 | -> Producer Text m r | 778 | -> Lens' (Producer Text m r) |
693 | -> Producer' Text m (Producer Text m r) | 779 | (Producer Text m (Producer Text m r)) |
694 | span predicate = go | 780 | span predicate k p0 = fmap join (k (go p0)) |
695 | where | 781 | where |
696 | go p = do | 782 | go p = do |
697 | x <- lift (next p) | 783 | x <- lift (next p) |
@@ -714,11 +800,98 @@ span predicate = go | |||
714 | break | 800 | break |
715 | :: (Monad m) | 801 | :: (Monad m) |
716 | => (Char -> Bool) | 802 | => (Char -> Bool) |
717 | -> Producer Text m r | 803 | -> Lens' (Producer Text m r) |
718 | -> Producer Text m (Producer Text m r) | 804 | (Producer Text m (Producer Text m r)) |
719 | break predicate = span (not . predicate) | 805 | break predicate = span (not . predicate) |
720 | {-# INLINABLE break #-} | 806 | {-# INLINABLE break #-} |
721 | 807 | ||
808 | {-| Improper lens that splits after the first group of equivalent Chars, as | ||
809 | defined by the given equivalence relation | ||
810 | -} | ||
811 | groupBy | ||
812 | :: (Monad m) | ||
813 | => (Char -> Char -> Bool) | ||
814 | -> Lens' (Producer Text m r) | ||
815 | (Producer Text m (Producer Text m r)) | ||
816 | groupBy equals k p0 = fmap join (k ((go p0))) where | ||
817 | go p = do | ||
818 | x <- lift (next p) | ||
819 | case x of | ||
820 | Left r -> return (return r) | ||
821 | Right (txt, p') -> case T.uncons txt of | ||
822 | Nothing -> go p' | ||
823 | Just (c, _) -> (yield txt >> p') ^. span (equals c) | ||
824 | {-# INLINABLE groupBy #-} | ||
825 | |||
826 | -- | Improper lens that splits after the first succession of identical 'Char' s | ||
827 | group :: Monad m | ||
828 | => Lens' (Producer Text m r) | ||
829 | (Producer Text m (Producer Text m r)) | ||
830 | group = groupBy (==) | ||
831 | {-# INLINABLE group #-} | ||
832 | |||
833 | {-| Improper lens that splits a 'Producer' after the first word | ||
834 | |||
835 | Unlike 'words', this does not drop leading whitespace | ||
836 | -} | ||
837 | word :: (Monad m) | ||
838 | => Lens' (Producer Text m r) | ||
839 | (Producer Text m (Producer Text m r)) | ||
840 | word k p0 = fmap join (k (to p0)) | ||
841 | where | ||
842 | to p = do | ||
843 | p' <- p^.span isSpace | ||
844 | p'^.break isSpace | ||
845 | {-# INLINABLE word #-} | ||
846 | |||
847 | |||
848 | line :: (Monad m) | ||
849 | => Lens' (Producer Text m r) | ||
850 | (Producer Text m (Producer Text m r)) | ||
851 | line = break (== '\n') | ||
852 | |||
853 | {-# INLINABLE line #-} | ||
854 | |||
855 | |||
856 | -- | Intersperse a 'Char' in between the characters of stream of 'Text' | ||
857 | intersperse | ||
858 | :: (Monad m) => Char -> Producer Text m r -> Producer Text m r | ||
859 | intersperse c = go0 | ||
860 | where | ||
861 | go0 p = do | ||
862 | x <- lift (next p) | ||
863 | case x of | ||
864 | Left r -> return r | ||
865 | Right (txt, p') -> do | ||
866 | yield (T.intersperse c txt) | ||
867 | go1 p' | ||
868 | go1 p = do | ||
869 | x <- lift (next p) | ||
870 | case x of | ||
871 | Left r -> return r | ||
872 | Right (txt, p') -> do | ||
873 | yield (T.singleton c) | ||
874 | yield (T.intersperse c txt) | ||
875 | go1 p' | ||
876 | {-# INLINABLE intersperse #-} | ||
877 | |||
878 | |||
879 | |||
880 | -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's | ||
881 | packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x) | ||
882 | packChars = Data.Profunctor.dimap to (fmap from) | ||
883 | where | ||
884 | -- to :: Monad m => Producer Char m x -> Producer Text m x | ||
885 | to p = PP.folds step id done (p^.PP.chunksOf defaultChunkSize) | ||
886 | |||
887 | step diffAs c = diffAs . (c:) | ||
888 | |||
889 | done diffAs = T.pack (diffAs []) | ||
890 | |||
891 | -- from :: Monad m => Producer Text m x -> Producer Char m x | ||
892 | from p = for p (each . T.unpack) | ||
893 | {-# INLINABLE packChars #-} | ||
894 | |||
722 | {-| Split a text stream into sub-streams delimited by characters that satisfy the | 895 | {-| Split a text stream into sub-streams delimited by characters that satisfy the |
723 | predicate | 896 | predicate |
724 | -} | 897 | -} |
@@ -737,14 +910,14 @@ splitsWith predicate p0 = PP.FreeT (go0 p0) | |||
737 | if (T.null txt) | 910 | if (T.null txt) |
738 | then go0 p' | 911 | then go0 p' |
739 | else return $ PP.Free $ do | 912 | else return $ PP.Free $ do |
740 | p'' <- span (not . predicate) (yield txt >> p') | 913 | p'' <- (yield txt >> p') ^. span (not . predicate) |
741 | return $ PP.FreeT (go1 p'') | 914 | return $ PP.FreeT (go1 p'') |
742 | go1 p = do | 915 | go1 p = do |
743 | x <- nextChar p | 916 | x <- nextChar p |
744 | return $ case x of | 917 | return $ case x of |
745 | Left r -> PP.Pure r | 918 | Left r -> PP.Pure r |
746 | Right (_, p') -> PP.Free $ do | 919 | Right (_, p') -> PP.Free $ do |
747 | p'' <- span (not . predicate) p' | 920 | p'' <- p' ^. span (not . predicate) |
748 | return $ PP.FreeT (go1 p'') | 921 | return $ PP.FreeT (go1 p'') |
749 | {-# INLINABLE splitsWith #-} | 922 | {-# INLINABLE splitsWith #-} |
750 | 923 | ||
@@ -756,33 +929,6 @@ split :: (Monad m) | |||
756 | split c = splitsWith (c ==) | 929 | split c = splitsWith (c ==) |
757 | {-# INLINABLE split #-} | 930 | {-# INLINABLE split #-} |
758 | 931 | ||
759 | {-| Group a text stream into 'FreeT'-delimited text streams using the supplied | ||
760 | equality predicate | ||
761 | -} | ||
762 | groupBy | ||
763 | :: (Monad m) | ||
764 | => (Char -> Char -> Bool) | ||
765 | -> Producer Text m r | ||
766 | -> FreeT (Producer Text m) m r | ||
767 | groupBy equal p0 = PP.FreeT (go p0) | ||
768 | where | ||
769 | go p = do | ||
770 | x <- next p | ||
771 | case x of | ||
772 | Left r -> return (PP.Pure r) | ||
773 | Right (txt, p') -> case (T.uncons txt) of | ||
774 | Nothing -> go p' | ||
775 | Just (c, _) -> do | ||
776 | return $ PP.Free $ do | ||
777 | p'' <- span (equal c) (yield txt >> p') | ||
778 | return $ PP.FreeT (go p'') | ||
779 | {-# INLINABLE groupBy #-} | ||
780 | |||
781 | -- | Group a text stream into 'FreeT'-delimited text streams of identical characters | ||
782 | group | ||
783 | :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r | ||
784 | group = groupBy (==) | ||
785 | {-# INLINABLE group #-} | ||
786 | 932 | ||
787 | {-| Split a text stream into 'FreeT'-delimited lines | 933 | {-| Split a text stream into 'FreeT'-delimited lines |
788 | -} | 934 | -} |
@@ -799,7 +945,7 @@ lines p0 = PP.FreeT (go0 p0) | |||
799 | then go0 p' | 945 | then go0 p' |
800 | else return $ PP.Free $ go1 (yield txt >> p') | 946 | else return $ PP.Free $ go1 (yield txt >> p') |
801 | go1 p = do | 947 | go1 p = do |
802 | p' <- break ('\n' ==) p | 948 | p' <- p ^. break ('\n' ==) |
803 | return $ PP.FreeT $ do | 949 | return $ PP.FreeT $ do |
804 | x <- nextChar p' | 950 | x <- nextChar p' |
805 | case x of | 951 | case x of |
@@ -819,32 +965,13 @@ words = go | |||
819 | return $ case x of | 965 | return $ case x of |
820 | Left r -> PP.Pure r | 966 | Left r -> PP.Pure r |
821 | Right (bs, p') -> PP.Free $ do | 967 | Right (bs, p') -> PP.Free $ do |
822 | p'' <- break isSpace (yield bs >> p') | 968 | p'' <- (yield bs >> p') ^. break isSpace |
823 | return (go p'') | 969 | return (go p'') |
824 | {-# INLINABLE words #-} | 970 | {-# INLINABLE words #-} |
825 | 971 | ||
826 | 972 | ||
827 | -- | Intersperse a 'Char' in between the characters of the text stream | 973 | |
828 | intersperse | 974 | |
829 | :: (Monad m) => Char -> Producer Text m r -> Producer Text m r | ||
830 | intersperse c = go0 | ||
831 | where | ||
832 | go0 p = do | ||
833 | x <- lift (next p) | ||
834 | case x of | ||
835 | Left r -> return r | ||
836 | Right (txt, p') -> do | ||
837 | yield (T.intersperse c txt) | ||
838 | go1 p' | ||
839 | go1 p = do | ||
840 | x <- lift (next p) | ||
841 | case x of | ||
842 | Left r -> return r | ||
843 | Right (txt, p') -> do | ||
844 | yield (T.singleton c) | ||
845 | yield (T.intersperse c txt) | ||
846 | go1 p' | ||
847 | {-# INLINABLE intersperse #-} | ||
848 | 975 | ||
849 | {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after | 976 | {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after |
850 | interspersing a text stream in between them | 977 | interspersing a text stream in between them |
diff --git a/Pipes/Text/Parse.hs b/Pipes/Text/Parse.hs index 9cabaa6..317f85d 100644 --- a/Pipes/Text/Parse.hs +++ b/Pipes/Text/Parse.hs | |||
@@ -41,7 +41,7 @@ nextChar = go | |||
41 | Just (c, txt') -> return (Right (c, yield txt' >> p')) | 41 | Just (c, txt') -> return (Right (c, yield txt' >> p')) |
42 | {-# INLINABLE nextChar #-} | 42 | {-# INLINABLE nextChar #-} |
43 | 43 | ||
44 | {-| Draw one 'Char' from the underlying 'Producer', returning 'Left' if the | 44 | {-| Draw one 'Char' from the underlying 'Producer', returning 'Nothing' if the |
45 | 'Producer' is empty | 45 | 'Producer' is empty |
46 | -} | 46 | -} |
47 | drawChar :: (Monad m) => StateT (Producer Text m r) m (Maybe Char) | 47 | drawChar :: (Monad m) => StateT (Producer Text m r) m (Maybe Char) |