X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=Pipes%2FText.hs;h=68ede1a00b3e3bf477ac8d8ac74fa06b2a7cb94c;hb=4989a35706a2193bd5e0e2e98135d6aecd76cea1;hp=3d119fe072cab8f70921d7495ddf2ecddc1bcbbe;hpb=5e3f5409333cf06f79489169195e5cd7031ac4bd;p=github%2Ffretlink%2Ftext-pipes.git diff --git a/Pipes/Text.hs b/Pipes/Text.hs index 3d119fe..68ede1a 100644 --- a/Pipes/Text.hs +++ b/Pipes/Text.hs @@ -1,15 +1,17 @@ -{-# LANGUAGE RankNTypes, TypeFamilies, CPP #-} +{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-} {-| This module provides @pipes@ utilities for \"text streams\", which are - streams of 'Text' chunks. The individual chunks are uniformly @strict@, but - a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can - be associated with a 'Producer' or 'Consumer' according as it is read or written to. + streams of 'Text' chunks. The individual chunks are uniformly @strict@, but + a 'Producer' can be converted to and from lazy 'Text's, though this is generally + unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'. + An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to. To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For example, the following program copies a document from one file to another: > import Pipes -> import qualified Data.Text.Pipes as Text +> import qualified Pipes.Text as Text +> import qualified Pipes.Text.IO as Text > import System.IO > > main = @@ -20,17 +22,18 @@ To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe): > import Pipes -> import qualified Data.Text.Pipes as Text +> import qualified Pipes.Text as Text +> import qualified Pipes.Text.IO as Text > import Pipes.Safe > > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt" You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin' - and 'stdout' proxies, as with the following \"echo\" program: + and 'stdout' pipes, as with the following \"echo\" program: > main = runEffect $ Text.stdin >-> Text.stdout - You can also translate pure lazy 'TL.Text's to and from proxies: + You can also translate pure lazy 'TL.Text's to and from pipes: > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout @@ -52,133 +55,140 @@ To stream from files, the following is perhaps more Prelude-like (note that it u Note that functions in this library are designed to operate on streams that are insensitive to text boundaries. This means that they may freely split - text into smaller texts and /discard empty texts/. However, they will - /never concatenate texts/ in order to provide strict upper bounds on memory - usage. + text into smaller texts, /discard empty texts/. However, apart from the + special case of 'concatMap', they will /never concatenate texts/ in order + to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'. 
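+
+    For example, a character count does not depend on how the stream happens
+    to be chunked. A minimal sketch (assuming the qualified imports
+    @Pipes.Text as Text@, @Data.Text as T@ and @Data.Text.Lazy as TL@, and
+    'each' from "Pipes"):
+
+> -- one chunk of eleven characters
+> n1 = Text.length (Text.fromLazy (TL.pack "Hello world"))
+>
+> -- the same eleven characters split across three chunks
+> n2 = Text.length (each (map T.pack ["Hel", "lo ", "world"]))
+>
+> -- n1 and n2 both return 11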
-} module Pipes.Text ( -- * Producers - fromLazy, - stdin, - fromHandle, - readFile, - stdinLn, + fromLazy + -- , stdin + -- , fromHandle + -- , readFile -- * Consumers - stdout, - stdoutLn, - toHandle, - writeFile, + -- , stdout + -- , toHandle + -- , writeFile -- * Pipes - map, - concatMap, - take, - drop, - takeWhile, - dropWhile, - filter, - scan, - encodeUtf8, -#if MIN_VERSION_text(0,11,4) - pipeDecodeUtf8, - pipeDecodeUtf8With, -#endif - pack, - unpack, - toCaseFold, - toLower, - toUpper, - stripStart, + , map + , concatMap + , take + , drop + , takeWhile + , dropWhile + , filter + , scan +-- , encodeUtf8 + , pack + , unpack + , toCaseFold + , toLower + , toUpper + , stripStart -- * Folds - toLazy, - toLazyM, - fold, - head, - last, - null, - length, - any, - all, - maximum, - minimum, - find, - index, - count, + , toLazy + , toLazyM + , foldChars + , head + , last + , null + , length + , any + , all + , maximum + , minimum + , find + , index + , count + + -- * Primitive Character Parsers + -- $parse + , nextChar + , drawChar + , unDrawChar + , peekChar + , isEndOfChars + + -- * Parsing Lenses + , splitAt + , span + , break + , groupBy + , group + , word + , line + + -- -- * Decoding Lenses + -- , decodeUtf8 + -- , codec + -- + -- -- * Codecs + -- , utf8 + -- , utf16_le + -- , utf16_be + -- , utf32_le + -- , utf32_be + -- + -- -- * Other Decoding/Encoding Functions + -- , decodeIso8859_1 + -- , decodeAscii + -- , encodeIso8859_1 + -- , encodeAscii + + -- * FreeT Splitters + , chunksOf + , splitsWith + , splits +-- , groupsBy +-- , groups + , lines + , words - -- * Splitters - splitAt, - chunksOf, - span, - break, - splitWith, - split, - groupBy, - group, - lines, - words, -#if MIN_VERSION_text(0,11,4) - decodeUtf8, - decodeUtf8With, -#endif -- * Transformations - intersperse, + , intersperse + , packChars -- * Joiners - intercalate, - unlines, - unwords, - - -- * Character Parsers - -- $parse - nextChar, - drawChar, - unDrawChar, - peekChar, - isEndOfChars, + , intercalate + , unlines + , unwords - -- * Re-exports + -- * Re-exports -- $reexports - module Data.Text, - module Pipes.Parse + -- , DecodeResult(..) + -- , Codec + -- , TextException(..) 
+ , module Data.ByteString + , module Data.Text + , module Data.Profunctor + , module Pipes.Parse + , module Pipes.Group ) where -import Control.Exception (throwIO, try) -import Control.Monad (liftM, unless) -import Control.Monad.Trans.State.Strict (StateT(..)) +import Control.Applicative ((<*)) +import Control.Monad (liftM, join) +import Control.Monad.Trans.State.Strict (StateT(..), modify) import qualified Data.Text as T -import qualified Data.Text.IO as T -import qualified Data.Text.Encoding as TE -import qualified Data.Text.Encoding.Error as TE import Data.Text (Text) import qualified Data.Text.Lazy as TL -import qualified Data.Text.Lazy.IO as TL import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize) -import Data.ByteString.Unsafe (unsafeTake, unsafeDrop) import Data.ByteString (ByteString) -import qualified Data.ByteString as B -import Data.Char (ord, isSpace) +import Data.Functor.Constant (Constant(Constant, getConstant)) import Data.Functor.Identity (Identity) -import qualified Data.List as List -import Foreign.C.Error (Errno(Errno), ePIPE) -import qualified GHC.IO.Exception as G +import Data.Profunctor (Profunctor) +import qualified Data.Profunctor import Pipes -import qualified Pipes.ByteString as PB -import qualified Pipes.ByteString.Parse as PBP -import Pipes.Text.Parse ( - nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) -import Pipes.Core (respond, Server') +import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..)) +import qualified Pipes.Group as PG import qualified Pipes.Parse as PP -import Pipes.Parse (input, concat, FreeT) -import qualified Pipes.Safe.Prelude as Safe -import qualified Pipes.Safe as Safe -import Pipes.Safe (MonadSafe(..), Base(..)) +import Pipes.Parse (Parser) import qualified Pipes.Prelude as P -import qualified System.IO as IO import Data.Char (isSpace) -import Data.Word (Word8) + import Prelude hiding ( all, any, @@ -211,119 +221,16 @@ import Prelude hiding ( -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's fromLazy :: (Monad m) => TL.Text -> Producer' Text m () fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) -{-# INLINABLE fromLazy #-} - --- | Stream text from 'stdin' -stdin :: MonadIO m => Producer' Text m () -stdin = fromHandle IO.stdin -{-# INLINABLE stdin #-} - -{-| Convert a 'IO.Handle' into a text stream using a text size - determined by the good sense of the text library. - --} - -fromHandle :: MonadIO m => IO.Handle -> Producer' Text m () -#if MIN_VERSION_text(0,11,4) -fromHandle h = PB.fromHandle h >-> pipeDecodeUtf8 -{-# INLINABLE fromHandle#-} --- bytestring fromHandle + streamDecodeUtf8 is 3 times as fast as --- the dedicated Text IO function 'hGetChunk' ; --- this way "runEffect $ PT.fromHandle hIn >-> PT.toHandle hOut" --- runs the same as the conduit equivalent, only slightly slower --- than "runEffect $ PB.fromHandle hIn >-> PB.toHandle hOut" - -#else -fromHandle h = go where - go = do txt <- liftIO (T.hGetChunk h) - unless (T.null txt) $ do yield txt - go -{-# INLINABLE fromHandle#-} -#endif -{-| Stream text from a file using Pipes.Safe - ->>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout -MAIN = PUTSTRLN "HELLO WORLD" --} - -readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m () -readFile file = Safe.withFile file IO.ReadMode fromHandle -{-# INLINABLE readFile #-} +{-# INLINE fromLazy #-} -{-| Stream lines of text from stdin (for testing in ghci etc.) ->>> let safely = runSafeT . 
runEffect ->>> safely $ for Text.stdinLn (lift . lift . print . T.length) -hello -5 -world -5 - --} -stdinLn :: MonadIO m => Producer' Text m () -stdinLn = go where - go = do - eof <- liftIO (IO.hIsEOF IO.stdin) - unless eof $ do - txt <- liftIO (T.hGetLine IO.stdin) - yield txt - go - - -{-| Stream text to 'stdout' - - Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe. - - Note: For best performance, use @(for source (liftIO . putStr))@ instead of - @(source >-> stdout)@ in suitable cases. --} -stdout :: MonadIO m => Consumer' Text m () -stdout = go - where - go = do - txt <- await - x <- liftIO $ try (T.putStr txt) - case x of - Left (G.IOError { G.ioe_type = G.ResourceVanished - , G.ioe_errno = Just ioe }) - | Errno ioe == ePIPE - -> return () - Left e -> liftIO (throwIO e) - Right () -> go -{-# INLINABLE stdout #-} - -stdoutLn :: (MonadIO m) => Consumer' Text m () -stdoutLn = go - where - go = do - str <- await - x <- liftIO $ try (T.putStrLn str) - case x of - Left (G.IOError { G.ioe_type = G.ResourceVanished - , G.ioe_errno = Just ioe }) - | Errno ioe == ePIPE - -> return () - Left e -> liftIO (throwIO e) - Right () -> go -{-# INLINABLE stdoutLn #-} - -{-| Convert a text stream into a 'Handle' - - Note: again, for best performance, where possible use - @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@. --} -toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r -toHandle h = for cat (liftIO . T.hPutStr h) -{-# INLINABLE toHandle #-} +type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a) -{-# RULES "p >-> toHandle h" forall p h . - p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt)) - #-} +type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a) +(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b +a ^. lens = getConstant (lens Constant a) --- | Stream text into a file. Uses @pipes-safe@. -writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m () -writeFile file = Safe.withFile file IO.WriteMode toHandle -- | Apply a transformation to each 'Char' in the stream map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r @@ -347,13 +254,13 @@ concatMap f = P.map (T.concatMap f) -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@ -encodeUtf8 :: Monad m => Pipe Text ByteString m r -encodeUtf8 = P.map TE.encodeUtf8 -{-# INLINEABLE encodeUtf8 #-} - -{-# RULES "p >-> encodeUtf8" forall p . - p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt)) - #-} +-- encodeUtf8 :: Monad m => Pipe Text ByteString m r +-- encodeUtf8 = P.map TE.encodeUtf8 +-- {-# INLINEABLE encodeUtf8 #-} +-- +-- {-# RULES "p >-> encodeUtf8" forall p . 
+-- p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt)) +-- #-} -- | Transform a Pipe of 'String's into one of 'Text' chunks pack :: Monad m => Pipe String Text m r @@ -373,8 +280,8 @@ unpack = for cat (\t -> yield (T.unpack t)) p >-> unpack = for p (\txt -> yield (T.unpack txt)) #-} --- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility, --- here acting on a 'Text' pipe, rather as they would on a lazy text +-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities, +-- here acting as 'Text' pipes, rather as they would on a lazy text toCaseFold :: Monad m => Pipe Text Text m () toCaseFold = P.map T.toCaseFold {-# INLINEABLE toCaseFold #-} @@ -409,7 +316,8 @@ stripStart = do let text = T.stripStart chunk if T.null text then stripStart - else cat + else do yield text + cat {-# INLINEABLE stripStart #-} -- | @(take n)@ only allows @n@ individual characters to pass; @@ -482,13 +390,15 @@ filter predicate = P.map (T.filter predicate) scan :: (Monad m) => (Char -> Char -> Char) -> Char -> Pipe Text Text m r -scan step begin = go begin +scan step begin = do + yield (T.singleton begin) + go begin where go c = do txt <- await let txt' = T.scanl step c txt c' = T.last txt' - yield txt' + yield (T.tail txt') go c' {-# INLINABLE scan #-} @@ -511,11 +421,11 @@ toLazyM = liftM TL.fromChunks . P.toListM {-# INLINABLE toLazyM #-} -- | Reduce the text stream using a strict left fold over characters -fold +foldChars :: Monad m => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r -fold step begin done = P.fold (T.foldl' step) begin done -{-# INLINABLE fold #-} +foldChars step begin done = P.fold (T.foldl' step) begin done +{-# INLINABLE foldChars #-} -- | Retrieve the first 'Char' head :: (Monad m) => Producer Text m () -> m (Maybe Char) @@ -586,6 +496,7 @@ minimum = P.fold step Nothing id Just c -> Just (min c (T.minimum txt)) {-# INLINABLE minimum #-} + -- | Find the first element in the stream that matches the predicate find :: (Monad m) @@ -606,82 +517,93 @@ count :: (Monad m, Num n) => Text -> Producer Text m () -> m n count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) {-# INLINABLE count #-} -#if MIN_VERSION_text(0,11,4) --- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded --- into a Pipe of Text -decodeUtf8 - :: Monad m - => Producer ByteString m r -> Producer Text m (Producer ByteString m r) -decodeUtf8 = go TE.streamDecodeUtf8 - where go dec p = do - x <- lift (next p) - case x of - Left r -> return (return r) - Right (chunk, p') -> do - let TE.Some text l dec' = dec chunk - if B.null l - then do - yield text - go dec' p' - else return $ do - yield l - p' -{-# INLINEABLE decodeUtf8 #-} - --- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded --- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@ --- E.g. 
'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\" -decodeUtf8With - :: Monad m - => TE.OnDecodeError - -> Producer ByteString m r -> Producer Text m (Producer ByteString m r) -decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr) - where go dec p = do - x <- lift (next p) - case x of - Left r -> return (return r) - Right (chunk, p') -> do - let TE.Some text l dec' = dec chunk - if B.null l - then do - yield text - go dec' p' - else return $ do - yield l - p' -{-# INLINEABLE decodeUtf8With #-} - --- | A simple pipe from 'ByteString' to 'Text'; a decoding error will arise --- with any chunk that contains a sequence of bytes that is unreadable. Otherwise --- only few bytes will only be moved from one chunk to the next before decoding. -pipeDecodeUtf8 :: Monad m => Pipe ByteString Text m r -pipeDecodeUtf8 = go TE.streamDecodeUtf8 - where go dec = do chunk <- await - case dec chunk of - TE.Some text l dec' -> do yield text - go dec' -{-# INLINEABLE pipeDecodeUtf8 #-} - --- | A simple pipe from 'ByteString' to 'Text' using a replacement function. -pipeDecodeUtf8With - :: Monad m - => TE.OnDecodeError - -> Pipe ByteString Text m r -pipeDecodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr) - where go dec = do chunk <- await - case dec chunk of - TE.Some text l dec' -> do yield text - go dec' -{-# INLINEABLE pipeDecodeUtf8With #-} -#endif + +{-| Consume the first character from a stream of 'Text' + + 'next' either fails with a 'Left' if the 'Producer' has no more characters or + succeeds with a 'Right' providing the next character and the remainder of the + 'Producer'. +-} +nextChar + :: (Monad m) + => Producer Text m r + -> m (Either r (Char, Producer Text m r)) +nextChar = go + where + go p = do + x <- next p + case x of + Left r -> return (Left r) + Right (txt, p') -> case (T.uncons txt) of + Nothing -> go p' + Just (c, txt') -> return (Right (c, yield txt' >> p')) +{-# INLINABLE nextChar #-} + +{-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the + 'Producer' is empty +-} +drawChar :: (Monad m) => Parser Text m (Maybe Char) +drawChar = do + x <- PP.draw + case x of + Nothing -> return Nothing + Just txt -> case (T.uncons txt) of + Nothing -> drawChar + Just (c, txt') -> do + PP.unDraw txt' + return (Just c) +{-# INLINABLE drawChar #-} + +-- | Push back a 'Char' onto the underlying 'Producer' +unDrawChar :: (Monad m) => Char -> Parser Text m () +unDrawChar c = modify (yield (T.singleton c) >>) +{-# INLINABLE unDrawChar #-} + +{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to + push the 'Char' back + +> peekChar = do +> x <- drawChar +> case x of +> Left _ -> return () +> Right c -> unDrawChar c +> return x +-} +peekChar :: (Monad m) => Parser Text m (Maybe Char) +peekChar = do + x <- drawChar + case x of + Nothing -> return () + Just c -> unDrawChar c + return x +{-# INLINABLE peekChar #-} + +{-| Check if the underlying 'Producer' has no more characters + + Note that this will skip over empty 'Text' chunks, unlike + 'PP.isEndOfInput' from @pipes-parse@, which would consider + an empty 'Text' a valid bit of input. 
+ +> isEndOfChars = liftM isLeft peekChar +-} +isEndOfChars :: (Monad m) => Parser Text m Bool +isEndOfChars = do + x <- peekChar + return (case x of + Nothing -> True + Just _-> False ) +{-# INLINABLE isEndOfChars #-} + + + -- | Splits a 'Producer' after the given number of characters splitAt :: (Monad m, Integral n) => n - -> Producer Text m r - -> Producer' Text m (Producer Text m r) -splitAt = go + -> Lens' (Producer Text m r) + (Producer Text m (Producer Text m r)) +splitAt n0 k p0 = fmap join (k (go n0 p0)) where go 0 p = return p go n p = do @@ -700,20 +622,6 @@ splitAt = go return (yield suffix >> p') {-# INLINABLE splitAt #-} --- | Split a text stream into 'FreeT'-delimited text streams of fixed size -chunksOf - :: (Monad m, Integral n) - => n -> Producer Text m r -> FreeT (Producer Text m) m r -chunksOf n p0 = PP.FreeT (go p0) - where - go p = do - x <- next p - return $ case x of - Left r -> PP.Pure r - Right (txt, p') -> PP.Free $ do - p'' <- splitAt n (yield txt >> p') - return $ PP.FreeT (go p'') -{-# INLINABLE chunksOf #-} {-| Split a text stream in two, where the first text stream is the longest consecutive group of text that satisfy the predicate @@ -721,9 +629,9 @@ chunksOf n p0 = PP.FreeT (go p0) span :: (Monad m) => (Char -> Bool) - -> Producer Text m r - -> Producer' Text m (Producer Text m r) -span predicate = go + -> Lens' (Producer Text m r) + (Producer Text m (Producer Text m r)) +span predicate k p0 = fmap join (k (go p0)) where go p = do x <- lift (next p) @@ -746,117 +654,60 @@ span predicate = go break :: (Monad m) => (Char -> Bool) - -> Producer Text m r - -> Producer Text m (Producer Text m r) + -> Lens' (Producer Text m r) + (Producer Text m (Producer Text m r)) break predicate = span (not . predicate) {-# INLINABLE break #-} -{-| Split a text stream into sub-streams delimited by characters that satisfy the - predicate --} -splitWith - :: (Monad m) - => (Char -> Bool) - -> Producer Text m r - -> PP.FreeT (Producer Text m) m r -splitWith predicate p0 = PP.FreeT (go0 p0) - where - go0 p = do - x <- next p - case x of - Left r -> return (PP.Pure r) - Right (txt, p') -> - if (T.null txt) - then go0 p' - else return $ PP.Free $ do - p'' <- span (not . predicate) (yield txt >> p') - return $ PP.FreeT (go1 p'') - go1 p = do - x <- nextChar p - return $ case x of - Left r -> PP.Pure r - Right (_, p') -> PP.Free $ do - p'' <- span (not . predicate) p' - return $ PP.FreeT (go1 p'') -{-# INLINABLE splitWith #-} - --- | Split a text stream using the given 'Char' as the delimiter -split :: (Monad m) - => Char - -> Producer Text m r - -> FreeT (Producer Text m) m r -split c = splitWith (c ==) -{-# INLINABLE split #-} - -{-| Group a text stream into 'FreeT'-delimited text streams using the supplied - equality predicate +{-| Improper lens that splits after the first group of equivalent Chars, as + defined by the given equivalence relation -} groupBy :: (Monad m) => (Char -> Char -> Bool) - -> Producer Text m r - -> FreeT (Producer Text m) m r -groupBy equal p0 = PP.FreeT (go p0) - where + -> Lens' (Producer Text m r) + (Producer Text m (Producer Text m r)) +groupBy equals k p0 = fmap join (k ((go p0))) where go p = do - x <- next p + x <- lift (next p) case x of - Left r -> return (PP.Pure r) - Right (txt, p') -> case (T.uncons txt) of + Left r -> return (return r) + Right (txt, p') -> case T.uncons txt of Nothing -> go p' - Just (c, _) -> do - return $ PP.Free $ do - p'' <- span (equal c) (yield txt >> p') - return $ PP.FreeT (go p'') + Just (c, _) -> (yield txt >> p') ^. 
span (equals c) {-# INLINABLE groupBy #-} --- | Group a text stream into 'FreeT'-delimited text streams of identical characters -group - :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r +-- | Improper lens that splits after the first succession of identical 'Char' s +group :: Monad m + => Lens' (Producer Text m r) + (Producer Text m (Producer Text m r)) group = groupBy (==) {-# INLINABLE group #-} -{-| Split a text stream into 'FreeT'-delimited lines +{-| Improper lens that splits a 'Producer' after the first word + + Unlike 'words', this does not drop leading whitespace -} -lines - :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r -lines p0 = PP.FreeT (go0 p0) +word :: (Monad m) + => Lens' (Producer Text m r) + (Producer Text m (Producer Text m r)) +word k p0 = fmap join (k (to p0)) where - go0 p = do - x <- next p - case x of - Left r -> return (PP.Pure r) - Right (txt, p') -> - if (T.null txt) - then go0 p' - else return $ PP.Free $ go1 (yield txt >> p') - go1 p = do - p' <- break ('\n' ==) p - return $ PP.FreeT $ do - x <- nextChar p' - case x of - Left r -> return $ PP.Pure r - Right (_, p'') -> go0 p'' -{-# INLINABLE lines #-} + to p = do + p' <- p^.span isSpace + p'^.break isSpace +{-# INLINABLE word #-} +line :: (Monad m) + => Lens' (Producer Text m r) + (Producer Text m (Producer Text m r)) +line = break (== '\n') --- | Split a text stream into 'FreeT'-delimited words -words - :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r -words = go - where - go p = PP.FreeT $ do - x <- next (p >-> dropWhile isSpace) - return $ case x of - Left r -> PP.Pure r - Right (bs, p') -> PP.Free $ do - p'' <- break isSpace (yield bs >> p') - return (go p'') -{-# INLINABLE words #-} +{-# INLINABLE line #-} --- | Intersperse a 'Char' in between the characters of the text stream +-- | Intersperse a 'Char' in between the characters of stream of 'Text' intersperse :: (Monad m) => Char -> Producer Text m r -> Producer Text m r intersperse c = go0 @@ -878,6 +729,155 @@ intersperse c = go0 go1 p' {-# INLINABLE intersperse #-} + + +-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's +packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x) +packChars = Data.Profunctor.dimap to (fmap from) + where + -- to :: Monad m => Producer Char m x -> Producer Text m x + to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize) + + step diffAs c = diffAs . (c:) + + done diffAs = T.pack (diffAs []) + + -- from :: Monad m => Producer Text m x -> Producer Char m x + from p = for p (each . T.unpack) +{-# INLINABLE packChars #-} + + +-- | Split a text stream into 'FreeT'-delimited text streams of fixed size +chunksOf + :: (Monad m, Integral n) + => n -> Lens' (Producer Text m r) + (FreeT (Producer Text m) m r) +chunksOf n k p0 = fmap concats (k (FreeT (go p0))) + where + go p = do + x <- next p + return $ case x of + Left r -> Pure r + Right (txt, p') -> Free $ do + p'' <- (yield txt >> p') ^. splitAt n + return $ FreeT (go p'') +{-# INLINABLE chunksOf #-} + + +{-| Split a text stream into sub-streams delimited by characters that satisfy the + predicate +-} +splitsWith + :: (Monad m) + => (Char -> Bool) + -> Producer Text m r + -> FreeT (Producer Text m) m r +splitsWith predicate p0 = FreeT (go0 p0) + where + go0 p = do + x <- next p + case x of + Left r -> return (Pure r) + Right (txt, p') -> + if (T.null txt) + then go0 p' + else return $ Free $ do + p'' <- (yield txt >> p') ^. span (not . 
predicate) + return $ FreeT (go1 p'') + go1 p = do + x <- nextChar p + return $ case x of + Left r -> Pure r + Right (_, p') -> Free $ do + p'' <- p' ^. span (not . predicate) + return $ FreeT (go1 p'') +{-# INLINABLE splitsWith #-} + +-- | Split a text stream using the given 'Char' as the delimiter +splits :: (Monad m) + => Char + -> Lens' (Producer Text m r) + (FreeT (Producer Text m) m r) +splits c k p = + fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p)) +{-# INLINABLE splits #-} + +{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the + given equivalence relation +-} +groupsBy + :: Monad m + => (Char -> Char -> Bool) + -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x) +groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where + go p = do x <- next p + case x of Left r -> return (Pure r) + Right (bs, p') -> case T.uncons bs of + Nothing -> go p' + Just (c, _) -> do return $ Free $ do + p'' <- (yield bs >> p')^.span (equals c) + return $ FreeT (go p'') +{-# INLINABLE groupsBy #-} + + +-- | Like 'groupsBy', where the equality predicate is ('==') +groups + :: Monad m + => Lens' (Producer Text m x) (FreeT (Producer Text m) m x) +groups = groupsBy (==) +{-# INLINABLE groups #-} + + + +{-| Split a text stream into 'FreeT'-delimited lines +-} +lines + :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r) +lines = Data.Profunctor.dimap _lines (fmap _unlines) + where + _lines p0 = FreeT (go0 p0) + where + go0 p = do + x <- next p + case x of + Left r -> return (Pure r) + Right (txt, p') -> + if (T.null txt) + then go0 p' + else return $ Free $ go1 (yield txt >> p') + go1 p = do + p' <- p ^. break ('\n' ==) + return $ FreeT $ do + x <- nextChar p' + case x of + Left r -> return $ Pure r + Right (_, p'') -> go0 p'' + -- _unlines + -- :: Monad m + -- => FreeT (Producer Text m) m x -> Producer Text m x + _unlines = concats . PG.maps (<* yield (T.singleton '\n')) + + +{-# INLINABLE lines #-} + + +-- | Split a text stream into 'FreeT'-delimited words +words + :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r) +words = Data.Profunctor.dimap go (fmap _unwords) + where + go p = FreeT $ do + x <- next (p >-> dropWhile isSpace) + return $ case x of + Left r -> Pure r + Right (bs, p') -> Free $ do + p'' <- (yield bs >> p') ^. 
break isSpace + return (go p'') + _unwords = PG.intercalates (yield $ T.singleton ' ') + +{-# INLINABLE words #-} + + {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after interspersing a text stream in between them -} @@ -889,17 +889,17 @@ intercalate intercalate p0 = go0 where go0 f = do - x <- lift (PP.runFreeT f) + x <- lift (runFreeT f) case x of - PP.Pure r -> return r - PP.Free p -> do + Pure r -> return r + Free p -> do f' <- p go1 f' go1 f = do - x <- lift (PP.runFreeT f) + x <- lift (runFreeT f) case x of - PP.Pure r -> return r - PP.Free p -> do + Pure r -> return r + Free p -> do p0 f' <- p go1 f' @@ -912,10 +912,10 @@ unlines unlines = go where go f = do - x <- lift (PP.runFreeT f) + x <- lift (runFreeT f) case x of - PP.Pure r -> return r - PP.Free p -> do + Pure r -> return r + Free p -> do f' <- p yield $ T.singleton '\n' go f' @@ -925,7 +925,7 @@ unlines = go -} unwords :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r -unwords = intercalate (yield $ T.pack " ") +unwords = intercalate (yield $ T.singleton ' ') {-# INLINABLE unwords #-} {- $parse @@ -934,9 +934,10 @@ unwords = intercalate (yield $ T.pack " ") -} {- $reexports - @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'. @Data.Text@ re-exports the 'Text' type. - @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type). --} \ No newline at end of file + @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym. +-} + +
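As a usage sketch of the lens-style splitters introduced in this patch (an illustration only: it assumes 'view' from a van Laarhoven lens library such as @lens-family-core@, and 'stdin'/'stdout' from @Pipes.Text.IO@ as in the examples at the top of the module), the following program echoes the first line of standard input in upper case:

> import Control.Monad (void)
> import Data.Char (toUpper)
> import Lens.Family (view)          -- assumed lens library; any 'view' works
> import Pipes
> import qualified Pipes.Text as Text
> import qualified Pipes.Text.IO as Text
>
> -- 'view Text.line' restricts the producer to the text before the first
> -- newline; 'void' discards the leftover producer returned when that
> -- prefix ends.
> main :: IO ()
> main = runEffect $
>     void (view Text.line Text.stdin) >-> Text.map toUpper >-> Text.stdout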