-{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
-#if __GLASGOW_HASKELL__ >= 702
-{-# LANGUAGE Trustworthy #-}
-#endif
-{-| This module provides @pipes@ utilities for \"text streams\", which are
- streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
- a 'Producer' can be converted to and from lazy 'Text's, though this is generally
- unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
- An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
-
- To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
- example, the following program copies a document from one file to another:
+{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
-> import Pipes
-> import qualified Data.Text.Pipes as Text
-> import System.IO
->
-> main =
-> withFile "inFile.txt" ReadMode $ \hIn ->
-> withFile "outFile.txt" WriteMode $ \hOut ->
-> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
-
-To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
-
-> import Pipes
-> import qualified Data.Text.Pipes as Text
-> import Pipes.Safe
->
-> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
-
- You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
- and 'stdout' proxies, as with the following \"echo\" program:
-
-> main = runEffect $ Text.stdin >-> Text.stdout
-
- You can also translate pure lazy 'TL.Text's to and from proxies:
-
-> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
-
- In addition, this module provides many functions equivalent to lazy
- 'Text' functions so that you can transform or fold text streams. For
- example, to stream only the first three lines of 'stdin' to 'stdout' you
- might write:
-
-> import Pipes
-> import qualified Pipes.Text as Text
-> import qualified Pipes.Parse as Parse
->
-> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
-> where
-> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
-
- The above program will never bring more than one chunk of text (~ 32 KB) into
- memory, no matter how long the lines are.
-
- Note that functions in this library are designed to operate on streams that
- are insensitive to text boundaries. This means that they may freely split
- text into smaller texts, /discard empty texts/. However, apart from the
- special case of 'concatMap', they will /never concatenate texts/ in order
- to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
--}
module Pipes.Text (
+ -- * Effectful Text
+ -- $intro
+
+ -- * Lenses
+ -- $lenses
+
+ -- ** @view@ \/ @(^.)@
+ -- $view
+
+ -- ** @over@ \/ @(%~)@
+ -- $over
+
+ -- ** @zoom@
+ -- $zoom
+
+ -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
+ -- $special
+
-- * Producers
- fromLazy
- , stdin
- , fromHandle
- , readFile
- , stdinLn
-
- -- * Consumers
- , stdout
- , stdoutLn
- , toHandle
- , writeFile
+ fromLazy
-- * Pipes
, map
, dropWhile
, filter
, scan
- , encodeUtf8
, pack
, unpack
, toCaseFold
, count
-- * Primitive Character Parsers
- -- $parse
, nextChar
, drawChar
, unDrawChar
, group
, word
, line
- , decodeUtf8
- , decode
-- * FreeT Splitters
, chunksOf
, splitsWith
, splits
--- , groupsBy
--- , groups
+ , groupsBy
+ , groups
, lines
, words
-
-- * Transformations
, intersperse
, packChars
, unlines
, unwords
- -- * Re-exports
+ -- * Re-exports
-- $reexports
, module Data.ByteString
, module Data.Text
, module Data.Profunctor
- , module Data.Word
, module Pipes.Parse
+ , module Pipes.Group
) where
-import Control.Exception (throwIO, try)
import Control.Applicative ((<*))
-import Control.Monad (liftM, unless, join)
+import Control.Monad (liftM, join)
import Control.Monad.Trans.State.Strict (StateT(..), modify)
-import Data.Monoid ((<>))
import qualified Data.Text as T
-import qualified Data.Text.IO as T
-import qualified Data.Text.Encoding as TE
-import qualified Data.Text.Encoding.Error as TE
import Data.Text (Text)
import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.IO as TL
-import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
-import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
import Data.ByteString (ByteString)
-import qualified Data.ByteString as B
-import Data.Char (ord, isSpace)
import Data.Functor.Constant (Constant(Constant, getConstant))
import Data.Functor.Identity (Identity)
import Data.Profunctor (Profunctor)
import qualified Data.Profunctor
-import qualified Data.List as List
-import Foreign.C.Error (Errno(Errno), ePIPE)
-import qualified GHC.IO.Exception as G
import Pipes
-import qualified Pipes.ByteString as PB
-import qualified Pipes.Text.Internal as PE
-import Pipes.Text.Internal (Codec(..))
-import Pipes.Core (respond, Server')
+import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
+import qualified Pipes.Group as PG
import qualified Pipes.Parse as PP
-import Pipes.Parse (Parser, concats, intercalates, FreeT(..))
-import qualified Pipes.Safe.Prelude as Safe
-import qualified Pipes.Safe as Safe
-import Pipes.Safe (MonadSafe(..), Base(..))
+import Pipes.Parse (Parser)
+import Pipes.Text.Encoding (Lens'_, Iso'_)
import qualified Pipes.Prelude as P
-import qualified System.IO as IO
import Data.Char (isSpace)
import Data.Word (Word8)
-
+import Foreign.Storable (sizeOf)
+import Data.Bits (shiftL)
import Prelude hiding (
all,
any,
words,
writeFile )
--- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
-fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
-fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
-{-# INLINE fromLazy #-}
+{- $intro
+ This package provides @pipes@ utilities for /text streams/ or /character streams/,
+ realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
+ and thus you will generally want @Data.Text@ in scope. But the type
+ @Producer Text m r@, as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
+
+ This particular module provides many functions equivalent in one way or another to
+ the pure functions in
+ <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
+ They transform, divide, group and fold text streams. Though @Producer Text m r@
+ is the type of \'effectful Text\', the functions in this module are \'pure\'
+ in the sense that they are uniformly monad-independent.
+ Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
+ operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
+ is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
+
+ The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
+ strict Text: the implementation is arranged so that the individual strict 'Text'
+ chunks are kept to a reasonable size; the user is not aware of the divisions
+ between the connected 'Text' chunks.
+ So also here: the functions in this module are designed to operate on streams that
+ are insensitive to text boundaries. This means that they may freely split
+ text into smaller texts and /discard empty texts/. The objective, though, is
+ that they should /never concatenate texts/ in order to provide strict upper
+ bounds on memory usage.
+
+ For example, to stream only the first three lines of 'stdin' to 'stdout' you
+ might write:
--- | Stream text from 'stdin'
-stdin :: MonadIO m => Producer Text m ()
-stdin = fromHandle IO.stdin
-{-# INLINE stdin #-}
+> import Pipes
+> import qualified Pipes.Text as Text
+> import qualified Pipes.Text.IO as Text
+> import Pipes.Group (takes')
+> import Lens.Family
+>
+> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
+> where
+> takeLines n = Text.unlines . takes' n . view Text.lines
+
+ The above program will never bring more than one chunk of text (~ 32 KB) into
+ memory, no matter how long the lines are.
-{-| Convert a 'IO.Handle' into a text stream using a text size
- determined by the good sense of the text library; note that this
- is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
- but uses the system encoding and has other `Data.Text.IO` features
-}
+{- $lenses
+ As this example shows, one superficial difference from @Data.Text.Lazy@
+ is that many of the operations, like 'lines', are \'lensified\'; this has a
+ number of advantages (where it is possible); in particular it facilitates their
+ use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
+ sense.) The disadvantage, famously, is that the messages you get for type errors can be
+ a little alarming. The remarks that follow in this section are for non-lens adepts.
+
+ Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
+ intuitively corresponding function when used with @view@ or @(^.)@. Instead of
+ writing:
+
+ > splitAt 17 producer
+
+ as we would with the Prelude or Text functions, we write
+
+ > view (splitAt 17) producer
+
+ or equivalently
+
+ > producer ^. splitAt 17
+
+ This may seem a little indirect, but note that many equivalents of
+ @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
+ corresponding functions by prefixing them with @(>->)@. Thus something like
+
+> stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
-fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
-fromHandle h = go where
- go = do txt <- liftIO (T.hGetChunk h)
- unless (T.null txt) $ do yield txt
- go
-{-# INLINABLE fromHandle#-}
+ would drop the leading white space from each line.
+ The lenses in this library are marked as /improper/; this just means that
+ they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
+ Just for this reason, though, the magnificent complexities of the lens libraries
+ are a distraction. The lens combinators to keep in mind, the ones that make sense for
+ our lenses, are @view@ \/ @(^.)@, @over@ \/ @(%~)@, and @zoom@.
-{-| Stream text from a file in the simple fashion of @Data.Text.IO@
+ One need only keep in mind that if @l@ is a @Lens'_ a b@, then:
->>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
-MAIN = PUTSTRLN "HELLO WORLD"
-}
+{- $view
+ @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
+ is the corresponding @b@; as was said above, this function will be exactly the
+ function you think it is, given its name. Thus to uppercase the first n characters
+ of a Producer, leaving the rest the same, we could write:
-readFile :: MonadSafe m => FilePath -> Producer Text m ()
-readFile file = Safe.withFile file IO.ReadMode fromHandle
-{-# INLINE readFile #-}
-{-| Stream lines of text from stdin (for testing in ghci etc.)
+ > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
+ > p'
+-}
+{- $over
+ @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
+ @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
+ the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
+ (it can also be written @l %~ f@).
+ For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
+ So above we might have written things like these:
+
+ > stripLines = Text.lines %~ maps (>-> Text.stripStart)
+ > stripLines = over Text.lines (maps (>-> Text.stripStart))
+ > upper n = Text.splitAt n %~ (>-> Text.toUpper)
->>> let safely = runSafeT . runEffect
->>> safely $ for Text.stdinLn (lift . lift . print . T.length)
-hello
-5
-world
-5
+-}
+{- $zoom
+ @zoom l@, finally, is a function from a @Parser b m r@
+ to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
+ Its use is easiest to see with a decoding lens like 'utf8', which
+ \"sees\" a Text producer hidden inside a ByteString producer:
+ @drawChar@ is a Text parser, returning a @Maybe Char@; @zoom utf8 drawChar@ is
+ a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
+ a list of everything produced from a Producer, leaving only the return value; it would
+ usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
+ returns a list of Text chunks containing the first seventeen Chars, and returns the rest of
+ the Text Producer for further parsing. Suppose that we want, inexplicably, to
+ modify the casing of a Text Producer according to any instruction it might
+ contain at the start. Then we might write something like this:
+
+> obey :: Monad m => Producer Text m b -> Producer Text m b
+> obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
+> let seven = T.concat ts
+> case T.toUpper seven of
+> "TOUPPER" -> p' >-> Text.toUpper
+> "TOLOWER" -> p' >-> Text.toLower
+> _ -> do yield seven
+> p'
+
+
+> >>> let doc = each ["toU","pperTh","is document.\n"]
+> >>> runEffect $ obey doc >-> Text.stdout
+> THIS DOCUMENT.
+
+ The purpose of exporting lenses is the mental economy achieved with this three-way
+ applicability. That one expression, e.g. @lines@ or @splitAt 17@ can have these
+ three uses is no more surprising than that a pipe can act as a function modifying
+ the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
+ -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
+ @pipe >-> consumer@
+
+ The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
+ both <http://hackage.haskell.org/package/lens lens> and
+ <http://hackage.haskell.org/package/lens-family lens-family>. The use of 'zoom' is explained
+ in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
+ and to some extent in the @Pipes.Text.Encoding@ module here.
-}
-stdinLn :: MonadIO m => Producer' Text m ()
-stdinLn = go where
- go = do
- eof <- liftIO (IO.hIsEOF IO.stdin)
- unless eof $ do
- txt <- liftIO (T.hGetLine IO.stdin)
- yield txt
- go
-{-# INLINABLE stdinLn #-}
+{- $special
+ These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
+ This is in the types that are most closely associated with our central text type,
+ @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
-{-| Stream text to 'stdout'
+> splitAt :: Int -> Text -> (Text, Text)
+> lines :: Text -> [Text]
+> chunksOf :: Int -> Text -> [Text]
- Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
+ which relate a Text with a pair of Texts or a list of Texts.
+ The corresponding functions here (taking account of \'lensification\') are
- Note: For best performance, use @(for source (liftIO . putStr))@ instead of
- @(source >-> stdout)@ in suitable cases.
--}
-stdout :: MonadIO m => Consumer' Text m ()
-stdout = go
- where
- go = do
- txt <- await
- x <- liftIO $ try (T.putStr txt)
- case x of
- Left (G.IOError { G.ioe_type = G.ResourceVanished
- , G.ioe_errno = Just ioe })
- | Errno ioe == ePIPE
- -> return ()
- Left e -> liftIO (throwIO e)
- Right () -> go
-{-# INLINABLE stdout #-}
-
-stdoutLn :: (MonadIO m) => Consumer' Text m ()
-stdoutLn = go
- where
- go = do
- str <- await
- x <- liftIO $ try (T.putStrLn str)
- case x of
- Left (G.IOError { G.ioe_type = G.ResourceVanished
- , G.ioe_errno = Just ioe })
- | Errno ioe == ePIPE
- -> return ()
- Left e -> liftIO (throwIO e)
- Right () -> go
-{-# INLINABLE stdoutLn #-}
-
-{-| Convert a text stream into a 'Handle'
-
- Note: again, for best performance, where possible use
- @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
--}
-toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
-toHandle h = for cat (liftIO . T.hPutStr h)
-{-# INLINABLE toHandle #-}
+> view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
+> view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
+> view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
-{-# RULES "p >-> toHandle h" forall p h .
- p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
- #-}
+ Some of the types may be more readable if you imagine that we have introduced
+ our own type synonyms
+
+> type Text m r = Producer T.Text m r
+> type Texts m r = FreeT (Producer T.Text m) m r
+
+ Then we would think of the types above as
+
+> view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
+> view lines :: (Monad m) => Text m r -> Texts m r
+> view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
+
+ which brings one closer to the types of the similar functions in @Data.Text.Lazy@
+
+ In the type @Producer Text m (Producer Text m r)@ the second
+ element of the \'pair\' of effectful Texts cannot simply be retrieved
+ with something like 'snd'. This is an \'effectful\' pair, and one must work
+ through the effects of the first element to arrive at the second Text stream, even
+ if you are proposing to throw the Text in the first element away.
+ Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
+
+> join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r
+ The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
+ @FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful)
+ lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
+ together things of the forms:
--- | Stream text into a file. Uses @pipes-safe@.
-writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
-writeFile file = Safe.withFile file IO.WriteMode toHandle
-{-# INLINE writeFile #-}
+> r
+> (Text,r)
+> (Text, (Text, r))
+> (Text, (Text, (Text, r)))
+> (Text, (Text, (Text, (Text, r))))
+> ...
+ (We might also have identified the sum of those types with @Free ((,) Text) r@
+ -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
+
+ Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
+ @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
+
+> m r
+> Text m r
+> Text m (Text m r)
+> Text m (Text m (Text m r))
+> Text m (Text m (Text m (Text m r)))
+> ...
+
+ We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
+ or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
+ result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@ .
+
+ One might think that
+
+> lines :: Monad m => Lens'_ (Producer Text m r) (FreeT (Producer Text m) m r)
+> view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
+
+ should really have the type
+
+> lines :: Monad m => Pipe Text Text m r
+
+ as e.g. 'toUpper' does. But this would spoil the control we are
+ attempting to maintain over the size of chunks. It is in fact just
+ as unreasonable to want such a pipe as to want
+
+> Data.Text.Lazy.lines :: Text -> Text
-type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
+ to \'rechunk\' the strict Text chunks inside the lazy Text to respect
+ line boundaries. In fact we have
+
+> Data.Text.Lazy.lines :: Text -> [Text]
+> Prelude.lines :: String -> [String]
+
+ where the elements of the list are themselves lazy Texts or Strings; the use
+ of @FreeT (Producer Text m) m r@ is simply the \'effectful\' version of this.
+
+ The @Pipes.Group@ module, which can generally be imported without qualification,
+ provides many functions for working with things of type @FreeT (Producer a m) m r@.
+ In particular it conveniently exports the constructors for @FreeT@ and the associated
+ @FreeF@ type -- a fancy form of @Either@, namely
+
+> data FreeF f a b = Pure a | Free (f b)
+
+ for pattern-matching. Consider the implementation of the 'words' function, or
+ of the part of the lens that takes us to the words; it is compact but exhibits many
+ of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
+ constructors. Keep in mind that
+
+> newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
+> next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
+
+ Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
+ the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
+
+> words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
+> words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
+> x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
+> return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
+> Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
+> Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
+> p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
+> (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
+> return (words p'') -- We then carry on with the rest, which is likely to begin with space.
+
+-}
+
+-- | Stream the strict chunks of a lazy 'TL.Text' downstream, one 'yield' per
+-- chunk and in their original order; the resulting 'Producer' returns @()@.
+fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
+fromLazy = TL.foldrChunks emitChunk (return ())
+  where
+    -- Yield the current chunk, then continue with the producer built for the rest.
+    emitChunk chunk rest = yield chunk >> rest
+{-# INLINE fromLazy #-}
-type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
a ^. lens = getConstant (lens Constant a)
p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
#-}
--- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
--- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
--- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
-encodeUtf8 :: Monad m => Pipe Text ByteString m r
-encodeUtf8 = P.map TE.encodeUtf8
-{-# INLINEABLE encodeUtf8 #-}
-
-{-# RULES "p >-> encodeUtf8" forall p .
- p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
- #-}
-- | Transform a Pipe of 'String's into one of 'Text' chunks
pack :: Monad m => Pipe String Text m r
-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
-- here acting as 'Text' pipes, rather as they would on a lazy text
-toCaseFold :: Monad m => Pipe Text Text m ()
+toCaseFold :: Monad m => Pipe Text Text m r
toCaseFold = P.map T.toCaseFold
{-# INLINEABLE toCaseFold #-}
-- | lowercase incoming 'Text'
-toLower :: Monad m => Pipe Text Text m ()
+toLower :: Monad m => Pipe Text Text m r
toLower = P.map T.toLower
{-# INLINEABLE toLower #-}
#-}
-- | uppercase incoming 'Text'
-toUpper :: Monad m => Pipe Text Text m ()
+toUpper :: Monad m => Pipe Text Text m r
toUpper = P.map T.toUpper
{-# INLINEABLE toUpper #-}
scan
:: (Monad m)
=> (Char -> Char -> Char) -> Char -> Pipe Text Text m r
-scan step begin = go begin
+scan step begin = do
+ yield (T.singleton begin)
+ go begin
where
go c = do
txt <- await
let txt' = T.scanl step c txt
c' = T.last txt'
- yield txt'
+ yield (T.tail txt')
go c'
{-# INLINABLE scan #-}
Just c -> Just (min c (T.minimum txt))
{-# INLINABLE minimum #-}
-
-- | Find the first element in the stream that matches the predicate
find
:: (Monad m)
{-# INLINABLE count #-}
-{-| Consume the first character from a stream of 'Text'
+-- | Consume the first character from a stream of 'Text'
+--
+-- 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
+-- succeeds with a 'Right' providing the next character and the remainder of the
+-- 'Producer'.
- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
- succeeds with a 'Right' providing the next character and the remainder of the
- 'Producer'.
--}
nextChar
:: (Monad m)
=> Producer Text m r
Just (c, txt') -> return (Right (c, yield txt' >> p'))
{-# INLINABLE nextChar #-}
-{-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
- 'Producer' is empty
--}
+-- | Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the 'Producer' is empty
+
drawChar :: (Monad m) => Parser Text m (Maybe Char)
drawChar = do
x <- PP.draw
> Left _ -> return ()
> Right c -> unDrawChar c
> return x
+
-}
+
peekChar :: (Monad m) => Parser Text m (Maybe Char)
peekChar = do
x <- drawChar
{-# INLINABLE isEndOfChars #-}
-
-
-
--- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text
--- returning a Pipe of ByteStrings that begins at the point of failure.
-
-decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
- (Producer Text m (Producer ByteString m r))
-decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
- (k (go B.empty PE.streamDecodeUtf8 p0)) where
- go !carry dec0 p = do
- x <- lift (next p)
- case x of Left r -> if B.null carry
- then return (return r) -- all bytestrinput was consumed
- else return (do yield carry -- a potentially valid fragment remains
- return r)
-
- Right (chunk, p') -> case dec0 chunk of
- PE.Some text carry2 dec -> do yield text
- go carry2 dec p'
- PE.Other text bs -> do yield text
- return (do yield bs -- an invalid blob remains
- p')
-{-# INLINABLE decodeUtf8 #-}
-
-
-- | Splits a 'Producer' after the given number of characters
splitAt
:: (Monad m, Integral n)
=> n
- -> Lens' (Producer Text m r)
+ -> Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
splitAt n0 k p0 = fmap join (k (go n0 p0))
where
{-# INLINABLE splitAt #-}
-{-| Split a text stream in two, where the first text stream is the longest
- consecutive group of text that satisfy the predicate
--}
+-- | Split a text stream in two, producing the longest
+-- consecutive group of characters that satisfies the predicate
+-- and returning the rest
+
span
:: (Monad m)
=> (Char -> Bool)
- -> Lens' (Producer Text m r)
+ -> Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
span predicate k p0 = fmap join (k (go p0))
where
return (yield suffix >> p')
{-# INLINABLE span #-}
-{-| Split a text stream in two, where the first text stream is the longest
+{-| Split a text stream in two, producing the longest
consecutive group of characters that don't satisfy the predicate
-}
break
:: (Monad m)
=> (Char -> Bool)
- -> Lens' (Producer Text m r)
+ -> Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
break predicate = span (not . predicate)
{-# INLINABLE break #-}
groupBy
:: (Monad m)
=> (Char -> Char -> Bool)
- -> Lens' (Producer Text m r)
+ -> Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
groupBy equals k p0 = fmap join (k ((go p0))) where
go p = do
-- | Improper lens that splits after the first succession of identical 'Char' s
group :: Monad m
- => Lens' (Producer Text m r)
+ => Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
group = groupBy (==)
{-# INLINABLE group #-}
Unlike 'words', this does not drop leading whitespace
-}
word :: (Monad m)
- => Lens' (Producer Text m r)
+ => Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
word k p0 = fmap join (k (to p0))
where
line :: (Monad m)
- => Lens' (Producer Text m r)
+ => Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
line = break (== '\n')
-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
-packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
+packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x)
packChars = Data.Profunctor.dimap to (fmap from)
where
-- to :: Monad m => Producer Char m x -> Producer Text m x
- to p = PP.folds step id done (p^.PP.chunksOf defaultChunkSize)
+ to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
step diffAs c = diffAs . (c:)
-- from :: Monad m => Producer Text m x -> Producer Char m x
from p = for p (each . T.unpack)
+
{-# INLINABLE packChars #-}
+-- | Chunk size, in characters, handed to 'PG.chunksOf' by 'packChars' when it
+-- groups a stream of 'Char's into 'Text' chunks: 16384 less the storage size
+-- of two 'Int's.
+-- NOTE(review): presumably mirrors @Data.Text.Lazy.Internal.defaultChunkSize@,
+-- which this module used to import -- confirm the two stay in sync.
+defaultChunkSize :: Int
+defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
chunksOf
:: (Monad m, Integral n)
- => n -> Lens' (Producer Text m r)
+ => n -> Lens'_ (Producer Text m r)
(FreeT (Producer Text m) m r)
chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
where
go p = do
x <- next p
return $ case x of
- Left r -> PP.Pure r
- Right (txt, p') -> PP.Free $ do
+ Left r -> Pure r
+ Right (txt, p') -> Free $ do
p'' <- (yield txt >> p') ^. splitAt n
- return $ PP.FreeT (go p'')
+ return $ FreeT (go p'')
{-# INLINABLE chunksOf #-}
:: (Monad m)
=> (Char -> Bool)
-> Producer Text m r
- -> PP.FreeT (Producer Text m) m r
-splitsWith predicate p0 = PP.FreeT (go0 p0)
+ -> FreeT (Producer Text m) m r
+splitsWith predicate p0 = FreeT (go0 p0)
where
go0 p = do
x <- next p
case x of
- Left r -> return (PP.Pure r)
+ Left r -> return (Pure r)
Right (txt, p') ->
if (T.null txt)
then go0 p'
- else return $ PP.Free $ do
+ else return $ Free $ do
p'' <- (yield txt >> p') ^. span (not . predicate)
- return $ PP.FreeT (go1 p'')
+ return $ FreeT (go1 p'')
go1 p = do
x <- nextChar p
return $ case x of
- Left r -> PP.Pure r
- Right (_, p') -> PP.Free $ do
+ Left r -> Pure r
+ Right (_, p') -> Free $ do
p'' <- p' ^. span (not . predicate)
- return $ PP.FreeT (go1 p'')
+ return $ FreeT (go1 p'')
{-# INLINABLE splitsWith #-}
-- | Split a text stream using the given 'Char' as the delimiter
splits :: (Monad m)
=> Char
- -> Lens' (Producer Text m r)
+ -> Lens'_ (Producer Text m r)
(FreeT (Producer Text m) m r)
splits c k p =
- fmap (PP.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
+ fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
{-# INLINABLE splits #-}
{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
groupsBy
:: Monad m
=> (Char -> Char -> Bool)
- -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
-groupsBy equals k p0 = fmap concats (k (PP.FreeT (go p0))) where
+ -> Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x)
+groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
go p = do x <- next p
- case x of Left r -> return (PP.Pure r)
+ case x of Left r -> return (Pure r)
Right (bs, p') -> case T.uncons bs of
Nothing -> go p'
- Just (c, _) -> do return $ PP.Free $ do
+ Just (c, _) -> do return $ Free $ do
p'' <- (yield bs >> p')^.span (equals c)
- return $ PP.FreeT (go p'')
+ return $ FreeT (go p'')
{-# INLINABLE groupsBy #-}
-- | Like 'groupsBy', where the equality predicate is ('==')
groups
:: Monad m
- => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
+ => Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x)
groups = groupsBy (==)
{-# INLINABLE groups #-}
{-| Split a text stream into 'FreeT'-delimited lines
-}
lines
- :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
+ :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
lines = Data.Profunctor.dimap _lines (fmap _unlines)
where
- _lines p0 = PP.FreeT (go0 p0)
+ _lines p0 = FreeT (go0 p0)
where
go0 p = do
x <- next p
case x of
- Left r -> return (PP.Pure r)
+ Left r -> return (Pure r)
Right (txt, p') ->
if (T.null txt)
then go0 p'
- else return $ PP.Free $ go1 (yield txt >> p')
+ else return $ Free $ go1 (yield txt >> p')
go1 p = do
p' <- p ^. break ('\n' ==)
- return $ PP.FreeT $ do
+ return $ FreeT $ do
x <- nextChar p'
case x of
- Left r -> return $ PP.Pure r
+ Left r -> return $ Pure r
Right (_, p'') -> go0 p''
-- _unlines
-- :: Monad m
-- => FreeT (Producer Text m) m x -> Producer Text m x
- _unlines = PP.concats . PP.transFreeT addNewline
+ _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
+
- -- addNewline
- -- :: Monad m => Producer Text m r -> Producer Text m r
- addNewline p = p <* yield (T.singleton '\n')
{-# INLINABLE lines #-}
-
-- | Split a text stream into 'FreeT'-delimited words
words
- :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
+ :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
words = Data.Profunctor.dimap go (fmap _unwords)
where
- go p = PP.FreeT $ do
+ go p = FreeT $ do
x <- next (p >-> dropWhile isSpace)
return $ case x of
- Left r -> PP.Pure r
- Right (bs, p') -> PP.Free $ do
+ Left r -> Pure r
+ Right (bs, p') -> Free $ do
p'' <- (yield bs >> p') ^. break isSpace
return (go p'')
- _unwords = PP.intercalates (yield $ T.singleton ' ')
+ _unwords = PG.intercalates (yield $ T.singleton ' ')
{-# INLINABLE words #-}
intercalate p0 = go0
where
go0 f = do
- x <- lift (PP.runFreeT f)
+ x <- lift (runFreeT f)
case x of
- PP.Pure r -> return r
- PP.Free p -> do
+ Pure r -> return r
+ Free p -> do
f' <- p
go1 f'
go1 f = do
- x <- lift (PP.runFreeT f)
+ x <- lift (runFreeT f)
case x of
- PP.Pure r -> return r
- PP.Free p -> do
+ Pure r -> return r
+ Free p -> do
p0
f' <- p
go1 f'
unlines = go
where
go f = do
- x <- lift (PP.runFreeT f)
+ x <- lift (runFreeT f)
case x of
- PP.Pure r -> return r
- PP.Free p -> do
+ Pure r -> return r
+ Free p -> do
f' <- p
yield $ T.singleton '\n'
go f'
-}
unwords
:: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
-unwords = intercalate (yield $ T.pack " ")
+unwords = intercalate (yield $ T.singleton ' ')
{-# INLINABLE unwords #-}
-{- $parse
- The following parsing utilities are single-character analogs of the ones found
- @pipes-parse@.
--}
{- $reexports
-}
-
-decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
--- decode codec = go B.empty where
--- go extra p0 =
--- do x <- lift (next p0)
--- case x of Right (chunk, p) ->
--- do let (text, stuff) = codecDecode codec (B.append extra chunk)
--- yield text
--- case stuff of Right extra' -> go extra' p
--- Left (exc,bs) -> do yield text
--- return (do yield bs
--- p)
--- Left r -> return (do yield extra
--- return r)
-
-decode d p0 = case d of
- PE.Other txt bad -> do yield txt
- return (do yield bad
- p0)
- PE.Some txt extra dec -> do yield txt
- x <- lift (next p0)
- case x of Left r -> return (do yield extra
- return r)
- Right (chunk,p1) -> decode (dec chunk) p1
-
--- go !carry dec0 p = do
--- x <- lift (next p)
--- case x of Left r -> if B.null carry
--- then return (return r) -- all bytestrinput was consumed
--- else return (do yield carry -- a potentially valid fragment remains
--- return r)
---
--- Right (chunk, p') -> case dec0 chunk of
--- PE.Some text carry2 dec -> do yield text
--- go carry2 dec p'
--- PE.Other text bs -> do yield text
--- return (do yield bs -- an invalid blob remains
--- p')
--- {-# INLINABLE decodeUtf8 #-}