module Pipes.Text (
- -- * Introduction
+ -- * Effectful Text
-- $intro
+ -- * Lenses
+ -- $lenses
+
+ -- ** @view@ \/ @(^.)@
+ -- $view
+
+ -- ** @over@ \/ @(%~)@
+ -- $over
+
+ -- ** @zoom@
+ -- $zoom
+
+ -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
+ -- $special
+
-- * Producers
fromLazy
import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Text.Lazy as TL
-import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
import Data.ByteString (ByteString)
import Data.Functor.Constant (Constant(Constant, getConstant))
import Data.Functor.Identity (Identity)
import qualified Pipes.Group as PG
import qualified Pipes.Parse as PP
import Pipes.Parse (Parser)
+import Pipes.Text.Encoding (Lens'_, Iso'_)
import qualified Pipes.Prelude as P
import Data.Char (isSpace)
import Data.Word (Word8)
-
+import Foreign.Storable (sizeOf)
+import Data.Bits (shiftL)
import Prelude hiding (
all,
any,
writeFile )
{- $intro
-
- * /I. Effectful Text/
-
- This package provides @pipes@ utilities for /text streams/, understood as
- streams of 'Text' chunks. The individual chunks are uniformly /strict/, and thus you
- will generally want @Data.Text@ in scope. But the type @Producer Text m r@ as we
- are using it is a sort of pipes equivalent of the lazy @Text@ type.
+ This package provides @pipes@ utilities for /text streams/ or /character streams/,
+ realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
+ and thus you will generally want @Data.Text@ in scope. But the type
+ @Producer Text m r@, as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
This particular module provides many functions equivalent in one way or another to
the pure functions in
The above program will never bring more than one chunk of text (~ 32 KB) into
memory, no matter how long the lines are.
- * /II. Lenses/
-
+-}
+{- $lenses
As this example shows, one superficial difference from @Data.Text.Lazy@
is that many of the operations, like 'lines', are \'lensified\'; this has a
number of advantages (where it is possible); in particular it facilitates their
> view (splitAt 17) producer
- or
+ or equivalently
> producer ^. splitAt 17
would drop the leading white space from each line.
The lenses in this library are marked as /improper/; this just means that
- they don't admit all the operations of an ideal lens, but only "getting" and "focussing".
+ they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
Just for this reason, though, the magnificent complexities of the lens libraries
are a distraction. The lens combinators to keep in mind, the ones that make sense for
our lenses, are @view@ \/ @(^.)@, @over@ \/ @(%~)@, and @zoom@.
- One need only keep in mind that if @l@ is a @Lens' a b@, then:
-
- - @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
+ One need only keep in mind that if @l@ is a @Lens'_ a b@, then:
+
+-}
+{- $view
+ @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
is the corresponding @b@; as was said above, this function will be exactly the
function you think it is, given its name. Thus to uppercase the first n characters
of a Producer, leaving the rest the same, we could write:
> upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
> p'
-
-
- - @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
+-}
+{- $over
+ @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
@b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
(it can also be written @l %~ f@).
> stripLines = Text.lines %~ maps (>-> Text.stripStart)
> stripLines = over Text.lines (maps (>-> Text.stripStart))
> upper n = Text.splitAt n %~ (>-> Text.toUpper)
-
- - @zoom l@, finally, is a function from a @Parser b m r@
+
+-}
+{- $zoom
+ @zoom l@, finally, is a function from a @Parser b m r@
to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
Its use is easiest to see with a decoding lens like 'utf8', which
\"sees\" a Text producer hidden inside a ByteString producer:
contain at the start. Then we might write something like this:
> obey :: Monad m => Producer Text m b -> Producer Text m b
-> obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 8) drawAll) p
+> obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
> let seven = T.concat ts
> case T.toUpper seven of
> "TOUPPER" -> p' >-> Text.toUpper
> _ -> do yield seven
> p'
+
+> >>> let doc = each ["toU","pperTh","is document.\n"]
+> >>> runEffect $ obey doc >-> Text.stdout
+> THIS DOCUMENT.
+
The purpose of exporting lenses is the mental economy achieved with this three-way
applicability. That one expression, e.g. @lines@ or @splitAt 17@, can have these
three uses is no more surprising than that a pipe can act as a function modifying
in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
and to some extent in the @Pipes.Text.Encoding@ module here.
-
- * /III. Special types:/ @Producer Text m (Producer Text m r)@ /and/ @FreeT (Producer Text m) m r@
-
+-}
+{- $special
These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
This is in the types that are most closely associated with our central text type,
@Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
> join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r
- The return type of 'lines', 'words', 'chunksOf' and the other "splitter" functions,
+ The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
@FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful)
lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
together things of the forms:
One might think that
-> lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
+> lines :: Monad m => Lens'_ (Producer Text m r) (FreeT (Producer Text m) m r)
> view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
should really have the type
of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
The @Pipes.Group@ module, which can generally be imported without qualification,
- provides many functions for working with things of type @FreeT (Producer a m) m r@
+ provides many functions for working with things of type @FreeT (Producer a m) m r@.
In particular it conveniently exports the constructors for @FreeT@ and the associated
@FreeF@ type -- a fancy form of @Either@, namely
-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's.
--
-- The lazy text is folded over its chunks from the right: each strict chunk is
-- 'yield'ed in order, and the producer returns @()@ once the chunks run out.
fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
-fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
+fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
{-# INLINE fromLazy #-}
-type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
-
-type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
-
-- | Infix getter, the operator form of @view@.
--
-- @whole ^. l@ runs the lens @l@ with the 'Constant' functor, so the focused
-- part is captured rather than written back, and then unwraps that part.
(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
(^.) whole l = getConstant (l Constant whole)
splitAt
:: (Monad m, Integral n)
=> n
- -> Lens' (Producer Text m r)
+ -> Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
splitAt n0 k p0 = fmap join (k (go n0 p0))
where
span
:: (Monad m)
=> (Char -> Bool)
- -> Lens' (Producer Text m r)
+ -> Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
span predicate k p0 = fmap join (k (go p0))
where
break
:: (Monad m)
=> (Char -> Bool)
- -> Lens' (Producer Text m r)
+ -> Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
-- Defined in terms of 'span': the predicate is negated before being handed
-- over, so 'span' is driven by the characters that do /not/ satisfy @predicate@.
break predicate = span (not . predicate)
{-# INLINABLE break #-}
groupBy
:: (Monad m)
=> (Char -> Char -> Bool)
- -> Lens' (Producer Text m r)
+ -> Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
groupBy equals k p0 = fmap join (k ((go p0))) where
go p = do
-- | Improper lens that splits after the first succession of identical 'Char's.
--
-- This is 'groupBy' specialised to the equality test ('==').
group :: Monad m
- => Lens' (Producer Text m r)
+ => Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
group = groupBy (==)
{-# INLINABLE group #-}
Unlike 'words', this does not drop leading whitespace
-}
word :: (Monad m)
- => Lens' (Producer Text m r)
+ => Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
word k p0 = fmap join (k (to p0))
where
line :: (Monad m)
- => Lens' (Producer Text m r)
+ => Lens'_ (Producer Text m r)
(Producer Text m (Producer Text m r))
-- Defined as 'break' with the predicate @(== '\n')@, i.e. the newline
-- character is what determines the split point.
line = break (== '\n')
-- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
-packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
+packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x)
packChars = Data.Profunctor.dimap to (fmap from)
where
-- to :: Monad m => Producer Char m x -> Producer Text m x
-- from :: Monad m => Producer Text m x -> Producer Char m x
from p = for p (each . T.unpack)
+
{-# INLINABLE packChars #-}
-- Locally defined default chunk size: 16384 less twice the storage size of an
-- 'Int' (the @sizeOf@ result shifted left by one bit).
-- NOTE(review): presumably mirrors the 'defaultChunkSize' that used to be
-- imported from "Data.Text.Lazy.Internal" -- confirm the two stay in sync.
+defaultChunkSize :: Int
+defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
chunksOf
:: (Monad m, Integral n)
- => n -> Lens' (Producer Text m r)
+ => n -> Lens'_ (Producer Text m r)
(FreeT (Producer Text m) m r)
chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
where
-- | Split a text stream using the given 'Char' as the delimiter
--
-- Looking through the lens hands @k@ the result of @splitsWith (c ==) p@.
-- On the way back the pieces are rejoined with 'PG.intercalates', using a
-- single-character chunk holding @c@ as the separator.
splits :: (Monad m)
=> Char
- -> Lens' (Producer Text m r)
+ -> Lens'_ (Producer Text m r)
(FreeT (Producer Text m) m r)
splits c k p =
fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
groupsBy
:: Monad m
=> (Char -> Char -> Bool)
- -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
+ -> Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x)
groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
go p = do x <- next p
case x of Left r -> return (Pure r)
-- | Like 'groupsBy', where the equality predicate is ('==').
--
-- As the type shows, the lens relates a text 'Producer' to a
-- 'FreeT'-delimited sequence of text 'Producer's.
groups
:: Monad m
- => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
+ => Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x)
groups = groupsBy (==)
{-# INLINABLE groups #-}
{-| Split a text stream into 'FreeT'-delimited lines
-}
lines
- :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
+ :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
lines = Data.Profunctor.dimap _lines (fmap _unlines)
where
_lines p0 = FreeT (go0 p0)
-- | Split a text stream into 'FreeT'-delimited words
words
- :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
+ :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
words = Data.Profunctor.dimap go (fmap _unwords)
where
go p = FreeT $ do