X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=Pipes%2FText.hs;h=58b9c26d2158fa78eb04871dba25866e62b7fc72;hb=79917d53aa8a1e2c8332e330337f74440859306d;hp=9f8442942b5150be7f71218ef5b163571470cf76;hpb=80a490ef5673cd22586215732bf8f596437e8f59;p=github%2Ffretlink%2Ftext-pipes.git diff --git a/Pipes/Text.hs b/Pipes/Text.hs index 9f84429..58b9c26 100644 --- a/Pipes/Text.hs +++ b/Pipes/Text.hs @@ -2,9 +2,24 @@ module Pipes.Text ( - -- * Introduction + -- * Effectful Text -- $intro + -- * Lenses + -- $lenses + + -- ** @view@ \/ @(^.)@ + -- $view + + -- ** @over@ \/ @(%~)@ + -- $over + + -- ** @zoom@ + -- $zoom + + -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@ + -- $special + -- * Producers fromLazy @@ -89,7 +104,6 @@ import Control.Monad.Trans.State.Strict (StateT(..), modify) import qualified Data.Text as T import Data.Text (Text) import qualified Data.Text.Lazy as TL -import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize) import Data.ByteString (ByteString) import Data.Functor.Constant (Constant(Constant, getConstant)) import Data.Functor.Identity (Identity) @@ -100,10 +114,12 @@ import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..)) import qualified Pipes.Group as PG import qualified Pipes.Parse as PP import Pipes.Parse (Parser) +import Pipes.Text.Encoding (Lens'_, Iso'_) import qualified Pipes.Prelude as P import Data.Char (isSpace) import Data.Word (Word8) - +import Foreign.Storable (sizeOf) +import Data.Bits (shiftL) import Prelude hiding ( all, any, @@ -134,13 +150,10 @@ import Prelude hiding ( writeFile ) {- $intro - - * /I. Effectful Text/ - - This package provides @pipes@ utilities for /text streams/, understood as - streams of 'Text' chunks. The individual chunks are uniformly /strict/, and thus you - will generally want @Data.Text@ in scope. But the type @Producer Text m r@ as we - are using it is a sort of pipes equivalent of the lazy @Text@ type. 
+ This package provides @pipes@ utilities for /text streams/ or /character streams/, + realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/, + and thus you will generally want @Data.Text@ in scope. But the type + @Producer Text m r@, as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type. This particular module provides many functions equivalent in one way or another to the pure functions in @@ -178,8 +191,8 @@ import Prelude hiding ( The above program will never bring more than one chunk of text (~ 32 KB) into memory, no matter how long the lines are. - * /II. Lenses/ - +-} +{- $lenses As this example shows, one superficial difference from @Data.Text.Lazy@ is that many of the operations, like 'lines', are \'lensified\'; this has a number of advantages (where it is possible); in particular it facilitates their @@ -197,7 +210,7 @@ import Prelude hiding ( > view (splitAt 17) producer - or + or equivalently > producer ^. splitAt 17 @@ -210,14 +223,16 @@ import Prelude hiding ( would drop the leading white space from each line. The lenses in this library are marked as /improper/; this just means that - they don't admit all the operations of an ideal lens, but only "getting" and "focussing". + they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/. Just for this reason, though, the magnificent complexities of the lens libraries are a distraction. The lens combinators to keep in mind, the ones that make sense for our lenses, are @view@ \/ @(^.)@, @over@ \/ @(%~)@, and @zoom@. - One need only keep in mind that if @l@ is a @Lens' a b@, then: - - - @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ ) + One need only keep in mind that if @l@ is a @Lens'_ a b@, then: + +-} +{- $view + @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. 
l@ ) is the corresponding @b@; as was said above, this function will be exactly the function you think it is, given its name. Thus to uppercase the first n characters of a Producer, leaving the rest the same, we could write: @@ -225,9 +240,9 @@ import Prelude hiding ( > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper > p' - - - - @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies +-} +{- $over + @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@ (it can also be written @l %~ f@). @@ -237,8 +252,10 @@ import Prelude hiding ( > stripLines = Text.lines %~ maps (>-> Text.stripStart) > stripLines = over Text.lines (maps (>-> Text.stripStart)) > upper n = Text.splitAt n %~ (>-> Text.toUpper) - - - @zoom l@, finally, is a function from a @Parser b m r@ + +-} +{- $zoom + @zoom l@, finally, is a function from a @Parser b m r@ to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@). Its use is easiest to see with a decoding lens like 'utf8', which \"sees\" a Text producer hidden inside a ByteString producer: @@ -252,7 +269,7 @@ import Prelude hiding ( contain at the start. Then we might write something like this: > obey :: Monad m => Producer Text m b -> Producer Text m b -> obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 8) drawAll) p +> obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p > let seven = T.concat ts > case T.toUpper seven of > "TOUPPER" -> p' >-> Text.toUpper @@ -260,6 +277,11 @@ import Prelude hiding ( > _ -> do yield seven > p' + +> >>> let doc = each ["toU","pperTh","is document.\n"] +> >>> runEffect $ obey doc >-> Text.stdout +> THIS DOCUMENT. + The purpose of exporting lenses is the mental economy achieved with this three-way applicability. That one expression, e.g. 
@lines@ or @splitAt 17@ can have these three uses is no more surprising than that a pipe can act as a function modifying @@ -273,9 +295,8 @@ import Prelude hiding ( in and to some extent in the @Pipes.Text.Encoding@ module here. - - * /III. Special types:/ @Producer Text m (Producer Text m r)@ /and/ @FreeT (Producer Text m) m r@ - +-} +{- $special These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ . This is in the types that are most closely associated with our central text type, @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like @@ -314,7 +335,7 @@ import Prelude hiding ( > join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r - The return type of 'lines', 'words', 'chunksOf' and the other "splitter" functions, + The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions, @FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful) lists of (effectful) texts. The type @([Text],r)@ might be seen to gather together things of the forms: @@ -345,7 +366,7 @@ import Prelude hiding ( One might think that -> lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r) +> lines :: Monad m => Lens'_ (Producer Text m r) (FreeT (Producer Text m) m r) > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r should really have the type @@ -368,7 +389,7 @@ import Prelude hiding ( of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this. The @Pipes.Group@ module, which can generally be imported without qualification, - provides many functions for working with things of type @FreeT (Producer a m) m r@ + provides many functions for working with things of type @FreeT (Producer a m) m r@. 
In particular it conveniently exports the constructors for @FreeT@ and the associated @FreeF@ type -- a fancy form of @Either@, namely @@ -399,14 +420,10 @@ import Prelude hiding ( -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's fromLazy :: (Monad m) => TL.Text -> Producer' Text m () -fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) +fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ()) {-# INLINE fromLazy #-} -type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a) - -type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a) - (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b a ^. lens = getConstant (lens Constant a) @@ -768,7 +785,7 @@ isEndOfChars = do splitAt :: (Monad m, Integral n) => n - -> Lens' (Producer Text m r) + -> Lens'_ (Producer Text m r) (Producer Text m (Producer Text m r)) splitAt n0 k p0 = fmap join (k (go n0 p0)) where @@ -797,7 +814,7 @@ splitAt n0 k p0 = fmap join (k (go n0 p0)) span :: (Monad m) => (Char -> Bool) - -> Lens' (Producer Text m r) + -> Lens'_ (Producer Text m r) (Producer Text m (Producer Text m r)) span predicate k p0 = fmap join (k (go p0)) where @@ -822,7 +839,7 @@ span predicate k p0 = fmap join (k (go p0)) break :: (Monad m) => (Char -> Bool) - -> Lens' (Producer Text m r) + -> Lens'_ (Producer Text m r) (Producer Text m (Producer Text m r)) break predicate = span (not . predicate) {-# INLINABLE break #-} @@ -833,7 +850,7 @@ break predicate = span (not . 
predicate) groupBy :: (Monad m) => (Char -> Char -> Bool) - -> Lens' (Producer Text m r) + -> Lens'_ (Producer Text m r) (Producer Text m (Producer Text m r)) groupBy equals k p0 = fmap join (k ((go p0))) where go p = do @@ -847,7 +864,7 @@ groupBy equals k p0 = fmap join (k ((go p0))) where -- | Improper lens that splits after the first succession of identical 'Char's group :: Monad m - => Lens' (Producer Text m r) + => Lens'_ (Producer Text m r) (Producer Text m (Producer Text m r)) group = groupBy (==) {-# INLINABLE group #-} @@ -857,7 +874,7 @@ group = groupBy (==) Unlike 'words', this does not drop leading whitespace -} word :: (Monad m) - => Lens' (Producer Text m r) + => Lens'_ (Producer Text m r) (Producer Text m (Producer Text m r)) word k p0 = fmap join (k (to p0)) where @@ -868,7 +885,7 @@ word k p0 = fmap join (k (to p0)) line :: (Monad m) - => Lens' (Producer Text m r) + => Lens'_ (Producer Text m r) (Producer Text m (Producer Text m r)) line = break (== '\n') @@ -900,7 +917,7 @@ intersperse c = go0 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks -packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x) +packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x) packChars = Data.Profunctor.dimap to (fmap from) where -- to :: Monad m => Producer Char m x -> Producer Text m x @@ -912,13 +929,16 @@ packChars = Data.Profunctor.dimap to (fmap from) -- from :: Monad m => Producer Text m x -> Producer Char m x from p = for p (each . 
T.unpack) + {-# INLINABLE packChars #-} +defaultChunkSize :: Int +defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1) -- | Split a text stream into 'FreeT'-delimited text streams of fixed size chunksOf :: (Monad m, Integral n) - => n -> Lens' (Producer Text m r) + => n -> Lens'_ (Producer Text m r) (FreeT (Producer Text m) m r) chunksOf n k p0 = fmap concats (k (FreeT (go p0))) where @@ -964,7 +984,7 @@ splitsWith predicate p0 = FreeT (go0 p0) -- | Split a text stream using the given 'Char' as the delimiter splits :: (Monad m) => Char - -> Lens' (Producer Text m r) + -> Lens'_ (Producer Text m r) (FreeT (Producer Text m) m r) splits c k p = fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p)) @@ -976,7 +996,7 @@ splits c k p = groupsBy :: Monad m => (Char -> Char -> Bool) - -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x) + -> Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x) groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where go p = do x <- next p case x of Left r -> return (Pure r) @@ -991,7 +1011,7 @@ groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where -- | Like 'groupsBy', where the equality predicate is ('==') groups :: Monad m - => Lens' (Producer Text m x) (FreeT (Producer Text m) m x) + => Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x) groups = groupsBy (==) {-# INLINABLE groups #-} @@ -1000,7 +1020,7 @@ groups = groupsBy (==) {-| Split a text stream into 'FreeT'-delimited lines -} lines - :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r) + :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r) lines = Data.Profunctor.dimap _lines (fmap _unlines) where _lines p0 = FreeT (go0 p0) @@ -1031,7 +1051,7 @@ lines = Data.Profunctor.dimap _lines (fmap _unlines) -- | Split a text stream into 'FreeT'-delimited words words - :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r) + :: (Monad m) => Iso'_ (Producer Text m r) (FreeT 
(Producer Text m) m r) words = Data.Profunctor.dimap go (fmap _unwords) where go p = FreeT $ do