{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
-module Pipes.Text (
- -- * Effectful Text
- -- $intro
-
- -- * Lenses
- -- $lenses
-
- -- ** @view@ \/ @(^.)@
- -- $view
-
- -- ** @over@ \/ @(%~)@
- -- $over
-
- -- ** @zoom@
- -- $zoom
-
- -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
- -- $special
+{-| The module @Pipes.Text@ closely follows @Pipes.ByteString@ from
+ the @pipes-bytestring@ package. A draft tutorial can be found in
+ @Pipes.Text.Tutorial@.
+-}
+module Pipes.Text (
-- * Producers
fromLazy
words,
writeFile )
-{- $intro
- This package provides @pipes@ utilities for /text streams/ or /character streams/,
- realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
- and thus you will generally want @Data.Text@ in scope. But the type
- @Producer Text m r@ ,as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
-
- This particular module provides many functions equivalent in one way or another to
- the pure functions in
- <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
- They transform, divide, group and fold text streams. Though @Producer Text m r@
- is the type of \'effectful Text\', the functions in this module are \'pure\'
- in the sense that they are uniformly monad-independent.
- Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
- operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
- is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
-
- The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
- strict Text: the implementation is arranged so that the individual strict 'Text'
- chunks are kept to a reasonable size; the user is not aware of the divisions
- between the connected 'Text' chunks.
- So also here: the functions in this module are designed to operate on streams that
- are insensitive to text boundaries. This means that they may freely split
- text into smaller texts and /discard empty texts/. The objective, though, is
- that they should /never concatenate texts/ in order to provide strict upper
- bounds on memory usage.
-
- For example, to stream only the first three lines of 'stdin' to 'stdout' you
- might write:
-
-> import Pipes
-> import qualified Pipes.Text as Text
-> import qualified Pipes.Text.IO as Text
-> import Pipes.Group (takes')
-> import Lens.Family
->
-> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
-> where
-> takeLines n = Text.unlines . takes' n . view Text.lines
-
- The above program will never bring more than one chunk of text (~ 32 KB) into
- memory, no matter how long the lines are.
-
--}
-{- $lenses
- As this example shows, one superficial difference from @Data.Text.Lazy@
- is that many of the operations, like 'lines', are \'lensified\'; this has a
- number of advantages (where it is possible); in particular it facilitates their
- use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
- sense.) The disadvantage, famously, is that the messages you get for type errors can be
- a little alarming. The remarks that follow in this section are for non-lens adepts.
-
- Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
- intuitively corresponding function when used with @view@ or @(^.)@. Instead of
- writing:
-
- > splitAt 17 producer
-
- as we would with the Prelude or Text functions, we write
-
- > view (splitAt 17) producer
-
- or equivalently
-
- > producer ^. splitAt 17
-
- This may seem a little indirect, but note that many equivalents of
- @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
- corresponding functions by prefixing them with @(>->)@. Thus something like
-
-> stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
-
- would drop the leading white space from each line.
-
- The lenses in this library are marked as /improper/; this just means that
- they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
- Just for this reason, though, the magnificent complexities of the lens libraries
- are a distraction. The lens combinators to keep in mind, the ones that make sense for
- our lenses, are @view@ \/ @(^.)@), @over@ \/ @(%~)@ , and @zoom@.
-
- One need only keep in mind that if @l@ is a @Lens' a b@, then:
-
--}
-{- $view
- @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
- is the corresponding @b@; as was said above, this function will be exactly the
- function you think it is, given its name. Thus to uppercase the first n characters
- of a Producer, leaving the rest the same, we could write:
-
-
- > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
- > p'
--}
-{- $over
- @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
- @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
- the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
- (it can also be written @l %~ f@).
- For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
- So above we might have written things like these:
-
- > stripLines = Text.lines %~ maps (>-> Text.stripStart)
- > stripLines = over Text.lines (maps (>-> Text.stripStart))
- > upper n = Text.splitAt n %~ (>-> Text.toUpper)
-
--}
-{- $zoom
- @zoom l@, finally, is a function from a @Parser b m r@
- to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
- Its use is easiest to see with an decoding lens like 'utf8', which
- \"sees\" a Text producer hidden inside a ByteString producer:
- @drawChar@ is a Text parser, returning a @Maybe Char@, @zoom utf8 drawChar@ is
- a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
- a list of everything produced from a Producer, leaving only the return value; it would
- usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
- returns a list of Text chunks containing the first seventeen Chars, and returns the rest of
- the Text Producer for further parsing. Suppose that we want, inexplicably, to
- modify the casing of a Text Producer according to any instruction it might
- contain at the start. Then we might write something like this:
-
-> obey :: Monad m => Producer Text m b -> Producer Text m b
-> obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
-> let seven = T.concat ts
-> case T.toUpper seven of
-> "TOUPPER" -> p' >-> Text.toUpper
-> "TOLOWER" -> p' >-> Text.toLower
-> _ -> do yield seven
-> p'
-
-
-> >>> let doc = each ["toU","pperTh","is document.\n"]
-> >>> runEffect $ obey doc >-> Text.stdout
-> THIS DOCUMENT.
-
- The purpose of exporting lenses is the mental economy achieved with this three-way
- applicability. That one expression, e.g. @lines@ or @splitAt 17@ can have these
- three uses is no more surprising than that a pipe can act as a function modifying
- the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
- -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
- @pipe >-> consumer@
-
- The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
- both <http://hackage.haskell.org/package/lens lens> and
- <http://hackage.haskell.org/package/lens-family lens-family> The use of 'zoom' is explained
- in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
- and to some extent in the @Pipes.Text.Encoding@ module here.
-
--}
-{- $special
- These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
- This is in the types that are most closely associated with our central text type,
- @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
-
-> splitAt :: Int -> Text -> (Text, Text)
-> lines :: Text -> [Text]
-> chunksOf :: Int -> Text -> [Text]
-
- which relate a Text with a pair of Texts or a list of Texts.
- The corresponding functions here (taking account of \'lensification\') are
-
-> view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
-> view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
-> view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
-
- Some of the types may be more readable if you imagine that we have introduced
- our own type synonyms
-
-> type Text m r = Producer T.Text m r
-> type Texts m r = FreeT (Producer T.Text m) m r
-
- Then we would think of the types above as
-
-> view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
-> view lines :: (Monad m) => Text m r -> Texts m r
-> view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
-
- which brings one closer to the types of the similar functions in @Data.Text.Lazy@
-
- In the type @Producer Text m (Producer Text m r)@ the second
- element of the \'pair\' of effectful Texts cannot simply be retrieved
- with something like 'snd'. This is an \'effectful\' pair, and one must work
- through the effects of the first element to arrive at the second Text stream, even
- if you are proposing to throw the Text in the first element away.
- Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
-
-> join :: Monad m => Producer Text m (Producer m r) -> Producer m r
-
- The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
- @FreeT (Producer m Text) m r@ -- our @Texts m r@ -- is the type of (effectful)
- lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
- together things of the forms:
-
-> r
-> (Text,r)
-> (Text, (Text, r))
-> (Text, (Text, (Text, r)))
-> (Text, (Text, (Text, (Text, r))))
-> ...
-
- (We might also have identified the sum of those types with @Free ((,) Text) r@
- -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
-
- Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
- @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
-
-> m r
-> Text m r
-> Text m (Text m r)
-> Text m (Text m (Text m r))
-> Text m (Text m (Text m (Text m r)))
-> ...
-
- We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
- or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
- result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@ .
-
- One might think that
-
-> lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
-> view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
-
- should really have the type
-
-> lines :: Monad m => Pipe Text Text m r
-
- as e.g. 'toUpper' does. But this would spoil the control we are
- attempting to maintain over the size of chunks. It is in fact just
- as unreasonable to want such a pipe as to want
-
-> Data.Text.Lazy.lines :: Text -> Text
-
- to 'rechunk' the strict Text chunks inside the lazy Text to respect
- line boundaries. In fact we have
-
-> Data.Text.Lazy.lines :: Text -> [Text]
-> Prelude.lines :: String -> [String]
-
- where the elements of the list are themselves lazy Texts or Strings; the use
- of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
-
- The @Pipes.Group@ module, which can generally be imported without qualification,
- provides many functions for working with things of type @FreeT (Producer a m) m r@.
- In particular it conveniently exports the constructors for @FreeT@ and the associated
- @FreeF@ type -- a fancy form of @Either@, namely
-
-> data FreeF f a b = Pure a | Free (f b)
-
- for pattern-matching. Consider the implementation of the 'words' function, or
- of the part of the lens that takes us to the words; it is compact but exhibits many
- of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
- constuctors. Keep in mind that
-
-> newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
-> next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
-
- Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
- the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
-
-> words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
-> words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
-> x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
-> return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
-> Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
-> Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
-> p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
-> (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
-> return (words p'') -- We then carry on with the rest, which is likely to begin with space.
-
--}
-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
--- /dev/null
+{-# OPTIONS_GHC -fno-warn-unused-imports #-}
+
+module Pipes.Text.Tutorial (
+ -- * Effectful Text
+ -- $intro
+ -- * Lenses
+ -- $lenses
+
+ -- ** @view@ \/ @(^.)@
+ -- $view
+
+ -- ** @over@ \/ @(%~)@
+ -- $over
+
+ -- ** @zoom@
+ -- $zoom
+
+ -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
+ -- $special
+ ) where
+
+import Pipes
+import Pipes.Text
+import Pipes.Text.IO
+import Pipes.Text.Encoding
+
+{- $intro
+ This package provides @pipes@ utilities for /character streams/,
+ realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
+  and thus the @Text@ type we are using is the one from @Data.Text@, not @Data.Text.Lazy@.
+  But the type @Producer Text m r@, as we are using it, is a sort of /pipes/ equivalent of
+  the lazy @Text@ type.
+
+ The main @Pipes.Text@ module provides many functions equivalent
+ in one way or another to the pure functions in
+ <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>
+  (and the corresponding @Prelude@ functions for @String@s): they transform,
+ divide, group and fold text streams. Though @Producer Text m r@
+ is the type of \'effectful Text\', the functions in @Pipes.Text@ are \'pure\'
+ in the sense that they are uniformly monad-independent.
+  Simple /IO/ operations are defined in @Pipes.Text.IO@, just as lazy IO @Text@
+  operations are in @Data.Text.Lazy.IO@. Similarly, as @Data.Text.Lazy.Encoding@
+ handles inter-operation with @Data.ByteString.Lazy@, @Pipes.Text.Encoding@ provides for
+ interoperation with the \'effectful ByteStrings\' of @Pipes.ByteString@.
+
+ Remember that the @Text@ type exported by @Data.Text.Lazy@ is basically
+ that of a lazy list of strict @Text@: the implementation is arranged so that
+ the individual strict 'Text' chunks are kept to a reasonable size; the user
+ is not aware of the divisions between the connected 'Text' chunks, but uses
+ operations akin to those for strict text.
+  So also here: the functions in @Pipes.Text@ are designed to operate on character streams
+  in a way that is independent of the boundaries of the underlying @Text@ chunks.
+  This means that they may freely split text into smaller texts and /discard empty texts/.
+  The objective, though, is that they should never /concatenate texts/, so as to keep strict upper
+  bounds on memory usage.
+
+ For example, to stream only the first three lines of 'stdin' to 'stdout' you
+ might write:
+
+> import Pipes
+> import qualified Pipes.Text as Text
+> import qualified Pipes.Text.IO as Text
+> import Pipes.Group (takes')
+> import Lens.Family (view)
+>
+> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
+> where
+> takeLines n = view Text.unlines . takes' n . view Text.lines
+
+ This program will never bring more into memory than what @Text.stdin@ considers
+ one chunk of text (~ 32 KB), even if individual lines are split across many chunks.
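+
+  With @takeLines@ defined as above (at the top level, so that it is visible in
+  ghci), a quick check on a pure producer -- assuming @OverloadedStrings@ for the
+  literals -- looks like this:
+
+> >>> runEffect $ takeLines 2 (each ["one\ntw","o\nthree\n"]) >-> Text.stdout
+> one
+> two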
+
+-}
+{- $lenses
+  As the use of @view@ in this example shows, one superficial difference from @Data.Text.Lazy@
+  is that many of the operations, like 'lines', are \'lensified\'. This has a
+  number of advantages; in particular, it facilitates their use with 'Parser's of Text
+  (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
+  sense). The remarks that follow in this section are for non-lens adepts.
+
+ Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
+ intuitively corresponding function when used with @view@ or @(^.)@. Instead of
+ writing:
+
+ > splitAt 17 producer
+
+ as we would with the Prelude or Text functions, we write
+
+ > view (splitAt 17) producer
+
+ or equivalently
+
+ > producer ^. splitAt 17
+
+ This may seem a little indirect, but note that many equivalents of
+ @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
+ corresponding functions by prefixing them with @(>->)@. Thus something like
+
+> stripLines = view Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
+
+ would drop the leading white space from each line.
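+
+  For instance, a doctest-style sketch (assuming the imports of the example above,
+  with @Pipes.Group@ imported qualified as @Group@, and @OverloadedStrings@ for the
+  literals):
+
+> >>> runEffect $ stripLines (each ["  one\n","   two\n"]) >-> Text.stdout
+> one
+> two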
+
+ The lenses in this library are marked as /improper/; this just means that
+ they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
+ Just for this reason, though, the magnificent complexities of the lens libraries
+ are a distraction. The lens combinators to keep in mind, the ones that make sense for
+  our lenses, are @view@ \/ @(^.)@, @over@ \/ @(%~)@, and @zoom@.
+
+ One need only keep in mind that if @l@ is a @Lens' a b@, then:
+
+-}
+{- $view
+  @view l@ is a function @a -> b@. Thus @view l a@ (also written @a ^. l@)
+  is the corresponding @b@; as was said above, this function will typically be
+  the pipes equivalent of the function you think it is, given its name. So for example
+
+  > view (Text.splitAt 300) :: Monad m => Producer Text m r -> Producer Text m (Producer Text m r)
+  > Text.stdin ^. Text.splitAt 300 :: Producer Text IO (Producer Text IO ())
+
+  That is, it produces the first 300 characters, and returns the rest of the
+  producer as its return value.
+ Thus to uppercase the first n characters
+ of a Producer, leaving the rest the same, we could write:
+
+
+ > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
+ > p'
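+
+  For instance, a doctest-style sketch (assuming @OverloadedStrings@ for the literals):
+
+> >>> runEffect $ upper 4 (each ["hel","lo wor","ld\n"]) >-> Text.stdout
+> HELLo world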
+-}
+{- $over
+ @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
+ @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
+ the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
+ (it can also be written @l %~ f@).
+ For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
+ So above we might have written things like these:
+
+ > stripLines = Text.lines %~ maps (>-> Text.stripStart)
+ > stripLines = over Text.lines (maps (>-> Text.stripStart))
+ > upper n = Text.splitAt n %~ (>-> Text.toUpper)
+
+-}
+{- $zoom
+ @zoom l@, finally, is a function from a @Parser b m r@
+ to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
+  Its use is easiest to see with a decoding lens like 'utf8', which
+  \"sees\" a Text producer hidden inside a ByteString producer:
+  @drawChar@ is a Text parser, returning a @Maybe Char@; @zoom utf8 drawChar@ is
+  a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
+  a list of everything produced from a Producer, leaving only the return value; it would
+  usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
+  returns a list of Text chunks containing the first seventeen Chars, leaving the rest of
+  the Text Producer for further parsing. Suppose that we want, inexplicably, to
+ modify the casing of a Text Producer according to any instruction it might
+ contain at the start. Then we might write something like this:
+
+> obey :: Monad m => Producer Text m b -> Producer Text m b
+> obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
+> let seven = T.concat ts
+> case T.toUpper seven of
+> "TOUPPER" -> p' >-> Text.toUpper
+> "TOLOWER" -> p' >-> Text.toLower
+> _ -> do yield seven
+> p'
+
+
+> >>> let doc = each ["toU","pperTh","is document.\n"]
+> >>> runEffect $ obey doc >-> Text.stdout
+> THIS DOCUMENT.
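+
+  The decoding case mentioned above can be tried directly. A sketch, assuming
+  @utf8@ from @Pipes.Text.Encoding@, @Pipes.ByteString@ imported qualified as
+  @ByteString@, and @OverloadedStrings@ for the literal:
+
+> >>> evalStateT (zoom utf8 Text.drawChar) (ByteString.fromLazy "pipes")
+> Just 'p'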
+
+ The purpose of exporting lenses is the mental economy achieved with this three-way
+  applicability. That one expression, e.g. @lines@ or @splitAt 17@, can have these
+  three uses is no more surprising than that a pipe can act as a function modifying
+  the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
+  -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
+  @pipe >-> consumer@.
+
+ The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
+ both <http://hackage.haskell.org/package/lens lens> and
+  <http://hackage.haskell.org/package/lens-family lens-family>. The use of 'zoom' is explained
+ in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
+ and to some extent in the @Pipes.Text.Encoding@ module here.
+
+-}
+{- $special
+  These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@.
+ This is in the types that are most closely associated with our central text type,
+ @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
+
+> splitAt :: Int -> Text -> (Text, Text)
+> lines :: Text -> [Text]
+> chunksOf :: Int -> Text -> [Text]
+
+ which relate a Text with a pair of Texts or a list of Texts.
+ The corresponding functions here (taking account of \'lensification\') are
+
+> view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
+> view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
+> view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
+
+ Some of the types may be more readable if you imagine that we have introduced
+ our own type synonyms
+
+> type Text m r = Producer T.Text m r
+> type Texts m r = FreeT (Producer T.Text m) m r
+
+ Then we would think of the types above as
+
+> view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
+> view lines :: (Monad m) => Text m r -> Texts m r
+> view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
+
+ which brings one closer to the types of the similar functions in @Data.Text.Lazy@
+
+ In the type @Producer Text m (Producer Text m r)@ the second
+ element of the \'pair\' of effectful Texts cannot simply be retrieved
+ with something like 'snd'. This is an \'effectful\' pair, and one must work
+ through the effects of the first element to arrive at the second Text stream, even
+ if you are proposing to throw the Text in the first element away.
+ Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
+
+> join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r
+
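+  For instance, a minimal sketch of such a reassembly (the helper name @whole@ is
+  ours, for illustration; it assumes @join@ from @Control.Monad@ and @(^.)@ from a
+  lens library are in scope):
+
+> whole :: Monad m => Producer Text m r -> Producer Text m r
+> whole p = join (p ^. Text.splitAt 17)
+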
+ The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
+  @FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful)
+ lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
+ together things of the forms:
+
+> r
+> (Text,r)
+> (Text, (Text, r))
+> (Text, (Text, (Text, r)))
+> (Text, (Text, (Text, (Text, r))))
+> ...
+
+ (We might also have identified the sum of those types with @Free ((,) Text) r@
+ -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
+
+ Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
+ @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
+
+> m r
+> Text m r
+> Text m (Text m r)
+> Text m (Text m (Text m r))
+> Text m (Text m (Text m (Text m r)))
+> ...
+
+ We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
+ or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
+ result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@ .
+
+ One might think that
+
+> lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
+> view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
+
+ should really have the type
+
+> lines :: Monad m => Pipe Text Text m r
+
+ as e.g. 'toUpper' does. But this would spoil the control we are
+ attempting to maintain over the size of chunks. It is in fact just
+ as unreasonable to want such a pipe as to want
+
+> Data.Text.Lazy.lines :: Text -> Text
+
+ to 'rechunk' the strict Text chunks inside the lazy Text to respect
+ line boundaries. In fact we have
+
+> Data.Text.Lazy.lines :: Text -> [Text]
+> Prelude.lines :: String -> [String]
+
+ where the elements of the list are themselves lazy Texts or Strings; the use
+ of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
+
+ The @Pipes.Group@ module, which can generally be imported without qualification,
+ provides many functions for working with things of type @FreeT (Producer a m) m r@.
+ In particular it conveniently exports the constructors for @FreeT@ and the associated
+ @FreeF@ type -- a fancy form of @Either@, namely
+
+> data FreeF f a b = Pure a | Free (f b)
+
+ for pattern-matching. Consider the implementation of the 'words' function, or
+ of the part of the lens that takes us to the words; it is compact but exhibits many
+ of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
+  constructors. Keep in mind that
+
+> newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
+> next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
+
+ Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
+ the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
+
+> words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
+> words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
+> x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
+> return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
+> Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
+> Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
+> p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
+> (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
+> return (words p'') -- We then carry on with the rest, which is likely to begin with space.
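+
+  As a quick check of this sketch (assuming @Pipes.Group@ imported qualified as
+  @Group@ and @OverloadedStrings@ for the literals):
+
+> >>> runEffect $ Group.intercalates (yield "|") (words (each ["  one tw","o  three"])) >-> Text.stdout
+> one|two|three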
+
+-}
--- /dev/null
+import Pipes
+import Pipes.Text.IO (fromHandle)
+import Pipes.Attoparsec (parsed)
+import Data.Attoparsec.Text (Parser, decimal, space, endOfLine)
+import qualified System.IO as IO
+
+data Test = Test {
+ a :: Int,
+ b :: Int
+ } deriving (Show)
+
+testParser :: Parser Test
+testParser = do
+ a <- decimal
+ space
+ b <- decimal
+ endOfLine
+ return $ Test a b
+
+main :: IO ()
+main = IO.withFile "./testfile" IO.ReadMode $ \handle -> do
+    -- 'parsed' applies testParser repeatedly to the text from the handle,
+    -- yielding one 'Test' per successful parse; its return value reports
+    -- any parse failure together with the leftover input.
+    r <- runEffect $ for (parsed testParser (fromHandle handle)) (lift . print)
+    either (print . fst) return r
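+
+-- An illustrative helper (ours, not part of the original example): write a
+-- "./testfile" whose lines match testParser, so that 'main' has input to parse.
+writeTestFile :: IO ()
+writeTestFile = writeFile "./testfile" (unlines ["1 2", "3 4", "5 6"])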
--- /dev/null
+-- http://www.haskellforall.com/2014/02/pipes-parse-30-lens-based-parsing.html
+
+import Data.ByteString (ByteString)
+import Data.Text (Text)
+import Lens.Family.State.Strict (zoom)
+import Pipes
+import Pipes.Parse
+import qualified Pipes.ByteString as ByteString
+import qualified Pipes.Text as Text
+
+-- Retrieve all `Text` chunks up to 10 characters
+parser :: Monad m => Parser ByteString m [Text]
+parser = zoom (Text.decodeUtf8 . Text.splitAt 10) drawAll
+
+main = do
+ (textChunks, leftovers) <- runStateT parser ByteString.stdin
+ print textChunks
+
+ -- Now print the remaining `ByteString` chunks
+ byteChunks <- evalStateT drawAll leftovers
+ print byteChunks
+{-
+$ ./decode
+Hello, 世界!!!<Enter>
+["Hello, \19990\30028!"]
+abcdefg<Enter>
+<Ctrl-D>
+["!!\n","abcdefg\n"]
+
+-}
\ No newline at end of file
--- /dev/null
+{-# LANGUAGE OverloadedStrings #-}
+-- https://gist.github.com/michaelt/88e1fac12876857deefe
+-- following
+-- https://gist.github.com/gelisam/c769d186493221d7ebbe and associated controversy.
+
+module Main where
+
+import Prelude hiding (lines)
+import Lens.Family
+import Pipes
+import Pipes.Group
+import Pipes.HTTP
+import Pipes.Text
+import Pipes.Text.Encoding
+import Pipes.Text.IO (toHandle,stdout)
+import qualified System.IO as IO
+import Data.Functor (void)
+import qualified Data.Text as T
+
+main = do
+ req <- parseUrl "https://gist.github.com/gelisam/c769d186493221d7ebbe"
+ -- "http://www.example.com"
+ -- "http://www.gutenberg.org/files/10/10-h/10-h.htm"
+ withManager tlsManagerSettings $ \m ->
+ withHTTP req m $ \resp -> void $ runEffect $
+ number_lines_of (responseBody resp ^. utf8 . lines) >-> toHandle IO.stdout
+
+number_lines_of :: Monad m => FreeT (Producer Text m) m bad -> Producer Text m bad
+number_lines_of = number_loop (1 :: Int) where
+ number_loop n freeProducers = do
+ freeProducer <- lift $ runFreeT freeProducers
+ case freeProducer of
+ Pure badbytes -> do yield $ T.pack "\n"
+ return badbytes -- these could be inspected ...
+ Free p -> do yield $ T.pack ("\n" ++ show n ++ " ")
+ nextFreeProducers <- p
+ number_loop (n+1) nextFreeProducers
--- /dev/null
+-- this file illustrates several uses of `zoom`
+-- one of them is quadratic in the length of the file
+-- since it has to decode and encode repeatedly,
+-- and is thus no good on long files.
+
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE BangPatterns#-}
+{-# LANGUAGE RankNTypes #-}
+import Blaze.ByteString.Builder (Builder, fromByteString, toByteString)
+import Control.Exception (Exception)
+import Control.Monad.Trans.Class (lift)
+import Data.ByteString (ByteString)
+import qualified Data.ByteString as S
+import qualified Data.ByteString.Lazy as L
+import Data.Monoid
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as TEE
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.Encoding as TLE
+
+import Pipes
+import Pipes.Parse
+import qualified Pipes.Prelude as PP
+import qualified Pipes.ByteString as Bytes
+import qualified Pipes.Text as Txt
+import Pipes.Text.Encoding (utf8)
+
+import Control.Lens
+import Control.Lens.Internal.Zoom
+import Control.Monad
+import qualified System.IO as IO
+import Control.Monad.Trans.Maybe
+import Control.Monad.State.Class
+
+main :: IO ()
+main = do -- S.writeFile fp $ contents 10000 -- a file this large cannot be handled by fileParser0 and 1
+ -- parse_file fileParser0 -- pathological
+ -- parse_file fileParser1 -- programs
+ parse_file fileParser2 -- good program
+
+ where
+ parse_file parser = IO.withBinaryFile fp IO.ReadMode $ \h ->
+ do p' <- runEffect $ parseWith parser ( Bytes.fromHandle h ) >-> PP.print
+ runEffect $ p' >-> PP.print
+ parseWith parser = loop where
+ loop p = do (m,p') <- lift (runStateT (runMaybeT parser) p)
+ case m of Nothing -> return p'
+ Just file -> do yield file
+ loop p'
+ fp = "encoded.fileformat"
+ contents n = (toByteString . mconcat . replicate n . encodeFiles) input
+ <> S.pack (replicate 10 250)
+
+
+
+fileParser0, fileParser1, fileParser2 :: Monad m => MaybeT (StateT (Producer ByteString m x) m) File
+fileParser0 = do (name, len) <- zoom utf8 parseText
+ contents <- zoom (Bytes.splitAt len) (lift drawAll)
+ return (File name (S.concat contents))
+ where
+ -- this parser aggregates all Text parsing into one preliminary parser
+ -- which is then applied with `zoom utf8`
+ -- we cannot tell in advance how long, e.g. the file name will be
+ parseText :: Monad m => MaybeT (StateT (Producer Text m x) m) (Text, Int)
+ parseText = do nameLength <- parseNumber
+ names <- zoom (Txt.splitAt nameLength) $ (lift drawAll)
+ contentLength <- parseNumber
+ return $! (T.concat names, contentLength)
+
+-- here we disaggregate the little Text parsers but still apply them with `zoom utf8`
+-- this makes no difference
+fileParser1 = do nameLength <- zoom utf8 parseNumber
+ names <- zoom (utf8 . Txt.splitAt nameLength) (lift drawAll)
+ contentLength <- zoom utf8 parseNumber
+ contents <- zoom (Bytes.splitAt contentLength) (lift drawAll)
+ return (File (T.concat names) (S.concat contents))
+
+-- this is the good program; by reflecting on the fact that file names
+-- should not be 1000 bytes long, and binary files not longer than e.g. 10 ^ 10 bytes,
+-- we can restrict the length of the byte stream to which we apply `zoom utf8`
+fileParser2 = do nameLength <- Bytes.splitAt 3 ~~> utf8 ~~> parseNumber
+ names <- Bytes.splitAt nameLength ~~> utf8 ~~> lift drawAll
+ len <- Bytes.splitAt 10 ~~> utf8 ~~> parseNumber
+ contents <- Bytes.splitAt len ~~> lift drawAll
+ return (File (T.concat names) (S.concat contents))
+
+-- infix lens nonsense
+infixr 1 ~~>
+(~~>) :: Zoom m n s t
+ => ((s -> Zoomed n c s) -> t -> Zoomed n c t)
+ -> m c -> n c
+(~~>) = zoom
+{-# INLINE (~~>) #-}
+
+parseNumber :: Monad m => MaybeT (StateT (Producer Text m x) m) Int
+parseNumber = loop 0 where
+ loop !n = do c <- MaybeT Txt.drawChar
+ case c of ':' -> return n
+ _ -> do guard ('0' <= c && c <= '9')
+ loop $! n * 10 + (fromEnum c - fromEnum '0')
+
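+-- An illustrative check of parseNumber (ours, not in the original code):
+--
+-- > evalStateT (runMaybeT parseNumber) (yield "123:rest")
+--
+-- returns Just 123; the characters after the ':' remain in the final state,
+-- which evalStateT discards (use runStateT to get at them).
+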
+
+
+-- --- Michael S's `File` type and its binary encoding, etc.
+
+
+data File = File
+ { fileName :: !Text
+ , fileContents :: !ByteString
+ }
+ deriving Show
+
+encodeFile :: File -> Builder
+encodeFile (File name contents) =
+ tellLength (S.length bytesname) <>
+ fromByteString bytesname <>
+ tellLength (S.length contents) <>
+ fromByteString contents
+ where
+ tellLength i = fromByteString $ TEE.encodeUtf8 (T.pack (shows i ":"))
+ bytesname = TEE.encodeUtf8 name
+
+encodeFiles :: [File] -> Builder
+encodeFiles = mconcat . map encodeFile
+
+input :: [File]
+input =
+ [ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8"
+ , File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16"
+ , File "binary.dat" "we'll pretend to be binary"
+ ]
+
+
+---
+
+-- This desperate scheme actually has some efficacy, if used before `utf8` in a zoom
+-- but not much
+
+chunk :: Monad m => Int -> Lens' (Producer ByteString m r) (Producer ByteString m r)
+chunk n = lens (chunkyN n) (\_ b -> b) where
+
+ chunkyN :: Monad m => Int -> Producer ByteString m r -> Producer ByteString m r
+ chunkyN n = prod_loop where
+
+ prod_loop p = do mbs <- lift $ next p
+ case mbs of Left r -> return r
+ Right (bs, p') -> do bs_loop bs
+ prod_loop p'
+ bs_loop bs = unless (S.null bs) $ do yield fore
+ unless (S.null aft) (bs_loop aft)
+ where (fore, aft) = S.splitAt n bs
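+
+-- An illustrative use of `chunk` (our sketch, not part of the original code):
+-- re-chunking the bytes before decoding bounds the work that the repeated
+-- decode/encode behind `zoom utf8` has to redo, e.g.
+--
+-- > nameLength <- zoom (chunk 100 . utf8) parseNumber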
name: pipes-text
-version: 0.0.0.12
+version: 0.0.0.14
synopsis: Text pipes.
description: * This package will be in a draft, or testing, phase until version 0.0.1. Please report any installation difficulties, or any wisdom about the api, on the github page or the <https://groups.google.com/forum/#!forum/haskell-pipes pipes list>
.
exposed-modules: Pipes.Text, Pipes.Text.Encoding
build-depends: base >= 4 && < 5 ,
bytestring >= 0.9.2.1 && < 0.11,
- text >= 0.11.2 && < 1.2 ,
+ text >= 0.11.2 && < 1.3 ,
streaming-commons >= 0.1 && < 0.2 ,
pipes >= 4.0 && < 4.2 ,
pipes-group >= 1.0.0 && < 1.1 ,
ghc-options: -O2
if !flag(noio)
- exposed-modules: Pipes.Text.IO
- build-depends: text >=0.11.3 && < 1.2
+ exposed-modules: Pipes.Text.IO, Pipes.Text.Tutorial
+ build-depends: text >=0.11.3 && < 1.3