more attempts to make haddock/hackage happy

[github/fretlink/text-pipes.git] / Pipes / Text.hs
diff --git a/Pipes/Text.hs b/Pipes/Text.hs

index 95fc0e610596ca820937deec6d95907adb60aed0..58b9c26d2158fa78eb04871dba25866e62b7fc72 100644 (file)
--- a/Pipes/Text.hs
+++ b/Pipes/Text.hs
@@ -2,9 +2,24 @@
  
  
  module Pipes.Text  (
-    -- * Introduction
+    -- * Effectful Text
      -- $intro
      
+    -- * Lenses
+    -- $lenses
+    
+    -- ** @view@ \/ @(^.)@
+    -- $view
+
+    -- ** @over@ \/ @(%~)@
+    -- $over
+    
+    -- ** @zoom@
+    -- $zoom
+    
+    -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
+    -- $special
+    
      -- * Producers
      fromLazy
  
@@ -89,7 +104,6 @@ import Control.Monad.Trans.State.Strict (StateT(..), modify)
  import qualified Data.Text as T
  import Data.Text (Text)
  import qualified Data.Text.Lazy as TL
-import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
  import Data.ByteString (ByteString)
  import Data.Functor.Constant (Constant(Constant, getConstant))
  import Data.Functor.Identity (Identity)
@@ -100,10 +114,12 @@ import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
  import qualified Pipes.Group as PG
  import qualified Pipes.Parse as PP
  import Pipes.Parse (Parser)
+import Pipes.Text.Encoding (Lens'_, Iso'_)
  import qualified Pipes.Prelude as P
  import Data.Char (isSpace)
  import Data.Word (Word8)
-
+import Foreign.Storable (sizeOf)
+import Data.Bits (shiftL)
  import Prelude hiding (
      all,
      any,
@@ -134,13 +150,10 @@ import Prelude hiding (
      writeFile )
  
  {- $intro
-
-    * /Effectful Text/
-
-    This package provides @pipes@ utilities for /text streams/, understood as
-    streams of 'Text' chunks. The individual chunks are uniformly /strict/, and thus you 
-    will generally want @Data.Text@ in scope.  But the type @Producer Text m r@ as we
-    are using it is a sort of pipes equivalent of the lazy @Text@ type. 
+    This package provides @pipes@ utilities for /text streams/ or /character streams/, 
+    realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/, 
+    and thus you will generally want @Data.Text@ in scope.  But the type 
+    @Producer Text m r@, as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type. 
      
      This particular module provides many functions equivalent in one way or another to 
      the pure functions in 
@@ -178,37 +191,112 @@ import Prelude hiding (
      The above program will never bring more than one chunk of text (~ 32 KB) into
      memory, no matter how long the lines are.
  
-    * /Lenses/
-
+-}
+{- $lenses
      As this example shows, one superficial difference from @Data.Text.Lazy@ 
-    is that many of the operations, like 'lines',
-    are \'lensified\'; this has a number of advantages (where it is possible), in particular 
-    it facilitates their use with 'Parser's of Text (in the general 
-    <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse> 
-    sense.) 
-    Each such lens, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the 
-    intuitively corresponding function when used with @view@ or @(^.)@. 
+    is that many of the operations, like 'lines', are \'lensified\'; this has a 
+    number of advantages (where it is possible); in particular it facilitates their 
+    use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse> 
+    sense.) The disadvantage, famously, is that the messages you get for type errors can be
+    a little alarming. The remarks that follow in this section are for non-lens adepts.
+
+    Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the 
+    intuitively corresponding function when used with @view@ or @(^.)@. Instead of
+    writing:
+    
+    > splitAt 17 producer
+    
+    as we would with the Prelude or Text functions, we write 
+    
+    > view (splitAt 17) producer
+    
+    or equivalently
+    
+    > producer ^. splitAt 17
  
-    Note similarly that many equivalents of 'Text -> Text' functions are exported here as 'Pipe's.
-    They reduce to the intuitively corresponding functions when used with '(>->)'. Thus something like
+    This may seem a little indirect, but note that many equivalents of 
+    @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively 
+    corresponding functions by prefixing them with @(>->)@. Thus something like
  
  >  stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines 
  
      would drop the leading white space from each line. 
  
-    The lens combinators
-    you will find indispensible are @view@ / @(^.)@), @zoom@ and probably @over@. These
-    are supplied by both <http://hackage.haskell.org/package/lens lens> and 
-    <http://hackage.haskell.org/package/lens-family lens-family> The use of 'zoom' is explained
-    in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial> 
-    and to some extent in the @Pipes.Text.Encoding@ module here. The use of
-    @over@ is simple, illustrated by the fact that we can rewrite @stripLines@ above as
+    The lenses in this library are marked as /improper/; this just means that 
+    they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/. 
+    Just for this reason, though, the magnificent complexities of the lens libraries 
+    are a distraction. The lens combinators to keep in mind, the ones that make sense for 
+    our lenses, are @view@ \/ @(^.)@, @over@ \/ @(%~)@, and @zoom@. 
  
->  stripLines = over Text.lines $ maps (>-> stripStart)
+    One need only keep in mind that if @l@ is a @Lens'_ a b@, then:
  
+-}
+{- $view
+    @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ ) 
+    is the corresponding @b@; as was said above, this function will be exactly the 
+    function you think it is, given its name. Thus to uppercase the first n characters 
+    of a Producer, leaving the rest the same, we could write: 
  
-    * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
-    
+
+    > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
+    >                p'
+-}
+{- $over
+    @over l@ is a function @(b -> b) -> a -> a@.  Thus, given a function that modifies
+    @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to 
+    the @b@ that we can \"see\" through the lens. So  @over l f :: a -> a@ 
+    (it can also be written @l %~ f@). 
+    For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@. 
+    So above we might have written things like these: 
+
+    > stripLines = Text.lines %~ maps (>-> Text.stripStart)
+    > stripLines = over Text.lines (maps (>-> Text.stripStart))
+    > upper n    =  Text.splitAt n %~ (>-> Text.toUpper)
+
+-}
+{- $zoom
+    @zoom l@, finally, is a function from a @Parser b m r@  
+    to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).  
+    Its use is easiest to see with a decoding lens like 'utf8', which
+    \"sees\" a Text producer hidden inside a ByteString producer:
+    @drawChar@ is a Text parser, returning a @Maybe Char@; @zoom utf8 drawChar@ is 
+    a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns 
+    a list of everything produced from a Producer, leaving only the return value; it would 
+    usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
+    returns a list of Text chunks containing the first seventeen Chars, and returns the rest of
+    the Text Producer for further parsing. Suppose that we want, inexplicably, to 
+    modify the casing of a Text Producer according to any instruction it might 
+    contain at the start. Then we might write something like this:
+
+>     obey :: Monad m => Producer Text m b -> Producer Text m b
+>     obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
+>                 let seven = T.concat ts
+>                 case T.toUpper seven of 
+>                    "TOUPPER" -> p' >-> Text.toUpper
+>                    "TOLOWER" -> p' >-> Text.toLower
+>                    _         -> do yield seven
+>                                    p'
+
+
+> >>> let doc = each ["toU","pperTh","is document.\n"]
+> >>> runEffect $ obey doc >-> Text.stdout
+> THIS DOCUMENT.
+
+    The purpose of exporting lenses is the mental economy achieved with this three-way 
+    applicability. That one expression, e.g. @lines@ or @splitAt 17@ can have these 
+    three uses is no more surprising than that a pipe can act as a function modifying 
+    the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
+    -- but can /also/ modify the inputs to a consumer by using @>->@ to its right: 
+    @pipe >-> consumer@
+
+    The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by 
+    both <http://hackage.haskell.org/package/lens lens> and 
+    <http://hackage.haskell.org/package/lens-family lens-family>. The use of 'zoom' is explained
+    in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial> 
+    and to some extent in the @Pipes.Text.Encoding@ module here. 
+
+-}
+{- $special
      These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ . 
      This is in the types that are most closely associated with our central text type, 
      @Producer Text m r@.  In @Data.Text@ and @Data.Text.Lazy@ we find functions like
@@ -247,7 +335,7 @@ import Prelude hiding (
  
  >    join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r
  
-    The return type of 'lines', 'words', 'chunksOf' and the other "splitter" functions,
+    The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
      @FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful)
      lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
      together things of the forms:
@@ -259,19 +347,26 @@ import Prelude hiding (
  > (Text, (Text, (Text, (Text, r))))
  > ...
  
-    We might also have identified the sum of those types with @Free ((,) Text) r@ 
-    -- or, more absurdly, @FreeT ((,) Text) Identity r@. Similarly, @FreeT (Producer Text m) m r@
-    encompasses all the members of the sequence:
+    (We might also have identified the sum of those types with @Free ((,) Text) r@ 
+    -- or, more absurdly, @FreeT ((,) Text) Identity r@.) 
+    
+    Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called 
+    @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
     
  > m r
-> Producer Text m r
-> Producer Text m (Producer Text m r)
-> Producer Text m (Producer Text m (Producer Text m r))
+> Text m r
+> Text m (Text m r)
+> Text m (Text m (Text m r))
+> Text m (Text m (Text m (Text m r)))
  > ...
  
+    We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
+    or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
+    result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@ . 
+
      One might think that 
  
->   lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
+>   lines :: Monad m => Lens'_ (Producer Text m r) (FreeT (Producer Text m) m r)
  >   view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
  
      should really have the type
@@ -294,21 +389,41 @@ import Prelude hiding (
      of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this. 
      
      The @Pipes.Group@ module, which can generally be imported without qualification,
-    provides many functions for working with things of type @FreeT (Producer a m) m r@
+    provides many functions for working with things of type @FreeT (Producer a m) m r@.
+    In particular it conveniently exports the constructors for @FreeT@ and the associated
+    @FreeF@ type -- a fancy form of @Either@, namely 
      
-   
-   -}
+> data FreeF f a b = Pure a | Free (f b)
+
+    for pattern-matching. Consider the implementation of the 'words' function, or 
+    of the part of the lens that takes us to the words; it is compact but exhibits many 
+    of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
+    constructors.  Keep in mind that 
+
+>  newtype FreeT f m a  = FreeT (m (FreeF f a (FreeT f m a)))
+>  next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
+
+   Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
+   the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
+
+> words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
+> words p = FreeT $ do                   -- With 'next' we will inspect p's first chunk, excluding spaces;
+>   x <- next (p >-> dropWhile isSpace)  --   note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
+>   return $ case x of                   -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
+>     Left   r       -> Pure r           -- 'Left' means we got no Text chunk, but only the return value; so we are done.
+>     Right (txt, p') -> Free $ do       -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
+>         p'' <- view (break isSpace)    -- When we apply 'break isSpace', we get a Producer that returns a Producer;
+>                     (yield txt >> p')  --   so here we yield everything up to the next space, and get the rest back.
+>         return (words p'')             -- We then carry on with the rest, which is likely to begin with space.
+  
+-}
  
  -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
  fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
-fromLazy  = foldrChunks (\e a -> yield e >> a) (return ()) 
+fromLazy  = TL.foldrChunks (\e a -> yield e >> a) (return ()) 
  {-# INLINE fromLazy #-}
  
  
-type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
-
-type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
-
  (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
  a ^. lens = getConstant (lens Constant a)
  
@@ -670,7 +785,7 @@ isEndOfChars = do
  splitAt
      :: (Monad m, Integral n)
      => n
-    -> Lens' (Producer Text m r)
+    -> Lens'_ (Producer Text m r)
               (Producer Text m (Producer Text m r))
  splitAt n0 k p0 = fmap join (k (go n0 p0))
    where
@@ -699,7 +814,7 @@ splitAt n0 k p0 = fmap join (k (go n0 p0))
  span
      :: (Monad m)
      => (Char -> Bool)
-    -> Lens' (Producer Text m r)
+    -> Lens'_ (Producer Text m r)
               (Producer Text m (Producer Text m r))
  span predicate k p0 = fmap join (k (go p0))
    where
@@ -724,7 +839,7 @@ span predicate k p0 = fmap join (k (go p0))
  break
      :: (Monad m)
      => (Char -> Bool)
-    -> Lens' (Producer Text m r)
+    -> Lens'_ (Producer Text m r)
               (Producer Text m (Producer Text m r))
  break predicate = span (not . predicate)
  {-# INLINABLE break #-}
@@ -735,7 +850,7 @@ break predicate = span (not . predicate)
  groupBy
      :: (Monad m)
      => (Char -> Char -> Bool)
-    -> Lens' (Producer Text m r)
+    -> Lens'_ (Producer Text m r)
               (Producer Text m (Producer Text m r))
  groupBy equals k p0 = fmap join (k ((go p0))) where
      go p = do
@@ -749,7 +864,7 @@ groupBy equals k p0 = fmap join (k ((go p0))) where
  
  -- | Improper lens that splits after the first succession of identical 'Char' s
  group :: Monad m 
-      => Lens' (Producer Text m r)
+      => Lens'_ (Producer Text m r)
                 (Producer Text m (Producer Text m r))
  group = groupBy (==)
  {-# INLINABLE group #-}
@@ -759,7 +874,7 @@ group = groupBy (==)
      Unlike 'words', this does not drop leading whitespace 
  -}
  word :: (Monad m) 
-     => Lens' (Producer Text m r)
+     => Lens'_ (Producer Text m r)
                (Producer Text m (Producer Text m r))
  word k p0 = fmap join (k (to p0))
    where
@@ -770,7 +885,7 @@ word k p0 = fmap join (k (to p0))
  
  
  line :: (Monad m) 
-     => Lens' (Producer Text m r)
+     => Lens'_ (Producer Text m r)
                (Producer Text m (Producer Text m r))
  line = break (== '\n')
  
@@ -802,7 +917,7 @@ intersperse c = go0
  
  
  -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
-packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
+packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x)
  packChars = Data.Profunctor.dimap to (fmap from)
    where
      -- to :: Monad m => Producer Char m x -> Producer Text m x
@@ -814,13 +929,16 @@ packChars = Data.Profunctor.dimap to (fmap from)
  
      -- from :: Monad m => Producer Text m x -> Producer Char m x
      from p = for p (each . T.unpack)
+    
  {-# INLINABLE packChars #-}
  
+defaultChunkSize :: Int
+defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
  
  -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
  chunksOf
      :: (Monad m, Integral n)
-    => n -> Lens' (Producer Text m r) 
+    => n -> Lens'_ (Producer Text m r) 
                    (FreeT (Producer Text m) m r)
  chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
    where
@@ -866,7 +984,7 @@ splitsWith predicate p0 = FreeT (go0 p0)
  -- | Split a text stream using the given 'Char' as the delimiter
  splits :: (Monad m)
        => Char
-      -> Lens' (Producer Text m r)
+      -> Lens'_ (Producer Text m r)
                 (FreeT (Producer Text m) m r)
  splits c k p =
            fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
@@ -878,7 +996,7 @@ splits c k p =
  groupsBy
      :: Monad m
      => (Char -> Char -> Bool)
-    -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
+    -> Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x)
  groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where 
    go p = do x <- next p
              case x of Left   r       -> return (Pure r)
@@ -893,7 +1011,7 @@ groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
  -- | Like 'groupsBy', where the equality predicate is ('==')
  groups
      :: Monad m
-    => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
+    => Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x)
  groups = groupsBy (==)
  {-# INLINABLE groups #-}
  
@@ -902,7 +1020,7 @@ groups = groupsBy (==)
  {-| Split a text stream into 'FreeT'-delimited lines
  -}
  lines
-    :: (Monad m) => Iso' (Producer Text m r)  (FreeT (Producer Text m) m r)
+    :: (Monad m) => Iso'_ (Producer Text m r)  (FreeT (Producer Text m) m r)
  lines = Data.Profunctor.dimap _lines (fmap _unlines)
    where
    _lines p0 = FreeT (go0 p0) 
@@ -933,7 +1051,7 @@ lines = Data.Profunctor.dimap _lines (fmap _unlines)
  
  -- | Split a text stream into 'FreeT'-delimited words
  words
-    :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
+    :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
  words = Data.Profunctor.dimap go (fmap _unwords)
    where
      go p = FreeT $ do