1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
3
4 module Pipes.Text (
5 -- * Introduction
6 -- $intro
7
8 -- * Producers
9 fromLazy
10
11 -- * Pipes
12 , map
13 , concatMap
14 , take
15 , drop
16 , takeWhile
17 , dropWhile
18 , filter
19 , scan
20 , pack
21 , unpack
22 , toCaseFold
23 , toLower
24 , toUpper
25 , stripStart
26
27 -- * Folds
28 , toLazy
29 , toLazyM
30 , foldChars
31 , head
32 , last
33 , null
34 , length
35 , any
36 , all
37 , maximum
38 , minimum
39 , find
40 , index
41 , count
42
43 -- * Primitive Character Parsers
44 , nextChar
45 , drawChar
46 , unDrawChar
47 , peekChar
48 , isEndOfChars
49
50 -- * Parsing Lenses
51 , splitAt
52 , span
53 , break
54 , groupBy
55 , group
56 , word
57 , line
58
59 -- * FreeT Splitters
60 , chunksOf
61 , splitsWith
62 , splits
63 , groupsBy
64 , groups
65 , lines
66 , words
67
68 -- * Transformations
69 , intersperse
70 , packChars
71
72 -- * Joiners
73 , intercalate
74 , unlines
75 , unwords
76
77 -- * Re-exports
78 -- $reexports
79 , module Data.ByteString
80 , module Data.Text
81 , module Data.Profunctor
82 , module Pipes.Parse
83 , module Pipes.Group
84 ) where
85
86 import Control.Applicative ((<*))
87 import Control.Monad (liftM, join)
88 import Control.Monad.Trans.State.Strict (StateT(..), modify)
89 import qualified Data.Text as T
90 import Data.Text (Text)
91 import qualified Data.Text.Lazy as TL
92 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
93 import Data.ByteString (ByteString)
94 import Data.Functor.Constant (Constant(Constant, getConstant))
95 import Data.Functor.Identity (Identity)
96 import Data.Profunctor (Profunctor)
97 import qualified Data.Profunctor
98 import Pipes
99 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
100 import qualified Pipes.Group as PG
101 import qualified Pipes.Parse as PP
102 import Pipes.Parse (Parser)
103 import qualified Pipes.Prelude as P
104 import Data.Char (isSpace)
105 import Data.Word (Word8)
106
107 import Prelude hiding (
108 all,
109 any,
110 break,
111 concat,
112 concatMap,
113 drop,
114 dropWhile,
115 elem,
116 filter,
117 head,
118 last,
119 lines,
120 length,
121 map,
122 maximum,
123 minimum,
124 notElem,
125 null,
126 readFile,
127 span,
128 splitAt,
129 take,
130 takeWhile,
131 unlines,
132 unwords,
133 words,
134 writeFile )
135
136 {- $intro
137
138 * /I. Effectful Text/
139
140 This package provides @pipes@ utilities for /text streams/, understood as
141 streams of 'Text' chunks. The individual chunks are uniformly /strict/, and thus you
142 will generally want @Data.Text@ in scope. But the type @Producer Text m r@ as we
143 are using it is a sort of pipes equivalent of the lazy @Text@ type.
144
145 This particular module provides many functions equivalent in one way or another to
146 the pure functions in
147 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
148 They transform, divide, group and fold text streams. Though @Producer Text m r@
149 is the type of \'effectful Text\', the functions in this module are \'pure\'
150 in the sense that they are uniformly monad-independent.
151 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
152 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
153 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
154
155 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
156 strict Text: the implementation is arranged so that the individual strict 'Text'
157 chunks are kept to a reasonable size; the user is not aware of the divisions
158 between the connected 'Text' chunks.
159 So also here: the functions in this module are designed to operate on streams that
160 are insensitive to text boundaries. This means that they may freely split
161 text into smaller texts and /discard empty texts/. The objective, though, is
162 that they should /never concatenate texts/ in order to provide strict upper
163 bounds on memory usage.
164
165 For example, to stream only the first three lines of 'stdin' to 'stdout' you
166 might write:
167
168 > import Pipes
169 > import qualified Pipes.Text as Text
170 > import qualified Pipes.Text.IO as Text
171 > import Pipes.Group (takes')
172 > import Lens.Family
173 >
174 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
175 > where
176 > takeLines n = Text.unlines . takes' n . view Text.lines
177
178 The above program will never bring more than one chunk of text (~ 32 KB) into
179 memory, no matter how long the lines are.
180
181 * /II. Lenses/
182
183 As this example shows, one superficial difference from @Data.Text.Lazy@
184 is that many of the operations, like 'lines', are \'lensified\'; this has a
185 number of advantages (where it is possible); in particular it facilitates their
186 use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
187 sense.) The disadvantage, famously, is that the messages you get for type errors can be
188 a little alarming. The remarks that follow in this section are for non-lens adepts.
189
190 Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
191 intuitively corresponding function when used with @view@ or @(^.)@. Instead of
192 writing:
193
194 > splitAt 17 producer
195
196 as we would with the Prelude or Text functions, we write
197
198 > view (splitAt 17) producer
199
200 or
201
202 > producer ^. splitAt 17
203
204 This may seem a little indirect, but note that many equivalents of
205 @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
206 corresponding functions by prefixing them with @(>->)@. Thus something like
207
208 > stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
209
210 would drop the leading white space from each line.
211
212 The lenses in this library are marked as /improper/; this just means that
213 they don't admit all the operations of an ideal lens, but only "getting" and "focussing".
214 Just for this reason, though, the magnificent complexities of the lens libraries
215 are a distraction. The lens combinators to keep in mind, the ones that make sense for
216     our lenses, are @view@ \/ @(^.)@, @over@ \/ @(%~)@, and @zoom@.
217
218 One need only keep in mind that if @l@ is a @Lens' a b@, then:
219
220 - @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
221 is the corresponding @b@; as was said above, this function will be exactly the
222 function you think it is, given its name. Thus to uppercase the first n characters
223 of a Producer, leaving the rest the same, we could write:
224
225
226 > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
227 > p'
228
229
230 - @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
231 @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
232 the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
233 (it can also be written @l %~ f@).
234 For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
235 So above we might have written things like these:
236
237 > stripLines = Text.lines %~ maps (>-> Text.stripStart)
238 > stripLines = over Text.lines (maps (>-> Text.stripStart))
239 > upper n = Text.splitAt n %~ (>-> Text.toUpper)
240
241 - @zoom l@, finally, is a function from a @Parser b m r@
242 to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
243     Its use is easiest to see with a decoding lens like 'utf8', which
244 \"sees\" a Text producer hidden inside a ByteString producer:
245 @drawChar@ is a Text parser, returning a @Maybe Char@, @zoom utf8 drawChar@ is
246 a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
247 a list of everything produced from a Producer, leaving only the return value; it would
248 usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
249 returns a list of Text chunks containing the first seventeen Chars, and returns the rest of
250 the Text Producer for further parsing. Suppose that we want, inexplicably, to
251 modify the casing of a Text Producer according to any instruction it might
252 contain at the start. Then we might write something like this:
253
254 > obey :: Monad m => Producer Text m b -> Producer Text m b
255 > obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
256 > let seven = T.concat ts
257 > case T.toUpper seven of
258 > "TOUPPER" -> p' >-> Text.toUpper
259 > "TOLOWER" -> p' >-> Text.toLower
260 > _ -> do yield seven
261 > p'
262
263 The purpose of exporting lenses is the mental economy achieved with this three-way
264     applicability. That one expression, e.g. @lines@ or @splitAt 17@, can have these
265 three uses is no more surprising than that a pipe can act as a function modifying
266 the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
267 -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
268     @pipe >-> consumer@.
269
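As a minimal sketch of this three-way applicability (assuming @view@, @over@ and
@zoom@ are in scope from one of the lens libraries mentioned just below, and
@drawAll@ from @Pipes.Parse@; the names on the left are merely illustrative):

> firstSeventeen      = view (Text.splitAt 17)                    -- the first seventeen characters, returning the rest
> upperFirstSeventeen = over (Text.splitAt 17) (>-> Text.toUpper) -- uppercase only the first seventeen characters
> drawFirstSeventeen  = zoom (Text.splitAt 17) drawAll            -- a 'Parser' returning those characters' chunks as a list
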
270 The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
271 both <http://hackage.haskell.org/package/lens lens> and
272     <http://hackage.haskell.org/package/lens-family lens-family>. The use of 'zoom' is explained
273 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
274 and to some extent in the @Pipes.Text.Encoding@ module here.
275
276
277 * /III. Special types:/ @Producer Text m (Producer Text m r)@ /and/ @FreeT (Producer Text m) m r@
278
279 These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
280 This is in the types that are most closely associated with our central text type,
281 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
282
283 > splitAt :: Int -> Text -> (Text, Text)
284 > lines :: Text -> [Text]
285 > chunksOf :: Int -> Text -> [Text]
286
287 which relate a Text with a pair of Texts or a list of Texts.
288 The corresponding functions here (taking account of \'lensification\') are
289
290 > view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
291 > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
292 > view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
293
294 Some of the types may be more readable if you imagine that we have introduced
295 our own type synonyms
296
297 > type Text m r = Producer T.Text m r
298 > type Texts m r = FreeT (Producer T.Text m) m r
299
300 Then we would think of the types above as
301
302 > view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
303 > view lines :: (Monad m) => Text m r -> Texts m r
304 > view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
305
306 which brings one closer to the types of the similar functions in @Data.Text.Lazy@
307
308 In the type @Producer Text m (Producer Text m r)@ the second
309 element of the \'pair\' of effectful Texts cannot simply be retrieved
310 with something like 'snd'. This is an \'effectful\' pair, and one must work
311 through the effects of the first element to arrive at the second Text stream, even
312 if you are proposing to throw the Text in the first element away.
313 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
314
315 > join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r
316
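For instance (a small sketch; the name @reassemble@ is only illustrative), fusing
the pair produced by @splitAt@ back together with 'Control.Monad.join' gives back
an equivalent stream -- nothing is lost except, perhaps, the original chunk divisions:

> reassemble :: Monad m => Producer Text m r -> Producer Text m r
> reassemble p = join (view (Text.splitAt 17) p)
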
317 The return type of 'lines', 'words', 'chunksOf' and the other "splitter" functions,
318     @FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful)
319 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
320 together things of the forms:
321
322 > r
323 > (Text,r)
324 > (Text, (Text, r))
325 > (Text, (Text, (Text, r)))
326 > (Text, (Text, (Text, (Text, r))))
327 > ...
328
329 (We might also have identified the sum of those types with @Free ((,) Text) r@
330 -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
331
332 Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
333 @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
334
335 > m r
336 > Text m r
337 > Text m (Text m r)
338 > Text m (Text m (Text m r))
339 > Text m (Text m (Text m (Text m r)))
340 > ...
341
342 We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
343 or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
344     result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@.
345
346 One might think that
347
348 > lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
349 > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
350
351 should really have the type
352
353 > lines :: Monad m => Pipe Text Text m r
354
355 as e.g. 'toUpper' does. But this would spoil the control we are
356 attempting to maintain over the size of chunks. It is in fact just
357 as unreasonable to want such a pipe as to want
358
359 > Data.Text.Lazy.lines :: Text -> Text
360
361 to 'rechunk' the strict Text chunks inside the lazy Text to respect
362 line boundaries. In fact we have
363
364 > Data.Text.Lazy.lines :: Text -> [Text]
365 > Prelude.lines :: String -> [String]
366
367 where the elements of the list are themselves lazy Texts or Strings; the use
368 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
369
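As a small illustration of consuming such a value (a sketch only, assuming
@Pipes.Prelude@ is imported qualified as @P@ and @Pipes.Group@ as @Group@), we can
count the lines of a text stream without ever accumulating their contents:

> countLines :: Monad m => Producer Text m () -> m Int
> countLines = P.length . Group.folds (\_ _ -> ()) () id . view Text.lines
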
370 The @Pipes.Group@ module, which can generally be imported without qualification,
371     provides many functions for working with things of type @FreeT (Producer a m) m r@.
372 In particular it conveniently exports the constructors for @FreeT@ and the associated
373 @FreeF@ type -- a fancy form of @Either@, namely
374
375 > data FreeF f a b = Pure a | Free (f b)
376
377 for pattern-matching. Consider the implementation of the 'words' function, or
378 of the part of the lens that takes us to the words; it is compact but exhibits many
379 of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
380     constructors. Keep in mind that
381
382 > newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
383 > next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
384
385 Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
386 the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
387
388 > words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
389 > words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
390 > x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
391 > return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
392 > Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
393 > Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
394 > p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
395 > (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
396 > return (words p'') -- We then carry on with the rest, which is likely to begin with space.
397
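A short sketch of using the resulting splitter (the name @collapseSpaces@ is ours):
sending a stream through @view words@ and back through @unwords@ drops leading and
trailing whitespace and reduces interior runs of whitespace to single spaces:

> collapseSpaces :: Monad m => Producer Text m r -> Producer Text m r
> collapseSpaces = Text.unwords . view Text.words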
398 -}
399
400 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
401 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
402 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
403 {-# INLINE fromLazy #-}
404
405
406 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
407
408 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
409
410 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
411 a ^. lens = getConstant (lens Constant a)
412
413
414 -- | Apply a transformation to each 'Char' in the stream
415 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
416 map f = P.map (T.map f)
417 {-# INLINABLE map #-}
418
419 {-# RULES "p >-> map f" forall p f .
420 p >-> map f = for p (\txt -> yield (T.map f txt))
421 #-}
422
423 -- | Map a function over the characters of a text stream and concatenate the results
424 concatMap
425 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
426 concatMap f = P.map (T.concatMap f)
427 {-# INLINABLE concatMap #-}
428
429 {-# RULES "p >-> concatMap f" forall p f .
430 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
431 #-}
432
433
434 -- | Transform a Pipe of 'String's into one of 'Text' chunks
435 pack :: Monad m => Pipe String Text m r
436 pack = P.map T.pack
437 {-# INLINEABLE pack #-}
438
439 {-# RULES "p >-> pack" forall p .
440 p >-> pack = for p (\txt -> yield (T.pack txt))
441 #-}
442
443 -- | Transform a Pipe of 'Text' chunks into one of 'String's
444 unpack :: Monad m => Pipe Text String m r
445 unpack = for cat (\t -> yield (T.unpack t))
446 {-# INLINEABLE unpack #-}
447
448 {-# RULES "p >-> unpack" forall p .
449 p >-> unpack = for p (\txt -> yield (T.unpack txt))
450 #-}
451
452 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
453 -- here acting as 'Text' pipes, rather as they would on a lazy text
454 toCaseFold :: Monad m => Pipe Text Text m r
455 toCaseFold = P.map T.toCaseFold
456 {-# INLINEABLE toCaseFold #-}
457
458 {-# RULES "p >-> toCaseFold" forall p .
459 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
460 #-}
461
462
463 -- | lowercase incoming 'Text'
464 toLower :: Monad m => Pipe Text Text m r
465 toLower = P.map T.toLower
466 {-# INLINEABLE toLower #-}
467
468 {-# RULES "p >-> toLower" forall p .
469 p >-> toLower = for p (\txt -> yield (T.toLower txt))
470 #-}
471
472 -- | uppercase incoming 'Text'
473 toUpper :: Monad m => Pipe Text Text m r
474 toUpper = P.map T.toUpper
475 {-# INLINEABLE toUpper #-}
476
477 {-# RULES "p >-> toUpper" forall p .
478 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
479 #-}
480
481 -- | Remove leading white space from an incoming succession of 'Text's
482 stripStart :: Monad m => Pipe Text Text m r
483 stripStart = do
484 chunk <- await
485 let text = T.stripStart chunk
486 if T.null text
487 then stripStart
488 else do yield text
489 cat
490 {-# INLINEABLE stripStart #-}
491
492 -- | @(take n)@ only allows @n@ individual characters to pass;
493 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
494 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
495 take n0 = go n0 where
496 go n
497 | n <= 0 = return ()
498 | otherwise = do
499 txt <- await
500 let len = fromIntegral (T.length txt)
501 if (len > n)
502 then yield (T.take (fromIntegral n) txt)
503 else do
504 yield txt
505 go (n - len)
506 {-# INLINABLE take #-}
507
508 -- | @(drop n)@ drops the first @n@ characters
509 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
510 drop n0 = go n0 where
511 go n
512 | n <= 0 = cat
513 | otherwise = do
514 txt <- await
515 let len = fromIntegral (T.length txt)
516 if (len >= n)
517 then do
518 yield (T.drop (fromIntegral n) txt)
519 cat
520 else go (n - len)
521 {-# INLINABLE drop #-}
522
523 -- | Take characters until they fail the predicate
524 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
525 takeWhile predicate = go
526 where
527 go = do
528 txt <- await
529 let (prefix, suffix) = T.span predicate txt
530 if (T.null suffix)
531 then do
532 yield txt
533 go
534 else yield prefix
535 {-# INLINABLE takeWhile #-}
536
537 -- | Drop characters until they fail the predicate
538 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
539 dropWhile predicate = go where
540 go = do
541 txt <- await
542 case T.findIndex (not . predicate) txt of
543 Nothing -> go
544 Just i -> do
545 yield (T.drop i txt)
546 cat
547 {-# INLINABLE dropWhile #-}
548
549 -- | Only allows 'Char's to pass if they satisfy the predicate
550 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
551 filter predicate = P.map (T.filter predicate)
552 {-# INLINABLE filter #-}
553
554 {-# RULES "p >-> filter q" forall p q .
555 p >-> filter q = for p (\txt -> yield (T.filter q txt))
556 #-}
557
558 -- | Strict left scan over the characters
559 scan
560 :: (Monad m)
561 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
562 scan step begin = do
563 yield (T.singleton begin)
564 go begin
565 where
566 go c = do
567 txt <- await
568 let txt' = T.scanl step c txt
569 c' = T.last txt'
570 yield (T.tail txt')
571 go c'
572 {-# INLINABLE scan #-}
573
574 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
575 'TL.Text'
576 -}
577 toLazy :: Producer Text Identity () -> TL.Text
578 toLazy = TL.fromChunks . P.toList
579 {-# INLINABLE toLazy #-}
580
581 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
582 'TL.Text'
583
584 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
585 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
586 immediately as they are generated instead of loading them all into memory.
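
    A hedged usage sketch (the helper name @firstHundred@ is ours, not part of
    the API): collect at most the first hundred characters of a stream into a
    lazy 'TL.Text' for inspection.

> firstHundred :: Monad m => Producer Text m () -> m TL.Text
> firstHundred p = toLazyM (p >-> take 100)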
587 -}
588 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
589 toLazyM = liftM TL.fromChunks . P.toListM
590 {-# INLINABLE toLazyM #-}
591
592 -- | Reduce the text stream using a strict left fold over characters
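--
-- A hedged sketch (the name @charCount@ is only an illustration): counting the
-- characters with an explicit strict fold, equivalent to 'length'.
--
-- > charCount :: Monad m => Producer Text m () -> m Int
-- > charCount = foldChars (\n _ -> n + 1) 0 id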
593 foldChars
594 :: Monad m
595 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
596 foldChars step begin done = P.fold (T.foldl' step) begin done
597 {-# INLINABLE foldChars #-}
598
599 -- | Retrieve the first 'Char'
600 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
601 head = go
602 where
603 go p = do
604 x <- nextChar p
605 case x of
606 Left _ -> return Nothing
607 Right (c, _) -> return (Just c)
608 {-# INLINABLE head #-}
609
610 -- | Retrieve the last 'Char'
611 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
612 last = go Nothing
613 where
614 go r p = do
615 x <- next p
616 case x of
617 Left () -> return r
618 Right (txt, p') ->
619 if (T.null txt)
620 then go r p'
621 else go (Just $ T.last txt) p'
622 {-# INLINABLE last #-}
623
624 -- | Determine if the stream is empty
625 null :: (Monad m) => Producer Text m () -> m Bool
626 null = P.all T.null
627 {-# INLINABLE null #-}
628
629 -- | Count the number of characters in the stream
630 length :: (Monad m, Num n) => Producer Text m () -> m n
631 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
632 {-# INLINABLE length #-}
633
634 -- | Fold that returns whether any received 'Char's satisfy the predicate
635 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
636 any predicate = P.any (T.any predicate)
637 {-# INLINABLE any #-}
638
639 -- | Fold that returns whether all received 'Char's satisfy the predicate
640 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
641 all predicate = P.all (T.all predicate)
642 {-# INLINABLE all #-}
643
644 -- | Return the maximum 'Char' within a text stream
645 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
646 maximum = P.fold step Nothing id
647 where
648 step mc txt =
649 if (T.null txt)
650 then mc
651 else Just $ case mc of
652 Nothing -> T.maximum txt
653 Just c -> max c (T.maximum txt)
654 {-# INLINABLE maximum #-}
655
656 -- | Return the minimum 'Char' within a text stream (surely very useful!)
657 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
658 minimum = P.fold step Nothing id
659 where
660 step mc txt =
661 if (T.null txt)
662 then mc
663 else case mc of
664 Nothing -> Just (T.minimum txt)
665 Just c -> Just (min c (T.minimum txt))
666 {-# INLINABLE minimum #-}
667
668 -- | Find the first element in the stream that matches the predicate
669 find
670 :: (Monad m)
671 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
672 find predicate p = head (p >-> filter predicate)
673 {-# INLINABLE find #-}
674
675 -- | Index into a text stream
676 index
677 :: (Monad m, Integral a)
678     => a -> Producer Text m () -> m (Maybe Char)
679 index n p = head (p >-> drop n)
680 {-# INLINABLE index #-}
681
682
683 -- | Count the number of non-overlapping occurrences of the given 'Text' in the stream, as found within the individual chunks
684 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
685 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
686 {-# INLINABLE count #-}
687
688
689 -- | Consume the first character from a stream of 'Text'
690 --
691 -- 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
692 -- succeeds with a 'Right' providing the next character and the remainder of the
693 -- 'Producer'.
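--
-- A sketch of direct use (character-by-character, hence inefficient; the name
-- @toString@ is ours):
--
-- > toString :: Monad m => Producer Text m () -> m String
-- > toString p = do
-- >     x <- nextChar p
-- >     case x of
-- >         Left ()       -> return []
-- >         Right (c, p') -> liftM (c:) (toString p')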
694
695 nextChar
696 :: (Monad m)
697 => Producer Text m r
698 -> m (Either r (Char, Producer Text m r))
699 nextChar = go
700 where
701 go p = do
702 x <- next p
703 case x of
704 Left r -> return (Left r)
705 Right (txt, p') -> case (T.uncons txt) of
706 Nothing -> go p'
707 Just (c, txt') -> return (Right (c, yield txt' >> p'))
708 {-# INLINABLE nextChar #-}
709
710 -- | Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the 'Producer' is empty
711
712 drawChar :: (Monad m) => Parser Text m (Maybe Char)
713 drawChar = do
714 x <- PP.draw
715 case x of
716 Nothing -> return Nothing
717 Just txt -> case (T.uncons txt) of
718 Nothing -> drawChar
719 Just (c, txt') -> do
720 PP.unDraw txt'
721 return (Just c)
722 {-# INLINABLE drawChar #-}
723
724 -- | Push back a 'Char' onto the underlying 'Producer'
725 unDrawChar :: (Monad m) => Char -> Parser Text m ()
726 unDrawChar c = modify (yield (T.singleton c) >>)
727 {-# INLINABLE unDrawChar #-}
728
729 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
730 push the 'Char' back
731
732 > peekChar = do
733 > x <- drawChar
734 > case x of
735 >         Nothing -> return ()
736 >         Just c  -> unDrawChar c
737 > return x
738
739 -}
740
741 peekChar :: (Monad m) => Parser Text m (Maybe Char)
742 peekChar = do
743 x <- drawChar
744 case x of
745 Nothing -> return ()
746 Just c -> unDrawChar c
747 return x
748 {-# INLINABLE peekChar #-}
749
750 {-| Check if the underlying 'Producer' has no more characters
751
752 Note that this will skip over empty 'Text' chunks, unlike
753 'PP.isEndOfInput' from @pipes-parse@, which would consider
754 an empty 'Text' a valid bit of input.
755
756 > isEndOfChars = liftM isNothing peekChar
757 -}
758 isEndOfChars :: (Monad m) => Parser Text m Bool
759 isEndOfChars = do
760 x <- peekChar
761 return (case x of
762 Nothing -> True
763 Just _-> False )
764 {-# INLINABLE isEndOfChars #-}
765
766
767 -- | Splits a 'Producer' after the given number of characters
768 splitAt
769 :: (Monad m, Integral n)
770 => n
771 -> Lens' (Producer Text m r)
772 (Producer Text m (Producer Text m r))
773 splitAt n0 k p0 = fmap join (k (go n0 p0))
774 where
775 go 0 p = return p
776 go n p = do
777 x <- lift (next p)
778 case x of
779 Left r -> return (return r)
780 Right (txt, p') -> do
781 let len = fromIntegral (T.length txt)
782 if (len <= n)
783 then do
784 yield txt
785 go (n - len) p'
786 else do
787 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
788 yield prefix
789 return (yield suffix >> p')
790 {-# INLINABLE splitAt #-}
791
792
793 -- | Split a text stream in two, producing the longest
794 -- consecutive group of characters that satisfies the predicate
795 -- and returning the rest
796
797 span
798 :: (Monad m)
799 => (Char -> Bool)
800 -> Lens' (Producer Text m r)
801 (Producer Text m (Producer Text m r))
802 span predicate k p0 = fmap join (k (go p0))
803 where
804 go p = do
805 x <- lift (next p)
806 case x of
807 Left r -> return (return r)
808 Right (txt, p') -> do
809 let (prefix, suffix) = T.span predicate txt
810 if (T.null suffix)
811 then do
812 yield txt
813 go p'
814 else do
815 yield prefix
816 return (yield suffix >> p')
817 {-# INLINABLE span #-}
818
819 {-| Split a text stream in two, producing the longest
820 consecutive group of characters that don't satisfy the predicate
821 -}
822 break
823 :: (Monad m)
824 => (Char -> Bool)
825 -> Lens' (Producer Text m r)
826 (Producer Text m (Producer Text m r))
827 break predicate = span (not . predicate)
828 {-# INLINABLE break #-}
829
830 {-| Improper lens that splits after the first group of equivalent Chars, as
831 defined by the given equivalence relation
832 -}
833 groupBy
834 :: (Monad m)
835 => (Char -> Char -> Bool)
836 -> Lens' (Producer Text m r)
837 (Producer Text m (Producer Text m r))
838 groupBy equals k p0 = fmap join (k ((go p0))) where
839 go p = do
840 x <- lift (next p)
841 case x of
842 Left r -> return (return r)
843 Right (txt, p') -> case T.uncons txt of
844 Nothing -> go p'
845 Just (c, _) -> (yield txt >> p') ^. span (equals c)
846 {-# INLINABLE groupBy #-}
847
848 -- | Improper lens that splits after the first succession of identical 'Char's
849 group :: Monad m
850 => Lens' (Producer Text m r)
851 (Producer Text m (Producer Text m r))
852 group = groupBy (==)
853 {-# INLINABLE group #-}
854
855 {-| Improper lens that splits a 'Producer' after the first word
856
857 Unlike 'words', this does not drop leading whitespace
858 -}
859 word :: (Monad m)
860 => Lens' (Producer Text m r)
861 (Producer Text m (Producer Text m r))
862 word k p0 = fmap join (k (to p0))
863 where
864 to p = do
865 p' <- p^.span isSpace
866 p'^.break isSpace
867 {-# INLINABLE word #-}
868
869
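-- | Improper lens that splits a 'Producer' after the first line; the
-- terminating newline, if any, is left at the head of the remainder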
870 line :: (Monad m)
871 => Lens' (Producer Text m r)
872 (Producer Text m (Producer Text m r))
873 line = break (== '\n')
874
875 {-# INLINABLE line #-}
876
877
878 -- | Intersperse a 'Char' in between the characters of a stream of 'Text'
879 intersperse
880 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
881 intersperse c = go0
882 where
883 go0 p = do
884 x <- lift (next p)
885 case x of
886 Left r -> return r
887 Right (txt, p') -> do
888 yield (T.intersperse c txt)
889 go1 p'
890 go1 p = do
891 x <- lift (next p)
892 case x of
893 Left r -> return r
894 Right (txt, p') -> do
895 yield (T.singleton c)
896 yield (T.intersperse c txt)
897 go1 p'
898 {-# INLINABLE intersperse #-}
899
900
901
902 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
903 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
904 packChars = Data.Profunctor.dimap to (fmap from)
905 where
906 -- to :: Monad m => Producer Char m x -> Producer Text m x
907 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
908
909 step diffAs c = diffAs . (c:)
910
911 done diffAs = T.pack (diffAs [])
912
913 -- from :: Monad m => Producer Text m x -> Producer Char m x
914 from p = for p (each . T.unpack)
915 {-# INLINABLE packChars #-}
916
917
918 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
919 chunksOf
920 :: (Monad m, Integral n)
921 => n -> Lens' (Producer Text m r)
922 (FreeT (Producer Text m) m r)
923 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
924 where
925 go p = do
926 x <- next p
927 return $ case x of
928 Left r -> Pure r
929 Right (txt, p') -> Free $ do
930 p'' <- (yield txt >> p') ^. splitAt n
931 return $ FreeT (go p'')
932 {-# INLINABLE chunksOf #-}
933
934
935 {-| Split a text stream into sub-streams delimited by characters that satisfy the
936 predicate
937 -}
938 splitsWith
939 :: (Monad m)
940 => (Char -> Bool)
941 -> Producer Text m r
942 -> FreeT (Producer Text m) m r
943 splitsWith predicate p0 = FreeT (go0 p0)
944 where
945 go0 p = do
946 x <- next p
947 case x of
948 Left r -> return (Pure r)
949 Right (txt, p') ->
950 if (T.null txt)
951 then go0 p'
952 else return $ Free $ do
953 p'' <- (yield txt >> p') ^. span (not . predicate)
954 return $ FreeT (go1 p'')
955 go1 p = do
956 x <- nextChar p
957 return $ case x of
958 Left r -> Pure r
959 Right (_, p') -> Free $ do
960 p'' <- p' ^. span (not . predicate)
961 return $ FreeT (go1 p'')
962 {-# INLINABLE splitsWith #-}
963
964 -- | Split a text stream using the given 'Char' as the delimiter
965 splits :: (Monad m)
966 => Char
967 -> Lens' (Producer Text m r)
968 (FreeT (Producer Text m) m r)
969 splits c k p =
970 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
971 {-# INLINABLE splits #-}
972
973 {-| Improper lens from a stream of 'Text' to 'FreeT'-delimited groups of equivalent 'Char's,
974     as determined by the given equivalence relation
975 -}
976 groupsBy
977 :: Monad m
978 => (Char -> Char -> Bool)
979 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
980 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
981 go p = do x <- next p
982 case x of Left r -> return (Pure r)
983 Right (bs, p') -> case T.uncons bs of
984 Nothing -> go p'
985 Just (c, _) -> do return $ Free $ do
986 p'' <- (yield bs >> p')^.span (equals c)
987 return $ FreeT (go p'')
988 {-# INLINABLE groupsBy #-}
989
990
991 -- | Like 'groupsBy', where the equality predicate is ('==')
992 groups
993 :: Monad m
994 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
995 groups = groupsBy (==)
996 {-# INLINABLE groups #-}
997
998
999
1000 {-| Split a text stream into 'FreeT'-delimited lines
1001 -}
1002 lines
1003 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
1004 lines = Data.Profunctor.dimap _lines (fmap _unlines)
1005 where
1006 _lines p0 = FreeT (go0 p0)
1007 where
1008 go0 p = do
1009 x <- next p
1010 case x of
1011 Left r -> return (Pure r)
1012 Right (txt, p') ->
1013 if (T.null txt)
1014 then go0 p'
1015 else return $ Free $ go1 (yield txt >> p')
1016 go1 p = do
1017 p' <- p ^. break ('\n' ==)
1018 return $ FreeT $ do
1019 x <- nextChar p'
1020 case x of
1021 Left r -> return $ Pure r
1022 Right (_, p'') -> go0 p''
1023 -- _unlines
1024 -- :: Monad m
1025 -- => FreeT (Producer Text m) m x -> Producer Text m x
1026 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
1027
1028
1029 {-# INLINABLE lines #-}
1030
1031
1032 -- | Split a text stream into 'FreeT'-delimited words
1033 words
1034 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
1035 words = Data.Profunctor.dimap go (fmap _unwords)
1036 where
1037 go p = FreeT $ do
1038 x <- next (p >-> dropWhile isSpace)
1039 return $ case x of
1040 Left r -> Pure r
1041 Right (bs, p') -> Free $ do
1042 p'' <- (yield bs >> p') ^. break isSpace
1043 return (go p'')
1044 _unwords = PG.intercalates (yield $ T.singleton ' ')
1045
1046 {-# INLINABLE words #-}
1047
1048
1049 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1050 interspersing a text stream in between them
1051 -}
1052 intercalate
1053 :: (Monad m)
1054 => Producer Text m ()
1055 -> FreeT (Producer Text m) m r
1056 -> Producer Text m r
1057 intercalate p0 = go0
1058 where
1059 go0 f = do
1060 x <- lift (runFreeT f)
1061 case x of
1062 Pure r -> return r
1063 Free p -> do
1064 f' <- p
1065 go1 f'
1066 go1 f = do
1067 x <- lift (runFreeT f)
1068 case x of
1069 Pure r -> return r
1070 Free p -> do
1071 p0
1072 f' <- p
1073 go1 f'
1074 {-# INLINABLE intercalate #-}
1075
1076 {-| Join 'FreeT'-delimited lines into a text stream
1077 -}
1078 unlines
1079 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1080 unlines = go
1081 where
1082 go f = do
1083 x <- lift (runFreeT f)
1084 case x of
1085 Pure r -> return r
1086 Free p -> do
1087 f' <- p
1088 yield $ T.singleton '\n'
1089 go f'
1090 {-# INLINABLE unlines #-}
1091
1092 {-| Join 'FreeT'-delimited words into a text stream
1093 -}
1094 unwords
1095 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1096 unwords = intercalate (yield $ T.singleton ' ')
1097 {-# INLINABLE unwords #-}
1098
1099
1100 {- $reexports
1101
1102     @Data.Text@ re-exports the 'Text' type, @Data.ByteString@ the 'ByteString' type, and @Data.Profunctor@ the 'Profunctor' class.
1103
1104     @Pipes.Parse@ re-exports the 'Parser' type synonym; @Pipes.Group@ re-exports 'concats', 'intercalates', 'FreeT' (the type) and 'FreeF'.
1105 -}
1106
1107