1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
3
4 module Pipes.Text (
5 -- * Effectful Text
6 -- $intro
7
8 -- * Lenses
9 -- $lenses
10
11 -- ** @view@ \/ @(^.)@
12 -- $view
13
14 -- ** @over@ \/ @(%~)@
15 -- $over
16
17 -- ** @zoom@
18 -- $zoom
19
20 -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
21 -- $special
22
23 -- * Producers
24 fromLazy
25
26 -- * Pipes
27 , map
28 , concatMap
29 , take
30 , drop
31 , takeWhile
32 , dropWhile
33 , filter
34 , scan
35 , pack
36 , unpack
37 , toCaseFold
38 , toLower
39 , toUpper
40 , stripStart
41
42 -- * Folds
43 , toLazy
44 , toLazyM
45 , foldChars
46 , head
47 , last
48 , null
49 , length
50 , any
51 , all
52 , maximum
53 , minimum
54 , find
55 , index
56 , count
57
58 -- * Primitive Character Parsers
59 , nextChar
60 , drawChar
61 , unDrawChar
62 , peekChar
63 , isEndOfChars
64
65 -- * Parsing Lenses
66 , splitAt
67 , span
68 , break
69 , groupBy
70 , group
71 , word
72 , line
73
74 -- * FreeT Splitters
75 , chunksOf
76 , splitsWith
77 , splits
78 , groupsBy
79 , groups
80 , lines
81 , words
82
83 -- * Transformations
84 , intersperse
85 , packChars
86
87 -- * Joiners
88 , intercalate
89 , unlines
90 , unwords
91
92 -- * Re-exports
93 -- $reexports
94 , module Data.ByteString
95 , module Data.Text
96 , module Data.Profunctor
97 , module Pipes.Parse
98 , module Pipes.Group
99 ) where
100
101 import Control.Applicative ((<*))
102 import Control.Monad (liftM, join)
103 import Control.Monad.Trans.State.Strict (StateT(..), modify)
104 import qualified Data.Text as T
105 import Data.Text (Text)
106 import qualified Data.Text.Lazy as TL
107 import Data.ByteString (ByteString)
108 import Data.Functor.Constant (Constant(Constant, getConstant))
109 import Data.Functor.Identity (Identity)
110 import Data.Profunctor (Profunctor)
111 import qualified Data.Profunctor
112 import Pipes
113 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
114 import qualified Pipes.Group as PG
115 import qualified Pipes.Parse as PP
116 import Pipes.Parse (Parser)
117 import Pipes.Text.Encoding (Lens'_, Iso'_)
118 import qualified Pipes.Prelude as P
119 import Data.Char (isSpace)
120 import Data.Word (Word8)
121 import Foreign.Storable (sizeOf)
122 import Data.Bits (shiftL)
123 import Prelude hiding (
124 all,
125 any,
126 break,
127 concat,
128 concatMap,
129 drop,
130 dropWhile,
131 elem,
132 filter,
133 head,
134 last,
135 lines,
136 length,
137 map,
138 maximum,
139 minimum,
140 notElem,
141 null,
142 readFile,
143 span,
144 splitAt,
145 take,
146 takeWhile,
147 unlines,
148 unwords,
149 words,
150 writeFile )
151
152 {- $intro
153 This package provides @pipes@ utilities for /text streams/ or /character streams/,
154 realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
155 and thus you will generally want @Data.Text@ in scope. But the type
156 @Producer Text m r@, as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
157
158 This particular module provides many functions equivalent in one way or another to
159 the pure functions in
160 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
161 They transform, divide, group and fold text streams. Though @Producer Text m r@
162 is the type of \'effectful Text\', the functions in this module are \'pure\'
163 in the sense that they are uniformly monad-independent.
164 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
165 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
166 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
167
168 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
169 strict Text: the implementation is arranged so that the individual strict 'Text'
170 chunks are kept to a reasonable size; the user is not aware of the divisions
171 between the connected 'Text' chunks.
172 So also here: the functions in this module are designed to operate on streams that
173 are insensitive to text boundaries. This means that they may freely split
174 text into smaller texts and /discard empty texts/. The objective, though, is
175 that they should /never concatenate texts/ in order to provide strict upper
176 bounds on memory usage.
177
178 For example, to stream only the first three lines of 'stdin' to 'stdout' you
179 might write:
180
181 > import Pipes
182 > import qualified Pipes.Text as Text
183 > import qualified Pipes.Text.IO as Text
184 > import Pipes.Group (takes')
185 > import Lens.Family
186 >
187 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
188 > where
189 >         takeLines n = view Text.unlines . takes' n . view Text.lines
190
191 The above program will never bring more than one chunk of text (~ 32 KB) into
192 memory, no matter how long the lines are.
193
194 -}
195 {- $lenses
196 As this example shows, one superficial difference from @Data.Text.Lazy@
197 is that many of the operations, like 'lines', are \'lensified\'; this has a
198 number of advantages (where it is possible); in particular it facilitates their
199 use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
200 sense). The disadvantage, famously, is that the messages you get for type errors can be
201 a little alarming. The remarks that follow in this section are for non-lens adepts.
202
203 Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
204 intuitively corresponding function when used with @view@ or @(^.)@. Instead of
205 writing:
206
207 > splitAt 17 producer
208
209 as we would with the Prelude or Text functions, we write
210
211 > view (splitAt 17) producer
212
213 or equivalently
214
215 > producer ^. splitAt 17
216
217 This may seem a little indirect, but note that many equivalents of
218 @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
219 corresponding functions by prefixing them with @(>->)@. Thus something like
220
221 > stripLines = view Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
222
223 would drop the leading white space from each line.
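
A hedged example session (assuming @OverloadedStrings@, @Pipes.Group@ imported as @Group@,
and 'Text.stdout' from @Pipes.Text.IO@; the output is what one would expect, not a
transcript of an actual run):

> >>> runEffect $ stripLines (Text.fromLazy "  one\n  two\n") >-> Text.stdout
> one
> two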
224
225 The lenses in this library are marked as /improper/; this just means that
226 they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
227 Just for this reason, though, the magnificent complexities of the lens libraries
228 are a distraction. The lens combinators to keep in mind, the ones that make sense for
229 our lenses, are @view@ \/ @(^.)@, @over@ \/ @(%~)@, and @zoom@.
230
231 One need only keep in mind that if @l@ is a @Lens'_ a b@, then:
232
233 -}
234 {- $view
235 @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
236 is the corresponding @b@; as was said above, this function will be exactly the
237 function you think it is, given its name. Thus to uppercase the first n characters
238 of a Producer, leaving the rest the same, we could write:
239
240
241 > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
242 > p'
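
A hedged way to try it out (assuming @OverloadedStrings@, and 'Text.stdout' from
@Pipes.Text.IO@ imported as @Text@, as in the introduction):

> >>> runEffect $ upper 4 (Text.fromLazy "hello world") >-> Text.stdout
> HELLo world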
243 -}
244 {- $over
245 @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
246 @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
247 the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
248 (it can also be written @l %~ f@).
249 For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
250 So above we might have written things like these:
251
252 > stripLines = Text.lines %~ maps (>-> Text.stripStart)
253 > stripLines = over Text.lines (maps (>-> Text.stripStart))
254 > upper n = Text.splitAt n %~ (>-> Text.toUpper)
255
256 -}
257 {- $zoom
258 @zoom l@, finally, is a function from a @Parser b m r@
259 to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
260 Its use is easiest to see with a decoding lens like 'utf8', which
261 \"sees\" a Text producer hidden inside a ByteString producer:
262 @drawChar@ is a Text parser, returning a @Maybe Char@, @zoom utf8 drawChar@ is
263 a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
264 a list of everything produced from a Producer, leaving only the return value; it would
265 usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
266 returns a list of Text chunks containing the first seventeen Chars, and returns the rest of
267 the Text Producer for further parsing. Suppose that we want, inexplicably, to
268 modify the casing of a Text Producer according to any instruction it might
269 contain at the start. Then we might write something like this:
270
271 > obey :: Monad m => Producer Text m b -> Producer Text m b
272 > obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
273 > let seven = T.concat ts
274 > case T.toUpper seven of
275 > "TOUPPER" -> p' >-> Text.toUpper
276 > "TOLOWER" -> p' >-> Text.toLower
277 > _ -> do yield seven
278 > p'
279
280
281 > >>> let doc = each ["toU","pperTh","is document.\n"]
282 > >>> runEffect $ obey doc >-> Text.stdout
283 > THIS DOCUMENT.
284
285 The purpose of exporting lenses is the mental economy achieved with this three-way
286 applicability. That one expression, e.g. @lines@ or @splitAt 17@, can have these
287 three uses is no more surprising than that a pipe can act as a function modifying
288 the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
289 -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
290 @pipe >-> consumer@.
291
292 The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
293 both <http://hackage.haskell.org/package/lens lens> and
294 <http://hackage.haskell.org/package/lens-family lens-family>. The use of 'zoom' is explained
295 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
296 and to some extent in the @Pipes.Text.Encoding@ module here.
297
298 -}
299 {- $special
300 These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
301 This is in the types that are most closely associated with our central text type,
302 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
303
304 > splitAt :: Int -> Text -> (Text, Text)
305 > lines :: Text -> [Text]
306 > chunksOf :: Int -> Text -> [Text]
307
308 which relate a Text with a pair of Texts or a list of Texts.
309 The corresponding functions here (taking account of \'lensification\') are
310
311 > view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
312 > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
313 > view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
314
315 Some of the types may be more readable if you imagine that we have introduced
316 our own type synonyms
317
318 > type Text m r = Producer T.Text m r
319 > type Texts m r = FreeT (Producer T.Text m) m r
320
321 Then we would think of the types above as
322
323 > view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
324 > view lines :: (Monad m) => Text m r -> Texts m r
325 > view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
326
327 which brings one closer to the types of the similar functions in @Data.Text.Lazy@
328
329 In the type @Producer Text m (Producer Text m r)@ the second
330 element of the \'pair\' of effectful Texts cannot simply be retrieved
331 with something like 'snd'. This is an \'effectful\' pair, and one must work
332 through the effects of the first element to arrive at the second Text stream, even
333 if you are proposing to throw the Text in the first element away.
334 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
335
336 > join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r
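
With 'join' in hand, the @upper@ example from the discussion of 'view' above could just
as well be written as follows (a hedged restatement of the same definition):

> upper n p = join (p ^. Text.splitAt n >-> Text.toUpper)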
337
338 The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
339 @FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful)
340 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
341 together things of the forms:
342
343 > r
344 > (Text,r)
345 > (Text, (Text, r))
346 > (Text, (Text, (Text, r)))
347 > (Text, (Text, (Text, (Text, r))))
348 > ...
349
350 (We might also have identified the sum of those types with @Free ((,) Text) r@
351 -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
352
353 Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
354 @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
355
356 > m r
357 > Text m r
358 > Text m (Text m r)
359 > Text m (Text m (Text m r))
360 > Text m (Text m (Text m (Text m r)))
361 > ...
362
363 We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
364 or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
365 result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@ .
366
367 One might think that
368
369 > lines :: Monad m => Lens'_ (Producer Text m r) (FreeT (Producer Text m) m r)
370 > view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
371
372 should really have the type
373
374 > lines :: Monad m => Pipe Text Text m r
375
376 as e.g. 'toUpper' does. But this would spoil the control we are
377 attempting to maintain over the size of chunks. It is in fact just
378 as unreasonable to want such a pipe as to want
379
380 > Data.Text.Lazy.lines :: Text -> Text
381
382 to 'rechunk' the strict Text chunks inside the lazy Text to respect
383 line boundaries. In fact we have
384
385 > Data.Text.Lazy.lines :: Text -> [Text]
386 > Prelude.lines :: String -> [String]
387
388 where the elements of the list are themselves lazy Texts or Strings; the use
389 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
390
391 The @Pipes.Group@ module, which can generally be imported without qualification,
392 provides many functions for working with things of type @FreeT (Producer a m) m r@.
393 In particular it conveniently exports the constructors for @FreeT@ and the associated
394 @FreeF@ type -- a fancy form of @Either@, namely
395
396 > data FreeF f a b = Pure a | Free (f b)
397
398 for pattern-matching. Consider the implementation of the 'words' function, or
399 of the part of the lens that takes us to the words; it is compact but exhibits many
400 of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
401 constructors. Keep in mind that
402
403 > newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
404 > next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
405
406 Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
407 the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
408
409 > words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
410 > words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
411 > x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
412 > return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
413 > Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
414 > Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
415 > p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
416 > (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
417 > return (words p'') -- We then carry on with the rest, which is likely to begin with space.
418
419 -}
420
421 type Lens s t a b = forall f . Functor f => (a -> f b) -> (s -> f t)
422
423
424 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
425 a ^. lens = getConstant (lens Constant a)
426
427 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
428 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
429 fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
430 {-# INLINE fromLazy #-}
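-- A hedged round trip: for any lazy text @txt@,
--
-- > toLazy (fromLazy txt) == txt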
431
432
433
434 -- | Apply a transformation to each 'Char' in the stream
435 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
436 map f = P.map (T.map f)
437 {-# INLINABLE map #-}
438
439 {-# RULES "p >-> map f" forall p f .
440 p >-> map f = for p (\txt -> yield (T.map f txt))
441 #-}
442
443 -- | Map a function over the characters of a text stream and concatenate the results
444 concatMap
445 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
446 concatMap f = P.map (T.concatMap f)
447 {-# INLINABLE concatMap #-}
448
449 {-# RULES "p >-> concatMap f" forall p f .
450 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
451 #-}
452
453
454 -- | Transform a Pipe of 'String's into one of 'Text' chunks
455 pack :: Monad m => Pipe String Text m r
456 pack = P.map T.pack
457 {-# INLINEABLE pack #-}
458
459 {-# RULES "p >-> pack" forall p .
460 p >-> pack = for p (\txt -> yield (T.pack txt))
461 #-}
462
463 -- | Transform a Pipe of 'Text' chunks into one of 'String's
464 unpack :: Monad m => Pipe Text String m r
465 unpack = for cat (\t -> yield (T.unpack t))
466 {-# INLINEABLE unpack #-}
467
468 {-# RULES "p >-> unpack" forall p .
469 p >-> unpack = for p (\txt -> yield (T.unpack txt))
470 #-}
471
472 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
473 -- here acting as 'Text' pipes, rather than acting all at once on a lazy 'Text'
474 toCaseFold :: Monad m => Pipe Text Text m r
475 toCaseFold = P.map T.toCaseFold
476 {-# INLINEABLE toCaseFold #-}
477
478 {-# RULES "p >-> toCaseFold" forall p .
479 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
480 #-}
481
482
483 -- | lowercase incoming 'Text'
484 toLower :: Monad m => Pipe Text Text m r
485 toLower = P.map T.toLower
486 {-# INLINEABLE toLower #-}
487
488 {-# RULES "p >-> toLower" forall p .
489 p >-> toLower = for p (\txt -> yield (T.toLower txt))
490 #-}
491
492 -- | uppercase incoming 'Text'
493 toUpper :: Monad m => Pipe Text Text m r
494 toUpper = P.map T.toUpper
495 {-# INLINEABLE toUpper #-}
496
497 {-# RULES "p >-> toUpper" forall p .
498 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
499 #-}
500
501 -- | Remove leading white space from an incoming succession of 'Text's
502 stripStart :: Monad m => Pipe Text Text m r
503 stripStart = do
504 chunk <- await
505 let text = T.stripStart chunk
506 if T.null text
507 then stripStart
508 else do yield text
509 cat
510 {-# INLINEABLE stripStart #-}
511
512 -- | @(take n)@ only allows @n@ individual characters to pass;
513 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
514 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
515 take n0 = go n0 where
516 go n
517 | n <= 0 = return ()
518 | otherwise = do
519 txt <- await
520 let len = fromIntegral (T.length txt)
521 if (len > n)
522 then yield (T.take (fromIntegral n) txt)
523 else do
524 yield txt
525 go (n - len)
526 {-# INLINABLE take #-}
527
528 -- | @(drop n)@ drops the first @n@ characters
529 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
530 drop n0 = go n0 where
531 go n
532 | n <= 0 = cat
533 | otherwise = do
534 txt <- await
535 let len = fromIntegral (T.length txt)
536 if (len >= n)
537 then do
538 yield (T.drop (fromIntegral n) txt)
539 cat
540 else go (n - len)
541 {-# INLINABLE drop #-}
542
543 -- | Take characters until they fail the predicate
544 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
545 takeWhile predicate = go
546 where
547 go = do
548 txt <- await
549 let (prefix, suffix) = T.span predicate txt
550 if (T.null suffix)
551 then do
552 yield txt
553 go
554 else yield prefix
555 {-# INLINABLE takeWhile #-}
556
557 -- | Drop characters until they fail the predicate
558 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
559 dropWhile predicate = go where
560 go = do
561 txt <- await
562 case T.findIndex (not . predicate) txt of
563 Nothing -> go
564 Just i -> do
565 yield (T.drop i txt)
566 cat
567 {-# INLINABLE dropWhile #-}
568
569 -- | Only allows 'Char's to pass if they satisfy the predicate
570 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
571 filter predicate = P.map (T.filter predicate)
572 {-# INLINABLE filter #-}
573
574 {-# RULES "p >-> filter q" forall p q .
575 p >-> filter q = for p (\txt -> yield (T.filter q txt))
576 #-}
577
578 -- | Strict left scan over the characters
579 scan
580 :: (Monad m)
581 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
582 scan step begin = do
583 yield (T.singleton begin)
584 go begin
585 where
586 go c = do
587 txt <- await
588 let txt' = T.scanl step c txt
589 c' = T.last txt'
590 yield (T.tail txt')
591 go c'
592 {-# INLINABLE scan #-}
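-- A hedged example: 'scan' parallels 'TL.scanl' on a lazy 'Text', e.g.
--
-- > toLazy (fromLazy (TL.pack "abc") >-> scan max 'a') == TL.pack "aabc"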
593
594 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
595 'TL.Text'
596 -}
597 toLazy :: Producer Text Identity () -> TL.Text
598 toLazy = TL.fromChunks . P.toList
599 {-# INLINABLE toLazy #-}
600
601 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
602 'TL.Text'
603
604 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
605 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
606 immediately as they are generated instead of loading them all into memory.
607 -}
608 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
609 toLazyM = liftM TL.fromChunks . P.toListM
610 {-# INLINABLE toLazyM #-}
611
612 -- | Reduce the text stream using a strict left fold over characters
613 foldChars
614 :: Monad m
615 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
616 foldChars step begin done = P.fold (T.foldl' step) begin done
617 {-# INLINABLE foldChars #-}
618
619 -- | Retrieve the first 'Char'
620 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
621 head = go
622 where
623 go p = do
624 x <- nextChar p
625 case x of
626 Left _ -> return Nothing
627 Right (c, _) -> return (Just c)
628 {-# INLINABLE head #-}
629
630 -- | Retrieve the last 'Char'
631 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
632 last = go Nothing
633 where
634 go r p = do
635 x <- next p
636 case x of
637 Left () -> return r
638 Right (txt, p') ->
639 if (T.null txt)
640 then go r p'
641 else go (Just $ T.last txt) p'
642 {-# INLINABLE last #-}
643
644 -- | Determine if the stream is empty
645 null :: (Monad m) => Producer Text m () -> m Bool
646 null = P.all T.null
647 {-# INLINABLE null #-}
648
649 -- | Count the number of characters in the stream
650 length :: (Monad m, Num n) => Producer Text m () -> m n
651 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
652 {-# INLINABLE length #-}
653
654 -- | Fold that returns whether any received 'Char' satisfies the predicate
655 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
656 any predicate = P.any (T.any predicate)
657 {-# INLINABLE any #-}
658
659 -- | Fold that returns whether all received 'Char's satisfy the predicate
660 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
661 all predicate = P.all (T.all predicate)
662 {-# INLINABLE all #-}
663
664 -- | Return the maximum 'Char' within a text stream
665 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
666 maximum = P.fold step Nothing id
667 where
668 step mc txt =
669 if (T.null txt)
670 then mc
671 else Just $ case mc of
672 Nothing -> T.maximum txt
673 Just c -> max c (T.maximum txt)
674 {-# INLINABLE maximum #-}
675
676 -- | Return the minimum 'Char' within a text stream (surely very useful!)
677 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
678 minimum = P.fold step Nothing id
679 where
680 step mc txt =
681 if (T.null txt)
682 then mc
683 else case mc of
684 Nothing -> Just (T.minimum txt)
685 Just c -> Just (min c (T.minimum txt))
686 {-# INLINABLE minimum #-}
687
688 -- | Find the first element in the stream that matches the predicate
689 find
690 :: (Monad m)
691 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
692 find predicate p = head (p >-> filter predicate)
693 {-# INLINABLE find #-}
694
695 -- | Index into a text stream
696 index
697 :: (Monad m, Integral a)
698 => a-> Producer Text m () -> m (Maybe Char)
699 index n p = head (p >-> drop n)
700 {-# INLINABLE index #-}
701
702
703 -- | Count how many times the given 'Text' occurs in the stream (each chunk is searched separately, so occurrences spanning chunk boundaries are not counted)
704 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
705 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
706 {-# INLINABLE count #-}
707
708
709 -- | Consume the first character from a stream of 'Text'
710 --
711 -- 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
712 -- succeeds with a 'Right' providing the next character and the remainder of the
713 -- 'Producer'.
714
715 nextChar
716 :: (Monad m)
717 => Producer Text m r
718 -> m (Either r (Char, Producer Text m r))
719 nextChar = go
720 where
721 go p = do
722 x <- next p
723 case x of
724 Left r -> return (Left r)
725 Right (txt, p') -> case (T.uncons txt) of
726 Nothing -> go p'
727 Just (c, txt') -> return (Right (c, yield txt' >> p'))
728 {-# INLINABLE nextChar #-}
729
730 -- | Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the 'Producer' is empty
731
732 drawChar :: (Monad m) => Parser Text m (Maybe Char)
733 drawChar = do
734 x <- PP.draw
735 case x of
736 Nothing -> return Nothing
737 Just txt -> case (T.uncons txt) of
738 Nothing -> drawChar
739 Just (c, txt') -> do
740 PP.unDraw txt'
741 return (Just c)
742 {-# INLINABLE drawChar #-}
743
744 -- | Push back a 'Char' onto the underlying 'Producer'
745 unDrawChar :: (Monad m) => Char -> Parser Text m ()
746 unDrawChar c = modify (yield (T.singleton c) >>)
747 {-# INLINABLE unDrawChar #-}
748
749 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
750 push the 'Char' back
751
752 > peekChar = do
753 > x <- drawChar
754 > case x of
755 >         Nothing -> return ()
756 >         Just c  -> unDrawChar c
757 > return x
758
759 -}
760
761 peekChar :: (Monad m) => Parser Text m (Maybe Char)
762 peekChar = do
763 x <- drawChar
764 case x of
765 Nothing -> return ()
766 Just c -> unDrawChar c
767 return x
768 {-# INLINABLE peekChar #-}
769
770 {-| Check if the underlying 'Producer' has no more characters
771
772 Note that this will skip over empty 'Text' chunks, unlike
773 'PP.isEndOfInput' from @pipes-parse@, which would consider
774 an empty 'Text' a valid bit of input.
775
776 > isEndOfChars = liftM isNothing peekChar
777 -}
778 isEndOfChars :: (Monad m) => Parser Text m Bool
779 isEndOfChars = do
780 x <- peekChar
781 return (case x of
782 Nothing -> True
783 Just _-> False )
784 {-# INLINABLE isEndOfChars #-}
785
786
787 -- | Splits a 'Producer' after the given number of characters
788 splitAt
789 :: (Monad m, Integral n)
790 => n
791 -> Lens (Producer Text m x)
792 (Producer Text m y)
793 (Producer Text m (Producer Text m x))
794 (Producer Text m (Producer Text m y))
795 splitAt n0 k p0 = fmap join (k (go n0 p0))
796 where
797 go 0 p = return p
798 go n p = do
799 x <- lift (next p)
800 case x of
801 Left r -> return (return r)
802 Right (txt, p') -> do
803 let len = fromIntegral (T.length txt)
804 if (len <= n)
805 then do
806 yield txt
807 go (n - len) p'
808 else do
809 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
810 yield prefix
811 return (yield suffix >> p')
812 {-# INLINABLE splitAt #-}
813
814
815 -- | Split a text stream in two, producing the longest
816 -- consecutive group of characters that satisfy the predicate
817 -- and returning the rest
818
819 span
820 :: (Monad m)
821 => (Char -> Bool)
822 -> Lens (Producer Text m x)
823 (Producer Text m y)
824 (Producer Text m (Producer Text m x))
825 (Producer Text m (Producer Text m y))
826 span predicate k p0 = fmap join (k (go p0))
827 where
828 go p = do
829 x <- lift (next p)
830 case x of
831 Left r -> return (return r)
832 Right (txt, p') -> do
833 let (prefix, suffix) = T.span predicate txt
834 if (T.null suffix)
835 then do
836 yield txt
837 go p'
838 else do
839 yield prefix
840 return (yield suffix >> p')
841 {-# INLINABLE span #-}
842
843 {-| Split a text stream in two, producing the longest
844 consecutive group of characters that don't satisfy the predicate
845 -}
846 break
847 :: (Monad m)
848 => (Char -> Bool)
849 -> Lens (Producer Text m x)
850 (Producer Text m y)
851 (Producer Text m (Producer Text m x))
852 (Producer Text m (Producer Text m y))
853 break predicate = span (not . predicate)
854 {-# INLINABLE break #-}
855
856 {-| Improper lens that splits after the first group of equivalent Chars, as
857 defined by the given equivalence relation
858 -}
859 groupBy
860 :: (Monad m)
861 => (Char -> Char -> Bool)
862 -> Lens (Producer Text m x)
863 (Producer Text m y)
864 (Producer Text m (Producer Text m x))
865 (Producer Text m (Producer Text m y))
866 groupBy equals k p0 = fmap join (k ((go p0))) where
867 go p = do
868 x <- lift (next p)
869 case x of
870 Left r -> return (return r)
871 Right (txt, p') -> case T.uncons txt of
872 Nothing -> go p'
873 Just (c, _) -> (yield txt >> p') ^. span (equals c)
874 {-# INLINABLE groupBy #-}
875
876 -- | Improper lens that splits after the first succession of identical 'Char's
877 group :: Monad m
878 => Lens'_ (Producer Text m r)
879 (Producer Text m (Producer Text m r))
880 group = groupBy (==)
881 {-# INLINABLE group #-}
882
883 {-| Improper lens that splits a 'Producer' after the first word
884
885 Unlike 'words', this does not drop leading whitespace
886 -}
887 word :: (Monad m)
888 => Lens (Producer Text m x)
889 (Producer Text m y)
890 (Producer Text m (Producer Text m x))
891 (Producer Text m (Producer Text m y))
892 word k p0 = fmap join (k (to p0))
893 where
894 to p = do
895 p' <- p^.span isSpace
896 p'^.break isSpace
897 {-# INLINABLE word #-}
898
899
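-- | Improper lens that splits a 'Producer' after the first line; the rest of the
-- stream, if non-empty, begins with the terminating newline character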
900 line :: (Monad m)
901 => Lens (Producer Text m x)
902 (Producer Text m y)
903 (Producer Text m (Producer Text m x))
904 (Producer Text m (Producer Text m y))
905 line = break (== '\n')
906
907 {-# INLINABLE line #-}
908
909
910 -- | Intersperse a 'Char' in between the characters of a stream of 'Text'
911 intersperse
912 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
913 intersperse c = go0
914 where
915 go0 p = do
916 x <- lift (next p)
917 case x of
918 Left r -> return r
919 Right (txt, p') -> do
920 yield (T.intersperse c txt)
921 go1 p'
922 go1 p = do
923 x <- lift (next p)
924 case x of
925 Left r -> return r
926 Right (txt, p') -> do
927 yield (T.singleton c)
928 yield (T.intersperse c txt)
929 go1 p'
930 {-# INLINABLE intersperse #-}
931
932
933
934 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
935 packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x)
936 packChars = Data.Profunctor.dimap to (fmap from)
937 where
938 -- to :: Monad m => Producer Char m x -> Producer Text m x
939 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
940
941 step diffAs c = diffAs . (c:)
942
943 done diffAs = T.pack (diffAs [])
944
945 -- from :: Monad m => Producer Text m x -> Producer Char m x
946 from p = for p (each . T.unpack)
947
948 {-# INLINABLE packChars #-}
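-- A hedged example of the packing direction:
--
-- > toLazy (each ['a','b','c'] ^. packChars) == TL.pack "abc"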
949
950 defaultChunkSize :: Int
951 defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
952
953 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
954 chunksOf
955 :: (Monad m, Integral n)
956 => n -> Lens (Producer Text m x)
957 (Producer Text m y)
958 (FreeT (Producer Text m) m x)
959 (FreeT (Producer Text m) m y)
960 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
961 where
962 go p = do
963 x <- next p
964 return $ case x of
965 Left r -> Pure r
966 Right (txt, p') -> Free $ do
967 p'' <- (yield txt >> p') ^. splitAt n
968 return $ FreeT (go p'')
969 {-# INLINABLE chunksOf #-}
970
971
972 {-| Split a text stream into sub-streams delimited by characters that satisfy the
973 predicate
974 -}
975 splitsWith
976 :: (Monad m)
977 => (Char -> Bool)
978 -> Producer Text m r
979 -> FreeT (Producer Text m) m r
980 splitsWith predicate p0 = FreeT (go0 p0)
981 where
982 go0 p = do
983 x <- next p
984 case x of
985 Left r -> return (Pure r)
986 Right (txt, p') ->
987 if (T.null txt)
988 then go0 p'
989 else return $ Free $ do
990 p'' <- (yield txt >> p') ^. span (not . predicate)
991 return $ FreeT (go1 p'')
992 go1 p = do
993 x <- nextChar p
994 return $ case x of
995 Left r -> Pure r
996 Right (_, p') -> Free $ do
997 p'' <- p' ^. span (not . predicate)
998 return $ FreeT (go1 p'')
999 {-# INLINABLE splitsWith #-}
1000
1001 -- | Split a text stream using the given 'Char' as the delimiter
1002 splits :: (Monad m)
1003 => Char -> Lens (Producer Text m x)
1004 (Producer Text m y)
1005 (FreeT (Producer Text m) m x)
1006 (FreeT (Producer Text m) m y)
1007 splits c k p =
1008 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
1009 {-# INLINABLE splits #-}
1010
1011 {-| Improper lens from a stream of 'Text' to 'FreeT'-delimited groups of equivalent 'Char's,
1012 as determined by the given equivalence relation
1013 -}
1014 groupsBy
1015 :: Monad m
1016 => (Char -> Char -> Bool)
1017 -> Lens (Producer Text m x)
1018 (Producer Text m y)
1019 (FreeT (Producer Text m) m x)
1020 (FreeT (Producer Text m) m y)
1021 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
1022 go p = do x <- next p
1023 case x of Left r -> return (Pure r)
1024 Right (bs, p') -> case T.uncons bs of
1025 Nothing -> go p'
1026 Just (c, _) -> do return $ Free $ do
1027 p'' <- (yield bs >> p')^.span (equals c)
1028 return $ FreeT (go p'')
1029 {-# INLINABLE groupsBy #-}
1030
1031
1032 -- | Like 'groupsBy', where the equality predicate is ('==')
1033 groups
1034 :: Monad m
1035 => Lens (Producer Text m x)
1036 (Producer Text m y)
1037 (FreeT (Producer Text m) m x)
1038 (FreeT (Producer Text m) m y)
1039 groups = groupsBy (==)
1040 {-# INLINABLE groups #-}
1041
1042
1043
1044 {-| Split a text stream into 'FreeT'-delimited lines
1045 -}
1046 lines
1047 :: (Monad m)
1048 => Lens (Producer Text m x)
1049 (Producer Text m y)
1050 (FreeT (Producer Text m) m x)
1051 (FreeT (Producer Text m) m y)
1052 lines k p = fmap _unlines (k (_lines p))
1053 {-# INLINABLE lines #-}
1054
1055 _lines
1056 :: Monad m
1057 => Producer Text m x -> FreeT (Producer Text m) m x
1058 _lines p0 = FreeT (go0 p0)
1059 where
1060 go0 p = do
1061 x <- next p
1062 case x of
1063 Left r -> return (Pure r)
1064 Right (txt, p') ->
1065 if (T.null txt)
1066 then go0 p'
1067 else return $ Free $ go1 (yield txt >> p')
1068 go1 p = do
1069 p' <- p ^. break ('\n' ==)
1070 return $ FreeT $ do
1071 x <- nextChar p'
1072 case x of
1073 Left r -> return $ Pure r
1074 Right (_, p'') -> go0 p''
1075 {-# INLINABLE _lines #-}
1076
1077 _unlines
1078 :: Monad m
1079 => FreeT (Producer Text m) m x -> Producer Text m x
1080 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
1081 {-# INLINABLE _unlines #-}
1082
1083
1084
1085
1086 -- | Split a text stream into 'FreeT'-delimited words
1087 words
1088 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
1089 words = Data.Profunctor.dimap go (fmap _unwords)
1090 where
1091 go p = FreeT $ do
1092 x <- next (p >-> dropWhile isSpace)
1093 return $ case x of
1094 Left r -> Pure r
1095 Right (bs, p') -> Free $ do
1096 p'' <- (yield bs >> p') ^. break isSpace
1097 return (go p'')
1098 _unwords = PG.intercalates (yield $ T.singleton ' ')
1099
1100 {-# INLINABLE words #-}
1101
1102
1103 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1104 interspersing a text stream in between them
1105 -}
1106 intercalate
1107 :: (Monad m)
1108 => Producer Text m ()
1109 -> FreeT (Producer Text m) m r
1110 -> Producer Text m r
1111 intercalate p0 = go0
1112 where
1113 go0 f = do
1114 x <- lift (runFreeT f)
1115 case x of
1116 Pure r -> return r
1117 Free p -> do
1118 f' <- p
1119 go1 f'
1120 go1 f = do
1121 x <- lift (runFreeT f)
1122 case x of
1123 Pure r -> return r
1124 Free p -> do
1125 p0
1126 f' <- p
1127 go1 f'
1128 {-# INLINABLE intercalate #-}
1129
1130 {-| Improper lens that joins 'FreeT'-delimited lines into a text stream
1131 -}
1132 unlines
1133 :: (Monad m)
1134 => Lens (FreeT (Producer Text m) m x)
1135 (FreeT (Producer Text m) m y)
1136 (Producer Text m x)
1137 (Producer Text m y)
1138
1139 unlines k p = fmap _lines (k (_unlines p))
1140 {-# INLINABLE unlines #-}
1141
1142 {-| Join 'FreeT'-delimited words into a text stream
1143 -}
1144 unwords
1145 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1146 unwords = intercalate (yield $ T.singleton ' ')
1147 {-# INLINABLE unwords #-}
1148
1149
1150 {- $reexports
1151
1152 @Data.Text@ re-exports the 'Text' type, @Data.ByteString@ the 'ByteString' type, and @Data.Profunctor@ the 'Profunctor' class.
1153
1154 @Pipes.Parse@ re-exports the 'Parser' type, and @Pipes.Group@ re-exports 'concats', 'intercalates', and the 'FreeT' and 'FreeF' types.
1155 -}
1156
1157