1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
3
4 module Pipes.Text (
5 -- * Effectful Text
6 -- $intro
7
8 -- * Lenses
9 -- $lenses
10
11 -- ** @view@ \/ @(^.)@
12 -- $view
13
14 -- ** @over@ \/ @(%~)@
15 -- $over
16
17 -- ** @zoom@
18 -- $zoom
19
20 -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
21 -- $special
22
23 -- * Producers
24 fromLazy
25
26 -- * Pipes
27 , map
28 , concatMap
29 , take
30 , drop
31 , takeWhile
32 , dropWhile
33 , filter
34 , scan
35 , pack
36 , unpack
37 , toCaseFold
38 , toLower
39 , toUpper
40 , stripStart
41
42 -- * Folds
43 , toLazy
44 , toLazyM
45 , foldChars
46 , head
47 , last
48 , null
49 , length
50 , any
51 , all
52 , maximum
53 , minimum
54 , find
55 , index
56 , count
57
58 -- * Primitive Character Parsers
59 , nextChar
60 , drawChar
61 , unDrawChar
62 , peekChar
63 , isEndOfChars
64
65 -- * Parsing Lenses
66 , splitAt
67 , span
68 , break
69 , groupBy
70 , group
71 , word
72 , line
73
74 -- * FreeT Splitters
75 , chunksOf
76 , splitsWith
77 , splits
78 , groupsBy
79 , groups
80 , lines
81 , words
82
83 -- * Transformations
84 , intersperse
85 , packChars
86
87 -- * Joiners
88 , intercalate
89 , unlines
90 , unwords
91
92 -- * Re-exports
93 -- $reexports
94 , module Data.ByteString
95 , module Data.Text
96 , module Data.Profunctor
97 , module Pipes.Parse
98 , module Pipes.Group
99 ) where
100
101 import Control.Applicative ((<*))
102 import Control.Monad (liftM, join)
103 import Control.Monad.Trans.State.Strict (StateT(..), modify)
104 import qualified Data.Text as T
105 import Data.Text (Text)
106 import qualified Data.Text.Lazy as TL
107 import Data.ByteString (ByteString)
108 import Data.Functor.Constant (Constant(Constant, getConstant))
109 import Data.Functor.Identity (Identity)
110 import Data.Profunctor (Profunctor)
111 import qualified Data.Profunctor
112 import Pipes
113 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
114 import qualified Pipes.Group as PG
115 import qualified Pipes.Parse as PP
116 import Pipes.Parse (Parser)
117 import Pipes.Text.Encoding (Lens'_, Iso'_)
118 import qualified Pipes.Prelude as P
119 import Data.Char (isSpace)
120 import Data.Word (Word8)
121 import Foreign.Storable (sizeOf)
122 import Data.Bits (shiftL)
123 import Prelude hiding (
124 all,
125 any,
126 break,
127 concat,
128 concatMap,
129 drop,
130 dropWhile,
131 elem,
132 filter,
133 head,
134 last,
135 lines,
136 length,
137 map,
138 maximum,
139 minimum,
140 notElem,
141 null,
142 readFile,
143 span,
144 splitAt,
145 take,
146 takeWhile,
147 unlines,
148 unwords,
149 words,
150 writeFile )
151
152 {- $intro
153 This package provides @pipes@ utilities for /text streams/ or /character streams/,
154 realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
155 and thus you will generally want @Data.Text@ in scope. But the type
156 @Producer Text m r@, as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
157
158 This particular module provides many functions equivalent in one way or another to
159 the pure functions in
160 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
161 They transform, divide, group and fold text streams. Though @Producer Text m r@
162 is the type of \'effectful Text\', the functions in this module are \'pure\'
163 in the sense that they are uniformly monad-independent.
164 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
165 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
166 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
167
168 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
169 strict Text: the implementation is arranged so that the individual strict 'Text'
170 chunks are kept to a reasonable size; the user is not aware of the divisions
171 between the connected 'Text' chunks.
172 So also here: the functions in this module are designed to operate on streams
173 without regard to chunk boundaries. This means that they may freely split
174 text into smaller texts and /discard empty texts/. The objective, though, is
175 that they should /never concatenate texts/ in order to provide strict upper
176 bounds on memory usage.
177
178 For example, to stream only the first three lines of 'stdin' to 'stdout' you
179 might write:
180
181 > import Pipes
182 > import qualified Pipes.Text as Text
183 > import qualified Pipes.Text.IO as Text
184 > import Pipes.Group (takes')
185 > import Lens.Family
186 >
187 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
188 > where
189 > takeLines n = Text.unlines . takes' n . view Text.lines
190
191 The above program will never bring more than one chunk of text (~ 32 KB) into
192 memory, no matter how long the lines are.
193
194 -}
195 {- $lenses
196 As this example shows, one superficial difference from @Data.Text.Lazy@
197 is that many of the operations, like 'lines', are \'lensified\'; this has a
198 number of advantages (where it is possible); in particular it facilitates their
199 use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
200 sense). The disadvantage, famously, is that the messages you get for type errors can be
201 a little alarming. The remarks that follow in this section are for non-lens adepts.
202
203 Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
204 intuitively corresponding function when used with @view@ or @(^.)@. Instead of
205 writing:
206
207 > splitAt 17 producer
208
209 as we would with the Prelude or Text functions, we write
210
211 > view (splitAt 17) producer
212
213 or equivalently
214
215 > producer ^. splitAt 17
216
217 This may seem a little indirect, but note that many equivalents of
218 @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
219 corresponding functions by prefixing them with @(>->)@. Thus something like
220
221 > stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
222
223 would drop the leading white space from each line.
224
225 The lenses in this library are marked as /improper/; this just means that
226 they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
227 Just for this reason, though, the magnificent complexities of the lens libraries
228 are a distraction. The lens combinators to keep in mind, the ones that make sense for
229 our lenses, are @view@ \/ @(^.)@, @over@ \/ @(%~)@, and @zoom@.
230
231 One need only keep in mind that if @l@ is a @Lens'_ a b@, then:
232
233 -}
234 {- $view
235 @view l@ is a function @a -> b@. Thus @view l a@ (also written @a ^. l@)
236 is the corresponding @b@; as was said above, this function will be exactly the
237 function you think it is, given its name. Thus to uppercase the first n characters
238 of a Producer, leaving the rest the same, we could write:
239
240
241 > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
242 > p'
243 -}
244 {- $over
245 @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
246 @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
247 the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
248 (it can also be written @l %~ f@).
249 For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
250 So above we might have written things like these:
251
252 > stripLines = Text.lines %~ maps (>-> Text.stripStart)
253 > stripLines = over Text.lines (maps (>-> Text.stripStart))
254 > upper n = Text.splitAt n %~ (>-> Text.toUpper)
255
256 -}
257 {- $zoom
258 @zoom l@, finally, is a function from a @Parser b m r@
259 to a @Parser a m r@ (or, more generally, from a @StateT (Producer b m x) m r@ to a @StateT (Producer a m x) m r@).
260 Its use is easiest to see with a decoding lens like 'utf8', which
261 \"sees\" a Text producer hidden inside a ByteString producer:
262 @drawChar@ is a Text parser, returning a @Maybe Char@, @zoom utf8 drawChar@ is
263 a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
264 a list of everything produced from a Producer, leaving only the return value; it would
265 usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
266 returns a list of Text chunks containing the first seventeen Chars, and leaves the rest of
267 the Text Producer for further parsing. Suppose that we want, inexplicably, to
268 modify the casing of a Text Producer according to any instruction it might
269 contain at the start. Then we might write something like this:
270
271 > obey :: Monad m => Producer Text m b -> Producer Text m b
272 > obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
273 > let seven = T.concat ts
274 > case T.toUpper seven of
275 > "TOUPPER" -> p' >-> Text.toUpper
276 > "TOLOWER" -> p' >-> Text.toLower
277 > _ -> do yield seven
278 > p'
279
280
281 > >>> let doc = each ["toU","pperTh","is document.\n"]
282 > >>> runEffect $ obey doc >-> Text.stdout
283 > THIS DOCUMENT.
284
285 The purpose of exporting lenses is the mental economy achieved with this three-way
286 applicability. That one expression, e.g. @lines@ or @splitAt 17@, can have these
287 three uses is no more surprising than that a pipe can act as a function modifying
288 the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
289 -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
290 @pipe >-> consumer@.
291
292 The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
293 both <http://hackage.haskell.org/package/lens lens> and
294 <http://hackage.haskell.org/package/lens-family lens-family>. The use of 'zoom' is explained
295 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
296 and to some extent in the @Pipes.Text.Encoding@ module here.
297
298 -}
299 {- $special
300 These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@.
301 This is in the types that are most closely associated with our central text type,
302 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
303
304 > splitAt :: Int -> Text -> (Text, Text)
305 > lines :: Text -> [Text]
306 > chunksOf :: Int -> Text -> [Text]
307
308 which relate a Text with a pair of Texts or a list of Texts.
309 The corresponding functions here (taking account of \'lensification\') are
310
311 > view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
312 > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
313 > view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
314
315 Some of the types may be more readable if you imagine that we have introduced
316 our own type synonyms
317
318 > type Text m r = Producer T.Text m r
319 > type Texts m r = FreeT (Producer T.Text m) m r
320
321 Then we would think of the types above as
322
323 > view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
324 > view lines :: (Monad m) => Text m r -> Texts m r
325 > view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
326
327 which brings one closer to the types of the similar functions in @Data.Text.Lazy@.
328
329 In the type @Producer Text m (Producer Text m r)@ the second
330 element of the \'pair\' of effectful Texts cannot simply be retrieved
331 with something like 'snd'. This is an \'effectful\' pair, and one must work
332 through the effects of the first element to arrive at the second Text stream, even
333 if you are proposing to throw the Text in the first element away.
334 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
335
> 336 > join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r
337
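For instance (just a sketch: the helper name @skipPrefix@ is ours, and @drain@ is
@Pipes.Prelude.drain@), to discard the first seventeen characters and recover the
rest of the stream for further use, one might write:

> skipPrefix :: Monad m => Producer Text m r -> m (Producer Text m r)
> skipPrefix p = runEffect (view (splitAt 17) p >-> Pipes.Prelude.drain)
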
338 The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
339 @FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful)
340 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
341 together things of the forms:
342
343 > r
344 > (Text,r)
345 > (Text, (Text, r))
346 > (Text, (Text, (Text, r)))
347 > (Text, (Text, (Text, (Text, r))))
348 > ...
349
350 (We might also have identified the sum of those types with @Free ((,) Text) r@
351 -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
352
353 Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
354 @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
355
356 > m r
357 > Text m r
358 > Text m (Text m r)
359 > Text m (Text m (Text m r))
360 > Text m (Text m (Text m (Text m r)))
361 > ...
362
363 We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
364 or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
365 result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@.
366
367 One might think that
368
369 > lines :: Monad m => Lens'_ (Producer Text m r) (FreeT (Producer Text m) m r)
370 > view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
371
372 should really have the type
373
374 > lines :: Monad m => Pipe Text Text m r
375
376 as e.g. 'toUpper' does. But this would spoil the control we are
377 attempting to maintain over the size of chunks. It is in fact just
378 as unreasonable to want such a pipe as to want
379
380 > Data.Text.Lazy.lines :: Text -> Text
381
382 to 'rechunk' the strict Text chunks inside the lazy Text to respect
383 line boundaries. In fact we have
384
385 > Data.Text.Lazy.lines :: Text -> [Text]
386 > Prelude.lines :: String -> [String]
387
388 where the elements of the list are themselves lazy Texts or Strings; the use
389 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
390
391 The @Pipes.Group@ module, which can generally be imported without qualification,
392 provides many functions for working with things of type @FreeT (Producer a m) m r@.
393 In particular it conveniently exports the constructors for @FreeT@ and the associated
394 @FreeF@ type -- a fancy form of @Either@, namely
395
396 > data FreeF f a b = Pure a | Free (f b)
397
398 for pattern-matching. Consider the implementation of the 'words' function, or
399 of the part of the lens that takes us to the words; it is compact but exhibits many
400 of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
401 constructors. Keep in mind that
402
403 > newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
404 > next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
405
406 Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
407 the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
408
409 > words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
410 > words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
411 > x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
412 > return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
413 > Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
414 > Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
415 > p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
416 > (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
417 > return (words p'') -- We then carry on with the rest, which is likely to begin with space.
418
419 -}
420
421 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
422 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
423 fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
424 {-# INLINE fromLazy #-}
425
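{- A small usage sketch (the helper name @roundTrip@ is ours): 'fromLazy' and 'toLazy'
   invert one another up to chunking, and equality of lazy 'Text' ignores chunk
   boundaries, so the following should always be 'True':

> roundTrip :: TL.Text -> Bool
> roundTrip t = toLazy (fromLazy t) == t
-}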
426
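-- A local definition of @view@ / @(^.)@ in terms of the 'Constant' functor, used
-- internally below (e.g. in 'groupBy' and 'word') in place of a lens library's
-- combinator.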
427 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
428 a ^. lens = getConstant (lens Constant a)
429
430
431 -- | Apply a transformation to each 'Char' in the stream
432 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
433 map f = P.map (T.map f)
434 {-# INLINABLE map #-}
435
436 {-# RULES "p >-> map f" forall p f .
437 p >-> map f = for p (\txt -> yield (T.map f txt))
438 #-}
439
440 -- | Map a function over the characters of a text stream and concatenate the results
441 concatMap
442 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
443 concatMap f = P.map (T.concatMap f)
444 {-# INLINABLE concatMap #-}
445
446 {-# RULES "p >-> concatMap f" forall p f .
447 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
448 #-}
449
450
451 -- | Transform a Pipe of 'String's into one of 'Text' chunks
452 pack :: Monad m => Pipe String Text m r
453 pack = P.map T.pack
454 {-# INLINEABLE pack #-}
455
456 {-# RULES "p >-> pack" forall p .
457 p >-> pack = for p (\txt -> yield (T.pack txt))
458 #-}
459
460 -- | Transform a Pipe of 'Text' chunks into one of 'String's
461 unpack :: Monad m => Pipe Text String m r
462 unpack = for cat (\t -> yield (T.unpack t))
463 {-# INLINEABLE unpack #-}
464
465 {-# RULES "p >-> unpack" forall p .
466 p >-> unpack = for p (\txt -> yield (T.unpack txt))
467 #-}
468
469 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
470 -- here acting as 'Text' pipes, much as they would act on a lazy 'Text'
471 toCaseFold :: Monad m => Pipe Text Text m r
472 toCaseFold = P.map T.toCaseFold
473 {-# INLINEABLE toCaseFold #-}
474
475 {-# RULES "p >-> toCaseFold" forall p .
476 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
477 #-}
478
479
480 -- | lowercase incoming 'Text'
481 toLower :: Monad m => Pipe Text Text m r
482 toLower = P.map T.toLower
483 {-# INLINEABLE toLower #-}
484
485 {-# RULES "p >-> toLower" forall p .
486 p >-> toLower = for p (\txt -> yield (T.toLower txt))
487 #-}
488
489 -- | uppercase incoming 'Text'
490 toUpper :: Monad m => Pipe Text Text m r
491 toUpper = P.map T.toUpper
492 {-# INLINEABLE toUpper #-}
493
494 {-# RULES "p >-> toUpper" forall p .
495 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
496 #-}
497
498 -- | Remove leading white space from an incoming succession of 'Text's
499 stripStart :: Monad m => Pipe Text Text m r
500 stripStart = do
501 chunk <- await
502 let text = T.stripStart chunk
503 if T.null text
504 then stripStart
505 else do yield text
506 cat
507 {-# INLINEABLE stripStart #-}
508
509 -- | @(take n)@ only allows @n@ individual characters to pass;
510 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
511 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
512 take n0 = go n0 where
513 go n
514 | n <= 0 = return ()
515 | otherwise = do
516 txt <- await
517 let len = fromIntegral (T.length txt)
518 if (len > n)
519 then yield (T.take (fromIntegral n) txt)
520 else do
521 yield txt
522 go (n - len)
523 {-# INLINABLE take #-}
524
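{- A ghci sketch (assuming @OverloadedStrings@, as in the examples above): exactly
   four characters pass, however the input is chunked:

> >>> toLazy (each ["abc","def"] >-> take 4)
> "abcd"
-}
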
525 -- | @(drop n)@ drops the first @n@ characters
526 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
527 drop n0 = go n0 where
528 go n
529 | n <= 0 = cat
530 | otherwise = do
531 txt <- await
532 let len = fromIntegral (T.length txt)
533 if (len >= n)
534 then do
535 yield (T.drop (fromIntegral n) txt)
536 cat
537 else go (n - len)
538 {-# INLINABLE drop #-}
539
540 -- | Take characters until they fail the predicate
541 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
542 takeWhile predicate = go
543 where
544 go = do
545 txt <- await
546 let (prefix, suffix) = T.span predicate txt
547 if (T.null suffix)
548 then do
549 yield txt
550 go
551 else yield prefix
552 {-# INLINABLE takeWhile #-}
553
554 -- | Drop characters until they fail the predicate
555 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
556 dropWhile predicate = go where
557 go = do
558 txt <- await
559 case T.findIndex (not . predicate) txt of
560 Nothing -> go
561 Just i -> do
562 yield (T.drop i txt)
563 cat
564 {-# INLINABLE dropWhile #-}
565
566 -- | Only allows 'Char's to pass if they satisfy the predicate
567 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
568 filter predicate = P.map (T.filter predicate)
569 {-# INLINABLE filter #-}
570
571 {-# RULES "p >-> filter q" forall p q .
572 p >-> filter q = for p (\txt -> yield (T.filter q txt))
573 #-}
574
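{- A ghci sketch (assuming @OverloadedStrings@): filtering is per character, so the
   chunk structure of the input does not matter:

> >>> toLazy (each ["a b", " c"] >-> filter (/= ' '))
> "abc"
-}
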
575 -- | Strict left scan over the characters
576 scan
577 :: (Monad m)
578 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
579 scan step begin = do
580 yield (T.singleton begin)
581 go begin
582 where
583 go c = do
584 txt <- await
585 let txt' = T.scanl step c txt
586 c' = T.last txt'
587 yield (T.tail txt')
588 go c'
589 {-# INLINABLE scan #-}
590
591 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
592 'TL.Text'
593 -}
594 toLazy :: Producer Text Identity () -> TL.Text
595 toLazy = TL.fromChunks . P.toList
596 {-# INLINABLE toLazy #-}
597
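{- A ghci sketch (assuming @OverloadedStrings@):

> >>> toLazy (each ["Hello, ", "world!"])
> "Hello, world!"
-}
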
598 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
599 'TL.Text'
600
601 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
602 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
603 immediately as they are generated instead of loading them all into memory.
604 -}
605 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
606 toLazyM = liftM TL.fromChunks . P.toListM
607 {-# INLINABLE toLazyM #-}
608
609 -- | Reduce the text stream using a strict left fold over characters
610 foldChars
611 :: Monad m
612 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
613 foldChars step begin done = P.fold (T.foldl' step) begin done
614 {-# INLINABLE foldChars #-}
615
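{- A minimal sketch of its use (the helper name @charCount@ is ours): counting the
   characters of a stream with a strict left fold:

> charCount :: Monad m => Producer Text m () -> m Int
> charCount = foldChars (\n _ -> n + 1) 0 id
-}
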
616 -- | Retrieve the first 'Char'
617 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
618 head = go
619 where
620 go p = do
621 x <- nextChar p
622 case x of
623 Left _ -> return Nothing
624 Right (c, _) -> return (Just c)
625 {-# INLINABLE head #-}
626
627 -- | Retrieve the last 'Char'
628 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
629 last = go Nothing
630 where
631 go r p = do
632 x <- next p
633 case x of
634 Left () -> return r
635 Right (txt, p') ->
636 if (T.null txt)
637 then go r p'
638 else go (Just $ T.last txt) p'
639 {-# INLINABLE last #-}
640
641 -- | Determine if the stream is empty
642 null :: (Monad m) => Producer Text m () -> m Bool
643 null = P.all T.null
644 {-# INLINABLE null #-}
645
646 -- | Count the number of characters in the stream
647 length :: (Monad m, Num n) => Producer Text m () -> m n
648 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
649 {-# INLINABLE length #-}
650
651 -- | Fold that returns whether any received 'Char's satisfy the predicate
652 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
653 any predicate = P.any (T.any predicate)
654 {-# INLINABLE any #-}
655
656 -- | Fold that returns whether all received 'Char's satisfy the predicate
657 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
658 all predicate = P.all (T.all predicate)
659 {-# INLINABLE all #-}
660
661 -- | Return the maximum 'Char' within a text stream
662 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
663 maximum = P.fold step Nothing id
664 where
665 step mc txt =
666 if (T.null txt)
667 then mc
668 else Just $ case mc of
669 Nothing -> T.maximum txt
670 Just c -> max c (T.maximum txt)
671 {-# INLINABLE maximum #-}
672
673 -- | Return the minimum 'Char' within a text stream (surely very useful!)
674 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
675 minimum = P.fold step Nothing id
676 where
677 step mc txt =
678 if (T.null txt)
679 then mc
680 else case mc of
681 Nothing -> Just (T.minimum txt)
682 Just c -> Just (min c (T.minimum txt))
683 {-# INLINABLE minimum #-}
684
685 -- | Find the first element in the stream that matches the predicate
686 find
687 :: (Monad m)
688 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
689 find predicate p = head (p >-> filter predicate)
690 {-# INLINABLE find #-}
691
692 -- | Index into a text stream
693 index
694 :: (Monad m, Integral a)
695 => a -> Producer Text m () -> m (Maybe Char)
696 index n p = head (p >-> drop n)
697 {-# INLINABLE index #-}
698
699
700 -- | Store a tally of how many segments match the given 'Text'
701 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
702 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
703 {-# INLINABLE count #-}
704
705
706 -- | Consume the first character from a stream of 'Text'
707 --
708 -- 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
709 -- succeeds with a 'Right' providing the next character and the remainder of the
710 -- 'Producer'.
711
712 nextChar
713 :: (Monad m)
714 => Producer Text m r
715 -> m (Either r (Char, Producer Text m r))
716 nextChar = go
717 where
718 go p = do
719 x <- next p
720 case x of
721 Left r -> return (Left r)
722 Right (txt, p') -> case (T.uncons txt) of
723 Nothing -> go p'
724 Just (c, txt') -> return (Right (c, yield txt' >> p'))
725 {-# INLINABLE nextChar #-}
726
727 -- | Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the 'Producer' is empty
728
729 drawChar :: (Monad m) => Parser Text m (Maybe Char)
730 drawChar = do
731 x <- PP.draw
732 case x of
733 Nothing -> return Nothing
734 Just txt -> case (T.uncons txt) of
735 Nothing -> drawChar
736 Just (c, txt') -> do
737 PP.unDraw txt'
738 return (Just c)
739 {-# INLINABLE drawChar #-}
740
741 -- | Push back a 'Char' onto the underlying 'Producer'
742 unDrawChar :: (Monad m) => Char -> Parser Text m ()
743 unDrawChar c = modify (yield (T.singleton c) >>)
744 {-# INLINABLE unDrawChar #-}
745
746 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
747 push the 'Char' back
748
749 > peekChar = do
750 > x <- drawChar
751 > case x of
> 752 > Nothing -> return ()
> 753 > Just c -> unDrawChar c
754 > return x
755
756 -}
757
758 peekChar :: (Monad m) => Parser Text m (Maybe Char)
759 peekChar = do
760 x <- drawChar
761 case x of
762 Nothing -> return ()
763 Just c -> unDrawChar c
764 return x
765 {-# INLINABLE peekChar #-}
766
767 {-| Check if the underlying 'Producer' has no more characters
768
769 Note that this will skip over empty 'Text' chunks, unlike
770 'PP.isEndOfInput' from @pipes-parse@, which would consider
771 an empty 'Text' a valid bit of input.
772
> 773 > isEndOfChars = liftM isNothing peekChar
774 -}
775 isEndOfChars :: (Monad m) => Parser Text m Bool
776 isEndOfChars = do
777 x <- peekChar
778 return (case x of
779 Nothing -> True
780 Just _-> False )
781 {-# INLINABLE isEndOfChars #-}
782
783
784 -- | Splits a 'Producer' after the given number of characters
785 splitAt
786 :: (Monad m, Integral n)
787 => n
788 -> Lens'_ (Producer Text m r)
789 (Producer Text m (Producer Text m r))
790 splitAt n0 k p0 = fmap join (k (go n0 p0))
791 where
792 go 0 p = return p
793 go n p = do
794 x <- lift (next p)
795 case x of
796 Left r -> return (return r)
797 Right (txt, p') -> do
798 let len = fromIntegral (T.length txt)
799 if (len <= n)
800 then do
801 yield txt
802 go (n - len) p'
803 else do
804 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
805 yield prefix
806 return (yield suffix >> p')
807 {-# INLINABLE splitAt #-}
808
809
810 -- | Split a text stream in two, producing the longest
811 -- consecutive group of characters that satisfies the predicate
812 -- and returning the rest
813
814 span
815 :: (Monad m)
816 => (Char -> Bool)
817 -> Lens'_ (Producer Text m r)
818 (Producer Text m (Producer Text m r))
819 span predicate k p0 = fmap join (k (go p0))
820 where
821 go p = do
822 x <- lift (next p)
823 case x of
824 Left r -> return (return r)
825 Right (txt, p') -> do
826 let (prefix, suffix) = T.span predicate txt
827 if (T.null suffix)
828 then do
829 yield txt
830 go p'
831 else do
832 yield prefix
833 return (yield suffix >> p')
834 {-# INLINABLE span #-}
835
836 {-| Split a text stream in two, producing the longest
837 consecutive group of characters that don't satisfy the predicate
838 -}
839 break
840 :: (Monad m)
841 => (Char -> Bool)
842 -> Lens'_ (Producer Text m r)
843 (Producer Text m (Producer Text m r))
844 break predicate = span (not . predicate)
845 {-# INLINABLE break #-}
846
847 {-| Improper lens that splits after the first group of equivalent Chars, as
848 defined by the given equivalence relation
849 -}
850 groupBy
851 :: (Monad m)
852 => (Char -> Char -> Bool)
853 -> Lens'_ (Producer Text m r)
854 (Producer Text m (Producer Text m r))
855 groupBy equals k p0 = fmap join (k ((go p0))) where
856 go p = do
857 x <- lift (next p)
858 case x of
859 Left r -> return (return r)
860 Right (txt, p') -> case T.uncons txt of
861 Nothing -> go p'
862 Just (c, _) -> (yield txt >> p') ^. span (equals c)
863 {-# INLINABLE groupBy #-}
864
865 -- | Improper lens that splits after the first succession of identical 'Char's
866 group :: Monad m
867 => Lens'_ (Producer Text m r)
868 (Producer Text m (Producer Text m r))
869 group = groupBy (==)
870 {-# INLINABLE group #-}
871
872 {-| Improper lens that splits a 'Producer' after the first word
873
874 Unlike 'words', this does not drop leading whitespace
875 -}
876 word :: (Monad m)
877 => Lens'_ (Producer Text m r)
878 (Producer Text m (Producer Text m r))
879 word k p0 = fmap join (k (to p0))
880 where
881 to p = do
882 p' <- p^.span isSpace
883 p'^.break isSpace
884 {-# INLINABLE word #-}
885
886
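-- | Improper lens that splits a 'Producer' after the first line; the terminating
-- newline, if any, is left at the head of the remainder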
887 line :: (Monad m)
888 => Lens'_ (Producer Text m r)
889 (Producer Text m (Producer Text m r))
890 line = break (== '\n')
891
892 {-# INLINABLE line #-}
893
894
895 -- | Intersperse a 'Char' in between the characters of a stream of 'Text'
896 intersperse
897 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
898 intersperse c = go0
899 where
900 go0 p = do
901 x <- lift (next p)
902 case x of
903 Left r -> return r
904 Right (txt, p') -> do
905 yield (T.intersperse c txt)
906 go1 p'
907 go1 p = do
908 x <- lift (next p)
909 case x of
910 Left r -> return r
911 Right (txt, p') -> do
912 yield (T.singleton c)
913 yield (T.intersperse c txt)
914 go1 p'
915 {-# INLINABLE intersperse #-}
916
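{- A ghci sketch (assuming @OverloadedStrings@): the character is interspersed across
   chunk boundaries as well as within chunks:

> >>> toLazy (intersperse ',' (each ["abc","def"]))
> "a,b,c,d,e,f"
-}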
917
918
919 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
920 packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x)
921 packChars = Data.Profunctor.dimap to (fmap from)
922 where
923 -- to :: Monad m => Producer Char m x -> Producer Text m x
924 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
925
926 step diffAs c = diffAs . (c:)
927
928 done diffAs = T.pack (diffAs [])
929
930 -- from :: Monad m => Producer Text m x -> Producer Char m x
931 from p = for p (each . T.unpack)
932
933 {-# INLINABLE packChars #-}
934
935 defaultChunkSize :: Int
936 defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
937
938 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
939 chunksOf
940 :: (Monad m, Integral n)
941 => n -> Lens'_ (Producer Text m r)
942 (FreeT (Producer Text m) m r)
943 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
944 where
945 go p = do
946 x <- next p
947 return $ case x of
948 Left r -> Pure r
949 Right (txt, p') -> Free $ do
950 p'' <- (yield txt >> p') ^. splitAt n
951 return $ FreeT (go p'')
952 {-# INLINABLE chunksOf #-}
953
954
955 {-| Split a text stream into sub-streams delimited by characters that satisfy the
956 predicate
957 -}
958 splitsWith
959 :: (Monad m)
960 => (Char -> Bool)
961 -> Producer Text m r
962 -> FreeT (Producer Text m) m r
963 splitsWith predicate p0 = FreeT (go0 p0)
964 where
965 go0 p = do
966 x <- next p
967 case x of
968 Left r -> return (Pure r)
969 Right (txt, p') ->
970 if (T.null txt)
971 then go0 p'
972 else return $ Free $ do
973 p'' <- (yield txt >> p') ^. span (not . predicate)
974 return $ FreeT (go1 p'')
975 go1 p = do
976 x <- nextChar p
977 return $ case x of
978 Left r -> Pure r
979 Right (_, p') -> Free $ do
980 p'' <- p' ^. span (not . predicate)
981 return $ FreeT (go1 p'')
982 {-# INLINABLE splitsWith #-}
983
984 -- | Split a text stream using the given 'Char' as the delimiter
985 splits :: (Monad m)
986 => Char
987 -> Lens'_ (Producer Text m r)
988 (FreeT (Producer Text m) m r)
989 splits c k p =
990 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
991 {-# INLINABLE splits #-}
992
993 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's, using the
994 given equivalence relation
995 -}
996 groupsBy
997 :: Monad m
998 => (Char -> Char -> Bool)
999 -> Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x)
1000 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
1001 go p = do x <- next p
1002 case x of Left r -> return (Pure r)
1003 Right (bs, p') -> case T.uncons bs of
1004 Nothing -> go p'
1005 Just (c, _) -> do return $ Free $ do
1006 p'' <- (yield bs >> p')^.span (equals c)
1007 return $ FreeT (go p'')
1008 {-# INLINABLE groupsBy #-}
1009
1010
1011 -- | Like 'groupsBy', where the equality predicate is ('==')
1012 groups
1013 :: Monad m
1014 => Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x)
1015 groups = groupsBy (==)
1016 {-# INLINABLE groups #-}
1017
1018
1019
1020 {-| Split a text stream into 'FreeT'-delimited lines
1021 -}
1022 lines
1023 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
1024 lines = Data.Profunctor.dimap _lines (fmap _unlines)
1025 where
1026 _lines p0 = FreeT (go0 p0)
1027 where
1028 go0 p = do
1029 x <- next p
1030 case x of
1031 Left r -> return (Pure r)
1032 Right (txt, p') ->
1033 if (T.null txt)
1034 then go0 p'
1035 else return $ Free $ go1 (yield txt >> p')
1036 go1 p = do
1037 p' <- p ^. break ('\n' ==)
1038 return $ FreeT $ do
1039 x <- nextChar p'
1040 case x of
1041 Left r -> return $ Pure r
1042 Right (_, p'') -> go0 p''
1043 -- _unlines
1044 -- :: Monad m
1045 -- => FreeT (Producer Text m) m x -> Producer Text m x
1046 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
1047
1048
1049 {-# INLINABLE lines #-}
1050
1051
1052 -- | Split a text stream into 'FreeT'-delimited words
1053 words
1054 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
1055 words = Data.Profunctor.dimap go (fmap _unwords)
1056 where
1057 go p = FreeT $ do
1058 x <- next (p >-> dropWhile isSpace)
1059 return $ case x of
1060 Left r -> Pure r
1061 Right (bs, p') -> Free $ do
1062 p'' <- (yield bs >> p') ^. break isSpace
1063 return (go p'')
1064 _unwords = PG.intercalates (yield $ T.singleton ' ')
1065
1066 {-# INLINABLE words #-}
1067
1068
1069 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1070 interspersing a text stream in between them
1071 -}
1072 intercalate
1073 :: (Monad m)
1074 => Producer Text m ()
1075 -> FreeT (Producer Text m) m r
1076 -> Producer Text m r
1077 intercalate p0 = go0
1078 where
1079 go0 f = do
1080 x <- lift (runFreeT f)
1081 case x of
1082 Pure r -> return r
1083 Free p -> do
1084 f' <- p
1085 go1 f'
1086 go1 f = do
1087 x <- lift (runFreeT f)
1088 case x of
1089 Pure r -> return r
1090 Free p -> do
1091 p0
1092 f' <- p
1093 go1 f'
1094 {-# INLINABLE intercalate #-}
1095
1096 {-| Join 'FreeT'-delimited lines into a text stream
1097 -}
1098 unlines
1099 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1100 unlines = go
1101 where
1102 go f = do
1103 x <- lift (runFreeT f)
1104 case x of
1105 Pure r -> return r
1106 Free p -> do
1107 f' <- p
1108 yield $ T.singleton '\n'
1109 go f'
1110 {-# INLINABLE unlines #-}
1111
1112 {-| Join 'FreeT'-delimited words into a text stream
1113 -}
1114 unwords
1115 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1116 unwords = intercalate (yield $ T.singleton ' ')
1117 {-# INLINABLE unwords #-}
1118
1119
1120 {- $reexports
1121
1122 @Data.Text@ re-exports the 'Text' type, @Data.ByteString@ the 'ByteString' type, and @Data.Profunctor@ the 'Profunctor' class.
1123
1124 @Pipes.Parse@ re-exports the 'Parser' type synonym, and @Pipes.Group@ re-exports 'concats', 'intercalates', 'FreeT' (the type) and 'FreeF'.
1125 -}
1126
1127