Pipes/Text.hs

   1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
   2
   3
   4 module Pipes.Text  (
   5     -- * Effectful Text
   6     -- $intro
   7
   8     -- * Lenses
   9     -- $lenses
  10
  11     -- ** @view@ \/ @(^.)@
  12     -- $view
  13
  14     -- ** @over@ \/ @(%~)@
  15     -- $over
  16
  17     -- ** @zoom@
  18     -- $zoom
  19
  20     -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
  21     -- $special
  22
  23     -- * Producers
  24     fromLazy
  25
  26     -- * Pipes
  27     , map
  28     , concatMap
  29     , take
  30     , drop
  31     , takeWhile
  32     , dropWhile
  33     , filter
  34     , scan
  35     , pack
  36     , unpack
  37     , toCaseFold
  38     , toLower
  39     , toUpper
  40     , stripStart
  41
  42     -- * Folds
  43     , toLazy
  44     , toLazyM
  45     , foldChars
  46     , head
  47     , last
  48     , null
  49     , length
  50     , any
  51     , all
  52     , maximum
  53     , minimum
  54     , find
  55     , index
  56     , count
  57
  58     -- * Primitive Character Parsers
  59     , nextChar
  60     , drawChar
  61     , unDrawChar
  62     , peekChar
  63     , isEndOfChars
  64
  65     -- * Parsing Lenses
  66     , splitAt
  67     , span
  68     , break
  69     , groupBy
  70     , group
  71     , word
  72     , line
  73
  74     -- * FreeT Splitters
  75     , chunksOf
  76     , splitsWith
  77     , splits
  78     , groupsBy
  79     , groups
  80     , lines
  81     , words
  82
  83     -- * Transformations
  84     , intersperse
  85     , packChars
  86
  87     -- * Joiners
  88     , intercalate
  89     , unlines
  90     , unwords
  91
  92     -- * Re-exports
  93     -- $reexports
  94     , module Data.ByteString
  95     , module Data.Text
  96     , module Data.Profunctor
  97     , module Pipes.Parse
  98     , module Pipes.Group
  99     ) where
 100
 101 import Control.Applicative ((<*))
 102 import Control.Monad (liftM, join)
 103 import Control.Monad.Trans.State.Strict (StateT(..), modify)
 104 import qualified Data.Text as T
 105 import Data.Text (Text)
 106 import qualified Data.Text.Lazy as TL
 107 import Data.ByteString (ByteString)
 108 import Data.Functor.Constant (Constant(Constant, getConstant))
 109 import Data.Functor.Identity (Identity)
 110 import Data.Profunctor (Profunctor)
 111 import qualified Data.Profunctor
 112 import Pipes
 113 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
 114 import qualified Pipes.Group as PG
 115 import qualified Pipes.Parse as PP
 116 import Pipes.Parse (Parser)
 117 import qualified Pipes.Prelude as P
 118 import Data.Char (isSpace)
 119 import Data.Word (Word8)
 120 import Foreign.Storable (sizeOf)
 121 import Data.Bits (shiftL)
 122 import Prelude hiding (
 123     all,
 124     any,
 125     break,
 126     concat,
 127     concatMap,
 128     drop,
 129     dropWhile,
 130     elem,
 131     filter,
 132     head,
 133     last,
 134     lines,
 135     length,
 136     map,
 137     maximum,
 138     minimum,
 139     notElem,
 140     null,
 141     readFile,
 142     span,
 143     splitAt,
 144     take,
 145     takeWhile,
 146     unlines,
 147     unwords,
 148     words,
 149     writeFile )
 150
 151 {- $intro
 152     This package provides @pipes@ utilities for /text streams/ or /character streams/,
 153     realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
 154     and thus you will generally want @Data.Text@ in scope.  But the type
 155     @Producer Text m r@ ,as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
 156
 157     This particular module provides many functions equivalent in one way or another to
 158     the pure functions in
 159     <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
 160     They transform, divide, group and fold text streams. Though @Producer Text m r@
 161     is the type of \'effectful Text\', the functions in this module are \'pure\'
 162     in the sense that they are uniformly monad-independent.
 163     Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
 164     operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
 165     is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
 166
 167     The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
 168     strict Text: the implementation is arranged so that the individual strict 'Text'
 169     chunks are kept to a reasonable size; the user is not aware of the divisions
 170     between the connected 'Text' chunks.
 171     So also here: the functions in this module are designed to operate on streams that
 172     are insensitive to text boundaries. This means that they may freely split
 173     text into smaller texts and /discard empty texts/.  The objective, though, is
 174     that they should /never concatenate texts/ in order to provide strict upper
 175     bounds on memory usage.
 176
 177     For example, to stream only the first three lines of 'stdin' to 'stdout' you
 178     might write:
 179
 180 > import Pipes
 181 > import qualified Pipes.Text as Text
 182 > import qualified Pipes.Text.IO as Text
 183 > import Pipes.Group (takes')
 184 > import Lens.Family
 185 >
 186 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
 187 >   where
 188 >     takeLines n = Text.unlines . takes' n . view Text.lines
 189
 190     The above program will never bring more than one chunk of text (~ 32 KB) into
 191     memory, no matter how long the lines are.
 192
 193 -}
 194 {- $lenses
 195     As this example shows, one superficial difference from @Data.Text.Lazy@
 196     is that many of the operations, like 'lines', are \'lensified\'; this has a
 197     number of advantages (where it is possible); in particular it facilitates their
 198     use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
 199     sense.) The disadvantage, famously, is that the messages you get for type errors can be
 200     a little alarming. The remarks that follow in this section are for non-lens adepts.
 201
 202     Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
 203     intuitively corresponding function when used with @view@ or @(^.)@. Instead of
 204     writing:
 205
 206     > splitAt 17 producer
 207
 208     as we would with the Prelude or Text functions, we write
 209
 210     > view (splitAt 17) producer
 211
 212     or equivalently
 213
 214     > producer ^. splitAt 17
 215
 216     This may seem a little indirect, but note that many equivalents of
 217     @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
 218     corresponding functions by prefixing them with @(>->)@. Thus something like
 219
 220 >  stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
 221
 222     would drop the leading white space from each line.
 223
 224     The lenses in this library are marked as /improper/; this just means that
 225     they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
 226     Just for this reason, though, the magnificent complexities of the lens libraries
 227     are a distraction. The lens combinators to keep in mind, the ones that make sense for
 228     our lenses, are @view@ \/ @(^.)@), @over@ \/ @(%~)@ , and @zoom@.
 229
 230     One need only keep in mind that if @l@ is a @Lens' a b@, then:
 231
 232 -}
 233 {- $view
 234     @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
 235     is the corresponding @b@; as was said above, this function will be exactly the
 236     function you think it is, given its name. Thus to uppercase the first n characters
 237     of a Producer, leaving the rest the same, we could write:
 238
 239
 240     > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
 241     >                p'
 242 -}
 243 {- $over
 244     @over l@ is a function @(b -> b) -> a -> a@.  Thus, given a function that modifies
 245     @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
 246     the @b@ that we can \"see\" through the lens. So  @over l f :: a -> a@
 247     (it can also be written @l %~ f@).
 248     For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
 249     So above we might have written things like these:
 250
 251     > stripLines = Text.lines %~ maps (>-> Text.stripStart)
 252     > stripLines = over Text.lines (maps (>-> Text.stripStart))
 253     > upper n    =  Text.splitAt n %~ (>-> Text.toUpper)
 254
 255 -}
 256 {- $zoom
 257     @zoom l@, finally, is a function from a @Parser b m r@
 258     to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
 259     Its use is easiest to see with an decoding lens like 'utf8', which
 260     \"sees\" a Text producer hidden inside a ByteString producer:
 261     @drawChar@ is a Text parser, returning a @Maybe Char@, @zoom utf8 drawChar@ is
 262     a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
 263     a list of everything produced from a Producer, leaving only the return value; it would
 264     usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
 265     returns a list of Text chunks containing the first seventeen Chars, and returns the rest of
 266     the Text Producer for further parsing. Suppose that we want, inexplicably, to
 267     modify the casing of a Text Producer according to any instruction it might
 268     contain at the start. Then we might write something like this:
 269
 270 >     obey :: Monad m => Producer Text m b -> Producer Text m b
 271 >     obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
 272 >                 let seven = T.concat ts
 273 >                 case T.toUpper seven of
 274 >                    "TOUPPER" -> p' >-> Text.toUpper
 275 >                    "TOLOWER" -> p' >-> Text.toLower
 276 >                    _         -> do yield seven
 277 >                                    p'
 278
 279
 280 > >>> let doc = each ["toU","pperTh","is document.\n"]
 281 > >>> runEffect $ obey doc >-> Text.stdout
 282 > THIS DOCUMENT.
 283
 284     The purpose of exporting lenses is the mental economy achieved with this three-way
 285     applicability. That one expression, e.g. @lines@ or @splitAt 17@ can have these
 286     three uses is no more surprising than that a pipe can act as a function modifying
 287     the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
 288     -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
 289     @pipe >-> consumer@
 290
 291     The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
 292     both <http://hackage.haskell.org/package/lens lens> and
 293     <http://hackage.haskell.org/package/lens-family lens-family> The use of 'zoom' is explained
 294     in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
 295     and to some extent in the @Pipes.Text.Encoding@ module here.
 296
 297 -}
 298 {- $special
 299     These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
 300     This is in the types that are most closely associated with our central text type,
 301     @Producer Text m r@.  In @Data.Text@ and @Data.Text.Lazy@ we find functions like
 302
 303 >   splitAt  :: Int -> Text -> (Text, Text)
 304 >   lines    ::        Text -> [Text]
 305 >   chunksOf :: Int -> Text -> [Text]
 306
 307     which relate a Text with a pair of Texts or a list of Texts.
 308     The corresponding functions here (taking account of \'lensification\') are
 309
 310 >   view . splitAt  :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
 311 >   view lines      :: Monad m               =>      Producer Text m r -> FreeT (Producer Text m) m r
 312 >   view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
 313
 314     Some of the types may be more readable if you imagine that we have introduced
 315     our own type synonyms
 316
 317 >   type Text m r  = Producer T.Text m r
 318 >   type Texts m r = FreeT (Producer T.Text m) m r
 319
 320     Then we would think of the types above as
 321
 322 >   view . splitAt  :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
 323 >   view lines      :: (Monad m)             =>      Text m r -> Texts m r
 324 >   view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
 325
 326     which brings one closer to the types of the similar functions in @Data.Text.Lazy@
 327
 328     In the type @Producer Text m (Producer Text m r)@ the second
 329     element of the \'pair\' of effectful Texts cannot simply be retrieved
 330     with something like 'snd'. This is an \'effectful\' pair, and one must work
 331     through the effects of the first element to arrive at the second Text stream, even
 332     if you are proposing to throw the Text in the first element away.
 333     Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
 334
 335 >    join :: Monad m => Producer Text m (Producer m r) -> Producer m r
 336
 337     The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
 338     @FreeT (Producer m Text) m r@ -- our @Texts m r@ -- is the type of (effectful)
 339     lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
 340     together things of the forms:
 341
 342 > r
 343 > (Text,r)
 344 > (Text, (Text, r))
 345 > (Text, (Text, (Text, r)))
 346 > (Text, (Text, (Text, (Text, r))))
 347 > ...
 348
 349     (We might also have identified the sum of those types with @Free ((,) Text) r@
 350     -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
 351
 352     Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
 353     @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
 354
 355 > m r
 356 > Text m r
 357 > Text m (Text m r)
 358 > Text m (Text m (Text m r))
 359 > Text m (Text m (Text m (Text m r)))
 360 > ...
 361
 362     We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
 363     or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
 364     result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@ .
 365
 366     One might think that
 367
 368 >   lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
 369 >   view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
 370
 371     should really have the type
 372
 373 >   lines :: Monad m => Pipe Text Text m r
 374
 375     as e.g. 'toUpper' does. But this would spoil the control we are
 376     attempting to maintain over the size of chunks. It is in fact just
 377     as unreasonable to want such a pipe as to want
 378
 379 > Data.Text.Lazy.lines :: Text -> Text
 380
 381     to 'rechunk' the strict Text chunks inside the lazy Text to respect
 382     line boundaries. In fact we have
 383
 384 > Data.Text.Lazy.lines :: Text -> [Text]
 385 > Prelude.lines :: String -> [String]
 386
 387     where the elements of the list are themselves lazy Texts or Strings; the use
 388     of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
 389
 390     The @Pipes.Group@ module, which can generally be imported without qualification,
 391     provides many functions for working with things of type @FreeT (Producer a m) m r@.
 392     In particular it conveniently exports the constructors for @FreeT@ and the associated
 393     @FreeF@ type -- a fancy form of @Either@, namely
 394
 395 > data FreeF f a b = Pure a | Free (f b)
 396
 397     for pattern-matching. Consider the implementation of the 'words' function, or
 398     of the part of the lens that takes us to the words; it is compact but exhibits many
 399     of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
 400     constuctors.  Keep in mind that
 401
 402 >  newtype FreeT f m a  = FreeT (m (FreeF f a (FreeT f m a)))
 403 >  next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
 404
 405    Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
 406    the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
 407
 408 > words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
 409 > words p = FreeT $ do                   -- With 'next' we will inspect p's first chunk, excluding spaces;
 410 >   x <- next (p >-> dropWhile isSpace)  --   note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
 411 >   return $ case x of                   -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
 412 >     Left   r       -> Pure r           -- 'Left' means we got no Text chunk, but only the return value; so we are done.
 413 >     Right (txt, p') -> Free $ do       -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
 414 >         p'' <- view (break isSpace)    -- When we apply 'break isSpace', we get a Producer that returns a Producer;
 415 >                     (yield txt >> p')  --   so here we yield everything up to the next space, and get the rest back.
 416 >         return (words p'')             -- We then carry on with the rest, which is likely to begin with space.
 417
 418 -}
 419
 420 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
 421 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
 422 fromLazy  = TL.foldrChunks (\e a -> yield e >> a) (return ())
 423 {-# INLINE fromLazy #-}
 424
 425 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
 426 a ^. lens = getConstant (lens Constant a)
 427
 428 -- | Apply a transformation to each 'Char' in the stream
 429 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
 430 map f = P.map (T.map f)
 431 {-# INLINABLE map #-}
 432
 433 -- | Map a function over the characters of a text stream and concatenate the results
 434 concatMap
 435     :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
 436 concatMap f = P.map (T.concatMap f)
 437 {-# INLINABLE concatMap #-}
 438
 439 -- | Transform a Pipe of 'String's into one of 'Text' chunks
 440 pack :: Monad m => Pipe String Text m r
 441 pack = P.map T.pack
 442 {-# INLINEABLE pack #-}
 443
 444 -- | Transform a Pipes of 'Text' chunks into one of 'String's
 445 unpack :: Monad m => Pipe Text String m r
 446 unpack = for cat (\t -> yield (T.unpack t))
 447 {-# INLINEABLE unpack #-}
 448
 449 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
 450 -- here acting as 'Text' pipes, rather as they would  on a lazy text
 451 toCaseFold :: Monad m => Pipe Text Text m r
 452 toCaseFold = P.map T.toCaseFold
 453 {-# INLINEABLE toCaseFold #-}
 454
 455 -- | lowercase incoming 'Text'
 456 toLower :: Monad m => Pipe Text Text m r
 457 toLower = P.map T.toLower
 458 {-# INLINEABLE toLower #-}
 459
 460 -- | uppercase incoming 'Text'
 461 toUpper :: Monad m => Pipe Text Text m r
 462 toUpper = P.map T.toUpper
 463 {-# INLINEABLE toUpper #-}
 464
 465 -- | Remove leading white space from an incoming succession of 'Text's
 466 stripStart :: Monad m => Pipe Text Text m r
 467 stripStart = do
 468     chunk <- await
 469     let text = T.stripStart chunk
 470     if T.null text
 471       then stripStart
 472       else do yield text
 473               cat
 474 {-# INLINEABLE stripStart #-}
 475
 476 -- | @(take n)@ only allows @n@ individual characters to pass;
 477 --  contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
 478 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
 479 take n0 = go n0 where
 480     go n
 481         | n <= 0    = return ()
 482         | otherwise = do
 483             txt <- await
 484             let len = fromIntegral (T.length txt)
 485             if (len > n)
 486                 then yield (T.take (fromIntegral n) txt)
 487                 else do
 488                     yield txt
 489                     go (n - len)
 490 {-# INLINABLE take #-}
 491
 492 -- | @(drop n)@ drops the first @n@ characters
 493 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
 494 drop n0 = go n0 where
 495     go n
 496         | n <= 0    = cat
 497         | otherwise = do
 498             txt <- await
 499             let len = fromIntegral (T.length txt)
 500             if (len >= n)
 501                 then do
 502                     yield (T.drop (fromIntegral n) txt)
 503                     cat
 504                 else go (n - len)
 505 {-# INLINABLE drop #-}
 506
 507 -- | Take characters until they fail the predicate
 508 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
 509 takeWhile predicate = go
 510   where
 511     go = do
 512         txt <- await
 513         let (prefix, suffix) = T.span predicate txt
 514         if (T.null suffix)
 515             then do
 516                 yield txt
 517                 go
 518             else yield prefix
 519 {-# INLINABLE takeWhile #-}
 520
 521 -- | Drop characters until they fail the predicate
 522 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
 523 dropWhile predicate = go where
 524     go = do
 525         txt <- await
 526         case T.findIndex (not . predicate) txt of
 527             Nothing -> go
 528             Just i -> do
 529                 yield (T.drop i txt)
 530                 cat
 531 {-# INLINABLE dropWhile #-}
 532
 533 -- | Only allows 'Char's to pass if they satisfy the predicate
 534 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
 535 filter predicate = P.map (T.filter predicate)
 536 {-# INLINABLE filter #-}
 537
 538 -- | Strict left scan over the characters
 539 scan
 540     :: (Monad m)
 541     => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
 542 scan step begin = do
 543     yield (T.singleton begin)
 544     go begin
 545   where
 546     go c = do
 547         txt <- await
 548         let txt' = T.scanl step c txt
 549             c' = T.last txt'
 550         yield (T.tail txt')
 551         go c'
 552 {-# INLINABLE scan #-}
 553
 554 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
 555     'TL.Text'
 556 -}
 557 toLazy :: Producer Text Identity () -> TL.Text
 558 toLazy = TL.fromChunks . P.toList
 559 {-# INLINABLE toLazy #-}
 560
 561 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
 562     'TL.Text'
 563
 564     Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
 565     simple testing purposes.  Idiomatic @pipes@ style consumes the chunks
 566     immediately as they are generated instead of loading them all into memory.
 567 -}
 568 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
 569 toLazyM = liftM TL.fromChunks . P.toListM
 570 {-# INLINABLE toLazyM #-}
 571
 572 -- | Reduce the text stream using a strict left fold over characters
 573 foldChars
 574     :: Monad m
 575     => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
 576 foldChars step begin done = P.fold (T.foldl' step) begin done
 577 {-# INLINABLE foldChars #-}
 578
 579 -- | Retrieve the first 'Char'
 580 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
 581 head = go
 582   where
 583     go p = do
 584         x <- nextChar p
 585         case x of
 586             Left   _      -> return  Nothing
 587             Right (c, _) -> return (Just c)
 588 {-# INLINABLE head #-}
 589
 590 -- | Retrieve the last 'Char'
 591 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
 592 last = go Nothing
 593   where
 594     go r p = do
 595         x <- next p
 596         case x of
 597             Left   ()      -> return r
 598             Right (txt, p') ->
 599                 if (T.null txt)
 600                 then go r p'
 601                 else go (Just $ T.last txt) p'
 602 {-# INLINABLE last #-}
 603
 604 -- | Determine if the stream is empty
 605 null :: (Monad m) => Producer Text m () -> m Bool
 606 null = P.all T.null
 607 {-# INLINABLE null #-}
 608
 609 -- | Count the number of characters in the stream
 610 length :: (Monad m, Num n) => Producer Text m () -> m n
 611 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
 612 {-# INLINABLE length #-}
 613
 614 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
 615 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
 616 any predicate = P.any (T.any predicate)
 617 {-# INLINABLE any #-}
 618
 619 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
 620 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
 621 all predicate = P.all (T.all predicate)
 622 {-# INLINABLE all #-}
 623
 624 -- | Return the maximum 'Char' within a text stream
 625 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
 626 maximum = P.fold step Nothing id
 627   where
 628     step mc txt =
 629         if (T.null txt)
 630         then mc
 631         else Just $ case mc of
 632             Nothing -> T.maximum txt
 633             Just c -> max c (T.maximum txt)
 634 {-# INLINABLE maximum #-}
 635
 636 -- | Return the minimum 'Char' within a text stream (surely very useful!)
 637 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
 638 minimum = P.fold step Nothing id
 639   where
 640     step mc txt =
 641         if (T.null txt)
 642         then mc
 643         else case mc of
 644             Nothing -> Just (T.minimum txt)
 645             Just c -> Just (min c (T.minimum txt))
 646 {-# INLINABLE minimum #-}
 647
 648 -- | Find the first element in the stream that matches the predicate
 649 find
 650     :: (Monad m)
 651     => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
 652 find predicate p = head (p >-> filter predicate)
 653 {-# INLINABLE find #-}
 654
 655 -- | Index into a text stream
 656 index
 657     :: (Monad m, Integral a)
 658     => a-> Producer Text m () -> m (Maybe Char)
 659 index n p = head (p >-> drop n)
 660 {-# INLINABLE index #-}
 661
 662
 663 -- | Store a tally of how many segments match the given 'Text'
 664 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
 665 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
 666 {-# INLINABLE count #-}
 667
 668
 669 -- | Consume the first character from a stream of 'Text'
 670 --
 671 -- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
 672 -- succeeds with a 'Right' providing the next character and the remainder of the
 673 -- 'Producer'.
 674
 675 nextChar
 676     :: (Monad m)
 677     => Producer Text m r
 678     -> m (Either r (Char, Producer Text m r))
 679 nextChar = go
 680   where
 681     go p = do
 682         x <- next p
 683         case x of
 684             Left   r       -> return (Left r)
 685             Right (txt, p') -> case (T.uncons txt) of
 686                 Nothing        -> go p'
 687                 Just (c, txt') -> return (Right (c, yield txt' >> p'))
 688 {-# INLINABLE nextChar #-}
 689
 690 -- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
 691
 692 drawChar :: (Monad m) => Parser Text m (Maybe Char)
 693 drawChar = do
 694     x <- PP.draw
 695     case x of
 696         Nothing  -> return Nothing
 697         Just txt -> case (T.uncons txt) of
 698             Nothing        -> drawChar
 699             Just (c, txt') -> do
 700                 PP.unDraw txt'
 701                 return (Just c)
 702 {-# INLINABLE drawChar #-}
 703
 704 -- | Push back a 'Char' onto the underlying 'Producer'
 705 unDrawChar :: (Monad m) => Char -> Parser Text m ()
 706 unDrawChar c = modify (yield (T.singleton c) >>)
 707 {-# INLINABLE unDrawChar #-}
 708
 709 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
 710     push the 'Char' back
 711
 712 > peekChar = do
 713 >     x <- drawChar
 714 >     case x of
 715 >         Left  _  -> return ()
 716 >         Right c -> unDrawChar c
 717 >     return x
 718
 719 -}
 720
 721 peekChar :: (Monad m) => Parser Text m (Maybe Char)
 722 peekChar = do
 723     x <- drawChar
 724     case x of
 725         Nothing  -> return ()
 726         Just c -> unDrawChar c
 727     return x
 728 {-# INLINABLE peekChar #-}
 729
 730 {-| Check if the underlying 'Producer' has no more characters
 731
 732     Note that this will skip over empty 'Text' chunks, unlike
 733     'PP.isEndOfInput' from @pipes-parse@, which would consider
 734     an empty 'Text' a valid bit of input.
 735
 736 > isEndOfChars = liftM isLeft peekChar
 737 -}
 738 isEndOfChars :: (Monad m) => Parser Text m Bool
 739 isEndOfChars = do
 740     x <- peekChar
 741     return (case x of
 742         Nothing -> True
 743         Just _-> False )
 744 {-# INLINABLE isEndOfChars #-}
 745
 746
 747 -- | Splits a 'Producer' after the given number of characters
 748 splitAt
 749     :: (Monad m, Integral n)
 750     => n
 751     -> Lens' (Producer Text m r)
 752              (Producer Text m (Producer Text m r))
 753 splitAt n0 k p0 = fmap join (k (go n0 p0))
 754   where
 755     go 0 p = return p
 756     go n p = do
 757         x <- lift (next p)
 758         case x of
 759             Left   r       -> return (return r)
 760             Right (txt, p') -> do
 761                 let len = fromIntegral (T.length txt)
 762                 if (len <= n)
 763                     then do
 764                         yield txt
 765                         go (n - len) p'
 766                     else do
 767                         let (prefix, suffix) = T.splitAt (fromIntegral n) txt
 768                         yield prefix
 769                         return (yield suffix >> p')
 770 {-# INLINABLE splitAt #-}
 771
 772
 773 -- | Split a text stream in two, producing the longest
 774 --   consecutive group of characters that satisfies the predicate
 775 --   and returning the rest
 776
 777 span
 778     :: (Monad m)
 779     => (Char -> Bool)
 780     -> Lens' (Producer Text m r)
 781              (Producer Text m (Producer Text m r))
 782 span predicate k p0 = fmap join (k (go p0))
 783   where
 784     go p = do
 785         x <- lift (next p)
 786         case x of
 787             Left   r       -> return (return r)
 788             Right (txt, p') -> do
 789                 let (prefix, suffix) = T.span predicate txt
 790                 if (T.null suffix)
 791                     then do
 792                         yield txt
 793                         go p'
 794                     else do
 795                         yield prefix
 796                         return (yield suffix >> p')
 797 {-# INLINABLE span #-}
 798
 799 {-| Split a text stream in two, producing the longest
 800     consecutive group of characters that don't satisfy the predicate
 801 -}
 802 break
 803     :: (Monad m)
 804     => (Char -> Bool)
 805     -> Lens' (Producer Text m r)
 806              (Producer Text m (Producer Text m r))
 807 break predicate = span (not . predicate)
 808 {-# INLINABLE break #-}
 809
 810 {-| Improper lens that splits after the first group of equivalent Chars, as
 811     defined by the given equivalence relation
 812 -}
 813 groupBy
 814     :: (Monad m)
 815     => (Char -> Char -> Bool)
 816     -> Lens' (Producer Text m r)
 817              (Producer Text m (Producer Text m r))
 818 groupBy equals k p0 = fmap join (k ((go p0))) where
 819     go p = do
 820         x <- lift (next p)
 821         case x of
 822             Left   r       -> return (return r)
 823             Right (txt, p') -> case T.uncons txt of
 824                 Nothing      -> go p'
 825                 Just (c, _) -> (yield txt >> p') ^. span (equals c)
 826 {-# INLINABLE groupBy #-}
 827
 828 -- | Improper lens that splits after the first succession of identical 'Char' s
 829 group :: Monad m
 830       => Lens' (Producer Text m r)
 831                (Producer Text m (Producer Text m r))
 832 group = groupBy (==)
 833 {-# INLINABLE group #-}
 834
 835 {-| Improper lens that splits a 'Producer' after the first word
 836
 837     Unlike 'words', this does not drop leading whitespace
 838 -}
 839 word :: (Monad m)
 840      => Lens' (Producer Text m r)
 841               (Producer Text m (Producer Text m r))
 842 word k p0 = fmap join (k (to p0))
 843   where
 844     to p = do
 845         p' <- p^.span isSpace
 846         p'^.break isSpace
 847 {-# INLINABLE word #-}
 848
 849
 850 line :: (Monad m)
 851      => Lens' (Producer Text m r)
 852               (Producer Text m (Producer Text m r))
 853 line = break (== '\n')
 854
 855 {-# INLINABLE line #-}
 856
 857
 858 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
 859 intersperse
 860     :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
 861 intersperse c = go0
 862   where
 863     go0 p = do
 864         x <- lift (next p)
 865         case x of
 866             Left   r       -> return r
 867             Right (txt, p') -> do
 868                 yield (T.intersperse c txt)
 869                 go1 p'
 870     go1 p = do
 871         x <- lift (next p)
 872         case x of
 873             Left   r       -> return r
 874             Right (txt, p') -> do
 875                 yield (T.singleton c)
 876                 yield (T.intersperse c txt)
 877                 go1 p'
 878 {-# INLINABLE intersperse #-}
 879
 880
 881 -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
 882 packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x)
 883 packChars = Data.Profunctor.dimap to (fmap from)
 884   where
 885     -- to :: Monad m => Producer Char m x -> Producer Text m x
 886     to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
 887
 888     step diffAs c = diffAs . (c:)
 889
 890     done diffAs = T.pack (diffAs [])
 891
 892     -- from :: Monad m => Producer Text m x -> Producer Char m x
 893     from p = for p (each . T.unpack)
 894 {-# INLINABLE packChars #-}
 895
 896 defaultChunkSize :: Int
 897 defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
 898
 899 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
 900 chunksOf
 901     :: (Monad m, Integral n)
 902     => n -> Lens' (Producer Text m r)
 903                   (FreeT (Producer Text m) m r)
 904 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
 905   where
 906     go p = do
 907         x <- next p
 908         return $ case x of
 909             Left   r       -> Pure r
 910             Right (txt, p') -> Free $ do
 911                 p'' <- (yield txt >> p') ^. splitAt n
 912                 return $ FreeT (go p'')
 913 {-# INLINABLE chunksOf #-}
 914
 915
 916 {-| Split a text stream into sub-streams delimited by characters that satisfy the
 917     predicate
 918 -}
 919 splitsWith
 920     :: (Monad m)
 921     => (Char -> Bool)
 922     -> Producer Text m r
 923     -> FreeT (Producer Text m) m r
 924 splitsWith predicate p0 = FreeT (go0 p0)
 925   where
 926     go0 p = do
 927         x <- next p
 928         case x of
 929             Left   r       -> return (Pure r)
 930             Right (txt, p') ->
 931                 if (T.null txt)
 932                 then go0 p'
 933                 else return $ Free $ do
 934                     p'' <-  (yield txt >> p') ^. span (not . predicate)
 935                     return $ FreeT (go1 p'')
 936     go1 p = do
 937         x <- nextChar p
 938         return $ case x of
 939             Left   r      -> Pure r
 940             Right (_, p') -> Free $ do
 941                     p'' <- p' ^. span (not . predicate)
 942                     return $ FreeT (go1 p'')
 943 {-# INLINABLE splitsWith #-}
 944
 945 -- | Split a text stream using the given 'Char' as the delimiter
 946 splits :: (Monad m)
 947       => Char
 948       -> Lens' (Producer Text m r)
 949                (FreeT (Producer Text m) m r)
 950 splits c k p =
 951           fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
 952 {-# INLINABLE splits #-}
 953
 954 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
 955     given equivalence relation
 956 -}
 957 groupsBy
 958     :: Monad m
 959     => (Char -> Char -> Bool)
 960     -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
 961 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
 962   go p = do x <- next p
 963             case x of Left   r       -> return (Pure r)
 964                       Right (bs, p') -> case T.uncons bs of
 965                              Nothing      -> go p'
 966                              Just (c, _) -> do return $ Free $ do
 967                                                  p'' <- (yield bs >> p')^.span (equals c)
 968                                                  return $ FreeT (go p'')
 969 {-# INLINABLE groupsBy #-}
 970
 971
 972 -- | Like 'groupsBy', where the equality predicate is ('==')
 973 groups
 974     :: Monad m
 975     => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
 976 groups = groupsBy (==)
 977 {-# INLINABLE groups #-}
 978
 979
 980
 981 {-| Split a text stream into 'FreeT'-delimited lines
 982 -}
 983 lines
 984     :: (Monad m) => Iso'_ (Producer Text m r)  (FreeT (Producer Text m) m r)
 985 lines = Data.Profunctor.dimap _lines (fmap _unlines)
 986   where
 987   _lines p0 = FreeT (go0 p0)
 988     where
 989       go0 p = do
 990               x <- next p
 991               case x of
 992                   Left   r       -> return (Pure r)
 993                   Right (txt, p') ->
 994                       if (T.null txt)
 995                       then go0 p'
 996                       else return $ Free $ go1 (yield txt >> p')
 997       go1 p = do
 998               p' <- p ^. break ('\n' ==)
 999               return $ FreeT $ do
1000                   x  <- nextChar p'
1001                   case x of
1002                       Left   r      -> return $ Pure r
1003                       Right (_, p'') -> go0 p''
1004   -- _unlines
1005   --     :: Monad m
1006   --      => FreeT (Producer Text m) m x -> Producer Text m x
1007   _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
1008
1009 {-# INLINABLE lines #-}
1010
1011
1012 -- | Split a text stream into 'FreeT'-delimited words
1013 words
1014     :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
1015 words = Data.Profunctor.dimap go (fmap _unwords)
1016   where
1017     go p = FreeT $ do
1018         x <- next (p >-> dropWhile isSpace)
1019         return $ case x of
1020             Left   r       -> Pure r
1021             Right (bs, p') -> Free $ do
1022                 p'' <-  (yield bs >> p') ^. break isSpace
1023                 return (go p'')
1024     _unwords = PG.intercalates (yield $ T.singleton ' ')
1025
1026 {-# INLINABLE words #-}
1027
1028
1029 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1030     interspersing a text stream in between them
1031 -}
1032 intercalate
1033     :: (Monad m)
1034     => Producer Text m ()
1035     -> FreeT (Producer Text m) m r
1036     -> Producer Text m r
1037 intercalate p0 = go0
1038   where
1039     go0 f = do
1040         x <- lift (runFreeT f)
1041         case x of
1042             Pure r -> return r
1043             Free p -> do
1044                 f' <- p
1045                 go1 f'
1046     go1 f = do
1047         x <- lift (runFreeT f)
1048         case x of
1049             Pure r -> return r
1050             Free p -> do
1051                 p0
1052                 f' <- p
1053                 go1 f'
1054 {-# INLINABLE intercalate #-}
1055
1056 {-| Join 'FreeT'-delimited lines into a text stream
1057 -}
1058 unlines
1059     :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1060 unlines = go
1061   where
1062     go f = do
1063         x <- lift (runFreeT f)
1064         case x of
1065             Pure r -> return r
1066             Free p -> do
1067                 f' <- p
1068                 yield $ T.singleton '\n'
1069                 go f'
1070 {-# INLINABLE unlines #-}
1071
1072 {-| Join 'FreeT'-delimited words into a text stream
1073 -}
1074 unwords
1075     :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1076 unwords = intercalate (yield $ T.singleton ' ')
1077 {-# INLINABLE unwords #-}
1078
1079
1080 {- $reexports
1081
1082     @Data.Text@ re-exports the 'Text' type.
1083
1084     @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
1085 -}
1086
1087
1088 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)