Pipes/Text.hs

   1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
   2
   3 {-| This module provides @pipes@ utilities for \"text streams\", which are
   4     streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
   5     a 'Producer' can be converted to and from lazy 'Text's, though this is generally
   6     unwise.  Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
   7     An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
   8
   9     To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'.  For
  10     example, the following program copies a document from one file to another:
  11
  12 > import Pipes
  13 > import qualified Pipes.Text as Text
  14 > import qualified Pipes.Text.IO as Text
  15 > import System.IO
  16 >
  17 > main =
  18 >     withFile "inFile.txt"  ReadMode  $ \hIn  ->
  19 >     withFile "outFile.txt" WriteMode $ \hOut ->
  20 >     runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
  21
  22 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
  23
  24 > import Pipes
  25 > import qualified Pipes.Text as Text
  26 > import qualified Pipes.Text.IO as Text
  27 > import Pipes.Safe
  28 >
  29 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
  30
  31     You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
  32     and 'stdout' pipes, as with the following \"echo\" program:
  33
  34 > main = runEffect $ Text.stdin >-> Text.stdout
  35
  36     You can also translate pure lazy 'TL.Text's to and from pipes:
  37
  38 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
  39
  40     In addition, this module provides many functions equivalent to lazy
  41     'Text' functions so that you can transform or fold text streams.  For
  42     example, to stream only the first three lines of 'stdin' to 'stdout' you
  43     might write:
  44
  45 > import Pipes
  46 > import qualified Pipes.Text as Text
  47 > import qualified Pipes.Parse as Parse
  48 >
  49 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
  50 >   where
  51 >     takeLines n = Text.unlines . Parse.takeFree n . Text.lines
  52
  53     The above program will never bring more than one chunk of text (~ 32 KB) into
  54     memory, no matter how long the lines are.
  55
  56     Note that functions in this library are designed to operate on streams that
  57     are insensitive to text boundaries.  This means that they may freely split
  58     text into smaller texts, /discard empty texts/.  However, apart from the
  59     special case of 'concatMap', they will /never concatenate texts/ in order
  60     to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
  61 -}
  62
  63 module Pipes.Text  (
  64     -- * Producers
  65       fromLazy
  66     -- , stdin
  67     -- , fromHandle
  68     -- , readFile
  69
  70     -- * Consumers
  71     -- , stdout
  72     -- , toHandle
  73     -- , writeFile
  74
  75     -- * Pipes
  76     , map
  77     , concatMap
  78     , take
  79     , drop
  80     , takeWhile
  81     , dropWhile
  82     , filter
  83     , scan
  84 --    , encodeUtf8
  85     , pack
  86     , unpack
  87     , toCaseFold
  88     , toLower
  89     , toUpper
  90     , stripStart
  91
  92     -- * Folds
  93     , toLazy
  94     , toLazyM
  95     , foldChars
  96     , head
  97     , last
  98     , null
  99     , length
 100     , any
 101     , all
 102     , maximum
 103     , minimum
 104     , find
 105     , index
 106     , count
 107
 108     -- * Primitive Character Parsers
 109     -- $parse
 110     , nextChar
 111     , drawChar
 112     , unDrawChar
 113     , peekChar
 114     , isEndOfChars
 115
 116     -- * Parsing Lenses
 117     , splitAt
 118     , span
 119     , break
 120     , groupBy
 121     , group
 122     , word
 123     , line
 124
 125     -- -- * Decoding Lenses
 126     -- , decodeUtf8
 127     -- , codec
 128     --
 129     -- -- * Codecs
 130     -- , utf8
 131     -- , utf16_le
 132     -- , utf16_be
 133     -- , utf32_le
 134     -- , utf32_be
 135     --
 136     -- -- * Other Decoding/Encoding Functions
 137     -- , decodeIso8859_1
 138     -- , decodeAscii
 139     -- , encodeIso8859_1
 140     -- , encodeAscii
 141
 142     -- * FreeT Splitters
 143     , chunksOf
 144     , splitsWith
 145     , splits
 146 --  , groupsBy
 147 --  , groups
 148     , lines
 149     , words
 150
 151     -- * Transformations
 152     , intersperse
 153     , packChars
 154
 155     -- * Joiners
 156     , intercalate
 157     , unlines
 158     , unwords
 159
 160    -- * Re-exports
 161     -- $reexports
 162     -- , DecodeResult(..)
 163     -- , Codec
 164     -- , TextException(..)
 165     , module Data.ByteString
 166     , module Data.Text
 167     , module Data.Profunctor
 168     , module Data.Word
 169     , module Pipes.Parse
 170     , module Pipes.Group
 171     ) where
 172
 173 import Control.Applicative ((<*))
 174 import Control.Monad (liftM, unless, join)
 175 import Control.Monad.Trans.State.Strict (StateT(..), modify)
 176 import Data.Monoid ((<>))
 177 import qualified Data.Text as T
 178 import qualified Data.Text.IO as T
 179 import qualified Data.Text.Encoding as TE
 180 import qualified Data.Text.Encoding.Error as TE
 181 import Data.Text (Text)
 182 import qualified Data.Text.Lazy as TL
 183 import qualified Data.Text.Lazy.IO as TL
 184 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
 185 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
 186 import Data.ByteString (ByteString)
 187 import qualified Data.ByteString as B
 188 import qualified Data.ByteString.Char8 as B8
 189 import Data.Char (ord, isSpace)
 190 import Data.Functor.Constant (Constant(Constant, getConstant))
 191 import Data.Functor.Identity (Identity)
 192 import Data.Profunctor (Profunctor)
 193 import qualified Data.Profunctor
 194 import qualified Data.List as List
 195 import Pipes
 196 import qualified Pipes.ByteString as PB
 197 -- import Pipes.Text.Decoding
 198 import Pipes.Core (respond, Server')
 199 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
 200 import qualified Pipes.Group as PG
 201 import qualified Pipes.Parse as PP
 202 import Pipes.Parse (Parser)
 203
 204 import qualified Pipes.Prelude as P
 205 import qualified System.IO as IO
 206 import Data.Char (isSpace)
 207 import Data.Word (Word8)
 208 import Data.Text.StreamDecoding
 209
 210 import Prelude hiding (
 211     all,
 212     any,
 213     break,
 214     concat,
 215     concatMap,
 216     drop,
 217     dropWhile,
 218     elem,
 219     filter,
 220     head,
 221     last,
 222     lines,
 223     length,
 224     map,
 225     maximum,
 226     minimum,
 227     notElem,
 228     null,
 229     readFile,
 230     span,
 231     splitAt,
 232     take,
 233     takeWhile,
 234     unlines,
 235     unwords,
 236     words,
 237     writeFile )
 238
 239 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
 240 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
 241 fromLazy  = foldrChunks (\e a -> yield e >> a) (return ())
 242 {-# INLINE fromLazy #-}
 243
 244
 245 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
 246
 247 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
 248
 249 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
 250 a ^. lens = getConstant (lens Constant a)
 251
 252
 253 -- | Apply a transformation to each 'Char' in the stream
 254 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
 255 map f = P.map (T.map f)
 256 {-# INLINABLE map #-}
 257
 258 {-# RULES "p >-> map f" forall p f .
 259         p >-> map f = for p (\txt -> yield (T.map f txt))
 260   #-}
 261
 262 -- | Map a function over the characters of a text stream and concatenate the results
 263 concatMap
 264     :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
 265 concatMap f = P.map (T.concatMap f)
 266 {-# INLINABLE concatMap #-}
 267
 268 {-# RULES "p >-> concatMap f" forall p f .
 269         p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
 270   #-}
 271
 272 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
 273 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
 274 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
 275 encodeUtf8 :: Monad m => Pipe Text ByteString m r
 276 encodeUtf8 = P.map TE.encodeUtf8
 277 {-# INLINEABLE encodeUtf8 #-}
 278
 279 {-# RULES "p >-> encodeUtf8" forall p .
 280         p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
 281   #-}
 282
 283 -- | Transform a Pipe of 'String's into one of 'Text' chunks
 284 pack :: Monad m => Pipe String Text m r
 285 pack = P.map T.pack
 286 {-# INLINEABLE pack #-}
 287
 288 {-# RULES "p >-> pack" forall p .
 289         p >-> pack = for p (\txt -> yield (T.pack txt))
 290   #-}
 291
 292 -- | Transform a Pipes of 'Text' chunks into one of 'String's
 293 unpack :: Monad m => Pipe Text String m r
 294 unpack = for cat (\t -> yield (T.unpack t))
 295 {-# INLINEABLE unpack #-}
 296
 297 {-# RULES "p >-> unpack" forall p .
 298         p >-> unpack = for p (\txt -> yield (T.unpack txt))
 299   #-}
 300
 301 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
 302 -- here acting as 'Text' pipes, rather as they would  on a lazy text
 303 toCaseFold :: Monad m => Pipe Text Text m ()
 304 toCaseFold = P.map T.toCaseFold
 305 {-# INLINEABLE toCaseFold #-}
 306
 307 {-# RULES "p >-> toCaseFold" forall p .
 308         p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
 309   #-}
 310
 311
 312 -- | lowercase incoming 'Text'
 313 toLower :: Monad m => Pipe Text Text m ()
 314 toLower = P.map T.toLower
 315 {-# INLINEABLE toLower #-}
 316
 317 {-# RULES "p >-> toLower" forall p .
 318         p >-> toLower = for p (\txt -> yield (T.toLower txt))
 319   #-}
 320
 321 -- | uppercase incoming 'Text'
 322 toUpper :: Monad m => Pipe Text Text m ()
 323 toUpper = P.map T.toUpper
 324 {-# INLINEABLE toUpper #-}
 325
 326 {-# RULES "p >-> toUpper" forall p .
 327         p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
 328   #-}
 329
 330 -- | Remove leading white space from an incoming succession of 'Text's
 331 stripStart :: Monad m => Pipe Text Text m r
 332 stripStart = do
 333     chunk <- await
 334     let text = T.stripStart chunk
 335     if T.null text
 336       then stripStart
 337       else do yield text
 338               cat
 339 {-# INLINEABLE stripStart #-}
 340
 341 -- | @(take n)@ only allows @n@ individual characters to pass;
 342 --  contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
 343 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
 344 take n0 = go n0 where
 345     go n
 346         | n <= 0    = return ()
 347         | otherwise = do
 348             txt <- await
 349             let len = fromIntegral (T.length txt)
 350             if (len > n)
 351                 then yield (T.take (fromIntegral n) txt)
 352                 else do
 353                     yield txt
 354                     go (n - len)
 355 {-# INLINABLE take #-}
 356
 357 -- | @(drop n)@ drops the first @n@ characters
 358 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
 359 drop n0 = go n0 where
 360     go n
 361         | n <= 0    = cat
 362         | otherwise = do
 363             txt <- await
 364             let len = fromIntegral (T.length txt)
 365             if (len >= n)
 366                 then do
 367                     yield (T.drop (fromIntegral n) txt)
 368                     cat
 369                 else go (n - len)
 370 {-# INLINABLE drop #-}
 371
 372 -- | Take characters until they fail the predicate
 373 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
 374 takeWhile predicate = go
 375   where
 376     go = do
 377         txt <- await
 378         let (prefix, suffix) = T.span predicate txt
 379         if (T.null suffix)
 380             then do
 381                 yield txt
 382                 go
 383             else yield prefix
 384 {-# INLINABLE takeWhile #-}
 385
 386 -- | Drop characters until they fail the predicate
 387 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
 388 dropWhile predicate = go where
 389     go = do
 390         txt <- await
 391         case T.findIndex (not . predicate) txt of
 392             Nothing -> go
 393             Just i -> do
 394                 yield (T.drop i txt)
 395                 cat
 396 {-# INLINABLE dropWhile #-}
 397
 398 -- | Only allows 'Char's to pass if they satisfy the predicate
 399 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
 400 filter predicate = P.map (T.filter predicate)
 401 {-# INLINABLE filter #-}
 402
 403 {-# RULES "p >-> filter q" forall p q .
 404         p >-> filter q = for p (\txt -> yield (T.filter q txt))
 405   #-}
 406
 407 -- | Strict left scan over the characters
 408 scan
 409     :: (Monad m)
 410     => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
 411 scan step begin = do
 412     yield (T.singleton begin)
 413     go begin
 414   where
 415     go c = do
 416         txt <- await
 417         let txt' = T.scanl step c txt
 418             c' = T.last txt'
 419         yield (T.tail txt')
 420         go c'
 421 {-# INLINABLE scan #-}
 422
 423 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
 424     'TL.Text'
 425 -}
 426 toLazy :: Producer Text Identity () -> TL.Text
 427 toLazy = TL.fromChunks . P.toList
 428 {-# INLINABLE toLazy #-}
 429
 430 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
 431     'TL.Text'
 432
 433     Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
 434     simple testing purposes.  Idiomatic @pipes@ style consumes the chunks
 435     immediately as they are generated instead of loading them all into memory.
 436 -}
 437 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
 438 toLazyM = liftM TL.fromChunks . P.toListM
 439 {-# INLINABLE toLazyM #-}
 440
 441 -- | Reduce the text stream using a strict left fold over characters
 442 foldChars
 443     :: Monad m
 444     => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
 445 foldChars step begin done = P.fold (T.foldl' step) begin done
 446 {-# INLINABLE foldChars #-}
 447
 448 -- | Retrieve the first 'Char'
 449 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
 450 head = go
 451   where
 452     go p = do
 453         x <- nextChar p
 454         case x of
 455             Left   _      -> return  Nothing
 456             Right (c, _) -> return (Just c)
 457 {-# INLINABLE head #-}
 458
 459 -- | Retrieve the last 'Char'
 460 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
 461 last = go Nothing
 462   where
 463     go r p = do
 464         x <- next p
 465         case x of
 466             Left   ()      -> return r
 467             Right (txt, p') ->
 468                 if (T.null txt)
 469                 then go r p'
 470                 else go (Just $ T.last txt) p'
 471 {-# INLINABLE last #-}
 472
 473 -- | Determine if the stream is empty
 474 null :: (Monad m) => Producer Text m () -> m Bool
 475 null = P.all T.null
 476 {-# INLINABLE null #-}
 477
 478 -- | Count the number of characters in the stream
 479 length :: (Monad m, Num n) => Producer Text m () -> m n
 480 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
 481 {-# INLINABLE length #-}
 482
 483 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
 484 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
 485 any predicate = P.any (T.any predicate)
 486 {-# INLINABLE any #-}
 487
 488 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
 489 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
 490 all predicate = P.all (T.all predicate)
 491 {-# INLINABLE all #-}
 492
 493 -- | Return the maximum 'Char' within a text stream
 494 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
 495 maximum = P.fold step Nothing id
 496   where
 497     step mc txt =
 498         if (T.null txt)
 499         then mc
 500         else Just $ case mc of
 501             Nothing -> T.maximum txt
 502             Just c -> max c (T.maximum txt)
 503 {-# INLINABLE maximum #-}
 504
 505 -- | Return the minimum 'Char' within a text stream (surely very useful!)
 506 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
 507 minimum = P.fold step Nothing id
 508   where
 509     step mc txt =
 510         if (T.null txt)
 511         then mc
 512         else case mc of
 513             Nothing -> Just (T.minimum txt)
 514             Just c -> Just (min c (T.minimum txt))
 515 {-# INLINABLE minimum #-}
 516
 517
 518 -- | Find the first element in the stream that matches the predicate
 519 find
 520     :: (Monad m)
 521     => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
 522 find predicate p = head (p >-> filter predicate)
 523 {-# INLINABLE find #-}
 524
 525 -- | Index into a text stream
 526 index
 527     :: (Monad m, Integral a)
 528     => a-> Producer Text m () -> m (Maybe Char)
 529 index n p = head (p >-> drop n)
 530 {-# INLINABLE index #-}
 531
 532
 533 -- | Store a tally of how many segments match the given 'Text'
 534 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
 535 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
 536 {-# INLINABLE count #-}
 537
 538
 539 {-| Consume the first character from a stream of 'Text'
 540
 541     'next' either fails with a 'Left' if the 'Producer' has no more characters or
 542     succeeds with a 'Right' providing the next character and the remainder of the
 543     'Producer'.
 544 -}
 545 nextChar
 546     :: (Monad m)
 547     => Producer Text m r
 548     -> m (Either r (Char, Producer Text m r))
 549 nextChar = go
 550   where
 551     go p = do
 552         x <- next p
 553         case x of
 554             Left   r       -> return (Left r)
 555             Right (txt, p') -> case (T.uncons txt) of
 556                 Nothing        -> go p'
 557                 Just (c, txt') -> return (Right (c, yield txt' >> p'))
 558 {-# INLINABLE nextChar #-}
 559
 560 {-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
 561     'Producer' is empty
 562 -}
 563 drawChar :: (Monad m) => Parser Text m (Maybe Char)
 564 drawChar = do
 565     x <- PP.draw
 566     case x of
 567         Nothing  -> return Nothing
 568         Just txt -> case (T.uncons txt) of
 569             Nothing        -> drawChar
 570             Just (c, txt') -> do
 571                 PP.unDraw txt'
 572                 return (Just c)
 573 {-# INLINABLE drawChar #-}
 574
 575 -- | Push back a 'Char' onto the underlying 'Producer'
 576 unDrawChar :: (Monad m) => Char -> Parser Text m ()
 577 unDrawChar c = modify (yield (T.singleton c) >>)
 578 {-# INLINABLE unDrawChar #-}
 579
 580 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
 581     push the 'Char' back
 582
 583 > peekChar = do
 584 >     x <- drawChar
 585 >     case x of
 586 >         Left  _  -> return ()
 587 >         Right c -> unDrawChar c
 588 >     return x
 589 -}
 590 peekChar :: (Monad m) => Parser Text m (Maybe Char)
 591 peekChar = do
 592     x <- drawChar
 593     case x of
 594         Nothing  -> return ()
 595         Just c -> unDrawChar c
 596     return x
 597 {-# INLINABLE peekChar #-}
 598
 599 {-| Check if the underlying 'Producer' has no more characters
 600
 601     Note that this will skip over empty 'Text' chunks, unlike
 602     'PP.isEndOfInput' from @pipes-parse@, which would consider
 603     an empty 'Text' a valid bit of input.
 604
 605 > isEndOfChars = liftM isLeft peekChar
 606 -}
 607 isEndOfChars :: (Monad m) => Parser Text m Bool
 608 isEndOfChars = do
 609     x <- peekChar
 610     return (case x of
 611         Nothing -> True
 612         Just _-> False )
 613 {-# INLINABLE isEndOfChars #-}
 614
 615
 616
 617
 618 -- | Splits a 'Producer' after the given number of characters
 619 splitAt
 620     :: (Monad m, Integral n)
 621     => n
 622     -> Lens' (Producer Text m r)
 623              (Producer Text m (Producer Text m r))
 624 splitAt n0 k p0 = fmap join (k (go n0 p0))
 625   where
 626     go 0 p = return p
 627     go n p = do
 628         x <- lift (next p)
 629         case x of
 630             Left   r       -> return (return r)
 631             Right (txt, p') -> do
 632                 let len = fromIntegral (T.length txt)
 633                 if (len <= n)
 634                     then do
 635                         yield txt
 636                         go (n - len) p'
 637                     else do
 638                         let (prefix, suffix) = T.splitAt (fromIntegral n) txt
 639                         yield prefix
 640                         return (yield suffix >> p')
 641 {-# INLINABLE splitAt #-}
 642
 643
 644 {-| Split a text stream in two, where the first text stream is the longest
 645     consecutive group of text that satisfy the predicate
 646 -}
 647 span
 648     :: (Monad m)
 649     => (Char -> Bool)
 650     -> Lens' (Producer Text m r)
 651              (Producer Text m (Producer Text m r))
 652 span predicate k p0 = fmap join (k (go p0))
 653   where
 654     go p = do
 655         x <- lift (next p)
 656         case x of
 657             Left   r       -> return (return r)
 658             Right (txt, p') -> do
 659                 let (prefix, suffix) = T.span predicate txt
 660                 if (T.null suffix)
 661                     then do
 662                         yield txt
 663                         go p'
 664                     else do
 665                         yield prefix
 666                         return (yield suffix >> p')
 667 {-# INLINABLE span #-}
 668
 669 {-| Split a text stream in two, where the first text stream is the longest
 670     consecutive group of characters that don't satisfy the predicate
 671 -}
 672 break
 673     :: (Monad m)
 674     => (Char -> Bool)
 675     -> Lens' (Producer Text m r)
 676              (Producer Text m (Producer Text m r))
 677 break predicate = span (not . predicate)
 678 {-# INLINABLE break #-}
 679
 680 {-| Improper lens that splits after the first group of equivalent Chars, as
 681     defined by the given equivalence relation
 682 -}
 683 groupBy
 684     :: (Monad m)
 685     => (Char -> Char -> Bool)
 686     -> Lens' (Producer Text m r)
 687              (Producer Text m (Producer Text m r))
 688 groupBy equals k p0 = fmap join (k ((go p0))) where
 689     go p = do
 690         x <- lift (next p)
 691         case x of
 692             Left   r       -> return (return r)
 693             Right (txt, p') -> case T.uncons txt of
 694                 Nothing      -> go p'
 695                 Just (c, _) -> (yield txt >> p') ^. span (equals c)
 696 {-# INLINABLE groupBy #-}
 697
 698 -- | Improper lens that splits after the first succession of identical 'Char' s
 699 group :: Monad m
 700       => Lens' (Producer Text m r)
 701                (Producer Text m (Producer Text m r))
 702 group = groupBy (==)
 703 {-# INLINABLE group #-}
 704
 705 {-| Improper lens that splits a 'Producer' after the first word
 706
 707     Unlike 'words', this does not drop leading whitespace
 708 -}
 709 word :: (Monad m)
 710      => Lens' (Producer Text m r)
 711               (Producer Text m (Producer Text m r))
 712 word k p0 = fmap join (k (to p0))
 713   where
 714     to p = do
 715         p' <- p^.span isSpace
 716         p'^.break isSpace
 717 {-# INLINABLE word #-}
 718
 719
 720 line :: (Monad m)
 721      => Lens' (Producer Text m r)
 722               (Producer Text m (Producer Text m r))
 723 line = break (== '\n')
 724
 725 {-# INLINABLE line #-}
 726
 727
 728 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
 729 intersperse
 730     :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
 731 intersperse c = go0
 732   where
 733     go0 p = do
 734         x <- lift (next p)
 735         case x of
 736             Left   r       -> return r
 737             Right (txt, p') -> do
 738                 yield (T.intersperse c txt)
 739                 go1 p'
 740     go1 p = do
 741         x <- lift (next p)
 742         case x of
 743             Left   r       -> return r
 744             Right (txt, p') -> do
 745                 yield (T.singleton c)
 746                 yield (T.intersperse c txt)
 747                 go1 p'
 748 {-# INLINABLE intersperse #-}
 749
 750
 751
 752 -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
 753 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
 754 packChars = Data.Profunctor.dimap to (fmap from)
 755   where
 756     -- to :: Monad m => Producer Char m x -> Producer Text m x
 757     to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
 758
 759     step diffAs c = diffAs . (c:)
 760
 761     done diffAs = T.pack (diffAs [])
 762
 763     -- from :: Monad m => Producer Text m x -> Producer Char m x
 764     from p = for p (each . T.unpack)
 765 {-# INLINABLE packChars #-}
 766
 767
 768 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
 769 chunksOf
 770     :: (Monad m, Integral n)
 771     => n -> Lens' (Producer Text m r)
 772                   (FreeT (Producer Text m) m r)
 773 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
 774   where
 775     go p = do
 776         x <- next p
 777         return $ case x of
 778             Left   r       -> Pure r
 779             Right (txt, p') -> Free $ do
 780                 p'' <- (yield txt >> p') ^. splitAt n
 781                 return $ FreeT (go p'')
 782 {-# INLINABLE chunksOf #-}
 783
 784
 785 {-| Split a text stream into sub-streams delimited by characters that satisfy the
 786     predicate
 787 -}
 788 splitsWith
 789     :: (Monad m)
 790     => (Char -> Bool)
 791     -> Producer Text m r
 792     -> FreeT (Producer Text m) m r
 793 splitsWith predicate p0 = FreeT (go0 p0)
 794   where
 795     go0 p = do
 796         x <- next p
 797         case x of
 798             Left   r       -> return (Pure r)
 799             Right (txt, p') ->
 800                 if (T.null txt)
 801                 then go0 p'
 802                 else return $ Free $ do
 803                     p'' <-  (yield txt >> p') ^. span (not . predicate)
 804                     return $ FreeT (go1 p'')
 805     go1 p = do
 806         x <- nextChar p
 807         return $ case x of
 808             Left   r      -> Pure r
 809             Right (_, p') -> Free $ do
 810                     p'' <- p' ^. span (not . predicate)
 811                     return $ FreeT (go1 p'')
 812 {-# INLINABLE splitsWith #-}
 813
 814 -- | Split a text stream using the given 'Char' as the delimiter
 815 splits :: (Monad m)
 816       => Char
 817       -> Lens' (Producer Text m r)
 818                (FreeT (Producer Text m) m r)
 819 splits c k p =
 820           fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
 821 {-# INLINABLE splits #-}
 822
 823 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
 824     given equivalence relation
 825 -}
 826 groupsBy
 827     :: Monad m
 828     => (Char -> Char -> Bool)
 829     -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
 830 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
 831   go p = do x <- next p
 832             case x of Left   r       -> return (Pure r)
 833                       Right (bs, p') -> case T.uncons bs of
 834                              Nothing      -> go p'
 835                              Just (c, _) -> do return $ Free $ do
 836                                                  p'' <- (yield bs >> p')^.span (equals c)
 837                                                  return $ FreeT (go p'')
 838 {-# INLINABLE groupsBy #-}
 839
 840
 841 -- | Like 'groupsBy', where the equality predicate is ('==')
 842 groups
 843     :: Monad m
 844     => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
 845 groups = groupsBy (==)
 846 {-# INLINABLE groups #-}
 847
 848
 849
 850 {-| Split a text stream into 'FreeT'-delimited lines
 851 -}
 852 lines
 853     :: (Monad m) => Iso' (Producer Text m r)  (FreeT (Producer Text m) m r)
 854 lines = Data.Profunctor.dimap _lines (fmap _unlines)
 855   where
 856   _lines p0 = FreeT (go0 p0)
 857     where
 858       go0 p = do
 859               x <- next p
 860               case x of
 861                   Left   r       -> return (Pure r)
 862                   Right (txt, p') ->
 863                       if (T.null txt)
 864                       then go0 p'
 865                       else return $ Free $ go1 (yield txt >> p')
 866       go1 p = do
 867               p' <- p ^. break ('\n' ==)
 868               return $ FreeT $ do
 869                   x  <- nextChar p'
 870                   case x of
 871                       Left   r      -> return $ Pure r
 872                       Right (_, p'') -> go0 p''
 873   -- _unlines
 874   --     :: Monad m
 875   --      => FreeT (Producer Text m) m x -> Producer Text m x
 876   _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
 877
 878
 879 {-# INLINABLE lines #-}
 880
 881
 882 -- | Split a text stream into 'FreeT'-delimited words
 883 words
 884     :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
 885 words = Data.Profunctor.dimap go (fmap _unwords)
 886   where
 887     go p = FreeT $ do
 888         x <- next (p >-> dropWhile isSpace)
 889         return $ case x of
 890             Left   r       -> Pure r
 891             Right (bs, p') -> Free $ do
 892                 p'' <-  (yield bs >> p') ^. break isSpace
 893                 return (go p'')
 894     _unwords = PG.intercalates (yield $ T.singleton ' ')
 895
 896 {-# INLINABLE words #-}
 897
 898
 899 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
 900     interspersing a text stream in between them
 901 -}
 902 intercalate
 903     :: (Monad m)
 904     => Producer Text m ()
 905     -> FreeT (Producer Text m) m r
 906     -> Producer Text m r
 907 intercalate p0 = go0
 908   where
 909     go0 f = do
 910         x <- lift (runFreeT f)
 911         case x of
 912             Pure r -> return r
 913             Free p -> do
 914                 f' <- p
 915                 go1 f'
 916     go1 f = do
 917         x <- lift (runFreeT f)
 918         case x of
 919             Pure r -> return r
 920             Free p -> do
 921                 p0
 922                 f' <- p
 923                 go1 f'
 924 {-# INLINABLE intercalate #-}
 925
 926 {-| Join 'FreeT'-delimited lines into a text stream
 927 -}
 928 unlines
 929     :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
 930 unlines = go
 931   where
 932     go f = do
 933         x <- lift (runFreeT f)
 934         case x of
 935             Pure r -> return r
 936             Free p -> do
 937                 f' <- p
 938                 yield $ T.singleton '\n'
 939                 go f'
 940 {-# INLINABLE unlines #-}
 941
 942 {-| Join 'FreeT'-delimited words into a text stream
 943 -}
 944 unwords
 945     :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
 946 unwords = intercalate (yield $ T.singleton ' ')
 947 {-# INLINABLE unwords #-}
 948
 949 {- $parse
 950     The following parsing utilities are single-character analogs of the ones found
 951     @pipes-parse@.
 952 -}
 953
 954 {- $reexports
 955
 956     @Data.Text@ re-exports the 'Text' type.
 957
 958     @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
 959 -}
 960
 961