]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
use new text-stream-decoding
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
9667f797
GG
1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
13a43263 3{-| This module provides @pipes@ utilities for \"text streams\", which are
64e03122 4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
6 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
7 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
91727d11 8
63ea9ffd 9 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
31f41a5d 10 example, the following program copies a document from one file to another:
91727d11 11
12> import Pipes
bbdfd305 13> import qualified Pipes.Text as Text
14> import qualified Pipes.Text.IO as Text
91727d11 15> import System.IO
16>
17> main =
18> withFile "inFile.txt" ReadMode $ \hIn ->
19> withFile "outFile.txt" WriteMode $ \hOut ->
31f41a5d 20> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
21
22To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
91727d11 23
13a43263 24> import Pipes
bbdfd305 25> import qualified Pipes.Text as Text
26> import qualified Pipes.Text.IO as Text
13a43263 27> import Pipes.Safe
28>
31f41a5d 29> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
13a43263 30
91727d11 31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
c8027236 32 and 'stdout' pipes, as with the following \"echo\" program:
91727d11 33
31f41a5d 34> main = runEffect $ Text.stdin >-> Text.stdout
91727d11 35
c8027236 36 You can also translate pure lazy 'TL.Text's to and from pipes:
91727d11 37
31f41a5d 38> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
91727d11 39
40 In addition, this module provides many functions equivalent to lazy
31f41a5d 41 'Text' functions so that you can transform or fold text streams. For
91727d11 42 example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 43 might write:
91727d11 44
45> import Pipes
31f41a5d 46> import qualified Pipes.Text as Text
47> import qualified Pipes.Parse as Parse
91727d11 48>
31f41a5d 49> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
91727d11 50> where
31f41a5d 51> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
91727d11 52
31f41a5d 53 The above program will never bring more than one chunk of text (~ 32 KB) into
91727d11 54 memory, no matter how long the lines are.
55
56 Note that functions in this library are designed to operate on streams that
31f41a5d 57 are insensitive to text boundaries. This means that they may freely split
64e03122 58 text into smaller texts, /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
91727d11 61-}
62
7faef8bc 63module Pipes.Text (
91727d11 64 -- * Producers
1677dc12 65 fromLazy
bbdfd305 66 -- , stdin
67 -- , fromHandle
68 -- , readFile
91727d11 69
70 -- * Consumers
bbdfd305 71 -- , stdout
72 -- , toHandle
73 -- , writeFile
91727d11 74
75 -- * Pipes
1677dc12 76 , map
77 , concatMap
78 , take
79 , drop
80 , takeWhile
81 , dropWhile
82 , filter
83 , scan
bbdfd305 84-- , encodeUtf8
1677dc12 85 , pack
86 , unpack
87 , toCaseFold
88 , toLower
89 , toUpper
90 , stripStart
91727d11 91
92 -- * Folds
1677dc12 93 , toLazy
94 , toLazyM
95 , foldChars
96 , head
97 , last
98 , null
99 , length
100 , any
101 , all
102 , maximum
103 , minimum
104 , find
105 , index
106 , count
107
108 -- * Primitive Character Parsers
109 -- $parse
110 , nextChar
111 , drawChar
112 , unDrawChar
113 , peekChar
9e9bb0ce 114 , isEndOfChars
1677dc12 115
116 -- * Parsing Lenses
9e9bb0ce 117 , splitAt
1677dc12 118 , span
119 , break
120 , groupBy
121 , group
9e9bb0ce 122 , word
123 , line
90189414 124
bbdfd305 125 -- -- * Decoding Lenses
126 -- , decodeUtf8
127 -- , codec
128 --
129 -- -- * Codecs
130 -- , utf8
131 -- , utf16_le
132 -- , utf16_be
133 -- , utf32_le
134 -- , utf32_be
135 --
136 -- -- * Other Decoding/Encoding Functions
137 -- , decodeIso8859_1
138 -- , decodeAscii
139 -- , encodeIso8859_1
140 -- , encodeAscii
1677dc12 141
142 -- * FreeT Splitters
143 , chunksOf
144 , splitsWith
0f8c6f1b 145 , splits
1677dc12 146-- , groupsBy
147-- , groups
148 , lines
149 , words
150
91727d11 151 -- * Transformations
1677dc12 152 , intersperse
9e9bb0ce 153 , packChars
31f41a5d 154
91727d11 155 -- * Joiners
1677dc12 156 , intercalate
157 , unlines
158 , unwords
9e9bb0ce 159
1677dc12 160 -- * Re-exports
91727d11 161 -- $reexports
bbdfd305 162 -- , DecodeResult(..)
163 -- , Codec
164 -- , TextException(..)
1677dc12 165 , module Data.ByteString
166 , module Data.Text
167 , module Data.Profunctor
168 , module Data.Word
169 , module Pipes.Parse
7ed76745 170 , module Pipes.Group
91727d11 171 ) where
172
0f8c6f1b 173import Control.Applicative ((<*))
64e03122 174import Control.Monad (liftM, unless, join)
9e9bb0ce 175import Control.Monad.Trans.State.Strict (StateT(..), modify)
ca6f90a0 176import Data.Monoid ((<>))
91727d11 177import qualified Data.Text as T
178import qualified Data.Text.IO as T
31f41a5d 179import qualified Data.Text.Encoding as TE
63ea9ffd 180import qualified Data.Text.Encoding.Error as TE
91727d11 181import Data.Text (Text)
182import qualified Data.Text.Lazy as TL
183import qualified Data.Text.Lazy.IO as TL
184import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
185import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
31f41a5d 186import Data.ByteString (ByteString)
187import qualified Data.ByteString as B
90189414 188import qualified Data.ByteString.Char8 as B8
cf10d6f1 189import Data.Char (ord, isSpace)
1677dc12 190import Data.Functor.Constant (Constant(Constant, getConstant))
91727d11 191import Data.Functor.Identity (Identity)
1677dc12 192import Data.Profunctor (Profunctor)
193import qualified Data.Profunctor
91727d11 194import qualified Data.List as List
91727d11 195import Pipes
5e3f5409 196import qualified Pipes.ByteString as PB
bbdfd305 197-- import Pipes.Text.Decoding
91727d11 198import Pipes.Core (respond, Server')
7fc48f7c 199import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
7ed76745 200import qualified Pipes.Group as PG
91727d11 201import qualified Pipes.Parse as PP
7ed76745 202import Pipes.Parse (Parser)
bbdfd305 203
91727d11 204import qualified Pipes.Prelude as P
205import qualified System.IO as IO
206import Data.Char (isSpace)
63ea9ffd 207import Data.Word (Word8)
bbdfd305 208import Data.Text.StreamDecoding
1677dc12 209
91727d11 210import Prelude hiding (
211 all,
212 any,
213 break,
214 concat,
215 concatMap,
216 drop,
217 dropWhile,
218 elem,
219 filter,
220 head,
221 last,
222 lines,
223 length,
224 map,
225 maximum,
226 minimum,
227 notElem,
228 null,
229 readFile,
230 span,
231 splitAt,
232 take,
233 takeWhile,
234 unlines,
235 unwords,
236 words,
237 writeFile )
238
239-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
240fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
241fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 242{-# INLINE fromLazy #-}
91727d11 243
1677dc12 244
245type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
246
247type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
248
249(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
250a ^. lens = getConstant (lens Constant a)
251
252
91727d11 253-- | Apply a transformation to each 'Char' in the stream
254map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
255map f = P.map (T.map f)
256{-# INLINABLE map #-}
257
ff38b9f0 258{-# RULES "p >-> map f" forall p f .
259 p >-> map f = for p (\txt -> yield (T.map f txt))
260 #-}
261
31f41a5d 262-- | Map a function over the characters of a text stream and concatenate the results
91727d11 263concatMap
264 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
265concatMap f = P.map (T.concatMap f)
266{-# INLINABLE concatMap #-}
267
ff38b9f0 268{-# RULES "p >-> concatMap f" forall p f .
269 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
270 #-}
7faef8bc 271
272-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
a02a69ad 273-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
274-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
7faef8bc 275encodeUtf8 :: Monad m => Pipe Text ByteString m r
276encodeUtf8 = P.map TE.encodeUtf8
277{-# INLINEABLE encodeUtf8 #-}
278
ff38b9f0 279{-# RULES "p >-> encodeUtf8" forall p .
280 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
281 #-}
282
c0343bc9 283-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 284pack :: Monad m => Pipe String Text m r
285pack = P.map T.pack
286{-# INLINEABLE pack #-}
287
ff38b9f0 288{-# RULES "p >-> pack" forall p .
289 p >-> pack = for p (\txt -> yield (T.pack txt))
290 #-}
291
292-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 293unpack :: Monad m => Pipe Text String m r
d4732515 294unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 295{-# INLINEABLE unpack #-}
296
ff38b9f0 297{-# RULES "p >-> unpack" forall p .
298 p >-> unpack = for p (\txt -> yield (T.unpack txt))
299 #-}
d4732515 300
b0d86a59 301-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
302-- here acting as 'Text' pipes, rather as they would on a lazy text
7faef8bc 303toCaseFold :: Monad m => Pipe Text Text m ()
304toCaseFold = P.map T.toCaseFold
305{-# INLINEABLE toCaseFold #-}
306
ff38b9f0 307{-# RULES "p >-> toCaseFold" forall p .
308 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
309 #-}
310
311
c0343bc9 312-- | lowercase incoming 'Text'
7faef8bc 313toLower :: Monad m => Pipe Text Text m ()
314toLower = P.map T.toLower
315{-# INLINEABLE toLower #-}
316
ff38b9f0 317{-# RULES "p >-> toLower" forall p .
318 p >-> toLower = for p (\txt -> yield (T.toLower txt))
319 #-}
320
c0343bc9 321-- | uppercase incoming 'Text'
7faef8bc 322toUpper :: Monad m => Pipe Text Text m ()
323toUpper = P.map T.toUpper
324{-# INLINEABLE toUpper #-}
325
ff38b9f0 326{-# RULES "p >-> toUpper" forall p .
327 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
328 #-}
329
c0343bc9 330-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 331stripStart :: Monad m => Pipe Text Text m r
332stripStart = do
333 chunk <- await
334 let text = T.stripStart chunk
335 if T.null text
336 then stripStart
b0d86a59 337 else do yield text
338 cat
7faef8bc 339{-# INLINEABLE stripStart #-}
340
31f41a5d 341-- | @(take n)@ only allows @n@ individual characters to pass;
342-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 343take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
344take n0 = go n0 where
345 go n
346 | n <= 0 = return ()
347 | otherwise = do
31f41a5d 348 txt <- await
349 let len = fromIntegral (T.length txt)
91727d11 350 if (len > n)
31f41a5d 351 then yield (T.take (fromIntegral n) txt)
91727d11 352 else do
31f41a5d 353 yield txt
91727d11 354 go (n - len)
355{-# INLINABLE take #-}
356
31f41a5d 357-- | @(drop n)@ drops the first @n@ characters
91727d11 358drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
359drop n0 = go n0 where
360 go n
361 | n <= 0 = cat
362 | otherwise = do
31f41a5d 363 txt <- await
364 let len = fromIntegral (T.length txt)
91727d11 365 if (len >= n)
366 then do
31f41a5d 367 yield (T.drop (fromIntegral n) txt)
91727d11 368 cat
369 else go (n - len)
370{-# INLINABLE drop #-}
371
31f41a5d 372-- | Take characters until they fail the predicate
91727d11 373takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
374takeWhile predicate = go
375 where
376 go = do
31f41a5d 377 txt <- await
378 let (prefix, suffix) = T.span predicate txt
91727d11 379 if (T.null suffix)
380 then do
31f41a5d 381 yield txt
91727d11 382 go
383 else yield prefix
384{-# INLINABLE takeWhile #-}
385
31f41a5d 386-- | Drop characters until they fail the predicate
91727d11 387dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
388dropWhile predicate = go where
389 go = do
31f41a5d 390 txt <- await
391 case T.findIndex (not . predicate) txt of
91727d11 392 Nothing -> go
393 Just i -> do
31f41a5d 394 yield (T.drop i txt)
91727d11 395 cat
396{-# INLINABLE dropWhile #-}
397
398-- | Only allows 'Char's to pass if they satisfy the predicate
399filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
400filter predicate = P.map (T.filter predicate)
401{-# INLINABLE filter #-}
402
ff38b9f0 403{-# RULES "p >-> filter q" forall p q .
404 p >-> filter q = for p (\txt -> yield (T.filter q txt))
405 #-}
406
31f41a5d 407-- | Strict left scan over the characters
91727d11 408scan
409 :: (Monad m)
410 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
11645cdc
GG
411scan step begin = do
412 yield (T.singleton begin)
413 go begin
91727d11 414 where
31f41a5d 415 go c = do
416 txt <- await
417 let txt' = T.scanl step c txt
418 c' = T.last txt'
11645cdc 419 yield (T.tail txt')
31f41a5d 420 go c'
91727d11 421{-# INLINABLE scan #-}
422
423{-| Fold a pure 'Producer' of strict 'Text's into a lazy
424 'TL.Text'
425-}
426toLazy :: Producer Text Identity () -> TL.Text
427toLazy = TL.fromChunks . P.toList
428{-# INLINABLE toLazy #-}
429
430{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
431 'TL.Text'
432
433 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
434 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
435 immediately as they are generated instead of loading them all into memory.
436-}
437toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
438toLazyM = liftM TL.fromChunks . P.toListM
439{-# INLINABLE toLazyM #-}
440
31f41a5d 441-- | Reduce the text stream using a strict left fold over characters
64e03122 442foldChars
91727d11 443 :: Monad m
444 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 445foldChars step begin done = P.fold (T.foldl' step) begin done
1677dc12 446{-# INLINABLE foldChars #-}
91727d11 447
448-- | Retrieve the first 'Char'
449head :: (Monad m) => Producer Text m () -> m (Maybe Char)
450head = go
451 where
452 go p = do
453 x <- nextChar p
454 case x of
455 Left _ -> return Nothing
31f41a5d 456 Right (c, _) -> return (Just c)
91727d11 457{-# INLINABLE head #-}
458
459-- | Retrieve the last 'Char'
460last :: (Monad m) => Producer Text m () -> m (Maybe Char)
461last = go Nothing
462 where
463 go r p = do
464 x <- next p
465 case x of
466 Left () -> return r
31f41a5d 467 Right (txt, p') ->
468 if (T.null txt)
91727d11 469 then go r p'
31f41a5d 470 else go (Just $ T.last txt) p'
91727d11 471{-# INLINABLE last #-}
472
473-- | Determine if the stream is empty
474null :: (Monad m) => Producer Text m () -> m Bool
475null = P.all T.null
476{-# INLINABLE null #-}
477
62e8521c 478-- | Count the number of characters in the stream
91727d11 479length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 480length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 481{-# INLINABLE length #-}
482
483-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
484any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
485any predicate = P.any (T.any predicate)
486{-# INLINABLE any #-}
487
488-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
489all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
490all predicate = P.all (T.all predicate)
491{-# INLINABLE all #-}
492
62e8521c 493-- | Return the maximum 'Char' within a text stream
91727d11 494maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
495maximum = P.fold step Nothing id
496 where
31f41a5d 497 step mc txt =
498 if (T.null txt)
499 then mc
500 else Just $ case mc of
501 Nothing -> T.maximum txt
502 Just c -> max c (T.maximum txt)
91727d11 503{-# INLINABLE maximum #-}
504
62e8521c 505-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 506minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
507minimum = P.fold step Nothing id
508 where
31f41a5d 509 step mc txt =
510 if (T.null txt)
511 then mc
512 else case mc of
513 Nothing -> Just (T.minimum txt)
514 Just c -> Just (min c (T.minimum txt))
91727d11 515{-# INLINABLE minimum #-}
516
1677dc12 517
91727d11 518-- | Find the first element in the stream that matches the predicate
519find
520 :: (Monad m)
521 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
522find predicate p = head (p >-> filter predicate)
523{-# INLINABLE find #-}
524
62e8521c 525-- | Index into a text stream
91727d11 526index
527 :: (Monad m, Integral a)
528 => a-> Producer Text m () -> m (Maybe Char)
529index n p = head (p >-> drop n)
530{-# INLINABLE index #-}
531
63ea9ffd 532
31f41a5d 533-- | Store a tally of how many segments match the given 'Text'
534count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
535count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
536{-# INLINABLE count #-}
537
9e9bb0ce 538
539{-| Consume the first character from a stream of 'Text'
540
541 'next' either fails with a 'Left' if the 'Producer' has no more characters or
542 succeeds with a 'Right' providing the next character and the remainder of the
543 'Producer'.
544-}
545nextChar
546 :: (Monad m)
547 => Producer Text m r
548 -> m (Either r (Char, Producer Text m r))
549nextChar = go
550 where
551 go p = do
552 x <- next p
553 case x of
554 Left r -> return (Left r)
555 Right (txt, p') -> case (T.uncons txt) of
556 Nothing -> go p'
557 Just (c, txt') -> return (Right (c, yield txt' >> p'))
558{-# INLINABLE nextChar #-}
559
560{-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
561 'Producer' is empty
562-}
563drawChar :: (Monad m) => Parser Text m (Maybe Char)
564drawChar = do
565 x <- PP.draw
566 case x of
567 Nothing -> return Nothing
568 Just txt -> case (T.uncons txt) of
569 Nothing -> drawChar
570 Just (c, txt') -> do
571 PP.unDraw txt'
572 return (Just c)
573{-# INLINABLE drawChar #-}
574
575-- | Push back a 'Char' onto the underlying 'Producer'
576unDrawChar :: (Monad m) => Char -> Parser Text m ()
577unDrawChar c = modify (yield (T.singleton c) >>)
578{-# INLINABLE unDrawChar #-}
579
580{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
581 push the 'Char' back
582
583> peekChar = do
584> x <- drawChar
585> case x of
586> Left _ -> return ()
587> Right c -> unDrawChar c
588> return x
589-}
590peekChar :: (Monad m) => Parser Text m (Maybe Char)
591peekChar = do
592 x <- drawChar
593 case x of
594 Nothing -> return ()
595 Just c -> unDrawChar c
596 return x
597{-# INLINABLE peekChar #-}
598
599{-| Check if the underlying 'Producer' has no more characters
600
601 Note that this will skip over empty 'Text' chunks, unlike
602 'PP.isEndOfInput' from @pipes-parse@, which would consider
603 an empty 'Text' a valid bit of input.
604
605> isEndOfChars = liftM isLeft peekChar
606-}
607isEndOfChars :: (Monad m) => Parser Text m Bool
608isEndOfChars = do
609 x <- peekChar
610 return (case x of
611 Nothing -> True
612 Just _-> False )
613{-# INLINABLE isEndOfChars #-}
614
615
ca6f90a0 616
31f41a5d 617
618-- | Splits a 'Producer' after the given number of characters
91727d11 619splitAt
620 :: (Monad m, Integral n)
621 => n
9e9bb0ce 622 -> Lens' (Producer Text m r)
623 (Producer Text m (Producer Text m r))
624splitAt n0 k p0 = fmap join (k (go n0 p0))
91727d11 625 where
626 go 0 p = return p
627 go n p = do
628 x <- lift (next p)
629 case x of
630 Left r -> return (return r)
31f41a5d 631 Right (txt, p') -> do
632 let len = fromIntegral (T.length txt)
91727d11 633 if (len <= n)
634 then do
31f41a5d 635 yield txt
91727d11 636 go (n - len) p'
637 else do
31f41a5d 638 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 639 yield prefix
640 return (yield suffix >> p')
641{-# INLINABLE splitAt #-}
642
91727d11 643
31f41a5d 644{-| Split a text stream in two, where the first text stream is the longest
645 consecutive group of text that satisfy the predicate
91727d11 646-}
647span
648 :: (Monad m)
649 => (Char -> Bool)
9e9bb0ce 650 -> Lens' (Producer Text m r)
651 (Producer Text m (Producer Text m r))
652span predicate k p0 = fmap join (k (go p0))
91727d11 653 where
654 go p = do
655 x <- lift (next p)
656 case x of
657 Left r -> return (return r)
31f41a5d 658 Right (txt, p') -> do
659 let (prefix, suffix) = T.span predicate txt
91727d11 660 if (T.null suffix)
661 then do
31f41a5d 662 yield txt
91727d11 663 go p'
664 else do
665 yield prefix
666 return (yield suffix >> p')
667{-# INLINABLE span #-}
668
62e8521c 669{-| Split a text stream in two, where the first text stream is the longest
670 consecutive group of characters that don't satisfy the predicate
91727d11 671-}
672break
673 :: (Monad m)
674 => (Char -> Bool)
9e9bb0ce 675 -> Lens' (Producer Text m r)
676 (Producer Text m (Producer Text m r))
91727d11 677break predicate = span (not . predicate)
678{-# INLINABLE break #-}
679
9e9bb0ce 680{-| Improper lens that splits after the first group of equivalent Chars, as
681 defined by the given equivalence relation
682-}
683groupBy
684 :: (Monad m)
685 => (Char -> Char -> Bool)
686 -> Lens' (Producer Text m r)
687 (Producer Text m (Producer Text m r))
688groupBy equals k p0 = fmap join (k ((go p0))) where
689 go p = do
690 x <- lift (next p)
691 case x of
692 Left r -> return (return r)
693 Right (txt, p') -> case T.uncons txt of
694 Nothing -> go p'
695 Just (c, _) -> (yield txt >> p') ^. span (equals c)
696{-# INLINABLE groupBy #-}
697
698-- | Improper lens that splits after the first succession of identical 'Char' s
699group :: Monad m
700 => Lens' (Producer Text m r)
701 (Producer Text m (Producer Text m r))
702group = groupBy (==)
703{-# INLINABLE group #-}
704
705{-| Improper lens that splits a 'Producer' after the first word
706
707 Unlike 'words', this does not drop leading whitespace
708-}
709word :: (Monad m)
710 => Lens' (Producer Text m r)
711 (Producer Text m (Producer Text m r))
712word k p0 = fmap join (k (to p0))
713 where
714 to p = do
715 p' <- p^.span isSpace
716 p'^.break isSpace
717{-# INLINABLE word #-}
718
719
720line :: (Monad m)
721 => Lens' (Producer Text m r)
722 (Producer Text m (Producer Text m r))
723line = break (== '\n')
724
725{-# INLINABLE line #-}
726
727
728-- | Intersperse a 'Char' in between the characters of stream of 'Text'
729intersperse
730 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
731intersperse c = go0
732 where
733 go0 p = do
734 x <- lift (next p)
735 case x of
736 Left r -> return r
737 Right (txt, p') -> do
738 yield (T.intersperse c txt)
739 go1 p'
740 go1 p = do
741 x <- lift (next p)
742 case x of
743 Left r -> return r
744 Right (txt, p') -> do
745 yield (T.singleton c)
746 yield (T.intersperse c txt)
747 go1 p'
748{-# INLINABLE intersperse #-}
749
750
751
752-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
753packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
754packChars = Data.Profunctor.dimap to (fmap from)
755 where
756 -- to :: Monad m => Producer Char m x -> Producer Text m x
7ed76745 757 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
9e9bb0ce 758
759 step diffAs c = diffAs . (c:)
760
761 done diffAs = T.pack (diffAs [])
762
763 -- from :: Monad m => Producer Text m x -> Producer Char m x
764 from p = for p (each . T.unpack)
765{-# INLINABLE packChars #-}
766
0f8c6f1b 767
768-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
769chunksOf
770 :: (Monad m, Integral n)
771 => n -> Lens' (Producer Text m r)
772 (FreeT (Producer Text m) m r)
773chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
774 where
775 go p = do
776 x <- next p
777 return $ case x of
7ed76745 778 Left r -> Pure r
779 Right (txt, p') -> Free $ do
0f8c6f1b 780 p'' <- (yield txt >> p') ^. splitAt n
7ed76745 781 return $ FreeT (go p'')
0f8c6f1b 782{-# INLINABLE chunksOf #-}
783
784
62e8521c 785{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 786 predicate
787-}
1677dc12 788splitsWith
91727d11 789 :: (Monad m)
790 => (Char -> Bool)
791 -> Producer Text m r
7ed76745 792 -> FreeT (Producer Text m) m r
793splitsWith predicate p0 = FreeT (go0 p0)
91727d11 794 where
795 go0 p = do
796 x <- next p
797 case x of
7ed76745 798 Left r -> return (Pure r)
31f41a5d 799 Right (txt, p') ->
800 if (T.null txt)
91727d11 801 then go0 p'
7ed76745 802 else return $ Free $ do
9e9bb0ce 803 p'' <- (yield txt >> p') ^. span (not . predicate)
7ed76745 804 return $ FreeT (go1 p'')
91727d11 805 go1 p = do
806 x <- nextChar p
807 return $ case x of
7ed76745 808 Left r -> Pure r
809 Right (_, p') -> Free $ do
9e9bb0ce 810 p'' <- p' ^. span (not . predicate)
7ed76745 811 return $ FreeT (go1 p'')
1677dc12 812{-# INLINABLE splitsWith #-}
91727d11 813
31f41a5d 814-- | Split a text stream using the given 'Char' as the delimiter
0f8c6f1b 815splits :: (Monad m)
91727d11 816 => Char
0f8c6f1b 817 -> Lens' (Producer Text m r)
818 (FreeT (Producer Text m) m r)
819splits c k p =
7ed76745 820 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
0f8c6f1b 821{-# INLINABLE splits #-}
822
823{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
824 given equivalence relation
825-}
826groupsBy
827 :: Monad m
828 => (Char -> Char -> Bool)
829 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
7ed76745 830groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
0f8c6f1b 831 go p = do x <- next p
7ed76745 832 case x of Left r -> return (Pure r)
0f8c6f1b 833 Right (bs, p') -> case T.uncons bs of
834 Nothing -> go p'
7ed76745 835 Just (c, _) -> do return $ Free $ do
0f8c6f1b 836 p'' <- (yield bs >> p')^.span (equals c)
7ed76745 837 return $ FreeT (go p'')
0f8c6f1b 838{-# INLINABLE groupsBy #-}
839
840
841-- | Like 'groupsBy', where the equality predicate is ('==')
842groups
843 :: Monad m
844 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
845groups = groupsBy (==)
846{-# INLINABLE groups #-}
847
91727d11 848
91727d11 849
62e8521c 850{-| Split a text stream into 'FreeT'-delimited lines
91727d11 851-}
852lines
0f8c6f1b 853 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
854lines = Data.Profunctor.dimap _lines (fmap _unlines)
91727d11 855 where
7ed76745 856 _lines p0 = FreeT (go0 p0)
0f8c6f1b 857 where
858 go0 p = do
859 x <- next p
860 case x of
7ed76745 861 Left r -> return (Pure r)
0f8c6f1b 862 Right (txt, p') ->
863 if (T.null txt)
864 then go0 p'
7ed76745 865 else return $ Free $ go1 (yield txt >> p')
0f8c6f1b 866 go1 p = do
867 p' <- p ^. break ('\n' ==)
7ed76745 868 return $ FreeT $ do
0f8c6f1b 869 x <- nextChar p'
870 case x of
7ed76745 871 Left r -> return $ Pure r
0f8c6f1b 872 Right (_, p'') -> go0 p''
873 -- _unlines
874 -- :: Monad m
875 -- => FreeT (Producer Text m) m x -> Producer Text m x
7fc48f7c 876 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
877
0f8c6f1b 878
91727d11 879{-# INLINABLE lines #-}
91727d11 880
31f41a5d 881
31f41a5d 882-- | Split a text stream into 'FreeT'-delimited words
91727d11 883words
0f8c6f1b 884 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
885words = Data.Profunctor.dimap go (fmap _unwords)
91727d11 886 where
7ed76745 887 go p = FreeT $ do
cf10d6f1 888 x <- next (p >-> dropWhile isSpace)
889 return $ case x of
7ed76745 890 Left r -> Pure r
891 Right (bs, p') -> Free $ do
9e9bb0ce 892 p'' <- (yield bs >> p') ^. break isSpace
cf10d6f1 893 return (go p'')
7ed76745 894 _unwords = PG.intercalates (yield $ T.singleton ' ')
0f8c6f1b 895
91727d11 896{-# INLINABLE words #-}
897
cf10d6f1 898
31f41a5d 899{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
900 interspersing a text stream in between them
91727d11 901-}
902intercalate
903 :: (Monad m)
904 => Producer Text m ()
905 -> FreeT (Producer Text m) m r
906 -> Producer Text m r
907intercalate p0 = go0
908 where
909 go0 f = do
7ed76745 910 x <- lift (runFreeT f)
91727d11 911 case x of
7ed76745 912 Pure r -> return r
913 Free p -> do
91727d11 914 f' <- p
915 go1 f'
916 go1 f = do
7ed76745 917 x <- lift (runFreeT f)
91727d11 918 case x of
7ed76745 919 Pure r -> return r
920 Free p -> do
91727d11 921 p0
922 f' <- p
923 go1 f'
924{-# INLINABLE intercalate #-}
925
62e8521c 926{-| Join 'FreeT'-delimited lines into a text stream
91727d11 927-}
928unlines
929 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
930unlines = go
931 where
932 go f = do
7ed76745 933 x <- lift (runFreeT f)
91727d11 934 case x of
7ed76745 935 Pure r -> return r
936 Free p -> do
91727d11 937 f' <- p
938 yield $ T.singleton '\n'
939 go f'
940{-# INLINABLE unlines #-}
941
31f41a5d 942{-| Join 'FreeT'-delimited words into a text stream
91727d11 943-}
944unwords
945 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
7fc48f7c 946unwords = intercalate (yield $ T.singleton ' ')
91727d11 947{-# INLINABLE unwords #-}
948
949{- $parse
31f41a5d 950 The following parsing utilities are single-character analogs of the ones found
951 @pipes-parse@.
91727d11 952-}
953
91727d11 954{- $reexports
91727d11 955
956 @Data.Text@ re-exports the 'Text' type.
957
0f8c6f1b 958 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
64e03122 959-}
960
bbdfd305 961