]> git.immae.eu Git - github/fretlink/text-pipes.git/blob - Pipes/Text.hs
d5b93f1e4351f8a445fe9e7fbcccd0c86dc94a3d
[github/fretlink/text-pipes.git] / Pipes / Text.hs
1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
3 {-| This package provides @pipes@ utilities for \'text streams\', which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, and you
5 will generally want @Data.Text@ in scope. But the type @Producer Text m r@ is
6 in some ways the pipes equivalent of the lazy @Text@ type.
7
8 This module provides many functions equivalent in one way or another to
9 the 'pure' functions in
10 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
11 They transform, divide, group and fold text streams. Though @Producer Text m r@
12 is \'effectful\' Text, functions
13 in this module are \'pure\' in the sense that they are uniformly monad-independent.
14 Simple IO operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
15 operations are in @Data.Text.Lazy.IO@. Interoperation with @ByteString@
16 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
17
18 The Text type exported by @Data.Text.Lazy@ is basically '[Text]'. The implementation
19 is arranged so that the individual strict 'Text' chunks are kept to a reasonable size;
20 the user is not aware of the divisions between the connected 'Text' chunks.
21 So also here: the functions in this module are designed to operate on streams that
22 are insensitive to text boundaries. This means that they may freely split
23 text into smaller texts and /discard empty texts/. However, the objective is
24 that they should /never concatenate texts/ in order to provide strict upper
25 bounds on memory usage.
26
27 For example, to stream only the first three lines of 'stdin' to 'stdout' you
28 might write:
29
30 > import Pipes
31 > import qualified Pipes.Text as Text
32 > import qualified Pipes.Text.IO as Text
33 > import Pipes.Group
34 > import Lens.Family
35 >
36 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
37 > where
38 > takeLines n = Text.unlines . takes' n . view Text.lines
39 > -- or equivalently:
40 > -- takeLines n = over Text.lines (takes' n)
41
42 The above program will never bring more than one chunk of text (~ 32 KB) into
43 memory, no matter how long the lines are.
44
45 As this example shows, one superficial difference from @Data.Text.Lazy@
46 is that many of the operations, like 'lines',
47 are \'lensified\'; this has a number of advantages where it is possible, in particular
48 it facilitates their use with 'Parser's of Text (in the general
49 <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
50 sense.)
51 Each such expression, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
52 intuitively corresponding function when used with @view@ or @(^.)@.
53
54 A more important difference the example reveals is in the types closely associated with
55 the central type, @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@
56 we find functions like
57
58 > splitAt :: Int -> Text -> (Text, Text)
59 > lines :: Int -> Text -> [Text]
60 > chunksOf :: Int -> Text -> [Text]
61
62 which relate a Text with a pair or list of Texts. The corresponding functions here (taking
63 account of \'lensification\') are
64
65 > view . splitAt :: (Monad m, Integral n)
66 > => n -> Producer Text m r -> Producer Text.Text m (Producer Text.Text m r)
67 > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
68 > view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
69
70 In the type @Producer Text m (Producer Text m r)@ the second
71 element of the \'pair\' of of \'effectful Texts\' cannot simply be retrieved
72 with 'snd'. This is an \'effectful\' pair, and one must work through the effects
73 of the first element to arrive at the second. Similarly in @FreeT (Producer Text m) m r@,
74 which corresponds with @[Text]@, on cannot simply drop 10 Producers and take the others;
75 we can only get to the ones we want to take by working through their predecessors.
76
77 Some of the types may be more readable if you imagine that we have introduced
78 our own type synonyms
79
80 > type Text m r = Producer T.Text m r
81 > type Texts m r = FreeT (Producer T.Text m) m r
82
83 Then we would think of the types above as
84
85 > view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
86 > view lines :: (Monad m) => Text m r -> Texts m r
87 > view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
88
89 which brings one closer to the types of the similar functions in @Data.Text.Lazy@
90
91 -}
92
93 module Pipes.Text (
94 -- * Producers
95 fromLazy
96
97 -- * Pipes
98 , map
99 , concatMap
100 , take
101 , drop
102 , takeWhile
103 , dropWhile
104 , filter
105 , scan
106 , pack
107 , unpack
108 , toCaseFold
109 , toLower
110 , toUpper
111 , stripStart
112
113 -- * Folds
114 , toLazy
115 , toLazyM
116 , foldChars
117 , head
118 , last
119 , null
120 , length
121 , any
122 , all
123 , maximum
124 , minimum
125 , find
126 , index
127 , count
128
129 -- * Primitive Character Parsers
130 , nextChar
131 , drawChar
132 , unDrawChar
133 , peekChar
134 , isEndOfChars
135
136 -- * Parsing Lenses
137 , splitAt
138 , span
139 , break
140 , groupBy
141 , group
142 , word
143 , line
144
145 -- * FreeT Splitters
146 , chunksOf
147 , splitsWith
148 , splits
149 , groupsBy
150 , groups
151 , lines
152 , words
153
154 -- * Transformations
155 , intersperse
156 , packChars
157
158 -- * Joiners
159 , intercalate
160 , unlines
161 , unwords
162
163 -- * Re-exports
164 -- $reexports
165 , module Data.ByteString
166 , module Data.Text
167 , module Data.Profunctor
168 , module Pipes.Parse
169 , module Pipes.Group
170 ) where
171
172 import Control.Applicative ((<*))
173 import Control.Monad (liftM, join)
174 import Control.Monad.Trans.State.Strict (StateT(..), modify)
175 import qualified Data.Text as T
176 import Data.Text (Text)
177 import qualified Data.Text.Lazy as TL
178 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
179 import Data.ByteString (ByteString)
180 import Data.Functor.Constant (Constant(Constant, getConstant))
181 import Data.Functor.Identity (Identity)
182 import Data.Profunctor (Profunctor)
183 import qualified Data.Profunctor
184 import Pipes
185 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
186 import qualified Pipes.Group as PG
187 import qualified Pipes.Parse as PP
188 import Pipes.Parse (Parser)
189 import qualified Pipes.Prelude as P
190 import Data.Char (isSpace)
191 import Data.Word (Word8)
192
193 import Prelude hiding (
194 all,
195 any,
196 break,
197 concat,
198 concatMap,
199 drop,
200 dropWhile,
201 elem,
202 filter,
203 head,
204 last,
205 lines,
206 length,
207 map,
208 maximum,
209 minimum,
210 notElem,
211 null,
212 readFile,
213 span,
214 splitAt,
215 take,
216 takeWhile,
217 unlines,
218 unwords,
219 words,
220 writeFile )
221
222 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
223 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
224 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
225 {-# INLINE fromLazy #-}
226
227
228 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
229
230 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
231
232 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
233 a ^. lens = getConstant (lens Constant a)
234
235
236 -- | Apply a transformation to each 'Char' in the stream
237 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
238 map f = P.map (T.map f)
239 {-# INLINABLE map #-}
240
241 {-# RULES "p >-> map f" forall p f .
242 p >-> map f = for p (\txt -> yield (T.map f txt))
243 #-}
244
245 -- | Map a function over the characters of a text stream and concatenate the results
246 concatMap
247 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
248 concatMap f = P.map (T.concatMap f)
249 {-# INLINABLE concatMap #-}
250
251 {-# RULES "p >-> concatMap f" forall p f .
252 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
253 #-}
254
255
256 -- | Transform a Pipe of 'String's into one of 'Text' chunks
257 pack :: Monad m => Pipe String Text m r
258 pack = P.map T.pack
259 {-# INLINEABLE pack #-}
260
261 {-# RULES "p >-> pack" forall p .
262 p >-> pack = for p (\txt -> yield (T.pack txt))
263 #-}
264
265 -- | Transform a Pipes of 'Text' chunks into one of 'String's
266 unpack :: Monad m => Pipe Text String m r
267 unpack = for cat (\t -> yield (T.unpack t))
268 {-# INLINEABLE unpack #-}
269
270 {-# RULES "p >-> unpack" forall p .
271 p >-> unpack = for p (\txt -> yield (T.unpack txt))
272 #-}
273
274 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
275 -- here acting as 'Text' pipes, rather as they would on a lazy text
276 toCaseFold :: Monad m => Pipe Text Text m ()
277 toCaseFold = P.map T.toCaseFold
278 {-# INLINEABLE toCaseFold #-}
279
280 {-# RULES "p >-> toCaseFold" forall p .
281 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
282 #-}
283
284
285 -- | lowercase incoming 'Text'
286 toLower :: Monad m => Pipe Text Text m ()
287 toLower = P.map T.toLower
288 {-# INLINEABLE toLower #-}
289
290 {-# RULES "p >-> toLower" forall p .
291 p >-> toLower = for p (\txt -> yield (T.toLower txt))
292 #-}
293
294 -- | uppercase incoming 'Text'
295 toUpper :: Monad m => Pipe Text Text m ()
296 toUpper = P.map T.toUpper
297 {-# INLINEABLE toUpper #-}
298
299 {-# RULES "p >-> toUpper" forall p .
300 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
301 #-}
302
303 -- | Remove leading white space from an incoming succession of 'Text's
304 stripStart :: Monad m => Pipe Text Text m r
305 stripStart = do
306 chunk <- await
307 let text = T.stripStart chunk
308 if T.null text
309 then stripStart
310 else do yield text
311 cat
312 {-# INLINEABLE stripStart #-}
313
314 -- | @(take n)@ only allows @n@ individual characters to pass;
315 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
316 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
317 take n0 = go n0 where
318 go n
319 | n <= 0 = return ()
320 | otherwise = do
321 txt <- await
322 let len = fromIntegral (T.length txt)
323 if (len > n)
324 then yield (T.take (fromIntegral n) txt)
325 else do
326 yield txt
327 go (n - len)
328 {-# INLINABLE take #-}
329
330 -- | @(drop n)@ drops the first @n@ characters
331 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
332 drop n0 = go n0 where
333 go n
334 | n <= 0 = cat
335 | otherwise = do
336 txt <- await
337 let len = fromIntegral (T.length txt)
338 if (len >= n)
339 then do
340 yield (T.drop (fromIntegral n) txt)
341 cat
342 else go (n - len)
343 {-# INLINABLE drop #-}
344
345 -- | Take characters until they fail the predicate
346 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
347 takeWhile predicate = go
348 where
349 go = do
350 txt <- await
351 let (prefix, suffix) = T.span predicate txt
352 if (T.null suffix)
353 then do
354 yield txt
355 go
356 else yield prefix
357 {-# INLINABLE takeWhile #-}
358
359 -- | Drop characters until they fail the predicate
360 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
361 dropWhile predicate = go where
362 go = do
363 txt <- await
364 case T.findIndex (not . predicate) txt of
365 Nothing -> go
366 Just i -> do
367 yield (T.drop i txt)
368 cat
369 {-# INLINABLE dropWhile #-}
370
371 -- | Only allows 'Char's to pass if they satisfy the predicate
372 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
373 filter predicate = P.map (T.filter predicate)
374 {-# INLINABLE filter #-}
375
376 {-# RULES "p >-> filter q" forall p q .
377 p >-> filter q = for p (\txt -> yield (T.filter q txt))
378 #-}
379
380 -- | Strict left scan over the characters
381 scan
382 :: (Monad m)
383 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
384 scan step begin = do
385 yield (T.singleton begin)
386 go begin
387 where
388 go c = do
389 txt <- await
390 let txt' = T.scanl step c txt
391 c' = T.last txt'
392 yield (T.tail txt')
393 go c'
394 {-# INLINABLE scan #-}
395
396 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
397 'TL.Text'
398 -}
399 toLazy :: Producer Text Identity () -> TL.Text
400 toLazy = TL.fromChunks . P.toList
401 {-# INLINABLE toLazy #-}
402
403 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
404 'TL.Text'
405
406 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
407 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
408 immediately as they are generated instead of loading them all into memory.
409 -}
410 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
411 toLazyM = liftM TL.fromChunks . P.toListM
412 {-# INLINABLE toLazyM #-}
413
414 -- | Reduce the text stream using a strict left fold over characters
415 foldChars
416 :: Monad m
417 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
418 foldChars step begin done = P.fold (T.foldl' step) begin done
419 {-# INLINABLE foldChars #-}
420
421 -- | Retrieve the first 'Char'
422 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
423 head = go
424 where
425 go p = do
426 x <- nextChar p
427 case x of
428 Left _ -> return Nothing
429 Right (c, _) -> return (Just c)
430 {-# INLINABLE head #-}
431
432 -- | Retrieve the last 'Char'
433 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
434 last = go Nothing
435 where
436 go r p = do
437 x <- next p
438 case x of
439 Left () -> return r
440 Right (txt, p') ->
441 if (T.null txt)
442 then go r p'
443 else go (Just $ T.last txt) p'
444 {-# INLINABLE last #-}
445
446 -- | Determine if the stream is empty
447 null :: (Monad m) => Producer Text m () -> m Bool
448 null = P.all T.null
449 {-# INLINABLE null #-}
450
451 -- | Count the number of characters in the stream
452 length :: (Monad m, Num n) => Producer Text m () -> m n
453 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
454 {-# INLINABLE length #-}
455
456 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
457 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
458 any predicate = P.any (T.any predicate)
459 {-# INLINABLE any #-}
460
461 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
462 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
463 all predicate = P.all (T.all predicate)
464 {-# INLINABLE all #-}
465
466 -- | Return the maximum 'Char' within a text stream
467 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
468 maximum = P.fold step Nothing id
469 where
470 step mc txt =
471 if (T.null txt)
472 then mc
473 else Just $ case mc of
474 Nothing -> T.maximum txt
475 Just c -> max c (T.maximum txt)
476 {-# INLINABLE maximum #-}
477
478 -- | Return the minimum 'Char' within a text stream (surely very useful!)
479 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
480 minimum = P.fold step Nothing id
481 where
482 step mc txt =
483 if (T.null txt)
484 then mc
485 else case mc of
486 Nothing -> Just (T.minimum txt)
487 Just c -> Just (min c (T.minimum txt))
488 {-# INLINABLE minimum #-}
489
490 -- | Find the first element in the stream that matches the predicate
491 find
492 :: (Monad m)
493 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
494 find predicate p = head (p >-> filter predicate)
495 {-# INLINABLE find #-}
496
497 -- | Index into a text stream
498 index
499 :: (Monad m, Integral a)
500 => a-> Producer Text m () -> m (Maybe Char)
501 index n p = head (p >-> drop n)
502 {-# INLINABLE index #-}
503
504
505 -- | Store a tally of how many segments match the given 'Text'
506 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
507 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
508 {-# INLINABLE count #-}
509
510
511 -- | Consume the first character from a stream of 'Text'
512 --
513 -- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
514 -- succeeds with a 'Right' providing the next character and the remainder of the
515 -- 'Producer'.
516
517 nextChar
518 :: (Monad m)
519 => Producer Text m r
520 -> m (Either r (Char, Producer Text m r))
521 nextChar = go
522 where
523 go p = do
524 x <- next p
525 case x of
526 Left r -> return (Left r)
527 Right (txt, p') -> case (T.uncons txt) of
528 Nothing -> go p'
529 Just (c, txt') -> return (Right (c, yield txt' >> p'))
530 {-# INLINABLE nextChar #-}
531
532 -- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
533
534 drawChar :: (Monad m) => Parser Text m (Maybe Char)
535 drawChar = do
536 x <- PP.draw
537 case x of
538 Nothing -> return Nothing
539 Just txt -> case (T.uncons txt) of
540 Nothing -> drawChar
541 Just (c, txt') -> do
542 PP.unDraw txt'
543 return (Just c)
544 {-# INLINABLE drawChar #-}
545
546 -- | Push back a 'Char' onto the underlying 'Producer'
547 unDrawChar :: (Monad m) => Char -> Parser Text m ()
548 unDrawChar c = modify (yield (T.singleton c) >>)
549 {-# INLINABLE unDrawChar #-}
550
551 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
552 push the 'Char' back
553
554 > peekChar = do
555 > x <- drawChar
556 > case x of
557 > Left _ -> return ()
558 > Right c -> unDrawChar c
559 > return x
560
561 -}
562
563 peekChar :: (Monad m) => Parser Text m (Maybe Char)
564 peekChar = do
565 x <- drawChar
566 case x of
567 Nothing -> return ()
568 Just c -> unDrawChar c
569 return x
570 {-# INLINABLE peekChar #-}
571
572 {-| Check if the underlying 'Producer' has no more characters
573
574 Note that this will skip over empty 'Text' chunks, unlike
575 'PP.isEndOfInput' from @pipes-parse@, which would consider
576 an empty 'Text' a valid bit of input.
577
578 > isEndOfChars = liftM isLeft peekChar
579 -}
580 isEndOfChars :: (Monad m) => Parser Text m Bool
581 isEndOfChars = do
582 x <- peekChar
583 return (case x of
584 Nothing -> True
585 Just _-> False )
586 {-# INLINABLE isEndOfChars #-}
587
588
589 -- | Splits a 'Producer' after the given number of characters
590 splitAt
591 :: (Monad m, Integral n)
592 => n
593 -> Lens' (Producer Text m r)
594 (Producer Text m (Producer Text m r))
595 splitAt n0 k p0 = fmap join (k (go n0 p0))
596 where
597 go 0 p = return p
598 go n p = do
599 x <- lift (next p)
600 case x of
601 Left r -> return (return r)
602 Right (txt, p') -> do
603 let len = fromIntegral (T.length txt)
604 if (len <= n)
605 then do
606 yield txt
607 go (n - len) p'
608 else do
609 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
610 yield prefix
611 return (yield suffix >> p')
612 {-# INLINABLE splitAt #-}
613
614
615 -- | Split a text stream in two, producing the longest
616 -- consecutive group of characters that satisfies the predicate
617 -- and returning the rest
618
619 span
620 :: (Monad m)
621 => (Char -> Bool)
622 -> Lens' (Producer Text m r)
623 (Producer Text m (Producer Text m r))
624 span predicate k p0 = fmap join (k (go p0))
625 where
626 go p = do
627 x <- lift (next p)
628 case x of
629 Left r -> return (return r)
630 Right (txt, p') -> do
631 let (prefix, suffix) = T.span predicate txt
632 if (T.null suffix)
633 then do
634 yield txt
635 go p'
636 else do
637 yield prefix
638 return (yield suffix >> p')
639 {-# INLINABLE span #-}
640
641 {-| Split a text stream in two, producing the longest
642 consecutive group of characters that don't satisfy the predicate
643 -}
644 break
645 :: (Monad m)
646 => (Char -> Bool)
647 -> Lens' (Producer Text m r)
648 (Producer Text m (Producer Text m r))
649 break predicate = span (not . predicate)
650 {-# INLINABLE break #-}
651
652 {-| Improper lens that splits after the first group of equivalent Chars, as
653 defined by the given equivalence relation
654 -}
655 groupBy
656 :: (Monad m)
657 => (Char -> Char -> Bool)
658 -> Lens' (Producer Text m r)
659 (Producer Text m (Producer Text m r))
660 groupBy equals k p0 = fmap join (k ((go p0))) where
661 go p = do
662 x <- lift (next p)
663 case x of
664 Left r -> return (return r)
665 Right (txt, p') -> case T.uncons txt of
666 Nothing -> go p'
667 Just (c, _) -> (yield txt >> p') ^. span (equals c)
668 {-# INLINABLE groupBy #-}
669
670 -- | Improper lens that splits after the first succession of identical 'Char' s
671 group :: Monad m
672 => Lens' (Producer Text m r)
673 (Producer Text m (Producer Text m r))
674 group = groupBy (==)
675 {-# INLINABLE group #-}
676
677 {-| Improper lens that splits a 'Producer' after the first word
678
679 Unlike 'words', this does not drop leading whitespace
680 -}
681 word :: (Monad m)
682 => Lens' (Producer Text m r)
683 (Producer Text m (Producer Text m r))
684 word k p0 = fmap join (k (to p0))
685 where
686 to p = do
687 p' <- p^.span isSpace
688 p'^.break isSpace
689 {-# INLINABLE word #-}
690
691
692 line :: (Monad m)
693 => Lens' (Producer Text m r)
694 (Producer Text m (Producer Text m r))
695 line = break (== '\n')
696
697 {-# INLINABLE line #-}
698
699
700 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
701 intersperse
702 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
703 intersperse c = go0
704 where
705 go0 p = do
706 x <- lift (next p)
707 case x of
708 Left r -> return r
709 Right (txt, p') -> do
710 yield (T.intersperse c txt)
711 go1 p'
712 go1 p = do
713 x <- lift (next p)
714 case x of
715 Left r -> return r
716 Right (txt, p') -> do
717 yield (T.singleton c)
718 yield (T.intersperse c txt)
719 go1 p'
720 {-# INLINABLE intersperse #-}
721
722
723
724 -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
725 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
726 packChars = Data.Profunctor.dimap to (fmap from)
727 where
728 -- to :: Monad m => Producer Char m x -> Producer Text m x
729 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
730
731 step diffAs c = diffAs . (c:)
732
733 done diffAs = T.pack (diffAs [])
734
735 -- from :: Monad m => Producer Text m x -> Producer Char m x
736 from p = for p (each . T.unpack)
737 {-# INLINABLE packChars #-}
738
739
740 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
741 chunksOf
742 :: (Monad m, Integral n)
743 => n -> Lens' (Producer Text m r)
744 (FreeT (Producer Text m) m r)
745 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
746 where
747 go p = do
748 x <- next p
749 return $ case x of
750 Left r -> Pure r
751 Right (txt, p') -> Free $ do
752 p'' <- (yield txt >> p') ^. splitAt n
753 return $ FreeT (go p'')
754 {-# INLINABLE chunksOf #-}
755
756
757 {-| Split a text stream into sub-streams delimited by characters that satisfy the
758 predicate
759 -}
760 splitsWith
761 :: (Monad m)
762 => (Char -> Bool)
763 -> Producer Text m r
764 -> FreeT (Producer Text m) m r
765 splitsWith predicate p0 = FreeT (go0 p0)
766 where
767 go0 p = do
768 x <- next p
769 case x of
770 Left r -> return (Pure r)
771 Right (txt, p') ->
772 if (T.null txt)
773 then go0 p'
774 else return $ Free $ do
775 p'' <- (yield txt >> p') ^. span (not . predicate)
776 return $ FreeT (go1 p'')
777 go1 p = do
778 x <- nextChar p
779 return $ case x of
780 Left r -> Pure r
781 Right (_, p') -> Free $ do
782 p'' <- p' ^. span (not . predicate)
783 return $ FreeT (go1 p'')
784 {-# INLINABLE splitsWith #-}
785
786 -- | Split a text stream using the given 'Char' as the delimiter
787 splits :: (Monad m)
788 => Char
789 -> Lens' (Producer Text m r)
790 (FreeT (Producer Text m) m r)
791 splits c k p =
792 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
793 {-# INLINABLE splits #-}
794
795 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
796 given equivalence relation
797 -}
798 groupsBy
799 :: Monad m
800 => (Char -> Char -> Bool)
801 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
802 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
803 go p = do x <- next p
804 case x of Left r -> return (Pure r)
805 Right (bs, p') -> case T.uncons bs of
806 Nothing -> go p'
807 Just (c, _) -> do return $ Free $ do
808 p'' <- (yield bs >> p')^.span (equals c)
809 return $ FreeT (go p'')
810 {-# INLINABLE groupsBy #-}
811
812
813 -- | Like 'groupsBy', where the equality predicate is ('==')
814 groups
815 :: Monad m
816 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
817 groups = groupsBy (==)
818 {-# INLINABLE groups #-}
819
820
821
822 {-| Split a text stream into 'FreeT'-delimited lines
823 -}
824 lines
825 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
826 lines = Data.Profunctor.dimap _lines (fmap _unlines)
827 where
828 _lines p0 = FreeT (go0 p0)
829 where
830 go0 p = do
831 x <- next p
832 case x of
833 Left r -> return (Pure r)
834 Right (txt, p') ->
835 if (T.null txt)
836 then go0 p'
837 else return $ Free $ go1 (yield txt >> p')
838 go1 p = do
839 p' <- p ^. break ('\n' ==)
840 return $ FreeT $ do
841 x <- nextChar p'
842 case x of
843 Left r -> return $ Pure r
844 Right (_, p'') -> go0 p''
845 -- _unlines
846 -- :: Monad m
847 -- => FreeT (Producer Text m) m x -> Producer Text m x
848 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
849
850
851 {-# INLINABLE lines #-}
852
853
854 -- | Split a text stream into 'FreeT'-delimited words
855 words
856 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
857 words = Data.Profunctor.dimap go (fmap _unwords)
858 where
859 go p = FreeT $ do
860 x <- next (p >-> dropWhile isSpace)
861 return $ case x of
862 Left r -> Pure r
863 Right (bs, p') -> Free $ do
864 p'' <- (yield bs >> p') ^. break isSpace
865 return (go p'')
866 _unwords = PG.intercalates (yield $ T.singleton ' ')
867
868 {-# INLINABLE words #-}
869
870
871 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
872 interspersing a text stream in between them
873 -}
874 intercalate
875 :: (Monad m)
876 => Producer Text m ()
877 -> FreeT (Producer Text m) m r
878 -> Producer Text m r
879 intercalate p0 = go0
880 where
881 go0 f = do
882 x <- lift (runFreeT f)
883 case x of
884 Pure r -> return r
885 Free p -> do
886 f' <- p
887 go1 f'
888 go1 f = do
889 x <- lift (runFreeT f)
890 case x of
891 Pure r -> return r
892 Free p -> do
893 p0
894 f' <- p
895 go1 f'
896 {-# INLINABLE intercalate #-}
897
898 {-| Join 'FreeT'-delimited lines into a text stream
899 -}
900 unlines
901 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
902 unlines = go
903 where
904 go f = do
905 x <- lift (runFreeT f)
906 case x of
907 Pure r -> return r
908 Free p -> do
909 f' <- p
910 yield $ T.singleton '\n'
911 go f'
912 {-# INLINABLE unlines #-}
913
914 {-| Join 'FreeT'-delimited words into a text stream
915 -}
916 unwords
917 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
918 unwords = intercalate (yield $ T.singleton ' ')
919 {-# INLINABLE unwords #-}
920
921
922 {- $reexports
923
924 @Data.Text@ re-exports the 'Text' type.
925
926 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
927 -}
928
929