1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
3
4 module Pipes.Text (
5 -- * Introduction
6 -- $intro
7
8 -- * Producers
9 fromLazy
10
11 -- * Pipes
12 , map
13 , concatMap
14 , take
15 , drop
16 , takeWhile
17 , dropWhile
18 , filter
19 , scan
20 , pack
21 , unpack
22 , toCaseFold
23 , toLower
24 , toUpper
25 , stripStart
26
27 -- * Folds
28 , toLazy
29 , toLazyM
30 , foldChars
31 , head
32 , last
33 , null
34 , length
35 , any
36 , all
37 , maximum
38 , minimum
39 , find
40 , index
41 , count
42
43 -- * Primitive Character Parsers
44 , nextChar
45 , drawChar
46 , unDrawChar
47 , peekChar
48 , isEndOfChars
49
50 -- * Parsing Lenses
51 , splitAt
52 , span
53 , break
54 , groupBy
55 , group
56 , word
57 , line
58
59 -- * FreeT Splitters
60 , chunksOf
61 , splitsWith
62 , splits
63 , groupsBy
64 , groups
65 , lines
66 , words
67
68 -- * Transformations
69 , intersperse
70 , packChars
71
72 -- * Joiners
73 , intercalate
74 , unlines
75 , unwords
76
77 -- * Re-exports
78 -- $reexports
79 , module Data.ByteString
80 , module Data.Text
81 , module Data.Profunctor
82 , module Pipes.Parse
83 , module Pipes.Group
84 ) where
85
86 import Control.Applicative ((<*))
87 import Control.Monad (liftM, join)
88 import Control.Monad.Trans.State.Strict (StateT(..), modify)
89 import qualified Data.Text as T
90 import Data.Text (Text)
91 import qualified Data.Text.Lazy as TL
92 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
93 import Data.ByteString (ByteString)
94 import Data.Functor.Constant (Constant(Constant, getConstant))
95 import Data.Functor.Identity (Identity)
96 import Data.Profunctor (Profunctor)
97 import qualified Data.Profunctor
98 import Pipes
99 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
100 import qualified Pipes.Group as PG
101 import qualified Pipes.Parse as PP
102 import Pipes.Parse (Parser)
103 import qualified Pipes.Prelude as P
104 import Data.Char (isSpace)
105 import Data.Word (Word8)
106
107 import Prelude hiding (
108 all,
109 any,
110 break,
111 concat,
112 concatMap,
113 drop,
114 dropWhile,
115 elem,
116 filter,
117 head,
118 last,
119 lines,
120 length,
121 map,
122 maximum,
123 minimum,
124 notElem,
125 null,
126 readFile,
127 span,
128 splitAt,
129 take,
130 takeWhile,
131 unlines,
132 unwords,
133 words,
134 writeFile )
135
136 {- $intro
137
138 * /Effectful Text/
139
140 This package provides @pipes@ utilities for /text streams/, understood as
141 streams of 'Text' chunks. The individual chunks are uniformly /strict/, and thus you
142 will generally want @Data.Text@ in scope. But the type @Producer Text m r@ as we
143 are using it is a sort of pipes equivalent of the lazy @Text@ type.
144
145 This particular module provides many functions equivalent in one way or another to
146 the pure functions in
147 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
148 They transform, divide, group and fold text streams. Though @Producer Text m r@
149 is the type of \'effectful Text\', the functions in this module are \'pure\'
150 in the sense that they are uniformly monad-independent.
151 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
152 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
153 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
154
155 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
156 strict Text: the implementation is arranged so that the individual strict 'Text'
157 chunks are kept to a reasonable size; the user is not aware of the divisions
158 between the connected 'Text' chunks.
159 So also here: the functions in this module are designed to operate on streams that
160 are insensitive to text boundaries. This means that they may freely split
161 text into smaller texts and /discard empty texts/. The objective, though, is
162 that they should /never concatenate texts/ in order to provide strict upper
163 bounds on memory usage.
164
165 For example, to stream only the first three lines of 'stdin' to 'stdout' you
166 might write:
167
168 > import Pipes
169 > import qualified Pipes.Text as Text
170 > import qualified Pipes.Text.IO as Text
171 > import Pipes.Group (takes')
172 > import Lens.Family
173 >
174 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
175 > where
176 > takeLines n = Text.unlines . takes' n . view Text.lines
177
178 The above program will never bring more than one chunk of text (~ 32 KB) into
179 memory, no matter how long the lines are.
180
181 * /Lenses/
182
183 As this example shows, one superficial difference from @Data.Text.Lazy@
184 is that many of the operations, like 'lines',
185 are \'lensified\'; this has a number of advantages (where it is possible); in particular,
186 it facilitates their use with 'Parser's of Text (in the general
187 <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
188 sense).
189 Each such lens, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
190 intuitively corresponding function when used with @view@ or @(^.)@.
191
192 Note similarly that many equivalents of @Text -> Text@ functions are exported here as 'Pipe's.
193 They reduce to the intuitively corresponding functions when used with '(>->)'. Thus something like
194
195 > stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
196
197 would drop the leading white space from each line.
198
199 The lens combinators
200 you will find indispensable are @view@ / @(^.)@, @zoom@ and probably @over@. These
201 are supplied by both <http://hackage.haskell.org/package/lens lens> and
202 <http://hackage.haskell.org/package/lens-family lens-family>. The use of 'zoom' is explained
203 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
204 and to some extent in the @Pipes.Text.Encoding@ module here. The use of
205 @over@ is simple, as illustrated by the fact that we can rewrite @stripLines@ above as
206
207 > stripLines = over Text.lines $ maps (>-> stripStart)
208
209
210 * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
211
212 These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@.
213 This is in the types that are most closely associated with our central text type,
214 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
215
216 > splitAt :: Int -> Text -> (Text, Text)
217 > lines :: Text -> [Text]
218 > chunksOf :: Int -> Text -> [Text]
219
220 which relate a Text with a pair of Texts or a list of Texts.
221 The corresponding functions here (taking account of \'lensification\') are
222
223 > view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
224 > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
225 > view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
226
227 Some of the types may be more readable if you imagine that we have introduced
228 our own type synonyms
229
230 > type Text m r = Producer T.Text m r
231 > type Texts m r = FreeT (Producer T.Text m) m r
232
233 Then we would think of the types above as
234
235 > view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
236 > view lines :: (Monad m) => Text m r -> Texts m r
237 > view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
238
239 which brings one closer to the types of the similar functions in @Data.Text.Lazy@.
240
241 In the type @Producer Text m (Producer Text m r)@ the second
242 element of the \'pair\' of effectful Texts cannot simply be retrieved
243 with something like 'snd'. This is an \'effectful\' pair, and one must work
244 through the effects of the first element to arrive at the second Text stream, even
245 if you are proposing to throw the Text in the first element away.
246 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
247
248 > join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r
249
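As a minimal sketch (using @view@ and @join@ as in the examples above; the name
@reassemble@ is ours, purely for illustration), fusing the two halves produced by
'splitAt' back together recovers a stream equivalent to the original:

> reassemble :: Monad m => Int -> Producer Text m r -> Producer Text m r
> reassemble n p = join (view (Text.splitAt n) p)
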
250 The return type of 'lines', 'words', 'chunksOf' and the other "splitter" functions,
251 @FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful)
252 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
253 together things of the forms:
254
255 > r
256 > (Text,r)
257 > (Text, (Text, r))
258 > (Text, (Text, (Text, r)))
259 > (Text, (Text, (Text, (Text, r))))
260 > ...
261
262 We might also have identified the sum of those types with @Free ((,) Text) r@
263 -- or, more absurdly, @FreeT ((,) Text) Identity r@. Similarly, @FreeT (Producer Text m) m r@
264 encompasses all the members of the sequence:
265
266 > m r
267 > Producer Text m r
268 > Producer Text m (Producer Text m r)
269 > Producer Text m (Producer Text m (Producer Text m r))
270 > ...
271
272 One might think that
273
274 > lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
275 > view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
276
277 should really have the type
278
279 > lines :: Monad m => Pipe Text Text m r
280
281 as e.g. 'toUpper' does. But this would spoil the control we are
282 attempting to maintain over the size of chunks. It is in fact just
283 as unreasonable to want such a pipe as to want
284
285 > Data.Text.Lazy.lines :: Text -> Text
286
287 to 'rechunk' the strict Text chunks inside the lazy Text to respect
288 line boundaries. In fact we have
289
290 > Data.Text.Lazy.lines :: Text -> [Text]
291 > Prelude.lines :: String -> [String]
292
293 where the elements of the list are themselves lazy Texts or Strings; the use
294 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
295
296 The @Pipes.Group@ module, which can generally be imported without qualification,
297 provides many functions for working with things of type @FreeT (Producer a m) m r@.
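
For example, 'Pipes.Group.folds' reduces each delimited stream to a single value. A
hedged sketch (assuming @folds@ and @view@ are in scope, with @Text@ for a qualified
"Pipes.Text" import as above and @T@ for a qualified "Data.Text" import) that turns a
text stream into a stream of line lengths:

> lineLengths :: Monad m => Producer Text m r -> Producer Int m r
> lineLengths = folds (\n txt -> n + T.length txt) 0 id . view Text.lines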
298
299
300 -}
301
302 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
303 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
304 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
305 {-# INLINE fromLazy #-}
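
-- A hedged round-trip sketch (illustration only; 'toLazy' is defined further down):
--
-- > toLazy (fromLazy (TL.pack "effectful")) == TL.pack "effectful"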
306
307
308 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
309
310 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
311
312 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
313 a ^. lens = getConstant (lens Constant a)
314
315
316 -- | Apply a transformation to each 'Char' in the stream
317 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
318 map f = P.map (T.map f)
319 {-# INLINABLE map #-}
320
321 {-# RULES "p >-> map f" forall p f .
322 p >-> map f = for p (\txt -> yield (T.map f txt))
323 #-}
324
325 -- | Map a function over the characters of a text stream and concatenate the results
326 concatMap
327 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
328 concatMap f = P.map (T.concatMap f)
329 {-# INLINABLE concatMap #-}
330
331 {-# RULES "p >-> concatMap f" forall p f .
332 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
333 #-}
334
335
336 -- | Transform a Pipe of 'String's into one of 'Text' chunks
337 pack :: Monad m => Pipe String Text m r
338 pack = P.map T.pack
339 {-# INLINEABLE pack #-}
340
341 {-# RULES "p >-> pack" forall p .
342 p >-> pack = for p (\txt -> yield (T.pack txt))
343 #-}
344
345 -- | Transform a Pipe of 'Text' chunks into one of 'String's
346 unpack :: Monad m => Pipe Text String m r
347 unpack = for cat (\t -> yield (T.unpack t))
348 {-# INLINEABLE unpack #-}
349
350 {-# RULES "p >-> unpack" forall p .
351 p >-> unpack = for p (\txt -> yield (T.unpack txt))
352 #-}
353
354 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
355 -- here acting as 'Text' pipes, rather than acting on a lazy 'Text' as a whole
356 toCaseFold :: Monad m => Pipe Text Text m r
357 toCaseFold = P.map T.toCaseFold
358 {-# INLINEABLE toCaseFold #-}
359
360 {-# RULES "p >-> toCaseFold" forall p .
361 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
362 #-}
363
364
365 -- | lowercase incoming 'Text'
366 toLower :: Monad m => Pipe Text Text m r
367 toLower = P.map T.toLower
368 {-# INLINEABLE toLower #-}
369
370 {-# RULES "p >-> toLower" forall p .
371 p >-> toLower = for p (\txt -> yield (T.toLower txt))
372 #-}
373
374 -- | uppercase incoming 'Text'
375 toUpper :: Monad m => Pipe Text Text m r
376 toUpper = P.map T.toUpper
377 {-# INLINEABLE toUpper #-}
378
379 {-# RULES "p >-> toUpper" forall p .
380 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
381 #-}
382
383 -- | Remove leading white space from an incoming succession of 'Text's
384 stripStart :: Monad m => Pipe Text Text m r
385 stripStart = do
386 chunk <- await
387 let text = T.stripStart chunk
388 if T.null text
389 then stripStart
390 else do yield text
391 cat
392 {-# INLINEABLE stripStart #-}
393
394 -- | @(take n)@ only allows @n@ individual characters to pass;
395 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
396 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
397 take n0 = go n0 where
398 go n
399 | n <= 0 = return ()
400 | otherwise = do
401 txt <- await
402 let len = fromIntegral (T.length txt)
403 if (len > n)
404 then yield (T.take (fromIntegral n) txt)
405 else do
406 yield txt
407 go (n - len)
408 {-# INLINABLE take #-}
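
-- A hedged equational sketch of 'take' (illustration only, using 'fromLazy' and 'toLazy'):
--
-- > toLazy (fromLazy (TL.pack "effectful") >-> take 3) == TL.pack "eff"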
409
410 -- | @(drop n)@ drops the first @n@ characters
411 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
412 drop n0 = go n0 where
413 go n
414 | n <= 0 = cat
415 | otherwise = do
416 txt <- await
417 let len = fromIntegral (T.length txt)
418 if (len >= n)
419 then do
420 yield (T.drop (fromIntegral n) txt)
421 cat
422 else go (n - len)
423 {-# INLINABLE drop #-}
424
425 -- | Take characters until they fail the predicate
426 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
427 takeWhile predicate = go
428 where
429 go = do
430 txt <- await
431 let (prefix, suffix) = T.span predicate txt
432 if (T.null suffix)
433 then do
434 yield txt
435 go
436 else yield prefix
437 {-# INLINABLE takeWhile #-}
438
439 -- | Drop characters until they fail the predicate
440 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
441 dropWhile predicate = go where
442 go = do
443 txt <- await
444 case T.findIndex (not . predicate) txt of
445 Nothing -> go
446 Just i -> do
447 yield (T.drop i txt)
448 cat
449 {-# INLINABLE dropWhile #-}
450
451 -- | Only allows 'Char's to pass if they satisfy the predicate
452 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
453 filter predicate = P.map (T.filter predicate)
454 {-# INLINABLE filter #-}
455
456 {-# RULES "p >-> filter q" forall p q .
457 p >-> filter q = for p (\txt -> yield (T.filter q txt))
458 #-}
459
460 -- | Strict left scan over the characters
461 scan
462 :: (Monad m)
463 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
464 scan step begin = do
465 yield (T.singleton begin)
466 go begin
467 where
468 go c = do
469 txt <- await
470 let txt' = T.scanl step c txt
471 c' = T.last txt'
472 yield (T.tail txt')
473 go c'
474 {-# INLINABLE scan #-}
475
476 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
477 'TL.Text'
478 -}
479 toLazy :: Producer Text Identity () -> TL.Text
480 toLazy = TL.fromChunks . P.toList
481 {-# INLINABLE toLazy #-}
482
483 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
484 'TL.Text'
485
486 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
487 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
488 immediately as they are generated instead of loading them all into memory.
489 -}
490 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
491 toLazyM = liftM TL.fromChunks . P.toListM
492 {-# INLINABLE toLazyM #-}
493
494 -- | Reduce the text stream using a strict left fold over characters
495 foldChars
496 :: Monad m
497 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
498 foldChars step begin done = P.fold (T.foldl' step) begin done
499 {-# INLINABLE foldChars #-}
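
-- For instance, a hedged sketch that counts spaces with a strict left fold
-- (the name @countSpaces@ is purely illustrative):
--
-- > countSpaces :: Monad m => Producer Text m () -> m Int
-- > countSpaces = foldChars (\n c -> if c == ' ' then n + 1 else n) 0 id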
500
501 -- | Retrieve the first 'Char'
502 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
503 head = go
504 where
505 go p = do
506 x <- nextChar p
507 case x of
508 Left _ -> return Nothing
509 Right (c, _) -> return (Just c)
510 {-# INLINABLE head #-}
511
512 -- | Retrieve the last 'Char'
513 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
514 last = go Nothing
515 where
516 go r p = do
517 x <- next p
518 case x of
519 Left () -> return r
520 Right (txt, p') ->
521 if (T.null txt)
522 then go r p'
523 else go (Just $ T.last txt) p'
524 {-# INLINABLE last #-}
525
526 -- | Determine if the stream is empty
527 null :: (Monad m) => Producer Text m () -> m Bool
528 null = P.all T.null
529 {-# INLINABLE null #-}
530
531 -- | Count the number of characters in the stream
532 length :: (Monad m, Num n) => Producer Text m () -> m n
533 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
534 {-# INLINABLE length #-}
535
536 -- | Fold that returns whether any received 'Char's satisfy the predicate
537 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
538 any predicate = P.any (T.any predicate)
539 {-# INLINABLE any #-}
540
541 -- | Fold that returns whether all received 'Char's satisfy the predicate
542 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
543 all predicate = P.all (T.all predicate)
544 {-# INLINABLE all #-}
545
546 -- | Return the maximum 'Char' within a text stream
547 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
548 maximum = P.fold step Nothing id
549 where
550 step mc txt =
551 if (T.null txt)
552 then mc
553 else Just $ case mc of
554 Nothing -> T.maximum txt
555 Just c -> max c (T.maximum txt)
556 {-# INLINABLE maximum #-}
557
558 -- | Return the minimum 'Char' within a text stream (surely very useful!)
559 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
560 minimum = P.fold step Nothing id
561 where
562 step mc txt =
563 if (T.null txt)
564 then mc
565 else case mc of
566 Nothing -> Just (T.minimum txt)
567 Just c -> Just (min c (T.minimum txt))
568 {-# INLINABLE minimum #-}
569
570 -- | Find the first element in the stream that matches the predicate
571 find
572 :: (Monad m)
573 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
574 find predicate p = head (p >-> filter predicate)
575 {-# INLINABLE find #-}
576
577 -- | Index into a text stream
578 index
579 :: (Monad m, Integral a)
580 => a -> Producer Text m () -> m (Maybe Char)
581 index n p = head (p >-> drop n)
582 {-# INLINABLE index #-}
583
584
585 -- | Count how many times the given 'Text' occurs in the stream, tallying within each chunk (so occurrences that span chunk boundaries are not detected)
586 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
587 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
588 {-# INLINABLE count #-}
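
-- A hedged sketch (assuming runIdentity from Data.Functor.Identity is in scope;
-- "hello" contains two occurrences of "l"):
--
-- > runIdentity (count (T.pack "l") (fromLazy (TL.pack "hello"))) == (2 :: Int)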
589
590
591 -- | Consume the first character from a stream of 'Text'
592 --
593 -- 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
594 -- succeeds with a 'Right' providing the next character and the remainder of the
595 -- 'Producer'.
596
597 nextChar
598 :: (Monad m)
599 => Producer Text m r
600 -> m (Either r (Char, Producer Text m r))
601 nextChar = go
602 where
603 go p = do
604 x <- next p
605 case x of
606 Left r -> return (Left r)
607 Right (txt, p') -> case (T.uncons txt) of
608 Nothing -> go p'
609 Just (c, txt') -> return (Right (c, yield txt' >> p'))
610 {-# INLINABLE nextChar #-}
611
612 -- | Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the 'Producer' is empty
613
614 drawChar :: (Monad m) => Parser Text m (Maybe Char)
615 drawChar = do
616 x <- PP.draw
617 case x of
618 Nothing -> return Nothing
619 Just txt -> case (T.uncons txt) of
620 Nothing -> drawChar
621 Just (c, txt') -> do
622 PP.unDraw txt'
623 return (Just c)
624 {-# INLINABLE drawChar #-}
625
626 -- | Push back a 'Char' onto the underlying 'Producer'
627 unDrawChar :: (Monad m) => Char -> Parser Text m ()
628 unDrawChar c = modify (yield (T.singleton c) >>)
629 {-# INLINABLE unDrawChar #-}
630
631 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
632 push the 'Char' back
633
634 > peekChar = do
635 > x <- drawChar
636 > case x of
637 > Nothing -> return ()
638 > Just c -> unDrawChar c
639 > return x
640
641 -}
642
643 peekChar :: (Monad m) => Parser Text m (Maybe Char)
644 peekChar = do
645 x <- drawChar
646 case x of
647 Nothing -> return ()
648 Just c -> unDrawChar c
649 return x
650 {-# INLINABLE peekChar #-}
651
652 {-| Check if the underlying 'Producer' has no more characters
653
654 Note that this will skip over empty 'Text' chunks, unlike
655 'PP.isEndOfInput' from @pipes-parse@, which would consider
656 an empty 'Text' a valid bit of input.
657
658 > isEndOfChars = liftM isNothing peekChar
659 -}
660 isEndOfChars :: (Monad m) => Parser Text m Bool
661 isEndOfChars = do
662 x <- peekChar
663 return (case x of
664 Nothing -> True
665 Just _-> False )
666 {-# INLINABLE isEndOfChars #-}
667
668
669 -- | Improper lens that splits a 'Producer' after the given number of characters
670 splitAt
671 :: (Monad m, Integral n)
672 => n
673 -> Lens' (Producer Text m r)
674 (Producer Text m (Producer Text m r))
675 splitAt n0 k p0 = fmap join (k (go n0 p0))
676 where
677 go 0 p = return p
678 go n p = do
679 x <- lift (next p)
680 case x of
681 Left r -> return (return r)
682 Right (txt, p') -> do
683 let len = fromIntegral (T.length txt)
684 if (len <= n)
685 then do
686 yield txt
687 go (n - len) p'
688 else do
689 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
690 yield prefix
691 return (yield suffix >> p')
692 {-# INLINABLE splitAt #-}
693
694
695 -- | Split a text stream in two, producing the longest
696 -- consecutive group of characters that satisfies the predicate
697 -- and returning the rest
698
699 span
700 :: (Monad m)
701 => (Char -> Bool)
702 -> Lens' (Producer Text m r)
703 (Producer Text m (Producer Text m r))
704 span predicate k p0 = fmap join (k (go p0))
705 where
706 go p = do
707 x <- lift (next p)
708 case x of
709 Left r -> return (return r)
710 Right (txt, p') -> do
711 let (prefix, suffix) = T.span predicate txt
712 if (T.null suffix)
713 then do
714 yield txt
715 go p'
716 else do
717 yield prefix
718 return (yield suffix >> p')
719 {-# INLINABLE span #-}
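
-- A hedged usage sketch (assuming @isDigit@ from "Data.Char" is in scope; the name
-- @leadingDigits@ is purely illustrative): yield the leading run of digits and
-- discard the rest of the stream without ever running it.
--
-- > leadingDigits :: Monad m => Producer Text m r -> Producer Text m ()
-- > leadingDigits p = (p ^. span isDigit) >> return ()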
720
721 {-| Split a text stream in two, producing the longest
722 consecutive group of characters that don't satisfy the predicate
723 -}
724 break
725 :: (Monad m)
726 => (Char -> Bool)
727 -> Lens' (Producer Text m r)
728 (Producer Text m (Producer Text m r))
729 break predicate = span (not . predicate)
730 {-# INLINABLE break #-}
731
732 {-| Improper lens that splits after the first group of equivalent Chars, as
733 defined by the given equivalence relation
734 -}
735 groupBy
736 :: (Monad m)
737 => (Char -> Char -> Bool)
738 -> Lens' (Producer Text m r)
739 (Producer Text m (Producer Text m r))
740 groupBy equals k p0 = fmap join (k ((go p0))) where
741 go p = do
742 x <- lift (next p)
743 case x of
744 Left r -> return (return r)
745 Right (txt, p') -> case T.uncons txt of
746 Nothing -> go p'
747 Just (c, _) -> (yield txt >> p') ^. span (equals c)
748 {-# INLINABLE groupBy #-}
749
750 -- | Improper lens that splits after the first succession of identical 'Char's
751 group :: Monad m
752 => Lens' (Producer Text m r)
753 (Producer Text m (Producer Text m r))
754 group = groupBy (==)
755 {-# INLINABLE group #-}
756
757 {-| Improper lens that splits a 'Producer' after the first word
758
759 Unlike 'words', this does not drop leading whitespace
760 -}
761 word :: (Monad m)
762 => Lens' (Producer Text m r)
763 (Producer Text m (Producer Text m r))
764 word k p0 = fmap join (k (to p0))
765 where
766 to p = do
767 p' <- p^.span isSpace
768 p'^.break isSpace
769 {-# INLINABLE word #-}
770
771
-- | Improper lens that splits a 'Producer' after the first line: the outer
-- 'Producer' yields the characters before the first newline, returning the
-- rest of the stream (which still begins with the newline, if there is one)
772 line :: (Monad m)
773 => Lens' (Producer Text m r)
774 (Producer Text m (Producer Text m r))
775 line = break (== '\n')
776
777 {-# INLINABLE line #-}
778
779
780 -- | Intersperse a 'Char' in between the characters of a stream of 'Text'
781 intersperse
782 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
783 intersperse c = go0
784 where
785 go0 p = do
786 x <- lift (next p)
787 case x of
788 Left r -> return r
789 Right (txt, p') -> do
790 yield (T.intersperse c txt)
791 go1 p'
792 go1 p = do
793 x <- lift (next p)
794 case x of
795 Left r -> return r
796 Right (txt, p') -> do
797 yield (T.singleton c)
798 yield (T.intersperse c txt)
799 go1 p'
800 {-# INLINABLE intersperse #-}
801
802
803
804 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
805 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
806 packChars = Data.Profunctor.dimap to (fmap from)
807 where
808 -- to :: Monad m => Producer Char m x -> Producer Text m x
809 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
810
811 step diffAs c = diffAs . (c:)
812
813 done diffAs = T.pack (diffAs [])
814
815 -- from :: Monad m => Producer Text m x -> Producer Char m x
816 from p = for p (each . T.unpack)
817 {-# INLINABLE packChars #-}
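
-- A hedged sketch of 'packChars' used as a getter with this module's @(^.)@
-- (illustration only; 'each' comes from "Pipes"):
--
-- > toLazy (each "hello, world" ^. packChars) == TL.pack "hello, world"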
818
819
820 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
821 chunksOf
822 :: (Monad m, Integral n)
823 => n -> Lens' (Producer Text m r)
824 (FreeT (Producer Text m) m r)
825 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
826 where
827 go p = do
828 x <- next p
829 return $ case x of
830 Left r -> Pure r
831 Right (txt, p') -> Free $ do
832 p'' <- (yield txt >> p') ^. splitAt n
833 return $ FreeT (go p'')
834 {-# INLINABLE chunksOf #-}
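
-- A hedged sketch (the name @capChunks@ is purely illustrative): re-splitting with
-- 'chunksOf' and re-joining with 'concats' yields the same characters, but with no
-- chunk longer than @n@.
--
-- > capChunks :: Monad m => Int -> Producer Text m r -> Producer Text m r
-- > capChunks n p = concats (p ^. chunksOf n)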
835
836
837 {-| Split a text stream into sub-streams delimited by characters that satisfy the
838 predicate
839 -}
840 splitsWith
841 :: (Monad m)
842 => (Char -> Bool)
843 -> Producer Text m r
844 -> FreeT (Producer Text m) m r
845 splitsWith predicate p0 = FreeT (go0 p0)
846 where
847 go0 p = do
848 x <- next p
849 case x of
850 Left r -> return (Pure r)
851 Right (txt, p') ->
852 if (T.null txt)
853 then go0 p'
854 else return $ Free $ do
855 p'' <- (yield txt >> p') ^. span (not . predicate)
856 return $ FreeT (go1 p'')
857 go1 p = do
858 x <- nextChar p
859 return $ case x of
860 Left r -> Pure r
861 Right (_, p') -> Free $ do
862 p'' <- p' ^. span (not . predicate)
863 return $ FreeT (go1 p'')
864 {-# INLINABLE splitsWith #-}
865
866 -- | Split a text stream using the given 'Char' as the delimiter
867 splits :: (Monad m)
868 => Char
869 -> Lens' (Producer Text m r)
870 (FreeT (Producer Text m) m r)
871 splits c k p =
872 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
873 {-# INLINABLE splits #-}
874
875 {-| Improper lens from a stream of 'Text' to its 'FreeT'-delimited groups of equivalent 'Char's, using the
876 given equivalence relation
877 -}
878 groupsBy
879 :: Monad m
880 => (Char -> Char -> Bool)
881 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
882 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
883 go p = do x <- next p
884 case x of Left r -> return (Pure r)
885 Right (bs, p') -> case T.uncons bs of
886 Nothing -> go p'
887 Just (c, _) -> do return $ Free $ do
888 p'' <- (yield bs >> p')^.span (equals c)
889 return $ FreeT (go p'')
890 {-# INLINABLE groupsBy #-}
891
892
893 -- | Like 'groupsBy', where the equality predicate is ('==')
894 groups
895 :: Monad m
896 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
897 groups = groupsBy (==)
898 {-# INLINABLE groups #-}
899
900
901
902 {-| Split a text stream into 'FreeT'-delimited lines
903 -}
904 lines
905 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
906 lines = Data.Profunctor.dimap _lines (fmap _unlines)
907 where
908 _lines p0 = FreeT (go0 p0)
909 where
910 go0 p = do
911 x <- next p
912 case x of
913 Left r -> return (Pure r)
914 Right (txt, p') ->
915 if (T.null txt)
916 then go0 p'
917 else return $ Free $ go1 (yield txt >> p')
918 go1 p = do
919 p' <- p ^. break ('\n' ==)
920 return $ FreeT $ do
921 x <- nextChar p'
922 case x of
923 Left r -> return $ Pure r
924 Right (_, p'') -> go0 p''
925 -- _unlines
926 -- :: Monad m
927 -- => FreeT (Producer Text m) m x -> Producer Text m x
928 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
929
930
931 {-# INLINABLE lines #-}
932
933
934 -- | Split a text stream into 'FreeT'-delimited words
935 words
936 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
937 words = Data.Profunctor.dimap go (fmap _unwords)
938 where
939 go p = FreeT $ do
940 x <- next (p >-> dropWhile isSpace)
941 return $ case x of
942 Left r -> Pure r
943 Right (bs, p') -> Free $ do
944 p'' <- (yield bs >> p') ^. break isSpace
945 return (go p'')
946 _unwords = PG.intercalates (yield $ T.singleton ' ')
947
948 {-# INLINABLE words #-}
949
950
951 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
952 interspersing a text stream in between them
953 -}
954 intercalate
955 :: (Monad m)
956 => Producer Text m ()
957 -> FreeT (Producer Text m) m r
958 -> Producer Text m r
959 intercalate p0 = go0
960 where
961 go0 f = do
962 x <- lift (runFreeT f)
963 case x of
964 Pure r -> return r
965 Free p -> do
966 f' <- p
967 go1 f'
968 go1 f = do
969 x <- lift (runFreeT f)
970 case x of
971 Pure r -> return r
972 Free p -> do
973 p0
974 f' <- p
975 go1 f'
976 {-# INLINABLE intercalate #-}
977
978 {-| Join 'FreeT'-delimited lines into a text stream
979 -}
980 unlines
981 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
982 unlines = go
983 where
984 go f = do
985 x <- lift (runFreeT f)
986 case x of
987 Pure r -> return r
988 Free p -> do
989 f' <- p
990 yield $ T.singleton '\n'
991 go f'
992 {-# INLINABLE unlines #-}
993
994 {-| Join 'FreeT'-delimited words into a text stream
995 -}
996 unwords
997 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
998 unwords = intercalate (yield $ T.singleton ' ')
999 {-# INLINABLE unwords #-}
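
-- A hedged sketch of 'words' and 'unwords' together (illustration only): splitting on
-- whitespace and re-joining with single spaces normalizes the whitespace in a stream.
--
-- > toLazy (unwords (fromLazy (TL.pack "  pipes   text ") ^. words)) == TL.pack "pipes text"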
1000
1001
1002 {- $reexports
1003
1004 @Data.Text@ re-exports the 'Text' type.
1005
1006 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parser' synonym.
1007 -}
1008
1009