1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
3
4 module Pipes.Text (
5 -- * Introduction
6 -- $intro
7
8 -- * Producers
9 fromLazy
10
11 -- * Pipes
12 , map
13 , concatMap
14 , take
15 , drop
16 , takeWhile
17 , dropWhile
18 , filter
19 , scan
20 , pack
21 , unpack
22 , toCaseFold
23 , toLower
24 , toUpper
25 , stripStart
26
27 -- * Folds
28 , toLazy
29 , toLazyM
30 , foldChars
31 , head
32 , last
33 , null
34 , length
35 , any
36 , all
37 , maximum
38 , minimum
39 , find
40 , index
41 , count
42
43 -- * Primitive Character Parsers
44 , nextChar
45 , drawChar
46 , unDrawChar
47 , peekChar
48 , isEndOfChars
49
50 -- * Parsing Lenses
51 , splitAt
52 , span
53 , break
54 , groupBy
55 , group
56 , word
57 , line
58
59 -- * FreeT Splitters
60 , chunksOf
61 , splitsWith
62 , splits
63 , groupsBy
64 , groups
65 , lines
66 , words
67
68 -- * Transformations
69 , intersperse
70 , packChars
71
72 -- * Joiners
73 , intercalate
74 , unlines
75 , unwords
76
77 -- * Re-exports
78 -- $reexports
79 , module Data.ByteString
80 , module Data.Text
81 , module Data.Profunctor
82 , module Pipes.Parse
83 , module Pipes.Group
84 ) where
85
86 import Control.Applicative ((<*))
87 import Control.Monad (liftM, join)
88 import Control.Monad.Trans.State.Strict (StateT(..), modify)
89 import qualified Data.Text as T
90 import Data.Text (Text)
91 import qualified Data.Text.Lazy as TL
92 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
93 import Data.ByteString (ByteString)
94 import Data.Functor.Constant (Constant(Constant, getConstant))
95 import Data.Functor.Identity (Identity)
96 import Data.Profunctor (Profunctor)
97 import qualified Data.Profunctor
98 import Pipes
99 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
100 import qualified Pipes.Group as PG
101 import qualified Pipes.Parse as PP
102 import Pipes.Parse (Parser)
103 import qualified Pipes.Prelude as P
104 import Data.Char (isSpace)
105 import Data.Word (Word8)
106
107 import Prelude hiding (
108 all,
109 any,
110 break,
111 concat,
112 concatMap,
113 drop,
114 dropWhile,
115 elem,
116 filter,
117 head,
118 last,
119 lines,
120 length,
121 map,
122 maximum,
123 minimum,
124 notElem,
125 null,
126 readFile,
127 span,
128 splitAt,
129 take,
130 takeWhile,
131 unlines,
132 unwords,
133 words,
134 writeFile )
135
136 {- $intro
137
138 * /I. Effectful Text/
139
140 This package provides @pipes@ utilities for /text streams/, understood as
141 streams of 'Text' chunks. The individual chunks are uniformly /strict/, and thus you
142 will generally want @Data.Text@ in scope. But the type @Producer Text m r@ as we
143 are using it is a sort of pipes equivalent of the lazy @Text@ type.
144
145 This particular module provides many functions equivalent in one way or another to
146 the pure functions in
147 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
148 They transform, divide, group and fold text streams. Though @Producer Text m r@
149 is the type of \'effectful Text\', the functions in this module are \'pure\'
150 in the sense that they are uniformly monad-independent.
151 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
152 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
153 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
154
155 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
156 strict Text: the implementation is arranged so that the individual strict 'Text'
157 chunks are kept to a reasonable size; the user is not aware of the divisions
158 between the connected 'Text' chunks.
159 So also here: the functions in this module are designed to operate on streams that
160 are insensitive to text boundaries. This means that they may freely split
161 text into smaller texts and /discard empty texts/. The objective, though, is
162 that they should /never concatenate texts/ in order to provide strict upper
163 bounds on memory usage.
164
165 For example, to stream only the first three lines of 'stdin' to 'stdout' you
166 might write:
167
168 > import Pipes
169 > import qualified Pipes.Text as Text
170 > import qualified Pipes.Text.IO as Text
171 > import Pipes.Group (takes')
172 > import Lens.Family
173 >
174 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
175 > where
176 > takeLines n = Text.unlines . takes' n . view Text.lines
177
178 The above program will never bring more than one chunk of text (~ 32 KB) into
179 memory, no matter how long the lines are.
180
181 * /II. Lenses/
182
183 As this example shows, one superficial difference from @Data.Text.Lazy@
184 is that many of the operations, like 'lines', are \'lensified\'; this has a
185 number of advantages (where it is possible); in particular it facilitates their
186 use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
187 sense.) The disadvantage, famously, is that the messages you get for type errors can be
188 a little alarming. The remarks that follow in this section are for non-lens adepts.
189
190 Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
191 intuitively corresponding function when used with @view@ or @(^.)@. Instead of
192 writing:
193
194 > splitAt 17 producer
195
196 as we would with the Prelude or Text functions, we write
197
198 > view (splitAt 17) producer
199
200 or
201
202 > producer ^. splitAt 17
203
204 This may seem a little indirect, but note that many equivalents of
205 @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
206 corresponding functions by prefixing them with @(>->)@. Thus something like
207
208 > stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
209
210 would drop the leading white space from each line.
211
212 The lenses in this library are marked as /improper/; this just means that
213 they don't admit all the operations of an ideal lens, but only "getting" and "focussing".
214 Just for this reason, though, the magnificent complexities of the lens libraries
215 are a distraction. The lens combinators to keep in mind, the ones that make sense for
216     our lenses, are @view@ \/ @(^.)@, @over@ \/ @(%~)@, and @zoom@.
217
218 One need only keep in mind that if @l@ is a @Lens' a b@, then:
219
220 - @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
221 is the corresponding @b@; as was said above, this function will be exactly the
222 function you think it is, given its name. Thus to uppercase the first n characters
223 of a Producer, leaving the rest the same, we could write:
224
225
226 > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
227 > p'
228
229
230 - @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
231 @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
232 the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
233 (it can also be written @l %~ f@).
234 For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
235 So above we might have written things like these:
236
237 > stripLines = Text.lines %~ maps (>-> Text.stripStart)
238 > stripLines = over Text.lines (maps (>-> Text.stripStart))
239 > upper n = Text.splitAt n %~ (>-> Text.toUpper)
240
241 - @zoom l@, finally, is a function from a @Parser b m r@
242 to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
243     Its use is easiest to see with a decoding lens like 'utf8', which
244 \"sees\" a Text producer hidden inside a ByteString producer:
245 @drawChar@ is a Text parser, returning a @Maybe Char@, @zoom utf8 drawChar@ is
246 a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
247 a list of everything produced from a Producer, leaving only the return value; it would
248 usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
249 returns a list of Text chunks containing the first seventeen Chars, and returns the rest of
250 the Text Producer for further parsing. Suppose that we want, inexplicably, to
251 modify the casing of a Text Producer according to any instruction it might
252 contain at the start. Then we might write something like this:
253
254 > obey :: Monad m => Producer Text m b -> Producer Text m b
255 > obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
256 > let seven = T.concat ts
257 > case T.toUpper seven of
258 > "TOUPPER" -> p' >-> Text.toUpper
259 > "TOLOWER" -> p' >-> Text.toLower
260 > _ -> do yield seven
261 > p'
262
263 The purpose of exporting lenses is the mental economy achieved with this three-way
264     applicability. That one expression, e.g. @lines@ or @splitAt 17@, can have these
265 three uses is no more surprising than that a pipe can act as a function modifying
266 the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
267 -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
268     @pipe >-> consumer@.
269
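As a minimal sketch of this three-way applicability (assuming @view@, @over@ and
@zoom@ are in scope from one of the lens libraries mentioned just below, and
@drawAll@ from @Pipes.Parse@; the names on the left are merely illustrative):

> firstSeventeen      = view (Text.splitAt 17)                    -- the first seventeen characters, returning the rest
> upperFirstSeventeen = over (Text.splitAt 17) (>-> Text.toUpper) -- uppercase only the first seventeen characters
> drawFirstSeventeen  = zoom (Text.splitAt 17) drawAll            -- a 'Parser' returning those characters' chunks as a list
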
270 The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
271 both <http://hackage.haskell.org/package/lens lens> and
272     <http://hackage.haskell.org/package/lens-family lens-family>. The use of 'zoom' is explained
273 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
274 and to some extent in the @Pipes.Text.Encoding@ module here.
275
276
277 * /III. Special types:/ @Producer Text m (Producer Text m r)@ /and/ @FreeT (Producer Text m) m r@
278
279 These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
280 This is in the types that are most closely associated with our central text type,
281 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
282
283 > splitAt :: Int -> Text -> (Text, Text)
284 > lines :: Text -> [Text]
285 > chunksOf :: Int -> Text -> [Text]
286
287 which relate a Text with a pair of Texts or a list of Texts.
288 The corresponding functions here (taking account of \'lensification\') are
289
290 > view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
291 > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
292 > view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
293
294 Some of the types may be more readable if you imagine that we have introduced
295 our own type synonyms
296
297 > type Text m r = Producer T.Text m r
298 > type Texts m r = FreeT (Producer T.Text m) m r
299
300 Then we would think of the types above as
301
302 > view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
303 > view lines :: (Monad m) => Text m r -> Texts m r
304 > view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
305
306 which brings one closer to the types of the similar functions in @Data.Text.Lazy@
307
308 In the type @Producer Text m (Producer Text m r)@ the second
309 element of the \'pair\' of effectful Texts cannot simply be retrieved
310 with something like 'snd'. This is an \'effectful\' pair, and one must work
311 through the effects of the first element to arrive at the second Text stream, even
312 if you are proposing to throw the Text in the first element away.
313 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
314
315 > join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r
316
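For instance (a small sketch; the name @reassemble@ is only illustrative), fusing
the pair produced by @splitAt@ back together with 'Control.Monad.join' gives back
an equivalent stream -- nothing is lost except, perhaps, the original chunk divisions:

> reassemble :: Monad m => Producer Text m r -> Producer Text m r
> reassemble p = join (view (Text.splitAt 17) p)
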
317 The return type of 'lines', 'words', 'chunksOf' and the other "splitter" functions,
318     @FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful)
319 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
320 together things of the forms:
321
322 > r
323 > (Text,r)
324 > (Text, (Text, r))
325 > (Text, (Text, (Text, r)))
326 > (Text, (Text, (Text, (Text, r))))
327 > ...
328
329 (We might also have identified the sum of those types with @Free ((,) Text) r@
330 -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
331
332 Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
333 @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
334
335 > m r
336 > Text m r
337 > Text m (Text m r)
338 > Text m (Text m (Text m r))
339 > Text m (Text m (Text m (Text m r)))
340 > ...
341
342 We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
343 or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
344     result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@.
345
346 One might think that
347
348 > lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
349 > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
350
351 should really have the type
352
353 > lines :: Monad m => Pipe Text Text m r
354
355 as e.g. 'toUpper' does. But this would spoil the control we are
356 attempting to maintain over the size of chunks. It is in fact just
357 as unreasonable to want such a pipe as to want
358
359 > Data.Text.Lazy.lines :: Text -> Text
360
361 to 'rechunk' the strict Text chunks inside the lazy Text to respect
362 line boundaries. In fact we have
363
364 > Data.Text.Lazy.lines :: Text -> [Text]
365 > Prelude.lines :: String -> [String]
366
367 where the elements of the list are themselves lazy Texts or Strings; the use
368 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
369
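As a small illustration of consuming such a value (a sketch only, assuming
@Pipes.Prelude@ is imported qualified as @P@ and @Pipes.Group@ as @Group@), we can
count the lines of a text stream without ever accumulating their contents:

> countLines :: Monad m => Producer Text m () -> m Int
> countLines = P.length . Group.folds (\_ _ -> ()) () id . view Text.lines
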
370 The @Pipes.Group@ module, which can generally be imported without qualification,
371     provides many functions for working with things of type @FreeT (Producer a m) m r@.
372 In particular it conveniently exports the constructors for @FreeT@ and the associated
373 @FreeF@ type -- a fancy form of @Either@, namely
374
375 > data FreeF f a b = Pure a | Free (f b)
376
377 for pattern-matching. Consider the implementation of the 'words' function, or
378 of the part of the lens that takes us to the words; it is compact but exhibits many
379 of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
380     constructors. Keep in mind that
381
382 > newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
383 > next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
384
385 Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
386 the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
387
388 > words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
389 > words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
390 > x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
391 > return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
392 > Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
393 > Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
394 > p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
395 > (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
396 > return (words p'') -- We then carry on with the rest, which is likely to begin with space.
397
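A short sketch of using the resulting splitter (the name @collapseSpaces@ is ours):
sending a stream through @view words@ and back through @unwords@ drops leading and
trailing whitespace and reduces interior runs of whitespace to single spaces:

> collapseSpaces :: Monad m => Producer Text m r -> Producer Text m r
> collapseSpaces = Text.unwords . view Text.words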
398 -}
399
400 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
401 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
402 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
403 {-# INLINE fromLazy #-}
404
405
406 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
407
408 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
409
410 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
411 a ^. lens = getConstant (lens Constant a)
412
413
414 -- | Apply a transformation to each 'Char' in the stream
415 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
416 map f = P.map (T.map f)
417 {-# INLINABLE map #-}
418
419 {-# RULES "p >-> map f" forall p f .
420 p >-> map f = for p (\txt -> yield (T.map f txt))
421 #-}
422
423 -- | Map a function over the characters of a text stream and concatenate the results
424 concatMap
425 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
426 concatMap f = P.map (T.concatMap f)
427 {-# INLINABLE concatMap #-}
428
429 {-# RULES "p >-> concatMap f" forall p f .
430 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
431 #-}
432
433
434 -- | Transform a Pipe of 'String's into one of 'Text' chunks
435 pack :: Monad m => Pipe String Text m r
436 pack = P.map T.pack
437 {-# INLINEABLE pack #-}
438
439 {-# RULES "p >-> pack" forall p .
440 p >-> pack = for p (\txt -> yield (T.pack txt))
441 #-}
442
443 -- | Transform a Pipe of 'Text' chunks into one of 'String's
444 unpack :: Monad m => Pipe Text String m r
445 unpack = for cat (\t -> yield (T.unpack t))
446 {-# INLINEABLE unpack #-}
447
448 {-# RULES "p >-> unpack" forall p .
449 p >-> unpack = for p (\txt -> yield (T.unpack txt))
450 #-}
451
452 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
453 -- here acting as 'Text' pipes, rather as they would on a lazy text
454 toCaseFold :: Monad m => Pipe Text Text m r
455 toCaseFold = P.map T.toCaseFold
456 {-# INLINEABLE toCaseFold #-}
457
458 {-# RULES "p >-> toCaseFold" forall p .
459 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
460 #-}
461
462
463 -- | lowercase incoming 'Text'
464 toLower :: Monad m => Pipe Text Text m r
465 toLower = P.map T.toLower
466 {-# INLINEABLE toLower #-}
467
468 {-# RULES "p >-> toLower" forall p .
469 p >-> toLower = for p (\txt -> yield (T.toLower txt))
470 #-}
471
472 -- | uppercase incoming 'Text'
473 toUpper :: Monad m => Pipe Text Text m r
474 toUpper = P.map T.toUpper
475 {-# INLINEABLE toUpper #-}
476
477 {-# RULES "p >-> toUpper" forall p .
478 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
479 #-}
480
481 -- | Remove leading white space from an incoming succession of 'Text's
482 stripStart :: Monad m => Pipe Text Text m r
483 stripStart = do
484 chunk <- await
485 let text = T.stripStart chunk
486 if T.null text
487 then stripStart
488 else do yield text
489 cat
490 {-# INLINEABLE stripStart #-}
491
492 -- | @(take n)@ only allows @n@ individual characters to pass;
493 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
494 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
495 take n0 = go n0 where
496 go n
497 | n <= 0 = return ()
498 | otherwise = do
499 txt <- await
500 let len = fromIntegral (T.length txt)
501 if (len > n)
502 then yield (T.take (fromIntegral n) txt)
503 else do
504 yield txt
505 go (n - len)
506 {-# INLINABLE take #-}
507
508 -- | @(drop n)@ drops the first @n@ characters
509 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
510 drop n0 = go n0 where
511 go n
512 | n <= 0 = cat
513 | otherwise = do
514 txt <- await
515 let len = fromIntegral (T.length txt)
516 if (len >= n)
517 then do
518 yield (T.drop (fromIntegral n) txt)
519 cat
520 else go (n - len)
521 {-# INLINABLE drop #-}
522
523 -- | Take characters until they fail the predicate
524 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
525 takeWhile predicate = go
526 where
527 go = do
528 txt <- await
529 let (prefix, suffix) = T.span predicate txt
530 if (T.null suffix)
531 then do
532 yield txt
533 go
534 else yield prefix
535 {-# INLINABLE takeWhile #-}
536
537 -- | Drop characters until they fail the predicate
538 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
539 dropWhile predicate = go where
540 go = do
541 txt <- await
542 case T.findIndex (not . predicate) txt of
543 Nothing -> go
544 Just i -> do
545 yield (T.drop i txt)
546 cat
547 {-# INLINABLE dropWhile #-}
548
549 -- | Only allows 'Char's to pass if they satisfy the predicate
550 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
551 filter predicate = P.map (T.filter predicate)
552 {-# INLINABLE filter #-}
553
554 {-# RULES "p >-> filter q" forall p q .
555 p >-> filter q = for p (\txt -> yield (T.filter q txt))
556 #-}
557
558 -- | Strict left scan over the characters
559 scan
560 :: (Monad m)
561 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
562 scan step begin = do
563 yield (T.singleton begin)
564 go begin
565 where
566 go c = do
567 txt <- await
568 let txt' = T.scanl step c txt
569 c' = T.last txt'
570 yield (T.tail txt')
571 go c'
572 {-# INLINABLE scan #-}
573
574 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
575 'TL.Text'
576 -}
577 toLazy :: Producer Text Identity () -> TL.Text
578 toLazy = TL.fromChunks . P.toList
579 {-# INLINABLE toLazy #-}
580
581 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
582 'TL.Text'
583
584 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
585 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
586 immediately as they are generated instead of loading them all into memory.
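
    A hedged usage sketch (the helper name @firstHundred@ is ours, not part of
    the API): collect at most the first hundred characters of a stream into a
    lazy 'TL.Text' for inspection.

> firstHundred :: Monad m => Producer Text m () -> m TL.Text
> firstHundred p = toLazyM (p >-> take 100)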
587 -}
588 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
589 toLazyM = liftM TL.fromChunks . P.toListM
590 {-# INLINABLE toLazyM #-}
591
592 -- | Reduce the text stream using a strict left fold over characters
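--
-- A hedged sketch (the name @charCount@ is only an illustration): counting the
-- characters with an explicit strict fold, equivalent to 'length'.
--
-- > charCount :: Monad m => Producer Text m () -> m Int
-- > charCount = foldChars (\n _ -> n + 1) 0 id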
593 foldChars
594 :: Monad m
595 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
596 foldChars step begin done = P.fold (T.foldl' step) begin done
597 {-# INLINABLE foldChars #-}
598
599 -- | Retrieve the first 'Char'
600 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
601 head = go
602 where
603 go p = do
604 x <- nextChar p
605 case x of
606 Left _ -> return Nothing
607 Right (c, _) -> return (Just c)
608 {-# INLINABLE head #-}
609
610 -- | Retrieve the last 'Char'
611 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
612 last = go Nothing
613 where
614 go r p = do
615 x <- next p
616 case x of
617 Left () -> return r
618 Right (txt, p') ->
619 if (T.null txt)
620 then go r p'
621 else go (Just $ T.last txt) p'
622 {-# INLINABLE last #-}
623
624 -- | Determine if the stream is empty
625 null :: (Monad m) => Producer Text m () -> m Bool
626 null = P.all T.null
627 {-# INLINABLE null #-}
628
629 -- | Count the number of characters in the stream
630 length :: (Monad m, Num n) => Producer Text m () -> m n
631 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
632 {-# INLINABLE length #-}
633
634 -- | Fold that returns whether any received 'Char's satisfy the predicate
635 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
636 any predicate = P.any (T.any predicate)
637 {-# INLINABLE any #-}
638
639 -- | Fold that returns whether all received 'Char's satisfy the predicate
640 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
641 all predicate = P.all (T.all predicate)
642 {-# INLINABLE all #-}
643
644 -- | Return the maximum 'Char' within a text stream
645 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
646 maximum = P.fold step Nothing id
647 where
648 step mc txt =
649 if (T.null txt)
650 then mc
651 else Just $ case mc of
652 Nothing -> T.maximum txt
653 Just c -> max c (T.maximum txt)
654 {-# INLINABLE maximum #-}
655
656 -- | Return the minimum 'Char' within a text stream (surely very useful!)
657 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
658 minimum = P.fold step Nothing id
659 where
660 step mc txt =
661 if (T.null txt)
662 then mc
663 else case mc of
664 Nothing -> Just (T.minimum txt)
665 Just c -> Just (min c (T.minimum txt))
666 {-# INLINABLE minimum #-}
667
668 -- | Find the first element in the stream that matches the predicate
669 find
670 :: (Monad m)
671 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
672 find predicate p = head (p >-> filter predicate)
673 {-# INLINABLE find #-}
674
675 -- | Index into a text stream
676 index
677 :: (Monad m, Integral a)
678     => a -> Producer Text m () -> m (Maybe Char)
679 index n p = head (p >-> drop n)
680 {-# INLINABLE index #-}
681
682
683 -- | Count the number of non-overlapping occurrences of the given 'Text' in the stream, as found within the individual chunks
684 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
685 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
686 {-# INLINABLE count #-}
687
688
689 -- | Consume the first character from a stream of 'Text'
690 --
691 -- 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
692 -- succeeds with a 'Right' providing the next character and the remainder of the
693 -- 'Producer'.
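--
-- A sketch of direct use (character-by-character, hence inefficient; the name
-- @toString@ is ours):
--
-- > toString :: Monad m => Producer Text m () -> m String
-- > toString p = do
-- >     x <- nextChar p
-- >     case x of
-- >         Left ()       -> return []
-- >         Right (c, p') -> liftM (c:) (toString p')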
694
695 nextChar
696 :: (Monad m)
697 => Producer Text m r
698 -> m (Either r (Char, Producer Text m r))
699 nextChar = go
700 where
701 go p = do
702 x <- next p
703 case x of
704 Left r -> return (Left r)
705 Right (txt, p') -> case (T.uncons txt) of
706 Nothing -> go p'
707 Just (c, txt') -> return (Right (c, yield txt' >> p'))
708 {-# INLINABLE nextChar #-}
709
710 -- | Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the 'Producer' is empty
711
712 drawChar :: (Monad m) => Parser Text m (Maybe Char)
713 drawChar = do
714 x <- PP.draw
715 case x of
716 Nothing -> return Nothing
717 Just txt -> case (T.uncons txt) of
718 Nothing -> drawChar
719 Just (c, txt') -> do
720 PP.unDraw txt'
721 return (Just c)
722 {-# INLINABLE drawChar #-}
723
724 -- | Push back a 'Char' onto the underlying 'Producer'
725 unDrawChar :: (Monad m) => Char -> Parser Text m ()
726 unDrawChar c = modify (yield (T.singleton c) >>)
727 {-# INLINABLE unDrawChar #-}
728
729 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
730 push the 'Char' back
731
732 > peekChar = do
733 > x <- drawChar
734 > case x of
735 >         Nothing -> return ()
736 >         Just c  -> unDrawChar c
737 > return x
738
739 -}
740
741 peekChar :: (Monad m) => Parser Text m (Maybe Char)
742 peekChar = do
743 x <- drawChar
744 case x of
745 Nothing -> return ()
746 Just c -> unDrawChar c
747 return x
748 {-# INLINABLE peekChar #-}
749
750 {-| Check if the underlying 'Producer' has no more characters
751
752 Note that this will skip over empty 'Text' chunks, unlike
753 'PP.isEndOfInput' from @pipes-parse@, which would consider
754 an empty 'Text' a valid bit of input.
755
756 > isEndOfChars = liftM isNothing peekChar
757 -}
758 isEndOfChars :: (Monad m) => Parser Text m Bool
759 isEndOfChars = do
760 x <- peekChar
761 return (case x of
762 Nothing -> True
763 Just _-> False )
764 {-# INLINABLE isEndOfChars #-}
765
766
767 -- | Splits a 'Producer' after the given number of characters
768 splitAt
769 :: (Monad m, Integral n)
770 => n
771 -> Lens' (Producer Text m r)
772 (Producer Text m (Producer Text m r))
773 splitAt n0 k p0 = fmap join (k (go n0 p0))
774 where
775 go 0 p = return p
776 go n p = do
777 x <- lift (next p)
778 case x of
779 Left r -> return (return r)
780 Right (txt, p') -> do
781 let len = fromIntegral (T.length txt)
782 if (len <= n)
783 then do
784 yield txt
785 go (n - len) p'
786 else do
787 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
788 yield prefix
789 return (yield suffix >> p')
790 {-# INLINABLE splitAt #-}
791
792
793 -- | Split a text stream in two, producing the longest
794 -- consecutive group of characters that satisfies the predicate
795 -- and returning the rest
796
797 span
798 :: (Monad m)
799 => (Char -> Bool)
800 -> Lens' (Producer Text m r)
801 (Producer Text m (Producer Text m r))
802 span predicate k p0 = fmap join (k (go p0))
803 where
804 go p = do
805 x <- lift (next p)
806 case x of
807 Left r -> return (return r)
808 Right (txt, p') -> do
809 let (prefix, suffix) = T.span predicate txt
810 if (T.null suffix)
811 then do
812 yield txt
813 go p'
814 else do
815 yield prefix
816 return (yield suffix >> p')
817 {-# INLINABLE span #-}
818
819 {-| Split a text stream in two, producing the longest
820 consecutive group of characters that don't satisfy the predicate
821 -}
822 break
823 :: (Monad m)
824 => (Char -> Bool)
825 -> Lens' (Producer Text m r)
826 (Producer Text m (Producer Text m r))
827 break predicate = span (not . predicate)
828 {-# INLINABLE break #-}
829
830 {-| Improper lens that splits after the first group of equivalent Chars, as
831 defined by the given equivalence relation
832 -}
833 groupBy
834 :: (Monad m)
835 => (Char -> Char -> Bool)
836 -> Lens' (Producer Text m r)
837 (Producer Text m (Producer Text m r))
838 groupBy equals k p0 = fmap join (k ((go p0))) where
839 go p = do
840 x <- lift (next p)
841 case x of
842 Left r -> return (return r)
843 Right (txt, p') -> case T.uncons txt of
844 Nothing -> go p'
845 Just (c, _) -> (yield txt >> p') ^. span (equals c)
846 {-# INLINABLE groupBy #-}
847
848 -- | Improper lens that splits after the first succession of identical 'Char's
849 group :: Monad m
850 => Lens' (Producer Text m r)
851 (Producer Text m (Producer Text m r))
852 group = groupBy (==)
853 {-# INLINABLE group #-}
854
855 {-| Improper lens that splits a 'Producer' after the first word
856
857 Unlike 'words', this does not drop leading whitespace
858 -}
859 word :: (Monad m)
860 => Lens' (Producer Text m r)
861 (Producer Text m (Producer Text m r))
862 word k p0 = fmap join (k (to p0))
863 where
864 to p = do
865 p' <- p^.span isSpace
866 p'^.break isSpace
867 {-# INLINABLE word #-}
868
869
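-- | Improper lens that splits a 'Producer' after the first line; the
-- terminating newline, if any, is left at the head of the remainder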
870 line :: (Monad m)
871 => Lens' (Producer Text m r)
872 (Producer Text m (Producer Text m r))
873 line = break (== '\n')
874
875 {-# INLINABLE line #-}
876
877
878 -- | Intersperse a 'Char' in between the characters of a stream of 'Text'
879 intersperse
880 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
881 intersperse c = go0
882 where
883 go0 p = do
884 x <- lift (next p)
885 case x of
886 Left r -> return r
887 Right (txt, p') -> do
888 yield (T.intersperse c txt)
889 go1 p'
890 go1 p = do
891 x <- lift (next p)
892 case x of
893 Left r -> return r
894 Right (txt, p') -> do
895 yield (T.singleton c)
896 yield (T.intersperse c txt)
897 go1 p'
898 {-# INLINABLE intersperse #-}
899
900
901
902 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
903 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
904 packChars = Data.Profunctor.dimap to (fmap from)
905 where
906 -- to :: Monad m => Producer Char m x -> Producer Text m x
907 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
908
909 step diffAs c = diffAs . (c:)
910
911 done diffAs = T.pack (diffAs [])
912
913 -- from :: Monad m => Producer Text m x -> Producer Char m x
914 from p = for p (each . T.unpack)
915 {-# INLINABLE packChars #-}
916
917
918 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
919 chunksOf
920 :: (Monad m, Integral n)
921 => n -> Lens' (Producer Text m r)
922 (FreeT (Producer Text m) m r)
923 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
924 where
925 go p = do
926 x <- next p
927 return $ case x of
928 Left r -> Pure r
929 Right (txt, p') -> Free $ do
930 p'' <- (yield txt >> p') ^. splitAt n
931 return $ FreeT (go p'')
932 {-# INLINABLE chunksOf #-}
933
934
935 {-| Split a text stream into sub-streams delimited by characters that satisfy the
936 predicate
937 -}
938 splitsWith
939 :: (Monad m)
940 => (Char -> Bool)
941 -> Producer Text m r
942 -> FreeT (Producer Text m) m r
943 splitsWith predicate p0 = FreeT (go0 p0)
944 where
945 go0 p = do
946 x <- next p
947 case x of
948 Left r -> return (Pure r)
949 Right (txt, p') ->
950 if (T.null txt)
951 then go0 p'
952 else return $ Free $ do
953 p'' <- (yield txt >> p') ^. span (not . predicate)
954 return $ FreeT (go1 p'')
955 go1 p = do
956 x <- nextChar p
957 return $ case x of
958 Left r -> Pure r
959 Right (_, p') -> Free $ do
960 p'' <- p' ^. span (not . predicate)
961 return $ FreeT (go1 p'')
962 {-# INLINABLE splitsWith #-}
963
964 -- | Split a text stream using the given 'Char' as the delimiter
965 splits :: (Monad m)
966 => Char
967 -> Lens' (Producer Text m r)
968 (FreeT (Producer Text m) m r)
969 splits c k p =
970 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
971 {-# INLINABLE splits #-}
972
973 {-| Improper lens from a stream of 'Text' to 'FreeT'-delimited groups of equivalent 'Char's,
974     as determined by the given equivalence relation
975 -}
976 groupsBy
977 :: Monad m
978 => (Char -> Char -> Bool)
979 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
980 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
981 go p = do x <- next p
982 case x of Left r -> return (Pure r)
983 Right (bs, p') -> case T.uncons bs of
984 Nothing -> go p'
985 Just (c, _) -> do return $ Free $ do
986 p'' <- (yield bs >> p')^.span (equals c)
987 return $ FreeT (go p'')
988 {-# INLINABLE groupsBy #-}
989
990
991 -- | Like 'groupsBy', where the equality predicate is ('==')
992 groups
993 :: Monad m
994 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
995 groups = groupsBy (==)
996 {-# INLINABLE groups #-}
997
998
999
1000 {-| Split a text stream into 'FreeT'-delimited lines
1001 -}
1002 lines
1003 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
1004 lines = Data.Profunctor.dimap _lines (fmap _unlines)
1005 where
1006 _lines p0 = FreeT (go0 p0)
1007 where
1008 go0 p = do
1009 x <- next p
1010 case x of
1011 Left r -> return (Pure r)
1012 Right (txt, p') ->
1013 if (T.null txt)
1014 then go0 p'
1015 else return $ Free $ go1 (yield txt >> p')
1016 go1 p = do
1017 p' <- p ^. break ('\n' ==)
1018 return $ FreeT $ do
1019 x <- nextChar p'
1020 case x of
1021 Left r -> return $ Pure r
1022 Right (_, p'') -> go0 p''
1023 -- _unlines
1024 -- :: Monad m
1025 -- => FreeT (Producer Text m) m x -> Producer Text m x
1026 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
1027
1028
1029 {-# INLINABLE lines #-}
1030
1031
1032 -- | Split a text stream into 'FreeT'-delimited words
1033 words
1034 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
1035 words = Data.Profunctor.dimap go (fmap _unwords)
1036 where
1037 go p = FreeT $ do
1038 x <- next (p >-> dropWhile isSpace)
1039 return $ case x of
1040 Left r -> Pure r
1041 Right (bs, p') -> Free $ do
1042 p'' <- (yield bs >> p') ^. break isSpace
1043 return (go p'')
1044 _unwords = PG.intercalates (yield $ T.singleton ' ')
1045
1046 {-# INLINABLE words #-}
1047
1048
1049 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1050 interspersing a text stream in between them
1051 -}
1052 intercalate
1053 :: (Monad m)
1054 => Producer Text m ()
1055 -> FreeT (Producer Text m) m r
1056 -> Producer Text m r
1057 intercalate p0 = go0
1058 where
1059 go0 f = do
1060 x <- lift (runFreeT f)
1061 case x of
1062 Pure r -> return r
1063 Free p -> do
1064 f' <- p
1065 go1 f'
1066 go1 f = do
1067 x <- lift (runFreeT f)
1068 case x of
1069 Pure r -> return r
1070 Free p -> do
1071 p0
1072 f' <- p
1073 go1 f'
1074 {-# INLINABLE intercalate #-}
1075
1076 {-| Join 'FreeT'-delimited lines into a text stream
1077 -}
1078 unlines
1079 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1080 unlines = go
1081 where
1082 go f = do
1083 x <- lift (runFreeT f)
1084 case x of
1085 Pure r -> return r
1086 Free p -> do
1087 f' <- p
1088 yield $ T.singleton '\n'
1089 go f'
1090 {-# INLINABLE unlines #-}
1091
1092 {-| Join 'FreeT'-delimited words into a text stream
1093 -}
1094 unwords
1095 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1096 unwords = intercalate (yield $ T.singleton ' ')
1097 {-# INLINABLE unwords #-}
1098
1099
1100 {- $reexports
1101
1102     @Data.Text@ re-exports the 'Text' type, @Data.ByteString@ the 'ByteString' type, and @Data.Profunctor@ the 'Profunctor' class.
1103
1104     @Pipes.Parse@ re-exports the 'Parser' type synonym; @Pipes.Group@ re-exports 'concats', 'intercalates', 'FreeT' (the type) and 'FreeF'.
1105 -}
1106
1107