1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
3
4 module Pipes.Text (
5 -- * Effectful Text
6 -- $intro
7
8 -- * Lenses
9 -- $lenses
10
11 -- ** @view@ \/ @(^.)@
12 -- $view
13
14 -- ** @over@ \/ @(%~)@
15 -- $over
16
17 -- ** @zoom@
18 -- $zoom
19
20 -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
21 -- $special
22
23 -- * Producers
24 fromLazy
25
26 -- * Pipes
27 , map
28 , concatMap
29 , take
30 , drop
31 , takeWhile
32 , dropWhile
33 , filter
34 , scan
35 , pack
36 , unpack
37 , toCaseFold
38 , toLower
39 , toUpper
40 , stripStart
41
42 -- * Folds
43 , toLazy
44 , toLazyM
45 , foldChars
46 , head
47 , last
48 , null
49 , length
50 , any
51 , all
52 , maximum
53 , minimum
54 , find
55 , index
56 , count
57
58 -- * Primitive Character Parsers
59 , nextChar
60 , drawChar
61 , unDrawChar
62 , peekChar
63 , isEndOfChars
64
65 -- * Parsing Lenses
66 , splitAt
67 , span
68 , break
69 , groupBy
70 , group
71 , word
72 , line
73
74 -- * FreeT Splitters
75 , chunksOf
76 , splitsWith
77 , splits
78 , groupsBy
79 , groups
80 , lines
81 , words
82
83 -- * Transformations
84 , intersperse
85 , packChars
86
87 -- * Joiners
88 , intercalate
89 , unlines
90 , unwords
91
92 -- * Re-exports
93 -- $reexports
94 , module Data.ByteString
95 , module Data.Text
96 , module Data.Profunctor
97 , module Pipes.Parse
98 , module Pipes.Group
99 ) where
100
101 import Control.Applicative ((<*))
102 import Control.Monad (liftM, join)
103 import Control.Monad.Trans.State.Strict (StateT(..), modify)
104 import qualified Data.Text as T
105 import Data.Text (Text)
106 import qualified Data.Text.Lazy as TL
107 import Data.ByteString (ByteString)
108 import Data.Functor.Constant (Constant(Constant, getConstant))
109 import Data.Functor.Identity (Identity)
110 import Data.Profunctor (Profunctor)
111 import qualified Data.Profunctor
112 import Pipes
113 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
114 import qualified Pipes.Group as PG
115 import qualified Pipes.Parse as PP
116 import Pipes.Parse (Parser)
117 import Pipes.Text.Encoding (Lens'_, Iso'_)
118 import qualified Pipes.Prelude as P
119 import Data.Char (isSpace)
120 import Data.Word (Word8)
121 import Foreign.Storable (sizeOf)
122 import Data.Bits (shiftL)
123 import Prelude hiding (
124 all,
125 any,
126 break,
127 concat,
128 concatMap,
129 drop,
130 dropWhile,
131 elem,
132 filter,
133 head,
134 last,
135 lines,
136 length,
137 map,
138 maximum,
139 minimum,
140 notElem,
141 null,
142 readFile,
143 span,
144 splitAt,
145 take,
146 takeWhile,
147 unlines,
148 unwords,
149 words,
150 writeFile )
151
152 {- $intro
153 This package provides @pipes@ utilities for /text streams/ or /character streams/,
154 realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
155 and thus you will generally want @Data.Text@ in scope. But the type
156 @Producer Text m r@, as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
157
158 This particular module provides many functions equivalent in one way or another to
159 the pure functions in
160 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
161 They transform, divide, group and fold text streams. Though @Producer Text m r@
162 is the type of \'effectful Text\', the functions in this module are \'pure\'
163 in the sense that they are uniformly monad-independent.
164 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
165 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
166 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
167
168 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
169 strict Text: the implementation is arranged so that the individual strict 'Text'
170 chunks are kept to a reasonable size; the user is not aware of the divisions
171 between the connected 'Text' chunks.
172 So also here: the functions in this module are designed to operate on streams
173 without regard to chunk boundaries. This means that they may freely split
174 text into smaller texts and /discard empty texts/. The objective, though, is
175 that they should /never concatenate texts/ in order to provide strict upper
176 bounds on memory usage.
177
178 For example, to stream only the first three lines of 'stdin' to 'stdout' you
179 might write:
180
181 > import Pipes
182 > import qualified Pipes.Text as Text
183 > import qualified Pipes.Text.IO as Text
184 > import Pipes.Group (takes')
185 > import Lens.Family
186 >
187 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
188 > where
189 > takeLines n = Text.unlines . takes' n . view Text.lines
190
191 The above program will never bring more than one chunk of text (~ 32 KB) into
192 memory, no matter how long the lines are.
193
194 -}
195 {- $lenses
196 As this example shows, one superficial difference from @Data.Text.Lazy@
197 is that many of the operations, like 'lines', are \'lensified\'; this has a
198 number of advantages (where it is possible); in particular it facilitates their
199 use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
200 sense). The disadvantage, famously, is that the messages you get for type errors can be
201 a little alarming. The remarks that follow in this section are for non-lens adepts.
202
203 Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
204 intuitively corresponding function when used with @view@ or @(^.)@. Instead of
205 writing:
206
207 > splitAt 17 producer
208
209 as we would with the Prelude or Text functions, we write
210
211 > view (splitAt 17) producer
212
213 or equivalently
214
215 > producer ^. splitAt 17
216
217 This may seem a little indirect, but note that many equivalents of
218 @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
219 corresponding functions by prefixing them with @(>->)@. Thus something like
220
221 > stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
222
223 would drop the leading white space from each line.
224
225 The lenses in this library are marked as /improper/; this just means that
226 they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
227 Just for this reason, though, the magnificent complexities of the lens libraries
228 are a distraction. The lens combinators to keep in mind, the ones that make sense for
229 our lenses, are @view@ \/ @(^.)@, @over@ \/ @(%~)@, and @zoom@.
230
231 One need only keep in mind that if @l@ is a @Lens'_ a b@, then:
232
233 -}
234 {- $view
235 @view l@ is a function @a -> b@. Thus @view l a@ (also written @a ^. l@)
236 is the corresponding @b@; as was said above, this function will be exactly the
237 function you think it is, given its name. Thus to uppercase the first n characters
238 of a Producer, leaving the rest the same, we could write:
239
240
241 > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
242 > p'
243 -}
244 {- $over
245 @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
246 @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
247 the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
248 (it can also be written @l %~ f@).
249 For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
250 So above we might have written things like these:
251
252 > stripLines = Text.lines %~ maps (>-> Text.stripStart)
253 > stripLines = over Text.lines (maps (>-> Text.stripStart))
254 > upper n = Text.splitAt n %~ (>-> Text.toUpper)
255
256 -}
257 {- $zoom
258 @zoom l@, finally, is a function from a @Parser b m r@
259 to a @Parser a m r@ (or, more generally, from a @StateT (Producer b m x) m r@ to a @StateT (Producer a m x) m r@).
260 Its use is easiest to see with a decoding lens like 'utf8', which
261 \"sees\" a Text producer hidden inside a ByteString producer:
262 @drawChar@ is a Text parser, returning a @Maybe Char@, @zoom utf8 drawChar@ is
263 a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
264 a list of everything produced from a Producer, leaving only the return value; it would
265 usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
266 returns a list of Text chunks containing the first seventeen Chars, and leaves the rest of
267 the Text Producer for further parsing. Suppose that we want, inexplicably, to
268 modify the casing of a Text Producer according to any instruction it might
269 contain at the start. Then we might write something like this:
270
271 > obey :: Monad m => Producer Text m b -> Producer Text m b
272 > obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
273 > let seven = T.concat ts
274 > case T.toUpper seven of
275 > "TOUPPER" -> p' >-> Text.toUpper
276 > "TOLOWER" -> p' >-> Text.toLower
277 > _ -> do yield seven
278 > p'
279
280
281 > >>> let doc = each ["toU","pperTh","is document.\n"]
282 > >>> runEffect $ obey doc >-> Text.stdout
283 > THIS DOCUMENT.
284
285 The purpose of exporting lenses is the mental economy achieved with this three-way
286 applicability. That one expression, e.g. @lines@ or @splitAt 17@, can have these
287 three uses is no more surprising than that a pipe can act as a function modifying
288 the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
289 -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
290 @pipe >-> consumer@.
291
292 The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
293 both <http://hackage.haskell.org/package/lens lens> and
294 <http://hackage.haskell.org/package/lens-family lens-family>. The use of 'zoom' is explained
295 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
296 and to some extent in the @Pipes.Text.Encoding@ module here.
297
298 -}
299 {- $special
300 These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@.
301 This is in the types that are most closely associated with our central text type,
302 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
303
304 > splitAt :: Int -> Text -> (Text, Text)
305 > lines :: Text -> [Text]
306 > chunksOf :: Int -> Text -> [Text]
307
308 which relate a Text with a pair of Texts or a list of Texts.
309 The corresponding functions here (taking account of \'lensification\') are
310
311 > view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
312 > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
313 > view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
314
315 Some of the types may be more readable if you imagine that we have introduced
316 our own type synonyms
317
318 > type Text m r = Producer T.Text m r
319 > type Texts m r = FreeT (Producer T.Text m) m r
320
321 Then we would think of the types above as
322
323 > view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
324 > view lines :: (Monad m) => Text m r -> Texts m r
325 > view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
326
327 which brings one closer to the types of the similar functions in @Data.Text.Lazy@.
328
329 In the type @Producer Text m (Producer Text m r)@ the second
330 element of the \'pair\' of effectful Texts cannot simply be retrieved
331 with something like 'snd'. This is an \'effectful\' pair, and one must work
332 through the effects of the first element to arrive at the second Text stream, even
333 if you are proposing to throw the Text in the first element away.
334 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
335
> 336 > join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r
337
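For instance (just a sketch: the helper name @skipPrefix@ is ours, and @drain@ is
@Pipes.Prelude.drain@), to discard the first seventeen characters and recover the
rest of the stream for further use, one might write:

> skipPrefix :: Monad m => Producer Text m r -> m (Producer Text m r)
> skipPrefix p = runEffect (view (splitAt 17) p >-> Pipes.Prelude.drain)
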
338 The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
339 @FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful)
340 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
341 together things of the forms:
342
343 > r
344 > (Text,r)
345 > (Text, (Text, r))
346 > (Text, (Text, (Text, r)))
347 > (Text, (Text, (Text, (Text, r))))
348 > ...
349
350 (We might also have identified the sum of those types with @Free ((,) Text) r@
351 -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
352
353 Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
354 @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
355
356 > m r
357 > Text m r
358 > Text m (Text m r)
359 > Text m (Text m (Text m r))
360 > Text m (Text m (Text m (Text m r)))
361 > ...
362
363 We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
364 or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
365 result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@.
366
367 One might think that
368
369 > lines :: Monad m => Lens'_ (Producer Text m r) (FreeT (Producer Text m) m r)
370 > view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
371
372 should really have the type
373
374 > lines :: Monad m => Pipe Text Text m r
375
376 as e.g. 'toUpper' does. But this would spoil the control we are
377 attempting to maintain over the size of chunks. It is in fact just
378 as unreasonable to want such a pipe as to want
379
380 > Data.Text.Lazy.lines :: Text -> Text
381
382 to 'rechunk' the strict Text chunks inside the lazy Text to respect
383 line boundaries. In fact we have
384
385 > Data.Text.Lazy.lines :: Text -> [Text]
386 > Prelude.lines :: String -> [String]
387
388 where the elements of the list are themselves lazy Texts or Strings; the use
389 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
390
391 The @Pipes.Group@ module, which can generally be imported without qualification,
392 provides many functions for working with things of type @FreeT (Producer a m) m r@.
393 In particular it conveniently exports the constructors for @FreeT@ and the associated
394 @FreeF@ type -- a fancy form of @Either@, namely
395
396 > data FreeF f a b = Pure a | Free (f b)
397
398 for pattern-matching. Consider the implementation of the 'words' function, or
399 of the part of the lens that takes us to the words; it is compact but exhibits many
400 of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
401 constructors. Keep in mind that
402
403 > newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
404 > next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
405
406 Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
407 the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
408
409 > words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
410 > words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
411 > x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
412 > return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
413 > Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
414 > Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
415 > p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
416 > (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
417 > return (words p'') -- We then carry on with the rest, which is likely to begin with space.
418
419 -}
420
421 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
422 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
423 fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
424 {-# INLINE fromLazy #-}
425
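{- A small usage sketch (the helper name @roundTrip@ is ours): 'fromLazy' and 'toLazy'
   invert one another up to chunking, and equality of lazy 'Text' ignores chunk
   boundaries, so the following should always be 'True':

> roundTrip :: TL.Text -> Bool
> roundTrip t = toLazy (fromLazy t) == t
-}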
426
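-- A local definition of @view@ / @(^.)@ in terms of the 'Constant' functor, used
-- internally below (e.g. in 'groupBy' and 'word') in place of a lens library's
-- combinator.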
427 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
428 a ^. lens = getConstant (lens Constant a)
429
430
431 -- | Apply a transformation to each 'Char' in the stream
432 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
433 map f = P.map (T.map f)
434 {-# INLINABLE map #-}
435
436 {-# RULES "p >-> map f" forall p f .
437 p >-> map f = for p (\txt -> yield (T.map f txt))
438 #-}
439
440 -- | Map a function over the characters of a text stream and concatenate the results
441 concatMap
442 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
443 concatMap f = P.map (T.concatMap f)
444 {-# INLINABLE concatMap #-}
445
446 {-# RULES "p >-> concatMap f" forall p f .
447 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
448 #-}
449
450
451 -- | Transform a Pipe of 'String's into one of 'Text' chunks
452 pack :: Monad m => Pipe String Text m r
453 pack = P.map T.pack
454 {-# INLINEABLE pack #-}
455
456 {-# RULES "p >-> pack" forall p .
457 p >-> pack = for p (\txt -> yield (T.pack txt))
458 #-}
459
460 -- | Transform a Pipe of 'Text' chunks into one of 'String's
461 unpack :: Monad m => Pipe Text String m r
462 unpack = for cat (\t -> yield (T.unpack t))
463 {-# INLINEABLE unpack #-}
464
465 {-# RULES "p >-> unpack" forall p .
466 p >-> unpack = for p (\txt -> yield (T.unpack txt))
467 #-}
468
469 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
470 -- here acting as 'Text' pipes, much as they would act on a lazy 'Text'
471 toCaseFold :: Monad m => Pipe Text Text m r
472 toCaseFold = P.map T.toCaseFold
473 {-# INLINEABLE toCaseFold #-}
474
475 {-# RULES "p >-> toCaseFold" forall p .
476 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
477 #-}
478
479
480 -- | lowercase incoming 'Text'
481 toLower :: Monad m => Pipe Text Text m r
482 toLower = P.map T.toLower
483 {-# INLINEABLE toLower #-}
484
485 {-# RULES "p >-> toLower" forall p .
486 p >-> toLower = for p (\txt -> yield (T.toLower txt))
487 #-}
488
489 -- | uppercase incoming 'Text'
490 toUpper :: Monad m => Pipe Text Text m r
491 toUpper = P.map T.toUpper
492 {-# INLINEABLE toUpper #-}
493
494 {-# RULES "p >-> toUpper" forall p .
495 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
496 #-}
497
498 -- | Remove leading white space from an incoming succession of 'Text's
499 stripStart :: Monad m => Pipe Text Text m r
500 stripStart = do
501 chunk <- await
502 let text = T.stripStart chunk
503 if T.null text
504 then stripStart
505 else do yield text
506 cat
507 {-# INLINEABLE stripStart #-}
508
509 -- | @(take n)@ only allows @n@ individual characters to pass;
510 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
511 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
512 take n0 = go n0 where
513 go n
514 | n <= 0 = return ()
515 | otherwise = do
516 txt <- await
517 let len = fromIntegral (T.length txt)
518 if (len > n)
519 then yield (T.take (fromIntegral n) txt)
520 else do
521 yield txt
522 go (n - len)
523 {-# INLINABLE take #-}
524
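{- A ghci sketch (assuming @OverloadedStrings@, as in the examples above): exactly
   four characters pass, however the input is chunked:

> >>> toLazy (each ["abc","def"] >-> take 4)
> "abcd"
-}
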
525 -- | @(drop n)@ drops the first @n@ characters
526 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
527 drop n0 = go n0 where
528 go n
529 | n <= 0 = cat
530 | otherwise = do
531 txt <- await
532 let len = fromIntegral (T.length txt)
533 if (len >= n)
534 then do
535 yield (T.drop (fromIntegral n) txt)
536 cat
537 else go (n - len)
538 {-# INLINABLE drop #-}
539
540 -- | Take characters until they fail the predicate
541 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
542 takeWhile predicate = go
543 where
544 go = do
545 txt <- await
546 let (prefix, suffix) = T.span predicate txt
547 if (T.null suffix)
548 then do
549 yield txt
550 go
551 else yield prefix
552 {-# INLINABLE takeWhile #-}
553
554 -- | Drop characters until they fail the predicate
555 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
556 dropWhile predicate = go where
557 go = do
558 txt <- await
559 case T.findIndex (not . predicate) txt of
560 Nothing -> go
561 Just i -> do
562 yield (T.drop i txt)
563 cat
564 {-# INLINABLE dropWhile #-}
565
566 -- | Only allows 'Char's to pass if they satisfy the predicate
567 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
568 filter predicate = P.map (T.filter predicate)
569 {-# INLINABLE filter #-}
570
571 {-# RULES "p >-> filter q" forall p q .
572 p >-> filter q = for p (\txt -> yield (T.filter q txt))
573 #-}
574
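{- A ghci sketch (assuming @OverloadedStrings@): filtering is per character, so the
   chunk structure of the input does not matter:

> >>> toLazy (each ["a b", " c"] >-> filter (/= ' '))
> "abc"
-}
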
575 -- | Strict left scan over the characters
576 scan
577 :: (Monad m)
578 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
579 scan step begin = do
580 yield (T.singleton begin)
581 go begin
582 where
583 go c = do
584 txt <- await
585 let txt' = T.scanl step c txt
586 c' = T.last txt'
587 yield (T.tail txt')
588 go c'
589 {-# INLINABLE scan #-}
590
591 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
592 'TL.Text'
593 -}
594 toLazy :: Producer Text Identity () -> TL.Text
595 toLazy = TL.fromChunks . P.toList
596 {-# INLINABLE toLazy #-}
597
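{- A ghci sketch (assuming @OverloadedStrings@):

> >>> toLazy (each ["Hello, ", "world!"])
> "Hello, world!"
-}
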
598 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
599 'TL.Text'
600
601 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
602 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
603 immediately as they are generated instead of loading them all into memory.
604 -}
605 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
606 toLazyM = liftM TL.fromChunks . P.toListM
607 {-# INLINABLE toLazyM #-}
608
609 -- | Reduce the text stream using a strict left fold over characters
610 foldChars
611 :: Monad m
612 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
613 foldChars step begin done = P.fold (T.foldl' step) begin done
614 {-# INLINABLE foldChars #-}
615
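{- A minimal sketch of its use (the helper name @charCount@ is ours): counting the
   characters of a stream with a strict left fold:

> charCount :: Monad m => Producer Text m () -> m Int
> charCount = foldChars (\n _ -> n + 1) 0 id
-}
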
616 -- | Retrieve the first 'Char'
617 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
618 head = go
619 where
620 go p = do
621 x <- nextChar p
622 case x of
623 Left _ -> return Nothing
624 Right (c, _) -> return (Just c)
625 {-# INLINABLE head #-}
626
627 -- | Retrieve the last 'Char'
628 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
629 last = go Nothing
630 where
631 go r p = do
632 x <- next p
633 case x of
634 Left () -> return r
635 Right (txt, p') ->
636 if (T.null txt)
637 then go r p'
638 else go (Just $ T.last txt) p'
639 {-# INLINABLE last #-}
640
641 -- | Determine if the stream is empty
642 null :: (Monad m) => Producer Text m () -> m Bool
643 null = P.all T.null
644 {-# INLINABLE null #-}
645
646 -- | Count the number of characters in the stream
647 length :: (Monad m, Num n) => Producer Text m () -> m n
648 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
649 {-# INLINABLE length #-}
650
651 -- | Fold that returns whether any received 'Char's satisfy the predicate
652 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
653 any predicate = P.any (T.any predicate)
654 {-# INLINABLE any #-}
655
656 -- | Fold that returns whether all received 'Char's satisfy the predicate
657 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
658 all predicate = P.all (T.all predicate)
659 {-# INLINABLE all #-}
660
661 -- | Return the maximum 'Char' within a text stream
662 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
663 maximum = P.fold step Nothing id
664 where
665 step mc txt =
666 if (T.null txt)
667 then mc
668 else Just $ case mc of
669 Nothing -> T.maximum txt
670 Just c -> max c (T.maximum txt)
671 {-# INLINABLE maximum #-}
672
673 -- | Return the minimum 'Char' within a text stream (surely very useful!)
674 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
675 minimum = P.fold step Nothing id
676 where
677 step mc txt =
678 if (T.null txt)
679 then mc
680 else case mc of
681 Nothing -> Just (T.minimum txt)
682 Just c -> Just (min c (T.minimum txt))
683 {-# INLINABLE minimum #-}
684
685 -- | Find the first element in the stream that matches the predicate
686 find
687 :: (Monad m)
688 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
689 find predicate p = head (p >-> filter predicate)
690 {-# INLINABLE find #-}
691
692 -- | Index into a text stream
693 index
694 :: (Monad m, Integral a)
695 => a -> Producer Text m () -> m (Maybe Char)
696 index n p = head (p >-> drop n)
697 {-# INLINABLE index #-}
698
699
700 -- | Store a tally of how many segments match the given 'Text'
701 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
702 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
703 {-# INLINABLE count #-}
704
705
706 -- | Consume the first character from a stream of 'Text'
707 --
708 -- 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
709 -- succeeds with a 'Right' providing the next character and the remainder of the
710 -- 'Producer'.
711
712 nextChar
713 :: (Monad m)
714 => Producer Text m r
715 -> m (Either r (Char, Producer Text m r))
716 nextChar = go
717 where
718 go p = do
719 x <- next p
720 case x of
721 Left r -> return (Left r)
722 Right (txt, p') -> case (T.uncons txt) of
723 Nothing -> go p'
724 Just (c, txt') -> return (Right (c, yield txt' >> p'))
725 {-# INLINABLE nextChar #-}
726
727 -- | Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the 'Producer' is empty
728
729 drawChar :: (Monad m) => Parser Text m (Maybe Char)
730 drawChar = do
731 x <- PP.draw
732 case x of
733 Nothing -> return Nothing
734 Just txt -> case (T.uncons txt) of
735 Nothing -> drawChar
736 Just (c, txt') -> do
737 PP.unDraw txt'
738 return (Just c)
739 {-# INLINABLE drawChar #-}
740
741 -- | Push back a 'Char' onto the underlying 'Producer'
742 unDrawChar :: (Monad m) => Char -> Parser Text m ()
743 unDrawChar c = modify (yield (T.singleton c) >>)
744 {-# INLINABLE unDrawChar #-}
745
746 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
747 push the 'Char' back
748
749 > peekChar = do
750 > x <- drawChar
751 > case x of
> 752 > Nothing -> return ()
> 753 > Just c -> unDrawChar c
754 > return x
755
756 -}
757
758 peekChar :: (Monad m) => Parser Text m (Maybe Char)
759 peekChar = do
760 x <- drawChar
761 case x of
762 Nothing -> return ()
763 Just c -> unDrawChar c
764 return x
765 {-# INLINABLE peekChar #-}
766
767 {-| Check if the underlying 'Producer' has no more characters
768
769 Note that this will skip over empty 'Text' chunks, unlike
770 'PP.isEndOfInput' from @pipes-parse@, which would consider
771 an empty 'Text' a valid bit of input.
772
> 773 > isEndOfChars = liftM isNothing peekChar
774 -}
775 isEndOfChars :: (Monad m) => Parser Text m Bool
776 isEndOfChars = do
777 x <- peekChar
778 return (case x of
779 Nothing -> True
780 Just _-> False )
781 {-# INLINABLE isEndOfChars #-}
782
783
784 -- | Splits a 'Producer' after the given number of characters
785 splitAt
786 :: (Monad m, Integral n)
787 => n
788 -> Lens'_ (Producer Text m r)
789 (Producer Text m (Producer Text m r))
790 splitAt n0 k p0 = fmap join (k (go n0 p0))
791 where
792 go 0 p = return p
793 go n p = do
794 x <- lift (next p)
795 case x of
796 Left r -> return (return r)
797 Right (txt, p') -> do
798 let len = fromIntegral (T.length txt)
799 if (len <= n)
800 then do
801 yield txt
802 go (n - len) p'
803 else do
804 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
805 yield prefix
806 return (yield suffix >> p')
807 {-# INLINABLE splitAt #-}
808
809
810 -- | Split a text stream in two, producing the longest
811 -- consecutive group of characters that satisfies the predicate
812 -- and returning the rest
813
814 span
815 :: (Monad m)
816 => (Char -> Bool)
817 -> Lens'_ (Producer Text m r)
818 (Producer Text m (Producer Text m r))
819 span predicate k p0 = fmap join (k (go p0))
820 where
821 go p = do
822 x <- lift (next p)
823 case x of
824 Left r -> return (return r)
825 Right (txt, p') -> do
826 let (prefix, suffix) = T.span predicate txt
827 if (T.null suffix)
828 then do
829 yield txt
830 go p'
831 else do
832 yield prefix
833 return (yield suffix >> p')
834 {-# INLINABLE span #-}
835
836 {-| Split a text stream in two, producing the longest
837 consecutive group of characters that don't satisfy the predicate
838 -}
839 break
840 :: (Monad m)
841 => (Char -> Bool)
842 -> Lens'_ (Producer Text m r)
843 (Producer Text m (Producer Text m r))
844 break predicate = span (not . predicate)
845 {-# INLINABLE break #-}
846
847 {-| Improper lens that splits after the first group of equivalent Chars, as
848 defined by the given equivalence relation
849 -}
850 groupBy
851 :: (Monad m)
852 => (Char -> Char -> Bool)
853 -> Lens'_ (Producer Text m r)
854 (Producer Text m (Producer Text m r))
855 groupBy equals k p0 = fmap join (k ((go p0))) where
856 go p = do
857 x <- lift (next p)
858 case x of
859 Left r -> return (return r)
860 Right (txt, p') -> case T.uncons txt of
861 Nothing -> go p'
862 Just (c, _) -> (yield txt >> p') ^. span (equals c)
863 {-# INLINABLE groupBy #-}
864
865 -- | Improper lens that splits after the first succession of identical 'Char's
866 group :: Monad m
867 => Lens'_ (Producer Text m r)
868 (Producer Text m (Producer Text m r))
869 group = groupBy (==)
870 {-# INLINABLE group #-}
871
872 {-| Improper lens that splits a 'Producer' after the first word
873
874 Unlike 'words', this does not drop leading whitespace
875 -}
876 word :: (Monad m)
877 => Lens'_ (Producer Text m r)
878 (Producer Text m (Producer Text m r))
879 word k p0 = fmap join (k (to p0))
880 where
881 to p = do
882 p' <- p^.span isSpace
883 p'^.break isSpace
884 {-# INLINABLE word #-}
885
886
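-- | Improper lens that splits a 'Producer' after the first line; the terminating
-- newline, if any, is left at the head of the remainder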
887 line :: (Monad m)
888 => Lens'_ (Producer Text m r)
889 (Producer Text m (Producer Text m r))
890 line = break (== '\n')
891
892 {-# INLINABLE line #-}
893
894
895 -- | Intersperse a 'Char' in between the characters of a stream of 'Text'
896 intersperse
897 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
898 intersperse c = go0
899 where
900 go0 p = do
901 x <- lift (next p)
902 case x of
903 Left r -> return r
904 Right (txt, p') -> do
905 yield (T.intersperse c txt)
906 go1 p'
907 go1 p = do
908 x <- lift (next p)
909 case x of
910 Left r -> return r
911 Right (txt, p') -> do
912 yield (T.singleton c)
913 yield (T.intersperse c txt)
914 go1 p'
915 {-# INLINABLE intersperse #-}
916
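{- A ghci sketch (assuming @OverloadedStrings@): the character is interspersed across
   chunk boundaries as well as within chunks:

> >>> toLazy (intersperse ',' (each ["abc","def"]))
> "a,b,c,d,e,f"
-}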
917
918
919 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
920 packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x)
921 packChars = Data.Profunctor.dimap to (fmap from)
922 where
923 -- to :: Monad m => Producer Char m x -> Producer Text m x
924 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
925
926 step diffAs c = diffAs . (c:)
927
928 done diffAs = T.pack (diffAs [])
929
930 -- from :: Monad m => Producer Text m x -> Producer Char m x
931 from p = for p (each . T.unpack)
932
933 {-# INLINABLE packChars #-}
934
935 defaultChunkSize :: Int
936 defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
937
938 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
939 chunksOf
940 :: (Monad m, Integral n)
941 => n -> Lens'_ (Producer Text m r)
942 (FreeT (Producer Text m) m r)
943 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
944 where
945 go p = do
946 x <- next p
947 return $ case x of
948 Left r -> Pure r
949 Right (txt, p') -> Free $ do
950 p'' <- (yield txt >> p') ^. splitAt n
951 return $ FreeT (go p'')
952 {-# INLINABLE chunksOf #-}
953
954
955 {-| Split a text stream into sub-streams delimited by characters that satisfy the
956 predicate
957 -}
958 splitsWith
959 :: (Monad m)
960 => (Char -> Bool)
961 -> Producer Text m r
962 -> FreeT (Producer Text m) m r
963 splitsWith predicate p0 = FreeT (go0 p0)
964 where
965 go0 p = do
966 x <- next p
967 case x of
968 Left r -> return (Pure r)
969 Right (txt, p') ->
970 if (T.null txt)
971 then go0 p'
972 else return $ Free $ do
973 p'' <- (yield txt >> p') ^. span (not . predicate)
974 return $ FreeT (go1 p'')
975 go1 p = do
976 x <- nextChar p
977 return $ case x of
978 Left r -> Pure r
979 Right (_, p') -> Free $ do
980 p'' <- p' ^. span (not . predicate)
981 return $ FreeT (go1 p'')
982 {-# INLINABLE splitsWith #-}
983
984 -- | Split a text stream using the given 'Char' as the delimiter
985 splits :: (Monad m)
986 => Char
987 -> Lens'_ (Producer Text m r)
988 (FreeT (Producer Text m) m r)
989 splits c k p =
990 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
991 {-# INLINABLE splits #-}
992
993 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's, using the
994 given equivalence relation
995 -}
996 groupsBy
997 :: Monad m
998 => (Char -> Char -> Bool)
999 -> Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x)
1000 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
1001 go p = do x <- next p
1002 case x of Left r -> return (Pure r)
1003 Right (bs, p') -> case T.uncons bs of
1004 Nothing -> go p'
1005 Just (c, _) -> do return $ Free $ do
1006 p'' <- (yield bs >> p')^.span (equals c)
1007 return $ FreeT (go p'')
1008 {-# INLINABLE groupsBy #-}
1009
1010
1011 -- | Like 'groupsBy', where the equality predicate is ('==')
1012 groups
1013 :: Monad m
1014 => Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x)
1015 groups = groupsBy (==)
1016 {-# INLINABLE groups #-}
1017
1018
1019
1020 {-| Split a text stream into 'FreeT'-delimited lines
1021 -}
1022 lines
1023 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
1024 lines = Data.Profunctor.dimap _lines (fmap _unlines)
1025 where
1026 _lines p0 = FreeT (go0 p0)
1027 where
1028 go0 p = do
1029 x <- next p
1030 case x of
1031 Left r -> return (Pure r)
1032 Right (txt, p') ->
1033 if (T.null txt)
1034 then go0 p'
1035 else return $ Free $ go1 (yield txt >> p')
1036 go1 p = do
1037 p' <- p ^. break ('\n' ==)
1038 return $ FreeT $ do
1039 x <- nextChar p'
1040 case x of
1041 Left r -> return $ Pure r
1042 Right (_, p'') -> go0 p''
1043 -- _unlines
1044 -- :: Monad m
1045 -- => FreeT (Producer Text m) m x -> Producer Text m x
1046 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
1047
1048
1049 {-# INLINABLE lines #-}
1050
1051
1052 -- | Split a text stream into 'FreeT'-delimited words
1053 words
1054 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
1055 words = Data.Profunctor.dimap go (fmap _unwords)
1056 where
1057 go p = FreeT $ do
1058 x <- next (p >-> dropWhile isSpace)
1059 return $ case x of
1060 Left r -> Pure r
1061 Right (bs, p') -> Free $ do
1062 p'' <- (yield bs >> p') ^. break isSpace
1063 return (go p'')
1064 _unwords = PG.intercalates (yield $ T.singleton ' ')
1065
1066 {-# INLINABLE words #-}
1067
1068
1069 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1070 interspersing a text stream in between them
1071 -}
1072 intercalate
1073 :: (Monad m)
1074 => Producer Text m ()
1075 -> FreeT (Producer Text m) m r
1076 -> Producer Text m r
1077 intercalate p0 = go0
1078 where
1079 go0 f = do
1080 x <- lift (runFreeT f)
1081 case x of
1082 Pure r -> return r
1083 Free p -> do
1084 f' <- p
1085 go1 f'
1086 go1 f = do
1087 x <- lift (runFreeT f)
1088 case x of
1089 Pure r -> return r
1090 Free p -> do
1091 p0
1092 f' <- p
1093 go1 f'
1094 {-# INLINABLE intercalate #-}
1095
1096 {-| Join 'FreeT'-delimited lines into a text stream
1097 -}
1098 unlines
1099 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1100 unlines = go
1101 where
1102 go f = do
1103 x <- lift (runFreeT f)
1104 case x of
1105 Pure r -> return r
1106 Free p -> do
1107 f' <- p
1108 yield $ T.singleton '\n'
1109 go f'
1110 {-# INLINABLE unlines #-}
1111
1112 {-| Join 'FreeT'-delimited words into a text stream
1113 -}
1114 unwords
1115 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1116 unwords = intercalate (yield $ T.singleton ' ')
1117 {-# INLINABLE unwords #-}
1118
1119
1120 {- $reexports
1121
1122 @Data.Text@ re-exports the 'Text' type, @Data.ByteString@ the 'ByteString' type, and @Data.Profunctor@ the 'Profunctor' class.
1123
1124 @Pipes.Parse@ re-exports the 'Parser' type synonym, and @Pipes.Group@ re-exports 'concats', 'intercalates', 'FreeT' (the type) and 'FreeF'.
1125 -}
1126
1127