1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
3
4 module Pipes.Text (
5 -- * Effectful Text
6 -- $intro
7
8 -- * Lenses
9 -- $lenses
10
11 -- ** @view@ \/ @(^.)@
12 -- $view
13
14 -- ** @over@ \/ @(%~)@
15 -- $over
16
17 -- ** @zoom@
18 -- $zoom
19
20 -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
21 -- $special
22
23 -- * Producers
24 fromLazy
25
26 -- * Pipes
27 , map
28 , concatMap
29 , take
30 , drop
31 , takeWhile
32 , dropWhile
33 , filter
34 , scan
35 , pack
36 , unpack
37 , toCaseFold
38 , toLower
39 , toUpper
40 , stripStart
41
42 -- * Folds
43 , toLazy
44 , toLazyM
45 , foldChars
46 , head
47 , last
48 , null
49 , length
50 , any
51 , all
52 , maximum
53 , minimum
54 , find
55 , index
56 , count
57
58 -- * Primitive Character Parsers
59 , nextChar
60 , drawChar
61 , unDrawChar
62 , peekChar
63 , isEndOfChars
64
65 -- * Parsing Lenses
66 , splitAt
67 , span
68 , break
69 , groupBy
70 , group
71 , word
72 , line
73
74 -- * FreeT Splitters
75 , chunksOf
76 , splitsWith
77 , splits
78 , groupsBy
79 , groups
80 , lines
81 , words
82
83 -- * Transformations
84 , intersperse
85 , packChars
86
87 -- * Joiners
88 , intercalate
89 , unlines
90 , unwords
91
92 -- * Re-exports
93 -- $reexports
94 , module Data.ByteString
95 , module Data.Text
96 , module Data.Profunctor
97 , module Pipes.Parse
98 , module Pipes.Group
99 ) where
100
101 import Control.Applicative ((<*))
102 import Control.Monad (liftM, join)
103 import Control.Monad.Trans.State.Strict (StateT(..), modify)
104 import qualified Data.Text as T
105 import Data.Text (Text)
106 import qualified Data.Text.Lazy as TL
107 import Data.ByteString (ByteString)
108 import Data.Functor.Constant (Constant(Constant, getConstant))
109 import Data.Functor.Identity (Identity)
110 import Data.Profunctor (Profunctor)
111 import qualified Data.Profunctor
112 import Pipes
113 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
114 import qualified Pipes.Group as PG
115 import qualified Pipes.Parse as PP
116 import Pipes.Parse (Parser)
117 import Pipes.Text.Encoding (Lens'_, Iso'_)
118 import qualified Pipes.Prelude as P
119 import Data.Char (isSpace)
120 import Data.Word (Word8)
121 import Foreign.Storable (sizeOf)
122 import Data.Bits (shiftL)
123 import Prelude hiding (
124 all,
125 any,
126 break,
127 concat,
128 concatMap,
129 drop,
130 dropWhile,
131 elem,
132 filter,
133 head,
134 last,
135 lines,
136 length,
137 map,
138 maximum,
139 minimum,
140 notElem,
141 null,
142 readFile,
143 span,
144 splitAt,
145 take,
146 takeWhile,
147 unlines,
148 unwords,
149 words,
150 writeFile )
151
152 {- $intro
153 This package provides @pipes@ utilities for /text streams/ or /character streams/,
154 realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
155 and thus you will generally want @Data.Text@ in scope. But the type
156 @Producer Text m r@, as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
157
158 This particular module provides many functions equivalent in one way or another to
159 the pure functions in
160 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
161 They transform, divide, group and fold text streams. Though @Producer Text m r@
162 is the type of \'effectful Text\', the functions in this module are \'pure\'
163 in the sense that they are uniformly monad-independent.
164 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
165 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
166 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
167
168 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
169 strict Text: the implementation is arranged so that the individual strict 'Text'
170 chunks are kept to a reasonable size; the user is not aware of the divisions
171 between the connected 'Text' chunks.
172 So also here: the functions in this module are designed to operate on streams that
173 are insensitive to text boundaries. This means that they may freely split
174 text into smaller texts and /discard empty texts/. The objective, though, is
175 that they should /never concatenate texts/ in order to provide strict upper
176 bounds on memory usage.
177
178 For example, to stream only the first three lines of 'stdin' to 'stdout' you
179 might write:
180
181 > import Pipes
182 > import qualified Pipes.Text as Text
183 > import qualified Pipes.Text.IO as Text
184 > import Pipes.Group (takes')
185 > import Lens.Family
186 >
187 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
188 > where
189 >         takeLines n = view Text.unlines . takes' n . view Text.lines
190
191 The above program will never bring more than one chunk of text (~ 32 KB) into
192 memory, no matter how long the lines are.
193
194 -}
195 {- $lenses
196 As this example shows, one superficial difference from @Data.Text.Lazy@
197 is that many of the operations, like 'lines', are \'lensified\'; this has a
198 number of advantages (where it is possible); in particular it facilitates their
199 use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
200 sense). The disadvantage, famously, is that the messages you get for type errors can be
201 a little alarming. The remarks that follow in this section are for non-lens adepts.
202
203 Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
204 intuitively corresponding function when used with @view@ or @(^.)@. Instead of
205 writing:
206
207 > splitAt 17 producer
208
209 as we would with the Prelude or Text functions, we write
210
211 > view (splitAt 17) producer
212
213 or equivalently
214
215 > producer ^. splitAt 17
216
217 This may seem a little indirect, but note that many equivalents of
218 @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
219 corresponding functions by prefixing them with @(>->)@. Thus something like
220
221 > stripLines = view Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
222
223 would drop the leading white space from each line.
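
A hedged example session (assuming @OverloadedStrings@, @Pipes.Group@ imported as @Group@,
and 'Text.stdout' from @Pipes.Text.IO@; the output is what one would expect, not a
transcript of an actual run):

> >>> runEffect $ stripLines (Text.fromLazy "  one\n  two\n") >-> Text.stdout
> one
> two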
224
225 The lenses in this library are marked as /improper/; this just means that
226 they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
227 Just for this reason, though, the magnificent complexities of the lens libraries
228 are a distraction. The lens combinators to keep in mind, the ones that make sense for
229 our lenses, are @view@ \/ @(^.)@, @over@ \/ @(%~)@, and @zoom@.
230
231 One need only keep in mind that if @l@ is a @Lens'_ a b@, then:
232
233 -}
234 {- $view
235 @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
236 is the corresponding @b@; as was said above, this function will be exactly the
237 function you think it is, given its name. Thus to uppercase the first n characters
238 of a Producer, leaving the rest the same, we could write:
239
240
241 > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
242 > p'
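
A hedged way to try it out (assuming @OverloadedStrings@, and 'Text.stdout' from
@Pipes.Text.IO@ imported as @Text@, as in the introduction):

> >>> runEffect $ upper 4 (Text.fromLazy "hello world") >-> Text.stdout
> HELLo world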
243 -}
244 {- $over
245 @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
246 @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
247 the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
248 (it can also be written @l %~ f@).
249 For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
250 So above we might have written things like these:
251
252 > stripLines = Text.lines %~ maps (>-> Text.stripStart)
253 > stripLines = over Text.lines (maps (>-> Text.stripStart))
254 > upper n = Text.splitAt n %~ (>-> Text.toUpper)
255
256 -}
257 {- $zoom
258 @zoom l@, finally, is a function from a @Parser b m r@
259 to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
260 Its use is easiest to see with a decoding lens like 'utf8', which
261 \"sees\" a Text producer hidden inside a ByteString producer:
262 @drawChar@ is a Text parser, returning a @Maybe Char@, @zoom utf8 drawChar@ is
263 a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
264 a list of everything produced from a Producer, leaving only the return value; it would
265 usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
266 returns a list of Text chunks containing the first seventeen Chars, and returns the rest of
267 the Text Producer for further parsing. Suppose that we want, inexplicably, to
268 modify the casing of a Text Producer according to any instruction it might
269 contain at the start. Then we might write something like this:
270
271 > obey :: Monad m => Producer Text m b -> Producer Text m b
272 > obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
273 > let seven = T.concat ts
274 > case T.toUpper seven of
275 > "TOUPPER" -> p' >-> Text.toUpper
276 > "TOLOWER" -> p' >-> Text.toLower
277 > _ -> do yield seven
278 > p'
279
280
281 > >>> let doc = each ["toU","pperTh","is document.\n"]
282 > >>> runEffect $ obey doc >-> Text.stdout
283 > THIS DOCUMENT.
284
285 The purpose of exporting lenses is the mental economy achieved with this three-way
286 applicability. That one expression, e.g. @lines@ or @splitAt 17@, can have these
287 three uses is no more surprising than that a pipe can act as a function modifying
288 the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
289 -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
290 @pipe >-> consumer@.
291
292 The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
293 both <http://hackage.haskell.org/package/lens lens> and
294 <http://hackage.haskell.org/package/lens-family lens-family>. The use of 'zoom' is explained
295 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
296 and to some extent in the @Pipes.Text.Encoding@ module here.
297
298 -}
299 {- $special
300 These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
301 This is in the types that are most closely associated with our central text type,
302 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
303
304 > splitAt :: Int -> Text -> (Text, Text)
305 > lines :: Text -> [Text]
306 > chunksOf :: Int -> Text -> [Text]
307
308 which relate a Text with a pair of Texts or a list of Texts.
309 The corresponding functions here (taking account of \'lensification\') are
310
311 > view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
312 > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
313 > view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
314
315 Some of the types may be more readable if you imagine that we have introduced
316 our own type synonyms
317
318 > type Text m r = Producer T.Text m r
319 > type Texts m r = FreeT (Producer T.Text m) m r
320
321 Then we would think of the types above as
322
323 > view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
324 > view lines :: (Monad m) => Text m r -> Texts m r
325 > view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
326
327 which brings one closer to the types of the similar functions in @Data.Text.Lazy@
328
329 In the type @Producer Text m (Producer Text m r)@ the second
330 element of the \'pair\' of effectful Texts cannot simply be retrieved
331 with something like 'snd'. This is an \'effectful\' pair, and one must work
332 through the effects of the first element to arrive at the second Text stream, even
333 if you are proposing to throw the Text in the first element away.
334 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
335
336 > join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r
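
With 'join' in hand, the @upper@ example from the discussion of 'view' above could just
as well be written as follows (a hedged restatement of the same definition):

> upper n p = join (p ^. Text.splitAt n >-> Text.toUpper)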
337
338 The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
339 @FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful)
340 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
341 together things of the forms:
342
343 > r
344 > (Text,r)
345 > (Text, (Text, r))
346 > (Text, (Text, (Text, r)))
347 > (Text, (Text, (Text, (Text, r))))
348 > ...
349
350 (We might also have identified the sum of those types with @Free ((,) Text) r@
351 -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
352
353 Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
354 @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
355
356 > m r
357 > Text m r
358 > Text m (Text m r)
359 > Text m (Text m (Text m r))
360 > Text m (Text m (Text m (Text m r)))
361 > ...
362
363 We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
364 or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
365 result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@ .
366
367 One might think that
368
369 > lines :: Monad m => Lens'_ (Producer Text m r) (FreeT (Producer Text m) m r)
370 > view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
371
372 should really have the type
373
374 > lines :: Monad m => Pipe Text Text m r
375
376 as e.g. 'toUpper' does. But this would spoil the control we are
377 attempting to maintain over the size of chunks. It is in fact just
378 as unreasonable to want such a pipe as to want
379
380 > Data.Text.Lazy.lines :: Text -> Text
381
382 to 'rechunk' the strict Text chunks inside the lazy Text to respect
383 line boundaries. In fact we have
384
385 > Data.Text.Lazy.lines :: Text -> [Text]
386 > Prelude.lines :: String -> [String]
387
388 where the elements of the list are themselves lazy Texts or Strings; the use
389 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
390
391 The @Pipes.Group@ module, which can generally be imported without qualification,
392 provides many functions for working with things of type @FreeT (Producer a m) m r@.
393 In particular it conveniently exports the constructors for @FreeT@ and the associated
394 @FreeF@ type -- a fancy form of @Either@, namely
395
396 > data FreeF f a b = Pure a | Free (f b)
397
398 for pattern-matching. Consider the implementation of the 'words' function, or
399 of the part of the lens that takes us to the words; it is compact but exhibits many
400 of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
401 constructors. Keep in mind that
402
403 > newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
404 > next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
405
406 Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
407 the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
408
409 > words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
410 > words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
411 > x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
412 > return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
413 > Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
414 > Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
415 > p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
416 > (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
417 > return (words p'') -- We then carry on with the rest, which is likely to begin with space.
418
419 -}
420
421 type Lens s t a b = forall f . Functor f => (a -> f b) -> (s -> f t)
422
423
424 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
425 a ^. lens = getConstant (lens Constant a)
426
427 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
428 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
429 fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
430 {-# INLINE fromLazy #-}
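-- A hedged round trip: for any lazy text @txt@,
--
-- > toLazy (fromLazy txt) == txt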
431
432
433
434 -- | Apply a transformation to each 'Char' in the stream
435 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
436 map f = P.map (T.map f)
437 {-# INLINABLE map #-}
438
439 {-# RULES "p >-> map f" forall p f .
440 p >-> map f = for p (\txt -> yield (T.map f txt))
441 #-}
442
443 -- | Map a function over the characters of a text stream and concatenate the results
444 concatMap
445 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
446 concatMap f = P.map (T.concatMap f)
447 {-# INLINABLE concatMap #-}
448
449 {-# RULES "p >-> concatMap f" forall p f .
450 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
451 #-}
452
453
454 -- | Transform a Pipe of 'String's into one of 'Text' chunks
455 pack :: Monad m => Pipe String Text m r
456 pack = P.map T.pack
457 {-# INLINEABLE pack #-}
458
459 {-# RULES "p >-> pack" forall p .
460 p >-> pack = for p (\txt -> yield (T.pack txt))
461 #-}
462
463 -- | Transform a Pipe of 'Text' chunks into one of 'String's
464 unpack :: Monad m => Pipe Text String m r
465 unpack = for cat (\t -> yield (T.unpack t))
466 {-# INLINEABLE unpack #-}
467
468 {-# RULES "p >-> unpack" forall p .
469 p >-> unpack = for p (\txt -> yield (T.unpack txt))
470 #-}
471
472 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
473 -- here acting as 'Text' pipes, rather than acting all at once on a lazy 'Text'
474 toCaseFold :: Monad m => Pipe Text Text m r
475 toCaseFold = P.map T.toCaseFold
476 {-# INLINEABLE toCaseFold #-}
477
478 {-# RULES "p >-> toCaseFold" forall p .
479 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
480 #-}
481
482
483 -- | lowercase incoming 'Text'
484 toLower :: Monad m => Pipe Text Text m r
485 toLower = P.map T.toLower
486 {-# INLINEABLE toLower #-}
487
488 {-# RULES "p >-> toLower" forall p .
489 p >-> toLower = for p (\txt -> yield (T.toLower txt))
490 #-}
491
492 -- | uppercase incoming 'Text'
493 toUpper :: Monad m => Pipe Text Text m r
494 toUpper = P.map T.toUpper
495 {-# INLINEABLE toUpper #-}
496
497 {-# RULES "p >-> toUpper" forall p .
498 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
499 #-}
500
501 -- | Remove leading white space from an incoming succession of 'Text's
502 stripStart :: Monad m => Pipe Text Text m r
503 stripStart = do
504 chunk <- await
505 let text = T.stripStart chunk
506 if T.null text
507 then stripStart
508 else do yield text
509 cat
510 {-# INLINEABLE stripStart #-}
511
512 -- | @(take n)@ only allows @n@ individual characters to pass;
513 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
514 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
515 take n0 = go n0 where
516 go n
517 | n <= 0 = return ()
518 | otherwise = do
519 txt <- await
520 let len = fromIntegral (T.length txt)
521 if (len > n)
522 then yield (T.take (fromIntegral n) txt)
523 else do
524 yield txt
525 go (n - len)
526 {-# INLINABLE take #-}
527
528 -- | @(drop n)@ drops the first @n@ characters
529 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
530 drop n0 = go n0 where
531 go n
532 | n <= 0 = cat
533 | otherwise = do
534 txt <- await
535 let len = fromIntegral (T.length txt)
536 if (len >= n)
537 then do
538 yield (T.drop (fromIntegral n) txt)
539 cat
540 else go (n - len)
541 {-# INLINABLE drop #-}
542
543 -- | Take characters until they fail the predicate
544 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
545 takeWhile predicate = go
546 where
547 go = do
548 txt <- await
549 let (prefix, suffix) = T.span predicate txt
550 if (T.null suffix)
551 then do
552 yield txt
553 go
554 else yield prefix
555 {-# INLINABLE takeWhile #-}
556
557 -- | Drop characters until they fail the predicate
558 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
559 dropWhile predicate = go where
560 go = do
561 txt <- await
562 case T.findIndex (not . predicate) txt of
563 Nothing -> go
564 Just i -> do
565 yield (T.drop i txt)
566 cat
567 {-# INLINABLE dropWhile #-}
568
569 -- | Only allows 'Char's to pass if they satisfy the predicate
570 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
571 filter predicate = P.map (T.filter predicate)
572 {-# INLINABLE filter #-}
573
574 {-# RULES "p >-> filter q" forall p q .
575 p >-> filter q = for p (\txt -> yield (T.filter q txt))
576 #-}
577
578 -- | Strict left scan over the characters
579 scan
580 :: (Monad m)
581 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
582 scan step begin = do
583 yield (T.singleton begin)
584 go begin
585 where
586 go c = do
587 txt <- await
588 let txt' = T.scanl step c txt
589 c' = T.last txt'
590 yield (T.tail txt')
591 go c'
592 {-# INLINABLE scan #-}
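-- A hedged example: 'scan' parallels 'TL.scanl' on a lazy 'Text', e.g.
--
-- > toLazy (fromLazy (TL.pack "abc") >-> scan max 'a') == TL.pack "aabc"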
593
594 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
595 'TL.Text'
596 -}
597 toLazy :: Producer Text Identity () -> TL.Text
598 toLazy = TL.fromChunks . P.toList
599 {-# INLINABLE toLazy #-}
600
601 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
602 'TL.Text'
603
604 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
605 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
606 immediately as they are generated instead of loading them all into memory.
607 -}
608 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
609 toLazyM = liftM TL.fromChunks . P.toListM
610 {-# INLINABLE toLazyM #-}
611
612 -- | Reduce the text stream using a strict left fold over characters
613 foldChars
614 :: Monad m
615 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
616 foldChars step begin done = P.fold (T.foldl' step) begin done
617 {-# INLINABLE foldChars #-}
618
619 -- | Retrieve the first 'Char'
620 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
621 head = go
622 where
623 go p = do
624 x <- nextChar p
625 case x of
626 Left _ -> return Nothing
627 Right (c, _) -> return (Just c)
628 {-# INLINABLE head #-}
629
630 -- | Retrieve the last 'Char'
631 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
632 last = go Nothing
633 where
634 go r p = do
635 x <- next p
636 case x of
637 Left () -> return r
638 Right (txt, p') ->
639 if (T.null txt)
640 then go r p'
641 else go (Just $ T.last txt) p'
642 {-# INLINABLE last #-}
643
644 -- | Determine if the stream is empty
645 null :: (Monad m) => Producer Text m () -> m Bool
646 null = P.all T.null
647 {-# INLINABLE null #-}
648
649 -- | Count the number of characters in the stream
650 length :: (Monad m, Num n) => Producer Text m () -> m n
651 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
652 {-# INLINABLE length #-}
653
654 -- | Fold that returns whether any received 'Char' satisfies the predicate
655 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
656 any predicate = P.any (T.any predicate)
657 {-# INLINABLE any #-}
658
659 -- | Fold that returns whether all received 'Char's satisfy the predicate
660 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
661 all predicate = P.all (T.all predicate)
662 {-# INLINABLE all #-}
663
664 -- | Return the maximum 'Char' within a text stream
665 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
666 maximum = P.fold step Nothing id
667 where
668 step mc txt =
669 if (T.null txt)
670 then mc
671 else Just $ case mc of
672 Nothing -> T.maximum txt
673 Just c -> max c (T.maximum txt)
674 {-# INLINABLE maximum #-}
675
676 -- | Return the minimum 'Char' within a text stream (surely very useful!)
677 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
678 minimum = P.fold step Nothing id
679 where
680 step mc txt =
681 if (T.null txt)
682 then mc
683 else case mc of
684 Nothing -> Just (T.minimum txt)
685 Just c -> Just (min c (T.minimum txt))
686 {-# INLINABLE minimum #-}
687
688 -- | Find the first element in the stream that matches the predicate
689 find
690 :: (Monad m)
691 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
692 find predicate p = head (p >-> filter predicate)
693 {-# INLINABLE find #-}
694
695 -- | Index into a text stream
696 index
697 :: (Monad m, Integral a)
698 => a-> Producer Text m () -> m (Maybe Char)
699 index n p = head (p >-> drop n)
700 {-# INLINABLE index #-}
701
702
703 -- | Count how many times the given 'Text' occurs in the stream (each chunk is searched separately, so occurrences spanning chunk boundaries are not counted)
704 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
705 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
706 {-# INLINABLE count #-}
707
708
709 -- | Consume the first character from a stream of 'Text'
710 --
711 -- 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
712 -- succeeds with a 'Right' providing the next character and the remainder of the
713 -- 'Producer'.
714
715 nextChar
716 :: (Monad m)
717 => Producer Text m r
718 -> m (Either r (Char, Producer Text m r))
719 nextChar = go
720 where
721 go p = do
722 x <- next p
723 case x of
724 Left r -> return (Left r)
725 Right (txt, p') -> case (T.uncons txt) of
726 Nothing -> go p'
727 Just (c, txt') -> return (Right (c, yield txt' >> p'))
728 {-# INLINABLE nextChar #-}
729
730 -- | Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the 'Producer' is empty
731
732 drawChar :: (Monad m) => Parser Text m (Maybe Char)
733 drawChar = do
734 x <- PP.draw
735 case x of
736 Nothing -> return Nothing
737 Just txt -> case (T.uncons txt) of
738 Nothing -> drawChar
739 Just (c, txt') -> do
740 PP.unDraw txt'
741 return (Just c)
742 {-# INLINABLE drawChar #-}
743
744 -- | Push back a 'Char' onto the underlying 'Producer'
745 unDrawChar :: (Monad m) => Char -> Parser Text m ()
746 unDrawChar c = modify (yield (T.singleton c) >>)
747 {-# INLINABLE unDrawChar #-}
748
749 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
750 push the 'Char' back
751
752 > peekChar = do
753 > x <- drawChar
754 > case x of
755 >         Nothing -> return ()
756 >         Just c  -> unDrawChar c
757 > return x
758
759 -}
760
761 peekChar :: (Monad m) => Parser Text m (Maybe Char)
762 peekChar = do
763 x <- drawChar
764 case x of
765 Nothing -> return ()
766 Just c -> unDrawChar c
767 return x
768 {-# INLINABLE peekChar #-}
769
770 {-| Check if the underlying 'Producer' has no more characters
771
772 Note that this will skip over empty 'Text' chunks, unlike
773 'PP.isEndOfInput' from @pipes-parse@, which would consider
774 an empty 'Text' a valid bit of input.
775
776 > isEndOfChars = liftM isNothing peekChar
777 -}
778 isEndOfChars :: (Monad m) => Parser Text m Bool
779 isEndOfChars = do
780 x <- peekChar
781 return (case x of
782 Nothing -> True
783 Just _-> False )
784 {-# INLINABLE isEndOfChars #-}
785
786
787 -- | Splits a 'Producer' after the given number of characters
788 splitAt
789 :: (Monad m, Integral n)
790 => n
791 -> Lens (Producer Text m x)
792 (Producer Text m y)
793 (Producer Text m (Producer Text m x))
794 (Producer Text m (Producer Text m y))
795 splitAt n0 k p0 = fmap join (k (go n0 p0))
796 where
797 go 0 p = return p
798 go n p = do
799 x <- lift (next p)
800 case x of
801 Left r -> return (return r)
802 Right (txt, p') -> do
803 let len = fromIntegral (T.length txt)
804 if (len <= n)
805 then do
806 yield txt
807 go (n - len) p'
808 else do
809 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
810 yield prefix
811 return (yield suffix >> p')
812 {-# INLINABLE splitAt #-}
813
814
815 -- | Split a text stream in two, producing the longest
816 -- consecutive group of characters that satisfy the predicate
817 -- and returning the rest
818
819 span
820 :: (Monad m)
821 => (Char -> Bool)
822 -> Lens (Producer Text m x)
823 (Producer Text m y)
824 (Producer Text m (Producer Text m x))
825 (Producer Text m (Producer Text m y))
826 span predicate k p0 = fmap join (k (go p0))
827 where
828 go p = do
829 x <- lift (next p)
830 case x of
831 Left r -> return (return r)
832 Right (txt, p') -> do
833 let (prefix, suffix) = T.span predicate txt
834 if (T.null suffix)
835 then do
836 yield txt
837 go p'
838 else do
839 yield prefix
840 return (yield suffix >> p')
841 {-# INLINABLE span #-}
842
843 {-| Split a text stream in two, producing the longest
844 consecutive group of characters that don't satisfy the predicate
845 -}
846 break
847 :: (Monad m)
848 => (Char -> Bool)
849 -> Lens (Producer Text m x)
850 (Producer Text m y)
851 (Producer Text m (Producer Text m x))
852 (Producer Text m (Producer Text m y))
853 break predicate = span (not . predicate)
854 {-# INLINABLE break #-}
855
856 {-| Improper lens that splits after the first group of equivalent Chars, as
857 defined by the given equivalence relation
858 -}
859 groupBy
860 :: (Monad m)
861 => (Char -> Char -> Bool)
862 -> Lens (Producer Text m x)
863 (Producer Text m y)
864 (Producer Text m (Producer Text m x))
865 (Producer Text m (Producer Text m y))
866 groupBy equals k p0 = fmap join (k ((go p0))) where
867 go p = do
868 x <- lift (next p)
869 case x of
870 Left r -> return (return r)
871 Right (txt, p') -> case T.uncons txt of
872 Nothing -> go p'
873 Just (c, _) -> (yield txt >> p') ^. span (equals c)
874 {-# INLINABLE groupBy #-}
875
876 -- | Improper lens that splits after the first succession of identical 'Char's
877 group :: Monad m
878 => Lens'_ (Producer Text m r)
879 (Producer Text m (Producer Text m r))
880 group = groupBy (==)
881 {-# INLINABLE group #-}
882
883 {-| Improper lens that splits a 'Producer' after the first word
884
885 Unlike 'words', this does not drop leading whitespace
886 -}
887 word :: (Monad m)
888 => Lens (Producer Text m x)
889 (Producer Text m y)
890 (Producer Text m (Producer Text m x))
891 (Producer Text m (Producer Text m y))
892 word k p0 = fmap join (k (to p0))
893 where
894 to p = do
895 p' <- p^.span isSpace
896 p'^.break isSpace
897 {-# INLINABLE word #-}
898
899
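-- | Improper lens that splits a 'Producer' after the first line; the rest of the
-- stream, if non-empty, begins with the terminating newline character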
900 line :: (Monad m)
901 => Lens (Producer Text m x)
902 (Producer Text m y)
903 (Producer Text m (Producer Text m x))
904 (Producer Text m (Producer Text m y))
905 line = break (== '\n')
906
907 {-# INLINABLE line #-}
908
909
910 -- | Intersperse a 'Char' in between the characters of a stream of 'Text'
911 intersperse
912 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
913 intersperse c = go0
914 where
915 go0 p = do
916 x <- lift (next p)
917 case x of
918 Left r -> return r
919 Right (txt, p') -> do
920 yield (T.intersperse c txt)
921 go1 p'
922 go1 p = do
923 x <- lift (next p)
924 case x of
925 Left r -> return r
926 Right (txt, p') -> do
927 yield (T.singleton c)
928 yield (T.intersperse c txt)
929 go1 p'
930 {-# INLINABLE intersperse #-}
931
932
933
934 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
935 packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x)
936 packChars = Data.Profunctor.dimap to (fmap from)
937 where
938 -- to :: Monad m => Producer Char m x -> Producer Text m x
939 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
940
941 step diffAs c = diffAs . (c:)
942
943 done diffAs = T.pack (diffAs [])
944
945 -- from :: Monad m => Producer Text m x -> Producer Char m x
946 from p = for p (each . T.unpack)
947
948 {-# INLINABLE packChars #-}
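-- A hedged example of the packing direction:
--
-- > toLazy (each ['a','b','c'] ^. packChars) == TL.pack "abc"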
949
950 defaultChunkSize :: Int
951 defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
952
953 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
954 chunksOf
955 :: (Monad m, Integral n)
956 => n -> Lens (Producer Text m x)
957 (Producer Text m y)
958 (FreeT (Producer Text m) m x)
959 (FreeT (Producer Text m) m y)
960 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
961 where
962 go p = do
963 x <- next p
964 return $ case x of
965 Left r -> Pure r
966 Right (txt, p') -> Free $ do
967 p'' <- (yield txt >> p') ^. splitAt n
968 return $ FreeT (go p'')
969 {-# INLINABLE chunksOf #-}
970
971
972 {-| Split a text stream into sub-streams delimited by characters that satisfy the
973 predicate
974 -}
975 splitsWith
976 :: (Monad m)
977 => (Char -> Bool)
978 -> Producer Text m r
979 -> FreeT (Producer Text m) m r
980 splitsWith predicate p0 = FreeT (go0 p0)
981 where
982 go0 p = do
983 x <- next p
984 case x of
985 Left r -> return (Pure r)
986 Right (txt, p') ->
987 if (T.null txt)
988 then go0 p'
989 else return $ Free $ do
990 p'' <- (yield txt >> p') ^. span (not . predicate)
991 return $ FreeT (go1 p'')
992 go1 p = do
993 x <- nextChar p
994 return $ case x of
995 Left r -> Pure r
996 Right (_, p') -> Free $ do
997 p'' <- p' ^. span (not . predicate)
998 return $ FreeT (go1 p'')
999 {-# INLINABLE splitsWith #-}
1000
1001 -- | Split a text stream using the given 'Char' as the delimiter
1002 splits :: (Monad m)
1003 => Char -> Lens (Producer Text m x)
1004 (Producer Text m y)
1005 (FreeT (Producer Text m) m x)
1006 (FreeT (Producer Text m) m y)
1007 splits c k p =
1008 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
1009 {-# INLINABLE splits #-}
1010
1011 {-| Improper lens from a stream of 'Text' to 'FreeT'-delimited groups of equivalent 'Char's,
1012 as determined by the given equivalence relation
1013 -}
1014 groupsBy
1015 :: Monad m
1016 => (Char -> Char -> Bool)
1017 -> Lens (Producer Text m x)
1018 (Producer Text m y)
1019 (FreeT (Producer Text m) m x)
1020 (FreeT (Producer Text m) m y)
1021 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
1022 go p = do x <- next p
1023 case x of Left r -> return (Pure r)
1024 Right (bs, p') -> case T.uncons bs of
1025 Nothing -> go p'
1026 Just (c, _) -> do return $ Free $ do
1027 p'' <- (yield bs >> p')^.span (equals c)
1028 return $ FreeT (go p'')
1029 {-# INLINABLE groupsBy #-}
1030
1031
1032 -- | Like 'groupsBy', where the equality predicate is ('==')
1033 groups
1034 :: Monad m
1035 => Lens (Producer Text m x)
1036 (Producer Text m y)
1037 (FreeT (Producer Text m) m x)
1038 (FreeT (Producer Text m) m y)
1039 groups = groupsBy (==)
1040 {-# INLINABLE groups #-}
1041
1042
1043
1044 {-| Split a text stream into 'FreeT'-delimited lines
1045 -}
1046 lines
1047 :: (Monad m)
1048 => Lens (Producer Text m x)
1049 (Producer Text m y)
1050 (FreeT (Producer Text m) m x)
1051 (FreeT (Producer Text m) m y)
1052 lines k p = fmap _unlines (k (_lines p))
1053 {-# INLINABLE lines #-}
1054
1055 _lines
1056 :: Monad m
1057 => Producer Text m x -> FreeT (Producer Text m) m x
1058 _lines p0 = FreeT (go0 p0)
1059 where
1060 go0 p = do
1061 x <- next p
1062 case x of
1063 Left r -> return (Pure r)
1064 Right (txt, p') ->
1065 if (T.null txt)
1066 then go0 p'
1067 else return $ Free $ go1 (yield txt >> p')
1068 go1 p = do
1069 p' <- p ^. break ('\n' ==)
1070 return $ FreeT $ do
1071 x <- nextChar p'
1072 case x of
1073 Left r -> return $ Pure r
1074 Right (_, p'') -> go0 p''
1075 {-# INLINABLE _lines #-}
1076
1077 _unlines
1078 :: Monad m
1079 => FreeT (Producer Text m) m x -> Producer Text m x
1080 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
1081 {-# INLINABLE _unlines #-}
1082
1083
1084
1085
1086 -- | Split a text stream into 'FreeT'-delimited words
1087 words
1088 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
1089 words = Data.Profunctor.dimap go (fmap _unwords)
1090 where
1091 go p = FreeT $ do
1092 x <- next (p >-> dropWhile isSpace)
1093 return $ case x of
1094 Left r -> Pure r
1095 Right (bs, p') -> Free $ do
1096 p'' <- (yield bs >> p') ^. break isSpace
1097 return (go p'')
1098 _unwords = PG.intercalates (yield $ T.singleton ' ')
1099
1100 {-# INLINABLE words #-}
1101
1102
1103 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1104 interspersing a text stream in between them
1105 -}
1106 intercalate
1107 :: (Monad m)
1108 => Producer Text m ()
1109 -> FreeT (Producer Text m) m r
1110 -> Producer Text m r
1111 intercalate p0 = go0
1112 where
1113 go0 f = do
1114 x <- lift (runFreeT f)
1115 case x of
1116 Pure r -> return r
1117 Free p -> do
1118 f' <- p
1119 go1 f'
1120 go1 f = do
1121 x <- lift (runFreeT f)
1122 case x of
1123 Pure r -> return r
1124 Free p -> do
1125 p0
1126 f' <- p
1127 go1 f'
1128 {-# INLINABLE intercalate #-}
1129
1130 {-| Improper lens that joins 'FreeT'-delimited lines into a text stream
1131 -}
1132 unlines
1133 :: (Monad m)
1134 => Lens (FreeT (Producer Text m) m x)
1135 (FreeT (Producer Text m) m y)
1136 (Producer Text m x)
1137 (Producer Text m y)
1138
1139 unlines k p = fmap _lines (k (_unlines p))
1140 {-# INLINABLE unlines #-}
1141
1142 {-| Join 'FreeT'-delimited words into a text stream
1143 -}
1144 unwords
1145 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1146 unwords = intercalate (yield $ T.singleton ' ')
1147 {-# INLINABLE unwords #-}
1148
1149
1150 {- $reexports
1151
1152 @Data.Text@ re-exports the 'Text' type, @Data.ByteString@ the 'ByteString' type, and @Data.Profunctor@ the 'Profunctor' class.
1153
1154 @Pipes.Parse@ re-exports the 'Parser' type, and @Pipes.Group@ re-exports 'concats', 'intercalates', and the 'FreeT' and 'FreeF' types.
1155 -}
1156
1157