1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
3
4 module Pipes.Text (
5 -- * Effectful Text
6 -- $intro
7
8 -- * Lenses
9 -- $lenses
10
11 -- ** @view@ \/ @(^.)@
12 -- $view
13
14 -- ** @over@ \/ @(%~)@
15 -- $over
16
17 -- ** @zoom@
18 -- $zoom
19
20 -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
21 -- $special
22
23 -- * Producers
24 fromLazy
25
26 -- * Pipes
27 , map
28 , concatMap
29 , take
30 , drop
31 , takeWhile
32 , dropWhile
33 , filter
34 , scan
35 , pack
36 , unpack
37 , toCaseFold
38 , toLower
39 , toUpper
40 , stripStart
41
42 -- * Folds
43 , toLazy
44 , toLazyM
45 , foldChars
46 , head
47 , last
48 , null
49 , length
50 , any
51 , all
52 , maximum
53 , minimum
54 , find
55 , index
56 , count
57
58 -- * Primitive Character Parsers
59 , nextChar
60 , drawChar
61 , unDrawChar
62 , peekChar
63 , isEndOfChars
64
65 -- * Parsing Lenses
66 , splitAt
67 , span
68 , break
69 , groupBy
70 , group
71 , word
72 , line
73
74 -- * FreeT Splitters
75 , chunksOf
76 , splitsWith
77 , splits
78 , groupsBy
79 , groups
80 , lines
81 , words
82
83 -- * Transformations
84 , intersperse
85 , packChars
86
87 -- * Joiners
88 , intercalate
89 , unlines
90 , unwords
91
92 -- * Re-exports
93 -- $reexports
94 , module Data.ByteString
95 , module Data.Text
96 , module Data.Profunctor
97 , module Pipes.Parse
98 , module Pipes.Group
99 ) where
100
101 import Control.Applicative ((<*))
102 import Control.Monad (liftM, join)
103 import Control.Monad.Trans.State.Strict (StateT(..), modify)
104 import qualified Data.Text as T
105 import Data.Text (Text)
106 import qualified Data.Text.Lazy as TL
107 import Data.ByteString (ByteString)
108 import Data.Functor.Constant (Constant(Constant, getConstant))
109 import Data.Functor.Identity (Identity)
110 import Data.Profunctor (Profunctor)
111 import qualified Data.Profunctor
112 import Pipes
113 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
114 import qualified Pipes.Group as PG
115 import qualified Pipes.Parse as PP
116 import Pipes.Parse (Parser)
117 import qualified Pipes.Prelude as P
118 import Data.Char (isSpace)
119 import Data.Word (Word8)
120 import Foreign.Storable (sizeOf)
121 import Data.Bits (shiftL)
122 import Prelude hiding (
123 all,
124 any,
125 break,
126 concat,
127 concatMap,
128 drop,
129 dropWhile,
130 elem,
131 filter,
132 head,
133 last,
134 lines,
135 length,
136 map,
137 maximum,
138 minimum,
139 notElem,
140 null,
141 readFile,
142 span,
143 splitAt,
144 take,
145 takeWhile,
146 unlines,
147 unwords,
148 words,
149 writeFile )
150
151 {- $intro
152 This package provides @pipes@ utilities for /text streams/ or /character streams/,
153 realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
154 and thus you will generally want @Data.Text@ in scope. But the type
@Producer Text m r@, as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
156
157 This particular module provides many functions equivalent in one way or another to
158 the pure functions in
159 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
160 They transform, divide, group and fold text streams. Though @Producer Text m r@
161 is the type of \'effectful Text\', the functions in this module are \'pure\'
162 in the sense that they are uniformly monad-independent.
163 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
164 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
165 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
166
167 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
168 strict Text: the implementation is arranged so that the individual strict 'Text'
169 chunks are kept to a reasonable size; the user is not aware of the divisions
170 between the connected 'Text' chunks.
171 So also here: the functions in this module are designed to operate on streams that
172 are insensitive to text boundaries. This means that they may freely split
173 text into smaller texts and /discard empty texts/. The objective, though, is
174 that they should /never concatenate texts/ in order to provide strict upper
175 bounds on memory usage.
176
177 For example, to stream only the first three lines of 'stdin' to 'stdout' you
178 might write:
179
180 > import Pipes
181 > import qualified Pipes.Text as Text
182 > import qualified Pipes.Text.IO as Text
183 > import Pipes.Group (takes')
184 > import Lens.Family
185 >
186 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
187 > where
188 > takeLines n = Text.unlines . takes' n . view Text.lines
189
190 The above program will never bring more than one chunk of text (~ 32 KB) into
191 memory, no matter how long the lines are.
192
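A still simpler example: to stream 'stdin' to 'stdout' upper-cased, one might
write (a sketch, assuming the same imports as above):

> shout :: IO ()
> shout = runEffect $ Text.stdin >-> Text.toUpper >-> Text.stdout
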
193 -}
194 {- $lenses
195 As this example shows, one superficial difference from @Data.Text.Lazy@
196 is that many of the operations, like 'lines', are \'lensified\'; this has a
197 number of advantages (where it is possible); in particular it facilitates their
198 use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
199 sense.) The disadvantage, famously, is that the messages you get for type errors can be
200 a little alarming. The remarks that follow in this section are for non-lens adepts.
201
202 Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
203 intuitively corresponding function when used with @view@ or @(^.)@. Instead of
204 writing:
205
206 > splitAt 17 producer
207
208 as we would with the Prelude or Text functions, we write
209
210 > view (splitAt 17) producer
211
212 or equivalently
213
214 > producer ^. splitAt 17
215
216 This may seem a little indirect, but note that many equivalents of
217 @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
corresponding functions by applying them with @(>->)@. Thus something like
219
220 > stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
221
222 would drop the leading white space from each line.
223
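In ghci, for instance (a sketch, assuming @OverloadedStrings@ and the imports above):

> >>> runEffect $ stripLines (each ["  hello\n   wor", "ld\n"]) >-> Text.stdout
> hello
> world
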
224 The lenses in this library are marked as /improper/; this just means that
225 they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
226 Just for this reason, though, the magnificent complexities of the lens libraries
227 are a distraction. The lens combinators to keep in mind, the ones that make sense for
our lenses, are @view@ \/ @(^.)@, @over@ \/ @(%~)@, and @zoom@.
229
230 One need only keep in mind that if @l@ is a @Lens' a b@, then:
231
232 -}
233 {- $view
234 @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
235 is the corresponding @b@; as was said above, this function will be exactly the
236 function you think it is, given its name. Thus to uppercase the first n characters
237 of a Producer, leaving the rest the same, we could write:
238
239
240 > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
241 > p'
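
For instance (a sketch, assuming @OverloadedStrings@):

> >>> runEffect $ upper 4 (each ["who", "a, hello"]) >-> Text.stdout
> WHOA, hello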
242 -}
243 {- $over
244 @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
245 @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
246 the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
247 (it can also be written @l %~ f@).
248 For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
249 So above we might have written things like these:
250
251 > stripLines = Text.lines %~ maps (>-> Text.stripStart)
252 > stripLines = over Text.lines (maps (>-> Text.stripStart))
253 > upper n = Text.splitAt n %~ (>-> Text.toUpper)
254
255 -}
256 {- $zoom
257 @zoom l@, finally, is a function from a @Parser b m r@
258 to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
Its use is easiest to see with a decoding lens like 'utf8', which
260 \"sees\" a Text producer hidden inside a ByteString producer:
@drawChar@ is a Text parser, returning a @Maybe Char@; @zoom utf8 drawChar@ is
262 a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
263 a list of everything produced from a Producer, leaving only the return value; it would
264 usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
returns a list of Text chunks containing the first seventeen Chars, leaving the rest of
266 the Text Producer for further parsing. Suppose that we want, inexplicably, to
267 modify the casing of a Text Producer according to any instruction it might
268 contain at the start. Then we might write something like this:
269
270 > obey :: Monad m => Producer Text m b -> Producer Text m b
271 > obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
272 > let seven = T.concat ts
273 > case T.toUpper seven of
274 > "TOUPPER" -> p' >-> Text.toUpper
275 > "TOLOWER" -> p' >-> Text.toLower
276 > _ -> do yield seven
277 > p'
278
279
280 > >>> let doc = each ["toU","pperTh","is document.\n"]
281 > >>> runEffect $ obey doc >-> Text.stdout
282 > THIS DOCUMENT.
283
284 The purpose of exporting lenses is the mental economy achieved with this three-way
applicability. That one expression, e.g. @lines@ or @splitAt 17@, can have these
286 three uses is no more surprising than that a pipe can act as a function modifying
287 the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
288 -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
289 @pipe >-> consumer@
290
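Schematically, then, one lens has three uses (a sketch; @view@, @over@ and @zoom@
are the usual lens-family or lens combinators):

> view (Text.splitAt 3) p                      -- a Producer of the first three characters,
>                                              --   returning the producer of the rest
> over (Text.splitAt 3) (>-> Text.toUpper) p   -- p with its first three characters upper-cased
> zoom (Text.splitAt 3) drawAll                -- a parser drawing the text of the first three characters
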
291 The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
292 both <http://hackage.haskell.org/package/lens lens> and
<http://hackage.haskell.org/package/lens-family lens-family>. The use of 'zoom' is explained
294 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
295 and to some extent in the @Pipes.Text.Encoding@ module here.
296
297 -}
298 {- $special
These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@.
300 This is in the types that are most closely associated with our central text type,
301 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
302
303 > splitAt :: Int -> Text -> (Text, Text)
304 > lines :: Text -> [Text]
305 > chunksOf :: Int -> Text -> [Text]
306
307 which relate a Text with a pair of Texts or a list of Texts.
308 The corresponding functions here (taking account of \'lensification\') are
309
310 > view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
311 > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
312 > view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
313
314 Some of the types may be more readable if you imagine that we have introduced
315 our own type synonyms
316
317 > type Text m r = Producer T.Text m r
318 > type Texts m r = FreeT (Producer T.Text m) m r
319
320 Then we would think of the types above as
321
322 > view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
323 > view lines :: (Monad m) => Text m r -> Texts m r
324 > view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
325
which brings one closer to the types of the similar functions in @Data.Text.Lazy@.
327
328 In the type @Producer Text m (Producer Text m r)@ the second
329 element of the \'pair\' of effectful Texts cannot simply be retrieved
330 with something like 'snd'. This is an \'effectful\' pair, and one must work
331 through the effects of the first element to arrive at the second Text stream, even
332 if you are proposing to throw the Text in the first element away.
333 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
334
> join :: Monad m => Producer Text m (Producer Text m r) -> Producer Text m r
336
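For instance, to throw away the text of the first element while still working
through its effects, one might write (a sketch; @P@ abbreviates @Pipes.Prelude@):

> dropFirst17 :: Monad m => Producer Text m r -> Producer Text m r
> dropFirst17 p = join $ lift $ runEffect $ (p ^. splitAt 17) >-> P.drain

(The 'drop' pipe exported by this module does the same job more directly.)
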
337 The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
@FreeT (Producer Text m) m r@ -- our @Texts m r@ -- is the type of (effectful)
339 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
340 together things of the forms:
341
342 > r
343 > (Text,r)
344 > (Text, (Text, r))
345 > (Text, (Text, (Text, r)))
346 > (Text, (Text, (Text, (Text, r))))
347 > ...
348
349 (We might also have identified the sum of those types with @Free ((,) Text) r@
350 -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
351
352 Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
353 @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
354
355 > m r
356 > Text m r
357 > Text m (Text m r)
358 > Text m (Text m (Text m r))
359 > Text m (Text m (Text m (Text m r)))
360 > ...
361
362 We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
363 or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@.
365
366 One might think that
367
368 > lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
> view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
370
371 should really have the type
372
373 > lines :: Monad m => Pipe Text Text m r
374
375 as e.g. 'toUpper' does. But this would spoil the control we are
376 attempting to maintain over the size of chunks. It is in fact just
377 as unreasonable to want such a pipe as to want
378
379 > Data.Text.Lazy.lines :: Text -> Text
380
381 to 'rechunk' the strict Text chunks inside the lazy Text to respect
382 line boundaries. In fact we have
383
384 > Data.Text.Lazy.lines :: Text -> [Text]
385 > Prelude.lines :: String -> [String]
386
387 where the elements of the list are themselves lazy Texts or Strings; the use
388 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
389
390 The @Pipes.Group@ module, which can generally be imported without qualification,
391 provides many functions for working with things of type @FreeT (Producer a m) m r@.
392 In particular it conveniently exports the constructors for @FreeT@ and the associated
393 @FreeF@ type -- a fancy form of @Either@, namely
394
395 > data FreeF f a b = Pure a | Free (f b)
396
397 for pattern-matching. Consider the implementation of the 'words' function, or
398 of the part of the lens that takes us to the words; it is compact but exhibits many
399 of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
constructors. Keep in mind that
401
402 > newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
403 > next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
404
405 Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
406 the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
407
408 > words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
409 > words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
410 > x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
411 > return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
412 > Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
413 > Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
414 > p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
415 > (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
416 > return (words p'') -- We then carry on with the rest, which is likely to begin with space.
417
418 -}
419
420 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
421 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
422 fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
423 {-# INLINE fromLazy #-}
424
425 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
426 a ^. lens = getConstant (lens Constant a)
427
428 -- | Apply a transformation to each 'Char' in the stream
429 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
430 map f = P.map (T.map f)
431 {-# INLINABLE map #-}
432
433 -- | Map a function over the characters of a text stream and concatenate the results
434 concatMap
435 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
436 concatMap f = P.map (T.concatMap f)
437 {-# INLINABLE concatMap #-}
438
439 -- | Transform a Pipe of 'String's into one of 'Text' chunks
440 pack :: Monad m => Pipe String Text m r
441 pack = P.map T.pack
442 {-# INLINEABLE pack #-}
443
-- | Transform a Pipe of 'Text' chunks into one of 'String's
445 unpack :: Monad m => Pipe Text String m r
446 unpack = for cat (\t -> yield (T.unpack t))
447 {-# INLINEABLE unpack #-}
448
449 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
450 -- here acting as 'Text' pipes, rather as they would on a lazy text
451 toCaseFold :: Monad m => Pipe Text Text m r
452 toCaseFold = P.map T.toCaseFold
453 {-# INLINEABLE toCaseFold #-}
454
455 -- | lowercase incoming 'Text'
456 toLower :: Monad m => Pipe Text Text m r
457 toLower = P.map T.toLower
458 {-# INLINEABLE toLower #-}
459
460 -- | uppercase incoming 'Text'
461 toUpper :: Monad m => Pipe Text Text m r
462 toUpper = P.map T.toUpper
463 {-# INLINEABLE toUpper #-}
464
465 -- | Remove leading white space from an incoming succession of 'Text's
466 stripStart :: Monad m => Pipe Text Text m r
467 stripStart = do
468 chunk <- await
469 let text = T.stripStart chunk
470 if T.null text
471 then stripStart
472 else do yield text
473 cat
474 {-# INLINEABLE stripStart #-}
475
476 -- | @(take n)@ only allows @n@ individual characters to pass;
477 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
478 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
479 take n0 = go n0 where
480 go n
481 | n <= 0 = return ()
482 | otherwise = do
483 txt <- await
484 let len = fromIntegral (T.length txt)
485 if (len > n)
486 then yield (T.take (fromIntegral n) txt)
487 else do
488 yield txt
489 go (n - len)
490 {-# INLINABLE take #-}
491
492 -- | @(drop n)@ drops the first @n@ characters
493 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
494 drop n0 = go n0 where
495 go n
496 | n <= 0 = cat
497 | otherwise = do
498 txt <- await
499 let len = fromIntegral (T.length txt)
500 if (len >= n)
501 then do
502 yield (T.drop (fromIntegral n) txt)
503 cat
504 else go (n - len)
505 {-# INLINABLE drop #-}
506
507 -- | Take characters until they fail the predicate
508 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
509 takeWhile predicate = go
510 where
511 go = do
512 txt <- await
513 let (prefix, suffix) = T.span predicate txt
514 if (T.null suffix)
515 then do
516 yield txt
517 go
518 else yield prefix
519 {-# INLINABLE takeWhile #-}
520
521 -- | Drop characters until they fail the predicate
522 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
523 dropWhile predicate = go where
524 go = do
525 txt <- await
526 case T.findIndex (not . predicate) txt of
527 Nothing -> go
528 Just i -> do
529 yield (T.drop i txt)
530 cat
531 {-# INLINABLE dropWhile #-}
532
533 -- | Only allows 'Char's to pass if they satisfy the predicate
534 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
535 filter predicate = P.map (T.filter predicate)
536 {-# INLINABLE filter #-}
537
538 -- | Strict left scan over the characters
539 scan
540 :: (Monad m)
541 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
542 scan step begin = do
543 yield (T.singleton begin)
544 go begin
545 where
546 go c = do
547 txt <- await
548 let txt' = T.scanl step c txt
549 c' = T.last txt'
550 yield (T.tail txt')
551 go c'
552 {-# INLINABLE scan #-}
553
554 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
555 'TL.Text'
556 -}
557 toLazy :: Producer Text Identity () -> TL.Text
558 toLazy = TL.fromChunks . P.toList
559 {-# INLINABLE toLazy #-}
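
-- For example (a sketch, assuming @OverloadedStrings@):
--
-- >>> toLazy (fromLazy "hello, " >> fromLazy "world")
-- "hello, world"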
560
561 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
562 'TL.Text'
563
564 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
565 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
566 immediately as they are generated instead of loading them all into memory.
567 -}
568 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
569 toLazyM = liftM TL.fromChunks . P.toListM
570 {-# INLINABLE toLazyM #-}
571
572 -- | Reduce the text stream using a strict left fold over characters
573 foldChars
574 :: Monad m
575 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
576 foldChars step begin done = P.fold (T.foldl' step) begin done
577 {-# INLINABLE foldChars #-}
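
-- For instance, a character count could be written as (a sketch; the 'length'
-- fold below is the more direct way to do this):
--
-- > charCount :: Monad m => Producer Text m () -> m Int
-- > charCount = foldChars (\n _ -> n + 1) 0 id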
578
579 -- | Retrieve the first 'Char'
580 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
581 head = go
582 where
583 go p = do
584 x <- nextChar p
585 case x of
586 Left _ -> return Nothing
587 Right (c, _) -> return (Just c)
588 {-# INLINABLE head #-}
589
590 -- | Retrieve the last 'Char'
591 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
592 last = go Nothing
593 where
594 go r p = do
595 x <- next p
596 case x of
597 Left () -> return r
598 Right (txt, p') ->
599 if (T.null txt)
600 then go r p'
601 else go (Just $ T.last txt) p'
602 {-# INLINABLE last #-}
603
604 -- | Determine if the stream is empty
605 null :: (Monad m) => Producer Text m () -> m Bool
606 null = P.all T.null
607 {-# INLINABLE null #-}
608
609 -- | Count the number of characters in the stream
610 length :: (Monad m, Num n) => Producer Text m () -> m n
611 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
612 {-# INLINABLE length #-}
613
-- | Fold that returns whether any received 'Char's satisfy the predicate
615 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
616 any predicate = P.any (T.any predicate)
617 {-# INLINABLE any #-}
618
-- | Fold that returns whether all received 'Char's satisfy the predicate
620 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
621 all predicate = P.all (T.all predicate)
622 {-# INLINABLE all #-}
623
624 -- | Return the maximum 'Char' within a text stream
625 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
626 maximum = P.fold step Nothing id
627 where
628 step mc txt =
629 if (T.null txt)
630 then mc
631 else Just $ case mc of
632 Nothing -> T.maximum txt
633 Just c -> max c (T.maximum txt)
634 {-# INLINABLE maximum #-}
635
636 -- | Return the minimum 'Char' within a text stream (surely very useful!)
637 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
638 minimum = P.fold step Nothing id
639 where
640 step mc txt =
641 if (T.null txt)
642 then mc
643 else case mc of
644 Nothing -> Just (T.minimum txt)
645 Just c -> Just (min c (T.minimum txt))
646 {-# INLINABLE minimum #-}
647
648 -- | Find the first element in the stream that matches the predicate
649 find
650 :: (Monad m)
651 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
652 find predicate p = head (p >-> filter predicate)
653 {-# INLINABLE find #-}
654
655 -- | Index into a text stream
656 index
657 :: (Monad m, Integral a)
=> a -> Producer Text m () -> m (Maybe Char)
659 index n p = head (p >-> drop n)
660 {-# INLINABLE index #-}
661
662
663 -- | Store a tally of how many segments match the given 'Text'
664 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
665 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
666 {-# INLINABLE count #-}
667
668
669 -- | Consume the first character from a stream of 'Text'
670 --
-- 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
672 -- succeeds with a 'Right' providing the next character and the remainder of the
673 -- 'Producer'.
674
675 nextChar
676 :: (Monad m)
677 => Producer Text m r
678 -> m (Either r (Char, Producer Text m r))
679 nextChar = go
680 where
681 go p = do
682 x <- next p
683 case x of
684 Left r -> return (Left r)
685 Right (txt, p') -> case (T.uncons txt) of
686 Nothing -> go p'
687 Just (c, txt') -> return (Right (c, yield txt' >> p'))
688 {-# INLINABLE nextChar #-}
689
-- | Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the 'Producer' is empty
691
692 drawChar :: (Monad m) => Parser Text m (Maybe Char)
693 drawChar = do
694 x <- PP.draw
695 case x of
696 Nothing -> return Nothing
697 Just txt -> case (T.uncons txt) of
698 Nothing -> drawChar
699 Just (c, txt') -> do
700 PP.unDraw txt'
701 return (Just c)
702 {-# INLINABLE drawChar #-}
703
704 -- | Push back a 'Char' onto the underlying 'Producer'
705 unDrawChar :: (Monad m) => Char -> Parser Text m ()
706 unDrawChar c = modify (yield (T.singleton c) >>)
707 {-# INLINABLE unDrawChar #-}
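
-- Together, 'drawChar' and 'unDrawChar' can express small parsers directly;
-- for example (a sketch, not part of this module):
--
-- > drawWhile :: Monad m => (Char -> Bool) -> Parser Text m String
-- > drawWhile keep = go
-- >   where
-- >     go = do
-- >       mc <- drawChar
-- >       case mc of
-- >         Just c | keep c -> fmap (c:) go
-- >         Just c          -> do unDrawChar c
-- >                               return []
-- >         Nothing         -> return []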
708
709 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
710 push the 'Char' back
711
712 > peekChar = do
713 > x <- drawChar
714 > case x of
> Nothing -> return ()
> Just c  -> unDrawChar c
717 > return x
718
719 -}
720
721 peekChar :: (Monad m) => Parser Text m (Maybe Char)
722 peekChar = do
723 x <- drawChar
724 case x of
725 Nothing -> return ()
726 Just c -> unDrawChar c
727 return x
728 {-# INLINABLE peekChar #-}
729
730 {-| Check if the underlying 'Producer' has no more characters
731
732 Note that this will skip over empty 'Text' chunks, unlike
733 'PP.isEndOfInput' from @pipes-parse@, which would consider
734 an empty 'Text' a valid bit of input.
735
> isEndOfChars = liftM isNothing peekChar
737 -}
738 isEndOfChars :: (Monad m) => Parser Text m Bool
739 isEndOfChars = do
740 x <- peekChar
741 return (case x of
742 Nothing -> True
Just _  -> False)
744 {-# INLINABLE isEndOfChars #-}
745
746
747 -- | Splits a 'Producer' after the given number of characters
748 splitAt
749 :: (Monad m, Integral n)
750 => n
751 -> Lens' (Producer Text m r)
752 (Producer Text m (Producer Text m r))
753 splitAt n0 k p0 = fmap join (k (go n0 p0))
754 where
755 go 0 p = return p
756 go n p = do
757 x <- lift (next p)
758 case x of
759 Left r -> return (return r)
760 Right (txt, p') -> do
761 let len = fromIntegral (T.length txt)
762 if (len <= n)
763 then do
764 yield txt
765 go (n - len) p'
766 else do
767 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
768 yield prefix
769 return (yield suffix >> p')
770 {-# INLINABLE splitAt #-}
771
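-- A sketch of use: with @(^.)@ (or @view@) the lens acts as a splitting
-- function, so the following streams the first @n@ characters unchanged and
-- upper-cases the rest (a sketch, not part of this module):
--
-- > upperAfter :: Monad m => Int -> Producer Text m r -> Producer Text m r
-- > upperAfter n p = do
-- >   rest <- p ^. splitAt n   -- streams the first n characters, returns the remainder
-- >   rest >-> toUpper         -- then upper-cases everything that follows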
772
773 -- | Split a text stream in two, producing the longest
774 -- consecutive group of characters that satisfies the predicate
775 -- and returning the rest
776
777 span
778 :: (Monad m)
779 => (Char -> Bool)
780 -> Lens' (Producer Text m r)
781 (Producer Text m (Producer Text m r))
782 span predicate k p0 = fmap join (k (go p0))
783 where
784 go p = do
785 x <- lift (next p)
786 case x of
787 Left r -> return (return r)
788 Right (txt, p') -> do
789 let (prefix, suffix) = T.span predicate txt
790 if (T.null suffix)
791 then do
792 yield txt
793 go p'
794 else do
795 yield prefix
796 return (yield suffix >> p')
797 {-# INLINABLE span #-}
798
799 {-| Split a text stream in two, producing the longest
800 consecutive group of characters that don't satisfy the predicate
801 -}
802 break
803 :: (Monad m)
804 => (Char -> Bool)
805 -> Lens' (Producer Text m r)
806 (Producer Text m (Producer Text m r))
807 break predicate = span (not . predicate)
808 {-# INLINABLE break #-}
809
810 {-| Improper lens that splits after the first group of equivalent Chars, as
811 defined by the given equivalence relation
812 -}
813 groupBy
814 :: (Monad m)
815 => (Char -> Char -> Bool)
816 -> Lens' (Producer Text m r)
817 (Producer Text m (Producer Text m r))
818 groupBy equals k p0 = fmap join (k ((go p0))) where
819 go p = do
820 x <- lift (next p)
821 case x of
822 Left r -> return (return r)
823 Right (txt, p') -> case T.uncons txt of
824 Nothing -> go p'
825 Just (c, _) -> (yield txt >> p') ^. span (equals c)
826 {-# INLINABLE groupBy #-}
827
-- | Improper lens that splits after the first succession of identical 'Char's
829 group :: Monad m
830 => Lens' (Producer Text m r)
831 (Producer Text m (Producer Text m r))
832 group = groupBy (==)
833 {-# INLINABLE group #-}
834
835 {-| Improper lens that splits a 'Producer' after the first word
836
837 Unlike 'words', this does not drop leading whitespace
838 -}
839 word :: (Monad m)
840 => Lens' (Producer Text m r)
841 (Producer Text m (Producer Text m r))
842 word k p0 = fmap join (k (to p0))
843 where
844 to p = do
845 p' <- p^.span isSpace
846 p'^.break isSpace
847 {-# INLINABLE word #-}
848
849
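-- | Improper lens that splits a 'Producer' after the first line; the line
--   excludes the terminating newline, which is left at the head of the remainder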
850 line :: (Monad m)
851 => Lens' (Producer Text m r)
852 (Producer Text m (Producer Text m r))
853 line = break (== '\n')
854
855 {-# INLINABLE line #-}
856
857
-- | Intersperse a 'Char' in between the characters of a stream of 'Text'
859 intersperse
860 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
861 intersperse c = go0
862 where
863 go0 p = do
864 x <- lift (next p)
865 case x of
866 Left r -> return r
867 Right (txt, p') -> do
868 yield (T.intersperse c txt)
869 go1 p'
870 go1 p = do
871 x <- lift (next p)
872 case x of
873 Left r -> return r
874 Right (txt, p') -> do
875 yield (T.singleton c)
876 yield (T.intersperse c txt)
877 go1 p'
878 {-# INLINABLE intersperse #-}
879
880
-- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
882 packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x)
883 packChars = Data.Profunctor.dimap to (fmap from)
884 where
885 -- to :: Monad m => Producer Char m x -> Producer Text m x
886 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
887
888 step diffAs c = diffAs . (c:)
889
890 done diffAs = T.pack (diffAs [])
891
892 -- from :: Monad m => Producer Text m x -> Producer Char m x
893 from p = for p (each . T.unpack)
894 {-# INLINABLE packChars #-}
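
-- With @(^.)@ (or @view@) this repacks a stream of individual 'Char's into
-- 'Text' chunks of reasonable size; for example (a sketch):
--
-- > packed :: Monad m => Producer Text m ()
-- > packed = each "hello, world" ^. packChars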
895
896 defaultChunkSize :: Int
897 defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
898
899 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
900 chunksOf
901 :: (Monad m, Integral n)
902 => n -> Lens' (Producer Text m r)
903 (FreeT (Producer Text m) m r)
904 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
905 where
906 go p = do
907 x <- next p
908 return $ case x of
909 Left r -> Pure r
910 Right (txt, p') -> Free $ do
911 p'' <- (yield txt >> p') ^. splitAt n
912 return $ FreeT (go p'')
913 {-# INLINABLE chunksOf #-}
914
915
916 {-| Split a text stream into sub-streams delimited by characters that satisfy the
917 predicate
918 -}
919 splitsWith
920 :: (Monad m)
921 => (Char -> Bool)
922 -> Producer Text m r
923 -> FreeT (Producer Text m) m r
924 splitsWith predicate p0 = FreeT (go0 p0)
925 where
926 go0 p = do
927 x <- next p
928 case x of
929 Left r -> return (Pure r)
930 Right (txt, p') ->
931 if (T.null txt)
932 then go0 p'
933 else return $ Free $ do
934 p'' <- (yield txt >> p') ^. span (not . predicate)
935 return $ FreeT (go1 p'')
936 go1 p = do
937 x <- nextChar p
938 return $ case x of
939 Left r -> Pure r
940 Right (_, p') -> Free $ do
941 p'' <- p' ^. span (not . predicate)
942 return $ FreeT (go1 p'')
943 {-# INLINABLE splitsWith #-}
944
945 -- | Split a text stream using the given 'Char' as the delimiter
946 splits :: (Monad m)
947 => Char
948 -> Lens' (Producer Text m r)
949 (FreeT (Producer Text m) m r)
950 splits c k p =
951 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
952 {-# INLINABLE splits #-}
953
{-| Improper lens that splits a stream of 'Text' into groups of equivalent 'Char's, using the
    given equivalence relation
956 -}
957 groupsBy
958 :: Monad m
959 => (Char -> Char -> Bool)
960 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
961 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
962 go p = do x <- next p
963 case x of Left r -> return (Pure r)
964 Right (bs, p') -> case T.uncons bs of
965 Nothing -> go p'
966 Just (c, _) -> do return $ Free $ do
967 p'' <- (yield bs >> p')^.span (equals c)
968 return $ FreeT (go p'')
969 {-# INLINABLE groupsBy #-}
970
971
972 -- | Like 'groupsBy', where the equality predicate is ('==')
973 groups
974 :: Monad m
975 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
976 groups = groupsBy (==)
977 {-# INLINABLE groups #-}
978
979
980
981 {-| Split a text stream into 'FreeT'-delimited lines
982 -}
983 lines
984 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
985 lines = Data.Profunctor.dimap _lines (fmap _unlines)
986 where
987 _lines p0 = FreeT (go0 p0)
988 where
989 go0 p = do
990 x <- next p
991 case x of
992 Left r -> return (Pure r)
993 Right (txt, p') ->
994 if (T.null txt)
995 then go0 p'
996 else return $ Free $ go1 (yield txt >> p')
997 go1 p = do
998 p' <- p ^. break ('\n' ==)
999 return $ FreeT $ do
1000 x <- nextChar p'
1001 case x of
1002 Left r -> return $ Pure r
1003 Right (_, p'') -> go0 p''
1004 -- _unlines
1005 -- :: Monad m
1006 -- => FreeT (Producer Text m) m x -> Producer Text m x
1007 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
1008
1009 {-# INLINABLE lines #-}
1010
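-- For example, the number of lines in a text stream can be counted without
-- concatenating any text (a sketch; @P@ and @PG@ abbreviate @Pipes.Prelude@
-- and @Pipes.Group@, and @view@ is the usual lens combinator):
--
-- > lineCount :: Monad m => Producer Text m () -> m Int
-- > lineCount p = P.length (PG.folds (\_ _ -> ()) () id (view lines p))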
1011
1012 -- | Split a text stream into 'FreeT'-delimited words
1013 words
1014 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
1015 words = Data.Profunctor.dimap go (fmap _unwords)
1016 where
1017 go p = FreeT $ do
1018 x <- next (p >-> dropWhile isSpace)
1019 return $ case x of
1020 Left r -> Pure r
1021 Right (bs, p') -> Free $ do
1022 p'' <- (yield bs >> p') ^. break isSpace
1023 return (go p'')
1024 _unwords = PG.intercalates (yield $ T.singleton ' ')
1025
1026 {-# INLINABLE words #-}
1027
1028
1029 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1030 interspersing a text stream in between them
1031 -}
1032 intercalate
1033 :: (Monad m)
1034 => Producer Text m ()
1035 -> FreeT (Producer Text m) m r
1036 -> Producer Text m r
1037 intercalate p0 = go0
1038 where
1039 go0 f = do
1040 x <- lift (runFreeT f)
1041 case x of
1042 Pure r -> return r
1043 Free p -> do
1044 f' <- p
1045 go1 f'
1046 go1 f = do
1047 x <- lift (runFreeT f)
1048 case x of
1049 Pure r -> return r
1050 Free p -> do
1051 p0
1052 f' <- p
1053 go1 f'
1054 {-# INLINABLE intercalate #-}
1055
1056 {-| Join 'FreeT'-delimited lines into a text stream
1057 -}
1058 unlines
1059 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1060 unlines = go
1061 where
1062 go f = do
1063 x <- lift (runFreeT f)
1064 case x of
1065 Pure r -> return r
1066 Free p -> do
1067 f' <- p
1068 yield $ T.singleton '\n'
1069 go f'
1070 {-# INLINABLE unlines #-}
1071
1072 {-| Join 'FreeT'-delimited words into a text stream
1073 -}
1074 unwords
1075 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1076 unwords = intercalate (yield $ T.singleton ' ')
1077 {-# INLINABLE unwords #-}
1078
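-- Splitting on words and re-joining them collapses all whitespace runs to
-- single spaces, dropping leading and trailing whitespace; for example
-- (a sketch, with @view@ the usual lens combinator):
--
-- > normalizeSpace :: Monad m => Producer Text m r -> Producer Text m r
-- > normalizeSpace = unwords . view words
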
1079
1080 {- $reexports
1081
@Data.Text@ re-exports the 'Text' type; @Data.ByteString@ re-exports the 'ByteString' type.

@Data.Profunctor@ re-exports the 'Profunctor' class.

@Pipes.Parse@ re-exports the 'Parser' type synonym.

@Pipes.Group@ re-exports 'concats', 'intercalates', and the 'FreeT' and 'FreeF' types.
1085 -}
1086
1087
1088 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)