]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
started generalizing lenses
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
9667f797
GG
1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
91727d11 3
7faef8bc 4module Pipes.Text (
82fe661b 5 -- * Effectful Text
e4b6dc67 6 -- $intro
7
82fe661b 8 -- * Lenses
9 -- $lenses
10
11 -- ** @view@ \/ @(^.)@
12 -- $view
13
14 -- ** @over@ \/ @(%~)@
15 -- $over
16
17 -- ** @zoom@
18 -- $zoom
19
20 -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
21 -- $special
22
91727d11 23 -- * Producers
1a83ae4e 24 fromLazy
91727d11 25
26 -- * Pipes
1677dc12 27 , map
28 , concatMap
29 , take
30 , drop
31 , takeWhile
32 , dropWhile
33 , filter
34 , scan
1677dc12 35 , pack
36 , unpack
37 , toCaseFold
38 , toLower
39 , toUpper
40 , stripStart
91727d11 41
42 -- * Folds
1677dc12 43 , toLazy
44 , toLazyM
45 , foldChars
46 , head
47 , last
48 , null
49 , length
50 , any
51 , all
52 , maximum
53 , minimum
54 , find
55 , index
56 , count
57
58 -- * Primitive Character Parsers
1677dc12 59 , nextChar
60 , drawChar
61 , unDrawChar
62 , peekChar
9e9bb0ce 63 , isEndOfChars
1677dc12 64
65 -- * Parsing Lenses
9e9bb0ce 66 , splitAt
1677dc12 67 , span
68 , break
69 , groupBy
70 , group
9e9bb0ce 71 , word
72 , line
1677dc12 73
74 -- * FreeT Splitters
75 , chunksOf
76 , splitsWith
0f8c6f1b 77 , splits
1a83ae4e 78 , groupsBy
79 , groups
1677dc12 80 , lines
81 , words
82
91727d11 83 -- * Transformations
1677dc12 84 , intersperse
9e9bb0ce 85 , packChars
31f41a5d 86
91727d11 87 -- * Joiners
1677dc12 88 , intercalate
89 , unlines
90 , unwords
9e9bb0ce 91
1a83ae4e 92 -- * Re-exports
91727d11 93 -- $reexports
1677dc12 94 , module Data.ByteString
95 , module Data.Text
96 , module Data.Profunctor
1677dc12 97 , module Pipes.Parse
7ed76745 98 , module Pipes.Group
91727d11 99 ) where
100
0f8c6f1b 101import Control.Applicative ((<*))
70125641 102import Control.Monad (liftM, join)
9e9bb0ce 103import Control.Monad.Trans.State.Strict (StateT(..), modify)
91727d11 104import qualified Data.Text as T
91727d11 105import Data.Text (Text)
106import qualified Data.Text.Lazy as TL
31f41a5d 107import Data.ByteString (ByteString)
1677dc12 108import Data.Functor.Constant (Constant(Constant, getConstant))
91727d11 109import Data.Functor.Identity (Identity)
1677dc12 110import Data.Profunctor (Profunctor)
111import qualified Data.Profunctor
91727d11 112import Pipes
7fc48f7c 113import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
7ed76745 114import qualified Pipes.Group as PG
91727d11 115import qualified Pipes.Parse as PP
7ed76745 116import Pipes.Parse (Parser)
79917d53 117import Pipes.Text.Encoding (Lens'_, Iso'_)
91727d11 118import qualified Pipes.Prelude as P
91727d11 119import Data.Char (isSpace)
1a83ae4e 120import Data.Word (Word8)
79917d53 121import Foreign.Storable (sizeOf)
122import Data.Bits (shiftL)
91727d11 123import Prelude hiding (
124 all,
125 any,
126 break,
127 concat,
128 concatMap,
129 drop,
130 dropWhile,
131 elem,
132 filter,
133 head,
134 last,
135 lines,
136 length,
137 map,
138 maximum,
139 minimum,
140 notElem,
141 null,
142 readFile,
143 span,
144 splitAt,
145 take,
146 takeWhile,
147 unlines,
148 unwords,
149 words,
150 writeFile )
151
e4b6dc67 152{- $intro
e1ed9621 153 This package provides @pipes@ utilities for /text streams/ or /character streams/,
154 realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
155 and thus you will generally want @Data.Text@ in scope. But the type
156 @Producer Text m r@ ,as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
e4b6dc67 157
158 This particular module provides many functions equivalent in one way or another to
159 the pure functions in
160 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
161 They transform, divide, group and fold text streams. Though @Producer Text m r@
162 is the type of \'effectful Text\', the functions in this module are \'pure\'
163 in the sense that they are uniformly monad-independent.
164 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
165 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
166 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
167
168 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
169 strict Text: the implementation is arranged so that the individual strict 'Text'
170 chunks are kept to a reasonable size; the user is not aware of the divisions
171 between the connected 'Text' chunks.
172 So also here: the functions in this module are designed to operate on streams that
173 are insensitive to text boundaries. This means that they may freely split
174 text into smaller texts and /discard empty texts/. The objective, though, is
175 that they should /never concatenate texts/ in order to provide strict upper
176 bounds on memory usage.
177
178 For example, to stream only the first three lines of 'stdin' to 'stdout' you
179 might write:
180
181> import Pipes
182> import qualified Pipes.Text as Text
183> import qualified Pipes.Text.IO as Text
184> import Pipes.Group (takes')
185> import Lens.Family
186>
187> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
188> where
189> takeLines n = Text.unlines . takes' n . view Text.lines
190
191 The above program will never bring more than one chunk of text (~ 32 KB) into
192 memory, no matter how long the lines are.
193
82fe661b 194-}
195{- $lenses
e4b6dc67 196 As this example shows, one superficial difference from @Data.Text.Lazy@
80a490ef 197 is that many of the operations, like 'lines', are \'lensified\'; this has a
198 number of advantages (where it is possible); in particular it facilitates their
199 use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
200 sense.) The disadvantage, famously, is that the messages you get for type errors can be
201 a little alarming. The remarks that follow in this section are for non-lens adepts.
202
203 Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
204 intuitively corresponding function when used with @view@ or @(^.)@. Instead of
205 writing:
206
207 > splitAt 17 producer
208
209 as we would with the Prelude or Text functions, we write
210
211 > view (splitAt 17) producer
212
82fe661b 213 or equivalently
80a490ef 214
215 > producer ^. splitAt 17
e4b6dc67 216
80a490ef 217 This may seem a little indirect, but note that many equivalents of
218 @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
219 corresponding functions by prefixing them with @(>->)@. Thus something like
e4b6dc67 220
221> stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
222
223 would drop the leading white space from each line.
224
80a490ef 225 The lenses in this library are marked as /improper/; this just means that
e1ed9621 226 they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
80a490ef 227 Just for this reason, though, the magnificent complexities of the lens libraries
228 are a distraction. The lens combinators to keep in mind, the ones that make sense for
229 our lenses, are @view@ \/ @(^.)@), @over@ \/ @(%~)@ , and @zoom@.
230
79917d53 231 One need only keep in mind that if @l@ is a @Lens'_ a b@, then:
82fe661b 232
233-}
234{- $view
235 @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
80a490ef 236 is the corresponding @b@; as was said above, this function will be exactly the
237 function you think it is, given its name. Thus to uppercase the first n characters
238 of a Producer, leaving the rest the same, we could write:
239
240
241 > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
242 > p'
82fe661b 243-}
244{- $over
245 @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
80a490ef 246 @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
247 the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
248 (it can also be written @l %~ f@).
249 For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
250 So above we might have written things like these:
251
252 > stripLines = Text.lines %~ maps (>-> Text.stripStart)
253 > stripLines = over Text.lines (maps (>-> Text.stripStart))
254 > upper n = Text.splitAt n %~ (>-> Text.toUpper)
82fe661b 255
256-}
257{- $zoom
258 @zoom l@, finally, is a function from a @Parser b m r@
80a490ef 259 to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
260 Its use is easiest to see with an decoding lens like 'utf8', which
261 \"sees\" a Text producer hidden inside a ByteString producer:
262 @drawChar@ is a Text parser, returning a @Maybe Char@, @zoom utf8 drawChar@ is
263 a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
264 a list of everything produced from a Producer, leaving only the return value; it would
265 usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
266 returns a list of Text chunks containing the first seventeen Chars, and returns the rest of
267 the Text Producer for further parsing. Suppose that we want, inexplicably, to
268 modify the casing of a Text Producer according to any instruction it might
269 contain at the start. Then we might write something like this:
270
271> obey :: Monad m => Producer Text m b -> Producer Text m b
e1ed9621 272> obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
80a490ef 273> let seven = T.concat ts
274> case T.toUpper seven of
275> "TOUPPER" -> p' >-> Text.toUpper
276> "TOLOWER" -> p' >-> Text.toLower
277> _ -> do yield seven
278> p'
279
e1ed9621 280
281> >>> let doc = each ["toU","pperTh","is document.\n"]
282> >>> runEffect $ obey doc >-> Text.stdout
283> THIS DOCUMENT.
284
80a490ef 285 The purpose of exporting lenses is the mental economy achieved with this three-way
286 applicability. That one expression, e.g. @lines@ or @splitAt 17@ can have these
287 three uses is no more surprising than that a pipe can act as a function modifying
288 the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
289 -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
290 @pipe >-> consumer@
291
292 The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
293 both <http://hackage.haskell.org/package/lens lens> and
e4b6dc67 294 <http://hackage.haskell.org/package/lens-family lens-family> The use of 'zoom' is explained
295 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
80a490ef 296 and to some extent in the @Pipes.Text.Encoding@ module here.
e4b6dc67 297
82fe661b 298-}
299{- $special
e4b6dc67 300 These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
301 This is in the types that are most closely associated with our central text type,
302 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
303
304> splitAt :: Int -> Text -> (Text, Text)
305> lines :: Text -> [Text]
306> chunksOf :: Int -> Text -> [Text]
307
308 which relate a Text with a pair of Texts or a list of Texts.
309 The corresponding functions here (taking account of \'lensification\') are
310
311> view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
312> view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
313> view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
314
315 Some of the types may be more readable if you imagine that we have introduced
316 our own type synonyms
317
318> type Text m r = Producer T.Text m r
319> type Texts m r = FreeT (Producer T.Text m) m r
320
321 Then we would think of the types above as
322
323> view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
324> view lines :: (Monad m) => Text m r -> Texts m r
325> view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
326
327 which brings one closer to the types of the similar functions in @Data.Text.Lazy@
328
329 In the type @Producer Text m (Producer Text m r)@ the second
330 element of the \'pair\' of effectful Texts cannot simply be retrieved
331 with something like 'snd'. This is an \'effectful\' pair, and one must work
332 through the effects of the first element to arrive at the second Text stream, even
333 if you are proposing to throw the Text in the first element away.
334 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
335
336> join :: Monad m => Producer Text m (Producer m r) -> Producer m r
337
e1ed9621 338 The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
e4b6dc67 339 @FreeT (Producer m Text) m r@ -- our @Texts m r@ -- is the type of (effectful)
340 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
341 together things of the forms:
342
343> r
344> (Text,r)
345> (Text, (Text, r))
346> (Text, (Text, (Text, r)))
347> (Text, (Text, (Text, (Text, r))))
348> ...
349
80a490ef 350 (We might also have identified the sum of those types with @Free ((,) Text) r@
351 -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
352
353 Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
354 @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
e4b6dc67 355
356> m r
80a490ef 357> Text m r
358> Text m (Text m r)
359> Text m (Text m (Text m r))
360> Text m (Text m (Text m (Text m r)))
e4b6dc67 361> ...
362
80a490ef 363 We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
364 or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
365 result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@ .
366
e4b6dc67 367 One might think that
368
79917d53 369> lines :: Monad m => Lens'_ (Producer Text m r) (FreeT (Producer Text m) m r)
e4b6dc67 370> view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
371
372 should really have the type
373
374> lines :: Monad m => Pipe Text Text m r
375
376 as e.g. 'toUpper' does. But this would spoil the control we are
377 attempting to maintain over the size of chunks. It is in fact just
378 as unreasonable to want such a pipe as to want
379
380> Data.Text.Lazy.lines :: Text -> Text
381
382 to 'rechunk' the strict Text chunks inside the lazy Text to respect
383 line boundaries. In fact we have
384
385> Data.Text.Lazy.lines :: Text -> [Text]
386> Prelude.lines :: String -> [String]
387
388 where the elements of the list are themselves lazy Texts or Strings; the use
389 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
390
391 The @Pipes.Group@ module, which can generally be imported without qualification,
e1ed9621 392 provides many functions for working with things of type @FreeT (Producer a m) m r@.
80a490ef 393 In particular it conveniently exports the constructors for @FreeT@ and the associated
394 @FreeF@ type -- a fancy form of @Either@, namely
e4b6dc67 395
80a490ef 396> data FreeF f a b = Pure a | Free (f b)
397
398 for pattern-matching. Consider the implementation of the 'words' function, or
399 of the part of the lens that takes us to the words; it is compact but exhibits many
400 of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
401 constuctors. Keep in mind that
402
403> newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
404> next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
405
406 Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
407 the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
408
409> words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
410> words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
411> x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
412> return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
413> Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
414> Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
415> p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
416> (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
417> return (words p'') -- We then carry on with the rest, which is likely to begin with space.
418
419-}
e4b6dc67 420
c3c19f9b 421type Lens s t a b = forall f . Functor f => (a -> f b) -> (s -> f t)
422
423
424(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
425a ^. lens = getConstant (lens Constant a)
426
91727d11 427-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
428fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
79917d53 429fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 430{-# INLINE fromLazy #-}
91727d11 431
1677dc12 432
1677dc12 433
91727d11 434-- | Apply a transformation to each 'Char' in the stream
435map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
436map f = P.map (T.map f)
437{-# INLINABLE map #-}
438
ff38b9f0 439{-# RULES "p >-> map f" forall p f .
440 p >-> map f = for p (\txt -> yield (T.map f txt))
441 #-}
442
31f41a5d 443-- | Map a function over the characters of a text stream and concatenate the results
91727d11 444concatMap
445 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
446concatMap f = P.map (T.concatMap f)
447{-# INLINABLE concatMap #-}
448
ff38b9f0 449{-# RULES "p >-> concatMap f" forall p f .
450 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
451 #-}
7faef8bc 452
ff38b9f0 453
c0343bc9 454-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 455pack :: Monad m => Pipe String Text m r
456pack = P.map T.pack
457{-# INLINEABLE pack #-}
458
ff38b9f0 459{-# RULES "p >-> pack" forall p .
460 p >-> pack = for p (\txt -> yield (T.pack txt))
461 #-}
462
463-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 464unpack :: Monad m => Pipe Text String m r
d4732515 465unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 466{-# INLINEABLE unpack #-}
467
ff38b9f0 468{-# RULES "p >-> unpack" forall p .
469 p >-> unpack = for p (\txt -> yield (T.unpack txt))
470 #-}
d4732515 471
b0d86a59 472-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
473-- here acting as 'Text' pipes, rather as they would on a lazy text
a4913c42 474toCaseFold :: Monad m => Pipe Text Text m r
7faef8bc 475toCaseFold = P.map T.toCaseFold
476{-# INLINEABLE toCaseFold #-}
477
ff38b9f0 478{-# RULES "p >-> toCaseFold" forall p .
479 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
480 #-}
481
482
c0343bc9 483-- | lowercase incoming 'Text'
a4913c42 484toLower :: Monad m => Pipe Text Text m r
7faef8bc 485toLower = P.map T.toLower
486{-# INLINEABLE toLower #-}
487
ff38b9f0 488{-# RULES "p >-> toLower" forall p .
489 p >-> toLower = for p (\txt -> yield (T.toLower txt))
490 #-}
491
c0343bc9 492-- | uppercase incoming 'Text'
c70edb9d 493toUpper :: Monad m => Pipe Text Text m r
7faef8bc 494toUpper = P.map T.toUpper
495{-# INLINEABLE toUpper #-}
496
ff38b9f0 497{-# RULES "p >-> toUpper" forall p .
498 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
499 #-}
500
c0343bc9 501-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 502stripStart :: Monad m => Pipe Text Text m r
503stripStart = do
504 chunk <- await
505 let text = T.stripStart chunk
506 if T.null text
507 then stripStart
b0d86a59 508 else do yield text
509 cat
7faef8bc 510{-# INLINEABLE stripStart #-}
511
31f41a5d 512-- | @(take n)@ only allows @n@ individual characters to pass;
513-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 514take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
515take n0 = go n0 where
516 go n
517 | n <= 0 = return ()
518 | otherwise = do
31f41a5d 519 txt <- await
520 let len = fromIntegral (T.length txt)
91727d11 521 if (len > n)
31f41a5d 522 then yield (T.take (fromIntegral n) txt)
91727d11 523 else do
31f41a5d 524 yield txt
91727d11 525 go (n - len)
526{-# INLINABLE take #-}
527
31f41a5d 528-- | @(drop n)@ drops the first @n@ characters
91727d11 529drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
530drop n0 = go n0 where
531 go n
532 | n <= 0 = cat
533 | otherwise = do
31f41a5d 534 txt <- await
535 let len = fromIntegral (T.length txt)
91727d11 536 if (len >= n)
537 then do
31f41a5d 538 yield (T.drop (fromIntegral n) txt)
91727d11 539 cat
540 else go (n - len)
541{-# INLINABLE drop #-}
542
31f41a5d 543-- | Take characters until they fail the predicate
91727d11 544takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
545takeWhile predicate = go
546 where
547 go = do
31f41a5d 548 txt <- await
549 let (prefix, suffix) = T.span predicate txt
91727d11 550 if (T.null suffix)
551 then do
31f41a5d 552 yield txt
91727d11 553 go
554 else yield prefix
555{-# INLINABLE takeWhile #-}
556
31f41a5d 557-- | Drop characters until they fail the predicate
91727d11 558dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
559dropWhile predicate = go where
560 go = do
31f41a5d 561 txt <- await
562 case T.findIndex (not . predicate) txt of
91727d11 563 Nothing -> go
564 Just i -> do
31f41a5d 565 yield (T.drop i txt)
91727d11 566 cat
567{-# INLINABLE dropWhile #-}
568
569-- | Only allows 'Char's to pass if they satisfy the predicate
570filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
571filter predicate = P.map (T.filter predicate)
572{-# INLINABLE filter #-}
573
ff38b9f0 574{-# RULES "p >-> filter q" forall p q .
575 p >-> filter q = for p (\txt -> yield (T.filter q txt))
576 #-}
577
31f41a5d 578-- | Strict left scan over the characters
91727d11 579scan
580 :: (Monad m)
581 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
11645cdc
GG
582scan step begin = do
583 yield (T.singleton begin)
584 go begin
91727d11 585 where
31f41a5d 586 go c = do
587 txt <- await
588 let txt' = T.scanl step c txt
589 c' = T.last txt'
11645cdc 590 yield (T.tail txt')
31f41a5d 591 go c'
91727d11 592{-# INLINABLE scan #-}
593
594{-| Fold a pure 'Producer' of strict 'Text's into a lazy
595 'TL.Text'
596-}
597toLazy :: Producer Text Identity () -> TL.Text
598toLazy = TL.fromChunks . P.toList
599{-# INLINABLE toLazy #-}
600
601{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
602 'TL.Text'
603
604 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
605 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
606 immediately as they are generated instead of loading them all into memory.
607-}
608toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
609toLazyM = liftM TL.fromChunks . P.toListM
610{-# INLINABLE toLazyM #-}
611
31f41a5d 612-- | Reduce the text stream using a strict left fold over characters
64e03122 613foldChars
91727d11 614 :: Monad m
615 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 616foldChars step begin done = P.fold (T.foldl' step) begin done
1677dc12 617{-# INLINABLE foldChars #-}
91727d11 618
619-- | Retrieve the first 'Char'
620head :: (Monad m) => Producer Text m () -> m (Maybe Char)
621head = go
622 where
623 go p = do
624 x <- nextChar p
625 case x of
626 Left _ -> return Nothing
31f41a5d 627 Right (c, _) -> return (Just c)
91727d11 628{-# INLINABLE head #-}
629
630-- | Retrieve the last 'Char'
631last :: (Monad m) => Producer Text m () -> m (Maybe Char)
632last = go Nothing
633 where
634 go r p = do
635 x <- next p
636 case x of
637 Left () -> return r
31f41a5d 638 Right (txt, p') ->
639 if (T.null txt)
91727d11 640 then go r p'
31f41a5d 641 else go (Just $ T.last txt) p'
91727d11 642{-# INLINABLE last #-}
643
644-- | Determine if the stream is empty
645null :: (Monad m) => Producer Text m () -> m Bool
646null = P.all T.null
647{-# INLINABLE null #-}
648
62e8521c 649-- | Count the number of characters in the stream
91727d11 650length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 651length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 652{-# INLINABLE length #-}
653
654-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
655any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
656any predicate = P.any (T.any predicate)
657{-# INLINABLE any #-}
658
659-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
660all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
661all predicate = P.all (T.all predicate)
662{-# INLINABLE all #-}
663
62e8521c 664-- | Return the maximum 'Char' within a text stream
91727d11 665maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
666maximum = P.fold step Nothing id
667 where
31f41a5d 668 step mc txt =
669 if (T.null txt)
670 then mc
671 else Just $ case mc of
672 Nothing -> T.maximum txt
673 Just c -> max c (T.maximum txt)
91727d11 674{-# INLINABLE maximum #-}
675
62e8521c 676-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 677minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
678minimum = P.fold step Nothing id
679 where
31f41a5d 680 step mc txt =
681 if (T.null txt)
682 then mc
683 else case mc of
684 Nothing -> Just (T.minimum txt)
685 Just c -> Just (min c (T.minimum txt))
91727d11 686{-# INLINABLE minimum #-}
687
91727d11 688-- | Find the first element in the stream that matches the predicate
689find
690 :: (Monad m)
691 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
692find predicate p = head (p >-> filter predicate)
693{-# INLINABLE find #-}
694
62e8521c 695-- | Index into a text stream
91727d11 696index
697 :: (Monad m, Integral a)
698 => a-> Producer Text m () -> m (Maybe Char)
699index n p = head (p >-> drop n)
700{-# INLINABLE index #-}
701
63ea9ffd 702
31f41a5d 703-- | Store a tally of how many segments match the given 'Text'
704count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
705count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
706{-# INLINABLE count #-}
707
9e9bb0ce 708
1a83ae4e 709-- | Consume the first character from a stream of 'Text'
710--
711-- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
712-- succeeds with a 'Right' providing the next character and the remainder of the
713-- 'Producer'.
9e9bb0ce 714
9e9bb0ce 715nextChar
716 :: (Monad m)
717 => Producer Text m r
718 -> m (Either r (Char, Producer Text m r))
719nextChar = go
720 where
721 go p = do
722 x <- next p
723 case x of
724 Left r -> return (Left r)
725 Right (txt, p') -> case (T.uncons txt) of
726 Nothing -> go p'
727 Just (c, txt') -> return (Right (c, yield txt' >> p'))
728{-# INLINABLE nextChar #-}
729
1a83ae4e 730-- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
731
9e9bb0ce 732drawChar :: (Monad m) => Parser Text m (Maybe Char)
733drawChar = do
734 x <- PP.draw
735 case x of
736 Nothing -> return Nothing
737 Just txt -> case (T.uncons txt) of
738 Nothing -> drawChar
739 Just (c, txt') -> do
740 PP.unDraw txt'
741 return (Just c)
742{-# INLINABLE drawChar #-}
743
744-- | Push back a 'Char' onto the underlying 'Producer'
745unDrawChar :: (Monad m) => Char -> Parser Text m ()
746unDrawChar c = modify (yield (T.singleton c) >>)
747{-# INLINABLE unDrawChar #-}
748
749{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
750 push the 'Char' back
751
752> peekChar = do
753> x <- drawChar
754> case x of
755> Left _ -> return ()
756> Right c -> unDrawChar c
757> return x
1a83ae4e 758
9e9bb0ce 759-}
1a83ae4e 760
9e9bb0ce 761peekChar :: (Monad m) => Parser Text m (Maybe Char)
762peekChar = do
763 x <- drawChar
764 case x of
765 Nothing -> return ()
766 Just c -> unDrawChar c
767 return x
768{-# INLINABLE peekChar #-}
769
770{-| Check if the underlying 'Producer' has no more characters
771
772 Note that this will skip over empty 'Text' chunks, unlike
773 'PP.isEndOfInput' from @pipes-parse@, which would consider
774 an empty 'Text' a valid bit of input.
775
776> isEndOfChars = liftM isLeft peekChar
777-}
778isEndOfChars :: (Monad m) => Parser Text m Bool
779isEndOfChars = do
780 x <- peekChar
781 return (case x of
782 Nothing -> True
783 Just _-> False )
784{-# INLINABLE isEndOfChars #-}
785
786
31f41a5d 787-- | Splits a 'Producer' after the given number of characters
91727d11 788splitAt
789 :: (Monad m, Integral n)
790 => n
c3c19f9b 791 -> Lens (Producer Text m x)
792 (Producer Text m y)
793 (Producer Text m (Producer Text m x))
794 (Producer Text m (Producer Text m y))
9e9bb0ce 795splitAt n0 k p0 = fmap join (k (go n0 p0))
91727d11 796 where
797 go 0 p = return p
798 go n p = do
799 x <- lift (next p)
800 case x of
801 Left r -> return (return r)
31f41a5d 802 Right (txt, p') -> do
803 let len = fromIntegral (T.length txt)
91727d11 804 if (len <= n)
805 then do
31f41a5d 806 yield txt
91727d11 807 go (n - len) p'
808 else do
31f41a5d 809 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 810 yield prefix
811 return (yield suffix >> p')
812{-# INLINABLE splitAt #-}
813
91727d11 814
1a83ae4e 815-- | Split a text stream in two, producing the longest
816-- consecutive group of characters that satisfies the predicate
817-- and returning the rest
818
91727d11 819span
820 :: (Monad m)
821 => (Char -> Bool)
c3c19f9b 822 -> Lens (Producer Text m x)
823 (Producer Text m y)
824 (Producer Text m (Producer Text m x))
825 (Producer Text m (Producer Text m y))
9e9bb0ce 826span predicate k p0 = fmap join (k (go p0))
91727d11 827 where
828 go p = do
829 x <- lift (next p)
830 case x of
831 Left r -> return (return r)
31f41a5d 832 Right (txt, p') -> do
833 let (prefix, suffix) = T.span predicate txt
91727d11 834 if (T.null suffix)
835 then do
31f41a5d 836 yield txt
91727d11 837 go p'
838 else do
839 yield prefix
840 return (yield suffix >> p')
841{-# INLINABLE span #-}
842
1a83ae4e 843{-| Split a text stream in two, producing the longest
62e8521c 844 consecutive group of characters that don't satisfy the predicate
91727d11 845-}
846break
847 :: (Monad m)
848 => (Char -> Bool)
c3c19f9b 849 -> Lens (Producer Text m x)
850 (Producer Text m y)
851 (Producer Text m (Producer Text m x))
852 (Producer Text m (Producer Text m y))
91727d11 853break predicate = span (not . predicate)
854{-# INLINABLE break #-}
855
9e9bb0ce 856{-| Improper lens that splits after the first group of equivalent Chars, as
857 defined by the given equivalence relation
858-}
859groupBy
860 :: (Monad m)
861 => (Char -> Char -> Bool)
c3c19f9b 862 -> Lens (Producer Text m x)
863 (Producer Text m y)
864 (Producer Text m (Producer Text m x))
865 (Producer Text m (Producer Text m y))
9e9bb0ce 866groupBy equals k p0 = fmap join (k ((go p0))) where
867 go p = do
868 x <- lift (next p)
869 case x of
870 Left r -> return (return r)
871 Right (txt, p') -> case T.uncons txt of
872 Nothing -> go p'
873 Just (c, _) -> (yield txt >> p') ^. span (equals c)
874{-# INLINABLE groupBy #-}
875
876-- | Improper lens that splits after the first succession of identical 'Char' s
877group :: Monad m
79917d53 878 => Lens'_ (Producer Text m r)
9e9bb0ce 879 (Producer Text m (Producer Text m r))
880group = groupBy (==)
881{-# INLINABLE group #-}
882
883{-| Improper lens that splits a 'Producer' after the first word
884
885 Unlike 'words', this does not drop leading whitespace
886-}
887word :: (Monad m)
c3c19f9b 888 => Lens (Producer Text m x)
889 (Producer Text m y)
890 (Producer Text m (Producer Text m x))
891 (Producer Text m (Producer Text m y))
9e9bb0ce 892word k p0 = fmap join (k (to p0))
893 where
894 to p = do
895 p' <- p^.span isSpace
896 p'^.break isSpace
897{-# INLINABLE word #-}
898
899
900line :: (Monad m)
c3c19f9b 901 => Lens (Producer Text m x)
902 (Producer Text m y)
903 (Producer Text m (Producer Text m x))
904 (Producer Text m (Producer Text m y))
9e9bb0ce 905line = break (== '\n')
906
907{-# INLINABLE line #-}
908
909
910-- | Intersperse a 'Char' in between the characters of stream of 'Text'
911intersperse
912 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
913intersperse c = go0
914 where
915 go0 p = do
916 x <- lift (next p)
917 case x of
918 Left r -> return r
919 Right (txt, p') -> do
920 yield (T.intersperse c txt)
921 go1 p'
922 go1 p = do
923 x <- lift (next p)
924 case x of
925 Left r -> return r
926 Right (txt, p') -> do
927 yield (T.singleton c)
928 yield (T.intersperse c txt)
929 go1 p'
930{-# INLINABLE intersperse #-}
931
932
933
934-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
79917d53 935packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x)
9e9bb0ce 936packChars = Data.Profunctor.dimap to (fmap from)
937 where
938 -- to :: Monad m => Producer Char m x -> Producer Text m x
7ed76745 939 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
9e9bb0ce 940
941 step diffAs c = diffAs . (c:)
942
943 done diffAs = T.pack (diffAs [])
944
945 -- from :: Monad m => Producer Text m x -> Producer Char m x
946 from p = for p (each . T.unpack)
79917d53 947
9e9bb0ce 948{-# INLINABLE packChars #-}
949
79917d53 950defaultChunkSize :: Int
951defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
0f8c6f1b 952
953-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
954chunksOf
955 :: (Monad m, Integral n)
c3c19f9b 956 => n -> Lens (Producer Text m x)
957 (Producer Text m y)
958 (FreeT (Producer Text m) m x)
959 (FreeT (Producer Text m) m y)
0f8c6f1b 960chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
961 where
962 go p = do
963 x <- next p
964 return $ case x of
7ed76745 965 Left r -> Pure r
966 Right (txt, p') -> Free $ do
0f8c6f1b 967 p'' <- (yield txt >> p') ^. splitAt n
7ed76745 968 return $ FreeT (go p'')
0f8c6f1b 969{-# INLINABLE chunksOf #-}
970
971
62e8521c 972{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 973 predicate
974-}
1677dc12 975splitsWith
91727d11 976 :: (Monad m)
977 => (Char -> Bool)
978 -> Producer Text m r
7ed76745 979 -> FreeT (Producer Text m) m r
980splitsWith predicate p0 = FreeT (go0 p0)
91727d11 981 where
982 go0 p = do
983 x <- next p
984 case x of
7ed76745 985 Left r -> return (Pure r)
31f41a5d 986 Right (txt, p') ->
987 if (T.null txt)
91727d11 988 then go0 p'
7ed76745 989 else return $ Free $ do
9e9bb0ce 990 p'' <- (yield txt >> p') ^. span (not . predicate)
7ed76745 991 return $ FreeT (go1 p'')
91727d11 992 go1 p = do
993 x <- nextChar p
994 return $ case x of
7ed76745 995 Left r -> Pure r
996 Right (_, p') -> Free $ do
9e9bb0ce 997 p'' <- p' ^. span (not . predicate)
7ed76745 998 return $ FreeT (go1 p'')
1677dc12 999{-# INLINABLE splitsWith #-}
91727d11 1000
31f41a5d 1001-- | Split a text stream using the given 'Char' as the delimiter
0f8c6f1b 1002splits :: (Monad m)
c3c19f9b 1003 => Char -> Lens (Producer Text m x)
1004 (Producer Text m y)
1005 (FreeT (Producer Text m) m x)
1006 (FreeT (Producer Text m) m y)
0f8c6f1b 1007splits c k p =
7ed76745 1008 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
0f8c6f1b 1009{-# INLINABLE splits #-}
1010
1011{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
1012 given equivalence relation
1013-}
1014groupsBy
1015 :: Monad m
1016 => (Char -> Char -> Bool)
c3c19f9b 1017 -> Lens (Producer Text m x)
1018 (Producer Text m y)
1019 (FreeT (Producer Text m) m x)
1020 (FreeT (Producer Text m) m y)
7ed76745 1021groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
0f8c6f1b 1022 go p = do x <- next p
7ed76745 1023 case x of Left r -> return (Pure r)
0f8c6f1b 1024 Right (bs, p') -> case T.uncons bs of
1025 Nothing -> go p'
7ed76745 1026 Just (c, _) -> do return $ Free $ do
0f8c6f1b 1027 p'' <- (yield bs >> p')^.span (equals c)
7ed76745 1028 return $ FreeT (go p'')
0f8c6f1b 1029{-# INLINABLE groupsBy #-}
1030
1031
1032-- | Like 'groupsBy', where the equality predicate is ('==')
1033groups
1034 :: Monad m
c3c19f9b 1035 => Lens (Producer Text m x)
1036 (Producer Text m y)
1037 (FreeT (Producer Text m) m x)
1038 (FreeT (Producer Text m) m y)
0f8c6f1b 1039groups = groupsBy (==)
1040{-# INLINABLE groups #-}
1041
91727d11 1042
91727d11 1043
62e8521c 1044{-| Split a text stream into 'FreeT'-delimited lines
91727d11 1045-}
1046lines
c3c19f9b 1047 :: (Monad m)
1048 => Lens (Producer Text m x)
1049 (Producer Text m y)
1050 (FreeT (Producer Text m) m x)
1051 (FreeT (Producer Text m) m y)
1052lines k p = fmap _unlines (k (_lines p))
1053{-# INLINABLE lines #-}
1054
1055_lines
1056 :: Monad m
1057 => Producer Text m x -> FreeT (Producer Text m) m x
1058_lines p0 = FreeT (go0 p0)
0f8c6f1b 1059 where
1060 go0 p = do
1061 x <- next p
1062 case x of
7ed76745 1063 Left r -> return (Pure r)
0f8c6f1b 1064 Right (txt, p') ->
1065 if (T.null txt)
1066 then go0 p'
7ed76745 1067 else return $ Free $ go1 (yield txt >> p')
0f8c6f1b 1068 go1 p = do
1069 p' <- p ^. break ('\n' ==)
7ed76745 1070 return $ FreeT $ do
0f8c6f1b 1071 x <- nextChar p'
1072 case x of
7ed76745 1073 Left r -> return $ Pure r
0f8c6f1b 1074 Right (_, p'') -> go0 p''
c3c19f9b 1075{-# INLINABLE _lines #-}
1076
1077_unlines
1078 :: Monad m
1079 => FreeT (Producer Text m) m x -> Producer Text m x
1080_unlines = concats . PG.maps (<* yield (T.singleton '\n'))
1081{-# INLINABLE _unlines #-}
1082
0f8c6f1b 1083
91727d11 1084
31f41a5d 1085
31f41a5d 1086-- | Split a text stream into 'FreeT'-delimited words
91727d11 1087words
79917d53 1088 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
0f8c6f1b 1089words = Data.Profunctor.dimap go (fmap _unwords)
91727d11 1090 where
7ed76745 1091 go p = FreeT $ do
cf10d6f1 1092 x <- next (p >-> dropWhile isSpace)
1093 return $ case x of
7ed76745 1094 Left r -> Pure r
1095 Right (bs, p') -> Free $ do
9e9bb0ce 1096 p'' <- (yield bs >> p') ^. break isSpace
cf10d6f1 1097 return (go p'')
7ed76745 1098 _unwords = PG.intercalates (yield $ T.singleton ' ')
0f8c6f1b 1099
91727d11 1100{-# INLINABLE words #-}
1101
cf10d6f1 1102
31f41a5d 1103{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1104 interspersing a text stream in between them
91727d11 1105-}
1106intercalate
1107 :: (Monad m)
1108 => Producer Text m ()
1109 -> FreeT (Producer Text m) m r
1110 -> Producer Text m r
1111intercalate p0 = go0
1112 where
1113 go0 f = do
7ed76745 1114 x <- lift (runFreeT f)
91727d11 1115 case x of
7ed76745 1116 Pure r -> return r
1117 Free p -> do
91727d11 1118 f' <- p
1119 go1 f'
1120 go1 f = do
7ed76745 1121 x <- lift (runFreeT f)
91727d11 1122 case x of
7ed76745 1123 Pure r -> return r
1124 Free p -> do
91727d11 1125 p0
1126 f' <- p
1127 go1 f'
1128{-# INLINABLE intercalate #-}
1129
62e8521c 1130{-| Join 'FreeT'-delimited lines into a text stream
91727d11 1131-}
1132unlines
c3c19f9b 1133 :: (Monad m)
1134 => Lens (FreeT (Producer Text m) m x)
1135 (FreeT (Producer Text m) m y)
1136 (Producer Text m x)
1137 (Producer Text m y)
1138
1139unlines k p = fmap _lines (k (_unlines p))
91727d11 1140{-# INLINABLE unlines #-}
1141
31f41a5d 1142{-| Join 'FreeT'-delimited words into a text stream
91727d11 1143-}
1144unwords
1145 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
7fc48f7c 1146unwords = intercalate (yield $ T.singleton ' ')
91727d11 1147{-# INLINABLE unwords #-}
1148
91727d11 1149
91727d11 1150{- $reexports
91727d11 1151
1152 @Data.Text@ re-exports the 'Text' type.
1153
0f8c6f1b 1154 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
64e03122 1155-}
1156
bbdfd305 1157