]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
Revert "started generalizing lenses"
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
9667f797
GG
1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
91727d11 3
7faef8bc 4module Pipes.Text (
82fe661b 5 -- * Effectful Text
e4b6dc67 6 -- $intro
7
82fe661b 8 -- * Lenses
9 -- $lenses
10
11 -- ** @view@ \/ @(^.)@
12 -- $view
13
14 -- ** @over@ \/ @(%~)@
15 -- $over
16
17 -- ** @zoom@
18 -- $zoom
19
20 -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
21 -- $special
22
91727d11 23 -- * Producers
1a83ae4e 24 fromLazy
91727d11 25
26 -- * Pipes
1677dc12 27 , map
28 , concatMap
29 , take
30 , drop
31 , takeWhile
32 , dropWhile
33 , filter
34 , scan
1677dc12 35 , pack
36 , unpack
37 , toCaseFold
38 , toLower
39 , toUpper
40 , stripStart
91727d11 41
42 -- * Folds
1677dc12 43 , toLazy
44 , toLazyM
45 , foldChars
46 , head
47 , last
48 , null
49 , length
50 , any
51 , all
52 , maximum
53 , minimum
54 , find
55 , index
56 , count
57
58 -- * Primitive Character Parsers
1677dc12 59 , nextChar
60 , drawChar
61 , unDrawChar
62 , peekChar
9e9bb0ce 63 , isEndOfChars
1677dc12 64
65 -- * Parsing Lenses
9e9bb0ce 66 , splitAt
1677dc12 67 , span
68 , break
69 , groupBy
70 , group
9e9bb0ce 71 , word
72 , line
1677dc12 73
74 -- * FreeT Splitters
75 , chunksOf
76 , splitsWith
0f8c6f1b 77 , splits
1a83ae4e 78 , groupsBy
79 , groups
1677dc12 80 , lines
81 , words
82
91727d11 83 -- * Transformations
1677dc12 84 , intersperse
9e9bb0ce 85 , packChars
31f41a5d 86
91727d11 87 -- * Joiners
1677dc12 88 , intercalate
89 , unlines
90 , unwords
9e9bb0ce 91
1a83ae4e 92 -- * Re-exports
91727d11 93 -- $reexports
1677dc12 94 , module Data.ByteString
95 , module Data.Text
96 , module Data.Profunctor
1677dc12 97 , module Pipes.Parse
7ed76745 98 , module Pipes.Group
91727d11 99 ) where
100
0f8c6f1b 101import Control.Applicative ((<*))
70125641 102import Control.Monad (liftM, join)
9e9bb0ce 103import Control.Monad.Trans.State.Strict (StateT(..), modify)
91727d11 104import qualified Data.Text as T
91727d11 105import Data.Text (Text)
106import qualified Data.Text.Lazy as TL
31f41a5d 107import Data.ByteString (ByteString)
1677dc12 108import Data.Functor.Constant (Constant(Constant, getConstant))
91727d11 109import Data.Functor.Identity (Identity)
1677dc12 110import Data.Profunctor (Profunctor)
111import qualified Data.Profunctor
91727d11 112import Pipes
7fc48f7c 113import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
7ed76745 114import qualified Pipes.Group as PG
91727d11 115import qualified Pipes.Parse as PP
7ed76745 116import Pipes.Parse (Parser)
79917d53 117import Pipes.Text.Encoding (Lens'_, Iso'_)
91727d11 118import qualified Pipes.Prelude as P
91727d11 119import Data.Char (isSpace)
1a83ae4e 120import Data.Word (Word8)
79917d53 121import Foreign.Storable (sizeOf)
122import Data.Bits (shiftL)
91727d11 123import Prelude hiding (
124 all,
125 any,
126 break,
127 concat,
128 concatMap,
129 drop,
130 dropWhile,
131 elem,
132 filter,
133 head,
134 last,
135 lines,
136 length,
137 map,
138 maximum,
139 minimum,
140 notElem,
141 null,
142 readFile,
143 span,
144 splitAt,
145 take,
146 takeWhile,
147 unlines,
148 unwords,
149 words,
150 writeFile )
151
e4b6dc67 152{- $intro
e1ed9621 153 This package provides @pipes@ utilities for /text streams/ or /character streams/,
154 realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
155 and thus you will generally want @Data.Text@ in scope. But the type
156 @Producer Text m r@ ,as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
e4b6dc67 157
158 This particular module provides many functions equivalent in one way or another to
159 the pure functions in
160 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
161 They transform, divide, group and fold text streams. Though @Producer Text m r@
162 is the type of \'effectful Text\', the functions in this module are \'pure\'
163 in the sense that they are uniformly monad-independent.
164 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
165 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
166 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
167
168 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
169 strict Text: the implementation is arranged so that the individual strict 'Text'
170 chunks are kept to a reasonable size; the user is not aware of the divisions
171 between the connected 'Text' chunks.
172 So also here: the functions in this module are designed to operate on streams that
173 are insensitive to text boundaries. This means that they may freely split
174 text into smaller texts and /discard empty texts/. The objective, though, is
175 that they should /never concatenate texts/ in order to provide strict upper
176 bounds on memory usage.
177
178 For example, to stream only the first three lines of 'stdin' to 'stdout' you
179 might write:
180
181> import Pipes
182> import qualified Pipes.Text as Text
183> import qualified Pipes.Text.IO as Text
184> import Pipes.Group (takes')
185> import Lens.Family
186>
187> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
188> where
189> takeLines n = Text.unlines . takes' n . view Text.lines
190
191 The above program will never bring more than one chunk of text (~ 32 KB) into
192 memory, no matter how long the lines are.
193
82fe661b 194-}
195{- $lenses
e4b6dc67 196 As this example shows, one superficial difference from @Data.Text.Lazy@
80a490ef 197 is that many of the operations, like 'lines', are \'lensified\'; this has a
198 number of advantages (where it is possible); in particular it facilitates their
199 use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
200 sense.) The disadvantage, famously, is that the messages you get for type errors can be
201 a little alarming. The remarks that follow in this section are for non-lens adepts.
202
203 Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
204 intuitively corresponding function when used with @view@ or @(^.)@. Instead of
205 writing:
206
207 > splitAt 17 producer
208
209 as we would with the Prelude or Text functions, we write
210
211 > view (splitAt 17) producer
212
82fe661b 213 or equivalently
80a490ef 214
215 > producer ^. splitAt 17
e4b6dc67 216
80a490ef 217 This may seem a little indirect, but note that many equivalents of
218 @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
219 corresponding functions by prefixing them with @(>->)@. Thus something like
e4b6dc67 220
221> stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
222
223 would drop the leading white space from each line.
224
80a490ef 225 The lenses in this library are marked as /improper/; this just means that
e1ed9621 226 they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
80a490ef 227 Just for this reason, though, the magnificent complexities of the lens libraries
228 are a distraction. The lens combinators to keep in mind, the ones that make sense for
229 our lenses, are @view@ \/ @(^.)@), @over@ \/ @(%~)@ , and @zoom@.
230
79917d53 231 One need only keep in mind that if @l@ is a @Lens'_ a b@, then:
82fe661b 232
233-}
234{- $view
235 @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
80a490ef 236 is the corresponding @b@; as was said above, this function will be exactly the
237 function you think it is, given its name. Thus to uppercase the first n characters
238 of a Producer, leaving the rest the same, we could write:
239
240
241 > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
242 > p'
82fe661b 243-}
244{- $over
245 @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
80a490ef 246 @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
247 the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
248 (it can also be written @l %~ f@).
249 For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
250 So above we might have written things like these:
251
252 > stripLines = Text.lines %~ maps (>-> Text.stripStart)
253 > stripLines = over Text.lines (maps (>-> Text.stripStart))
254 > upper n = Text.splitAt n %~ (>-> Text.toUpper)
82fe661b 255
256-}
257{- $zoom
258 @zoom l@, finally, is a function from a @Parser b m r@
80a490ef 259 to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
260 Its use is easiest to see with an decoding lens like 'utf8', which
261 \"sees\" a Text producer hidden inside a ByteString producer:
262 @drawChar@ is a Text parser, returning a @Maybe Char@, @zoom utf8 drawChar@ is
263 a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
264 a list of everything produced from a Producer, leaving only the return value; it would
265 usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
266 returns a list of Text chunks containing the first seventeen Chars, and returns the rest of
267 the Text Producer for further parsing. Suppose that we want, inexplicably, to
268 modify the casing of a Text Producer according to any instruction it might
269 contain at the start. Then we might write something like this:
270
271> obey :: Monad m => Producer Text m b -> Producer Text m b
e1ed9621 272> obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
80a490ef 273> let seven = T.concat ts
274> case T.toUpper seven of
275> "TOUPPER" -> p' >-> Text.toUpper
276> "TOLOWER" -> p' >-> Text.toLower
277> _ -> do yield seven
278> p'
279
e1ed9621 280
281> >>> let doc = each ["toU","pperTh","is document.\n"]
282> >>> runEffect $ obey doc >-> Text.stdout
283> THIS DOCUMENT.
284
80a490ef 285 The purpose of exporting lenses is the mental economy achieved with this three-way
286 applicability. That one expression, e.g. @lines@ or @splitAt 17@ can have these
287 three uses is no more surprising than that a pipe can act as a function modifying
288 the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
289 -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
290 @pipe >-> consumer@
291
292 The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
293 both <http://hackage.haskell.org/package/lens lens> and
e4b6dc67 294 <http://hackage.haskell.org/package/lens-family lens-family> The use of 'zoom' is explained
295 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
80a490ef 296 and to some extent in the @Pipes.Text.Encoding@ module here.
e4b6dc67 297
82fe661b 298-}
299{- $special
e4b6dc67 300 These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
301 This is in the types that are most closely associated with our central text type,
302 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
303
304> splitAt :: Int -> Text -> (Text, Text)
305> lines :: Text -> [Text]
306> chunksOf :: Int -> Text -> [Text]
307
308 which relate a Text with a pair of Texts or a list of Texts.
309 The corresponding functions here (taking account of \'lensification\') are
310
311> view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
312> view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
313> view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
314
315 Some of the types may be more readable if you imagine that we have introduced
316 our own type synonyms
317
318> type Text m r = Producer T.Text m r
319> type Texts m r = FreeT (Producer T.Text m) m r
320
321 Then we would think of the types above as
322
323> view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
324> view lines :: (Monad m) => Text m r -> Texts m r
325> view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
326
327 which brings one closer to the types of the similar functions in @Data.Text.Lazy@
328
329 In the type @Producer Text m (Producer Text m r)@ the second
330 element of the \'pair\' of effectful Texts cannot simply be retrieved
331 with something like 'snd'. This is an \'effectful\' pair, and one must work
332 through the effects of the first element to arrive at the second Text stream, even
333 if you are proposing to throw the Text in the first element away.
334 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
335
336> join :: Monad m => Producer Text m (Producer m r) -> Producer m r
337
e1ed9621 338 The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
e4b6dc67 339 @FreeT (Producer m Text) m r@ -- our @Texts m r@ -- is the type of (effectful)
340 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
341 together things of the forms:
342
343> r
344> (Text,r)
345> (Text, (Text, r))
346> (Text, (Text, (Text, r)))
347> (Text, (Text, (Text, (Text, r))))
348> ...
349
80a490ef 350 (We might also have identified the sum of those types with @Free ((,) Text) r@
351 -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
352
353 Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
354 @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
e4b6dc67 355
356> m r
80a490ef 357> Text m r
358> Text m (Text m r)
359> Text m (Text m (Text m r))
360> Text m (Text m (Text m (Text m r)))
e4b6dc67 361> ...
362
80a490ef 363 We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
364 or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
365 result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@ .
366
e4b6dc67 367 One might think that
368
79917d53 369> lines :: Monad m => Lens'_ (Producer Text m r) (FreeT (Producer Text m) m r)
e4b6dc67 370> view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
371
372 should really have the type
373
374> lines :: Monad m => Pipe Text Text m r
375
376 as e.g. 'toUpper' does. But this would spoil the control we are
377 attempting to maintain over the size of chunks. It is in fact just
378 as unreasonable to want such a pipe as to want
379
380> Data.Text.Lazy.lines :: Text -> Text
381
382 to 'rechunk' the strict Text chunks inside the lazy Text to respect
383 line boundaries. In fact we have
384
385> Data.Text.Lazy.lines :: Text -> [Text]
386> Prelude.lines :: String -> [String]
387
388 where the elements of the list are themselves lazy Texts or Strings; the use
389 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
390
391 The @Pipes.Group@ module, which can generally be imported without qualification,
e1ed9621 392 provides many functions for working with things of type @FreeT (Producer a m) m r@.
80a490ef 393 In particular it conveniently exports the constructors for @FreeT@ and the associated
394 @FreeF@ type -- a fancy form of @Either@, namely
e4b6dc67 395
80a490ef 396> data FreeF f a b = Pure a | Free (f b)
397
398 for pattern-matching. Consider the implementation of the 'words' function, or
399 of the part of the lens that takes us to the words; it is compact but exhibits many
400 of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
401 constuctors. Keep in mind that
402
403> newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
404> next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
405
406 Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
407 the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
408
409> words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
410> words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
411> x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
412> return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
413> Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
414> Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
415> p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
416> (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
417> return (words p'') -- We then carry on with the rest, which is likely to begin with space.
418
419-}
e4b6dc67 420
91727d11 421-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
422fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
79917d53 423fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 424{-# INLINE fromLazy #-}
91727d11 425
1677dc12 426
d199072b 427(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
428a ^. lens = getConstant (lens Constant a)
429
1677dc12 430
91727d11 431-- | Apply a transformation to each 'Char' in the stream
432map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
433map f = P.map (T.map f)
434{-# INLINABLE map #-}
435
ff38b9f0 436{-# RULES "p >-> map f" forall p f .
437 p >-> map f = for p (\txt -> yield (T.map f txt))
438 #-}
439
31f41a5d 440-- | Map a function over the characters of a text stream and concatenate the results
91727d11 441concatMap
442 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
443concatMap f = P.map (T.concatMap f)
444{-# INLINABLE concatMap #-}
445
ff38b9f0 446{-# RULES "p >-> concatMap f" forall p f .
447 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
448 #-}
7faef8bc 449
ff38b9f0 450
c0343bc9 451-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 452pack :: Monad m => Pipe String Text m r
453pack = P.map T.pack
454{-# INLINEABLE pack #-}
455
ff38b9f0 456{-# RULES "p >-> pack" forall p .
457 p >-> pack = for p (\txt -> yield (T.pack txt))
458 #-}
459
460-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 461unpack :: Monad m => Pipe Text String m r
d4732515 462unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 463{-# INLINEABLE unpack #-}
464
ff38b9f0 465{-# RULES "p >-> unpack" forall p .
466 p >-> unpack = for p (\txt -> yield (T.unpack txt))
467 #-}
d4732515 468
b0d86a59 469-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
470-- here acting as 'Text' pipes, rather as they would on a lazy text
a4913c42 471toCaseFold :: Monad m => Pipe Text Text m r
7faef8bc 472toCaseFold = P.map T.toCaseFold
473{-# INLINEABLE toCaseFold #-}
474
ff38b9f0 475{-# RULES "p >-> toCaseFold" forall p .
476 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
477 #-}
478
479
c0343bc9 480-- | lowercase incoming 'Text'
a4913c42 481toLower :: Monad m => Pipe Text Text m r
7faef8bc 482toLower = P.map T.toLower
483{-# INLINEABLE toLower #-}
484
ff38b9f0 485{-# RULES "p >-> toLower" forall p .
486 p >-> toLower = for p (\txt -> yield (T.toLower txt))
487 #-}
488
c0343bc9 489-- | uppercase incoming 'Text'
c70edb9d 490toUpper :: Monad m => Pipe Text Text m r
7faef8bc 491toUpper = P.map T.toUpper
492{-# INLINEABLE toUpper #-}
493
ff38b9f0 494{-# RULES "p >-> toUpper" forall p .
495 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
496 #-}
497
c0343bc9 498-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 499stripStart :: Monad m => Pipe Text Text m r
500stripStart = do
501 chunk <- await
502 let text = T.stripStart chunk
503 if T.null text
504 then stripStart
b0d86a59 505 else do yield text
506 cat
7faef8bc 507{-# INLINEABLE stripStart #-}
508
31f41a5d 509-- | @(take n)@ only allows @n@ individual characters to pass;
510-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 511take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
512take n0 = go n0 where
513 go n
514 | n <= 0 = return ()
515 | otherwise = do
31f41a5d 516 txt <- await
517 let len = fromIntegral (T.length txt)
91727d11 518 if (len > n)
31f41a5d 519 then yield (T.take (fromIntegral n) txt)
91727d11 520 else do
31f41a5d 521 yield txt
91727d11 522 go (n - len)
523{-# INLINABLE take #-}
524
31f41a5d 525-- | @(drop n)@ drops the first @n@ characters
91727d11 526drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
527drop n0 = go n0 where
528 go n
529 | n <= 0 = cat
530 | otherwise = do
31f41a5d 531 txt <- await
532 let len = fromIntegral (T.length txt)
91727d11 533 if (len >= n)
534 then do
31f41a5d 535 yield (T.drop (fromIntegral n) txt)
91727d11 536 cat
537 else go (n - len)
538{-# INLINABLE drop #-}
539
31f41a5d 540-- | Take characters until they fail the predicate
91727d11 541takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
542takeWhile predicate = go
543 where
544 go = do
31f41a5d 545 txt <- await
546 let (prefix, suffix) = T.span predicate txt
91727d11 547 if (T.null suffix)
548 then do
31f41a5d 549 yield txt
91727d11 550 go
551 else yield prefix
552{-# INLINABLE takeWhile #-}
553
31f41a5d 554-- | Drop characters until they fail the predicate
91727d11 555dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
556dropWhile predicate = go where
557 go = do
31f41a5d 558 txt <- await
559 case T.findIndex (not . predicate) txt of
91727d11 560 Nothing -> go
561 Just i -> do
31f41a5d 562 yield (T.drop i txt)
91727d11 563 cat
564{-# INLINABLE dropWhile #-}
565
566-- | Only allows 'Char's to pass if they satisfy the predicate
567filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
568filter predicate = P.map (T.filter predicate)
569{-# INLINABLE filter #-}
570
ff38b9f0 571{-# RULES "p >-> filter q" forall p q .
572 p >-> filter q = for p (\txt -> yield (T.filter q txt))
573 #-}
574
31f41a5d 575-- | Strict left scan over the characters
91727d11 576scan
577 :: (Monad m)
578 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
11645cdc
GG
579scan step begin = do
580 yield (T.singleton begin)
581 go begin
91727d11 582 where
31f41a5d 583 go c = do
584 txt <- await
585 let txt' = T.scanl step c txt
586 c' = T.last txt'
11645cdc 587 yield (T.tail txt')
31f41a5d 588 go c'
91727d11 589{-# INLINABLE scan #-}
590
591{-| Fold a pure 'Producer' of strict 'Text's into a lazy
592 'TL.Text'
593-}
594toLazy :: Producer Text Identity () -> TL.Text
595toLazy = TL.fromChunks . P.toList
596{-# INLINABLE toLazy #-}
597
598{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
599 'TL.Text'
600
601 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
602 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
603 immediately as they are generated instead of loading them all into memory.
604-}
605toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
606toLazyM = liftM TL.fromChunks . P.toListM
607{-# INLINABLE toLazyM #-}
608
31f41a5d 609-- | Reduce the text stream using a strict left fold over characters
64e03122 610foldChars
91727d11 611 :: Monad m
612 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 613foldChars step begin done = P.fold (T.foldl' step) begin done
1677dc12 614{-# INLINABLE foldChars #-}
91727d11 615
616-- | Retrieve the first 'Char'
617head :: (Monad m) => Producer Text m () -> m (Maybe Char)
618head = go
619 where
620 go p = do
621 x <- nextChar p
622 case x of
623 Left _ -> return Nothing
31f41a5d 624 Right (c, _) -> return (Just c)
91727d11 625{-# INLINABLE head #-}
626
627-- | Retrieve the last 'Char'
628last :: (Monad m) => Producer Text m () -> m (Maybe Char)
629last = go Nothing
630 where
631 go r p = do
632 x <- next p
633 case x of
634 Left () -> return r
31f41a5d 635 Right (txt, p') ->
636 if (T.null txt)
91727d11 637 then go r p'
31f41a5d 638 else go (Just $ T.last txt) p'
91727d11 639{-# INLINABLE last #-}
640
641-- | Determine if the stream is empty
642null :: (Monad m) => Producer Text m () -> m Bool
643null = P.all T.null
644{-# INLINABLE null #-}
645
62e8521c 646-- | Count the number of characters in the stream
91727d11 647length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 648length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 649{-# INLINABLE length #-}
650
651-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
652any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
653any predicate = P.any (T.any predicate)
654{-# INLINABLE any #-}
655
656-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
657all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
658all predicate = P.all (T.all predicate)
659{-# INLINABLE all #-}
660
62e8521c 661-- | Return the maximum 'Char' within a text stream
91727d11 662maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
663maximum = P.fold step Nothing id
664 where
31f41a5d 665 step mc txt =
666 if (T.null txt)
667 then mc
668 else Just $ case mc of
669 Nothing -> T.maximum txt
670 Just c -> max c (T.maximum txt)
91727d11 671{-# INLINABLE maximum #-}
672
62e8521c 673-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 674minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
675minimum = P.fold step Nothing id
676 where
31f41a5d 677 step mc txt =
678 if (T.null txt)
679 then mc
680 else case mc of
681 Nothing -> Just (T.minimum txt)
682 Just c -> Just (min c (T.minimum txt))
91727d11 683{-# INLINABLE minimum #-}
684
91727d11 685-- | Find the first element in the stream that matches the predicate
686find
687 :: (Monad m)
688 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
689find predicate p = head (p >-> filter predicate)
690{-# INLINABLE find #-}
691
62e8521c 692-- | Index into a text stream
91727d11 693index
694 :: (Monad m, Integral a)
695 => a-> Producer Text m () -> m (Maybe Char)
696index n p = head (p >-> drop n)
697{-# INLINABLE index #-}
698
63ea9ffd 699
31f41a5d 700-- | Store a tally of how many segments match the given 'Text'
701count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
702count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
703{-# INLINABLE count #-}
704
9e9bb0ce 705
1a83ae4e 706-- | Consume the first character from a stream of 'Text'
707--
708-- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
709-- succeeds with a 'Right' providing the next character and the remainder of the
710-- 'Producer'.
9e9bb0ce 711
9e9bb0ce 712nextChar
713 :: (Monad m)
714 => Producer Text m r
715 -> m (Either r (Char, Producer Text m r))
716nextChar = go
717 where
718 go p = do
719 x <- next p
720 case x of
721 Left r -> return (Left r)
722 Right (txt, p') -> case (T.uncons txt) of
723 Nothing -> go p'
724 Just (c, txt') -> return (Right (c, yield txt' >> p'))
725{-# INLINABLE nextChar #-}
726
1a83ae4e 727-- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
728
9e9bb0ce 729drawChar :: (Monad m) => Parser Text m (Maybe Char)
730drawChar = do
731 x <- PP.draw
732 case x of
733 Nothing -> return Nothing
734 Just txt -> case (T.uncons txt) of
735 Nothing -> drawChar
736 Just (c, txt') -> do
737 PP.unDraw txt'
738 return (Just c)
739{-# INLINABLE drawChar #-}
740
741-- | Push back a 'Char' onto the underlying 'Producer'
742unDrawChar :: (Monad m) => Char -> Parser Text m ()
743unDrawChar c = modify (yield (T.singleton c) >>)
744{-# INLINABLE unDrawChar #-}
745
746{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
747 push the 'Char' back
748
749> peekChar = do
750> x <- drawChar
751> case x of
752> Left _ -> return ()
753> Right c -> unDrawChar c
754> return x
1a83ae4e 755
9e9bb0ce 756-}
1a83ae4e 757
9e9bb0ce 758peekChar :: (Monad m) => Parser Text m (Maybe Char)
759peekChar = do
760 x <- drawChar
761 case x of
762 Nothing -> return ()
763 Just c -> unDrawChar c
764 return x
765{-# INLINABLE peekChar #-}
766
767{-| Check if the underlying 'Producer' has no more characters
768
769 Note that this will skip over empty 'Text' chunks, unlike
770 'PP.isEndOfInput' from @pipes-parse@, which would consider
771 an empty 'Text' a valid bit of input.
772
773> isEndOfChars = liftM isLeft peekChar
774-}
775isEndOfChars :: (Monad m) => Parser Text m Bool
776isEndOfChars = do
777 x <- peekChar
778 return (case x of
779 Nothing -> True
780 Just _-> False )
781{-# INLINABLE isEndOfChars #-}
782
783
31f41a5d 784-- | Splits a 'Producer' after the given number of characters
91727d11 785splitAt
786 :: (Monad m, Integral n)
787 => n
d199072b 788 -> Lens'_ (Producer Text m r)
789 (Producer Text m (Producer Text m r))
9e9bb0ce 790splitAt n0 k p0 = fmap join (k (go n0 p0))
91727d11 791 where
792 go 0 p = return p
793 go n p = do
794 x <- lift (next p)
795 case x of
796 Left r -> return (return r)
31f41a5d 797 Right (txt, p') -> do
798 let len = fromIntegral (T.length txt)
91727d11 799 if (len <= n)
800 then do
31f41a5d 801 yield txt
91727d11 802 go (n - len) p'
803 else do
31f41a5d 804 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 805 yield prefix
806 return (yield suffix >> p')
807{-# INLINABLE splitAt #-}
808
91727d11 809
1a83ae4e 810-- | Split a text stream in two, producing the longest
811-- consecutive group of characters that satisfies the predicate
812-- and returning the rest
813
91727d11 814span
815 :: (Monad m)
816 => (Char -> Bool)
d199072b 817 -> Lens'_ (Producer Text m r)
818 (Producer Text m (Producer Text m r))
9e9bb0ce 819span predicate k p0 = fmap join (k (go p0))
91727d11 820 where
821 go p = do
822 x <- lift (next p)
823 case x of
824 Left r -> return (return r)
31f41a5d 825 Right (txt, p') -> do
826 let (prefix, suffix) = T.span predicate txt
91727d11 827 if (T.null suffix)
828 then do
31f41a5d 829 yield txt
91727d11 830 go p'
831 else do
832 yield prefix
833 return (yield suffix >> p')
834{-# INLINABLE span #-}
835
1a83ae4e 836{-| Split a text stream in two, producing the longest
62e8521c 837 consecutive group of characters that don't satisfy the predicate
91727d11 838-}
839break
840 :: (Monad m)
841 => (Char -> Bool)
d199072b 842 -> Lens'_ (Producer Text m r)
843 (Producer Text m (Producer Text m r))
91727d11 844break predicate = span (not . predicate)
845{-# INLINABLE break #-}
846
9e9bb0ce 847{-| Improper lens that splits after the first group of equivalent Chars, as
848 defined by the given equivalence relation
849-}
850groupBy
851 :: (Monad m)
852 => (Char -> Char -> Bool)
d199072b 853 -> Lens'_ (Producer Text m r)
854 (Producer Text m (Producer Text m r))
9e9bb0ce 855groupBy equals k p0 = fmap join (k ((go p0))) where
856 go p = do
857 x <- lift (next p)
858 case x of
859 Left r -> return (return r)
860 Right (txt, p') -> case T.uncons txt of
861 Nothing -> go p'
862 Just (c, _) -> (yield txt >> p') ^. span (equals c)
863{-# INLINABLE groupBy #-}
864
865-- | Improper lens that splits after the first succession of identical 'Char' s
866group :: Monad m
79917d53 867 => Lens'_ (Producer Text m r)
9e9bb0ce 868 (Producer Text m (Producer Text m r))
869group = groupBy (==)
870{-# INLINABLE group #-}
871
872{-| Improper lens that splits a 'Producer' after the first word
873
874 Unlike 'words', this does not drop leading whitespace
875-}
876word :: (Monad m)
d199072b 877 => Lens'_ (Producer Text m r)
878 (Producer Text m (Producer Text m r))
9e9bb0ce 879word k p0 = fmap join (k (to p0))
880 where
881 to p = do
882 p' <- p^.span isSpace
883 p'^.break isSpace
884{-# INLINABLE word #-}
885
886
887line :: (Monad m)
d199072b 888 => Lens'_ (Producer Text m r)
889 (Producer Text m (Producer Text m r))
9e9bb0ce 890line = break (== '\n')
891
892{-# INLINABLE line #-}
893
894
895-- | Intersperse a 'Char' in between the characters of stream of 'Text'
896intersperse
897 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
898intersperse c = go0
899 where
900 go0 p = do
901 x <- lift (next p)
902 case x of
903 Left r -> return r
904 Right (txt, p') -> do
905 yield (T.intersperse c txt)
906 go1 p'
907 go1 p = do
908 x <- lift (next p)
909 case x of
910 Left r -> return r
911 Right (txt, p') -> do
912 yield (T.singleton c)
913 yield (T.intersperse c txt)
914 go1 p'
915{-# INLINABLE intersperse #-}
916
917
918
919-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
79917d53 920packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x)
9e9bb0ce 921packChars = Data.Profunctor.dimap to (fmap from)
922 where
923 -- to :: Monad m => Producer Char m x -> Producer Text m x
7ed76745 924 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
9e9bb0ce 925
926 step diffAs c = diffAs . (c:)
927
928 done diffAs = T.pack (diffAs [])
929
930 -- from :: Monad m => Producer Text m x -> Producer Char m x
931 from p = for p (each . T.unpack)
79917d53 932
9e9bb0ce 933{-# INLINABLE packChars #-}
934
79917d53 935defaultChunkSize :: Int
936defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
0f8c6f1b 937
938-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
939chunksOf
940 :: (Monad m, Integral n)
d199072b 941 => n -> Lens'_ (Producer Text m r)
942 (FreeT (Producer Text m) m r)
0f8c6f1b 943chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
944 where
945 go p = do
946 x <- next p
947 return $ case x of
7ed76745 948 Left r -> Pure r
949 Right (txt, p') -> Free $ do
0f8c6f1b 950 p'' <- (yield txt >> p') ^. splitAt n
7ed76745 951 return $ FreeT (go p'')
0f8c6f1b 952{-# INLINABLE chunksOf #-}
953
954
62e8521c 955{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 956 predicate
957-}
1677dc12 958splitsWith
91727d11 959 :: (Monad m)
960 => (Char -> Bool)
961 -> Producer Text m r
7ed76745 962 -> FreeT (Producer Text m) m r
963splitsWith predicate p0 = FreeT (go0 p0)
91727d11 964 where
965 go0 p = do
966 x <- next p
967 case x of
7ed76745 968 Left r -> return (Pure r)
31f41a5d 969 Right (txt, p') ->
970 if (T.null txt)
91727d11 971 then go0 p'
7ed76745 972 else return $ Free $ do
9e9bb0ce 973 p'' <- (yield txt >> p') ^. span (not . predicate)
7ed76745 974 return $ FreeT (go1 p'')
91727d11 975 go1 p = do
976 x <- nextChar p
977 return $ case x of
7ed76745 978 Left r -> Pure r
979 Right (_, p') -> Free $ do
9e9bb0ce 980 p'' <- p' ^. span (not . predicate)
7ed76745 981 return $ FreeT (go1 p'')
1677dc12 982{-# INLINABLE splitsWith #-}
91727d11 983
31f41a5d 984-- | Split a text stream using the given 'Char' as the delimiter
0f8c6f1b 985splits :: (Monad m)
d199072b 986 => Char
987 -> Lens'_ (Producer Text m r)
988 (FreeT (Producer Text m) m r)
0f8c6f1b 989splits c k p =
7ed76745 990 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
0f8c6f1b 991{-# INLINABLE splits #-}
992
993{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
994 given equivalence relation
995-}
996groupsBy
997 :: Monad m
998 => (Char -> Char -> Bool)
d199072b 999 -> Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x)
7ed76745 1000groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
0f8c6f1b 1001 go p = do x <- next p
7ed76745 1002 case x of Left r -> return (Pure r)
0f8c6f1b 1003 Right (bs, p') -> case T.uncons bs of
1004 Nothing -> go p'
7ed76745 1005 Just (c, _) -> do return $ Free $ do
0f8c6f1b 1006 p'' <- (yield bs >> p')^.span (equals c)
7ed76745 1007 return $ FreeT (go p'')
0f8c6f1b 1008{-# INLINABLE groupsBy #-}
1009
1010
1011-- | Like 'groupsBy', where the equality predicate is ('==')
1012groups
1013 :: Monad m
d199072b 1014 => Lens'_ (Producer Text m x) (FreeT (Producer Text m) m x)
0f8c6f1b 1015groups = groupsBy (==)
1016{-# INLINABLE groups #-}
1017
91727d11 1018
91727d11 1019
62e8521c 1020{-| Split a text stream into 'FreeT'-delimited lines
91727d11 1021-}
1022lines
d199072b 1023 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
1024lines = Data.Profunctor.dimap _lines (fmap _unlines)
1025 where
1026 _lines p0 = FreeT (go0 p0)
0f8c6f1b 1027 where
1028 go0 p = do
1029 x <- next p
1030 case x of
7ed76745 1031 Left r -> return (Pure r)
0f8c6f1b 1032 Right (txt, p') ->
1033 if (T.null txt)
1034 then go0 p'
7ed76745 1035 else return $ Free $ go1 (yield txt >> p')
0f8c6f1b 1036 go1 p = do
1037 p' <- p ^. break ('\n' ==)
7ed76745 1038 return $ FreeT $ do
0f8c6f1b 1039 x <- nextChar p'
1040 case x of
7ed76745 1041 Left r -> return $ Pure r
0f8c6f1b 1042 Right (_, p'') -> go0 p''
d199072b 1043 -- _unlines
1044 -- :: Monad m
1045 -- => FreeT (Producer Text m) m x -> Producer Text m x
1046 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
1047
0f8c6f1b 1048
d199072b 1049{-# INLINABLE lines #-}
91727d11 1050
31f41a5d 1051
31f41a5d 1052-- | Split a text stream into 'FreeT'-delimited words
91727d11 1053words
79917d53 1054 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
0f8c6f1b 1055words = Data.Profunctor.dimap go (fmap _unwords)
91727d11 1056 where
7ed76745 1057 go p = FreeT $ do
cf10d6f1 1058 x <- next (p >-> dropWhile isSpace)
1059 return $ case x of
7ed76745 1060 Left r -> Pure r
1061 Right (bs, p') -> Free $ do
9e9bb0ce 1062 p'' <- (yield bs >> p') ^. break isSpace
cf10d6f1 1063 return (go p'')
7ed76745 1064 _unwords = PG.intercalates (yield $ T.singleton ' ')
0f8c6f1b 1065
91727d11 1066{-# INLINABLE words #-}
1067
cf10d6f1 1068
31f41a5d 1069{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1070 interspersing a text stream in between them
91727d11 1071-}
1072intercalate
1073 :: (Monad m)
1074 => Producer Text m ()
1075 -> FreeT (Producer Text m) m r
1076 -> Producer Text m r
1077intercalate p0 = go0
1078 where
1079 go0 f = do
7ed76745 1080 x <- lift (runFreeT f)
91727d11 1081 case x of
7ed76745 1082 Pure r -> return r
1083 Free p -> do
91727d11 1084 f' <- p
1085 go1 f'
1086 go1 f = do
7ed76745 1087 x <- lift (runFreeT f)
91727d11 1088 case x of
7ed76745 1089 Pure r -> return r
1090 Free p -> do
91727d11 1091 p0
1092 f' <- p
1093 go1 f'
1094{-# INLINABLE intercalate #-}
1095
62e8521c 1096{-| Join 'FreeT'-delimited lines into a text stream
91727d11 1097-}
1098unlines
d199072b 1099 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1100unlines = go
1101 where
1102 go f = do
1103 x <- lift (runFreeT f)
1104 case x of
1105 Pure r -> return r
1106 Free p -> do
1107 f' <- p
1108 yield $ T.singleton '\n'
1109 go f'
91727d11 1110{-# INLINABLE unlines #-}
1111
31f41a5d 1112{-| Join 'FreeT'-delimited words into a text stream
91727d11 1113-}
1114unwords
1115 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
7fc48f7c 1116unwords = intercalate (yield $ T.singleton ' ')
91727d11 1117{-# INLINABLE unwords #-}
1118
91727d11 1119
91727d11 1120{- $reexports
91727d11 1121
1122 @Data.Text@ re-exports the 'Text' type.
1123
0f8c6f1b 1124 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
64e03122 1125-}
1126
bbdfd305 1127