]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
Bumped bounds for pipes-bytestring. Made types agree with pipes-bytestring where...
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
9667f797
GG
1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
7faef8bc 3module Pipes.Text (
82fe661b 4 -- * Effectful Text
e4b6dc67 5 -- $intro
2f4a83f8 6
82fe661b 7 -- * Lenses
8 -- $lenses
2f4a83f8 9
82fe661b 10 -- ** @view@ \/ @(^.)@
11 -- $view
12
13 -- ** @over@ \/ @(%~)@
14 -- $over
2f4a83f8 15
82fe661b 16 -- ** @zoom@
17 -- $zoom
2f4a83f8 18
82fe661b 19 -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
20 -- $special
2f4a83f8 21
91727d11 22 -- * Producers
1a83ae4e 23 fromLazy
91727d11 24
25 -- * Pipes
1677dc12 26 , map
27 , concatMap
28 , take
1677dc12 29 , takeWhile
1677dc12 30 , filter
1677dc12 31 , toCaseFold
32 , toLower
33 , toUpper
34 , stripStart
2f4a83f8 35 , scan
91727d11 36
37 -- * Folds
1677dc12 38 , toLazy
39 , toLazyM
40 , foldChars
41 , head
42 , last
43 , null
44 , length
45 , any
46 , all
47 , maximum
48 , minimum
49 , find
50 , index
1677dc12 51
52 -- * Primitive Character Parsers
1677dc12 53 , nextChar
54 , drawChar
55 , unDrawChar
56 , peekChar
9e9bb0ce 57 , isEndOfChars
1677dc12 58
2f4a83f8 59 -- * Parsing Lenses
9e9bb0ce 60 , splitAt
1677dc12 61 , span
62 , break
63 , groupBy
64 , group
9e9bb0ce 65 , word
66 , line
1677dc12 67
2f4a83f8 68 -- * Transforming Text and Character Streams
69 , drop
70 , dropWhile
71 , pack
72 , unpack
73 , intersperse
74
75 -- * FreeT Transformations
1677dc12 76 , chunksOf
77 , splitsWith
0f8c6f1b 78 , splits
1a83ae4e 79 , groupsBy
80 , groups
1677dc12 81 , lines
1677dc12 82 , unlines
2f4a83f8 83 , words
1677dc12 84 , unwords
2f4a83f8 85 , intercalate
9e9bb0ce 86
1a83ae4e 87 -- * Re-exports
91727d11 88 -- $reexports
1677dc12 89 , module Data.ByteString
90 , module Data.Text
1677dc12 91 , module Pipes.Parse
7ed76745 92 , module Pipes.Group
91727d11 93 ) where
94
2f4a83f8 95import Control.Applicative ((<*))
70125641 96import Control.Monad (liftM, join)
9e9bb0ce 97import Control.Monad.Trans.State.Strict (StateT(..), modify)
91727d11 98import qualified Data.Text as T
91727d11 99import Data.Text (Text)
100import qualified Data.Text.Lazy as TL
31f41a5d 101import Data.ByteString (ByteString)
1677dc12 102import Data.Functor.Constant (Constant(Constant, getConstant))
91727d11 103import Data.Functor.Identity (Identity)
2f4a83f8 104
91727d11 105import Pipes
2f4a83f8 106import Pipes.Group (folds, maps, concats, intercalates, FreeT(..), FreeF(..))
7ed76745 107import qualified Pipes.Group as PG
91727d11 108import qualified Pipes.Parse as PP
7ed76745 109import Pipes.Parse (Parser)
91727d11 110import qualified Pipes.Prelude as P
91727d11 111import Data.Char (isSpace)
1a83ae4e 112import Data.Word (Word8)
79917d53 113import Foreign.Storable (sizeOf)
114import Data.Bits (shiftL)
91727d11 115import Prelude hiding (
116 all,
117 any,
118 break,
119 concat,
120 concatMap,
121 drop,
122 dropWhile,
123 elem,
124 filter,
125 head,
126 last,
127 lines,
128 length,
129 map,
130 maximum,
131 minimum,
132 notElem,
133 null,
134 readFile,
135 span,
136 splitAt,
137 take,
138 takeWhile,
139 unlines,
140 unwords,
141 words,
142 writeFile )
143
e4b6dc67 144{- $intro
2f4a83f8 145 This package provides @pipes@ utilities for /text streams/ or /character streams/,
146 realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
147 and thus you will generally want @Data.Text@ in scope. But the type
148 @Producer Text m r@ ,as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
149
150 This particular module provides many functions equivalent in one way or another to
151 the pure functions in
152 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
153 They transform, divide, group and fold text streams. Though @Producer Text m r@
154 is the type of \'effectful Text\', the functions in this module are \'pure\'
e4b6dc67 155 in the sense that they are uniformly monad-independent.
2f4a83f8 156 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
157 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
158 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
159
160 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
161 strict Text: the implementation is arranged so that the individual strict 'Text'
162 chunks are kept to a reasonable size; the user is not aware of the divisions
163 between the connected 'Text' chunks.
e4b6dc67 164 So also here: the functions in this module are designed to operate on streams that
165 are insensitive to text boundaries. This means that they may freely split
2f4a83f8 166 text into smaller texts and /discard empty texts/. The objective, though, is
167 that they should /never concatenate texts/ in order to provide strict upper
168 bounds on memory usage.
e4b6dc67 169
170 For example, to stream only the first three lines of 'stdin' to 'stdout' you
171 might write:
172
173> import Pipes
174> import qualified Pipes.Text as Text
175> import qualified Pipes.Text.IO as Text
176> import Pipes.Group (takes')
2f4a83f8 177> import Lens.Family
178>
e4b6dc67 179> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
2f4a83f8 180> where
e4b6dc67 181> takeLines n = Text.unlines . takes' n . view Text.lines
182
183 The above program will never bring more than one chunk of text (~ 32 KB) into
184 memory, no matter how long the lines are.
185
82fe661b 186-}
187{- $lenses
2f4a83f8 188 As this example shows, one superficial difference from @Data.Text.Lazy@
189 is that many of the operations, like 'lines', are \'lensified\'; this has a
190 number of advantages (where it is possible); in particular it facilitates their
191 use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
80a490ef 192 sense.) The disadvantage, famously, is that the messages you get for type errors can be
193 a little alarming. The remarks that follow in this section are for non-lens adepts.
194
2f4a83f8 195 Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
80a490ef 196 intuitively corresponding function when used with @view@ or @(^.)@. Instead of
197 writing:
2f4a83f8 198
80a490ef 199 > splitAt 17 producer
2f4a83f8 200
201 as we would with the Prelude or Text functions, we write
202
80a490ef 203 > view (splitAt 17) producer
2f4a83f8 204
82fe661b 205 or equivalently
2f4a83f8 206
80a490ef 207 > producer ^. splitAt 17
e4b6dc67 208
2f4a83f8 209 This may seem a little indirect, but note that many equivalents of
210 @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
80a490ef 211 corresponding functions by prefixing them with @(>->)@. Thus something like
e4b6dc67 212
2f4a83f8 213> stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
e4b6dc67 214
2f4a83f8 215 would drop the leading white space from each line.
e4b6dc67 216
2f4a83f8 217 The lenses in this library are marked as /improper/; this just means that
218 they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
219 Just for this reason, though, the magnificent complexities of the lens libraries
220 are a distraction. The lens combinators to keep in mind, the ones that make sense for
221 our lenses, are @view@ \/ @(^.)@), @over@ \/ @(%~)@ , and @zoom@.
80a490ef 222
57454c33 223 One need only keep in mind that if @l@ is a @Lens' a b@, then:
82fe661b 224
225-}
226{- $view
2f4a83f8 227 @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
228 is the corresponding @b@; as was said above, this function will be exactly the
229 function you think it is, given its name. Thus to uppercase the first n characters
230 of a Producer, leaving the rest the same, we could write:
80a490ef 231
232
233 > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
234 > p'
82fe661b 235-}
236{- $over
237 @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
2f4a83f8 238 @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
239 the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
240 (it can also be written @l %~ f@).
241 For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
242 So above we might have written things like these:
80a490ef 243
244 > stripLines = Text.lines %~ maps (>-> Text.stripStart)
245 > stripLines = over Text.lines (maps (>-> Text.stripStart))
246 > upper n = Text.splitAt n %~ (>-> Text.toUpper)
82fe661b 247
248-}
249{- $zoom
2f4a83f8 250 @zoom l@, finally, is a function from a @Parser b m r@
251 to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
80a490ef 252 Its use is easiest to see with an decoding lens like 'utf8', which
253 \"sees\" a Text producer hidden inside a ByteString producer:
2f4a83f8 254 @drawChar@ is a Text parser, returning a @Maybe Char@, @zoom utf8 drawChar@ is
255 a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
256 a list of everything produced from a Producer, leaving only the return value; it would
80a490ef 257 usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
258 returns a list of Text chunks containing the first seventeen Chars, and returns the rest of
2f4a83f8 259 the Text Producer for further parsing. Suppose that we want, inexplicably, to
260 modify the casing of a Text Producer according to any instruction it might
80a490ef 261 contain at the start. Then we might write something like this:
262
263> obey :: Monad m => Producer Text m b -> Producer Text m b
e1ed9621 264> obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
80a490ef 265> let seven = T.concat ts
2f4a83f8 266> case T.toUpper seven of
80a490ef 267> "TOUPPER" -> p' >-> Text.toUpper
268> "TOLOWER" -> p' >-> Text.toLower
269> _ -> do yield seven
270> p'
271
e1ed9621 272
273> >>> let doc = each ["toU","pperTh","is document.\n"]
274> >>> runEffect $ obey doc >-> Text.stdout
275> THIS DOCUMENT.
276
2f4a83f8 277 The purpose of exporting lenses is the mental economy achieved with this three-way
278 applicability. That one expression, e.g. @lines@ or @splitAt 17@ can have these
279 three uses is no more surprising than that a pipe can act as a function modifying
80a490ef 280 the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
2f4a83f8 281 -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
80a490ef 282 @pipe >-> consumer@
283
2f4a83f8 284 The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
285 both <http://hackage.haskell.org/package/lens lens> and
e4b6dc67 286 <http://hackage.haskell.org/package/lens-family lens-family> The use of 'zoom' is explained
2f4a83f8 287 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
288 and to some extent in the @Pipes.Text.Encoding@ module here.
e4b6dc67 289
82fe661b 290-}
291{- $special
2f4a83f8 292 These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
293 This is in the types that are most closely associated with our central text type,
e4b6dc67 294 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
295
296> splitAt :: Int -> Text -> (Text, Text)
297> lines :: Text -> [Text]
298> chunksOf :: Int -> Text -> [Text]
299
2f4a83f8 300 which relate a Text with a pair of Texts or a list of Texts.
301 The corresponding functions here (taking account of \'lensification\') are
e4b6dc67 302
303> view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
304> view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
305> view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
306
307 Some of the types may be more readable if you imagine that we have introduced
308 our own type synonyms
309
310> type Text m r = Producer T.Text m r
311> type Texts m r = FreeT (Producer T.Text m) m r
312
313 Then we would think of the types above as
314
315> view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
316> view lines :: (Monad m) => Text m r -> Texts m r
317> view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
318
319 which brings one closer to the types of the similar functions in @Data.Text.Lazy@
320
2f4a83f8 321 In the type @Producer Text m (Producer Text m r)@ the second
322 element of the \'pair\' of effectful Texts cannot simply be retrieved
323 with something like 'snd'. This is an \'effectful\' pair, and one must work
e4b6dc67 324 through the effects of the first element to arrive at the second Text stream, even
2f4a83f8 325 if you are proposing to throw the Text in the first element away.
326 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
e4b6dc67 327
328> join :: Monad m => Producer Text m (Producer m r) -> Producer m r
329
e1ed9621 330 The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
e4b6dc67 331 @FreeT (Producer m Text) m r@ -- our @Texts m r@ -- is the type of (effectful)
332 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
333 together things of the forms:
334
335> r
336> (Text,r)
337> (Text, (Text, r))
338> (Text, (Text, (Text, r)))
339> (Text, (Text, (Text, (Text, r))))
340> ...
341
2f4a83f8 342 (We might also have identified the sum of those types with @Free ((,) Text) r@
343 -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
344
345 Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
80a490ef 346 @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
2f4a83f8 347
e4b6dc67 348> m r
80a490ef 349> Text m r
350> Text m (Text m r)
351> Text m (Text m (Text m r))
352> Text m (Text m (Text m (Text m r)))
e4b6dc67 353> ...
354
80a490ef 355 We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
356 or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
2f4a83f8 357 result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@ .
80a490ef 358
2f4a83f8 359 One might think that
e4b6dc67 360
57454c33 361> lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
e4b6dc67 362> view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
363
364 should really have the type
2f4a83f8 365
e4b6dc67 366> lines :: Monad m => Pipe Text Text m r
367
2f4a83f8 368 as e.g. 'toUpper' does. But this would spoil the control we are
369 attempting to maintain over the size of chunks. It is in fact just
e4b6dc67 370 as unreasonable to want such a pipe as to want
371
2f4a83f8 372> Data.Text.Lazy.lines :: Text -> Text
e4b6dc67 373
2f4a83f8 374 to 'rechunk' the strict Text chunks inside the lazy Text to respect
375 line boundaries. In fact we have
e4b6dc67 376
377> Data.Text.Lazy.lines :: Text -> [Text]
378> Prelude.lines :: String -> [String]
379
380 where the elements of the list are themselves lazy Texts or Strings; the use
2f4a83f8 381 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
382
e4b6dc67 383 The @Pipes.Group@ module, which can generally be imported without qualification,
e1ed9621 384 provides many functions for working with things of type @FreeT (Producer a m) m r@.
80a490ef 385 In particular it conveniently exports the constructors for @FreeT@ and the associated
2f4a83f8 386 @FreeF@ type -- a fancy form of @Either@, namely
387
80a490ef 388> data FreeF f a b = Pure a | Free (f b)
389
2f4a83f8 390 for pattern-matching. Consider the implementation of the 'words' function, or
391 of the part of the lens that takes us to the words; it is compact but exhibits many
80a490ef 392 of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
2f4a83f8 393 constuctors. Keep in mind that
80a490ef 394
395> newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
396> next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
397
398 Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
399 the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
400
401> words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
402> words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
403> x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
404> return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
405> Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
406> Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
407> p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
408> (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
409> return (words p'') -- We then carry on with the rest, which is likely to begin with space.
2f4a83f8 410
80a490ef 411-}
e4b6dc67 412
91727d11 413-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
414fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
2f4a83f8 415fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 416{-# INLINE fromLazy #-}
91727d11 417
d199072b 418(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
419a ^. lens = getConstant (lens Constant a)
420
91727d11 421-- | Apply a transformation to each 'Char' in the stream
422map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
423map f = P.map (T.map f)
424{-# INLINABLE map #-}
425
31f41a5d 426-- | Map a function over the characters of a text stream and concatenate the results
91727d11 427concatMap
428 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
429concatMap f = P.map (T.concatMap f)
430{-# INLINABLE concatMap #-}
431
2f4a83f8 432-- | @(take n)@ only allows @n@ individual characters to pass;
31f41a5d 433-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 434take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
435take n0 = go n0 where
436 go n
437 | n <= 0 = return ()
438 | otherwise = do
31f41a5d 439 txt <- await
440 let len = fromIntegral (T.length txt)
91727d11 441 if (len > n)
31f41a5d 442 then yield (T.take (fromIntegral n) txt)
91727d11 443 else do
31f41a5d 444 yield txt
91727d11 445 go (n - len)
446{-# INLINABLE take #-}
447
31f41a5d 448-- | Take characters until they fail the predicate
91727d11 449takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
450takeWhile predicate = go
451 where
452 go = do
31f41a5d 453 txt <- await
454 let (prefix, suffix) = T.span predicate txt
91727d11 455 if (T.null suffix)
456 then do
31f41a5d 457 yield txt
91727d11 458 go
459 else yield prefix
460{-# INLINABLE takeWhile #-}
461
91727d11 462-- | Only allows 'Char's to pass if they satisfy the predicate
463filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
464filter predicate = P.map (T.filter predicate)
465{-# INLINABLE filter #-}
466
31f41a5d 467-- | Strict left scan over the characters
91727d11 468scan
469 :: (Monad m)
470 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
11645cdc
GG
471scan step begin = do
472 yield (T.singleton begin)
473 go begin
91727d11 474 where
31f41a5d 475 go c = do
476 txt <- await
477 let txt' = T.scanl step c txt
478 c' = T.last txt'
11645cdc 479 yield (T.tail txt')
31f41a5d 480 go c'
91727d11 481{-# INLINABLE scan #-}
482
2f4a83f8 483-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
484-- here acting as 'Text' pipes, rather as they would on a lazy text
485toCaseFold :: Monad m => Pipe Text Text m r
486toCaseFold = P.map T.toCaseFold
487{-# INLINEABLE toCaseFold #-}
488
489-- | lowercase incoming 'Text'
490toLower :: Monad m => Pipe Text Text m r
491toLower = P.map T.toLower
492{-# INLINEABLE toLower #-}
493
494-- | uppercase incoming 'Text'
495toUpper :: Monad m => Pipe Text Text m r
496toUpper = P.map T.toUpper
497{-# INLINEABLE toUpper #-}
498
499-- | Remove leading white space from an incoming succession of 'Text's
500stripStart :: Monad m => Pipe Text Text m r
501stripStart = do
502 chunk <- await
503 let text = T.stripStart chunk
504 if T.null text
505 then stripStart
506 else do yield text
507 cat
508{-# INLINEABLE stripStart #-}
509
91727d11 510{-| Fold a pure 'Producer' of strict 'Text's into a lazy
511 'TL.Text'
512-}
513toLazy :: Producer Text Identity () -> TL.Text
514toLazy = TL.fromChunks . P.toList
515{-# INLINABLE toLazy #-}
516
517{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
518 'TL.Text'
519
520 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
521 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
522 immediately as they are generated instead of loading them all into memory.
523-}
524toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
525toLazyM = liftM TL.fromChunks . P.toListM
526{-# INLINABLE toLazyM #-}
527
31f41a5d 528-- | Reduce the text stream using a strict left fold over characters
64e03122 529foldChars
91727d11 530 :: Monad m
531 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 532foldChars step begin done = P.fold (T.foldl' step) begin done
1677dc12 533{-# INLINABLE foldChars #-}
91727d11 534
2f4a83f8 535
91727d11 536-- | Retrieve the first 'Char'
537head :: (Monad m) => Producer Text m () -> m (Maybe Char)
538head = go
539 where
540 go p = do
541 x <- nextChar p
542 case x of
543 Left _ -> return Nothing
31f41a5d 544 Right (c, _) -> return (Just c)
91727d11 545{-# INLINABLE head #-}
546
547-- | Retrieve the last 'Char'
548last :: (Monad m) => Producer Text m () -> m (Maybe Char)
549last = go Nothing
550 where
551 go r p = do
552 x <- next p
553 case x of
554 Left () -> return r
31f41a5d 555 Right (txt, p') ->
556 if (T.null txt)
91727d11 557 then go r p'
31f41a5d 558 else go (Just $ T.last txt) p'
91727d11 559{-# INLINABLE last #-}
560
561-- | Determine if the stream is empty
562null :: (Monad m) => Producer Text m () -> m Bool
563null = P.all T.null
564{-# INLINABLE null #-}
565
62e8521c 566-- | Count the number of characters in the stream
91727d11 567length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 568length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 569{-# INLINABLE length #-}
570
571-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
572any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
573any predicate = P.any (T.any predicate)
574{-# INLINABLE any #-}
575
576-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
577all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
578all predicate = P.all (T.all predicate)
579{-# INLINABLE all #-}
580
62e8521c 581-- | Return the maximum 'Char' within a text stream
91727d11 582maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
583maximum = P.fold step Nothing id
584 where
31f41a5d 585 step mc txt =
586 if (T.null txt)
587 then mc
588 else Just $ case mc of
589 Nothing -> T.maximum txt
590 Just c -> max c (T.maximum txt)
91727d11 591{-# INLINABLE maximum #-}
592
62e8521c 593-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 594minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
595minimum = P.fold step Nothing id
596 where
31f41a5d 597 step mc txt =
598 if (T.null txt)
599 then mc
600 else case mc of
601 Nothing -> Just (T.minimum txt)
602 Just c -> Just (min c (T.minimum txt))
91727d11 603{-# INLINABLE minimum #-}
604
91727d11 605-- | Find the first element in the stream that matches the predicate
606find
607 :: (Monad m)
608 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
609find predicate p = head (p >-> filter predicate)
610{-# INLINABLE find #-}
611
62e8521c 612-- | Index into a text stream
91727d11 613index
614 :: (Monad m, Integral a)
615 => a-> Producer Text m () -> m (Maybe Char)
2f4a83f8 616index n p = head (drop n p)
91727d11 617{-# INLINABLE index #-}
618
63ea9ffd 619
9e9bb0ce 620
1a83ae4e 621-- | Consume the first character from a stream of 'Text'
2f4a83f8 622--
1a83ae4e 623-- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
624-- succeeds with a 'Right' providing the next character and the remainder of the
625-- 'Producer'.
9e9bb0ce 626
9e9bb0ce 627nextChar
628 :: (Monad m)
629 => Producer Text m r
630 -> m (Either r (Char, Producer Text m r))
631nextChar = go
632 where
633 go p = do
634 x <- next p
635 case x of
636 Left r -> return (Left r)
637 Right (txt, p') -> case (T.uncons txt) of
638 Nothing -> go p'
639 Just (c, txt') -> return (Right (c, yield txt' >> p'))
640{-# INLINABLE nextChar #-}
641
1a83ae4e 642-- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
643
9e9bb0ce 644drawChar :: (Monad m) => Parser Text m (Maybe Char)
645drawChar = do
646 x <- PP.draw
647 case x of
648 Nothing -> return Nothing
649 Just txt -> case (T.uncons txt) of
650 Nothing -> drawChar
651 Just (c, txt') -> do
652 PP.unDraw txt'
653 return (Just c)
654{-# INLINABLE drawChar #-}
655
656-- | Push back a 'Char' onto the underlying 'Producer'
657unDrawChar :: (Monad m) => Char -> Parser Text m ()
658unDrawChar c = modify (yield (T.singleton c) >>)
659{-# INLINABLE unDrawChar #-}
660
661{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
662 push the 'Char' back
663
664> peekChar = do
665> x <- drawChar
666> case x of
667> Left _ -> return ()
668> Right c -> unDrawChar c
669> return x
1a83ae4e 670
9e9bb0ce 671-}
1a83ae4e 672
9e9bb0ce 673peekChar :: (Monad m) => Parser Text m (Maybe Char)
674peekChar = do
675 x <- drawChar
676 case x of
677 Nothing -> return ()
678 Just c -> unDrawChar c
679 return x
680{-# INLINABLE peekChar #-}
681
682{-| Check if the underlying 'Producer' has no more characters
683
684 Note that this will skip over empty 'Text' chunks, unlike
685 'PP.isEndOfInput' from @pipes-parse@, which would consider
686 an empty 'Text' a valid bit of input.
687
688> isEndOfChars = liftM isLeft peekChar
689-}
690isEndOfChars :: (Monad m) => Parser Text m Bool
691isEndOfChars = do
692 x <- peekChar
693 return (case x of
694 Nothing -> True
695 Just _-> False )
696{-# INLINABLE isEndOfChars #-}
697
31f41a5d 698-- | Splits a 'Producer' after the given number of characters
91727d11 699splitAt
700 :: (Monad m, Integral n)
701 => n
57454c33 702 -> Lens' (Producer Text m r)
d199072b 703 (Producer Text m (Producer Text m r))
9e9bb0ce 704splitAt n0 k p0 = fmap join (k (go n0 p0))
91727d11 705 where
706 go 0 p = return p
707 go n p = do
708 x <- lift (next p)
709 case x of
710 Left r -> return (return r)
31f41a5d 711 Right (txt, p') -> do
712 let len = fromIntegral (T.length txt)
91727d11 713 if (len <= n)
714 then do
31f41a5d 715 yield txt
91727d11 716 go (n - len) p'
717 else do
31f41a5d 718 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 719 yield prefix
720 return (yield suffix >> p')
721{-# INLINABLE splitAt #-}
722
91727d11 723
1a83ae4e 724-- | Split a text stream in two, producing the longest
725-- consecutive group of characters that satisfies the predicate
726-- and returning the rest
727
91727d11 728span
729 :: (Monad m)
730 => (Char -> Bool)
57454c33 731 -> Lens' (Producer Text m r)
d199072b 732 (Producer Text m (Producer Text m r))
9e9bb0ce 733span predicate k p0 = fmap join (k (go p0))
91727d11 734 where
735 go p = do
736 x <- lift (next p)
737 case x of
738 Left r -> return (return r)
31f41a5d 739 Right (txt, p') -> do
740 let (prefix, suffix) = T.span predicate txt
91727d11 741 if (T.null suffix)
742 then do
31f41a5d 743 yield txt
91727d11 744 go p'
745 else do
746 yield prefix
747 return (yield suffix >> p')
748{-# INLINABLE span #-}
749
1a83ae4e 750{-| Split a text stream in two, producing the longest
62e8521c 751 consecutive group of characters that don't satisfy the predicate
91727d11 752-}
753break
754 :: (Monad m)
755 => (Char -> Bool)
57454c33 756 -> Lens' (Producer Text m r)
d199072b 757 (Producer Text m (Producer Text m r))
91727d11 758break predicate = span (not . predicate)
759{-# INLINABLE break #-}
760
9e9bb0ce 761{-| Improper lens that splits after the first group of equivalent Chars, as
762 defined by the given equivalence relation
763-}
764groupBy
765 :: (Monad m)
766 => (Char -> Char -> Bool)
57454c33 767 -> Lens' (Producer Text m r)
d199072b 768 (Producer Text m (Producer Text m r))
9e9bb0ce 769groupBy equals k p0 = fmap join (k ((go p0))) where
770 go p = do
771 x <- lift (next p)
772 case x of
773 Left r -> return (return r)
774 Right (txt, p') -> case T.uncons txt of
775 Nothing -> go p'
2f4a83f8 776 Just (c, _) -> (yield txt >> p') ^. span (equals c)
9e9bb0ce 777{-# INLINABLE groupBy #-}
778
779-- | Improper lens that splits after the first succession of identical 'Char' s
2f4a83f8 780group :: Monad m
57454c33 781 => Lens' (Producer Text m r)
9e9bb0ce 782 (Producer Text m (Producer Text m r))
783group = groupBy (==)
784{-# INLINABLE group #-}
785
786{-| Improper lens that splits a 'Producer' after the first word
787
2f4a83f8 788 Unlike 'words', this does not drop leading whitespace
9e9bb0ce 789-}
2f4a83f8 790word :: (Monad m)
57454c33 791 => Lens' (Producer Text m r)
d199072b 792 (Producer Text m (Producer Text m r))
9e9bb0ce 793word k p0 = fmap join (k (to p0))
794 where
795 to p = do
796 p' <- p^.span isSpace
797 p'^.break isSpace
798{-# INLINABLE word #-}
799
2f4a83f8 800line :: (Monad m)
57454c33 801 => Lens' (Producer Text m r)
d199072b 802 (Producer Text m (Producer Text m r))
9e9bb0ce 803line = break (== '\n')
9e9bb0ce 804{-# INLINABLE line #-}
805
2f4a83f8 806-- | @(drop n)@ drops the first @n@ characters
807drop :: (Monad m, Integral n)
808 => n -> Producer Text m r -> Producer Text m r
809drop n p = do
810 p' <- lift $ runEffect (for (p ^. splitAt n) discard)
811 p'
812{-# INLINABLE drop #-}
813
814-- | Drop characters until they fail the predicate
815dropWhile :: (Monad m)
816 => (Char -> Bool) -> Producer Text m r -> Producer Text m r
817dropWhile predicate p = do
818 p' <- lift $ runEffect (for (p ^. span predicate) discard)
819 p'
820{-# INLINABLE dropWhile #-}
9e9bb0ce 821
822-- | Intersperse a 'Char' in between the characters of stream of 'Text'
823intersperse
824 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
825intersperse c = go0
826 where
827 go0 p = do
828 x <- lift (next p)
829 case x of
830 Left r -> return r
831 Right (txt, p') -> do
832 yield (T.intersperse c txt)
833 go1 p'
834 go1 p = do
835 x <- lift (next p)
836 case x of
837 Left r -> return r
838 Right (txt, p') -> do
839 yield (T.singleton c)
840 yield (T.intersperse c txt)
841 go1 p'
842{-# INLINABLE intersperse #-}
843
844
2f4a83f8 845-- | Improper lens from unpacked 'Word8's to packaged 'ByteString's
846pack :: Monad m => Lens' (Producer Char m r) (Producer Text m r)
847pack k p = fmap _unpack (k (_pack p))
848{-# INLINABLE pack #-}
849
850-- | Improper lens from packed 'ByteString's to unpacked 'Word8's
851unpack :: Monad m => Lens' (Producer Text m r) (Producer Char m r)
852unpack k p = fmap _pack (k (_unpack p))
853{-# INLINABLE unpack #-}
9e9bb0ce 854
2f4a83f8 855_pack :: Monad m => Producer Char m r -> Producer Text m r
856_pack p = folds step id done (p^.PG.chunksOf defaultChunkSize)
857 where
858 step diffAs w8 = diffAs . (w8:)
9e9bb0ce 859
860 done diffAs = T.pack (diffAs [])
2f4a83f8 861{-# INLINABLE _pack #-}
9e9bb0ce 862
2f4a83f8 863_unpack :: Monad m => Producer Text m r -> Producer Char m r
864_unpack p = for p (each . T.unpack)
865{-# INLINABLE _unpack #-}
9e9bb0ce 866
79917d53 867defaultChunkSize :: Int
868defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
0f8c6f1b 869
2f4a83f8 870
0f8c6f1b 871-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
872chunksOf
873 :: (Monad m, Integral n)
2f4a83f8 874 => n -> Lens' (Producer Text m r)
d199072b 875 (FreeT (Producer Text m) m r)
0f8c6f1b 876chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
877 where
878 go p = do
879 x <- next p
880 return $ case x of
7ed76745 881 Left r -> Pure r
882 Right (txt, p') -> Free $ do
2f4a83f8 883 p'' <- (yield txt >> p') ^. splitAt n
7ed76745 884 return $ FreeT (go p'')
0f8c6f1b 885{-# INLINABLE chunksOf #-}
886
887
62e8521c 888{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 889 predicate
890-}
1677dc12 891splitsWith
91727d11 892 :: (Monad m)
893 => (Char -> Bool)
2f4a83f8 894 -> Producer Text m r -> FreeT (Producer Text m) m r
7ed76745 895splitsWith predicate p0 = FreeT (go0 p0)
91727d11 896 where
897 go0 p = do
898 x <- next p
899 case x of
7ed76745 900 Left r -> return (Pure r)
31f41a5d 901 Right (txt, p') ->
902 if (T.null txt)
91727d11 903 then go0 p'
7ed76745 904 else return $ Free $ do
9e9bb0ce 905 p'' <- (yield txt >> p') ^. span (not . predicate)
7ed76745 906 return $ FreeT (go1 p'')
91727d11 907 go1 p = do
908 x <- nextChar p
909 return $ case x of
7ed76745 910 Left r -> Pure r
911 Right (_, p') -> Free $ do
2f4a83f8 912 p'' <- p' ^. span (not . predicate)
7ed76745 913 return $ FreeT (go1 p'')
1677dc12 914{-# INLINABLE splitsWith #-}
91727d11 915
31f41a5d 916-- | Split a text stream using the given 'Char' as the delimiter
0f8c6f1b 917splits :: (Monad m)
d199072b 918 => Char
57454c33 919 -> Lens' (Producer Text m r)
d199072b 920 (FreeT (Producer Text m) m r)
0f8c6f1b 921splits c k p =
2f4a83f8 922 fmap (intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
0f8c6f1b 923{-# INLINABLE splits #-}
924
925{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
926 given equivalence relation
927-}
928groupsBy
929 :: Monad m
930 => (Char -> Char -> Bool)
57454c33 931 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
2f4a83f8 932groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
0f8c6f1b 933 go p = do x <- next p
7ed76745 934 case x of Left r -> return (Pure r)
0f8c6f1b 935 Right (bs, p') -> case T.uncons bs of
936 Nothing -> go p'
7ed76745 937 Just (c, _) -> do return $ Free $ do
0f8c6f1b 938 p'' <- (yield bs >> p')^.span (equals c)
7ed76745 939 return $ FreeT (go p'')
0f8c6f1b 940{-# INLINABLE groupsBy #-}
941
942
943-- | Like 'groupsBy', where the equality predicate is ('==')
944groups
945 :: Monad m
57454c33 946 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
0f8c6f1b 947groups = groupsBy (==)
948{-# INLINABLE groups #-}
949
91727d11 950
91727d11 951
62e8521c 952{-| Split a text stream into 'FreeT'-delimited lines
91727d11 953-}
954lines
2f4a83f8 955 :: (Monad m) => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
956lines k p = fmap _unlines (k (_lines p))
957{-# INLINABLE lines #-}
958
959unlines
960 :: Monad m
961 => Lens' (FreeT (Producer Text m) m r) (Producer Text m r)
962unlines k p = fmap _lines (k (_unlines p))
963{-# INLINABLE unlines #-}
964
965_lines :: Monad m
966 => Producer Text m r -> FreeT (Producer Text m) m r
967_lines p0 = FreeT (go0 p0)
0f8c6f1b 968 where
969 go0 p = do
970 x <- next p
971 case x of
7ed76745 972 Left r -> return (Pure r)
0f8c6f1b 973 Right (txt, p') ->
974 if (T.null txt)
975 then go0 p'
7ed76745 976 else return $ Free $ go1 (yield txt >> p')
0f8c6f1b 977 go1 p = do
978 p' <- p ^. break ('\n' ==)
7ed76745 979 return $ FreeT $ do
0f8c6f1b 980 x <- nextChar p'
981 case x of
7ed76745 982 Left r -> return $ Pure r
0f8c6f1b 983 Right (_, p'') -> go0 p''
2f4a83f8 984{-# INLINABLE _lines #-}
0f8c6f1b 985
2f4a83f8 986_unlines :: Monad m
987 => FreeT (Producer Text m) m r -> Producer Text m r
988_unlines = concats . maps (<* yield (T.singleton '\n'))
989{-# INLINABLE _unlines #-}
91727d11 990
2f4a83f8 991-- | Split a text stream into 'FreeT'-delimited words. Note that
992-- roundtripping with e.g. @over words id@ eliminates extra space
993-- characters as with @Prelude.unwords . Prelude.words@
91727d11 994words
2f4a83f8 995 :: (Monad m) => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
996words k p = fmap _unwords (k (_words p))
997{-# INLINABLE words #-}
998
999unwords
1000 :: Monad m
1001 => Lens' (FreeT (Producer Text m) m r) (Producer Text m r)
1002unwords k p = fmap _words (k (_unwords p))
1003{-# INLINABLE unwords #-}
1004
1005_words :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
1006_words p = FreeT $ do
1007 x <- next (dropWhile isSpace p)
cf10d6f1 1008 return $ case x of
7ed76745 1009 Left r -> Pure r
1010 Right (bs, p') -> Free $ do
9e9bb0ce 1011 p'' <- (yield bs >> p') ^. break isSpace
2f4a83f8 1012 return (_words p'')
1013{-# INLINABLE _words #-}
1014
1015_unwords :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1016_unwords = intercalates (yield $ T.singleton ' ')
1017{-# INLINABLE _unwords #-}
91727d11 1018
cf10d6f1 1019
31f41a5d 1020{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1021 interspersing a text stream in between them
91727d11 1022-}
1023intercalate
1024 :: (Monad m)
2f4a83f8 1025 => Producer Text m () -> FreeT (Producer Text m) m r -> Producer Text m r
91727d11 1026intercalate p0 = go0
1027 where
1028 go0 f = do
7ed76745 1029 x <- lift (runFreeT f)
91727d11 1030 case x of
7ed76745 1031 Pure r -> return r
1032 Free p -> do
91727d11 1033 f' <- p
1034 go1 f'
1035 go1 f = do
7ed76745 1036 x <- lift (runFreeT f)
91727d11 1037 case x of
7ed76745 1038 Pure r -> return r
1039 Free p -> do
91727d11 1040 p0
1041 f' <- p
1042 go1 f'
1043{-# INLINABLE intercalate #-}
1044
91727d11 1045
91727d11 1046
91727d11 1047{- $reexports
2f4a83f8 1048
91727d11 1049 @Data.Text@ re-exports the 'Text' type.
1050
2f4a83f8 1051 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
64e03122 1052-}
1053
bbdfd305 1054
57454c33 1055type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)