]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
omit >->/map rules
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
9667f797
GG
1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
91727d11 3
7faef8bc 4module Pipes.Text (
82fe661b 5 -- * Effectful Text
e4b6dc67 6 -- $intro
7
82fe661b 8 -- * Lenses
9 -- $lenses
10
11 -- ** @view@ \/ @(^.)@
12 -- $view
13
14 -- ** @over@ \/ @(%~)@
15 -- $over
16
17 -- ** @zoom@
18 -- $zoom
19
20 -- * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
21 -- $special
22
91727d11 23 -- * Producers
1a83ae4e 24 fromLazy
91727d11 25
26 -- * Pipes
1677dc12 27 , map
28 , concatMap
29 , take
30 , drop
31 , takeWhile
32 , dropWhile
33 , filter
34 , scan
1677dc12 35 , pack
36 , unpack
37 , toCaseFold
38 , toLower
39 , toUpper
40 , stripStart
91727d11 41
42 -- * Folds
1677dc12 43 , toLazy
44 , toLazyM
45 , foldChars
46 , head
47 , last
48 , null
49 , length
50 , any
51 , all
52 , maximum
53 , minimum
54 , find
55 , index
56 , count
57
58 -- * Primitive Character Parsers
1677dc12 59 , nextChar
60 , drawChar
61 , unDrawChar
62 , peekChar
9e9bb0ce 63 , isEndOfChars
1677dc12 64
65 -- * Parsing Lenses
9e9bb0ce 66 , splitAt
1677dc12 67 , span
68 , break
69 , groupBy
70 , group
9e9bb0ce 71 , word
72 , line
1677dc12 73
74 -- * FreeT Splitters
75 , chunksOf
76 , splitsWith
0f8c6f1b 77 , splits
1a83ae4e 78 , groupsBy
79 , groups
1677dc12 80 , lines
81 , words
82
91727d11 83 -- * Transformations
1677dc12 84 , intersperse
9e9bb0ce 85 , packChars
31f41a5d 86
91727d11 87 -- * Joiners
1677dc12 88 , intercalate
89 , unlines
90 , unwords
9e9bb0ce 91
1a83ae4e 92 -- * Re-exports
91727d11 93 -- $reexports
1677dc12 94 , module Data.ByteString
95 , module Data.Text
96 , module Data.Profunctor
1677dc12 97 , module Pipes.Parse
7ed76745 98 , module Pipes.Group
91727d11 99 ) where
100
0f8c6f1b 101import Control.Applicative ((<*))
70125641 102import Control.Monad (liftM, join)
9e9bb0ce 103import Control.Monad.Trans.State.Strict (StateT(..), modify)
91727d11 104import qualified Data.Text as T
91727d11 105import Data.Text (Text)
106import qualified Data.Text.Lazy as TL
31f41a5d 107import Data.ByteString (ByteString)
1677dc12 108import Data.Functor.Constant (Constant(Constant, getConstant))
91727d11 109import Data.Functor.Identity (Identity)
1677dc12 110import Data.Profunctor (Profunctor)
111import qualified Data.Profunctor
91727d11 112import Pipes
7fc48f7c 113import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
7ed76745 114import qualified Pipes.Group as PG
91727d11 115import qualified Pipes.Parse as PP
7ed76745 116import Pipes.Parse (Parser)
91727d11 117import qualified Pipes.Prelude as P
91727d11 118import Data.Char (isSpace)
1a83ae4e 119import Data.Word (Word8)
79917d53 120import Foreign.Storable (sizeOf)
121import Data.Bits (shiftL)
91727d11 122import Prelude hiding (
123 all,
124 any,
125 break,
126 concat,
127 concatMap,
128 drop,
129 dropWhile,
130 elem,
131 filter,
132 head,
133 last,
134 lines,
135 length,
136 map,
137 maximum,
138 minimum,
139 notElem,
140 null,
141 readFile,
142 span,
143 splitAt,
144 take,
145 takeWhile,
146 unlines,
147 unwords,
148 words,
149 writeFile )
150
e4b6dc67 151{- $intro
e1ed9621 152 This package provides @pipes@ utilities for /text streams/ or /character streams/,
153 realized as streams of 'Text' chunks. The individual chunks are uniformly /strict/,
154 and thus you will generally want @Data.Text@ in scope. But the type
155 @Producer Text m r@ ,as we are using it, is a sort of /pipes/ equivalent of the lazy @Text@ type.
e4b6dc67 156
157 This particular module provides many functions equivalent in one way or another to
158 the pure functions in
159 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
160 They transform, divide, group and fold text streams. Though @Producer Text m r@
161 is the type of \'effectful Text\', the functions in this module are \'pure\'
162 in the sense that they are uniformly monad-independent.
163 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
164 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
165 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
166
167 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
168 strict Text: the implementation is arranged so that the individual strict 'Text'
169 chunks are kept to a reasonable size; the user is not aware of the divisions
170 between the connected 'Text' chunks.
171 So also here: the functions in this module are designed to operate on streams that
172 are insensitive to text boundaries. This means that they may freely split
173 text into smaller texts and /discard empty texts/. The objective, though, is
174 that they should /never concatenate texts/ in order to provide strict upper
175 bounds on memory usage.
176
177 For example, to stream only the first three lines of 'stdin' to 'stdout' you
178 might write:
179
180> import Pipes
181> import qualified Pipes.Text as Text
182> import qualified Pipes.Text.IO as Text
183> import Pipes.Group (takes')
184> import Lens.Family
185>
186> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
187> where
188> takeLines n = Text.unlines . takes' n . view Text.lines
189
190 The above program will never bring more than one chunk of text (~ 32 KB) into
191 memory, no matter how long the lines are.
192
82fe661b 193-}
194{- $lenses
e4b6dc67 195 As this example shows, one superficial difference from @Data.Text.Lazy@
80a490ef 196 is that many of the operations, like 'lines', are \'lensified\'; this has a
197 number of advantages (where it is possible); in particular it facilitates their
198 use with 'Parser's of Text (in the general <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
199 sense.) The disadvantage, famously, is that the messages you get for type errors can be
200 a little alarming. The remarks that follow in this section are for non-lens adepts.
201
202 Each lens exported here, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
203 intuitively corresponding function when used with @view@ or @(^.)@. Instead of
204 writing:
205
206 > splitAt 17 producer
207
208 as we would with the Prelude or Text functions, we write
209
210 > view (splitAt 17) producer
211
82fe661b 212 or equivalently
80a490ef 213
214 > producer ^. splitAt 17
e4b6dc67 215
80a490ef 216 This may seem a little indirect, but note that many equivalents of
217 @Text -> Text@ functions are exported here as 'Pipe's. Here too we recover the intuitively
218 corresponding functions by prefixing them with @(>->)@. Thus something like
e4b6dc67 219
220> stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
221
222 would drop the leading white space from each line.
223
80a490ef 224 The lenses in this library are marked as /improper/; this just means that
e1ed9621 225 they don't admit all the operations of an ideal lens, but only /getting/ and /focusing/.
80a490ef 226 Just for this reason, though, the magnificent complexities of the lens libraries
227 are a distraction. The lens combinators to keep in mind, the ones that make sense for
228 our lenses, are @view@ \/ @(^.)@), @over@ \/ @(%~)@ , and @zoom@.
229
57454c33 230 One need only keep in mind that if @l@ is a @Lens' a b@, then:
82fe661b 231
232-}
233{- $view
234 @view l@ is a function @a -> b@ . Thus @view l a@ (also written @a ^. l@ )
80a490ef 235 is the corresponding @b@; as was said above, this function will be exactly the
236 function you think it is, given its name. Thus to uppercase the first n characters
237 of a Producer, leaving the rest the same, we could write:
238
239
240 > upper n p = do p' <- p ^. Text.splitAt n >-> Text.toUpper
241 > p'
82fe661b 242-}
243{- $over
244 @over l@ is a function @(b -> b) -> a -> a@. Thus, given a function that modifies
80a490ef 245 @b@s, the lens lets us modify an @a@ by applying @f :: b -> b@ to
246 the @b@ that we can \"see\" through the lens. So @over l f :: a -> a@
247 (it can also be written @l %~ f@).
248 For any particular @a@, then, @over l f a@ or @(l %~ f) a@ is a revised @a@.
249 So above we might have written things like these:
250
251 > stripLines = Text.lines %~ maps (>-> Text.stripStart)
252 > stripLines = over Text.lines (maps (>-> Text.stripStart))
253 > upper n = Text.splitAt n %~ (>-> Text.toUpper)
82fe661b 254
255-}
256{- $zoom
257 @zoom l@, finally, is a function from a @Parser b m r@
80a490ef 258 to a @Parser a m r@ (or more generally a @StateT (Producer b m x) m r@).
259 Its use is easiest to see with an decoding lens like 'utf8', which
260 \"sees\" a Text producer hidden inside a ByteString producer:
261 @drawChar@ is a Text parser, returning a @Maybe Char@, @zoom utf8 drawChar@ is
262 a /ByteString/ parser, returning a @Maybe Char@. @drawAll@ is a Parser that returns
263 a list of everything produced from a Producer, leaving only the return value; it would
264 usually be unreasonable to use it. But @zoom (splitAt 17) drawAll@
265 returns a list of Text chunks containing the first seventeen Chars, and returns the rest of
266 the Text Producer for further parsing. Suppose that we want, inexplicably, to
267 modify the casing of a Text Producer according to any instruction it might
268 contain at the start. Then we might write something like this:
269
270> obey :: Monad m => Producer Text m b -> Producer Text m b
e1ed9621 271> obey p = do (ts, p') <- lift $ runStateT (zoom (Text.splitAt 7) drawAll) p
80a490ef 272> let seven = T.concat ts
273> case T.toUpper seven of
274> "TOUPPER" -> p' >-> Text.toUpper
275> "TOLOWER" -> p' >-> Text.toLower
276> _ -> do yield seven
277> p'
278
e1ed9621 279
280> >>> let doc = each ["toU","pperTh","is document.\n"]
281> >>> runEffect $ obey doc >-> Text.stdout
282> THIS DOCUMENT.
283
80a490ef 284 The purpose of exporting lenses is the mental economy achieved with this three-way
285 applicability. That one expression, e.g. @lines@ or @splitAt 17@ can have these
286 three uses is no more surprising than that a pipe can act as a function modifying
287 the output of a producer, namely by using @>->@ to its left: @producer >-> pipe@
288 -- but can /also/ modify the inputs to a consumer by using @>->@ to its right:
289 @pipe >-> consumer@
290
291 The three functions, @view@ \/ @(^.)@, @over@ \/ @(%~)@ and @zoom@ are supplied by
292 both <http://hackage.haskell.org/package/lens lens> and
e4b6dc67 293 <http://hackage.haskell.org/package/lens-family lens-family> The use of 'zoom' is explained
294 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
80a490ef 295 and to some extent in the @Pipes.Text.Encoding@ module here.
e4b6dc67 296
82fe661b 297-}
298{- $special
e4b6dc67 299 These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
300 This is in the types that are most closely associated with our central text type,
301 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
302
303> splitAt :: Int -> Text -> (Text, Text)
304> lines :: Text -> [Text]
305> chunksOf :: Int -> Text -> [Text]
306
307 which relate a Text with a pair of Texts or a list of Texts.
308 The corresponding functions here (taking account of \'lensification\') are
309
310> view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
311> view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
312> view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
313
314 Some of the types may be more readable if you imagine that we have introduced
315 our own type synonyms
316
317> type Text m r = Producer T.Text m r
318> type Texts m r = FreeT (Producer T.Text m) m r
319
320 Then we would think of the types above as
321
322> view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
323> view lines :: (Monad m) => Text m r -> Texts m r
324> view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
325
326 which brings one closer to the types of the similar functions in @Data.Text.Lazy@
327
328 In the type @Producer Text m (Producer Text m r)@ the second
329 element of the \'pair\' of effectful Texts cannot simply be retrieved
330 with something like 'snd'. This is an \'effectful\' pair, and one must work
331 through the effects of the first element to arrive at the second Text stream, even
332 if you are proposing to throw the Text in the first element away.
333 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
334
335> join :: Monad m => Producer Text m (Producer m r) -> Producer m r
336
e1ed9621 337 The return type of 'lines', 'words', 'chunksOf' and the other /splitter/ functions,
e4b6dc67 338 @FreeT (Producer m Text) m r@ -- our @Texts m r@ -- is the type of (effectful)
339 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
340 together things of the forms:
341
342> r
343> (Text,r)
344> (Text, (Text, r))
345> (Text, (Text, (Text, r)))
346> (Text, (Text, (Text, (Text, r))))
347> ...
348
80a490ef 349 (We might also have identified the sum of those types with @Free ((,) Text) r@
350 -- or, more absurdly, @FreeT ((,) Text) Identity r@.)
351
352 Similarly, our type @Texts m r@, or @FreeT (Text m) m r@ -- in fact called
353 @FreeT (Producer Text m) m r@ here -- encompasses all the members of the sequence:
e4b6dc67 354
355> m r
80a490ef 356> Text m r
357> Text m (Text m r)
358> Text m (Text m (Text m r))
359> Text m (Text m (Text m (Text m r)))
e4b6dc67 360> ...
361
80a490ef 362 We might have used a more specialized type in place of @FreeT (Producer a m) m r@,
363 or indeed of @FreeT (Producer Text m) m r@, but it is clear that the correct
364 result type of 'lines' will be isomorphic to @FreeT (Producer Text m) m r@ .
365
e4b6dc67 366 One might think that
367
57454c33 368> lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
e4b6dc67 369> view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
370
371 should really have the type
372
373> lines :: Monad m => Pipe Text Text m r
374
375 as e.g. 'toUpper' does. But this would spoil the control we are
376 attempting to maintain over the size of chunks. It is in fact just
377 as unreasonable to want such a pipe as to want
378
379> Data.Text.Lazy.lines :: Text -> Text
380
381 to 'rechunk' the strict Text chunks inside the lazy Text to respect
382 line boundaries. In fact we have
383
384> Data.Text.Lazy.lines :: Text -> [Text]
385> Prelude.lines :: String -> [String]
386
387 where the elements of the list are themselves lazy Texts or Strings; the use
388 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
389
390 The @Pipes.Group@ module, which can generally be imported without qualification,
e1ed9621 391 provides many functions for working with things of type @FreeT (Producer a m) m r@.
80a490ef 392 In particular it conveniently exports the constructors for @FreeT@ and the associated
393 @FreeF@ type -- a fancy form of @Either@, namely
e4b6dc67 394
80a490ef 395> data FreeF f a b = Pure a | Free (f b)
396
397 for pattern-matching. Consider the implementation of the 'words' function, or
398 of the part of the lens that takes us to the words; it is compact but exhibits many
399 of the points under discussion, including explicit handling of the @FreeT@ and @FreeF@
400 constuctors. Keep in mind that
401
402> newtype FreeT f m a = FreeT (m (FreeF f a (FreeT f m a)))
403> next :: Monad m => Producer a m r -> m (Either r (a, Producer a m r))
404
405 Thus the @do@ block after the @FreeT@ constructor is in the base monad, e.g. 'IO' or 'Identity';
406 the later subordinate block, opened by the @Free@ constructor, is in the @Producer@ monad:
407
408> words :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
409> words p = FreeT $ do -- With 'next' we will inspect p's first chunk, excluding spaces;
410> x <- next (p >-> dropWhile isSpace) -- note that 'dropWhile isSpace' is a pipe, and is thus *applied* with '>->'.
411> return $ case x of -- We use 'return' and so need something of type 'FreeF (Text m) r (Texts m r)'
412> Left r -> Pure r -- 'Left' means we got no Text chunk, but only the return value; so we are done.
413> Right (txt, p') -> Free $ do -- If we get a chunk and the rest of the producer, p', we enter the 'Producer' monad
414> p'' <- view (break isSpace) -- When we apply 'break isSpace', we get a Producer that returns a Producer;
415> (yield txt >> p') -- so here we yield everything up to the next space, and get the rest back.
416> return (words p'') -- We then carry on with the rest, which is likely to begin with space.
417
418-}
e4b6dc67 419
91727d11 420-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
421fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
79917d53 422fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 423{-# INLINE fromLazy #-}
91727d11 424
d199072b 425(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
426a ^. lens = getConstant (lens Constant a)
427
91727d11 428-- | Apply a transformation to each 'Char' in the stream
429map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
430map f = P.map (T.map f)
431{-# INLINABLE map #-}
432
31f41a5d 433-- | Map a function over the characters of a text stream and concatenate the results
91727d11 434concatMap
435 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
436concatMap f = P.map (T.concatMap f)
437{-# INLINABLE concatMap #-}
438
c0343bc9 439-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 440pack :: Monad m => Pipe String Text m r
441pack = P.map T.pack
442{-# INLINEABLE pack #-}
443
ff38b9f0 444-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 445unpack :: Monad m => Pipe Text String m r
d4732515 446unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 447{-# INLINEABLE unpack #-}
448
b0d86a59 449-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
450-- here acting as 'Text' pipes, rather as they would on a lazy text
a4913c42 451toCaseFold :: Monad m => Pipe Text Text m r
7faef8bc 452toCaseFold = P.map T.toCaseFold
453{-# INLINEABLE toCaseFold #-}
454
c0343bc9 455-- | lowercase incoming 'Text'
a4913c42 456toLower :: Monad m => Pipe Text Text m r
7faef8bc 457toLower = P.map T.toLower
458{-# INLINEABLE toLower #-}
459
c0343bc9 460-- | uppercase incoming 'Text'
c70edb9d 461toUpper :: Monad m => Pipe Text Text m r
7faef8bc 462toUpper = P.map T.toUpper
463{-# INLINEABLE toUpper #-}
464
c0343bc9 465-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 466stripStart :: Monad m => Pipe Text Text m r
467stripStart = do
468 chunk <- await
469 let text = T.stripStart chunk
470 if T.null text
471 then stripStart
b0d86a59 472 else do yield text
473 cat
7faef8bc 474{-# INLINEABLE stripStart #-}
475
31f41a5d 476-- | @(take n)@ only allows @n@ individual characters to pass;
477-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 478take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
479take n0 = go n0 where
480 go n
481 | n <= 0 = return ()
482 | otherwise = do
31f41a5d 483 txt <- await
484 let len = fromIntegral (T.length txt)
91727d11 485 if (len > n)
31f41a5d 486 then yield (T.take (fromIntegral n) txt)
91727d11 487 else do
31f41a5d 488 yield txt
91727d11 489 go (n - len)
490{-# INLINABLE take #-}
491
31f41a5d 492-- | @(drop n)@ drops the first @n@ characters
91727d11 493drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
494drop n0 = go n0 where
495 go n
496 | n <= 0 = cat
497 | otherwise = do
31f41a5d 498 txt <- await
499 let len = fromIntegral (T.length txt)
91727d11 500 if (len >= n)
501 then do
31f41a5d 502 yield (T.drop (fromIntegral n) txt)
91727d11 503 cat
504 else go (n - len)
505{-# INLINABLE drop #-}
506
31f41a5d 507-- | Take characters until they fail the predicate
91727d11 508takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
509takeWhile predicate = go
510 where
511 go = do
31f41a5d 512 txt <- await
513 let (prefix, suffix) = T.span predicate txt
91727d11 514 if (T.null suffix)
515 then do
31f41a5d 516 yield txt
91727d11 517 go
518 else yield prefix
519{-# INLINABLE takeWhile #-}
520
31f41a5d 521-- | Drop characters until they fail the predicate
91727d11 522dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
523dropWhile predicate = go where
524 go = do
31f41a5d 525 txt <- await
526 case T.findIndex (not . predicate) txt of
91727d11 527 Nothing -> go
528 Just i -> do
31f41a5d 529 yield (T.drop i txt)
91727d11 530 cat
531{-# INLINABLE dropWhile #-}
532
533-- | Only allows 'Char's to pass if they satisfy the predicate
534filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
535filter predicate = P.map (T.filter predicate)
536{-# INLINABLE filter #-}
537
31f41a5d 538-- | Strict left scan over the characters
91727d11 539scan
540 :: (Monad m)
541 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
11645cdc
GG
542scan step begin = do
543 yield (T.singleton begin)
544 go begin
91727d11 545 where
31f41a5d 546 go c = do
547 txt <- await
548 let txt' = T.scanl step c txt
549 c' = T.last txt'
11645cdc 550 yield (T.tail txt')
31f41a5d 551 go c'
91727d11 552{-# INLINABLE scan #-}
553
554{-| Fold a pure 'Producer' of strict 'Text's into a lazy
555 'TL.Text'
556-}
557toLazy :: Producer Text Identity () -> TL.Text
558toLazy = TL.fromChunks . P.toList
559{-# INLINABLE toLazy #-}
560
561{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
562 'TL.Text'
563
564 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
565 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
566 immediately as they are generated instead of loading them all into memory.
567-}
568toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
569toLazyM = liftM TL.fromChunks . P.toListM
570{-# INLINABLE toLazyM #-}
571
31f41a5d 572-- | Reduce the text stream using a strict left fold over characters
64e03122 573foldChars
91727d11 574 :: Monad m
575 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 576foldChars step begin done = P.fold (T.foldl' step) begin done
1677dc12 577{-# INLINABLE foldChars #-}
91727d11 578
579-- | Retrieve the first 'Char'
580head :: (Monad m) => Producer Text m () -> m (Maybe Char)
581head = go
582 where
583 go p = do
584 x <- nextChar p
585 case x of
586 Left _ -> return Nothing
31f41a5d 587 Right (c, _) -> return (Just c)
91727d11 588{-# INLINABLE head #-}
589
590-- | Retrieve the last 'Char'
591last :: (Monad m) => Producer Text m () -> m (Maybe Char)
592last = go Nothing
593 where
594 go r p = do
595 x <- next p
596 case x of
597 Left () -> return r
31f41a5d 598 Right (txt, p') ->
599 if (T.null txt)
91727d11 600 then go r p'
31f41a5d 601 else go (Just $ T.last txt) p'
91727d11 602{-# INLINABLE last #-}
603
604-- | Determine if the stream is empty
605null :: (Monad m) => Producer Text m () -> m Bool
606null = P.all T.null
607{-# INLINABLE null #-}
608
62e8521c 609-- | Count the number of characters in the stream
91727d11 610length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 611length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 612{-# INLINABLE length #-}
613
614-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
615any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
616any predicate = P.any (T.any predicate)
617{-# INLINABLE any #-}
618
619-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
620all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
621all predicate = P.all (T.all predicate)
622{-# INLINABLE all #-}
623
62e8521c 624-- | Return the maximum 'Char' within a text stream
91727d11 625maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
626maximum = P.fold step Nothing id
627 where
31f41a5d 628 step mc txt =
629 if (T.null txt)
630 then mc
631 else Just $ case mc of
632 Nothing -> T.maximum txt
633 Just c -> max c (T.maximum txt)
91727d11 634{-# INLINABLE maximum #-}
635
62e8521c 636-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 637minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
638minimum = P.fold step Nothing id
639 where
31f41a5d 640 step mc txt =
641 if (T.null txt)
642 then mc
643 else case mc of
644 Nothing -> Just (T.minimum txt)
645 Just c -> Just (min c (T.minimum txt))
91727d11 646{-# INLINABLE minimum #-}
647
91727d11 648-- | Find the first element in the stream that matches the predicate
649find
650 :: (Monad m)
651 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
652find predicate p = head (p >-> filter predicate)
653{-# INLINABLE find #-}
654
62e8521c 655-- | Index into a text stream
91727d11 656index
657 :: (Monad m, Integral a)
658 => a-> Producer Text m () -> m (Maybe Char)
659index n p = head (p >-> drop n)
660{-# INLINABLE index #-}
661
63ea9ffd 662
31f41a5d 663-- | Store a tally of how many segments match the given 'Text'
664count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
665count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
666{-# INLINABLE count #-}
667
9e9bb0ce 668
1a83ae4e 669-- | Consume the first character from a stream of 'Text'
670--
671-- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
672-- succeeds with a 'Right' providing the next character and the remainder of the
673-- 'Producer'.
9e9bb0ce 674
9e9bb0ce 675nextChar
676 :: (Monad m)
677 => Producer Text m r
678 -> m (Either r (Char, Producer Text m r))
679nextChar = go
680 where
681 go p = do
682 x <- next p
683 case x of
684 Left r -> return (Left r)
685 Right (txt, p') -> case (T.uncons txt) of
686 Nothing -> go p'
687 Just (c, txt') -> return (Right (c, yield txt' >> p'))
688{-# INLINABLE nextChar #-}
689
1a83ae4e 690-- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
691
9e9bb0ce 692drawChar :: (Monad m) => Parser Text m (Maybe Char)
693drawChar = do
694 x <- PP.draw
695 case x of
696 Nothing -> return Nothing
697 Just txt -> case (T.uncons txt) of
698 Nothing -> drawChar
699 Just (c, txt') -> do
700 PP.unDraw txt'
701 return (Just c)
702{-# INLINABLE drawChar #-}
703
704-- | Push back a 'Char' onto the underlying 'Producer'
705unDrawChar :: (Monad m) => Char -> Parser Text m ()
706unDrawChar c = modify (yield (T.singleton c) >>)
707{-# INLINABLE unDrawChar #-}
708
709{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
710 push the 'Char' back
711
712> peekChar = do
713> x <- drawChar
714> case x of
715> Left _ -> return ()
716> Right c -> unDrawChar c
717> return x
1a83ae4e 718
9e9bb0ce 719-}
1a83ae4e 720
9e9bb0ce 721peekChar :: (Monad m) => Parser Text m (Maybe Char)
722peekChar = do
723 x <- drawChar
724 case x of
725 Nothing -> return ()
726 Just c -> unDrawChar c
727 return x
728{-# INLINABLE peekChar #-}
729
730{-| Check if the underlying 'Producer' has no more characters
731
732 Note that this will skip over empty 'Text' chunks, unlike
733 'PP.isEndOfInput' from @pipes-parse@, which would consider
734 an empty 'Text' a valid bit of input.
735
736> isEndOfChars = liftM isLeft peekChar
737-}
738isEndOfChars :: (Monad m) => Parser Text m Bool
739isEndOfChars = do
740 x <- peekChar
741 return (case x of
742 Nothing -> True
743 Just _-> False )
744{-# INLINABLE isEndOfChars #-}
745
746
31f41a5d 747-- | Splits a 'Producer' after the given number of characters
91727d11 748splitAt
749 :: (Monad m, Integral n)
750 => n
57454c33 751 -> Lens' (Producer Text m r)
d199072b 752 (Producer Text m (Producer Text m r))
9e9bb0ce 753splitAt n0 k p0 = fmap join (k (go n0 p0))
91727d11 754 where
755 go 0 p = return p
756 go n p = do
757 x <- lift (next p)
758 case x of
759 Left r -> return (return r)
31f41a5d 760 Right (txt, p') -> do
761 let len = fromIntegral (T.length txt)
91727d11 762 if (len <= n)
763 then do
31f41a5d 764 yield txt
91727d11 765 go (n - len) p'
766 else do
31f41a5d 767 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 768 yield prefix
769 return (yield suffix >> p')
770{-# INLINABLE splitAt #-}
771
91727d11 772
1a83ae4e 773-- | Split a text stream in two, producing the longest
774-- consecutive group of characters that satisfies the predicate
775-- and returning the rest
776
91727d11 777span
778 :: (Monad m)
779 => (Char -> Bool)
57454c33 780 -> Lens' (Producer Text m r)
d199072b 781 (Producer Text m (Producer Text m r))
9e9bb0ce 782span predicate k p0 = fmap join (k (go p0))
91727d11 783 where
784 go p = do
785 x <- lift (next p)
786 case x of
787 Left r -> return (return r)
31f41a5d 788 Right (txt, p') -> do
789 let (prefix, suffix) = T.span predicate txt
91727d11 790 if (T.null suffix)
791 then do
31f41a5d 792 yield txt
91727d11 793 go p'
794 else do
795 yield prefix
796 return (yield suffix >> p')
797{-# INLINABLE span #-}
798
1a83ae4e 799{-| Split a text stream in two, producing the longest
62e8521c 800 consecutive group of characters that don't satisfy the predicate
91727d11 801-}
802break
803 :: (Monad m)
804 => (Char -> Bool)
57454c33 805 -> Lens' (Producer Text m r)
d199072b 806 (Producer Text m (Producer Text m r))
91727d11 807break predicate = span (not . predicate)
808{-# INLINABLE break #-}
809
9e9bb0ce 810{-| Improper lens that splits after the first group of equivalent Chars, as
811 defined by the given equivalence relation
812-}
813groupBy
814 :: (Monad m)
815 => (Char -> Char -> Bool)
57454c33 816 -> Lens' (Producer Text m r)
d199072b 817 (Producer Text m (Producer Text m r))
9e9bb0ce 818groupBy equals k p0 = fmap join (k ((go p0))) where
819 go p = do
820 x <- lift (next p)
821 case x of
822 Left r -> return (return r)
823 Right (txt, p') -> case T.uncons txt of
824 Nothing -> go p'
825 Just (c, _) -> (yield txt >> p') ^. span (equals c)
826{-# INLINABLE groupBy #-}
827
828-- | Improper lens that splits after the first succession of identical 'Char' s
829group :: Monad m
57454c33 830 => Lens' (Producer Text m r)
9e9bb0ce 831 (Producer Text m (Producer Text m r))
832group = groupBy (==)
833{-# INLINABLE group #-}
834
835{-| Improper lens that splits a 'Producer' after the first word
836
837 Unlike 'words', this does not drop leading whitespace
838-}
839word :: (Monad m)
57454c33 840 => Lens' (Producer Text m r)
d199072b 841 (Producer Text m (Producer Text m r))
9e9bb0ce 842word k p0 = fmap join (k (to p0))
843 where
844 to p = do
845 p' <- p^.span isSpace
846 p'^.break isSpace
847{-# INLINABLE word #-}
848
849
850line :: (Monad m)
57454c33 851 => Lens' (Producer Text m r)
d199072b 852 (Producer Text m (Producer Text m r))
9e9bb0ce 853line = break (== '\n')
854
855{-# INLINABLE line #-}
856
857
858-- | Intersperse a 'Char' in between the characters of stream of 'Text'
859intersperse
860 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
861intersperse c = go0
862 where
863 go0 p = do
864 x <- lift (next p)
865 case x of
866 Left r -> return r
867 Right (txt, p') -> do
868 yield (T.intersperse c txt)
869 go1 p'
870 go1 p = do
871 x <- lift (next p)
872 case x of
873 Left r -> return r
874 Right (txt, p') -> do
875 yield (T.singleton c)
876 yield (T.intersperse c txt)
877 go1 p'
878{-# INLINABLE intersperse #-}
879
880
9e9bb0ce 881-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
79917d53 882packChars :: Monad m => Iso'_ (Producer Char m x) (Producer Text m x)
9e9bb0ce 883packChars = Data.Profunctor.dimap to (fmap from)
884 where
885 -- to :: Monad m => Producer Char m x -> Producer Text m x
7ed76745 886 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
9e9bb0ce 887
888 step diffAs c = diffAs . (c:)
889
890 done diffAs = T.pack (diffAs [])
891
892 -- from :: Monad m => Producer Text m x -> Producer Char m x
893 from p = for p (each . T.unpack)
894{-# INLINABLE packChars #-}
895
79917d53 896defaultChunkSize :: Int
897defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1)
0f8c6f1b 898
899-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
900chunksOf
901 :: (Monad m, Integral n)
57454c33 902 => n -> Lens' (Producer Text m r)
d199072b 903 (FreeT (Producer Text m) m r)
0f8c6f1b 904chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
905 where
906 go p = do
907 x <- next p
908 return $ case x of
7ed76745 909 Left r -> Pure r
910 Right (txt, p') -> Free $ do
0f8c6f1b 911 p'' <- (yield txt >> p') ^. splitAt n
7ed76745 912 return $ FreeT (go p'')
0f8c6f1b 913{-# INLINABLE chunksOf #-}
914
915
62e8521c 916{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 917 predicate
918-}
1677dc12 919splitsWith
91727d11 920 :: (Monad m)
921 => (Char -> Bool)
922 -> Producer Text m r
7ed76745 923 -> FreeT (Producer Text m) m r
924splitsWith predicate p0 = FreeT (go0 p0)
91727d11 925 where
926 go0 p = do
927 x <- next p
928 case x of
7ed76745 929 Left r -> return (Pure r)
31f41a5d 930 Right (txt, p') ->
931 if (T.null txt)
91727d11 932 then go0 p'
7ed76745 933 else return $ Free $ do
9e9bb0ce 934 p'' <- (yield txt >> p') ^. span (not . predicate)
7ed76745 935 return $ FreeT (go1 p'')
91727d11 936 go1 p = do
937 x <- nextChar p
938 return $ case x of
7ed76745 939 Left r -> Pure r
940 Right (_, p') -> Free $ do
9e9bb0ce 941 p'' <- p' ^. span (not . predicate)
7ed76745 942 return $ FreeT (go1 p'')
1677dc12 943{-# INLINABLE splitsWith #-}
91727d11 944
31f41a5d 945-- | Split a text stream using the given 'Char' as the delimiter
0f8c6f1b 946splits :: (Monad m)
d199072b 947 => Char
57454c33 948 -> Lens' (Producer Text m r)
d199072b 949 (FreeT (Producer Text m) m r)
0f8c6f1b 950splits c k p =
7ed76745 951 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
0f8c6f1b 952{-# INLINABLE splits #-}
953
954{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
955 given equivalence relation
956-}
957groupsBy
958 :: Monad m
959 => (Char -> Char -> Bool)
57454c33 960 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
7ed76745 961groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
0f8c6f1b 962 go p = do x <- next p
7ed76745 963 case x of Left r -> return (Pure r)
0f8c6f1b 964 Right (bs, p') -> case T.uncons bs of
965 Nothing -> go p'
7ed76745 966 Just (c, _) -> do return $ Free $ do
0f8c6f1b 967 p'' <- (yield bs >> p')^.span (equals c)
7ed76745 968 return $ FreeT (go p'')
0f8c6f1b 969{-# INLINABLE groupsBy #-}
970
971
972-- | Like 'groupsBy', where the equality predicate is ('==')
973groups
974 :: Monad m
57454c33 975 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
0f8c6f1b 976groups = groupsBy (==)
977{-# INLINABLE groups #-}
978
91727d11 979
91727d11 980
62e8521c 981{-| Split a text stream into 'FreeT'-delimited lines
91727d11 982-}
983lines
d199072b 984 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
985lines = Data.Profunctor.dimap _lines (fmap _unlines)
986 where
987 _lines p0 = FreeT (go0 p0)
0f8c6f1b 988 where
989 go0 p = do
990 x <- next p
991 case x of
7ed76745 992 Left r -> return (Pure r)
0f8c6f1b 993 Right (txt, p') ->
994 if (T.null txt)
995 then go0 p'
7ed76745 996 else return $ Free $ go1 (yield txt >> p')
0f8c6f1b 997 go1 p = do
998 p' <- p ^. break ('\n' ==)
7ed76745 999 return $ FreeT $ do
0f8c6f1b 1000 x <- nextChar p'
1001 case x of
7ed76745 1002 Left r -> return $ Pure r
0f8c6f1b 1003 Right (_, p'') -> go0 p''
d199072b 1004 -- _unlines
1005 -- :: Monad m
1006 -- => FreeT (Producer Text m) m x -> Producer Text m x
1007 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
0f8c6f1b 1008
d199072b 1009{-# INLINABLE lines #-}
91727d11 1010
31f41a5d 1011
31f41a5d 1012-- | Split a text stream into 'FreeT'-delimited words
91727d11 1013words
79917d53 1014 :: (Monad m) => Iso'_ (Producer Text m r) (FreeT (Producer Text m) m r)
0f8c6f1b 1015words = Data.Profunctor.dimap go (fmap _unwords)
91727d11 1016 where
7ed76745 1017 go p = FreeT $ do
cf10d6f1 1018 x <- next (p >-> dropWhile isSpace)
1019 return $ case x of
7ed76745 1020 Left r -> Pure r
1021 Right (bs, p') -> Free $ do
9e9bb0ce 1022 p'' <- (yield bs >> p') ^. break isSpace
cf10d6f1 1023 return (go p'')
7ed76745 1024 _unwords = PG.intercalates (yield $ T.singleton ' ')
0f8c6f1b 1025
91727d11 1026{-# INLINABLE words #-}
1027
cf10d6f1 1028
31f41a5d 1029{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1030 interspersing a text stream in between them
91727d11 1031-}
1032intercalate
1033 :: (Monad m)
1034 => Producer Text m ()
1035 -> FreeT (Producer Text m) m r
1036 -> Producer Text m r
1037intercalate p0 = go0
1038 where
1039 go0 f = do
7ed76745 1040 x <- lift (runFreeT f)
91727d11 1041 case x of
7ed76745 1042 Pure r -> return r
1043 Free p -> do
91727d11 1044 f' <- p
1045 go1 f'
1046 go1 f = do
7ed76745 1047 x <- lift (runFreeT f)
91727d11 1048 case x of
7ed76745 1049 Pure r -> return r
1050 Free p -> do
91727d11 1051 p0
1052 f' <- p
1053 go1 f'
1054{-# INLINABLE intercalate #-}
1055
62e8521c 1056{-| Join 'FreeT'-delimited lines into a text stream
91727d11 1057-}
1058unlines
d199072b 1059 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1060unlines = go
1061 where
1062 go f = do
1063 x <- lift (runFreeT f)
1064 case x of
1065 Pure r -> return r
1066 Free p -> do
1067 f' <- p
1068 yield $ T.singleton '\n'
1069 go f'
91727d11 1070{-# INLINABLE unlines #-}
1071
31f41a5d 1072{-| Join 'FreeT'-delimited words into a text stream
91727d11 1073-}
1074unwords
1075 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
7fc48f7c 1076unwords = intercalate (yield $ T.singleton ' ')
91727d11 1077{-# INLINABLE unwords #-}
1078
91727d11 1079
91727d11 1080{- $reexports
91727d11 1081
1082 @Data.Text@ re-exports the 'Text' type.
1083
0f8c6f1b 1084 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
64e03122 1085-}
1086
bbdfd305 1087
57454c33 1088type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)