]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
more documentation
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
9667f797
GG
1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
74377aa0 3{-| This package provides @pipes@ utilities for \'text streams\', which are
0ac0c414 4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, and you
5 will generally want @Data.Text@ in scope. But the type @Producer Text m r@ is
74377aa0 6 in some ways the pipes equivalent of the lazy @Text@ type.
0ac0c414 7
8 This module provides many functions equivalent in one way or another to
9 the 'pure' functions in
10 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
74377aa0 11 They transform, divide, group and fold text streams. Though @Producer Text m r@
12 is \'effectful\' Text, functions
0ac0c414 13 in this module are \'pure\' in the sense that they are uniformly monad-independent.
74377aa0 14 Simple IO operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
15 operations are in @Data.Text.Lazy.IO@. Interoperation with @ByteString@
16 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
17
18 The Text type exported by @Data.Text.Lazy@ is basically '[Text]'. The implementation
19 is arranged so that the individual strict 'Text' chunks are kept to a reasonable size;
20 the user is not aware of the divisions between the connected 'Text' chunks.
21 So also here: the functions in this module are designed to operate on streams that
0ac0c414 22 are insensitive to text boundaries. This means that they may freely split
74377aa0 23 text into smaller texts and /discard empty texts/. However, the objective is
24 that they should /never concatenate texts/ in order to provide strict upper
25 bounds on memory usage.
26
0ac0c414 27 For example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 28 might write:
91727d11 29
30> import Pipes
31f41a5d 31> import qualified Pipes.Text as Text
74377aa0 32> import qualified Pipes.Text.IO as Text
33> import Pipes.Group
34> import Lens.Family
35>
31f41a5d 36> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
74377aa0 37> where
38> takeLines n = Text.unlines . takes' n . view Text.lines
39> -- or equivalently:
40> -- takeLines n = over Text.lines (takes' n)
91727d11 41
31f41a5d 42 The above program will never bring more than one chunk of text (~ 32 KB) into
74377aa0 43 memory, no matter how long the lines are.
44
45 As this example shows, one superficial difference from @Data.Text.Lazy@
46 is that many of the operations, like 'lines',
47 are \'lensified\'; this has a number of advantages where it is possible, in particular
e7ad3643 48 it facilitates their use with 'Parser's of Text (in the general
49 <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
50 sense.)
51 Each such expression, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
52 intuitively corresponding function when used with @view@ or @(^.)@.
74377aa0 53
54 A more important difference the example reveals is in the types closely associated with
55 the central type, @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@
56 we find functions like
57
58> splitAt :: Int -> Text -> (Text, Text)
59> lines :: Int -> Text -> [Text]
e7ad3643 60> chunksOf :: Int -> Text -> [Text]
74377aa0 61
62 which relate a Text with a pair or list of Texts. The corresponding functions here (taking
63 account of \'lensification\') are
64
65> view . splitAt :: (Monad m, Integral n)
66> => n -> Producer Text m r -> Producer Text.Text m (Producer Text.Text m r)
67> view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
e7ad3643 68> view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
74377aa0 69
e7ad3643 70 In the type @Producer Text m (Producer Text m r)@ the second
74377aa0 71 element of the \'pair\' of of \'effectful Texts\' cannot simply be retrieved
72 with 'snd'. This is an \'effectful\' pair, and one must work through the effects
73 of the first element to arrive at the second. Similarly in @FreeT (Producer Text m) m r@,
74 which corresponds with @[Text]@, on cannot simply drop 10 Producers and take the others;
75 we can only get to the ones we want to take by working through their predecessors.
76
77 Some of the types may be more readable if you imagine that we have introduced
78 our own type synonyms
79
80> type Text m r = Producer T.Text m r
81> type Texts m r = FreeT (Producer T.Text m) m r
82
83 Then we would think of the types above as
84
85> view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
86> view lines :: (Monad m) => Text m r -> Texts m r
e7ad3643 87> view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
74377aa0 88
89 which brings one closer to the types of the similar functions in @Data.Text.Lazy@
90
91727d11 91-}
92
7faef8bc 93module Pipes.Text (
91727d11 94 -- * Producers
1a83ae4e 95 fromLazy
91727d11 96
97 -- * Pipes
1677dc12 98 , map
99 , concatMap
100 , take
101 , drop
102 , takeWhile
103 , dropWhile
104 , filter
105 , scan
1677dc12 106 , pack
107 , unpack
108 , toCaseFold
109 , toLower
110 , toUpper
111 , stripStart
91727d11 112
113 -- * Folds
1677dc12 114 , toLazy
115 , toLazyM
116 , foldChars
117 , head
118 , last
119 , null
120 , length
121 , any
122 , all
123 , maximum
124 , minimum
125 , find
126 , index
127 , count
128
129 -- * Primitive Character Parsers
1677dc12 130 , nextChar
131 , drawChar
132 , unDrawChar
133 , peekChar
9e9bb0ce 134 , isEndOfChars
1677dc12 135
136 -- * Parsing Lenses
9e9bb0ce 137 , splitAt
1677dc12 138 , span
139 , break
140 , groupBy
141 , group
9e9bb0ce 142 , word
143 , line
1677dc12 144
145 -- * FreeT Splitters
146 , chunksOf
147 , splitsWith
0f8c6f1b 148 , splits
1a83ae4e 149 , groupsBy
150 , groups
1677dc12 151 , lines
152 , words
153
91727d11 154 -- * Transformations
1677dc12 155 , intersperse
9e9bb0ce 156 , packChars
31f41a5d 157
91727d11 158 -- * Joiners
1677dc12 159 , intercalate
160 , unlines
161 , unwords
9e9bb0ce 162
1a83ae4e 163 -- * Re-exports
91727d11 164 -- $reexports
1677dc12 165 , module Data.ByteString
166 , module Data.Text
167 , module Data.Profunctor
1677dc12 168 , module Pipes.Parse
7ed76745 169 , module Pipes.Group
91727d11 170 ) where
171
0f8c6f1b 172import Control.Applicative ((<*))
70125641 173import Control.Monad (liftM, join)
9e9bb0ce 174import Control.Monad.Trans.State.Strict (StateT(..), modify)
91727d11 175import qualified Data.Text as T
91727d11 176import Data.Text (Text)
177import qualified Data.Text.Lazy as TL
91727d11 178import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
31f41a5d 179import Data.ByteString (ByteString)
1677dc12 180import Data.Functor.Constant (Constant(Constant, getConstant))
91727d11 181import Data.Functor.Identity (Identity)
1677dc12 182import Data.Profunctor (Profunctor)
183import qualified Data.Profunctor
91727d11 184import Pipes
7fc48f7c 185import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
7ed76745 186import qualified Pipes.Group as PG
91727d11 187import qualified Pipes.Parse as PP
7ed76745 188import Pipes.Parse (Parser)
91727d11 189import qualified Pipes.Prelude as P
91727d11 190import Data.Char (isSpace)
1a83ae4e 191import Data.Word (Word8)
1677dc12 192
91727d11 193import Prelude hiding (
194 all,
195 any,
196 break,
197 concat,
198 concatMap,
199 drop,
200 dropWhile,
201 elem,
202 filter,
203 head,
204 last,
205 lines,
206 length,
207 map,
208 maximum,
209 minimum,
210 notElem,
211 null,
212 readFile,
213 span,
214 splitAt,
215 take,
216 takeWhile,
217 unlines,
218 unwords,
219 words,
220 writeFile )
221
222-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
223fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
224fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 225{-# INLINE fromLazy #-}
91727d11 226
1677dc12 227
228type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
229
230type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
231
232(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
233a ^. lens = getConstant (lens Constant a)
234
235
91727d11 236-- | Apply a transformation to each 'Char' in the stream
237map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
238map f = P.map (T.map f)
239{-# INLINABLE map #-}
240
ff38b9f0 241{-# RULES "p >-> map f" forall p f .
242 p >-> map f = for p (\txt -> yield (T.map f txt))
243 #-}
244
31f41a5d 245-- | Map a function over the characters of a text stream and concatenate the results
91727d11 246concatMap
247 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
248concatMap f = P.map (T.concatMap f)
249{-# INLINABLE concatMap #-}
250
ff38b9f0 251{-# RULES "p >-> concatMap f" forall p f .
252 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
253 #-}
7faef8bc 254
ff38b9f0 255
c0343bc9 256-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 257pack :: Monad m => Pipe String Text m r
258pack = P.map T.pack
259{-# INLINEABLE pack #-}
260
ff38b9f0 261{-# RULES "p >-> pack" forall p .
262 p >-> pack = for p (\txt -> yield (T.pack txt))
263 #-}
264
265-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 266unpack :: Monad m => Pipe Text String m r
d4732515 267unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 268{-# INLINEABLE unpack #-}
269
ff38b9f0 270{-# RULES "p >-> unpack" forall p .
271 p >-> unpack = for p (\txt -> yield (T.unpack txt))
272 #-}
d4732515 273
b0d86a59 274-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
275-- here acting as 'Text' pipes, rather as they would on a lazy text
7faef8bc 276toCaseFold :: Monad m => Pipe Text Text m ()
277toCaseFold = P.map T.toCaseFold
278{-# INLINEABLE toCaseFold #-}
279
ff38b9f0 280{-# RULES "p >-> toCaseFold" forall p .
281 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
282 #-}
283
284
c0343bc9 285-- | lowercase incoming 'Text'
7faef8bc 286toLower :: Monad m => Pipe Text Text m ()
287toLower = P.map T.toLower
288{-# INLINEABLE toLower #-}
289
ff38b9f0 290{-# RULES "p >-> toLower" forall p .
291 p >-> toLower = for p (\txt -> yield (T.toLower txt))
292 #-}
293
c0343bc9 294-- | uppercase incoming 'Text'
7faef8bc 295toUpper :: Monad m => Pipe Text Text m ()
296toUpper = P.map T.toUpper
297{-# INLINEABLE toUpper #-}
298
ff38b9f0 299{-# RULES "p >-> toUpper" forall p .
300 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
301 #-}
302
c0343bc9 303-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 304stripStart :: Monad m => Pipe Text Text m r
305stripStart = do
306 chunk <- await
307 let text = T.stripStart chunk
308 if T.null text
309 then stripStart
b0d86a59 310 else do yield text
311 cat
7faef8bc 312{-# INLINEABLE stripStart #-}
313
31f41a5d 314-- | @(take n)@ only allows @n@ individual characters to pass;
315-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 316take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
317take n0 = go n0 where
318 go n
319 | n <= 0 = return ()
320 | otherwise = do
31f41a5d 321 txt <- await
322 let len = fromIntegral (T.length txt)
91727d11 323 if (len > n)
31f41a5d 324 then yield (T.take (fromIntegral n) txt)
91727d11 325 else do
31f41a5d 326 yield txt
91727d11 327 go (n - len)
328{-# INLINABLE take #-}
329
31f41a5d 330-- | @(drop n)@ drops the first @n@ characters
91727d11 331drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
332drop n0 = go n0 where
333 go n
334 | n <= 0 = cat
335 | otherwise = do
31f41a5d 336 txt <- await
337 let len = fromIntegral (T.length txt)
91727d11 338 if (len >= n)
339 then do
31f41a5d 340 yield (T.drop (fromIntegral n) txt)
91727d11 341 cat
342 else go (n - len)
343{-# INLINABLE drop #-}
344
31f41a5d 345-- | Take characters until they fail the predicate
91727d11 346takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
347takeWhile predicate = go
348 where
349 go = do
31f41a5d 350 txt <- await
351 let (prefix, suffix) = T.span predicate txt
91727d11 352 if (T.null suffix)
353 then do
31f41a5d 354 yield txt
91727d11 355 go
356 else yield prefix
357{-# INLINABLE takeWhile #-}
358
31f41a5d 359-- | Drop characters until they fail the predicate
91727d11 360dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
361dropWhile predicate = go where
362 go = do
31f41a5d 363 txt <- await
364 case T.findIndex (not . predicate) txt of
91727d11 365 Nothing -> go
366 Just i -> do
31f41a5d 367 yield (T.drop i txt)
91727d11 368 cat
369{-# INLINABLE dropWhile #-}
370
371-- | Only allows 'Char's to pass if they satisfy the predicate
372filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
373filter predicate = P.map (T.filter predicate)
374{-# INLINABLE filter #-}
375
ff38b9f0 376{-# RULES "p >-> filter q" forall p q .
377 p >-> filter q = for p (\txt -> yield (T.filter q txt))
378 #-}
379
31f41a5d 380-- | Strict left scan over the characters
91727d11 381scan
382 :: (Monad m)
383 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
11645cdc
GG
384scan step begin = do
385 yield (T.singleton begin)
386 go begin
91727d11 387 where
31f41a5d 388 go c = do
389 txt <- await
390 let txt' = T.scanl step c txt
391 c' = T.last txt'
11645cdc 392 yield (T.tail txt')
31f41a5d 393 go c'
91727d11 394{-# INLINABLE scan #-}
395
396{-| Fold a pure 'Producer' of strict 'Text's into a lazy
397 'TL.Text'
398-}
399toLazy :: Producer Text Identity () -> TL.Text
400toLazy = TL.fromChunks . P.toList
401{-# INLINABLE toLazy #-}
402
403{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
404 'TL.Text'
405
406 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
407 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
408 immediately as they are generated instead of loading them all into memory.
409-}
410toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
411toLazyM = liftM TL.fromChunks . P.toListM
412{-# INLINABLE toLazyM #-}
413
31f41a5d 414-- | Reduce the text stream using a strict left fold over characters
64e03122 415foldChars
91727d11 416 :: Monad m
417 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 418foldChars step begin done = P.fold (T.foldl' step) begin done
1677dc12 419{-# INLINABLE foldChars #-}
91727d11 420
421-- | Retrieve the first 'Char'
422head :: (Monad m) => Producer Text m () -> m (Maybe Char)
423head = go
424 where
425 go p = do
426 x <- nextChar p
427 case x of
428 Left _ -> return Nothing
31f41a5d 429 Right (c, _) -> return (Just c)
91727d11 430{-# INLINABLE head #-}
431
432-- | Retrieve the last 'Char'
433last :: (Monad m) => Producer Text m () -> m (Maybe Char)
434last = go Nothing
435 where
436 go r p = do
437 x <- next p
438 case x of
439 Left () -> return r
31f41a5d 440 Right (txt, p') ->
441 if (T.null txt)
91727d11 442 then go r p'
31f41a5d 443 else go (Just $ T.last txt) p'
91727d11 444{-# INLINABLE last #-}
445
446-- | Determine if the stream is empty
447null :: (Monad m) => Producer Text m () -> m Bool
448null = P.all T.null
449{-# INLINABLE null #-}
450
62e8521c 451-- | Count the number of characters in the stream
91727d11 452length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 453length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 454{-# INLINABLE length #-}
455
456-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
457any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
458any predicate = P.any (T.any predicate)
459{-# INLINABLE any #-}
460
461-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
462all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
463all predicate = P.all (T.all predicate)
464{-# INLINABLE all #-}
465
62e8521c 466-- | Return the maximum 'Char' within a text stream
91727d11 467maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
468maximum = P.fold step Nothing id
469 where
31f41a5d 470 step mc txt =
471 if (T.null txt)
472 then mc
473 else Just $ case mc of
474 Nothing -> T.maximum txt
475 Just c -> max c (T.maximum txt)
91727d11 476{-# INLINABLE maximum #-}
477
62e8521c 478-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 479minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
480minimum = P.fold step Nothing id
481 where
31f41a5d 482 step mc txt =
483 if (T.null txt)
484 then mc
485 else case mc of
486 Nothing -> Just (T.minimum txt)
487 Just c -> Just (min c (T.minimum txt))
91727d11 488{-# INLINABLE minimum #-}
489
91727d11 490-- | Find the first element in the stream that matches the predicate
491find
492 :: (Monad m)
493 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
494find predicate p = head (p >-> filter predicate)
495{-# INLINABLE find #-}
496
62e8521c 497-- | Index into a text stream
91727d11 498index
499 :: (Monad m, Integral a)
500 => a-> Producer Text m () -> m (Maybe Char)
501index n p = head (p >-> drop n)
502{-# INLINABLE index #-}
503
63ea9ffd 504
31f41a5d 505-- | Store a tally of how many segments match the given 'Text'
506count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
507count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
508{-# INLINABLE count #-}
509
9e9bb0ce 510
1a83ae4e 511-- | Consume the first character from a stream of 'Text'
512--
513-- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
514-- succeeds with a 'Right' providing the next character and the remainder of the
515-- 'Producer'.
9e9bb0ce 516
9e9bb0ce 517nextChar
518 :: (Monad m)
519 => Producer Text m r
520 -> m (Either r (Char, Producer Text m r))
521nextChar = go
522 where
523 go p = do
524 x <- next p
525 case x of
526 Left r -> return (Left r)
527 Right (txt, p') -> case (T.uncons txt) of
528 Nothing -> go p'
529 Just (c, txt') -> return (Right (c, yield txt' >> p'))
530{-# INLINABLE nextChar #-}
531
1a83ae4e 532-- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
533
9e9bb0ce 534drawChar :: (Monad m) => Parser Text m (Maybe Char)
535drawChar = do
536 x <- PP.draw
537 case x of
538 Nothing -> return Nothing
539 Just txt -> case (T.uncons txt) of
540 Nothing -> drawChar
541 Just (c, txt') -> do
542 PP.unDraw txt'
543 return (Just c)
544{-# INLINABLE drawChar #-}
545
546-- | Push back a 'Char' onto the underlying 'Producer'
547unDrawChar :: (Monad m) => Char -> Parser Text m ()
548unDrawChar c = modify (yield (T.singleton c) >>)
549{-# INLINABLE unDrawChar #-}
550
551{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
552 push the 'Char' back
553
554> peekChar = do
555> x <- drawChar
556> case x of
557> Left _ -> return ()
558> Right c -> unDrawChar c
559> return x
1a83ae4e 560
9e9bb0ce 561-}
1a83ae4e 562
9e9bb0ce 563peekChar :: (Monad m) => Parser Text m (Maybe Char)
564peekChar = do
565 x <- drawChar
566 case x of
567 Nothing -> return ()
568 Just c -> unDrawChar c
569 return x
570{-# INLINABLE peekChar #-}
571
572{-| Check if the underlying 'Producer' has no more characters
573
574 Note that this will skip over empty 'Text' chunks, unlike
575 'PP.isEndOfInput' from @pipes-parse@, which would consider
576 an empty 'Text' a valid bit of input.
577
578> isEndOfChars = liftM isLeft peekChar
579-}
580isEndOfChars :: (Monad m) => Parser Text m Bool
581isEndOfChars = do
582 x <- peekChar
583 return (case x of
584 Nothing -> True
585 Just _-> False )
586{-# INLINABLE isEndOfChars #-}
587
588
31f41a5d 589-- | Splits a 'Producer' after the given number of characters
91727d11 590splitAt
591 :: (Monad m, Integral n)
592 => n
9e9bb0ce 593 -> Lens' (Producer Text m r)
594 (Producer Text m (Producer Text m r))
595splitAt n0 k p0 = fmap join (k (go n0 p0))
91727d11 596 where
597 go 0 p = return p
598 go n p = do
599 x <- lift (next p)
600 case x of
601 Left r -> return (return r)
31f41a5d 602 Right (txt, p') -> do
603 let len = fromIntegral (T.length txt)
91727d11 604 if (len <= n)
605 then do
31f41a5d 606 yield txt
91727d11 607 go (n - len) p'
608 else do
31f41a5d 609 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 610 yield prefix
611 return (yield suffix >> p')
612{-# INLINABLE splitAt #-}
613
91727d11 614
1a83ae4e 615-- | Split a text stream in two, producing the longest
616-- consecutive group of characters that satisfies the predicate
617-- and returning the rest
618
91727d11 619span
620 :: (Monad m)
621 => (Char -> Bool)
9e9bb0ce 622 -> Lens' (Producer Text m r)
623 (Producer Text m (Producer Text m r))
624span predicate k p0 = fmap join (k (go p0))
91727d11 625 where
626 go p = do
627 x <- lift (next p)
628 case x of
629 Left r -> return (return r)
31f41a5d 630 Right (txt, p') -> do
631 let (prefix, suffix) = T.span predicate txt
91727d11 632 if (T.null suffix)
633 then do
31f41a5d 634 yield txt
91727d11 635 go p'
636 else do
637 yield prefix
638 return (yield suffix >> p')
639{-# INLINABLE span #-}
640
1a83ae4e 641{-| Split a text stream in two, producing the longest
62e8521c 642 consecutive group of characters that don't satisfy the predicate
91727d11 643-}
644break
645 :: (Monad m)
646 => (Char -> Bool)
9e9bb0ce 647 -> Lens' (Producer Text m r)
648 (Producer Text m (Producer Text m r))
91727d11 649break predicate = span (not . predicate)
650{-# INLINABLE break #-}
651
9e9bb0ce 652{-| Improper lens that splits after the first group of equivalent Chars, as
653 defined by the given equivalence relation
654-}
655groupBy
656 :: (Monad m)
657 => (Char -> Char -> Bool)
658 -> Lens' (Producer Text m r)
659 (Producer Text m (Producer Text m r))
660groupBy equals k p0 = fmap join (k ((go p0))) where
661 go p = do
662 x <- lift (next p)
663 case x of
664 Left r -> return (return r)
665 Right (txt, p') -> case T.uncons txt of
666 Nothing -> go p'
667 Just (c, _) -> (yield txt >> p') ^. span (equals c)
668{-# INLINABLE groupBy #-}
669
670-- | Improper lens that splits after the first succession of identical 'Char' s
671group :: Monad m
672 => Lens' (Producer Text m r)
673 (Producer Text m (Producer Text m r))
674group = groupBy (==)
675{-# INLINABLE group #-}
676
677{-| Improper lens that splits a 'Producer' after the first word
678
679 Unlike 'words', this does not drop leading whitespace
680-}
681word :: (Monad m)
682 => Lens' (Producer Text m r)
683 (Producer Text m (Producer Text m r))
684word k p0 = fmap join (k (to p0))
685 where
686 to p = do
687 p' <- p^.span isSpace
688 p'^.break isSpace
689{-# INLINABLE word #-}
690
691
692line :: (Monad m)
693 => Lens' (Producer Text m r)
694 (Producer Text m (Producer Text m r))
695line = break (== '\n')
696
697{-# INLINABLE line #-}
698
699
700-- | Intersperse a 'Char' in between the characters of stream of 'Text'
701intersperse
702 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
703intersperse c = go0
704 where
705 go0 p = do
706 x <- lift (next p)
707 case x of
708 Left r -> return r
709 Right (txt, p') -> do
710 yield (T.intersperse c txt)
711 go1 p'
712 go1 p = do
713 x <- lift (next p)
714 case x of
715 Left r -> return r
716 Right (txt, p') -> do
717 yield (T.singleton c)
718 yield (T.intersperse c txt)
719 go1 p'
720{-# INLINABLE intersperse #-}
721
722
723
724-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
725packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
726packChars = Data.Profunctor.dimap to (fmap from)
727 where
728 -- to :: Monad m => Producer Char m x -> Producer Text m x
7ed76745 729 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
9e9bb0ce 730
731 step diffAs c = diffAs . (c:)
732
733 done diffAs = T.pack (diffAs [])
734
735 -- from :: Monad m => Producer Text m x -> Producer Char m x
736 from p = for p (each . T.unpack)
737{-# INLINABLE packChars #-}
738
0f8c6f1b 739
740-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
741chunksOf
742 :: (Monad m, Integral n)
743 => n -> Lens' (Producer Text m r)
744 (FreeT (Producer Text m) m r)
745chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
746 where
747 go p = do
748 x <- next p
749 return $ case x of
7ed76745 750 Left r -> Pure r
751 Right (txt, p') -> Free $ do
0f8c6f1b 752 p'' <- (yield txt >> p') ^. splitAt n
7ed76745 753 return $ FreeT (go p'')
0f8c6f1b 754{-# INLINABLE chunksOf #-}
755
756
62e8521c 757{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 758 predicate
759-}
1677dc12 760splitsWith
91727d11 761 :: (Monad m)
762 => (Char -> Bool)
763 -> Producer Text m r
7ed76745 764 -> FreeT (Producer Text m) m r
765splitsWith predicate p0 = FreeT (go0 p0)
91727d11 766 where
767 go0 p = do
768 x <- next p
769 case x of
7ed76745 770 Left r -> return (Pure r)
31f41a5d 771 Right (txt, p') ->
772 if (T.null txt)
91727d11 773 then go0 p'
7ed76745 774 else return $ Free $ do
9e9bb0ce 775 p'' <- (yield txt >> p') ^. span (not . predicate)
7ed76745 776 return $ FreeT (go1 p'')
91727d11 777 go1 p = do
778 x <- nextChar p
779 return $ case x of
7ed76745 780 Left r -> Pure r
781 Right (_, p') -> Free $ do
9e9bb0ce 782 p'' <- p' ^. span (not . predicate)
7ed76745 783 return $ FreeT (go1 p'')
1677dc12 784{-# INLINABLE splitsWith #-}
91727d11 785
31f41a5d 786-- | Split a text stream using the given 'Char' as the delimiter
0f8c6f1b 787splits :: (Monad m)
91727d11 788 => Char
0f8c6f1b 789 -> Lens' (Producer Text m r)
790 (FreeT (Producer Text m) m r)
791splits c k p =
7ed76745 792 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
0f8c6f1b 793{-# INLINABLE splits #-}
794
795{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
796 given equivalence relation
797-}
798groupsBy
799 :: Monad m
800 => (Char -> Char -> Bool)
801 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
7ed76745 802groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
0f8c6f1b 803 go p = do x <- next p
7ed76745 804 case x of Left r -> return (Pure r)
0f8c6f1b 805 Right (bs, p') -> case T.uncons bs of
806 Nothing -> go p'
7ed76745 807 Just (c, _) -> do return $ Free $ do
0f8c6f1b 808 p'' <- (yield bs >> p')^.span (equals c)
7ed76745 809 return $ FreeT (go p'')
0f8c6f1b 810{-# INLINABLE groupsBy #-}
811
812
813-- | Like 'groupsBy', where the equality predicate is ('==')
814groups
815 :: Monad m
816 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
817groups = groupsBy (==)
818{-# INLINABLE groups #-}
819
91727d11 820
91727d11 821
62e8521c 822{-| Split a text stream into 'FreeT'-delimited lines
91727d11 823-}
824lines
0f8c6f1b 825 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
826lines = Data.Profunctor.dimap _lines (fmap _unlines)
91727d11 827 where
7ed76745 828 _lines p0 = FreeT (go0 p0)
0f8c6f1b 829 where
830 go0 p = do
831 x <- next p
832 case x of
7ed76745 833 Left r -> return (Pure r)
0f8c6f1b 834 Right (txt, p') ->
835 if (T.null txt)
836 then go0 p'
7ed76745 837 else return $ Free $ go1 (yield txt >> p')
0f8c6f1b 838 go1 p = do
839 p' <- p ^. break ('\n' ==)
7ed76745 840 return $ FreeT $ do
0f8c6f1b 841 x <- nextChar p'
842 case x of
7ed76745 843 Left r -> return $ Pure r
0f8c6f1b 844 Right (_, p'') -> go0 p''
845 -- _unlines
846 -- :: Monad m
847 -- => FreeT (Producer Text m) m x -> Producer Text m x
7fc48f7c 848 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
849
0f8c6f1b 850
91727d11 851{-# INLINABLE lines #-}
91727d11 852
31f41a5d 853
31f41a5d 854-- | Split a text stream into 'FreeT'-delimited words
91727d11 855words
0f8c6f1b 856 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
857words = Data.Profunctor.dimap go (fmap _unwords)
91727d11 858 where
7ed76745 859 go p = FreeT $ do
cf10d6f1 860 x <- next (p >-> dropWhile isSpace)
861 return $ case x of
7ed76745 862 Left r -> Pure r
863 Right (bs, p') -> Free $ do
9e9bb0ce 864 p'' <- (yield bs >> p') ^. break isSpace
cf10d6f1 865 return (go p'')
7ed76745 866 _unwords = PG.intercalates (yield $ T.singleton ' ')
0f8c6f1b 867
91727d11 868{-# INLINABLE words #-}
869
cf10d6f1 870
31f41a5d 871{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
872 interspersing a text stream in between them
91727d11 873-}
874intercalate
875 :: (Monad m)
876 => Producer Text m ()
877 -> FreeT (Producer Text m) m r
878 -> Producer Text m r
879intercalate p0 = go0
880 where
881 go0 f = do
7ed76745 882 x <- lift (runFreeT f)
91727d11 883 case x of
7ed76745 884 Pure r -> return r
885 Free p -> do
91727d11 886 f' <- p
887 go1 f'
888 go1 f = do
7ed76745 889 x <- lift (runFreeT f)
91727d11 890 case x of
7ed76745 891 Pure r -> return r
892 Free p -> do
91727d11 893 p0
894 f' <- p
895 go1 f'
896{-# INLINABLE intercalate #-}
897
62e8521c 898{-| Join 'FreeT'-delimited lines into a text stream
91727d11 899-}
900unlines
901 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
902unlines = go
903 where
904 go f = do
7ed76745 905 x <- lift (runFreeT f)
91727d11 906 case x of
7ed76745 907 Pure r -> return r
908 Free p -> do
91727d11 909 f' <- p
910 yield $ T.singleton '\n'
911 go f'
912{-# INLINABLE unlines #-}
913
31f41a5d 914{-| Join 'FreeT'-delimited words into a text stream
91727d11 915-}
916unwords
917 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
7fc48f7c 918unwords = intercalate (yield $ T.singleton ' ')
91727d11 919{-# INLINABLE unwords #-}
920
91727d11 921
91727d11 922{- $reexports
91727d11 923
924 @Data.Text@ re-exports the 'Text' type.
925
0f8c6f1b 926 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
64e03122 927-}
928
bbdfd305 929