]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
finished somewhat wordy documentation for Pipes.Text
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
9667f797
GG
1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
91727d11 3
7faef8bc 4module Pipes.Text (
e4b6dc67 5 -- * Introduction
6 -- $intro
7
91727d11 8 -- * Producers
1a83ae4e 9 fromLazy
91727d11 10
11 -- * Pipes
1677dc12 12 , map
13 , concatMap
14 , take
15 , drop
16 , takeWhile
17 , dropWhile
18 , filter
19 , scan
1677dc12 20 , pack
21 , unpack
22 , toCaseFold
23 , toLower
24 , toUpper
25 , stripStart
91727d11 26
27 -- * Folds
1677dc12 28 , toLazy
29 , toLazyM
30 , foldChars
31 , head
32 , last
33 , null
34 , length
35 , any
36 , all
37 , maximum
38 , minimum
39 , find
40 , index
41 , count
42
43 -- * Primitive Character Parsers
1677dc12 44 , nextChar
45 , drawChar
46 , unDrawChar
47 , peekChar
9e9bb0ce 48 , isEndOfChars
1677dc12 49
50 -- * Parsing Lenses
9e9bb0ce 51 , splitAt
1677dc12 52 , span
53 , break
54 , groupBy
55 , group
9e9bb0ce 56 , word
57 , line
1677dc12 58
59 -- * FreeT Splitters
60 , chunksOf
61 , splitsWith
0f8c6f1b 62 , splits
1a83ae4e 63 , groupsBy
64 , groups
1677dc12 65 , lines
66 , words
67
91727d11 68 -- * Transformations
1677dc12 69 , intersperse
9e9bb0ce 70 , packChars
31f41a5d 71
91727d11 72 -- * Joiners
1677dc12 73 , intercalate
74 , unlines
75 , unwords
9e9bb0ce 76
1a83ae4e 77 -- * Re-exports
91727d11 78 -- $reexports
1677dc12 79 , module Data.ByteString
80 , module Data.Text
81 , module Data.Profunctor
1677dc12 82 , module Pipes.Parse
7ed76745 83 , module Pipes.Group
91727d11 84 ) where
85
0f8c6f1b 86import Control.Applicative ((<*))
70125641 87import Control.Monad (liftM, join)
9e9bb0ce 88import Control.Monad.Trans.State.Strict (StateT(..), modify)
91727d11 89import qualified Data.Text as T
91727d11 90import Data.Text (Text)
91import qualified Data.Text.Lazy as TL
91727d11 92import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
31f41a5d 93import Data.ByteString (ByteString)
1677dc12 94import Data.Functor.Constant (Constant(Constant, getConstant))
91727d11 95import Data.Functor.Identity (Identity)
1677dc12 96import Data.Profunctor (Profunctor)
97import qualified Data.Profunctor
91727d11 98import Pipes
7fc48f7c 99import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
7ed76745 100import qualified Pipes.Group as PG
91727d11 101import qualified Pipes.Parse as PP
7ed76745 102import Pipes.Parse (Parser)
91727d11 103import qualified Pipes.Prelude as P
91727d11 104import Data.Char (isSpace)
1a83ae4e 105import Data.Word (Word8)
1677dc12 106
91727d11 107import Prelude hiding (
108 all,
109 any,
110 break,
111 concat,
112 concatMap,
113 drop,
114 dropWhile,
115 elem,
116 filter,
117 head,
118 last,
119 lines,
120 length,
121 map,
122 maximum,
123 minimum,
124 notElem,
125 null,
126 readFile,
127 span,
128 splitAt,
129 take,
130 takeWhile,
131 unlines,
132 unwords,
133 words,
134 writeFile )
135
e4b6dc67 136{- $intro
137
138 * /Effectful Text/
139
140 This package provides @pipes@ utilities for /text streams/, understood as
141 streams of 'Text' chunks. The individual chunks are uniformly /strict/, and thus you
142 will generally want @Data.Text@ in scope. But the type @Producer Text m r@ as we
143 are using it is a sort of pipes equivalent of the lazy @Text@ type.
144
145 This particular module provides many functions equivalent in one way or another to
146 the pure functions in
147 <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>.
148 They transform, divide, group and fold text streams. Though @Producer Text m r@
149 is the type of \'effectful Text\', the functions in this module are \'pure\'
150 in the sense that they are uniformly monad-independent.
151 Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@
152 operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@
153 is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@.
154
155 The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of
156 strict Text: the implementation is arranged so that the individual strict 'Text'
157 chunks are kept to a reasonable size; the user is not aware of the divisions
158 between the connected 'Text' chunks.
159 So also here: the functions in this module are designed to operate on streams that
160 are insensitive to text boundaries. This means that they may freely split
161 text into smaller texts and /discard empty texts/. The objective, though, is
162 that they should /never concatenate texts/ in order to provide strict upper
163 bounds on memory usage.
164
165 For example, to stream only the first three lines of 'stdin' to 'stdout' you
166 might write:
167
168> import Pipes
169> import qualified Pipes.Text as Text
170> import qualified Pipes.Text.IO as Text
171> import Pipes.Group (takes')
172> import Lens.Family
173>
174> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
175> where
176> takeLines n = Text.unlines . takes' n . view Text.lines
177
178 The above program will never bring more than one chunk of text (~ 32 KB) into
179 memory, no matter how long the lines are.
180
181 * /Lenses/
182
183 As this example shows, one superficial difference from @Data.Text.Lazy@
184 is that many of the operations, like 'lines',
185 are \'lensified\'; this has a number of advantages (where it is possible), in particular
186 it facilitates their use with 'Parser's of Text (in the general
187 <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse>
188 sense.)
189 Each such lens, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the
190 intuitively corresponding function when used with @view@ or @(^.)@.
191
192 Note similarly that many equivalents of 'Text -> Text' functions are exported here as 'Pipe's.
193 They reduce to the intuitively corresponding functions when used with '(>->)'. Thus something like
194
195> stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines
196
197 would drop the leading white space from each line.
198
199 The lens combinators
200 you will find indispensible are @view@ / @(^.)@), @zoom@ and probably @over@. These
201 are supplied by both <http://hackage.haskell.org/package/lens lens> and
202 <http://hackage.haskell.org/package/lens-family lens-family> The use of 'zoom' is explained
203 in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial>
204 and to some extent in the @Pipes.Text.Encoding@ module here. The use of
205 @over@ is simple, illustrated by the fact that we can rewrite @stripLines@ above as
206
207> stripLines = over Text.lines $ maps (>-> stripStart)
208
209
210 * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@
211
212 These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ .
213 This is in the types that are most closely associated with our central text type,
214 @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like
215
216> splitAt :: Int -> Text -> (Text, Text)
217> lines :: Text -> [Text]
218> chunksOf :: Int -> Text -> [Text]
219
220 which relate a Text with a pair of Texts or a list of Texts.
221 The corresponding functions here (taking account of \'lensification\') are
222
223> view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r)
224> view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
225> view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r
226
227 Some of the types may be more readable if you imagine that we have introduced
228 our own type synonyms
229
230> type Text m r = Producer T.Text m r
231> type Texts m r = FreeT (Producer T.Text m) m r
232
233 Then we would think of the types above as
234
235> view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r)
236> view lines :: (Monad m) => Text m r -> Texts m r
237> view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r
238
239 which brings one closer to the types of the similar functions in @Data.Text.Lazy@
240
241 In the type @Producer Text m (Producer Text m r)@ the second
242 element of the \'pair\' of effectful Texts cannot simply be retrieved
243 with something like 'snd'. This is an \'effectful\' pair, and one must work
244 through the effects of the first element to arrive at the second Text stream, even
245 if you are proposing to throw the Text in the first element away.
246 Note that we use Control.Monad.join to fuse the pair back together, since it specializes to
247
248> join :: Monad m => Producer Text m (Producer m r) -> Producer m r
249
250 The return type of 'lines', 'words', 'chunksOf' and the other "splitter" functions,
251 @FreeT (Producer m Text) m r@ -- our @Texts m r@ -- is the type of (effectful)
252 lists of (effectful) texts. The type @([Text],r)@ might be seen to gather
253 together things of the forms:
254
255> r
256> (Text,r)
257> (Text, (Text, r))
258> (Text, (Text, (Text, r)))
259> (Text, (Text, (Text, (Text, r))))
260> ...
261
262 We might also have identified the sum of those types with @Free ((,) Text) r@
263 -- or, more absurdly, @FreeT ((,) Text) Identity r@. Similarly, @FreeT (Producer Text m) m r@
264 encompasses all the members of the sequence:
265
266> m r
267> Producer Text m r
268> Producer Text m (Producer Text m r)
269> Producer Text m (Producer Text m (Producer Text m r))
270> ...
271
272 One might think that
273
274> lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r)
275> view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r
276
277 should really have the type
278
279> lines :: Monad m => Pipe Text Text m r
280
281 as e.g. 'toUpper' does. But this would spoil the control we are
282 attempting to maintain over the size of chunks. It is in fact just
283 as unreasonable to want such a pipe as to want
284
285> Data.Text.Lazy.lines :: Text -> Text
286
287 to 'rechunk' the strict Text chunks inside the lazy Text to respect
288 line boundaries. In fact we have
289
290> Data.Text.Lazy.lines :: Text -> [Text]
291> Prelude.lines :: String -> [String]
292
293 where the elements of the list are themselves lazy Texts or Strings; the use
294 of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this.
295
296 The @Pipes.Group@ module, which can generally be imported without qualification,
297 provides many functions for working with things of type @FreeT (Producer a m) m r@
298
299
300 -}
301
91727d11 302-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
303fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
304fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 305{-# INLINE fromLazy #-}
91727d11 306
1677dc12 307
308type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
309
310type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
311
312(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
313a ^. lens = getConstant (lens Constant a)
314
315
91727d11 316-- | Apply a transformation to each 'Char' in the stream
317map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
318map f = P.map (T.map f)
319{-# INLINABLE map #-}
320
ff38b9f0 321{-# RULES "p >-> map f" forall p f .
322 p >-> map f = for p (\txt -> yield (T.map f txt))
323 #-}
324
31f41a5d 325-- | Map a function over the characters of a text stream and concatenate the results
91727d11 326concatMap
327 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
328concatMap f = P.map (T.concatMap f)
329{-# INLINABLE concatMap #-}
330
ff38b9f0 331{-# RULES "p >-> concatMap f" forall p f .
332 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
333 #-}
7faef8bc 334
ff38b9f0 335
c0343bc9 336-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 337pack :: Monad m => Pipe String Text m r
338pack = P.map T.pack
339{-# INLINEABLE pack #-}
340
ff38b9f0 341{-# RULES "p >-> pack" forall p .
342 p >-> pack = for p (\txt -> yield (T.pack txt))
343 #-}
344
345-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 346unpack :: Monad m => Pipe Text String m r
d4732515 347unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 348{-# INLINEABLE unpack #-}
349
ff38b9f0 350{-# RULES "p >-> unpack" forall p .
351 p >-> unpack = for p (\txt -> yield (T.unpack txt))
352 #-}
d4732515 353
b0d86a59 354-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
355-- here acting as 'Text' pipes, rather as they would on a lazy text
a4913c42 356toCaseFold :: Monad m => Pipe Text Text m r
7faef8bc 357toCaseFold = P.map T.toCaseFold
358{-# INLINEABLE toCaseFold #-}
359
ff38b9f0 360{-# RULES "p >-> toCaseFold" forall p .
361 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
362 #-}
363
364
c0343bc9 365-- | lowercase incoming 'Text'
a4913c42 366toLower :: Monad m => Pipe Text Text m r
7faef8bc 367toLower = P.map T.toLower
368{-# INLINEABLE toLower #-}
369
ff38b9f0 370{-# RULES "p >-> toLower" forall p .
371 p >-> toLower = for p (\txt -> yield (T.toLower txt))
372 #-}
373
c0343bc9 374-- | uppercase incoming 'Text'
c70edb9d 375toUpper :: Monad m => Pipe Text Text m r
7faef8bc 376toUpper = P.map T.toUpper
377{-# INLINEABLE toUpper #-}
378
ff38b9f0 379{-# RULES "p >-> toUpper" forall p .
380 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
381 #-}
382
c0343bc9 383-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 384stripStart :: Monad m => Pipe Text Text m r
385stripStart = do
386 chunk <- await
387 let text = T.stripStart chunk
388 if T.null text
389 then stripStart
b0d86a59 390 else do yield text
391 cat
7faef8bc 392{-# INLINEABLE stripStart #-}
393
31f41a5d 394-- | @(take n)@ only allows @n@ individual characters to pass;
395-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 396take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
397take n0 = go n0 where
398 go n
399 | n <= 0 = return ()
400 | otherwise = do
31f41a5d 401 txt <- await
402 let len = fromIntegral (T.length txt)
91727d11 403 if (len > n)
31f41a5d 404 then yield (T.take (fromIntegral n) txt)
91727d11 405 else do
31f41a5d 406 yield txt
91727d11 407 go (n - len)
408{-# INLINABLE take #-}
409
31f41a5d 410-- | @(drop n)@ drops the first @n@ characters
91727d11 411drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
412drop n0 = go n0 where
413 go n
414 | n <= 0 = cat
415 | otherwise = do
31f41a5d 416 txt <- await
417 let len = fromIntegral (T.length txt)
91727d11 418 if (len >= n)
419 then do
31f41a5d 420 yield (T.drop (fromIntegral n) txt)
91727d11 421 cat
422 else go (n - len)
423{-# INLINABLE drop #-}
424
31f41a5d 425-- | Take characters until they fail the predicate
91727d11 426takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
427takeWhile predicate = go
428 where
429 go = do
31f41a5d 430 txt <- await
431 let (prefix, suffix) = T.span predicate txt
91727d11 432 if (T.null suffix)
433 then do
31f41a5d 434 yield txt
91727d11 435 go
436 else yield prefix
437{-# INLINABLE takeWhile #-}
438
31f41a5d 439-- | Drop characters until they fail the predicate
91727d11 440dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
441dropWhile predicate = go where
442 go = do
31f41a5d 443 txt <- await
444 case T.findIndex (not . predicate) txt of
91727d11 445 Nothing -> go
446 Just i -> do
31f41a5d 447 yield (T.drop i txt)
91727d11 448 cat
449{-# INLINABLE dropWhile #-}
450
451-- | Only allows 'Char's to pass if they satisfy the predicate
452filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
453filter predicate = P.map (T.filter predicate)
454{-# INLINABLE filter #-}
455
ff38b9f0 456{-# RULES "p >-> filter q" forall p q .
457 p >-> filter q = for p (\txt -> yield (T.filter q txt))
458 #-}
459
31f41a5d 460-- | Strict left scan over the characters
91727d11 461scan
462 :: (Monad m)
463 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
11645cdc
GG
464scan step begin = do
465 yield (T.singleton begin)
466 go begin
91727d11 467 where
31f41a5d 468 go c = do
469 txt <- await
470 let txt' = T.scanl step c txt
471 c' = T.last txt'
11645cdc 472 yield (T.tail txt')
31f41a5d 473 go c'
91727d11 474{-# INLINABLE scan #-}
475
476{-| Fold a pure 'Producer' of strict 'Text's into a lazy
477 'TL.Text'
478-}
479toLazy :: Producer Text Identity () -> TL.Text
480toLazy = TL.fromChunks . P.toList
481{-# INLINABLE toLazy #-}
482
483{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
484 'TL.Text'
485
486 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
487 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
488 immediately as they are generated instead of loading them all into memory.
489-}
490toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
491toLazyM = liftM TL.fromChunks . P.toListM
492{-# INLINABLE toLazyM #-}
493
31f41a5d 494-- | Reduce the text stream using a strict left fold over characters
64e03122 495foldChars
91727d11 496 :: Monad m
497 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 498foldChars step begin done = P.fold (T.foldl' step) begin done
1677dc12 499{-# INLINABLE foldChars #-}
91727d11 500
501-- | Retrieve the first 'Char'
502head :: (Monad m) => Producer Text m () -> m (Maybe Char)
503head = go
504 where
505 go p = do
506 x <- nextChar p
507 case x of
508 Left _ -> return Nothing
31f41a5d 509 Right (c, _) -> return (Just c)
91727d11 510{-# INLINABLE head #-}
511
512-- | Retrieve the last 'Char'
513last :: (Monad m) => Producer Text m () -> m (Maybe Char)
514last = go Nothing
515 where
516 go r p = do
517 x <- next p
518 case x of
519 Left () -> return r
31f41a5d 520 Right (txt, p') ->
521 if (T.null txt)
91727d11 522 then go r p'
31f41a5d 523 else go (Just $ T.last txt) p'
91727d11 524{-# INLINABLE last #-}
525
526-- | Determine if the stream is empty
527null :: (Monad m) => Producer Text m () -> m Bool
528null = P.all T.null
529{-# INLINABLE null #-}
530
62e8521c 531-- | Count the number of characters in the stream
91727d11 532length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 533length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 534{-# INLINABLE length #-}
535
536-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
537any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
538any predicate = P.any (T.any predicate)
539{-# INLINABLE any #-}
540
541-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
542all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
543all predicate = P.all (T.all predicate)
544{-# INLINABLE all #-}
545
62e8521c 546-- | Return the maximum 'Char' within a text stream
91727d11 547maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
548maximum = P.fold step Nothing id
549 where
31f41a5d 550 step mc txt =
551 if (T.null txt)
552 then mc
553 else Just $ case mc of
554 Nothing -> T.maximum txt
555 Just c -> max c (T.maximum txt)
91727d11 556{-# INLINABLE maximum #-}
557
62e8521c 558-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 559minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
560minimum = P.fold step Nothing id
561 where
31f41a5d 562 step mc txt =
563 if (T.null txt)
564 then mc
565 else case mc of
566 Nothing -> Just (T.minimum txt)
567 Just c -> Just (min c (T.minimum txt))
91727d11 568{-# INLINABLE minimum #-}
569
91727d11 570-- | Find the first element in the stream that matches the predicate
571find
572 :: (Monad m)
573 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
574find predicate p = head (p >-> filter predicate)
575{-# INLINABLE find #-}
576
62e8521c 577-- | Index into a text stream
91727d11 578index
579 :: (Monad m, Integral a)
580 => a-> Producer Text m () -> m (Maybe Char)
581index n p = head (p >-> drop n)
582{-# INLINABLE index #-}
583
63ea9ffd 584
31f41a5d 585-- | Store a tally of how many segments match the given 'Text'
586count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
587count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
588{-# INLINABLE count #-}
589
9e9bb0ce 590
1a83ae4e 591-- | Consume the first character from a stream of 'Text'
592--
593-- 'next' either fails with a 'Left' if the 'Producer' has no more characters or
594-- succeeds with a 'Right' providing the next character and the remainder of the
595-- 'Producer'.
9e9bb0ce 596
9e9bb0ce 597nextChar
598 :: (Monad m)
599 => Producer Text m r
600 -> m (Either r (Char, Producer Text m r))
601nextChar = go
602 where
603 go p = do
604 x <- next p
605 case x of
606 Left r -> return (Left r)
607 Right (txt, p') -> case (T.uncons txt) of
608 Nothing -> go p'
609 Just (c, txt') -> return (Right (c, yield txt' >> p'))
610{-# INLINABLE nextChar #-}
611
1a83ae4e 612-- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty
613
9e9bb0ce 614drawChar :: (Monad m) => Parser Text m (Maybe Char)
615drawChar = do
616 x <- PP.draw
617 case x of
618 Nothing -> return Nothing
619 Just txt -> case (T.uncons txt) of
620 Nothing -> drawChar
621 Just (c, txt') -> do
622 PP.unDraw txt'
623 return (Just c)
624{-# INLINABLE drawChar #-}
625
626-- | Push back a 'Char' onto the underlying 'Producer'
627unDrawChar :: (Monad m) => Char -> Parser Text m ()
628unDrawChar c = modify (yield (T.singleton c) >>)
629{-# INLINABLE unDrawChar #-}
630
631{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
632 push the 'Char' back
633
634> peekChar = do
635> x <- drawChar
636> case x of
637> Left _ -> return ()
638> Right c -> unDrawChar c
639> return x
1a83ae4e 640
9e9bb0ce 641-}
1a83ae4e 642
9e9bb0ce 643peekChar :: (Monad m) => Parser Text m (Maybe Char)
644peekChar = do
645 x <- drawChar
646 case x of
647 Nothing -> return ()
648 Just c -> unDrawChar c
649 return x
650{-# INLINABLE peekChar #-}
651
652{-| Check if the underlying 'Producer' has no more characters
653
654 Note that this will skip over empty 'Text' chunks, unlike
655 'PP.isEndOfInput' from @pipes-parse@, which would consider
656 an empty 'Text' a valid bit of input.
657
658> isEndOfChars = liftM isLeft peekChar
659-}
660isEndOfChars :: (Monad m) => Parser Text m Bool
661isEndOfChars = do
662 x <- peekChar
663 return (case x of
664 Nothing -> True
665 Just _-> False )
666{-# INLINABLE isEndOfChars #-}
667
668
31f41a5d 669-- | Splits a 'Producer' after the given number of characters
91727d11 670splitAt
671 :: (Monad m, Integral n)
672 => n
9e9bb0ce 673 -> Lens' (Producer Text m r)
674 (Producer Text m (Producer Text m r))
675splitAt n0 k p0 = fmap join (k (go n0 p0))
91727d11 676 where
677 go 0 p = return p
678 go n p = do
679 x <- lift (next p)
680 case x of
681 Left r -> return (return r)
31f41a5d 682 Right (txt, p') -> do
683 let len = fromIntegral (T.length txt)
91727d11 684 if (len <= n)
685 then do
31f41a5d 686 yield txt
91727d11 687 go (n - len) p'
688 else do
31f41a5d 689 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 690 yield prefix
691 return (yield suffix >> p')
692{-# INLINABLE splitAt #-}
693
91727d11 694
1a83ae4e 695-- | Split a text stream in two, producing the longest
696-- consecutive group of characters that satisfies the predicate
697-- and returning the rest
698
91727d11 699span
700 :: (Monad m)
701 => (Char -> Bool)
9e9bb0ce 702 -> Lens' (Producer Text m r)
703 (Producer Text m (Producer Text m r))
704span predicate k p0 = fmap join (k (go p0))
91727d11 705 where
706 go p = do
707 x <- lift (next p)
708 case x of
709 Left r -> return (return r)
31f41a5d 710 Right (txt, p') -> do
711 let (prefix, suffix) = T.span predicate txt
91727d11 712 if (T.null suffix)
713 then do
31f41a5d 714 yield txt
91727d11 715 go p'
716 else do
717 yield prefix
718 return (yield suffix >> p')
719{-# INLINABLE span #-}
720
1a83ae4e 721{-| Split a text stream in two, producing the longest
62e8521c 722 consecutive group of characters that don't satisfy the predicate
91727d11 723-}
724break
725 :: (Monad m)
726 => (Char -> Bool)
9e9bb0ce 727 -> Lens' (Producer Text m r)
728 (Producer Text m (Producer Text m r))
91727d11 729break predicate = span (not . predicate)
730{-# INLINABLE break #-}
731
9e9bb0ce 732{-| Improper lens that splits after the first group of equivalent Chars, as
733 defined by the given equivalence relation
734-}
735groupBy
736 :: (Monad m)
737 => (Char -> Char -> Bool)
738 -> Lens' (Producer Text m r)
739 (Producer Text m (Producer Text m r))
740groupBy equals k p0 = fmap join (k ((go p0))) where
741 go p = do
742 x <- lift (next p)
743 case x of
744 Left r -> return (return r)
745 Right (txt, p') -> case T.uncons txt of
746 Nothing -> go p'
747 Just (c, _) -> (yield txt >> p') ^. span (equals c)
748{-# INLINABLE groupBy #-}
749
750-- | Improper lens that splits after the first succession of identical 'Char' s
751group :: Monad m
752 => Lens' (Producer Text m r)
753 (Producer Text m (Producer Text m r))
754group = groupBy (==)
755{-# INLINABLE group #-}
756
757{-| Improper lens that splits a 'Producer' after the first word
758
759 Unlike 'words', this does not drop leading whitespace
760-}
761word :: (Monad m)
762 => Lens' (Producer Text m r)
763 (Producer Text m (Producer Text m r))
764word k p0 = fmap join (k (to p0))
765 where
766 to p = do
767 p' <- p^.span isSpace
768 p'^.break isSpace
769{-# INLINABLE word #-}
770
771
772line :: (Monad m)
773 => Lens' (Producer Text m r)
774 (Producer Text m (Producer Text m r))
775line = break (== '\n')
776
777{-# INLINABLE line #-}
778
779
780-- | Intersperse a 'Char' in between the characters of stream of 'Text'
781intersperse
782 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
783intersperse c = go0
784 where
785 go0 p = do
786 x <- lift (next p)
787 case x of
788 Left r -> return r
789 Right (txt, p') -> do
790 yield (T.intersperse c txt)
791 go1 p'
792 go1 p = do
793 x <- lift (next p)
794 case x of
795 Left r -> return r
796 Right (txt, p') -> do
797 yield (T.singleton c)
798 yield (T.intersperse c txt)
799 go1 p'
800{-# INLINABLE intersperse #-}
801
802
803
804-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
805packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
806packChars = Data.Profunctor.dimap to (fmap from)
807 where
808 -- to :: Monad m => Producer Char m x -> Producer Text m x
7ed76745 809 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
9e9bb0ce 810
811 step diffAs c = diffAs . (c:)
812
813 done diffAs = T.pack (diffAs [])
814
815 -- from :: Monad m => Producer Text m x -> Producer Char m x
816 from p = for p (each . T.unpack)
817{-# INLINABLE packChars #-}
818
0f8c6f1b 819
820-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
821chunksOf
822 :: (Monad m, Integral n)
823 => n -> Lens' (Producer Text m r)
824 (FreeT (Producer Text m) m r)
825chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
826 where
827 go p = do
828 x <- next p
829 return $ case x of
7ed76745 830 Left r -> Pure r
831 Right (txt, p') -> Free $ do
0f8c6f1b 832 p'' <- (yield txt >> p') ^. splitAt n
7ed76745 833 return $ FreeT (go p'')
0f8c6f1b 834{-# INLINABLE chunksOf #-}
835
836
62e8521c 837{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 838 predicate
839-}
1677dc12 840splitsWith
91727d11 841 :: (Monad m)
842 => (Char -> Bool)
843 -> Producer Text m r
7ed76745 844 -> FreeT (Producer Text m) m r
845splitsWith predicate p0 = FreeT (go0 p0)
91727d11 846 where
847 go0 p = do
848 x <- next p
849 case x of
7ed76745 850 Left r -> return (Pure r)
31f41a5d 851 Right (txt, p') ->
852 if (T.null txt)
91727d11 853 then go0 p'
7ed76745 854 else return $ Free $ do
9e9bb0ce 855 p'' <- (yield txt >> p') ^. span (not . predicate)
7ed76745 856 return $ FreeT (go1 p'')
91727d11 857 go1 p = do
858 x <- nextChar p
859 return $ case x of
7ed76745 860 Left r -> Pure r
861 Right (_, p') -> Free $ do
9e9bb0ce 862 p'' <- p' ^. span (not . predicate)
7ed76745 863 return $ FreeT (go1 p'')
1677dc12 864{-# INLINABLE splitsWith #-}
91727d11 865
31f41a5d 866-- | Split a text stream using the given 'Char' as the delimiter
0f8c6f1b 867splits :: (Monad m)
91727d11 868 => Char
0f8c6f1b 869 -> Lens' (Producer Text m r)
870 (FreeT (Producer Text m) m r)
871splits c k p =
7ed76745 872 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
0f8c6f1b 873{-# INLINABLE splits #-}
874
875{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
876 given equivalence relation
877-}
878groupsBy
879 :: Monad m
880 => (Char -> Char -> Bool)
881 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
7ed76745 882groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
0f8c6f1b 883 go p = do x <- next p
7ed76745 884 case x of Left r -> return (Pure r)
0f8c6f1b 885 Right (bs, p') -> case T.uncons bs of
886 Nothing -> go p'
7ed76745 887 Just (c, _) -> do return $ Free $ do
0f8c6f1b 888 p'' <- (yield bs >> p')^.span (equals c)
7ed76745 889 return $ FreeT (go p'')
0f8c6f1b 890{-# INLINABLE groupsBy #-}
891
892
893-- | Like 'groupsBy', where the equality predicate is ('==')
894groups
895 :: Monad m
896 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
897groups = groupsBy (==)
898{-# INLINABLE groups #-}
899
91727d11 900
91727d11 901
62e8521c 902{-| Split a text stream into 'FreeT'-delimited lines
91727d11 903-}
904lines
0f8c6f1b 905 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
906lines = Data.Profunctor.dimap _lines (fmap _unlines)
91727d11 907 where
7ed76745 908 _lines p0 = FreeT (go0 p0)
0f8c6f1b 909 where
910 go0 p = do
911 x <- next p
912 case x of
7ed76745 913 Left r -> return (Pure r)
0f8c6f1b 914 Right (txt, p') ->
915 if (T.null txt)
916 then go0 p'
7ed76745 917 else return $ Free $ go1 (yield txt >> p')
0f8c6f1b 918 go1 p = do
919 p' <- p ^. break ('\n' ==)
7ed76745 920 return $ FreeT $ do
0f8c6f1b 921 x <- nextChar p'
922 case x of
7ed76745 923 Left r -> return $ Pure r
0f8c6f1b 924 Right (_, p'') -> go0 p''
925 -- _unlines
926 -- :: Monad m
927 -- => FreeT (Producer Text m) m x -> Producer Text m x
7fc48f7c 928 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
929
0f8c6f1b 930
91727d11 931{-# INLINABLE lines #-}
91727d11 932
31f41a5d 933
31f41a5d 934-- | Split a text stream into 'FreeT'-delimited words
91727d11 935words
0f8c6f1b 936 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
937words = Data.Profunctor.dimap go (fmap _unwords)
91727d11 938 where
7ed76745 939 go p = FreeT $ do
cf10d6f1 940 x <- next (p >-> dropWhile isSpace)
941 return $ case x of
7ed76745 942 Left r -> Pure r
943 Right (bs, p') -> Free $ do
9e9bb0ce 944 p'' <- (yield bs >> p') ^. break isSpace
cf10d6f1 945 return (go p'')
7ed76745 946 _unwords = PG.intercalates (yield $ T.singleton ' ')
0f8c6f1b 947
91727d11 948{-# INLINABLE words #-}
949
cf10d6f1 950
31f41a5d 951{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
952 interspersing a text stream in between them
91727d11 953-}
954intercalate
955 :: (Monad m)
956 => Producer Text m ()
957 -> FreeT (Producer Text m) m r
958 -> Producer Text m r
959intercalate p0 = go0
960 where
961 go0 f = do
7ed76745 962 x <- lift (runFreeT f)
91727d11 963 case x of
7ed76745 964 Pure r -> return r
965 Free p -> do
91727d11 966 f' <- p
967 go1 f'
968 go1 f = do
7ed76745 969 x <- lift (runFreeT f)
91727d11 970 case x of
7ed76745 971 Pure r -> return r
972 Free p -> do
91727d11 973 p0
974 f' <- p
975 go1 f'
976{-# INLINABLE intercalate #-}
977
62e8521c 978{-| Join 'FreeT'-delimited lines into a text stream
91727d11 979-}
980unlines
981 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
982unlines = go
983 where
984 go f = do
7ed76745 985 x <- lift (runFreeT f)
91727d11 986 case x of
7ed76745 987 Pure r -> return r
988 Free p -> do
91727d11 989 f' <- p
990 yield $ T.singleton '\n'
991 go f'
992{-# INLINABLE unlines #-}
993
31f41a5d 994{-| Join 'FreeT'-delimited words into a text stream
91727d11 995-}
996unwords
997 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
7fc48f7c 998unwords = intercalate (yield $ T.singleton ' ')
91727d11 999{-# INLINABLE unwords #-}
1000
91727d11 1001
91727d11 1002{- $reexports
91727d11 1003
1004 @Data.Text@ re-exports the 'Text' type.
1005
0f8c6f1b 1006 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
64e03122 1007-}
1008
bbdfd305 1009