]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
prophylactic RULEs for Pipes.maps
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
31f41a5d 1{-# LANGUAGE RankNTypes, TypeFamilies, CPP #-}
91727d11 2
13a43263 3{-| This module provides @pipes@ utilities for \"text streams\", which are
31f41a5d 4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
63ea9ffd 5 a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
6 be associated with a 'Producer' or 'Consumer' according as it is read or written to.
91727d11 7
63ea9ffd 8 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
31f41a5d 9 example, the following program copies a document from one file to another:
91727d11 10
11> import Pipes
31f41a5d 12> import qualified Data.Text.Pipes as Text
91727d11 13> import System.IO
14>
15> main =
16> withFile "inFile.txt" ReadMode $ \hIn ->
17> withFile "outFile.txt" WriteMode $ \hOut ->
31f41a5d 18> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
19
20To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
91727d11 21
13a43263 22> import Pipes
31f41a5d 23> import qualified Data.Text.Pipes as Text
13a43263 24> import Pipes.Safe
25>
31f41a5d 26> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
13a43263 27
91727d11 28 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
31f41a5d 29 and 'stdout' proxies, as with the following \"echo\" program:
91727d11 30
31f41a5d 31> main = runEffect $ Text.stdin >-> Text.stdout
91727d11 32
33 You can also translate pure lazy 'TL.Text's to and from proxies:
34
31f41a5d 35> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
91727d11 36
37 In addition, this module provides many functions equivalent to lazy
31f41a5d 38 'Text' functions so that you can transform or fold text streams. For
91727d11 39 example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 40 might write:
91727d11 41
42> import Pipes
31f41a5d 43> import qualified Pipes.Text as Text
44> import qualified Pipes.Parse as Parse
91727d11 45>
31f41a5d 46> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
91727d11 47> where
31f41a5d 48> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
91727d11 49
31f41a5d 50 The above program will never bring more than one chunk of text (~ 32 KB) into
91727d11 51 memory, no matter how long the lines are.
52
53 Note that functions in this library are designed to operate on streams that
31f41a5d 54 are insensitive to text boundaries. This means that they may freely split
55 text into smaller texts and /discard empty texts/. However, they will
56 /never concatenate texts/ in order to provide strict upper bounds on memory
91727d11 57 usage.
58-}
59
7faef8bc 60module Pipes.Text (
91727d11 61 -- * Producers
62 fromLazy,
63 stdin,
64 fromHandle,
65 readFile,
66 stdinLn,
91727d11 67
68 -- * Consumers
69 stdout,
70 stdoutLn,
71 toHandle,
72 writeFile,
73
74 -- * Pipes
75 map,
76 concatMap,
77 take,
78 drop,
79 takeWhile,
80 dropWhile,
81 filter,
91727d11 82 scan,
7faef8bc 83 encodeUtf8,
a02a69ad 84#if MIN_VERSION_text(0,11,4)
85 pipeDecodeUtf8,
86 pipeDecodeUtf8With,
87#endif
7faef8bc 88 pack,
89 unpack,
1d2434b5 90 toCaseFold,
91 toLower,
92 toUpper,
7faef8bc 93 stripStart,
91727d11 94
95 -- * Folds
96 toLazy,
97 toLazyM,
98 fold,
99 head,
100 last,
101 null,
102 length,
103 any,
104 all,
105 maximum,
106 minimum,
91727d11 107 find,
108 index,
31f41a5d 109 count,
91727d11 110
111 -- * Splitters
112 splitAt,
113 chunksOf,
114 span,
115 break,
116 splitWith,
117 split,
118 groupBy,
119 group,
120 lines,
121 words,
31f41a5d 122#if MIN_VERSION_text(0,11,4)
123 decodeUtf8,
63ea9ffd 124 decodeUtf8With,
31f41a5d 125#endif
91727d11 126 -- * Transformations
127 intersperse,
31f41a5d 128
91727d11 129 -- * Joiners
130 intercalate,
131 unlines,
132 unwords,
133
31f41a5d 134 -- * Character Parsers
91727d11 135 -- $parse
31f41a5d 136 nextChar,
137 drawChar,
138 unDrawChar,
139 peekChar,
140 isEndOfChars,
91727d11 141
142 -- * Re-exports
143 -- $reexports
144 module Data.Text,
91727d11 145 module Pipes.Parse
146 ) where
147
148import Control.Exception (throwIO, try)
149import Control.Monad (liftM, unless)
acc6868f 150import Control.Monad.Trans.State.Strict (StateT(..))
91727d11 151import qualified Data.Text as T
152import qualified Data.Text.IO as T
31f41a5d 153import qualified Data.Text.Encoding as TE
63ea9ffd 154import qualified Data.Text.Encoding.Error as TE
91727d11 155import Data.Text (Text)
156import qualified Data.Text.Lazy as TL
157import qualified Data.Text.Lazy.IO as TL
158import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
159import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
31f41a5d 160import Data.ByteString (ByteString)
161import qualified Data.ByteString as B
cf10d6f1 162import Data.Char (ord, isSpace)
91727d11 163import Data.Functor.Identity (Identity)
164import qualified Data.List as List
165import Foreign.C.Error (Errno(Errno), ePIPE)
166import qualified GHC.IO.Exception as G
167import Pipes
168import qualified Pipes.ByteString.Parse as PBP
c0343bc9 169import Pipes.Text.Parse (
31f41a5d 170 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
91727d11 171import Pipes.Core (respond, Server')
172import qualified Pipes.Parse as PP
173import Pipes.Parse (input, concat, FreeT)
174import qualified Pipes.Safe.Prelude as Safe
175import qualified Pipes.Safe as Safe
176import Pipes.Safe (MonadSafe(..), Base(..))
177import qualified Pipes.Prelude as P
178import qualified System.IO as IO
179import Data.Char (isSpace)
63ea9ffd 180import Data.Word (Word8)
91727d11 181import Prelude hiding (
182 all,
183 any,
184 break,
185 concat,
186 concatMap,
187 drop,
188 dropWhile,
189 elem,
190 filter,
191 head,
192 last,
193 lines,
194 length,
195 map,
196 maximum,
197 minimum,
198 notElem,
199 null,
200 readFile,
201 span,
202 splitAt,
203 take,
204 takeWhile,
205 unlines,
206 unwords,
207 words,
208 writeFile )
209
210-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
211fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
212fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
213{-# INLINABLE fromLazy #-}
214
62e8521c 215-- | Stream text from 'stdin'
91727d11 216stdin :: MonadIO m => Producer' Text m ()
217stdin = fromHandle IO.stdin
218{-# INLINABLE stdin #-}
219
31f41a5d 220{-| Convert a 'IO.Handle' into a text stream using a text size
221 determined by the good sense of the text library.
222
223-}
224
91727d11 225fromHandle :: MonadIO m => IO.Handle -> Producer' Text m ()
226fromHandle h = go where
227 go = do txt <- liftIO (T.hGetChunk h)
228 unless (T.null txt) $ do yield txt
229 go
230{-# INLINABLE fromHandle#-}
231
6f6f9974 232{-| Stream text from a file using Pipes.Safe
233
31f41a5d 234>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
235MAIN = PUTSTRLN "HELLO WORLD"
6f6f9974 236-}
237
91727d11 238readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m ()
239readFile file = Safe.withFile file IO.ReadMode fromHandle
240{-# INLINABLE readFile #-}
241
31f41a5d 242{-| Stream lines of text from stdin (for testing in ghci etc.)
243
244>>> let safely = runSafeT . runEffect
245>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
246hello
2475
248world
2495
250
251-}
91727d11 252stdinLn :: MonadIO m => Producer' Text m ()
31f41a5d 253stdinLn = go where
91727d11 254 go = do
255 eof <- liftIO (IO.hIsEOF IO.stdin)
256 unless eof $ do
257 txt <- liftIO (T.hGetLine IO.stdin)
258 yield txt
259 go
260
91727d11 261
31f41a5d 262{-| Stream text to 'stdout'
91727d11 263
264 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
265
266 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
31f41a5d 267 @(source >-> stdout)@ in suitable cases.
91727d11 268-}
269stdout :: MonadIO m => Consumer' Text m ()
270stdout = go
271 where
272 go = do
273 txt <- await
274 x <- liftIO $ try (T.putStr txt)
275 case x of
276 Left (G.IOError { G.ioe_type = G.ResourceVanished
277 , G.ioe_errno = Just ioe })
278 | Errno ioe == ePIPE
279 -> return ()
280 Left e -> liftIO (throwIO e)
281 Right () -> go
282{-# INLINABLE stdout #-}
283
284stdoutLn :: (MonadIO m) => Consumer' Text m ()
285stdoutLn = go
286 where
287 go = do
288 str <- await
289 x <- liftIO $ try (T.putStrLn str)
290 case x of
291 Left (G.IOError { G.ioe_type = G.ResourceVanished
292 , G.ioe_errno = Just ioe })
293 | Errno ioe == ePIPE
294 -> return ()
295 Left e -> liftIO (throwIO e)
296 Right () -> go
297{-# INLINABLE stdoutLn #-}
298
31f41a5d 299{-| Convert a text stream into a 'Handle'
91727d11 300
31f41a5d 301 Note: again, for best performance, where possible use
302 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
91727d11 303-}
304toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
305toHandle h = for cat (liftIO . T.hPutStr h)
306{-# INLINABLE toHandle #-}
307
d4732515 308{-# RULES "p >-> toHandle h" forall p h .
ff38b9f0 309 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
d4732515 310 #-}
311
312
31f41a5d 313-- | Stream text into a file. Uses @pipes-safe@.
91727d11 314writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m ()
315writeFile file = Safe.withFile file IO.WriteMode toHandle
316
317-- | Apply a transformation to each 'Char' in the stream
318map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
319map f = P.map (T.map f)
320{-# INLINABLE map #-}
321
ff38b9f0 322{-# RULES "p >-> map f" forall p f .
323 p >-> map f = for p (\txt -> yield (T.map f txt))
324 #-}
325
31f41a5d 326-- | Map a function over the characters of a text stream and concatenate the results
91727d11 327concatMap
328 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
329concatMap f = P.map (T.concatMap f)
330{-# INLINABLE concatMap #-}
331
ff38b9f0 332{-# RULES "p >-> concatMap f" forall p f .
333 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
334 #-}
7faef8bc 335
336-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
a02a69ad 337-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
338-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
7faef8bc 339encodeUtf8 :: Monad m => Pipe Text ByteString m r
340encodeUtf8 = P.map TE.encodeUtf8
341{-# INLINEABLE encodeUtf8 #-}
342
ff38b9f0 343{-# RULES "p >-> encodeUtf8" forall p .
344 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
345 #-}
346
c0343bc9 347-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 348pack :: Monad m => Pipe String Text m r
349pack = P.map T.pack
350{-# INLINEABLE pack #-}
351
ff38b9f0 352{-# RULES "p >-> pack" forall p .
353 p >-> pack = for p (\txt -> yield (T.pack txt))
354 #-}
355
356-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 357unpack :: Monad m => Pipe Text String m r
d4732515 358unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 359{-# INLINEABLE unpack #-}
360
ff38b9f0 361{-# RULES "p >-> unpack" forall p .
362 p >-> unpack = for p (\txt -> yield (T.unpack txt))
363 #-}
d4732515 364
c0343bc9 365-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
7faef8bc 366-- here acting on a 'Text' pipe, rather as they would on a lazy text
367toCaseFold :: Monad m => Pipe Text Text m ()
368toCaseFold = P.map T.toCaseFold
369{-# INLINEABLE toCaseFold #-}
370
ff38b9f0 371{-# RULES "p >-> toCaseFold" forall p .
372 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
373 #-}
374
375
c0343bc9 376-- | lowercase incoming 'Text'
7faef8bc 377toLower :: Monad m => Pipe Text Text m ()
378toLower = P.map T.toLower
379{-# INLINEABLE toLower #-}
380
ff38b9f0 381{-# RULES "p >-> toLower" forall p .
382 p >-> toLower = for p (\txt -> yield (T.toLower txt))
383 #-}
384
c0343bc9 385-- | uppercase incoming 'Text'
7faef8bc 386toUpper :: Monad m => Pipe Text Text m ()
387toUpper = P.map T.toUpper
388{-# INLINEABLE toUpper #-}
389
ff38b9f0 390{-# RULES "p >-> toUpper" forall p .
391 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
392 #-}
393
c0343bc9 394-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 395stripStart :: Monad m => Pipe Text Text m r
396stripStart = do
397 chunk <- await
398 let text = T.stripStart chunk
399 if T.null text
400 then stripStart
401 else cat
402{-# INLINEABLE stripStart #-}
403
31f41a5d 404-- | @(take n)@ only allows @n@ individual characters to pass;
405-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 406take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
407take n0 = go n0 where
408 go n
409 | n <= 0 = return ()
410 | otherwise = do
31f41a5d 411 txt <- await
412 let len = fromIntegral (T.length txt)
91727d11 413 if (len > n)
31f41a5d 414 then yield (T.take (fromIntegral n) txt)
91727d11 415 else do
31f41a5d 416 yield txt
91727d11 417 go (n - len)
418{-# INLINABLE take #-}
419
31f41a5d 420-- | @(drop n)@ drops the first @n@ characters
91727d11 421drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
422drop n0 = go n0 where
423 go n
424 | n <= 0 = cat
425 | otherwise = do
31f41a5d 426 txt <- await
427 let len = fromIntegral (T.length txt)
91727d11 428 if (len >= n)
429 then do
31f41a5d 430 yield (T.drop (fromIntegral n) txt)
91727d11 431 cat
432 else go (n - len)
433{-# INLINABLE drop #-}
434
31f41a5d 435-- | Take characters until they fail the predicate
91727d11 436takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
437takeWhile predicate = go
438 where
439 go = do
31f41a5d 440 txt <- await
441 let (prefix, suffix) = T.span predicate txt
91727d11 442 if (T.null suffix)
443 then do
31f41a5d 444 yield txt
91727d11 445 go
446 else yield prefix
447{-# INLINABLE takeWhile #-}
448
31f41a5d 449-- | Drop characters until they fail the predicate
91727d11 450dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
451dropWhile predicate = go where
452 go = do
31f41a5d 453 txt <- await
454 case T.findIndex (not . predicate) txt of
91727d11 455 Nothing -> go
456 Just i -> do
31f41a5d 457 yield (T.drop i txt)
91727d11 458 cat
459{-# INLINABLE dropWhile #-}
460
461-- | Only allows 'Char's to pass if they satisfy the predicate
462filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
463filter predicate = P.map (T.filter predicate)
464{-# INLINABLE filter #-}
465
ff38b9f0 466{-# RULES "p >-> filter q" forall p q .
467 p >-> filter q = for p (\txt -> yield (T.filter q txt))
468 #-}
469
31f41a5d 470-- | Strict left scan over the characters
91727d11 471scan
472 :: (Monad m)
473 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
474scan step begin = go begin
475 where
31f41a5d 476 go c = do
477 txt <- await
478 let txt' = T.scanl step c txt
479 c' = T.last txt'
480 yield txt'
481 go c'
91727d11 482{-# INLINABLE scan #-}
483
484{-| Fold a pure 'Producer' of strict 'Text's into a lazy
485 'TL.Text'
486-}
487toLazy :: Producer Text Identity () -> TL.Text
488toLazy = TL.fromChunks . P.toList
489{-# INLINABLE toLazy #-}
490
491{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
492 'TL.Text'
493
494 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
495 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
496 immediately as they are generated instead of loading them all into memory.
497-}
498toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
499toLazyM = liftM TL.fromChunks . P.toListM
500{-# INLINABLE toLazyM #-}
501
31f41a5d 502-- | Reduce the text stream using a strict left fold over characters
91727d11 503fold
504 :: Monad m
505 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
31f41a5d 506fold step begin done = P.fold (T.foldl' step) begin done
91727d11 507{-# INLINABLE fold #-}
508
509-- | Retrieve the first 'Char'
510head :: (Monad m) => Producer Text m () -> m (Maybe Char)
511head = go
512 where
513 go p = do
514 x <- nextChar p
515 case x of
516 Left _ -> return Nothing
31f41a5d 517 Right (c, _) -> return (Just c)
91727d11 518{-# INLINABLE head #-}
519
520-- | Retrieve the last 'Char'
521last :: (Monad m) => Producer Text m () -> m (Maybe Char)
522last = go Nothing
523 where
524 go r p = do
525 x <- next p
526 case x of
527 Left () -> return r
31f41a5d 528 Right (txt, p') ->
529 if (T.null txt)
91727d11 530 then go r p'
31f41a5d 531 else go (Just $ T.last txt) p'
91727d11 532{-# INLINABLE last #-}
533
534-- | Determine if the stream is empty
535null :: (Monad m) => Producer Text m () -> m Bool
536null = P.all T.null
537{-# INLINABLE null #-}
538
62e8521c 539-- | Count the number of characters in the stream
91727d11 540length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 541length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 542{-# INLINABLE length #-}
543
544-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
545any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
546any predicate = P.any (T.any predicate)
547{-# INLINABLE any #-}
548
549-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
550all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
551all predicate = P.all (T.all predicate)
552{-# INLINABLE all #-}
553
62e8521c 554-- | Return the maximum 'Char' within a text stream
91727d11 555maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
556maximum = P.fold step Nothing id
557 where
31f41a5d 558 step mc txt =
559 if (T.null txt)
560 then mc
561 else Just $ case mc of
562 Nothing -> T.maximum txt
563 Just c -> max c (T.maximum txt)
91727d11 564{-# INLINABLE maximum #-}
565
62e8521c 566-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 567minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
568minimum = P.fold step Nothing id
569 where
31f41a5d 570 step mc txt =
571 if (T.null txt)
572 then mc
573 else case mc of
574 Nothing -> Just (T.minimum txt)
575 Just c -> Just (min c (T.minimum txt))
91727d11 576{-# INLINABLE minimum #-}
577
91727d11 578-- | Find the first element in the stream that matches the predicate
579find
580 :: (Monad m)
581 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
582find predicate p = head (p >-> filter predicate)
583{-# INLINABLE find #-}
584
62e8521c 585-- | Index into a text stream
91727d11 586index
587 :: (Monad m, Integral a)
588 => a-> Producer Text m () -> m (Maybe Char)
589index n p = head (p >-> drop n)
590{-# INLINABLE index #-}
591
63ea9ffd 592
31f41a5d 593-- | Store a tally of how many segments match the given 'Text'
594count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
595count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
596{-# INLINABLE count #-}
597
598#if MIN_VERSION_text(0,11,4)
599-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
600-- into a Pipe of Text
601decodeUtf8
602 :: Monad m
603 => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
604decodeUtf8 = go TE.streamDecodeUtf8
605 where go dec p = do
606 x <- lift (next p)
607 case x of
608 Left r -> return (return r)
609 Right (chunk, p') -> do
610 let TE.Some text l dec' = dec chunk
611 if B.null l
612 then do
613 yield text
614 go dec' p'
615 else return $ do
616 yield l
617 p'
618{-# INLINEABLE decodeUtf8 #-}
63ea9ffd 619
620-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
621-- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@
622-- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\"
623decodeUtf8With
624 :: Monad m
625 => TE.OnDecodeError
626 -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
627decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
628 where go dec p = do
629 x <- lift (next p)
630 case x of
631 Left r -> return (return r)
632 Right (chunk, p') -> do
633 let TE.Some text l dec' = dec chunk
634 if B.null l
635 then do
636 yield text
637 go dec' p'
638 else return $ do
639 yield l
640 p'
641{-# INLINEABLE decodeUtf8With #-}
a02a69ad 642
643-- | A simple pipe from 'ByteString' to 'Text'; a decoding error will arise
644-- with any chunk that contains a sequence of bytes that is unreadable. Otherwise
645-- only few bytes will only be moved from one chunk to the next before decoding.
646pipeDecodeUtf8 :: Monad m => Pipe ByteString Text m r
647pipeDecodeUtf8 = go TE.streamDecodeUtf8
648 where go dec = do chunk <- await
649 case dec chunk of
650 TE.Some text l dec' -> do yield text
651 go dec'
652{-# INLINEABLE pipeDecodeUtf8 #-}
653
654-- | A simple pipe from 'ByteString' to 'Text' using a replacement function.
655pipeDecodeUtf8With
656 :: Monad m
657 => TE.OnDecodeError
658 -> Pipe ByteString Text m r
659pipeDecodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
660 where go dec = do chunk <- await
661 case dec chunk of
662 TE.Some text l dec' -> do yield text
663 go dec'
664{-# INLINEABLE pipeDecodeUtf8With #-}
31f41a5d 665#endif
666
667-- | Splits a 'Producer' after the given number of characters
91727d11 668splitAt
669 :: (Monad m, Integral n)
670 => n
671 -> Producer Text m r
672 -> Producer' Text m (Producer Text m r)
673splitAt = go
674 where
675 go 0 p = return p
676 go n p = do
677 x <- lift (next p)
678 case x of
679 Left r -> return (return r)
31f41a5d 680 Right (txt, p') -> do
681 let len = fromIntegral (T.length txt)
91727d11 682 if (len <= n)
683 then do
31f41a5d 684 yield txt
91727d11 685 go (n - len) p'
686 else do
31f41a5d 687 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 688 yield prefix
689 return (yield suffix >> p')
690{-# INLINABLE splitAt #-}
691
31f41a5d 692-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
91727d11 693chunksOf
694 :: (Monad m, Integral n)
695 => n -> Producer Text m r -> FreeT (Producer Text m) m r
696chunksOf n p0 = PP.FreeT (go p0)
697 where
698 go p = do
699 x <- next p
700 return $ case x of
701 Left r -> PP.Pure r
31f41a5d 702 Right (txt, p') -> PP.Free $ do
703 p'' <- splitAt n (yield txt >> p')
91727d11 704 return $ PP.FreeT (go p'')
705{-# INLINABLE chunksOf #-}
706
31f41a5d 707{-| Split a text stream in two, where the first text stream is the longest
708 consecutive group of text that satisfy the predicate
91727d11 709-}
710span
711 :: (Monad m)
712 => (Char -> Bool)
713 -> Producer Text m r
714 -> Producer' Text m (Producer Text m r)
715span predicate = go
716 where
717 go p = do
718 x <- lift (next p)
719 case x of
720 Left r -> return (return r)
31f41a5d 721 Right (txt, p') -> do
722 let (prefix, suffix) = T.span predicate txt
91727d11 723 if (T.null suffix)
724 then do
31f41a5d 725 yield txt
91727d11 726 go p'
727 else do
728 yield prefix
729 return (yield suffix >> p')
730{-# INLINABLE span #-}
731
62e8521c 732{-| Split a text stream in two, where the first text stream is the longest
733 consecutive group of characters that don't satisfy the predicate
91727d11 734-}
735break
736 :: (Monad m)
737 => (Char -> Bool)
738 -> Producer Text m r
739 -> Producer Text m (Producer Text m r)
740break predicate = span (not . predicate)
741{-# INLINABLE break #-}
742
62e8521c 743{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 744 predicate
745-}
746splitWith
747 :: (Monad m)
748 => (Char -> Bool)
749 -> Producer Text m r
750 -> PP.FreeT (Producer Text m) m r
751splitWith predicate p0 = PP.FreeT (go0 p0)
752 where
753 go0 p = do
754 x <- next p
755 case x of
756 Left r -> return (PP.Pure r)
31f41a5d 757 Right (txt, p') ->
758 if (T.null txt)
91727d11 759 then go0 p'
760 else return $ PP.Free $ do
31f41a5d 761 p'' <- span (not . predicate) (yield txt >> p')
91727d11 762 return $ PP.FreeT (go1 p'')
763 go1 p = do
764 x <- nextChar p
765 return $ case x of
766 Left r -> PP.Pure r
767 Right (_, p') -> PP.Free $ do
768 p'' <- span (not . predicate) p'
769 return $ PP.FreeT (go1 p'')
770{-# INLINABLE splitWith #-}
771
31f41a5d 772-- | Split a text stream using the given 'Char' as the delimiter
91727d11 773split :: (Monad m)
774 => Char
775 -> Producer Text m r
776 -> FreeT (Producer Text m) m r
31f41a5d 777split c = splitWith (c ==)
91727d11 778{-# INLINABLE split #-}
779
62e8521c 780{-| Group a text stream into 'FreeT'-delimited text streams using the supplied
91727d11 781 equality predicate
782-}
783groupBy
784 :: (Monad m)
785 => (Char -> Char -> Bool)
786 -> Producer Text m r
787 -> FreeT (Producer Text m) m r
788groupBy equal p0 = PP.FreeT (go p0)
789 where
790 go p = do
791 x <- next p
792 case x of
793 Left r -> return (PP.Pure r)
31f41a5d 794 Right (txt, p') -> case (T.uncons txt) of
91727d11 795 Nothing -> go p'
31f41a5d 796 Just (c, _) -> do
91727d11 797 return $ PP.Free $ do
31f41a5d 798 p'' <- span (equal c) (yield txt >> p')
91727d11 799 return $ PP.FreeT (go p'')
800{-# INLINABLE groupBy #-}
801
62e8521c 802-- | Group a text stream into 'FreeT'-delimited text streams of identical characters
91727d11 803group
804 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
805group = groupBy (==)
806{-# INLINABLE group #-}
807
62e8521c 808{-| Split a text stream into 'FreeT'-delimited lines
91727d11 809-}
810lines
811 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
812lines p0 = PP.FreeT (go0 p0)
813 where
814 go0 p = do
815 x <- next p
816 case x of
817 Left r -> return (PP.Pure r)
31f41a5d 818 Right (txt, p') ->
819 if (T.null txt)
91727d11 820 then go0 p'
31f41a5d 821 else return $ PP.Free $ go1 (yield txt >> p')
91727d11 822 go1 p = do
823 p' <- break ('\n' ==) p
b4d21c02 824 return $ PP.FreeT $ do
825 x <- nextChar p'
826 case x of
827 Left r -> return $ PP.Pure r
828 Right (_, p'') -> go0 p''
91727d11 829{-# INLINABLE lines #-}
91727d11 830
31f41a5d 831
832
833-- | Split a text stream into 'FreeT'-delimited words
91727d11 834words
835 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
cf10d6f1 836words = go
91727d11 837 where
cf10d6f1 838 go p = PP.FreeT $ do
839 x <- next (p >-> dropWhile isSpace)
840 return $ case x of
841 Left r -> PP.Pure r
842 Right (bs, p') -> PP.Free $ do
843 p'' <- break isSpace (yield bs >> p')
844 return (go p'')
91727d11 845{-# INLINABLE words #-}
846
cf10d6f1 847
62e8521c 848-- | Intersperse a 'Char' in between the characters of the text stream
91727d11 849intersperse
850 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
31f41a5d 851intersperse c = go0
91727d11 852 where
853 go0 p = do
854 x <- lift (next p)
855 case x of
856 Left r -> return r
31f41a5d 857 Right (txt, p') -> do
858 yield (T.intersperse c txt)
91727d11 859 go1 p'
860 go1 p = do
861 x <- lift (next p)
862 case x of
863 Left r -> return r
31f41a5d 864 Right (txt, p') -> do
865 yield (T.singleton c)
866 yield (T.intersperse c txt)
91727d11 867 go1 p'
868{-# INLINABLE intersperse #-}
869
31f41a5d 870{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
871 interspersing a text stream in between them
91727d11 872-}
873intercalate
874 :: (Monad m)
875 => Producer Text m ()
876 -> FreeT (Producer Text m) m r
877 -> Producer Text m r
878intercalate p0 = go0
879 where
880 go0 f = do
881 x <- lift (PP.runFreeT f)
882 case x of
883 PP.Pure r -> return r
884 PP.Free p -> do
885 f' <- p
886 go1 f'
887 go1 f = do
888 x <- lift (PP.runFreeT f)
889 case x of
890 PP.Pure r -> return r
891 PP.Free p -> do
892 p0
893 f' <- p
894 go1 f'
895{-# INLINABLE intercalate #-}
896
62e8521c 897{-| Join 'FreeT'-delimited lines into a text stream
91727d11 898-}
899unlines
900 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
901unlines = go
902 where
903 go f = do
904 x <- lift (PP.runFreeT f)
905 case x of
906 PP.Pure r -> return r
907 PP.Free p -> do
908 f' <- p
909 yield $ T.singleton '\n'
910 go f'
911{-# INLINABLE unlines #-}
912
31f41a5d 913{-| Join 'FreeT'-delimited words into a text stream
91727d11 914-}
915unwords
916 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
917unwords = intercalate (yield $ T.pack " ")
918{-# INLINABLE unwords #-}
919
920{- $parse
31f41a5d 921 The following parsing utilities are single-character analogs of the ones found
922 @pipes-parse@.
91727d11 923-}
924
91727d11 925{- $reexports
31f41a5d 926 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
91727d11 927
928 @Data.Text@ re-exports the 'Text' type.
929
91727d11 930 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
931-}