]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
fromHandle further optimized
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
31f41a5d 1{-# LANGUAGE RankNTypes, TypeFamilies, CPP #-}
91727d11 2
13a43263 3{-| This module provides @pipes@ utilities for \"text streams\", which are
31f41a5d 4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
63ea9ffd 5 a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
6 be associated with a 'Producer' or 'Consumer' according as it is read or written to.
91727d11 7
63ea9ffd 8 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
31f41a5d 9 example, the following program copies a document from one file to another:
91727d11 10
11> import Pipes
31f41a5d 12> import qualified Data.Text.Pipes as Text
91727d11 13> import System.IO
14>
15> main =
16> withFile "inFile.txt" ReadMode $ \hIn ->
17> withFile "outFile.txt" WriteMode $ \hOut ->
31f41a5d 18> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
19
20To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
91727d11 21
13a43263 22> import Pipes
31f41a5d 23> import qualified Data.Text.Pipes as Text
13a43263 24> import Pipes.Safe
25>
31f41a5d 26> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
13a43263 27
91727d11 28 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
31f41a5d 29 and 'stdout' proxies, as with the following \"echo\" program:
91727d11 30
31f41a5d 31> main = runEffect $ Text.stdin >-> Text.stdout
91727d11 32
33 You can also translate pure lazy 'TL.Text's to and from proxies:
34
31f41a5d 35> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
91727d11 36
37 In addition, this module provides many functions equivalent to lazy
31f41a5d 38 'Text' functions so that you can transform or fold text streams. For
91727d11 39 example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 40 might write:
91727d11 41
42> import Pipes
31f41a5d 43> import qualified Pipes.Text as Text
44> import qualified Pipes.Parse as Parse
91727d11 45>
31f41a5d 46> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
91727d11 47> where
31f41a5d 48> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
91727d11 49
31f41a5d 50 The above program will never bring more than one chunk of text (~ 32 KB) into
91727d11 51 memory, no matter how long the lines are.
52
53 Note that functions in this library are designed to operate on streams that
31f41a5d 54 are insensitive to text boundaries. This means that they may freely split
55 text into smaller texts and /discard empty texts/. However, they will
56 /never concatenate texts/ in order to provide strict upper bounds on memory
91727d11 57 usage.
58-}
59
7faef8bc 60module Pipes.Text (
91727d11 61 -- * Producers
62 fromLazy,
63 stdin,
64 fromHandle,
65 readFile,
66 stdinLn,
91727d11 67
68 -- * Consumers
69 stdout,
70 stdoutLn,
71 toHandle,
72 writeFile,
73
74 -- * Pipes
75 map,
76 concatMap,
77 take,
78 drop,
79 takeWhile,
80 dropWhile,
81 filter,
91727d11 82 scan,
7faef8bc 83 encodeUtf8,
a02a69ad 84#if MIN_VERSION_text(0,11,4)
85 pipeDecodeUtf8,
86 pipeDecodeUtf8With,
87#endif
7faef8bc 88 pack,
89 unpack,
1d2434b5 90 toCaseFold,
91 toLower,
92 toUpper,
7faef8bc 93 stripStart,
91727d11 94
95 -- * Folds
96 toLazy,
97 toLazyM,
98 fold,
99 head,
100 last,
101 null,
102 length,
103 any,
104 all,
105 maximum,
106 minimum,
91727d11 107 find,
108 index,
31f41a5d 109 count,
91727d11 110
111 -- * Splitters
112 splitAt,
113 chunksOf,
114 span,
115 break,
116 splitWith,
117 split,
118 groupBy,
119 group,
120 lines,
121 words,
31f41a5d 122#if MIN_VERSION_text(0,11,4)
123 decodeUtf8,
63ea9ffd 124 decodeUtf8With,
31f41a5d 125#endif
91727d11 126 -- * Transformations
127 intersperse,
31f41a5d 128
91727d11 129 -- * Joiners
130 intercalate,
131 unlines,
132 unwords,
133
31f41a5d 134 -- * Character Parsers
91727d11 135 -- $parse
31f41a5d 136 nextChar,
137 drawChar,
138 unDrawChar,
139 peekChar,
140 isEndOfChars,
91727d11 141
142 -- * Re-exports
143 -- $reexports
144 module Data.Text,
91727d11 145 module Pipes.Parse
146 ) where
147
148import Control.Exception (throwIO, try)
149import Control.Monad (liftM, unless)
acc6868f 150import Control.Monad.Trans.State.Strict (StateT(..))
91727d11 151import qualified Data.Text as T
152import qualified Data.Text.IO as T
31f41a5d 153import qualified Data.Text.Encoding as TE
63ea9ffd 154import qualified Data.Text.Encoding.Error as TE
91727d11 155import Data.Text (Text)
156import qualified Data.Text.Lazy as TL
157import qualified Data.Text.Lazy.IO as TL
158import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
159import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
31f41a5d 160import Data.ByteString (ByteString)
161import qualified Data.ByteString as B
cf10d6f1 162import Data.Char (ord, isSpace)
91727d11 163import Data.Functor.Identity (Identity)
164import qualified Data.List as List
165import Foreign.C.Error (Errno(Errno), ePIPE)
166import qualified GHC.IO.Exception as G
167import Pipes
5e3f5409 168import qualified Pipes.ByteString as PB
91727d11 169import qualified Pipes.ByteString.Parse as PBP
c0343bc9 170import Pipes.Text.Parse (
31f41a5d 171 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
91727d11 172import Pipes.Core (respond, Server')
173import qualified Pipes.Parse as PP
174import Pipes.Parse (input, concat, FreeT)
175import qualified Pipes.Safe.Prelude as Safe
176import qualified Pipes.Safe as Safe
177import Pipes.Safe (MonadSafe(..), Base(..))
178import qualified Pipes.Prelude as P
179import qualified System.IO as IO
180import Data.Char (isSpace)
63ea9ffd 181import Data.Word (Word8)
91727d11 182import Prelude hiding (
183 all,
184 any,
185 break,
186 concat,
187 concatMap,
188 drop,
189 dropWhile,
190 elem,
191 filter,
192 head,
193 last,
194 lines,
195 length,
196 map,
197 maximum,
198 minimum,
199 notElem,
200 null,
201 readFile,
202 span,
203 splitAt,
204 take,
205 takeWhile,
206 unlines,
207 unwords,
208 words,
209 writeFile )
210
211-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
212fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
213fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
214{-# INLINABLE fromLazy #-}
215
62e8521c 216-- | Stream text from 'stdin'
91727d11 217stdin :: MonadIO m => Producer' Text m ()
218stdin = fromHandle IO.stdin
219{-# INLINABLE stdin #-}
220
31f41a5d 221{-| Convert a 'IO.Handle' into a text stream using a text size
222 determined by the good sense of the text library.
223
224-}
225
91727d11 226fromHandle :: MonadIO m => IO.Handle -> Producer' Text m ()
5e3f5409 227#if MIN_VERSION_text(0,11,4)
8853a440 228fromHandle h = go TE.streamDecodeUtf8 where
229 act = B.hGetSome h defaultChunkSize
230 go dec = do chunk <- liftIO act
231 case dec chunk of
232 TE.Some text _ dec' -> do yield text
233 unless (B.null chunk) (go dec')
234{-# INLINE fromHandle#-}
5e3f5409 235-- bytestring fromHandle + streamDecodeUtf8 is 3 times as fast as
236-- the dedicated Text IO function 'hGetChunk' ;
237-- this way "runEffect $ PT.fromHandle hIn >-> PT.toHandle hOut"
238-- runs the same as the conduit equivalent, only slightly slower
239-- than "runEffect $ PB.fromHandle hIn >-> PB.toHandle hOut"
5e3f5409 240#else
91727d11 241fromHandle h = go where
242 go = do txt <- liftIO (T.hGetChunk h)
243 unless (T.null txt) $ do yield txt
244 go
245{-# INLINABLE fromHandle#-}
5e3f5409 246#endif
6f6f9974 247{-| Stream text from a file using Pipes.Safe
248
31f41a5d 249>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
250MAIN = PUTSTRLN "HELLO WORLD"
6f6f9974 251-}
252
91727d11 253readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m ()
254readFile file = Safe.withFile file IO.ReadMode fromHandle
255{-# INLINABLE readFile #-}
256
31f41a5d 257{-| Stream lines of text from stdin (for testing in ghci etc.)
258
259>>> let safely = runSafeT . runEffect
260>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
261hello
2625
263world
2645
265
266-}
91727d11 267stdinLn :: MonadIO m => Producer' Text m ()
31f41a5d 268stdinLn = go where
91727d11 269 go = do
270 eof <- liftIO (IO.hIsEOF IO.stdin)
271 unless eof $ do
272 txt <- liftIO (T.hGetLine IO.stdin)
273 yield txt
274 go
275
91727d11 276
31f41a5d 277{-| Stream text to 'stdout'
91727d11 278
279 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
280
281 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
31f41a5d 282 @(source >-> stdout)@ in suitable cases.
91727d11 283-}
284stdout :: MonadIO m => Consumer' Text m ()
285stdout = go
286 where
287 go = do
288 txt <- await
289 x <- liftIO $ try (T.putStr txt)
290 case x of
291 Left (G.IOError { G.ioe_type = G.ResourceVanished
292 , G.ioe_errno = Just ioe })
293 | Errno ioe == ePIPE
294 -> return ()
295 Left e -> liftIO (throwIO e)
296 Right () -> go
297{-# INLINABLE stdout #-}
298
299stdoutLn :: (MonadIO m) => Consumer' Text m ()
300stdoutLn = go
301 where
302 go = do
303 str <- await
304 x <- liftIO $ try (T.putStrLn str)
305 case x of
306 Left (G.IOError { G.ioe_type = G.ResourceVanished
307 , G.ioe_errno = Just ioe })
308 | Errno ioe == ePIPE
309 -> return ()
310 Left e -> liftIO (throwIO e)
311 Right () -> go
312{-# INLINABLE stdoutLn #-}
313
31f41a5d 314{-| Convert a text stream into a 'Handle'
91727d11 315
31f41a5d 316 Note: again, for best performance, where possible use
317 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
91727d11 318-}
319toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
320toHandle h = for cat (liftIO . T.hPutStr h)
321{-# INLINABLE toHandle #-}
322
d4732515 323{-# RULES "p >-> toHandle h" forall p h .
ff38b9f0 324 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
d4732515 325 #-}
326
327
31f41a5d 328-- | Stream text into a file. Uses @pipes-safe@.
91727d11 329writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m ()
330writeFile file = Safe.withFile file IO.WriteMode toHandle
331
332-- | Apply a transformation to each 'Char' in the stream
333map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
334map f = P.map (T.map f)
335{-# INLINABLE map #-}
336
ff38b9f0 337{-# RULES "p >-> map f" forall p f .
338 p >-> map f = for p (\txt -> yield (T.map f txt))
339 #-}
340
31f41a5d 341-- | Map a function over the characters of a text stream and concatenate the results
91727d11 342concatMap
343 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
344concatMap f = P.map (T.concatMap f)
345{-# INLINABLE concatMap #-}
346
ff38b9f0 347{-# RULES "p >-> concatMap f" forall p f .
348 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
349 #-}
7faef8bc 350
351-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
a02a69ad 352-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
353-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
7faef8bc 354encodeUtf8 :: Monad m => Pipe Text ByteString m r
355encodeUtf8 = P.map TE.encodeUtf8
356{-# INLINEABLE encodeUtf8 #-}
357
ff38b9f0 358{-# RULES "p >-> encodeUtf8" forall p .
359 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
360 #-}
361
c0343bc9 362-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 363pack :: Monad m => Pipe String Text m r
364pack = P.map T.pack
365{-# INLINEABLE pack #-}
366
ff38b9f0 367{-# RULES "p >-> pack" forall p .
368 p >-> pack = for p (\txt -> yield (T.pack txt))
369 #-}
370
371-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 372unpack :: Monad m => Pipe Text String m r
d4732515 373unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 374{-# INLINEABLE unpack #-}
375
ff38b9f0 376{-# RULES "p >-> unpack" forall p .
377 p >-> unpack = for p (\txt -> yield (T.unpack txt))
378 #-}
d4732515 379
c0343bc9 380-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
7faef8bc 381-- here acting on a 'Text' pipe, rather as they would on a lazy text
382toCaseFold :: Monad m => Pipe Text Text m ()
383toCaseFold = P.map T.toCaseFold
384{-# INLINEABLE toCaseFold #-}
385
ff38b9f0 386{-# RULES "p >-> toCaseFold" forall p .
387 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
388 #-}
389
390
c0343bc9 391-- | lowercase incoming 'Text'
7faef8bc 392toLower :: Monad m => Pipe Text Text m ()
393toLower = P.map T.toLower
394{-# INLINEABLE toLower #-}
395
ff38b9f0 396{-# RULES "p >-> toLower" forall p .
397 p >-> toLower = for p (\txt -> yield (T.toLower txt))
398 #-}
399
c0343bc9 400-- | uppercase incoming 'Text'
7faef8bc 401toUpper :: Monad m => Pipe Text Text m ()
402toUpper = P.map T.toUpper
403{-# INLINEABLE toUpper #-}
404
ff38b9f0 405{-# RULES "p >-> toUpper" forall p .
406 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
407 #-}
408
c0343bc9 409-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 410stripStart :: Monad m => Pipe Text Text m r
411stripStart = do
412 chunk <- await
413 let text = T.stripStart chunk
414 if T.null text
415 then stripStart
416 else cat
417{-# INLINEABLE stripStart #-}
418
31f41a5d 419-- | @(take n)@ only allows @n@ individual characters to pass;
420-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 421take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
422take n0 = go n0 where
423 go n
424 | n <= 0 = return ()
425 | otherwise = do
31f41a5d 426 txt <- await
427 let len = fromIntegral (T.length txt)
91727d11 428 if (len > n)
31f41a5d 429 then yield (T.take (fromIntegral n) txt)
91727d11 430 else do
31f41a5d 431 yield txt
91727d11 432 go (n - len)
433{-# INLINABLE take #-}
434
31f41a5d 435-- | @(drop n)@ drops the first @n@ characters
91727d11 436drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
437drop n0 = go n0 where
438 go n
439 | n <= 0 = cat
440 | otherwise = do
31f41a5d 441 txt <- await
442 let len = fromIntegral (T.length txt)
91727d11 443 if (len >= n)
444 then do
31f41a5d 445 yield (T.drop (fromIntegral n) txt)
91727d11 446 cat
447 else go (n - len)
448{-# INLINABLE drop #-}
449
31f41a5d 450-- | Take characters until they fail the predicate
91727d11 451takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
452takeWhile predicate = go
453 where
454 go = do
31f41a5d 455 txt <- await
456 let (prefix, suffix) = T.span predicate txt
91727d11 457 if (T.null suffix)
458 then do
31f41a5d 459 yield txt
91727d11 460 go
461 else yield prefix
462{-# INLINABLE takeWhile #-}
463
31f41a5d 464-- | Drop characters until they fail the predicate
91727d11 465dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
466dropWhile predicate = go where
467 go = do
31f41a5d 468 txt <- await
469 case T.findIndex (not . predicate) txt of
91727d11 470 Nothing -> go
471 Just i -> do
31f41a5d 472 yield (T.drop i txt)
91727d11 473 cat
474{-# INLINABLE dropWhile #-}
475
476-- | Only allows 'Char's to pass if they satisfy the predicate
477filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
478filter predicate = P.map (T.filter predicate)
479{-# INLINABLE filter #-}
480
ff38b9f0 481{-# RULES "p >-> filter q" forall p q .
482 p >-> filter q = for p (\txt -> yield (T.filter q txt))
483 #-}
484
31f41a5d 485-- | Strict left scan over the characters
91727d11 486scan
487 :: (Monad m)
488 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
489scan step begin = go begin
490 where
31f41a5d 491 go c = do
492 txt <- await
493 let txt' = T.scanl step c txt
494 c' = T.last txt'
495 yield txt'
496 go c'
91727d11 497{-# INLINABLE scan #-}
498
499{-| Fold a pure 'Producer' of strict 'Text's into a lazy
500 'TL.Text'
501-}
502toLazy :: Producer Text Identity () -> TL.Text
503toLazy = TL.fromChunks . P.toList
504{-# INLINABLE toLazy #-}
505
506{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
507 'TL.Text'
508
509 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
510 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
511 immediately as they are generated instead of loading them all into memory.
512-}
513toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
514toLazyM = liftM TL.fromChunks . P.toListM
515{-# INLINABLE toLazyM #-}
516
31f41a5d 517-- | Reduce the text stream using a strict left fold over characters
91727d11 518fold
519 :: Monad m
520 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
31f41a5d 521fold step begin done = P.fold (T.foldl' step) begin done
91727d11 522{-# INLINABLE fold #-}
523
524-- | Retrieve the first 'Char'
525head :: (Monad m) => Producer Text m () -> m (Maybe Char)
526head = go
527 where
528 go p = do
529 x <- nextChar p
530 case x of
531 Left _ -> return Nothing
31f41a5d 532 Right (c, _) -> return (Just c)
91727d11 533{-# INLINABLE head #-}
534
535-- | Retrieve the last 'Char'
536last :: (Monad m) => Producer Text m () -> m (Maybe Char)
537last = go Nothing
538 where
539 go r p = do
540 x <- next p
541 case x of
542 Left () -> return r
31f41a5d 543 Right (txt, p') ->
544 if (T.null txt)
91727d11 545 then go r p'
31f41a5d 546 else go (Just $ T.last txt) p'
91727d11 547{-# INLINABLE last #-}
548
549-- | Determine if the stream is empty
550null :: (Monad m) => Producer Text m () -> m Bool
551null = P.all T.null
552{-# INLINABLE null #-}
553
62e8521c 554-- | Count the number of characters in the stream
91727d11 555length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 556length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 557{-# INLINABLE length #-}
558
559-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
560any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
561any predicate = P.any (T.any predicate)
562{-# INLINABLE any #-}
563
564-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
565all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
566all predicate = P.all (T.all predicate)
567{-# INLINABLE all #-}
568
62e8521c 569-- | Return the maximum 'Char' within a text stream
91727d11 570maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
571maximum = P.fold step Nothing id
572 where
31f41a5d 573 step mc txt =
574 if (T.null txt)
575 then mc
576 else Just $ case mc of
577 Nothing -> T.maximum txt
578 Just c -> max c (T.maximum txt)
91727d11 579{-# INLINABLE maximum #-}
580
62e8521c 581-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 582minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
583minimum = P.fold step Nothing id
584 where
31f41a5d 585 step mc txt =
586 if (T.null txt)
587 then mc
588 else case mc of
589 Nothing -> Just (T.minimum txt)
590 Just c -> Just (min c (T.minimum txt))
91727d11 591{-# INLINABLE minimum #-}
592
91727d11 593-- | Find the first element in the stream that matches the predicate
594find
595 :: (Monad m)
596 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
597find predicate p = head (p >-> filter predicate)
598{-# INLINABLE find #-}
599
62e8521c 600-- | Index into a text stream
91727d11 601index
602 :: (Monad m, Integral a)
603 => a-> Producer Text m () -> m (Maybe Char)
604index n p = head (p >-> drop n)
605{-# INLINABLE index #-}
606
63ea9ffd 607
31f41a5d 608-- | Store a tally of how many segments match the given 'Text'
609count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
610count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
611{-# INLINABLE count #-}
612
613#if MIN_VERSION_text(0,11,4)
614-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
615-- into a Pipe of Text
616decodeUtf8
617 :: Monad m
618 => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
619decodeUtf8 = go TE.streamDecodeUtf8
620 where go dec p = do
621 x <- lift (next p)
622 case x of
623 Left r -> return (return r)
624 Right (chunk, p') -> do
625 let TE.Some text l dec' = dec chunk
626 if B.null l
627 then do
628 yield text
629 go dec' p'
630 else return $ do
631 yield l
632 p'
633{-# INLINEABLE decodeUtf8 #-}
63ea9ffd 634
635-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
636-- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@
637-- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\"
638decodeUtf8With
639 :: Monad m
640 => TE.OnDecodeError
641 -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
642decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
643 where go dec p = do
644 x <- lift (next p)
645 case x of
646 Left r -> return (return r)
647 Right (chunk, p') -> do
648 let TE.Some text l dec' = dec chunk
649 if B.null l
650 then do
651 yield text
652 go dec' p'
653 else return $ do
654 yield l
655 p'
656{-# INLINEABLE decodeUtf8With #-}
a02a69ad 657
658-- | A simple pipe from 'ByteString' to 'Text'; a decoding error will arise
659-- with any chunk that contains a sequence of bytes that is unreadable. Otherwise
660-- only few bytes will only be moved from one chunk to the next before decoding.
661pipeDecodeUtf8 :: Monad m => Pipe ByteString Text m r
662pipeDecodeUtf8 = go TE.streamDecodeUtf8
663 where go dec = do chunk <- await
664 case dec chunk of
665 TE.Some text l dec' -> do yield text
666 go dec'
667{-# INLINEABLE pipeDecodeUtf8 #-}
668
669-- | A simple pipe from 'ByteString' to 'Text' using a replacement function.
670pipeDecodeUtf8With
671 :: Monad m
672 => TE.OnDecodeError
673 -> Pipe ByteString Text m r
674pipeDecodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
675 where go dec = do chunk <- await
676 case dec chunk of
677 TE.Some text l dec' -> do yield text
678 go dec'
679{-# INLINEABLE pipeDecodeUtf8With #-}
31f41a5d 680#endif
681
682-- | Splits a 'Producer' after the given number of characters
91727d11 683splitAt
684 :: (Monad m, Integral n)
685 => n
686 -> Producer Text m r
687 -> Producer' Text m (Producer Text m r)
688splitAt = go
689 where
690 go 0 p = return p
691 go n p = do
692 x <- lift (next p)
693 case x of
694 Left r -> return (return r)
31f41a5d 695 Right (txt, p') -> do
696 let len = fromIntegral (T.length txt)
91727d11 697 if (len <= n)
698 then do
31f41a5d 699 yield txt
91727d11 700 go (n - len) p'
701 else do
31f41a5d 702 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 703 yield prefix
704 return (yield suffix >> p')
705{-# INLINABLE splitAt #-}
706
31f41a5d 707-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
91727d11 708chunksOf
709 :: (Monad m, Integral n)
710 => n -> Producer Text m r -> FreeT (Producer Text m) m r
711chunksOf n p0 = PP.FreeT (go p0)
712 where
713 go p = do
714 x <- next p
715 return $ case x of
716 Left r -> PP.Pure r
31f41a5d 717 Right (txt, p') -> PP.Free $ do
718 p'' <- splitAt n (yield txt >> p')
91727d11 719 return $ PP.FreeT (go p'')
720{-# INLINABLE chunksOf #-}
721
31f41a5d 722{-| Split a text stream in two, where the first text stream is the longest
723 consecutive group of text that satisfy the predicate
91727d11 724-}
725span
726 :: (Monad m)
727 => (Char -> Bool)
728 -> Producer Text m r
729 -> Producer' Text m (Producer Text m r)
730span predicate = go
731 where
732 go p = do
733 x <- lift (next p)
734 case x of
735 Left r -> return (return r)
31f41a5d 736 Right (txt, p') -> do
737 let (prefix, suffix) = T.span predicate txt
91727d11 738 if (T.null suffix)
739 then do
31f41a5d 740 yield txt
91727d11 741 go p'
742 else do
743 yield prefix
744 return (yield suffix >> p')
745{-# INLINABLE span #-}
746
62e8521c 747{-| Split a text stream in two, where the first text stream is the longest
748 consecutive group of characters that don't satisfy the predicate
91727d11 749-}
750break
751 :: (Monad m)
752 => (Char -> Bool)
753 -> Producer Text m r
754 -> Producer Text m (Producer Text m r)
755break predicate = span (not . predicate)
756{-# INLINABLE break #-}
757
62e8521c 758{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 759 predicate
760-}
761splitWith
762 :: (Monad m)
763 => (Char -> Bool)
764 -> Producer Text m r
765 -> PP.FreeT (Producer Text m) m r
766splitWith predicate p0 = PP.FreeT (go0 p0)
767 where
768 go0 p = do
769 x <- next p
770 case x of
771 Left r -> return (PP.Pure r)
31f41a5d 772 Right (txt, p') ->
773 if (T.null txt)
91727d11 774 then go0 p'
775 else return $ PP.Free $ do
31f41a5d 776 p'' <- span (not . predicate) (yield txt >> p')
91727d11 777 return $ PP.FreeT (go1 p'')
778 go1 p = do
779 x <- nextChar p
780 return $ case x of
781 Left r -> PP.Pure r
782 Right (_, p') -> PP.Free $ do
783 p'' <- span (not . predicate) p'
784 return $ PP.FreeT (go1 p'')
785{-# INLINABLE splitWith #-}
786
31f41a5d 787-- | Split a text stream using the given 'Char' as the delimiter
91727d11 788split :: (Monad m)
789 => Char
790 -> Producer Text m r
791 -> FreeT (Producer Text m) m r
31f41a5d 792split c = splitWith (c ==)
91727d11 793{-# INLINABLE split #-}
794
62e8521c 795{-| Group a text stream into 'FreeT'-delimited text streams using the supplied
91727d11 796 equality predicate
797-}
798groupBy
799 :: (Monad m)
800 => (Char -> Char -> Bool)
801 -> Producer Text m r
802 -> FreeT (Producer Text m) m r
803groupBy equal p0 = PP.FreeT (go p0)
804 where
805 go p = do
806 x <- next p
807 case x of
808 Left r -> return (PP.Pure r)
31f41a5d 809 Right (txt, p') -> case (T.uncons txt) of
91727d11 810 Nothing -> go p'
31f41a5d 811 Just (c, _) -> do
91727d11 812 return $ PP.Free $ do
31f41a5d 813 p'' <- span (equal c) (yield txt >> p')
91727d11 814 return $ PP.FreeT (go p'')
815{-# INLINABLE groupBy #-}
816
62e8521c 817-- | Group a text stream into 'FreeT'-delimited text streams of identical characters
91727d11 818group
819 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
820group = groupBy (==)
821{-# INLINABLE group #-}
822
62e8521c 823{-| Split a text stream into 'FreeT'-delimited lines
91727d11 824-}
825lines
826 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
827lines p0 = PP.FreeT (go0 p0)
828 where
829 go0 p = do
830 x <- next p
831 case x of
832 Left r -> return (PP.Pure r)
31f41a5d 833 Right (txt, p') ->
834 if (T.null txt)
91727d11 835 then go0 p'
31f41a5d 836 else return $ PP.Free $ go1 (yield txt >> p')
91727d11 837 go1 p = do
838 p' <- break ('\n' ==) p
b4d21c02 839 return $ PP.FreeT $ do
840 x <- nextChar p'
841 case x of
842 Left r -> return $ PP.Pure r
843 Right (_, p'') -> go0 p''
91727d11 844{-# INLINABLE lines #-}
91727d11 845
31f41a5d 846
847
848-- | Split a text stream into 'FreeT'-delimited words
91727d11 849words
850 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
cf10d6f1 851words = go
91727d11 852 where
cf10d6f1 853 go p = PP.FreeT $ do
854 x <- next (p >-> dropWhile isSpace)
855 return $ case x of
856 Left r -> PP.Pure r
857 Right (bs, p') -> PP.Free $ do
858 p'' <- break isSpace (yield bs >> p')
859 return (go p'')
91727d11 860{-# INLINABLE words #-}
861
cf10d6f1 862
62e8521c 863-- | Intersperse a 'Char' in between the characters of the text stream
91727d11 864intersperse
865 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
31f41a5d 866intersperse c = go0
91727d11 867 where
868 go0 p = do
869 x <- lift (next p)
870 case x of
871 Left r -> return r
31f41a5d 872 Right (txt, p') -> do
873 yield (T.intersperse c txt)
91727d11 874 go1 p'
875 go1 p = do
876 x <- lift (next p)
877 case x of
878 Left r -> return r
31f41a5d 879 Right (txt, p') -> do
880 yield (T.singleton c)
881 yield (T.intersperse c txt)
91727d11 882 go1 p'
883{-# INLINABLE intersperse #-}
884
31f41a5d 885{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
886 interspersing a text stream in between them
91727d11 887-}
888intercalate
889 :: (Monad m)
890 => Producer Text m ()
891 -> FreeT (Producer Text m) m r
892 -> Producer Text m r
893intercalate p0 = go0
894 where
895 go0 f = do
896 x <- lift (PP.runFreeT f)
897 case x of
898 PP.Pure r -> return r
899 PP.Free p -> do
900 f' <- p
901 go1 f'
902 go1 f = do
903 x <- lift (PP.runFreeT f)
904 case x of
905 PP.Pure r -> return r
906 PP.Free p -> do
907 p0
908 f' <- p
909 go1 f'
910{-# INLINABLE intercalate #-}
911
62e8521c 912{-| Join 'FreeT'-delimited lines into a text stream
91727d11 913-}
914unlines
915 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
916unlines = go
917 where
918 go f = do
919 x <- lift (PP.runFreeT f)
920 case x of
921 PP.Pure r -> return r
922 PP.Free p -> do
923 f' <- p
924 yield $ T.singleton '\n'
925 go f'
926{-# INLINABLE unlines #-}
927
31f41a5d 928{-| Join 'FreeT'-delimited words into a text stream
91727d11 929-}
930unwords
931 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
932unwords = intercalate (yield $ T.pack " ")
933{-# INLINABLE unwords #-}
934
935{- $parse
31f41a5d 936 The following parsing utilities are single-character analogs of the ones found
937 @pipes-parse@.
91727d11 938-}
939
91727d11 940{- $reexports
31f41a5d 941 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
91727d11 942
943 @Data.Text@ re-exports the 'Text' type.
944
91727d11 945 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
946-}