]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
scrap Text.hGetChunk for bytestring + streamDecodeUtf8, which is three times as fast
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
31f41a5d 1{-# LANGUAGE RankNTypes, TypeFamilies, CPP #-}
91727d11 2
13a43263 3{-| This module provides @pipes@ utilities for \"text streams\", which are
31f41a5d 4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
63ea9ffd 5 a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
6 be associated with a 'Producer' or 'Consumer' according as it is read or written to.
91727d11 7
63ea9ffd 8 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
31f41a5d 9 example, the following program copies a document from one file to another:
91727d11 10
11> import Pipes
31f41a5d 12> import qualified Data.Text.Pipes as Text
91727d11 13> import System.IO
14>
15> main =
16> withFile "inFile.txt" ReadMode $ \hIn ->
17> withFile "outFile.txt" WriteMode $ \hOut ->
31f41a5d 18> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
19
20To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
91727d11 21
13a43263 22> import Pipes
31f41a5d 23> import qualified Data.Text.Pipes as Text
13a43263 24> import Pipes.Safe
25>
31f41a5d 26> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
13a43263 27
91727d11 28 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
31f41a5d 29 and 'stdout' proxies, as with the following \"echo\" program:
91727d11 30
31f41a5d 31> main = runEffect $ Text.stdin >-> Text.stdout
91727d11 32
33 You can also translate pure lazy 'TL.Text's to and from proxies:
34
31f41a5d 35> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
91727d11 36
37 In addition, this module provides many functions equivalent to lazy
31f41a5d 38 'Text' functions so that you can transform or fold text streams. For
91727d11 39 example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 40 might write:
91727d11 41
42> import Pipes
31f41a5d 43> import qualified Pipes.Text as Text
44> import qualified Pipes.Parse as Parse
91727d11 45>
31f41a5d 46> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
91727d11 47> where
31f41a5d 48> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
91727d11 49
31f41a5d 50 The above program will never bring more than one chunk of text (~ 32 KB) into
91727d11 51 memory, no matter how long the lines are.
52
53 Note that functions in this library are designed to operate on streams that
31f41a5d 54 are insensitive to text boundaries. This means that they may freely split
55 text into smaller texts and /discard empty texts/. However, they will
56 /never concatenate texts/ in order to provide strict upper bounds on memory
91727d11 57 usage.
58-}
59
7faef8bc 60module Pipes.Text (
91727d11 61 -- * Producers
62 fromLazy,
63 stdin,
64 fromHandle,
65 readFile,
66 stdinLn,
91727d11 67
68 -- * Consumers
69 stdout,
70 stdoutLn,
71 toHandle,
72 writeFile,
73
74 -- * Pipes
75 map,
76 concatMap,
77 take,
78 drop,
79 takeWhile,
80 dropWhile,
81 filter,
91727d11 82 scan,
7faef8bc 83 encodeUtf8,
a02a69ad 84#if MIN_VERSION_text(0,11,4)
85 pipeDecodeUtf8,
86 pipeDecodeUtf8With,
87#endif
7faef8bc 88 pack,
89 unpack,
1d2434b5 90 toCaseFold,
91 toLower,
92 toUpper,
7faef8bc 93 stripStart,
91727d11 94
95 -- * Folds
96 toLazy,
97 toLazyM,
98 fold,
99 head,
100 last,
101 null,
102 length,
103 any,
104 all,
105 maximum,
106 minimum,
91727d11 107 find,
108 index,
31f41a5d 109 count,
91727d11 110
111 -- * Splitters
112 splitAt,
113 chunksOf,
114 span,
115 break,
116 splitWith,
117 split,
118 groupBy,
119 group,
120 lines,
121 words,
31f41a5d 122#if MIN_VERSION_text(0,11,4)
123 decodeUtf8,
63ea9ffd 124 decodeUtf8With,
31f41a5d 125#endif
91727d11 126 -- * Transformations
127 intersperse,
31f41a5d 128
91727d11 129 -- * Joiners
130 intercalate,
131 unlines,
132 unwords,
133
31f41a5d 134 -- * Character Parsers
91727d11 135 -- $parse
31f41a5d 136 nextChar,
137 drawChar,
138 unDrawChar,
139 peekChar,
140 isEndOfChars,
91727d11 141
142 -- * Re-exports
143 -- $reexports
144 module Data.Text,
91727d11 145 module Pipes.Parse
146 ) where
147
148import Control.Exception (throwIO, try)
149import Control.Monad (liftM, unless)
acc6868f 150import Control.Monad.Trans.State.Strict (StateT(..))
91727d11 151import qualified Data.Text as T
152import qualified Data.Text.IO as T
31f41a5d 153import qualified Data.Text.Encoding as TE
63ea9ffd 154import qualified Data.Text.Encoding.Error as TE
91727d11 155import Data.Text (Text)
156import qualified Data.Text.Lazy as TL
157import qualified Data.Text.Lazy.IO as TL
158import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
159import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
31f41a5d 160import Data.ByteString (ByteString)
161import qualified Data.ByteString as B
cf10d6f1 162import Data.Char (ord, isSpace)
91727d11 163import Data.Functor.Identity (Identity)
164import qualified Data.List as List
165import Foreign.C.Error (Errno(Errno), ePIPE)
166import qualified GHC.IO.Exception as G
167import Pipes
5e3f5409 168import qualified Pipes.ByteString as PB
91727d11 169import qualified Pipes.ByteString.Parse as PBP
c0343bc9 170import Pipes.Text.Parse (
31f41a5d 171 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
91727d11 172import Pipes.Core (respond, Server')
173import qualified Pipes.Parse as PP
174import Pipes.Parse (input, concat, FreeT)
175import qualified Pipes.Safe.Prelude as Safe
176import qualified Pipes.Safe as Safe
177import Pipes.Safe (MonadSafe(..), Base(..))
178import qualified Pipes.Prelude as P
179import qualified System.IO as IO
180import Data.Char (isSpace)
63ea9ffd 181import Data.Word (Word8)
91727d11 182import Prelude hiding (
183 all,
184 any,
185 break,
186 concat,
187 concatMap,
188 drop,
189 dropWhile,
190 elem,
191 filter,
192 head,
193 last,
194 lines,
195 length,
196 map,
197 maximum,
198 minimum,
199 notElem,
200 null,
201 readFile,
202 span,
203 splitAt,
204 take,
205 takeWhile,
206 unlines,
207 unwords,
208 words,
209 writeFile )
210
211-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
212fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
213fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
214{-# INLINABLE fromLazy #-}
215
62e8521c 216-- | Stream text from 'stdin'
91727d11 217stdin :: MonadIO m => Producer' Text m ()
218stdin = fromHandle IO.stdin
219{-# INLINABLE stdin #-}
220
31f41a5d 221{-| Convert a 'IO.Handle' into a text stream using a text size
222 determined by the good sense of the text library.
223
224-}
225
91727d11 226fromHandle :: MonadIO m => IO.Handle -> Producer' Text m ()
5e3f5409 227#if MIN_VERSION_text(0,11,4)
228fromHandle h = PB.fromHandle h >-> pipeDecodeUtf8
229{-# INLINABLE fromHandle#-}
230-- bytestring fromHandle + streamDecodeUtf8 is 3 times as fast as
231-- the dedicated Text IO function 'hGetChunk' ;
232-- this way "runEffect $ PT.fromHandle hIn >-> PT.toHandle hOut"
233-- runs the same as the conduit equivalent, only slightly slower
234-- than "runEffect $ PB.fromHandle hIn >-> PB.toHandle hOut"
235
236#else
91727d11 237fromHandle h = go where
238 go = do txt <- liftIO (T.hGetChunk h)
239 unless (T.null txt) $ do yield txt
240 go
241{-# INLINABLE fromHandle#-}
5e3f5409 242#endif
6f6f9974 243{-| Stream text from a file using Pipes.Safe
244
31f41a5d 245>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
246MAIN = PUTSTRLN "HELLO WORLD"
6f6f9974 247-}
248
91727d11 249readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m ()
250readFile file = Safe.withFile file IO.ReadMode fromHandle
251{-# INLINABLE readFile #-}
252
31f41a5d 253{-| Stream lines of text from stdin (for testing in ghci etc.)
254
255>>> let safely = runSafeT . runEffect
256>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
257hello
2585
259world
2605
261
262-}
91727d11 263stdinLn :: MonadIO m => Producer' Text m ()
31f41a5d 264stdinLn = go where
91727d11 265 go = do
266 eof <- liftIO (IO.hIsEOF IO.stdin)
267 unless eof $ do
268 txt <- liftIO (T.hGetLine IO.stdin)
269 yield txt
270 go
271
91727d11 272
31f41a5d 273{-| Stream text to 'stdout'
91727d11 274
275 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
276
277 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
31f41a5d 278 @(source >-> stdout)@ in suitable cases.
91727d11 279-}
280stdout :: MonadIO m => Consumer' Text m ()
281stdout = go
282 where
283 go = do
284 txt <- await
285 x <- liftIO $ try (T.putStr txt)
286 case x of
287 Left (G.IOError { G.ioe_type = G.ResourceVanished
288 , G.ioe_errno = Just ioe })
289 | Errno ioe == ePIPE
290 -> return ()
291 Left e -> liftIO (throwIO e)
292 Right () -> go
293{-# INLINABLE stdout #-}
294
295stdoutLn :: (MonadIO m) => Consumer' Text m ()
296stdoutLn = go
297 where
298 go = do
299 str <- await
300 x <- liftIO $ try (T.putStrLn str)
301 case x of
302 Left (G.IOError { G.ioe_type = G.ResourceVanished
303 , G.ioe_errno = Just ioe })
304 | Errno ioe == ePIPE
305 -> return ()
306 Left e -> liftIO (throwIO e)
307 Right () -> go
308{-# INLINABLE stdoutLn #-}
309
31f41a5d 310{-| Convert a text stream into a 'Handle'
91727d11 311
31f41a5d 312 Note: again, for best performance, where possible use
313 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
91727d11 314-}
315toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
316toHandle h = for cat (liftIO . T.hPutStr h)
317{-# INLINABLE toHandle #-}
318
d4732515 319{-# RULES "p >-> toHandle h" forall p h .
ff38b9f0 320 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
d4732515 321 #-}
322
323
31f41a5d 324-- | Stream text into a file. Uses @pipes-safe@.
91727d11 325writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m ()
326writeFile file = Safe.withFile file IO.WriteMode toHandle
327
328-- | Apply a transformation to each 'Char' in the stream
329map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
330map f = P.map (T.map f)
331{-# INLINABLE map #-}
332
ff38b9f0 333{-# RULES "p >-> map f" forall p f .
334 p >-> map f = for p (\txt -> yield (T.map f txt))
335 #-}
336
31f41a5d 337-- | Map a function over the characters of a text stream and concatenate the results
91727d11 338concatMap
339 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
340concatMap f = P.map (T.concatMap f)
341{-# INLINABLE concatMap #-}
342
ff38b9f0 343{-# RULES "p >-> concatMap f" forall p f .
344 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
345 #-}
7faef8bc 346
347-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
a02a69ad 348-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
349-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
7faef8bc 350encodeUtf8 :: Monad m => Pipe Text ByteString m r
351encodeUtf8 = P.map TE.encodeUtf8
352{-# INLINEABLE encodeUtf8 #-}
353
ff38b9f0 354{-# RULES "p >-> encodeUtf8" forall p .
355 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
356 #-}
357
c0343bc9 358-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 359pack :: Monad m => Pipe String Text m r
360pack = P.map T.pack
361{-# INLINEABLE pack #-}
362
ff38b9f0 363{-# RULES "p >-> pack" forall p .
364 p >-> pack = for p (\txt -> yield (T.pack txt))
365 #-}
366
367-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 368unpack :: Monad m => Pipe Text String m r
d4732515 369unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 370{-# INLINEABLE unpack #-}
371
ff38b9f0 372{-# RULES "p >-> unpack" forall p .
373 p >-> unpack = for p (\txt -> yield (T.unpack txt))
374 #-}
d4732515 375
c0343bc9 376-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
7faef8bc 377-- here acting on a 'Text' pipe, rather as they would on a lazy text
378toCaseFold :: Monad m => Pipe Text Text m ()
379toCaseFold = P.map T.toCaseFold
380{-# INLINEABLE toCaseFold #-}
381
ff38b9f0 382{-# RULES "p >-> toCaseFold" forall p .
383 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
384 #-}
385
386
c0343bc9 387-- | lowercase incoming 'Text'
7faef8bc 388toLower :: Monad m => Pipe Text Text m ()
389toLower = P.map T.toLower
390{-# INLINEABLE toLower #-}
391
ff38b9f0 392{-# RULES "p >-> toLower" forall p .
393 p >-> toLower = for p (\txt -> yield (T.toLower txt))
394 #-}
395
c0343bc9 396-- | uppercase incoming 'Text'
7faef8bc 397toUpper :: Monad m => Pipe Text Text m ()
398toUpper = P.map T.toUpper
399{-# INLINEABLE toUpper #-}
400
ff38b9f0 401{-# RULES "p >-> toUpper" forall p .
402 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
403 #-}
404
c0343bc9 405-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 406stripStart :: Monad m => Pipe Text Text m r
407stripStart = do
408 chunk <- await
409 let text = T.stripStart chunk
410 if T.null text
411 then stripStart
412 else cat
413{-# INLINEABLE stripStart #-}
414
31f41a5d 415-- | @(take n)@ only allows @n@ individual characters to pass;
416-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 417take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
418take n0 = go n0 where
419 go n
420 | n <= 0 = return ()
421 | otherwise = do
31f41a5d 422 txt <- await
423 let len = fromIntegral (T.length txt)
91727d11 424 if (len > n)
31f41a5d 425 then yield (T.take (fromIntegral n) txt)
91727d11 426 else do
31f41a5d 427 yield txt
91727d11 428 go (n - len)
429{-# INLINABLE take #-}
430
31f41a5d 431-- | @(drop n)@ drops the first @n@ characters
91727d11 432drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
433drop n0 = go n0 where
434 go n
435 | n <= 0 = cat
436 | otherwise = do
31f41a5d 437 txt <- await
438 let len = fromIntegral (T.length txt)
91727d11 439 if (len >= n)
440 then do
31f41a5d 441 yield (T.drop (fromIntegral n) txt)
91727d11 442 cat
443 else go (n - len)
444{-# INLINABLE drop #-}
445
31f41a5d 446-- | Take characters until they fail the predicate
91727d11 447takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
448takeWhile predicate = go
449 where
450 go = do
31f41a5d 451 txt <- await
452 let (prefix, suffix) = T.span predicate txt
91727d11 453 if (T.null suffix)
454 then do
31f41a5d 455 yield txt
91727d11 456 go
457 else yield prefix
458{-# INLINABLE takeWhile #-}
459
31f41a5d 460-- | Drop characters until they fail the predicate
91727d11 461dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
462dropWhile predicate = go where
463 go = do
31f41a5d 464 txt <- await
465 case T.findIndex (not . predicate) txt of
91727d11 466 Nothing -> go
467 Just i -> do
31f41a5d 468 yield (T.drop i txt)
91727d11 469 cat
470{-# INLINABLE dropWhile #-}
471
472-- | Only allows 'Char's to pass if they satisfy the predicate
473filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
474filter predicate = P.map (T.filter predicate)
475{-# INLINABLE filter #-}
476
ff38b9f0 477{-# RULES "p >-> filter q" forall p q .
478 p >-> filter q = for p (\txt -> yield (T.filter q txt))
479 #-}
480
31f41a5d 481-- | Strict left scan over the characters
91727d11 482scan
483 :: (Monad m)
484 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
485scan step begin = go begin
486 where
31f41a5d 487 go c = do
488 txt <- await
489 let txt' = T.scanl step c txt
490 c' = T.last txt'
491 yield txt'
492 go c'
91727d11 493{-# INLINABLE scan #-}
494
495{-| Fold a pure 'Producer' of strict 'Text's into a lazy
496 'TL.Text'
497-}
498toLazy :: Producer Text Identity () -> TL.Text
499toLazy = TL.fromChunks . P.toList
500{-# INLINABLE toLazy #-}
501
502{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
503 'TL.Text'
504
505 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
506 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
507 immediately as they are generated instead of loading them all into memory.
508-}
509toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
510toLazyM = liftM TL.fromChunks . P.toListM
511{-# INLINABLE toLazyM #-}
512
31f41a5d 513-- | Reduce the text stream using a strict left fold over characters
91727d11 514fold
515 :: Monad m
516 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
31f41a5d 517fold step begin done = P.fold (T.foldl' step) begin done
91727d11 518{-# INLINABLE fold #-}
519
520-- | Retrieve the first 'Char'
521head :: (Monad m) => Producer Text m () -> m (Maybe Char)
522head = go
523 where
524 go p = do
525 x <- nextChar p
526 case x of
527 Left _ -> return Nothing
31f41a5d 528 Right (c, _) -> return (Just c)
91727d11 529{-# INLINABLE head #-}
530
531-- | Retrieve the last 'Char'
532last :: (Monad m) => Producer Text m () -> m (Maybe Char)
533last = go Nothing
534 where
535 go r p = do
536 x <- next p
537 case x of
538 Left () -> return r
31f41a5d 539 Right (txt, p') ->
540 if (T.null txt)
91727d11 541 then go r p'
31f41a5d 542 else go (Just $ T.last txt) p'
91727d11 543{-# INLINABLE last #-}
544
545-- | Determine if the stream is empty
546null :: (Monad m) => Producer Text m () -> m Bool
547null = P.all T.null
548{-# INLINABLE null #-}
549
62e8521c 550-- | Count the number of characters in the stream
91727d11 551length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 552length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 553{-# INLINABLE length #-}
554
555-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
556any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
557any predicate = P.any (T.any predicate)
558{-# INLINABLE any #-}
559
560-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
561all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
562all predicate = P.all (T.all predicate)
563{-# INLINABLE all #-}
564
62e8521c 565-- | Return the maximum 'Char' within a text stream
91727d11 566maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
567maximum = P.fold step Nothing id
568 where
31f41a5d 569 step mc txt =
570 if (T.null txt)
571 then mc
572 else Just $ case mc of
573 Nothing -> T.maximum txt
574 Just c -> max c (T.maximum txt)
91727d11 575{-# INLINABLE maximum #-}
576
62e8521c 577-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 578minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
579minimum = P.fold step Nothing id
580 where
31f41a5d 581 step mc txt =
582 if (T.null txt)
583 then mc
584 else case mc of
585 Nothing -> Just (T.minimum txt)
586 Just c -> Just (min c (T.minimum txt))
91727d11 587{-# INLINABLE minimum #-}
588
91727d11 589-- | Find the first element in the stream that matches the predicate
590find
591 :: (Monad m)
592 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
593find predicate p = head (p >-> filter predicate)
594{-# INLINABLE find #-}
595
62e8521c 596-- | Index into a text stream
91727d11 597index
598 :: (Monad m, Integral a)
599 => a-> Producer Text m () -> m (Maybe Char)
600index n p = head (p >-> drop n)
601{-# INLINABLE index #-}
602
63ea9ffd 603
31f41a5d 604-- | Store a tally of how many segments match the given 'Text'
605count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
606count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
607{-# INLINABLE count #-}
608
609#if MIN_VERSION_text(0,11,4)
610-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
611-- into a Pipe of Text
612decodeUtf8
613 :: Monad m
614 => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
615decodeUtf8 = go TE.streamDecodeUtf8
616 where go dec p = do
617 x <- lift (next p)
618 case x of
619 Left r -> return (return r)
620 Right (chunk, p') -> do
621 let TE.Some text l dec' = dec chunk
622 if B.null l
623 then do
624 yield text
625 go dec' p'
626 else return $ do
627 yield l
628 p'
629{-# INLINEABLE decodeUtf8 #-}
63ea9ffd 630
631-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
632-- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@
633-- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\"
634decodeUtf8With
635 :: Monad m
636 => TE.OnDecodeError
637 -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
638decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
639 where go dec p = do
640 x <- lift (next p)
641 case x of
642 Left r -> return (return r)
643 Right (chunk, p') -> do
644 let TE.Some text l dec' = dec chunk
645 if B.null l
646 then do
647 yield text
648 go dec' p'
649 else return $ do
650 yield l
651 p'
652{-# INLINEABLE decodeUtf8With #-}
a02a69ad 653
654-- | A simple pipe from 'ByteString' to 'Text'; a decoding error will arise
655-- with any chunk that contains a sequence of bytes that is unreadable. Otherwise
656-- only few bytes will only be moved from one chunk to the next before decoding.
657pipeDecodeUtf8 :: Monad m => Pipe ByteString Text m r
658pipeDecodeUtf8 = go TE.streamDecodeUtf8
659 where go dec = do chunk <- await
660 case dec chunk of
661 TE.Some text l dec' -> do yield text
662 go dec'
663{-# INLINEABLE pipeDecodeUtf8 #-}
664
665-- | A simple pipe from 'ByteString' to 'Text' using a replacement function.
666pipeDecodeUtf8With
667 :: Monad m
668 => TE.OnDecodeError
669 -> Pipe ByteString Text m r
670pipeDecodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
671 where go dec = do chunk <- await
672 case dec chunk of
673 TE.Some text l dec' -> do yield text
674 go dec'
675{-# INLINEABLE pipeDecodeUtf8With #-}
31f41a5d 676#endif
677
678-- | Splits a 'Producer' after the given number of characters
91727d11 679splitAt
680 :: (Monad m, Integral n)
681 => n
682 -> Producer Text m r
683 -> Producer' Text m (Producer Text m r)
684splitAt = go
685 where
686 go 0 p = return p
687 go n p = do
688 x <- lift (next p)
689 case x of
690 Left r -> return (return r)
31f41a5d 691 Right (txt, p') -> do
692 let len = fromIntegral (T.length txt)
91727d11 693 if (len <= n)
694 then do
31f41a5d 695 yield txt
91727d11 696 go (n - len) p'
697 else do
31f41a5d 698 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 699 yield prefix
700 return (yield suffix >> p')
701{-# INLINABLE splitAt #-}
702
31f41a5d 703-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
91727d11 704chunksOf
705 :: (Monad m, Integral n)
706 => n -> Producer Text m r -> FreeT (Producer Text m) m r
707chunksOf n p0 = PP.FreeT (go p0)
708 where
709 go p = do
710 x <- next p
711 return $ case x of
712 Left r -> PP.Pure r
31f41a5d 713 Right (txt, p') -> PP.Free $ do
714 p'' <- splitAt n (yield txt >> p')
91727d11 715 return $ PP.FreeT (go p'')
716{-# INLINABLE chunksOf #-}
717
31f41a5d 718{-| Split a text stream in two, where the first text stream is the longest
719 consecutive group of text that satisfy the predicate
91727d11 720-}
721span
722 :: (Monad m)
723 => (Char -> Bool)
724 -> Producer Text m r
725 -> Producer' Text m (Producer Text m r)
726span predicate = go
727 where
728 go p = do
729 x <- lift (next p)
730 case x of
731 Left r -> return (return r)
31f41a5d 732 Right (txt, p') -> do
733 let (prefix, suffix) = T.span predicate txt
91727d11 734 if (T.null suffix)
735 then do
31f41a5d 736 yield txt
91727d11 737 go p'
738 else do
739 yield prefix
740 return (yield suffix >> p')
741{-# INLINABLE span #-}
742
62e8521c 743{-| Split a text stream in two, where the first text stream is the longest
744 consecutive group of characters that don't satisfy the predicate
91727d11 745-}
746break
747 :: (Monad m)
748 => (Char -> Bool)
749 -> Producer Text m r
750 -> Producer Text m (Producer Text m r)
751break predicate = span (not . predicate)
752{-# INLINABLE break #-}
753
62e8521c 754{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 755 predicate
756-}
757splitWith
758 :: (Monad m)
759 => (Char -> Bool)
760 -> Producer Text m r
761 -> PP.FreeT (Producer Text m) m r
762splitWith predicate p0 = PP.FreeT (go0 p0)
763 where
764 go0 p = do
765 x <- next p
766 case x of
767 Left r -> return (PP.Pure r)
31f41a5d 768 Right (txt, p') ->
769 if (T.null txt)
91727d11 770 then go0 p'
771 else return $ PP.Free $ do
31f41a5d 772 p'' <- span (not . predicate) (yield txt >> p')
91727d11 773 return $ PP.FreeT (go1 p'')
774 go1 p = do
775 x <- nextChar p
776 return $ case x of
777 Left r -> PP.Pure r
778 Right (_, p') -> PP.Free $ do
779 p'' <- span (not . predicate) p'
780 return $ PP.FreeT (go1 p'')
781{-# INLINABLE splitWith #-}
782
31f41a5d 783-- | Split a text stream using the given 'Char' as the delimiter
91727d11 784split :: (Monad m)
785 => Char
786 -> Producer Text m r
787 -> FreeT (Producer Text m) m r
31f41a5d 788split c = splitWith (c ==)
91727d11 789{-# INLINABLE split #-}
790
62e8521c 791{-| Group a text stream into 'FreeT'-delimited text streams using the supplied
91727d11 792 equality predicate
793-}
794groupBy
795 :: (Monad m)
796 => (Char -> Char -> Bool)
797 -> Producer Text m r
798 -> FreeT (Producer Text m) m r
799groupBy equal p0 = PP.FreeT (go p0)
800 where
801 go p = do
802 x <- next p
803 case x of
804 Left r -> return (PP.Pure r)
31f41a5d 805 Right (txt, p') -> case (T.uncons txt) of
91727d11 806 Nothing -> go p'
31f41a5d 807 Just (c, _) -> do
91727d11 808 return $ PP.Free $ do
31f41a5d 809 p'' <- span (equal c) (yield txt >> p')
91727d11 810 return $ PP.FreeT (go p'')
811{-# INLINABLE groupBy #-}
812
62e8521c 813-- | Group a text stream into 'FreeT'-delimited text streams of identical characters
91727d11 814group
815 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
816group = groupBy (==)
817{-# INLINABLE group #-}
818
62e8521c 819{-| Split a text stream into 'FreeT'-delimited lines
91727d11 820-}
821lines
822 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
823lines p0 = PP.FreeT (go0 p0)
824 where
825 go0 p = do
826 x <- next p
827 case x of
828 Left r -> return (PP.Pure r)
31f41a5d 829 Right (txt, p') ->
830 if (T.null txt)
91727d11 831 then go0 p'
31f41a5d 832 else return $ PP.Free $ go1 (yield txt >> p')
91727d11 833 go1 p = do
834 p' <- break ('\n' ==) p
b4d21c02 835 return $ PP.FreeT $ do
836 x <- nextChar p'
837 case x of
838 Left r -> return $ PP.Pure r
839 Right (_, p'') -> go0 p''
91727d11 840{-# INLINABLE lines #-}
91727d11 841
31f41a5d 842
843
844-- | Split a text stream into 'FreeT'-delimited words
91727d11 845words
846 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
cf10d6f1 847words = go
91727d11 848 where
cf10d6f1 849 go p = PP.FreeT $ do
850 x <- next (p >-> dropWhile isSpace)
851 return $ case x of
852 Left r -> PP.Pure r
853 Right (bs, p') -> PP.Free $ do
854 p'' <- break isSpace (yield bs >> p')
855 return (go p'')
91727d11 856{-# INLINABLE words #-}
857
cf10d6f1 858
62e8521c 859-- | Intersperse a 'Char' in between the characters of the text stream
91727d11 860intersperse
861 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
31f41a5d 862intersperse c = go0
91727d11 863 where
864 go0 p = do
865 x <- lift (next p)
866 case x of
867 Left r -> return r
31f41a5d 868 Right (txt, p') -> do
869 yield (T.intersperse c txt)
91727d11 870 go1 p'
871 go1 p = do
872 x <- lift (next p)
873 case x of
874 Left r -> return r
31f41a5d 875 Right (txt, p') -> do
876 yield (T.singleton c)
877 yield (T.intersperse c txt)
91727d11 878 go1 p'
879{-# INLINABLE intersperse #-}
880
31f41a5d 881{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
882 interspersing a text stream in between them
91727d11 883-}
884intercalate
885 :: (Monad m)
886 => Producer Text m ()
887 -> FreeT (Producer Text m) m r
888 -> Producer Text m r
889intercalate p0 = go0
890 where
891 go0 f = do
892 x <- lift (PP.runFreeT f)
893 case x of
894 PP.Pure r -> return r
895 PP.Free p -> do
896 f' <- p
897 go1 f'
898 go1 f = do
899 x <- lift (PP.runFreeT f)
900 case x of
901 PP.Pure r -> return r
902 PP.Free p -> do
903 p0
904 f' <- p
905 go1 f'
906{-# INLINABLE intercalate #-}
907
62e8521c 908{-| Join 'FreeT'-delimited lines into a text stream
91727d11 909-}
910unlines
911 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
912unlines = go
913 where
914 go f = do
915 x <- lift (PP.runFreeT f)
916 case x of
917 PP.Pure r -> return r
918 PP.Free p -> do
919 f' <- p
920 yield $ T.singleton '\n'
921 go f'
922{-# INLINABLE unlines #-}
923
31f41a5d 924{-| Join 'FreeT'-delimited words into a text stream
91727d11 925-}
926unwords
927 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
928unwords = intercalate (yield $ T.pack " ")
929{-# INLINABLE unwords #-}
930
931{- $parse
31f41a5d 932 The following parsing utilities are single-character analogs of the ones found
933 @pipes-parse@.
91727d11 934-}
935
91727d11 936{- $reexports
31f41a5d 937 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
91727d11 938
939 @Data.Text@ re-exports the 'Text' type.
940
91727d11 941 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
942-}