{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
{-| This module provides @pipes@ utilities for \"text streams\", which are
    streams of 'Text' chunks.  The individual chunks are uniformly @strict@, but
    a 'Producer' can be converted to and from lazy 'Text's, though this is
    generally unwise.  Where pipes IO replaces lazy IO, 'Producer Text m r'
    replaces lazy 'Text'.  An 'IO.Handle' can be associated with a 'Producer' or
    'Consumer' depending on whether it is read from or written to.

    To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'.  For
    example, the following program copies a document from one file to another:

> import Pipes
> import qualified Pipes.Text as Text
> import System.IO
>
> main =
>     withFile "inFile.txt"  ReadMode  $ \hIn  ->
>     withFile "outFile.txt" WriteMode $ \hOut ->
>     runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut

    To stream from files, the following is perhaps more Prelude-like (note that
    it uses Pipes.Safe):

> import Pipes
> import qualified Pipes.Text as Text
> import Pipes.Safe
>
> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"

    You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
    and 'stdout' proxies, as with the following \"echo\" program:

> main = runEffect $ Text.stdin >-> Text.stdout

    You can also translate pure lazy 'TL.Text's to and from proxies:

> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout

    In addition, this module provides many functions equivalent to lazy
    'Text' functions so that you can transform or fold text streams.  For
    example, to stream only the first three lines of 'stdin' to 'stdout' you
    might write:

> import Pipes
> import qualified Pipes.Text as Text
> import qualified Pipes.Parse as Parse
>
> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
>   where
>     takeLines n = Text.unlines . Parse.takeFree n . Text.lines

    The above program will never bring more than one chunk of text (~ 32 KB) into
    memory, no matter how long the lines are.

    Note that functions in this library are designed to operate on streams that
    are insensitive to text boundaries.  This means that they may freely split
    text into smaller texts and /discard empty texts/.  However, they will
    /never concatenate texts/ in order to provide strict upper bounds on memory
    usage, with the single exception of 'concatMap'.
-}

module Pipes.Text (
    -- * Producers
    fromLazy
    , stdin
    , fromHandle
    , readFile
    , stdinLn

    -- * Consumers
    , stdout
    , stdoutLn
    , toHandle
    , writeFile

    -- * Pipes
    , map
    , concatMap
    , take
    , drop
    , takeWhile
    , dropWhile
    , filter
    , scan
    , encodeUtf8
    , pack
    , unpack
    , toCaseFold
    , toLower
    , toUpper
    , stripStart

    -- * Folds
    , toLazy
    , toLazyM
    , foldChars
    , head
    , last
    , null
    , length
    , any
    , all
    , maximum
    , minimum
    , find
    , index
    , count

    -- * Primitive Character Parsers
    -- $parse
    , nextChar
    , drawChar
    , unDrawChar
    , peekChar
    , isEndOfChars

    -- * Parsing Lenses
    , splitAt
    , span
    , break
    , groupBy
    , group
    -- , word
    -- , line
    , decodeUtf8
    , decode

    -- * FreeT Splitters
    , chunksOf
    , splitsWith
    , split
    -- , groupsBy
    -- , groups
    , lines
    , words

    -- * Transformations
    , intersperse
    -- , packChars

    -- * Joiners
    , intercalate
    , unlines
    , unwords

    -- * Re-exports
    -- $reexports
    , module Data.ByteString
    , module Data.Text
    , module Data.Profunctor
    , module Data.Word
    , module Pipes.Parse
    ) where

import Control.Exception (throwIO, try)
import Control.Monad (liftM, unless, join)
import Control.Monad.Trans.State.Strict (StateT(..))
import Data.Monoid ((<>))
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Encoding.Error as TE
import Data.Text (Text)
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.IO as TL
import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import Data.Char (ord, isSpace)
import Data.Functor.Constant (Constant(Constant, getConstant))
import Data.Functor.Identity (Identity)
import Data.Profunctor (Profunctor)
import qualified Data.Profunctor
import qualified Data.List as List
import Foreign.C.Error (Errno(Errno), ePIPE)
import qualified GHC.IO.Exception as G
import Pipes
import qualified Pipes.ByteString as PB
import qualified Pipes.Text.Internal as PE
import Pipes.Text.Internal (Codec(..))
import Pipes.Text.Parse (nextChar, drawChar, unDrawChar, peekChar, isEndOfChars)
import Pipes.Core (respond, Server')
import qualified Pipes.Parse as PP
import Pipes.Parse (Parser, concats, intercalates, FreeT)
import qualified Pipes.Safe.Prelude as Safe
import qualified Pipes.Safe as Safe
import Pipes.Safe (MonadSafe(..), Base(..))
import qualified Pipes.Prelude as P
import qualified System.IO as IO
import Data.Word (Word8)

import Prelude hiding (
    all,
    any,
    break,
    concat,
    concatMap,
    drop,
    dropWhile,
    elem,
    filter,
    head,
    last,
    lines,
    length,
    map,
    maximum,
    minimum,
    notElem,
    null,
    readFile,
    span,
    splitAt,
    take,
    takeWhile,
    unlines,
    unwords,
    words,
    writeFile )

-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
{-# INLINE fromLazy #-}

-- | Stream text from 'stdin'
stdin :: MonadIO m => Producer Text m ()
stdin = fromHandle IO.stdin
{-# INLINE stdin #-}

{-| Convert an 'IO.Handle' into a text stream, using a chunk size
    determined by the good sense of the text library; note that this
    is distinctly slower than @decodeUtf8 (Pipes.ByteString.fromHandle h)@
    but uses the system encoding and has other @Data.Text.IO@ features
-}
fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
fromHandle h = go
  where
    go = do
        txt <- liftIO (T.hGetChunk h)
        unless (T.null txt) $ do
            yield txt
            go
{-# INLINABLE fromHandle #-}

{-| Stream text from a file in the simple fashion of @Data.Text.IO@

>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
MAIN = PUTSTRLN "HELLO WORLD"
-}
readFile :: MonadSafe m => FilePath -> Producer Text m ()
readFile file = Safe.withFile file IO.ReadMode fromHandle
{-# INLINE readFile #-}

{-| Stream lines of text from stdin (for testing in ghci etc.)

>>> let safely = runSafeT . runEffect
>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
hello
5
world
5

-}
stdinLn :: MonadIO m => Producer' Text m ()
stdinLn = go
  where
    go = do
        eof <- liftIO (IO.hIsEOF IO.stdin)
        unless eof $ do
            txt <- liftIO (T.hGetLine IO.stdin)
            yield txt
            go
{-# INLINABLE stdinLn #-}

{-| Stream text to 'stdout'

    Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.

    Note: For best performance, use @(for source (liftIO . putStr))@ instead of
    @(source >-> stdout)@ in suitable cases.
-}
stdout :: MonadIO m => Consumer' Text m ()
stdout = go
  where
    go = do
        txt <- await
        x   <- liftIO $ try (T.putStr txt)
        case x of
            Left (G.IOError { G.ioe_type  = G.ResourceVanished
                            , G.ioe_errno = Just ioe })
                | Errno ioe == ePIPE
                    -> return ()
            Left  e  -> liftIO (throwIO e)
            Right () -> go
{-# INLINABLE stdout #-}

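-- | Stream text to 'stdout', writing a newline after each received chunk;
--   like 'stdout', it terminates gracefully on a broken output pipe.
--   (This comment is added documentation; the behaviour described follows
--   directly from the definition below.)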
stdoutLn :: (MonadIO m) => Consumer' Text m ()
stdoutLn = go
  where
    go = do
        str <- await
        x   <- liftIO $ try (T.putStrLn str)
        case x of
            Left (G.IOError { G.ioe_type  = G.ResourceVanished
                            , G.ioe_errno = Just ioe })
                | Errno ioe == ePIPE
                    -> return ()
            Left  e  -> liftIO (throwIO e)
            Right () -> go
{-# INLINABLE stdoutLn #-}

{-| Convert a text stream into a 'Handle'

    Note: again, for best performance, where possible use
    @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
-}
toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
toHandle h = for cat (liftIO . T.hPutStr h)
{-# INLINABLE toHandle #-}

{-# RULES "p >-> toHandle h" forall p h .
        p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
  #-}

-- | Stream text into a file. Uses @pipes-safe@.
writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
writeFile file = Safe.withFile file IO.WriteMode toHandle
{-# INLINE writeFile #-}

type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)

type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)

(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
a ^. lens = getConstant (lens Constant a)

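{- The three definitions above are a small amount of local lens machinery,
   apparently kept in this module so that it need not depend on a full lens
   package.  A hedged sketch of how '(^.)' behaves with an ordinary lens
   ('asString' below is purely illustrative, not part of this module):

> asString :: Lens' (Int, String) String
> asString f (n, s) = fmap (\s' -> (n, s')) (f s)
>
> example :: String
> example = (3, "hello") ^. asString   -- == "hello"
-}
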
-- | Apply a transformation to each 'Char' in the stream
map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
map f = P.map (T.map f)
{-# INLINABLE map #-}

{-# RULES "p >-> map f" forall p f .
        p >-> map f = for p (\txt -> yield (T.map f txt))
  #-}

-- | Map a function over the characters of a text stream and concatenate the results
concatMap
    :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
concatMap f = P.map (T.concatMap f)
{-# INLINABLE concatMap #-}

{-# RULES "p >-> concatMap f" forall p f .
        p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
  #-}

-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@, so more complex
-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
encodeUtf8 :: Monad m => Pipe Text ByteString m r
encodeUtf8 = P.map TE.encodeUtf8
{-# INLINEABLE encodeUtf8 #-}

{-# RULES "p >-> encodeUtf8" forall p .
        p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
  #-}

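{- A hedged usage sketch of 'encodeUtf8' (the qualified names @Text@ and @PB@
   follow the import conventions used elsewhere in these docs; this is an
   illustrative program, not a recorded one):

> import Pipes
> import qualified Pipes.Text as Text
> import qualified Pipes.ByteString as PB
>
> -- read text from stdin and write it back out as UTF-8 encoded bytes
> main = runEffect $ Text.stdin >-> Text.encodeUtf8 >-> PB.stdout
-}
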
-- | Transform a Pipe of 'String's into one of 'Text' chunks
pack :: Monad m => Pipe String Text m r
pack = P.map T.pack
{-# INLINEABLE pack #-}

{-# RULES "p >-> pack" forall p .
        p >-> pack = for p (\txt -> yield (T.pack txt))
  #-}

-- | Transform a Pipe of 'Text' chunks into one of 'String's
unpack :: Monad m => Pipe Text String m r
unpack = for cat (\t -> yield (T.unpack t))
{-# INLINEABLE unpack #-}

{-# RULES "p >-> unpack" forall p .
        p >-> unpack = for p (\txt -> yield (T.unpack txt))
  #-}

-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
-- here acting on a 'Text' pipe, much as they would on a lazy 'Text'
toCaseFold :: Monad m => Pipe Text Text m ()
toCaseFold = P.map T.toCaseFold
{-# INLINEABLE toCaseFold #-}

{-# RULES "p >-> toCaseFold" forall p .
        p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
  #-}

-- | lowercase incoming 'Text'
toLower :: Monad m => Pipe Text Text m ()
toLower = P.map T.toLower
{-# INLINEABLE toLower #-}

{-# RULES "p >-> toLower" forall p .
        p >-> toLower = for p (\txt -> yield (T.toLower txt))
  #-}

-- | uppercase incoming 'Text'
toUpper :: Monad m => Pipe Text Text m ()
toUpper = P.map T.toUpper
{-# INLINEABLE toUpper #-}

{-# RULES "p >-> toUpper" forall p .
        p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
  #-}

-- | Remove leading white space from an incoming succession of 'Text's
stripStart :: Monad m => Pipe Text Text m r
stripStart = do
    chunk <- await
    let text = T.stripStart chunk
    if T.null text
        then stripStart
        else do
            yield text
            cat
{-# INLINEABLE stripStart #-}

-- | @(take n)@ only allows @n@ individual characters to pass;
--   contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
take n0 = go n0
  where
    go n
        | n <= 0    = return ()
        | otherwise = do
            txt <- await
            let len = fromIntegral (T.length txt)
            if (len > n)
                then yield (T.take (fromIntegral n) txt)
                else do
                    yield txt
                    go (n - len)
{-# INLINABLE take #-}

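{- A hedged ghci sketch of 'take' (assuming the qualified imports used in these
   docs: @Text@ for "Pipes.Text" and @TL@ for "Data.Text.Lazy"; the output shown
   is what one would expect, not a recorded session):

>>> Text.toLazy $ Text.fromLazy (TL.pack "pipes-text") >-> Text.take 5
"pipes"
-}
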
-- | @(drop n)@ drops the first @n@ characters
drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
drop n0 = go n0
  where
    go n
        | n <= 0    = cat
        | otherwise = do
            txt <- await
            let len = fromIntegral (T.length txt)
            if (len >= n)
                then do
                    yield (T.drop (fromIntegral n) txt)
                    cat
                else go (n - len)
{-# INLINABLE drop #-}

-- | Take characters until they fail the predicate
takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
takeWhile predicate = go
  where
    go = do
        txt <- await
        let (prefix, suffix) = T.span predicate txt
        if (T.null suffix)
            then do
                yield txt
                go
            else yield prefix
{-# INLINABLE takeWhile #-}

-- | Drop characters until they fail the predicate
dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
dropWhile predicate = go
  where
    go = do
        txt <- await
        case T.findIndex (not . predicate) txt of
            Nothing -> go
            Just i  -> do
                yield (T.drop i txt)
                cat
{-# INLINABLE dropWhile #-}

-- | Only allows 'Char's to pass if they satisfy the predicate
filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
filter predicate = P.map (T.filter predicate)
{-# INLINABLE filter #-}

{-# RULES "p >-> filter q" forall p q .
        p >-> filter q = for p (\txt -> yield (T.filter q txt))
  #-}

-- | Strict left scan over the characters, in the manner of @Data.Text.Lazy.scanl@
scan
    :: (Monad m)
    => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
scan step begin = do
    yield (T.singleton begin)
    go begin
  where
    go c = do
        txt <- await
        let txt' = T.scanl step c txt
            c'   = T.last txt'
        -- drop the seed character, which was already emitted with the
        -- previous chunk, so the accumulator is not duplicated at chunk
        -- boundaries
        yield (T.tail txt')
        go c'
{-# INLINABLE scan #-}

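{- A hedged sketch of 'scan', computing a running maximum (assuming the
   qualified imports used elsewhere in these docs; the result shown is the
   expected one, matching @Data.Text.Lazy.scanl max 'a' "abcab"@):

>>> Text.toLazy $ Text.fromLazy (TL.pack "abcab") >-> Text.scan max 'a'
"aabccc"
-}
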
{-| Fold a pure 'Producer' of strict 'Text's into a lazy
    'TL.Text'
-}
toLazy :: Producer Text Identity () -> TL.Text
toLazy = TL.fromChunks . P.toList
{-# INLINABLE toLazy #-}

{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
    'TL.Text'

    Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
    simple testing purposes.  Idiomatic @pipes@ style consumes the chunks
    immediately as they are generated instead of loading them all into memory.
-}
toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
toLazyM = liftM TL.fromChunks . P.toListM
{-# INLINABLE toLazyM #-}

-- | Reduce the text stream using a strict left fold over characters
foldChars
    :: Monad m
    => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
foldChars step begin done = P.fold (T.foldl' step) begin done
{-# INLINABLE foldChars #-}

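{- A hedged sketch of 'foldChars', counting characters by hand (this is, of
   course, just 'length' written out explicitly; the session is illustrative):

>>> import Data.Functor.Identity (runIdentity)
>>> runIdentity $ Text.foldChars (\n _ -> n + 1) (0 :: Int) id (Text.fromLazy (TL.pack "hello"))
5
-}
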
-- | Retrieve the first 'Char'
head :: (Monad m) => Producer Text m () -> m (Maybe Char)
head = go
  where
    go p = do
        x <- nextChar p
        case x of
            Left  _      -> return Nothing
            Right (c, _) -> return (Just c)
{-# INLINABLE head #-}

-- | Retrieve the last 'Char'
last :: (Monad m) => Producer Text m () -> m (Maybe Char)
last = go Nothing
  where
    go r p = do
        x <- next p
        case x of
            Left  ()        -> return r
            Right (txt, p') ->
                if (T.null txt)
                    then go r p'
                    else go (Just $ T.last txt) p'
{-# INLINABLE last #-}

-- | Determine if the stream is empty
null :: (Monad m) => Producer Text m () -> m Bool
null = P.all T.null
{-# INLINABLE null #-}

-- | Count the number of characters in the stream
length :: (Monad m, Num n) => Producer Text m () -> m n
length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
{-# INLINABLE length #-}

-- | Fold that returns whether any received 'Char's satisfy the predicate
any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
any predicate = P.any (T.any predicate)
{-# INLINABLE any #-}

-- | Fold that returns whether all received 'Char's satisfy the predicate
all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
all predicate = P.all (T.all predicate)
{-# INLINABLE all #-}

-- | Return the maximum 'Char' within a text stream
maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
maximum = P.fold step Nothing id
  where
    step mc txt =
        if (T.null txt)
            then mc
            else Just $ case mc of
                Nothing -> T.maximum txt
                Just c  -> max c (T.maximum txt)
{-# INLINABLE maximum #-}

-- | Return the minimum 'Char' within a text stream (surely very useful!)
minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
minimum = P.fold step Nothing id
  where
    step mc txt =
        if (T.null txt)
            then mc
            else case mc of
                Nothing -> Just (T.minimum txt)
                Just c  -> Just (min c (T.minimum txt))
{-# INLINABLE minimum #-}

-- | Find the first element in the stream that matches the predicate
find
    :: (Monad m)
    => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
find predicate p = head (p >-> filter predicate)
{-# INLINABLE find #-}

-- | Index into a text stream
index
    :: (Monad m, Integral a)
    => a -> Producer Text m () -> m (Maybe Char)
index n p = head (p >-> drop n)
{-# INLINABLE index #-}

-- | Count how many times the given 'Text' occurs in the stream, summing
--   'T.count' over the individual chunks (occurrences that straddle a chunk
--   boundary are therefore not detected)
count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
{-# INLINABLE count #-}

-- | Transform a 'Producer' of 'ByteString's expected to be UTF-8 encoded into a
--   'Producer' of 'Text', returning a 'Producer' of 'ByteString's that begins at
--   the point of decoding failure
decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf8 = go B.empty PE.streamDecodeUtf8
  where
    go !carry dec0 p = do
        x <- lift (next p)
        case x of
            Left r ->
                if B.null carry
                    then return (return r)  -- all bytestring input was consumed
                    else return (do         -- a potentially valid fragment remains
                            yield carry
                            return r)
            Right (chunk, p') -> case dec0 chunk of
                PE.Some text carry2 dec -> do
                    yield text
                    go carry2 dec p'
                PE.Other text bs -> do
                    yield text
                    return (do              -- an invalid blob remains
                            yield bs
                            p')
{-# INLINABLE decodeUtf8 #-}

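{- A hedged sketch of 'decodeUtf8' over a byte stream (the use of
   @Pipes.ByteString.stdin@ is illustrative; any byte 'Producer' will do).
   The leftover 'Producer' of un-decodable bytes is simply discarded here:

> import Control.Monad (void)
> import Pipes
> import qualified Pipes.Text as Text
> import qualified Pipes.ByteString as PB
>
> -- decode UTF-8 bytes from stdin and echo the text to stdout,
> -- ignoring whatever un-decodable bytes remain
> main = runEffect $ void (Text.decodeUtf8 PB.stdin) >-> Text.stdout
-}
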
-- | Splits a 'Producer' after the given number of characters
splitAt
    :: (Monad m, Integral n)
    => n
    -> Producer Text m r
    -> Producer' Text m (Producer Text m r)
splitAt = go
  where
    go 0 p = return p
    go n p = do
        x <- lift (next p)
        case x of
            Left r -> return (return r)
            Right (txt, p') -> do
                let len = fromIntegral (T.length txt)
                if (len <= n)
                    then do
                        yield txt
                        go (n - len) p'
                    else do
                        let (prefix, suffix) = T.splitAt (fromIntegral n) txt
                        yield prefix
                        return (yield suffix >> p')
{-# INLINABLE splitAt #-}

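{- A hedged sketch of 'splitAt': stream the first three characters, while the
   returned 'Producer' would stream the remaining "def" (assuming the usual
   qualified imports; 'void' is from "Control.Monad"):

>>> import Control.Monad (void)
>>> let p = Text.fromLazy (TL.pack "abcdef")
>>> Text.toLazy (void (Text.splitAt 3 p))
"abc"
-}
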
-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
chunksOf
    :: (Monad m, Integral n)
    => n -> Producer Text m r -> FreeT (Producer Text m) m r
chunksOf n p0 = PP.FreeT (go p0)
  where
    go p = do
        x <- next p
        return $ case x of
            Left r -> PP.Pure r
            Right (txt, p') -> PP.Free $ do
                p'' <- splitAt n (yield txt >> p')
                return $ PP.FreeT (go p'')
{-# INLINABLE chunksOf #-}

{-| Split a text stream in two, where the first text stream is the longest
    consecutive group of characters that satisfy the predicate
-}
span
    :: (Monad m)
    => (Char -> Bool)
    -> Producer Text m r
    -> Producer' Text m (Producer Text m r)
span predicate = go
  where
    go p = do
        x <- lift (next p)
        case x of
            Left r -> return (return r)
            Right (txt, p') -> do
                let (prefix, suffix) = T.span predicate txt
                if (T.null suffix)
                    then do
                        yield txt
                        go p'
                    else do
                        yield prefix
                        return (yield suffix >> p')
{-# INLINABLE span #-}

{-| Split a text stream in two, where the first text stream is the longest
    consecutive group of characters that don't satisfy the predicate
-}
break
    :: (Monad m)
    => (Char -> Bool)
    -> Producer Text m r
    -> Producer Text m (Producer Text m r)
break predicate = span (not . predicate)
{-# INLINABLE break #-}

{-| Split a text stream into sub-streams delimited by characters that satisfy the
    predicate
-}
splitsWith
    :: (Monad m)
    => (Char -> Bool)
    -> Producer Text m r
    -> PP.FreeT (Producer Text m) m r
splitsWith predicate p0 = PP.FreeT (go0 p0)
  where
    go0 p = do
        x <- next p
        case x of
            Left r -> return (PP.Pure r)
            Right (txt, p') ->
                if (T.null txt)
                    then go0 p'
                    else return $ PP.Free $ do
                        p'' <- span (not . predicate) (yield txt >> p')
                        return $ PP.FreeT (go1 p'')
    go1 p = do
        x <- nextChar p
        return $ case x of
            Left r -> PP.Pure r
            Right (_, p') -> PP.Free $ do
                p'' <- span (not . predicate) p'
                return $ PP.FreeT (go1 p'')
{-# INLINABLE splitsWith #-}

-- | Split a text stream using the given 'Char' as the delimiter
split :: (Monad m)
      => Char
      -> Producer Text m r
      -> FreeT (Producer Text m) m r
split c = splitsWith (c ==)
{-# INLINABLE split #-}

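{- A hedged sketch of 'split': break a comma-separated stream into fields and
   re-join them with newlines using 'intercalate' (the function name is
   illustrative only):

> import qualified Data.Text as T
>
> csvFields :: Monad m => Producer Text m r -> Producer Text m r
> csvFields p = Text.intercalate (yield (T.pack "\n")) (Text.split ',' p)
-}
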
{-| Group a text stream into 'FreeT'-delimited text streams using the supplied
    equality predicate
-}
groupBy
    :: (Monad m)
    => (Char -> Char -> Bool)
    -> Producer Text m r
    -> FreeT (Producer Text m) m r
groupBy equal p0 = PP.FreeT (go p0)
  where
    go p = do
        x <- next p
        case x of
            Left r -> return (PP.Pure r)
            Right (txt, p') -> case (T.uncons txt) of
                Nothing     -> go p'
                Just (c, _) -> return $ PP.Free $ do
                    p'' <- span (equal c) (yield txt >> p')
                    return $ PP.FreeT (go p'')
{-# INLINABLE groupBy #-}

-- | Group a text stream into 'FreeT'-delimited text streams of identical characters
group
    :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
group = groupBy (==)
{-# INLINABLE group #-}

{-| Split a text stream into 'FreeT'-delimited lines
-}
lines
    :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
lines p0 = PP.FreeT (go0 p0)
  where
    go0 p = do
        x <- next p
        case x of
            Left r -> return (PP.Pure r)
            Right (txt, p') ->
                if (T.null txt)
                    then go0 p'
                    else return $ PP.Free $ go1 (yield txt >> p')
    go1 p = do
        p' <- break ('\n' ==) p
        return $ PP.FreeT $ do
            x <- nextChar p'
            case x of
                Left r          -> return $ PP.Pure r
                Right (_, p'')  -> go0 p''
{-# INLINABLE lines #-}

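{- This splitter is what the @takeLines@ example in the module header relies
   on; restated here for convenience (it assumes @import qualified Pipes.Parse
   as Parse@, as in that example):

> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
-}
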
-- | Split a text stream into 'FreeT'-delimited words
words
    :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
words = go
  where
    go p = PP.FreeT $ do
        x <- next (p >-> dropWhile isSpace)
        return $ case x of
            Left r -> PP.Pure r
            Right (bs, p') -> PP.Free $ do
                p'' <- break isSpace (yield bs >> p')
                return (go p'')
{-# INLINABLE words #-}

-- | Intersperse a 'Char' in between the characters of the text stream
intersperse
    :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
intersperse c = go0
  where
    go0 p = do
        x <- lift (next p)
        case x of
            Left r -> return r
            Right (txt, p') -> do
                yield (T.intersperse c txt)
                go1 p'
    go1 p = do
        x <- lift (next p)
        case x of
            Left r -> return r
            Right (txt, p') -> do
                yield (T.singleton c)
                yield (T.intersperse c txt)
                go1 p'
{-# INLINABLE intersperse #-}

{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
    interspersing a text stream in between them
-}
intercalate
    :: (Monad m)
    => Producer Text m ()
    -> FreeT (Producer Text m) m r
    -> Producer Text m r
intercalate p0 = go0
  where
    go0 f = do
        x <- lift (PP.runFreeT f)
        case x of
            PP.Pure r -> return r
            PP.Free p -> do
                f' <- p
                go1 f'
    go1 f = do
        x <- lift (PP.runFreeT f)
        case x of
            PP.Pure r -> return r
            PP.Free p -> do
                p0
                f' <- p
                go1 f'
{-# INLINABLE intercalate #-}

{-| Join 'FreeT'-delimited lines into a text stream
-}
unlines
    :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
unlines = go
  where
    go f = do
        x <- lift (PP.runFreeT f)
        case x of
            PP.Pure r -> return r
            PP.Free p -> do
                f' <- p
                yield $ T.singleton '\n'
                go f'
{-# INLINABLE unlines #-}

{-| Join 'FreeT'-delimited words into a text stream
-}
unwords
    :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
unwords = intercalate (yield $ T.pack " ")
{-# INLINABLE unwords #-}

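{- A hedged round-trip sketch: splitting a stream into words and joining them
   back with single spaces normalises the whitespace in the stream (the
   function name is illustrative only):

> normaliseSpaces :: Monad m => Producer Text m r -> Producer Text m r
> normaliseSpaces = Text.unwords . Text.words
-}
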
{- $parse
    The following parsing utilities are single-character analogs of the ones
    found in @pipes-parse@.
-}

{- $reexports
    @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.

    @Data.ByteString@ re-exports the 'ByteString' type.

    @Data.Text@ re-exports the 'Text' type.

    @Data.Profunctor@ re-exports the 'Profunctor' class.

    @Data.Word@ re-exports the 'Word8' type.

    @Pipes.Parse@ re-exports 'Parser', 'concats', 'intercalates', and 'FreeT' (the type).
-}

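-- | Resume decoding a byte stream, given the result of a previous decoding
--   step (a 'PE.Decoding').  Decoded text is streamed; once un-decodable bytes
--   are met, the remaining byte stream is returned as the leftover 'Producer'.
--   (This comment is added documentation.)  A hedged sketch of starting it off
--   with 'PE.streamDecodeUtf8', as 'decodeUtf8' above does; the helper name is
--   illustrative, not part of this module:
--
-- > decodeRest :: Monad m => ByteString -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
-- > decodeRest chunk p = decode (PE.streamDecodeUtf8 chunk) p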
decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
-- decode codec = go B.empty where
--   go extra p0 =
--     do x <- lift (next p0)
--        case x of Right (chunk, p) ->
--                    do let (text, stuff) = codecDecode codec (B.append extra chunk)
--                       yield text
--                       case stuff of Right extra'   -> go extra' p
--                                     Left (exc, bs) -> do yield text
--                                                          return (do yield bs
--                                                                     p)
--                  Left r -> return (do yield extra
--                                       return r)
decode d p0 = case d of
    PE.Other txt bad -> do
        yield txt
        return (do
            yield bad
            p0)
    PE.Some txt extra dec -> do
        yield txt
        x <- lift (next p0)
        case x of
            Left r -> return (do
                yield extra
                return r)
            Right (chunk, p1) -> decode (dec chunk) p1