1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
2 #if __GLASGOW_HASKELL__ >= 702
3 {-# LANGUAGE Trustworthy #-}
4 #endif
5 {-| This module provides @pipes@ utilities for \"text streams\", which are
6 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
7 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
8 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
9 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' depending on whether it is read from or written to.
10
11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
12 example, the following program copies a document from one file to another:
13
14 > import Pipes
15 > import qualified Pipes.Text as Text
16 > import System.IO
17 >
18 > main =
19 >     withFile "inFile.txt"  ReadMode  $ \hIn  ->
20 >     withFile "outFile.txt" WriteMode $ \hOut ->
21 >     runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22
23 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
24
25 > import Pipes
26 > import qualified Pipes.Text as Text
27 > import Pipes.Safe
28 >
29 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
30
31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
32 and 'stdout' proxies, as with the following \"echo\" program:
33
34 > main = runEffect $ Text.stdin >-> Text.stdout
35
36 You can also translate pure lazy 'TL.Text's to and from proxies:
37
38 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
39
40 In addition, this module provides many functions equivalent to lazy
41 'Text' functions so that you can transform or fold text streams. For
42 example, to stream only the first three lines of 'stdin' to 'stdout' you
43 might write:
44
45 > import Pipes
46 > import qualified Pipes.Text as Text
47 > import qualified Pipes.Parse as Parse
48 >
49 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
50 > where
51 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
52
53 The above program will never bring more than one chunk of text (~ 32 KB) into
54 memory, no matter how long the lines are.
55
56 Note that functions in this library are designed to operate on streams that
57 are insensitive to text boundaries. This means that they may freely split
58 text into smaller texts and /discard empty texts/. However, with the single
59 exception of 'concatMap', they will /never concatenate texts/, in order
60 to provide strict upper bounds on memory usage.
61 -}
62
63 module Pipes.Text (
64 -- * Producers
65 fromLazy
66 , stdin
67 , fromHandle
68 , readFile
69 , stdinLn
70
71 -- * Consumers
72 , stdout
73 , stdoutLn
74 , toHandle
75 , writeFile
76
77 -- * Pipes
78 , map
79 , concatMap
80 , take
81 , drop
82 , takeWhile
83 , dropWhile
84 , filter
85 , scan
86 , encodeUtf8
87 , pack
88 , unpack
89 , toCaseFold
90 , toLower
91 , toUpper
92 , stripStart
93
94 -- * Folds
95 , toLazy
96 , toLazyM
97 , foldChars
98 , head
99 , last
100 , null
101 , length
102 , any
103 , all
104 , maximum
105 , minimum
106 , find
107 , index
108 , count
109
110 -- * Primitive Character Parsers
111 -- $parse
112 , nextChar
113 , drawChar
114 , unDrawChar
115 , peekChar
116 , isEndOfChars
117
118 -- * Parsing Lenses
119 , splitAt
120 , span
121 , break
122 , groupBy
123 , group
124 , word
125 , line
126 , decodeUtf8
127 , decode
128
129 -- * FreeT Splitters
130 , chunksOf
131 , splitsWith
132 , splits
133 -- , groupsBy
134 -- , groups
135 , lines
136 , words
137
138 -- * Transformations
139 , intersperse
140 , packChars
141
142 -- * Joiners
143 , intercalate
144 , unlines
145 , unwords
146
147 -- * Re-exports
148 -- $reexports
149 , module Data.ByteString
150 , module Data.Text
151 , module Data.Profunctor
152 , module Data.Word
153 , module Pipes.Parse
154 , module Pipes.Group
155 ) where
156
157 import Control.Exception (throwIO, try)
158 import Control.Applicative ((<*))
159 import Control.Monad (liftM, unless, join)
160 import Control.Monad.Trans.State.Strict (StateT(..), modify)
161 import Data.Monoid ((<>))
162 import qualified Data.Text as T
163 import qualified Data.Text.IO as T
164 import qualified Data.Text.Encoding as TE
165 import qualified Data.Text.Encoding.Error as TE
166 import Data.Text (Text)
167 import qualified Data.Text.Lazy as TL
168 import qualified Data.Text.Lazy.IO as TL
169 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
170 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
171 import Data.ByteString (ByteString)
172 import qualified Data.ByteString as B
173 import Data.Char (ord, isSpace)
174 import Data.Functor.Constant (Constant(Constant, getConstant))
175 import Data.Functor.Identity (Identity)
176 import Data.Profunctor (Profunctor)
177 import qualified Data.Profunctor
178 import qualified Data.List as List
179 import Foreign.C.Error (Errno(Errno), ePIPE)
180 import qualified GHC.IO.Exception as G
181 import Pipes
182 import qualified Pipes.ByteString as PB
183 import qualified Pipes.Text.Internal as PE
184 import Pipes.Text.Internal (Codec(..))
185 import Pipes.Core (respond, Server')
186 import Pipes.Group (concats, intercalates, transFreeT, FreeT(..), FreeF(..))
187 import qualified Pipes.Group as PG
188 import qualified Pipes.Parse as PP
189 import Pipes.Parse (Parser)
190 import qualified Pipes.Safe.Prelude as Safe
191 import qualified Pipes.Safe as Safe
192 import Pipes.Safe (MonadSafe(..), Base(..))
193 import qualified Pipes.Prelude as P
194 import qualified System.IO as IO
196 import Data.Word (Word8)
197
198 import Prelude hiding (
199 all,
200 any,
201 break,
202 concat,
203 concatMap,
204 drop,
205 dropWhile,
206 elem,
207 filter,
208 head,
209 last,
210 lines,
211 length,
212 map,
213 maximum,
214 minimum,
215 notElem,
216 null,
217 readFile,
218 span,
219 splitAt,
220 take,
221 takeWhile,
222 unlines,
223 unwords,
224 words,
225 writeFile )
226
227 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
228 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
229 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
230 {-# INLINE fromLazy #-}
231
232 -- | Stream text from 'stdin'
233 stdin :: MonadIO m => Producer Text m ()
234 stdin = fromHandle IO.stdin
235 {-# INLINE stdin #-}
236
237 {-| Convert an 'IO.Handle' into a text stream, using a chunk size chosen by
238 the @text@ library; note that this is distinctly slower than
239 @decodeUtf8 (Pipes.ByteString.fromHandle h)@, but it uses the handle's
240 encoding (the system locale by default) and has the other features of @Data.Text.IO@
241 -}
242
243 fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
244 fromHandle h = go where
245 go = do txt <- liftIO (T.hGetChunk h)
246 unless (T.null txt) $ do yield txt
247 go
248 {-# INLINABLE fromHandle #-}
249
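{- A hedged sketch of the comparison mentioned above, assuming the handle holds
   UTF-8 encoded text and using this module's qualified imports ('PB' is
   "Pipes.ByteString"); the name @fromHandleUtf8@ is illustrative, not part of
   this module:

> fromHandleUtf8 :: MonadIO m => IO.Handle -> Producer Text m (Producer ByteString m ())
> fromHandleUtf8 h = PB.fromHandle h ^. decodeUtf8

   The returned inner 'Producer' begins at the first undecodable byte, if any,
   whereas 'fromHandle' itself uses the handle's encoding and never yields leftovers.
-}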
250
251 {-| Stream text from a file in the simple fashion of @Data.Text.IO@
252
253 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
254 MAIN = PUTSTRLN "HELLO WORLD"
255 -}
256
257 readFile :: MonadSafe m => FilePath -> Producer Text m ()
258 readFile file = Safe.withFile file IO.ReadMode fromHandle
259 {-# INLINE readFile #-}
260
261 {-| Stream lines of text from stdin (for testing in ghci etc.)
262
263 >>> let safely = runSafeT . runEffect
264 >>> safely $ for Text.stdinLn (lift . lift . print . T.length)
265 hello
266 5
267 world
268 5
269
270 -}
271 stdinLn :: MonadIO m => Producer' Text m ()
272 stdinLn = go where
273 go = do
274 eof <- liftIO (IO.hIsEOF IO.stdin)
275 unless eof $ do
276 txt <- liftIO (T.hGetLine IO.stdin)
277 yield txt
278 go
279 {-# INLINABLE stdinLn #-}
280
281 {-| Stream text to 'stdout'
282
283 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
284
285 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
286 @(source >-> stdout)@ in suitable cases.
287 -}
288 stdout :: MonadIO m => Consumer' Text m ()
289 stdout = go
290 where
291 go = do
292 txt <- await
293 x <- liftIO $ try (T.putStr txt)
294 case x of
295 Left (G.IOError { G.ioe_type = G.ResourceVanished
296 , G.ioe_errno = Just ioe })
297 | Errno ioe == ePIPE
298 -> return ()
299 Left e -> liftIO (throwIO e)
300 Right () -> go
301 {-# INLINABLE stdout #-}
302
-- | Stream text to 'stdout', writing a newline after each incoming chunk; like 'stdout', it terminates gracefully on a broken output pipe
303 stdoutLn :: (MonadIO m) => Consumer' Text m ()
304 stdoutLn = go
305 where
306 go = do
307 str <- await
308 x <- liftIO $ try (T.putStrLn str)
309 case x of
310 Left (G.IOError { G.ioe_type = G.ResourceVanished
311 , G.ioe_errno = Just ioe })
312 | Errno ioe == ePIPE
313 -> return ()
314 Left e -> liftIO (throwIO e)
315 Right () -> go
316 {-# INLINABLE stdoutLn #-}
317
318 {-| Stream text to an 'IO.Handle'
319
320 Note: again, for best performance, where possible use
321 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
322 -}
323 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
324 toHandle h = for cat (liftIO . T.hPutStr h)
325 {-# INLINABLE toHandle #-}
326
327 {-# RULES "p >-> toHandle h" forall p h .
328 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
329 #-}
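{- A minimal sketch of the faster idiom recommended in the note above, written
   with this module's qualified imports ('T' is "Data.Text.IO"); the name
   @hPutText@ is illustrative only:

> hPutText :: IO.Handle -> Producer Text IO r -> IO r
> hPutText h p = runEffect $ for p (liftIO . T.hPutStr h)
-}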
330
331
332 -- | Stream text into a file. Uses @pipes-safe@.
333 writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
334 writeFile file = Safe.withFile file IO.WriteMode toHandle
335 {-# INLINE writeFile #-}
336
337
-- Minimal local lens machinery (van Laarhoven lenses); 'Lens'', 'Iso'' and ('^.') below are compatible with the standard lens libraries.
338 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
339
340 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
341
342 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
343 a ^. lens = getConstant (lens Constant a)
344
345
346 -- | Apply a transformation to each 'Char' in the stream
347 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
348 map f = P.map (T.map f)
349 {-# INLINABLE map #-}
350
351 {-# RULES "p >-> map f" forall p f .
352 p >-> map f = for p (\txt -> yield (T.map f txt))
353 #-}
354
355 -- | Map a function over the characters of a text stream and concatenate the results
356 concatMap
357 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
358 concatMap f = P.map (T.concatMap f)
359 {-# INLINABLE concatMap #-}
360
361 {-# RULES "p >-> concatMap f" forall p f .
362 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
363 #-}
364
365 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
366 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
367 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
368 encodeUtf8 :: Monad m => Pipe Text ByteString m r
369 encodeUtf8 = P.map TE.encodeUtf8
370 {-# INLINEABLE encodeUtf8 #-}
371
372 {-# RULES "p >-> encodeUtf8" forall p .
373 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
374 #-}
375
376 -- | Transform a Pipe of 'String's into one of 'Text' chunks
377 pack :: Monad m => Pipe String Text m r
378 pack = P.map T.pack
379 {-# INLINEABLE pack #-}
380
381 {-# RULES "p >-> pack" forall p .
382 p >-> pack = for p (\txt -> yield (T.pack txt))
383 #-}
384
385 -- | Transform a Pipe of 'Text' chunks into one of 'String's
386 unpack :: Monad m => Pipe Text String m r
387 unpack = for cat (\t -> yield (T.unpack t))
388 {-# INLINEABLE unpack #-}
389
390 {-# RULES "p >-> unpack" forall p .
391 p >-> unpack = for p (\txt -> yield (T.unpack txt))
392 #-}
393
394 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
395 -- here acting as 'Text' pipes, much as they would act on a lazy 'Text'
396 toCaseFold :: Monad m => Pipe Text Text m ()
397 toCaseFold = P.map T.toCaseFold
398 {-# INLINEABLE toCaseFold #-}
399
400 {-# RULES "p >-> toCaseFold" forall p .
401 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
402 #-}
403
404
405 -- | lowercase incoming 'Text'
406 toLower :: Monad m => Pipe Text Text m ()
407 toLower = P.map T.toLower
408 {-# INLINEABLE toLower #-}
409
410 {-# RULES "p >-> toLower" forall p .
411 p >-> toLower = for p (\txt -> yield (T.toLower txt))
412 #-}
413
414 -- | uppercase incoming 'Text'
415 toUpper :: Monad m => Pipe Text Text m ()
416 toUpper = P.map T.toUpper
417 {-# INLINEABLE toUpper #-}
418
419 {-# RULES "p >-> toUpper" forall p .
420 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
421 #-}
422
423 -- | Remove leading white space from an incoming succession of 'Text's
424 stripStart :: Monad m => Pipe Text Text m r
425 stripStart = do
426 chunk <- await
427 let text = T.stripStart chunk
428 if T.null text
429 then stripStart
430 else do yield text
431 cat
432 {-# INLINEABLE stripStart #-}
433
434 -- | @(take n)@ only allows @n@ individual characters to pass;
435 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
436 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
437 take n0 = go n0 where
438 go n
439 | n <= 0 = return ()
440 | otherwise = do
441 txt <- await
442 let len = fromIntegral (T.length txt)
443 if (len > n)
444 then yield (T.take (fromIntegral n) txt)
445 else do
446 yield txt
447 go (n - len)
448 {-# INLINABLE take #-}
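{- For instance, assuming the usual @import qualified Pipes.Text as Text@, this
   passes at most the first 80 characters of standard input through, regardless
   of how the input is chunked:

> main = runEffect $ Text.stdin >-> Text.take (80 :: Int) >-> Text.stdout
-}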
449
450 -- | @(drop n)@ drops the first @n@ characters
451 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
452 drop n0 = go n0 where
453 go n
454 | n <= 0 = cat
455 | otherwise = do
456 txt <- await
457 let len = fromIntegral (T.length txt)
458 if (len >= n)
459 then do
460 yield (T.drop (fromIntegral n) txt)
461 cat
462 else go (n - len)
463 {-# INLINABLE drop #-}
464
465 -- | Take characters until they fail the predicate
466 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
467 takeWhile predicate = go
468 where
469 go = do
470 txt <- await
471 let (prefix, suffix) = T.span predicate txt
472 if (T.null suffix)
473 then do
474 yield txt
475 go
476 else yield prefix
477 {-# INLINABLE takeWhile #-}
478
479 -- | Drop characters until they fail the predicate
480 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
481 dropWhile predicate = go where
482 go = do
483 txt <- await
484 case T.findIndex (not . predicate) txt of
485 Nothing -> go
486 Just i -> do
487 yield (T.drop i txt)
488 cat
489 {-# INLINABLE dropWhile #-}
490
491 -- | Only allows 'Char's to pass if they satisfy the predicate
492 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
493 filter predicate = P.map (T.filter predicate)
494 {-# INLINABLE filter #-}
495
496 {-# RULES "p >-> filter q" forall p q .
497 p >-> filter q = for p (\txt -> yield (T.filter q txt))
498 #-}
499
500 -- | Strict left scan over the characters
501 scan
502 :: (Monad m)
503 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
504 scan step begin = do yield (T.singleton begin); go begin
505 where
506 go c = do
507 txt <- await
508 let txt' = T.scanl step c txt
509 c' = T.last txt'
510 yield (T.tail txt') -- the seed 'c' was already yielded by the previous step
511 go c'
512 {-# INLINABLE scan #-}
513
514 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
515 'TL.Text'
516 -}
517 toLazy :: Producer Text Identity () -> TL.Text
518 toLazy = TL.fromChunks . P.toList
519 {-# INLINABLE toLazy #-}
520
521 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
522 'TL.Text'
523
524 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
525 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
526 immediately as they are generated instead of loading them all into memory.
527 -}
528 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
529 toLazyM = liftM TL.fromChunks . P.toListM
530 {-# INLINABLE toLazyM #-}
531
532 -- | Reduce the text stream using a strict left fold over characters
533 foldChars
534 :: Monad m
535 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
536 foldChars step begin done = P.fold (T.foldl' step) begin done
537 {-# INLINABLE foldChars #-}
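{- For example, a character-level strict left fold can count newlines; the name
   @countNewlines@ is illustrative only:

> countNewlines :: Monad m => Producer Text m () -> m Int
> countNewlines = foldChars (\n c -> if c == '\n' then n + 1 else n) 0 id
-}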
538
539 -- | Retrieve the first 'Char'
540 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
541 head = go
542 where
543 go p = do
544 x <- nextChar p
545 case x of
546 Left _ -> return Nothing
547 Right (c, _) -> return (Just c)
548 {-# INLINABLE head #-}
549
550 -- | Retrieve the last 'Char'
551 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
552 last = go Nothing
553 where
554 go r p = do
555 x <- next p
556 case x of
557 Left () -> return r
558 Right (txt, p') ->
559 if (T.null txt)
560 then go r p'
561 else go (Just $ T.last txt) p'
562 {-# INLINABLE last #-}
563
564 -- | Determine if the stream is empty
565 null :: (Monad m) => Producer Text m () -> m Bool
566 null = P.all T.null
567 {-# INLINABLE null #-}
568
569 -- | Count the number of characters in the stream
570 length :: (Monad m, Num n) => Producer Text m () -> m n
571 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
572 {-# INLINABLE length #-}
573
574 -- | Fold that returns whether any received 'Char's satisfy the predicate
575 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
576 any predicate = P.any (T.any predicate)
577 {-# INLINABLE any #-}
578
579 -- | Fold that returns whether all received 'Char's satisfy the predicate
580 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
581 all predicate = P.all (T.all predicate)
582 {-# INLINABLE all #-}
583
584 -- | Return the maximum 'Char' within a text stream
585 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
586 maximum = P.fold step Nothing id
587 where
588 step mc txt =
589 if (T.null txt)
590 then mc
591 else Just $ case mc of
592 Nothing -> T.maximum txt
593 Just c -> max c (T.maximum txt)
594 {-# INLINABLE maximum #-}
595
596 -- | Return the minimum 'Char' within a text stream (surely very useful!)
597 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
598 minimum = P.fold step Nothing id
599 where
600 step mc txt =
601 if (T.null txt)
602 then mc
603 else case mc of
604 Nothing -> Just (T.minimum txt)
605 Just c -> Just (min c (T.minimum txt))
606 {-# INLINABLE minimum #-}
607
608
609 -- | Find the first element in the stream that matches the predicate
610 find
611 :: (Monad m)
612 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
613 find predicate p = head (p >-> filter predicate)
614 {-# INLINABLE find #-}
615
616 -- | Index into a text stream
617 index
618 :: (Monad m, Integral a)
619 => a -> Producer Text m () -> m (Maybe Char)
620 index n p = head (p >-> drop n)
621 {-# INLINABLE index #-}
622
623
624 -- | Count the number of times the given 'Text' occurs in the stream; matches are sought chunk by chunk, so occurrences that span chunk boundaries are not counted
625 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
626 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
627 {-# INLINABLE count #-}
628
629
630 {-| Consume the first character from a stream of 'Text'
631
632 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
633 succeeds with a 'Right' providing the next character and the remainder of the
634 'Producer'.
635 -}
636 nextChar
637 :: (Monad m)
638 => Producer Text m r
639 -> m (Either r (Char, Producer Text m r))
640 nextChar = go
641 where
642 go p = do
643 x <- next p
644 case x of
645 Left r -> return (Left r)
646 Right (txt, p') -> case (T.uncons txt) of
647 Nothing -> go p'
648 Just (c, txt') -> return (Right (c, yield txt' >> p'))
649 {-# INLINABLE nextChar #-}
650
651 {-| Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the
652 'Producer' is empty
653 -}
654 drawChar :: (Monad m) => Parser Text m (Maybe Char)
655 drawChar = do
656 x <- PP.draw
657 case x of
658 Nothing -> return Nothing
659 Just txt -> case (T.uncons txt) of
660 Nothing -> drawChar
661 Just (c, txt') -> do
662 PP.unDraw txt'
663 return (Just c)
664 {-# INLINABLE drawChar #-}
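{- A hedged usage sketch: since a 'Parser' from @pipes-parse@ is a 'StateT' over
   the underlying 'Producer', these character parsers can be run with
   'evalStateT', 'runStateT' or 'execStateT':

>>> evalStateT drawChar (Text.fromLazy (TL.pack "abc"))
Just 'a'
-}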
665
666 -- | Push back a 'Char' onto the underlying 'Producer'
667 unDrawChar :: (Monad m) => Char -> Parser Text m ()
668 unDrawChar c = modify (yield (T.singleton c) >>)
669 {-# INLINABLE unDrawChar #-}
670
671 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
672 push the 'Char' back
673
674 > peekChar = do
675 > x <- drawChar
676 > case x of
677 > Nothing -> return ()
678 > Just c -> unDrawChar c
679 > return x
680 -}
681 peekChar :: (Monad m) => Parser Text m (Maybe Char)
682 peekChar = do
683 x <- drawChar
684 case x of
685 Nothing -> return ()
686 Just c -> unDrawChar c
687 return x
688 {-# INLINABLE peekChar #-}
689
690 {-| Check if the underlying 'Producer' has no more characters
691
692 Note that this will skip over empty 'Text' chunks, unlike
693 'PP.isEndOfInput' from @pipes-parse@, which would consider
694 an empty 'Text' a valid bit of input.
695
696 > isEndOfChars = liftM isNothing peekChar
697 -}
698 isEndOfChars :: (Monad m) => Parser Text m Bool
699 isEndOfChars = do
700 x <- peekChar
701 return (case x of
702 Nothing -> True
703 Just _-> False )
704 {-# INLINABLE isEndOfChars #-}
705
706
707
708
709
710 -- | Improper lens from a 'Producer' of 'ByteString's, expected to be UTF-8 encoded, to a 'Producer' of 'Text';
711 -- the inner return value is a 'Producer' of 'ByteString's beginning at the point of the first decoding failure.
712
713 decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
714 (Producer Text m (Producer ByteString m r))
715 decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
716 (k (go B.empty PE.streamDecodeUtf8 p0)) where
717 go !carry dec0 p = do
718 x <- lift (next p)
719 case x of Left r -> if B.null carry
720 then return (return r) -- all bytestring input was consumed
721 else return (do yield carry -- a potentially valid fragment remains
722 return r)
723
724 Right (chunk, p') -> case dec0 chunk of
725 PE.Some text carry2 dec -> do yield text
726 go carry2 dec p'
727 PE.Other text bs -> do yield text
728 return (do yield bs -- an invalid blob remains
729 p')
730 {-# INLINABLE decodeUtf8 #-}
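{- A hedged usage sketch, written with this module's qualified imports ('PB' is
   "Pipes.ByteString", 'T' here is "Data.Text.IO", 'B' is "Data.ByteString"):
   decode standard input as UTF-8, then echo whatever could not be decoded back
   out as raw bytes:

> main = do rest <- runEffect $ for (PB.stdin ^. decodeUtf8) (liftIO . T.putStr)
>           runEffect $ for rest (liftIO . B.putStr)
-}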
731
732
733 -- | Splits a 'Producer' after the given number of characters
734 splitAt
735 :: (Monad m, Integral n)
736 => n
737 -> Lens' (Producer Text m r)
738 (Producer Text m (Producer Text m r))
739 splitAt n0 k p0 = fmap join (k (go n0 p0))
740 where
741 go 0 p = return p
742 go n p = do
743 x <- lift (next p)
744 case x of
745 Left r -> return (return r)
746 Right (txt, p') -> do
747 let len = fromIntegral (T.length txt)
748 if (len <= n)
749 then do
750 yield txt
751 go (n - len) p'
752 else do
753 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
754 yield prefix
755 return (yield suffix >> p')
756 {-# INLINABLE splitAt #-}
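{- A hedged sketch of using this improper lens with the local ('^.') defined
   above; the name @firstTen@ is illustrative only.  The outer 'Producer'
   streams at most ten characters and returns the rest of the stream:

> firstTen :: Monad m => Producer Text m r -> Producer Text m (Producer Text m r)
> firstTen p = p ^. splitAt (10 :: Int)
-}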
757
758
759 {-| Split a text stream in two, where the first text stream is the longest
760 consecutive group of characters that satisfy the predicate
761 -}
762 span
763 :: (Monad m)
764 => (Char -> Bool)
765 -> Lens' (Producer Text m r)
766 (Producer Text m (Producer Text m r))
767 span predicate k p0 = fmap join (k (go p0))
768 where
769 go p = do
770 x <- lift (next p)
771 case x of
772 Left r -> return (return r)
773 Right (txt, p') -> do
774 let (prefix, suffix) = T.span predicate txt
775 if (T.null suffix)
776 then do
777 yield txt
778 go p'
779 else do
780 yield prefix
781 return (yield suffix >> p')
782 {-# INLINABLE span #-}
783
784 {-| Split a text stream in two, where the first text stream is the longest
785 consecutive group of characters that don't satisfy the predicate
786 -}
787 break
788 :: (Monad m)
789 => (Char -> Bool)
790 -> Lens' (Producer Text m r)
791 (Producer Text m (Producer Text m r))
792 break predicate = span (not . predicate)
793 {-# INLINABLE break #-}
794
795 {-| Improper lens that splits after the first group of equivalent Chars, as
796 defined by the given equivalence relation
797 -}
798 groupBy
799 :: (Monad m)
800 => (Char -> Char -> Bool)
801 -> Lens' (Producer Text m r)
802 (Producer Text m (Producer Text m r))
803 groupBy equals k p0 = fmap join (k ((go p0))) where
804 go p = do
805 x <- lift (next p)
806 case x of
807 Left r -> return (return r)
808 Right (txt, p') -> case T.uncons txt of
809 Nothing -> go p'
810 Just (c, _) -> (yield txt >> p') ^. span (equals c)
811 {-# INLINABLE groupBy #-}
812
813 -- | Improper lens that splits after the first succession of identical 'Char's
814 group :: Monad m
815 => Lens' (Producer Text m r)
816 (Producer Text m (Producer Text m r))
817 group = groupBy (==)
818 {-# INLINABLE group #-}
819
820 {-| Improper lens that splits a 'Producer' after the first word
821
822 Unlike 'words', this does not drop leading whitespace
823 -}
824 word :: (Monad m)
825 => Lens' (Producer Text m r)
826 (Producer Text m (Producer Text m r))
827 word k p0 = fmap join (k (to p0))
828 where
829 to p = do
830 p' <- p^.span isSpace
831 p'^.break isSpace
832 {-# INLINABLE word #-}
833
834
-- | Improper lens that splits a 'Producer' at the first newline; the newline itself is left at the start of the remainder
835 line :: (Monad m)
836 => Lens' (Producer Text m r)
837 (Producer Text m (Producer Text m r))
838 line = break (== '\n')
839
840 {-# INLINABLE line #-}
841
842
843 -- | Intersperse a 'Char' in between the characters of a stream of 'Text'
844 intersperse
845 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
846 intersperse c = go0
847 where
848 go0 p = do
849 x <- lift (next p)
850 case x of
851 Left r -> return r
852 Right (txt, p') -> do
853 yield (T.intersperse c txt)
854 go1 p'
855 go1 p = do
856 x <- lift (next p)
857 case x of
858 Left r -> return r
859 Right (txt, p') -> do
860 yield (T.singleton c)
861 yield (T.intersperse c txt)
862 go1 p'
863 {-# INLINABLE intersperse #-}
864
865
866
867 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
868 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
869 packChars = Data.Profunctor.dimap to (fmap from)
870 where
871 -- to :: Monad m => Producer Char m x -> Producer Text m x
872 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
873
874 step diffAs c = diffAs . (c:)
875
876 done diffAs = T.pack (diffAs [])
877
878 -- from :: Monad m => Producer Text m x -> Producer Char m x
879 from p = for p (each . T.unpack)
880 {-# INLINABLE packChars #-}
881
882
883 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
884 chunksOf
885 :: (Monad m, Integral n)
886 => n -> Lens' (Producer Text m r)
887 (FreeT (Producer Text m) m r)
888 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
889 where
890 go p = do
891 x <- next p
892 return $ case x of
893 Left r -> Pure r
894 Right (txt, p') -> Free $ do
895 p'' <- (yield txt >> p') ^. splitAt n
896 return $ FreeT (go p'')
897 {-# INLINABLE chunksOf #-}
898
899
900 {-| Split a text stream into sub-streams delimited by characters that satisfy the
901 predicate
902 -}
903 splitsWith
904 :: (Monad m)
905 => (Char -> Bool)
906 -> Producer Text m r
907 -> FreeT (Producer Text m) m r
908 splitsWith predicate p0 = FreeT (go0 p0)
909 where
910 go0 p = do
911 x <- next p
912 case x of
913 Left r -> return (Pure r)
914 Right (txt, p') ->
915 if (T.null txt)
916 then go0 p'
917 else return $ Free $ do
918 p'' <- (yield txt >> p') ^. span (not . predicate)
919 return $ FreeT (go1 p'')
920 go1 p = do
921 x <- nextChar p
922 return $ case x of
923 Left r -> Pure r
924 Right (_, p') -> Free $ do
925 p'' <- p' ^. span (not . predicate)
926 return $ FreeT (go1 p'')
927 {-# INLINABLE splitsWith #-}
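{- For instance, tab-separated fields can be re-joined with commas by pairing
   'splitsWith' with 'PG.intercalates', just as 'splits' does below; the name
   @tabsToCommas@ is illustrative only:

> tabsToCommas :: Monad m => Producer Text m r -> Producer Text m r
> tabsToCommas p = PG.intercalates (yield (T.pack ",")) (splitsWith (== '\t') p)
-}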
928
929 -- | Split a text stream using the given 'Char' as the delimiter
930 splits :: (Monad m)
931 => Char
932 -> Lens' (Producer Text m r)
933 (FreeT (Producer Text m) m r)
934 splits c k p =
935 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
936 {-# INLINABLE splits #-}
937
938 {-| Improper lens from a stream of 'Text' to 'FreeT'-delimited groups of equivalent 'Char's, as
939 determined by the given equivalence relation
940 -}
941 groupsBy
942 :: Monad m
943 => (Char -> Char -> Bool)
944 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
945 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
946 go p = do x <- next p
947 case x of Left r -> return (Pure r)
948 Right (bs, p') -> case T.uncons bs of
949 Nothing -> go p'
950 Just (c, _) -> do return $ Free $ do
951 p'' <- (yield bs >> p')^.span (equals c)
952 return $ FreeT (go p'')
953 {-# INLINABLE groupsBy #-}
954
955
956 -- | Like 'groupsBy', where the equality predicate is ('==')
957 groups
958 :: Monad m
959 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
960 groups = groupsBy (==)
961 {-# INLINABLE groups #-}
962
963
964
965 {-| Split a text stream into 'FreeT'-delimited lines
966 -}
967 lines
968 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
969 lines = Data.Profunctor.dimap _lines (fmap _unlines)
970 where
971 _lines p0 = FreeT (go0 p0)
972 where
973 go0 p = do
974 x <- next p
975 case x of
976 Left r -> return (Pure r)
977 Right (txt, p') ->
978 if (T.null txt)
979 then go0 p'
980 else return $ Free $ go1 (yield txt >> p')
981 go1 p = do
982 p' <- p ^. break ('\n' ==)
983 return $ FreeT $ do
984 x <- nextChar p'
985 case x of
986 Left r -> return $ Pure r
987 Right (_, p'') -> go0 p''
988 -- _unlines
989 -- :: Monad m
990 -- => FreeT (Producer Text m) m x -> Producer Text m x
991 _unlines = concats . transFreeT addNewline
992
993 -- addNewline
994 -- :: Monad m => Producer Text m r -> Producer Text m r
995 addNewline p = p <* yield (T.singleton '\n')
996 {-# INLINABLE lines #-}
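{- A hedged sketch of using this improper isomorphism with the local ('^.')
   defined above: splitting into lines and re-joining with 'unlines' re-attaches
   a newline after every line, including the last; the name @newlineTerminate@
   is illustrative only:

> newlineTerminate :: Monad m => Producer Text m r -> Producer Text m r
> newlineTerminate p = unlines (p ^. lines)
-}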
997
998
999
1000 -- | Split a text stream into 'FreeT'-delimited words
1001 words
1002 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
1003 words = Data.Profunctor.dimap go (fmap _unwords)
1004 where
1005 go p = FreeT $ do
1006 x <- next (p >-> dropWhile isSpace)
1007 return $ case x of
1008 Left r -> Pure r
1009 Right (bs, p') -> Free $ do
1010 p'' <- (yield bs >> p') ^. break isSpace
1011 return (go p'')
1012 _unwords = PG.intercalates (yield $ T.singleton ' ')
1013
1014 {-# INLINABLE words #-}
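{- A hedged sketch, analogous to the example under 'lines': re-joining with
   'unwords' collapses each run of whitespace into a single space and drops
   leading and trailing whitespace; the name @normalizeSpaces@ is illustrative
   only:

> normalizeSpaces :: Monad m => Producer Text m r -> Producer Text m r
> normalizeSpaces p = unwords (p ^. words)
-}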
1015
1016
1017 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1018 interspersing a text stream in between them
1019 -}
1020 intercalate
1021 :: (Monad m)
1022 => Producer Text m ()
1023 -> FreeT (Producer Text m) m r
1024 -> Producer Text m r
1025 intercalate p0 = go0
1026 where
1027 go0 f = do
1028 x <- lift (runFreeT f)
1029 case x of
1030 Pure r -> return r
1031 Free p -> do
1032 f' <- p
1033 go1 f'
1034 go1 f = do
1035 x <- lift (runFreeT f)
1036 case x of
1037 Pure r -> return r
1038 Free p -> do
1039 p0
1040 f' <- p
1041 go1 f'
1042 {-# INLINABLE intercalate #-}
1043
1044 {-| Join 'FreeT'-delimited lines into a text stream
1045 -}
1046 unlines
1047 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1048 unlines = go
1049 where
1050 go f = do
1051 x <- lift (runFreeT f)
1052 case x of
1053 Pure r -> return r
1054 Free p -> do
1055 f' <- p
1056 yield $ T.singleton '\n'
1057 go f'
1058 {-# INLINABLE unlines #-}
1059
1060 {-| Join 'FreeT'-delimited words into a text stream
1061 -}
1062 unwords
1063 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1064 unwords = intercalate (yield $ T.pack " ")
1065 {-# INLINABLE unwords #-}
1066
1067 {- $parse
1068 The following parsing utilities are single-character analogs of the ones found
1069 in @pipes-parse@.
1070 -}
1071
1072 {- $reexports
1073
1074 @Data.Text@ re-exports the 'Text' type; @Data.ByteString@ the 'ByteString' type; @Data.Word@ the 'Word8' type; and @Data.Profunctor@ the 'Profunctor' class.
1075
1076 @Pipes.Parse@ re-exports the 'Parser' type synonym, and @Pipes.Group@ re-exports 'concats', 'intercalates', 'transFreeT' and the 'FreeT' and 'FreeF' types.
1077 -}
1078
1079
1080
-- | Resume decoding a stream of 'ByteString's from the given 'PE.Decoding' state, returning the undecoded remainder of the input when decoding fails or the input ends
1081 decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1082 -- decode codec = go B.empty where
1083 -- go extra p0 =
1084 -- do x <- lift (next p0)
1085 -- case x of Right (chunk, p) ->
1086 -- do let (text, stuff) = codecDecode codec (B.append extra chunk)
1087 -- yield text
1088 -- case stuff of Right extra' -> go extra' p
1089 -- Left (exc,bs) -> do yield text
1090 -- return (do yield bs
1091 -- p)
1092 -- Left r -> return (do yield extra
1093 -- return r)
1094
1095 decode d p0 = case d of
1096 PE.Other txt bad -> do yield txt
1097 return (do yield bad
1098 p0)
1099 PE.Some txt extra dec -> do yield txt
1100 x <- lift (next p0)
1101 case x of Left r -> return (do yield extra
1102 return r)
1103 Right (chunk,p1) -> decode (dec chunk) p1
1104
1105 -- go !carry dec0 p = do
1106 -- x <- lift (next p)
1107 -- case x of Left r -> if B.null carry
1108 -- then return (return r) -- all bytestrinput was consumed
1109 -- else return (do yield carry -- a potentially valid fragment remains
1110 -- return r)
1111 --
1112 -- Right (chunk, p') -> case dec0 chunk of
1113 -- PE.Some text carry2 dec -> do yield text
1114 -- go carry2 dec p'
1115 -- PE.Other text bs -> do yield text
1116 -- return (do yield bs -- an invalid blob remains
1117 -- p')
1118 -- {-# INLINABLE decodeUtf8 #-}