1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
2 #if __GLASGOW_HASKELL__ >= 702
3 {-# LANGUAGE Trustworthy #-}
4 #endif
5 {-| This module provides @pipes@ utilities for \"text streams\", which are
6 streams of strict 'Text' chunks. A 'Producer' of such chunks can be converted
7 to and from a lazy 'Text', though this is generally unwise, since it forces the
8 whole stream into memory at once. Where pipes IO replaces lazy IO, @Producer Text m r@ replaces lazy 'Text'.
9 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according to whether it is read from or written to.
10
11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
12 example, the following program copies a document from one file to another:
13
14 > import Pipes
15 > import qualified Pipes.Text as Text
16 > import System.IO
17 >
18 > main =
19 > withFile "inFile.txt" ReadMode $ \hIn ->
20 > withFile "outFile.txt" WriteMode $ \hOut ->
21 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22
23 To stream to and from files, the following is perhaps more Prelude-like (note that it uses @Pipes.Safe@):
24
25 > import Pipes
26 > import qualified Pipes.Text as Text
27 > import Pipes.Safe
28 >
29 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
30
31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
32 and 'stdout' proxies, as with the following \"echo\" program:
33
34 > main = runEffect $ Text.stdin >-> Text.stdout
35
36 You can also translate pure lazy 'TL.Text's to and from proxies:
37
38 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
39
40 In addition, this module provides many functions equivalent to lazy
41 'Text' functions so that you can transform or fold text streams. For
42 example, to stream only the first three lines of 'stdin' to 'stdout' you
43 might write:
44
45 > import Pipes
46 > import qualified Pipes.Text as Text
47 > import qualified Pipes.Parse as Parse
48 >
49 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
50 > where
51 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
52
53 The above program will never bring more than one chunk of text (~ 32 KB) into
54 memory, no matter how long the lines are.
55
56 Note that functions in this library are designed to operate on streams that
57 are insensitive to text boundaries. This means that they may freely split
58 text into smaller texts and /discard empty texts/. However, they will
59 /never concatenate texts/, in order to provide strict upper bounds on
60 memory usage -- with the single exception of 'concatMap'.
61 -}
62
63 module Pipes.Text (
64 -- * Producers
65 fromLazy
66 , stdin
67 , fromHandle
68 , readFile
69 , stdinLn
70
71 -- * Consumers
72 , stdout
73 , stdoutLn
74 , toHandle
75 , writeFile
76
77 -- * Pipes
78 , map
79 , concatMap
80 , take
81 , drop
82 , takeWhile
83 , dropWhile
84 , filter
85 , scan
86 , encodeUtf8
87 , pack
88 , unpack
89 , toCaseFold
90 , toLower
91 , toUpper
92 , stripStart
93
94 -- * Folds
95 , toLazy
96 , toLazyM
97 , foldChars
98 , head
99 , last
100 , null
101 , length
102 , any
103 , all
104 , maximum
105 , minimum
106 , find
107 , index
108 , count
109
110 -- * Primitive Character Parsers
111 -- $parse
112 , nextChar
113 , drawChar
114 , unDrawChar
115 , peekChar
116 , isEndOfChars
117
118 -- * Parsing Lenses
119 , splitAt
120 , span
121 , break
122 , groupBy
123 , group
124 , word
125 , line
126 , decodeUtf8
127 , decode
128
129 -- * FreeT Splitters
130 , chunksOf
131 , splitsWith
132 , splits
133 -- , groupsBy
134 -- , groups
135 , lines
136 , words
137
138
139 -- * Transformations
140 , intersperse
141 , packChars
142
143 -- * Joiners
144 , intercalate
145 , unlines
146 , unwords
147
148 -- * Re-exports
149 -- $reexports
150 , module Data.ByteString
151 , module Data.Text
152 , module Data.Profunctor
153 , module Data.Word
154 , module Pipes.Parse
155 ) where
156
157 import Control.Exception (throwIO, try)
158 import Control.Applicative ((<*))
159 import Control.Monad (liftM, unless, join)
160 import Control.Monad.Trans.State.Strict (StateT(..), modify)
161 import Data.Monoid ((<>))
162 import qualified Data.Text as T
163 import qualified Data.Text.IO as T
164 import qualified Data.Text.Encoding as TE
165 import qualified Data.Text.Encoding.Error as TE
166 import Data.Text (Text)
167 import qualified Data.Text.Lazy as TL
168 import qualified Data.Text.Lazy.IO as TL
169 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
170 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
171 import Data.ByteString (ByteString)
172 import qualified Data.ByteString as B
173 import Data.Char (ord, isSpace)
174 import Data.Functor.Constant (Constant(Constant, getConstant))
175 import Data.Functor.Identity (Identity)
176 import Data.Profunctor (Profunctor)
177 import qualified Data.Profunctor
178 import qualified Data.List as List
179 import Foreign.C.Error (Errno(Errno), ePIPE)
180 import qualified GHC.IO.Exception as G
181 import Pipes
182 import qualified Pipes.ByteString as PB
183 import qualified Pipes.Text.Internal as PE
184 import Pipes.Text.Internal (Codec(..))
185 import Pipes.Core (respond, Server')
186 import qualified Pipes.Parse as PP
187 import Pipes.Parse (Parser, concats, intercalates, FreeT(..))
188 import qualified Pipes.Safe.Prelude as Safe
189 import qualified Pipes.Safe as Safe
190 import Pipes.Safe (MonadSafe(..), Base(..))
191 import qualified Pipes.Prelude as P
192 import qualified System.IO as IO
194 import Data.Word (Word8)
195
196 import Prelude hiding (
197 all,
198 any,
199 break,
200 concat,
201 concatMap,
202 drop,
203 dropWhile,
204 elem,
205 filter,
206 head,
207 last,
208 lines,
209 length,
210 map,
211 maximum,
212 minimum,
213 notElem,
214 null,
215 readFile,
216 span,
217 splitAt,
218 take,
219 takeWhile,
220 unlines,
221 unwords,
222 words,
223 writeFile )
224
225 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
226 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
227 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
228 {-# INLINE fromLazy #-}
229
230 -- | Stream text from 'stdin'
231 stdin :: MonadIO m => Producer Text m ()
232 stdin = fromHandle IO.stdin
233 {-# INLINE stdin #-}
234
235 {-| Convert an 'IO.Handle' into a text stream using a chunk size
236 determined by the good sense of the text library; note that this
237 is distinctly slower than @decodeUtf8 (Pipes.ByteString.fromHandle h)@
238 but uses the system encoding and has other @Data.Text.IO@ features
239 -}
240
241 fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
242 fromHandle h = go where
243 go = do txt <- liftIO (T.hGetChunk h)
244 unless (T.null txt) $ do yield txt
245 go
246 {-# INLINABLE fromHandle #-}
247
248
249 {-| Stream text from a file in the simple fashion of @Data.Text.IO@
250
251 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
252 MAIN = PUTSTRLN "HELLO WORLD"
253 -}
254
255 readFile :: MonadSafe m => FilePath -> Producer Text m ()
256 readFile file = Safe.withFile file IO.ReadMode fromHandle
257 {-# INLINE readFile #-}
258
259 {-| Stream lines of text from stdin (for testing in ghci etc.)
260
261 >>> let safely = runSafeT . runEffect
262 >>> safely $ for Text.stdinLn (lift . lift . print . T.length)
263 hello
264 5
265 world
266 5
267
268 -}
269 stdinLn :: MonadIO m => Producer' Text m ()
270 stdinLn = go where
271 go = do
272 eof <- liftIO (IO.hIsEOF IO.stdin)
273 unless eof $ do
274 txt <- liftIO (T.hGetLine IO.stdin)
275 yield txt
276 go
277 {-# INLINABLE stdinLn #-}
278
279 {-| Stream text to 'stdout'
280
281 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
282
283 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
284 @(source >-> stdout)@ in suitable cases.
285 -}
286 stdout :: MonadIO m => Consumer' Text m ()
287 stdout = go
288 where
289 go = do
290 txt <- await
291 x <- liftIO $ try (T.putStr txt)
292 case x of
293 Left (G.IOError { G.ioe_type = G.ResourceVanished
294 , G.ioe_errno = Just ioe })
295 | Errno ioe == ePIPE
296 -> return ()
297 Left e -> liftIO (throwIO e)
298 Right () -> go
299 {-# INLINABLE stdout #-}
300
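-- | Stream text to 'stdout', writing a newline after each chunk received.
-- Like 'stdout', it terminates gracefully when the output pipe is broken.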
301 stdoutLn :: (MonadIO m) => Consumer' Text m ()
302 stdoutLn = go
303 where
304 go = do
305 str <- await
306 x <- liftIO $ try (T.putStrLn str)
307 case x of
308 Left (G.IOError { G.ioe_type = G.ResourceVanished
309 , G.ioe_errno = Just ioe })
310 | Errno ioe == ePIPE
311 -> return ()
312 Left e -> liftIO (throwIO e)
313 Right () -> go
314 {-# INLINABLE stdoutLn #-}
315
316 {-| Stream text into an 'IO.Handle'
317
318 Note: again, for best performance, where possible use
319 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
320 -}
321 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
322 toHandle h = for cat (liftIO . T.hPutStr h)
323 {-# INLINABLE toHandle #-}
324
325 {-# RULES "p >-> toHandle h" forall p h .
326 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
327 #-}
328
329
330 -- | Stream text into a file. Uses @pipes-safe@.
331 writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
332 writeFile file = Safe.withFile file IO.WriteMode toHandle
333 {-# INLINE writeFile #-}
334
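{- A minimal example combining 'readFile', 'map' and 'writeFile': it copies a
   file while upper-casing it. The file names are placeholders (the same ones
   used in the module header), and @pipes-safe@ supplies 'runSafeT':

> import Pipes
> import Pipes.Safe (runSafeT)
> import qualified Pipes.Text as Text
> import Data.Char (toUpper)
>
> main :: IO ()
> main = runSafeT $ runEffect $
>     Text.readFile "inFile.txt" >-> Text.map toUpper >-> Text.writeFile "outFile.txt"
-}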
335
336 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
337
338 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
339
340 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
341 a ^. lens = getConstant (lens Constant a)
342
343
344 -- | Apply a transformation to each 'Char' in the stream
345 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
346 map f = P.map (T.map f)
347 {-# INLINABLE map #-}
348
349 {-# RULES "p >-> map f" forall p f .
350 p >-> map f = for p (\txt -> yield (T.map f txt))
351 #-}
352
353 -- | Map a function over the characters of a text stream and concatenate the results
354 concatMap
355 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
356 concatMap f = P.map (T.concatMap f)
357 {-# INLINABLE concatMap #-}
358
359 {-# RULES "p >-> concatMap f" forall p f .
360 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
361 #-}
362
363 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
364 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
365 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
366 encodeUtf8 :: Monad m => Pipe Text ByteString m r
367 encodeUtf8 = P.map TE.encodeUtf8
368 {-# INLINEABLE encodeUtf8 #-}
369
370 {-# RULES "p >-> encodeUtf8" forall p .
371 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
372 #-}
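
{- A minimal sketch (not part of this module's API) showing 'encodeUtf8'
   feeding UTF-8 bytes to @Pipes.ByteString.stdout@; the result is standard
   input re-encoded as UTF-8 regardless of the system encoding:

> import Pipes
> import qualified Pipes.ByteString as PB
> import qualified Pipes.Text as Text
>
> main :: IO ()
> main = runEffect $ Text.stdin >-> Text.encodeUtf8 >-> PB.stdout
-}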
373
374 -- | Transform a Pipe of 'String's into one of 'Text' chunks
375 pack :: Monad m => Pipe String Text m r
376 pack = P.map T.pack
377 {-# INLINEABLE pack #-}
378
379 {-# RULES "p >-> pack" forall p .
380 p >-> pack = for p (\txt -> yield (T.pack txt))
381 #-}
382
383 -- | Transform a Pipe of 'Text' chunks into one of 'String's
384 unpack :: Monad m => Pipe Text String m r
385 unpack = for cat (\t -> yield (T.unpack t))
386 {-# INLINEABLE unpack #-}
387
388 {-# RULES "p >-> unpack" forall p .
389 p >-> unpack = for p (\txt -> yield (T.unpack txt))
390 #-}
391
392 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
393 -- here acting as 'Text' pipes, much as they would act on a lazy 'Text'
394 toCaseFold :: Monad m => Pipe Text Text m ()
395 toCaseFold = P.map T.toCaseFold
396 {-# INLINEABLE toCaseFold #-}
397
398 {-# RULES "p >-> toCaseFold" forall p .
399 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
400 #-}
401
402
403 -- | lowercase incoming 'Text'
404 toLower :: Monad m => Pipe Text Text m ()
405 toLower = P.map T.toLower
406 {-# INLINEABLE toLower #-}
407
408 {-# RULES "p >-> toLower" forall p .
409 p >-> toLower = for p (\txt -> yield (T.toLower txt))
410 #-}
411
412 -- | uppercase incoming 'Text'
413 toUpper :: Monad m => Pipe Text Text m ()
414 toUpper = P.map T.toUpper
415 {-# INLINEABLE toUpper #-}
416
417 {-# RULES "p >-> toUpper" forall p .
418 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
419 #-}
420
421 -- | Remove leading white space from an incoming succession of 'Text's
422 stripStart :: Monad m => Pipe Text Text m r
423 stripStart = do
424 chunk <- await
425 let text = T.stripStart chunk
426 if T.null text
427 then stripStart
428 else do yield text
429 cat
430 {-# INLINEABLE stripStart #-}
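
{- For example (a pure sketch, with @Data.Text.Lazy@ imported qualified as @TL@):

>>> Text.toLazy (Text.fromLazy (TL.pack "  hello") >-> Text.stripStart)
"hello"
-}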
431
432 -- | @(take n)@ only allows @n@ individual characters to pass;
433 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
434 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
435 take n0 = go n0 where
436 go n
437 | n <= 0 = return ()
438 | otherwise = do
439 txt <- await
440 let len = fromIntegral (T.length txt)
441 if (len > n)
442 then yield (T.take (fromIntegral n) txt)
443 else do
444 yield txt
445 go (n - len)
446 {-# INLINABLE take #-}
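
{- For example (a pure sketch, with @Data.Text.Lazy@ imported qualified as @TL@),
   'take' counts characters rather than chunks:

>>> Text.toLazy (Text.fromLazy (TL.pack "pipes text") >-> Text.take 5)
"pipes"
-}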
447
448 -- | @(drop n)@ drops the first @n@ characters
449 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
450 drop n0 = go n0 where
451 go n
452 | n <= 0 = cat
453 | otherwise = do
454 txt <- await
455 let len = fromIntegral (T.length txt)
456 if (len >= n)
457 then do
458 yield (T.drop (fromIntegral n) txt)
459 cat
460 else go (n - len)
461 {-# INLINABLE drop #-}
462
463 -- | Take characters until they fail the predicate
464 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
465 takeWhile predicate = go
466 where
467 go = do
468 txt <- await
469 let (prefix, suffix) = T.span predicate txt
470 if (T.null suffix)
471 then do
472 yield txt
473 go
474 else yield prefix
475 {-# INLINABLE takeWhile #-}
476
477 -- | Drop characters until they fail the predicate
478 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
479 dropWhile predicate = go where
480 go = do
481 txt <- await
482 case T.findIndex (not . predicate) txt of
483 Nothing -> go
484 Just i -> do
485 yield (T.drop i txt)
486 cat
487 {-# INLINABLE dropWhile #-}
488
489 -- | Only allows 'Char's to pass if they satisfy the predicate
490 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
491 filter predicate = P.map (T.filter predicate)
492 {-# INLINABLE filter #-}
493
494 {-# RULES "p >-> filter q" forall p q .
495 p >-> filter q = for p (\txt -> yield (T.filter q txt))
496 #-}
497
498 -- | Strict left scan over the characters
499 scan
500 :: (Monad m)
501 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
502 scan step begin = go begin
503 where
504 go c = do
505 txt <- await
506 let txt' = T.scanl step c txt
507 c' = T.last txt'
508 yield txt'
509 go c'
510 {-# INLINABLE scan #-}
511
512 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
513 'TL.Text'
514 -}
515 toLazy :: Producer Text Identity () -> TL.Text
516 toLazy = TL.fromChunks . P.toList
517 {-# INLINABLE toLazy #-}
518
519 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
520 'TL.Text'
521
522 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
523 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
524 immediately as they are generated instead of loading them all into memory.
525 -}
526 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
527 toLazyM = liftM TL.fromChunks . P.toListM
528 {-# INLINABLE toLazyM #-}
529
530 -- | Reduce the text stream using a strict left fold over characters
531 foldChars
532 :: Monad m
533 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
534 foldChars step begin done = P.fold (T.foldl' step) begin done
535 {-# INLINABLE foldChars #-}
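
{- For example, a character count written with 'foldChars' (a sketch;
   @charCount@ is a hypothetical helper, not part of this module):

> charCount :: Monad m => Producer Text m () -> m Int
> charCount = Text.foldChars (\n _ -> n + 1) 0 id
-}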
536
537 -- | Retrieve the first 'Char'
538 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
539 head = go
540 where
541 go p = do
542 x <- nextChar p
543 case x of
544 Left _ -> return Nothing
545 Right (c, _) -> return (Just c)
546 {-# INLINABLE head #-}
547
548 -- | Retrieve the last 'Char'
549 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
550 last = go Nothing
551 where
552 go r p = do
553 x <- next p
554 case x of
555 Left () -> return r
556 Right (txt, p') ->
557 if (T.null txt)
558 then go r p'
559 else go (Just $ T.last txt) p'
560 {-# INLINABLE last #-}
561
562 -- | Determine if the stream is empty
563 null :: (Monad m) => Producer Text m () -> m Bool
564 null = P.all T.null
565 {-# INLINABLE null #-}
566
567 -- | Count the number of characters in the stream
568 length :: (Monad m, Num n) => Producer Text m () -> m n
569 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
570 {-# INLINABLE length #-}
571
572 -- | Fold that returns whether any received 'Char's satisfy the predicate
573 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
574 any predicate = P.any (T.any predicate)
575 {-# INLINABLE any #-}
576
577 -- | Fold that returns whether all received 'Char's satisfy the predicate
578 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
579 all predicate = P.all (T.all predicate)
580 {-# INLINABLE all #-}
581
582 -- | Return the maximum 'Char' within a text stream
583 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
584 maximum = P.fold step Nothing id
585 where
586 step mc txt =
587 if (T.null txt)
588 then mc
589 else Just $ case mc of
590 Nothing -> T.maximum txt
591 Just c -> max c (T.maximum txt)
592 {-# INLINABLE maximum #-}
593
594 -- | Return the minimum 'Char' within a text stream (surely very useful!)
595 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
596 minimum = P.fold step Nothing id
597 where
598 step mc txt =
599 if (T.null txt)
600 then mc
601 else case mc of
602 Nothing -> Just (T.minimum txt)
603 Just c -> Just (min c (T.minimum txt))
604 {-# INLINABLE minimum #-}
605
606
607 -- | Find the first element in the stream that matches the predicate
608 find
609 :: (Monad m)
610 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
611 find predicate p = head (p >-> filter predicate)
612 {-# INLINABLE find #-}
613
614 -- | Index into a text stream
615 index
616 :: (Monad m, Integral a)
617 => a -> Producer Text m () -> m (Maybe Char)
618 index n p = head (p >-> drop n)
619 {-# INLINABLE index #-}
620
621
622 -- | Count how many times the given 'Text' occurs in the stream; the count is taken chunk by chunk, so occurrences spanning chunk boundaries are missed
623 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
624 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
625 {-# INLINABLE count #-}
626
627
628 {-| Consume the first character from a stream of 'Text'
629
630 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
631 succeeds with a 'Right' providing the next character and the remainder of the
632 'Producer'.
633 -}
634 nextChar
635 :: (Monad m)
636 => Producer Text m r
637 -> m (Either r (Char, Producer Text m r))
638 nextChar = go
639 where
640 go p = do
641 x <- next p
642 case x of
643 Left r -> return (Left r)
644 Right (txt, p') -> case (T.uncons txt) of
645 Nothing -> go p'
646 Just (c, txt') -> return (Right (c, yield txt' >> p'))
647 {-# INLINABLE nextChar #-}
648
649 {-| Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the
650 'Producer' is empty
651 -}
652 drawChar :: (Monad m) => Parser Text m (Maybe Char)
653 drawChar = do
654 x <- PP.draw
655 case x of
656 Nothing -> return Nothing
657 Just txt -> case (T.uncons txt) of
658 Nothing -> drawChar
659 Just (c, txt') -> do
660 PP.unDraw txt'
661 return (Just c)
662 {-# INLINABLE drawChar #-}
663
664 -- | Push back a 'Char' onto the underlying 'Producer'
665 unDrawChar :: (Monad m) => Char -> Parser Text m ()
666 unDrawChar c = modify (yield (T.singleton c) >>)
667 {-# INLINABLE unDrawChar #-}
668
669 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
670 push the 'Char' back
671
672 > peekChar = do
673 > x <- drawChar
674 > case x of
675 > Nothing -> return ()
676 > Just c -> unDrawChar c
677 > return x
678 -}
679 peekChar :: (Monad m) => Parser Text m (Maybe Char)
680 peekChar = do
681 x <- drawChar
682 case x of
683 Nothing -> return ()
684 Just c -> unDrawChar c
685 return x
686 {-# INLINABLE peekChar #-}
687
688 {-| Check if the underlying 'Producer' has no more characters
689
690 Note that this will skip over empty 'Text' chunks, unlike
691 'PP.isEndOfInput' from @pipes-parse@, which would consider
692 an empty 'Text' a valid bit of input.
693
694 > isEndOfChars = liftM isNothing peekChar
695 -}
696 isEndOfChars :: (Monad m) => Parser Text m Bool
697 isEndOfChars = do
698 x <- peekChar
699 return (case x of
700 Nothing -> True
701 Just _ -> False )
702 {-# INLINABLE isEndOfChars #-}
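
{- A small sketch of running these parsers (assuming, as in @pipes-parse@, that
   a 'Parser' is a 'StateT' over the underlying 'Producer' and is run with
   'evalStateT'); here @firstChar@ is a hypothetical name and returns @Just 'a'@:

> import Control.Monad.Trans.State.Strict (evalStateT)
> import qualified Data.Text.Lazy as TL
> import qualified Pipes.Text as Text
>
> firstChar :: IO (Maybe Char)
> firstChar = evalStateT Text.drawChar (Text.fromLazy (TL.pack "abc"))
-}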
703
704
705
706
707
708 -- | Improper lens from a 'Producer' of 'ByteString's, expected to be UTF-8 encoded,
709 -- to a 'Producer' of 'Text'; the inner 'Producer' returns a 'Producer' of 'ByteString's that begins at the point of decoding failure.
710
711 decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
712 (Producer Text m (Producer ByteString m r))
713 decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
714 (k (go B.empty PE.streamDecodeUtf8 p0)) where
715 go !carry dec0 p = do
716 x <- lift (next p)
717 case x of Left r -> if B.null carry
718 then return (return r) -- all bytestring input was consumed
719 else return (do yield carry -- a potentially valid fragment remains
720 return r)
721
722 Right (chunk, p') -> case dec0 chunk of
723 PE.Some text carry2 dec -> do yield text
724 go carry2 dec p'
725 PE.Other text bs -> do yield text
726 return (do yield bs -- an invalid blob remains
727 p')
728 {-# INLINABLE decodeUtf8 #-}
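
{- A minimal sketch of using the 'decodeUtf8' lens as a getter. The local
   @view@ helper and the choice of standard input are assumptions, not part of
   this module; any lens library's @view@ does the same job. Any undecodable
   remainder of the byte stream is simply discarded here by 'void':

> import Control.Monad (void)
> import Data.Functor.Constant (Constant(..))
> import Pipes
> import qualified Pipes.ByteString as PB
> import qualified Pipes.Text as Text
>
> view l = getConstant . l Constant
>
> main :: IO ()
> main = runEffect $ void (view Text.decodeUtf8 PB.stdin) >-> Text.stdout
-}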
729
730
731 -- | Splits a 'Producer' after the given number of characters
732 splitAt
733 :: (Monad m, Integral n)
734 => n
735 -> Lens' (Producer Text m r)
736 (Producer Text m (Producer Text m r))
737 splitAt n0 k p0 = fmap join (k (go n0 p0))
738 where
739 go 0 p = return p
740 go n p = do
741 x <- lift (next p)
742 case x of
743 Left r -> return (return r)
744 Right (txt, p') -> do
745 let len = fromIntegral (T.length txt)
746 if (len <= n)
747 then do
748 yield txt
749 go (n - len) p'
750 else do
751 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
752 yield prefix
753 return (yield suffix >> p')
754 {-# INLINABLE splitAt #-}
755
756
757 {-| Split a text stream in two, where the first text stream is the longest
758 consecutive group of characters that satisfy the predicate
759 -}
760 span
761 :: (Monad m)
762 => (Char -> Bool)
763 -> Lens' (Producer Text m r)
764 (Producer Text m (Producer Text m r))
765 span predicate k p0 = fmap join (k (go p0))
766 where
767 go p = do
768 x <- lift (next p)
769 case x of
770 Left r -> return (return r)
771 Right (txt, p') -> do
772 let (prefix, suffix) = T.span predicate txt
773 if (T.null suffix)
774 then do
775 yield txt
776 go p'
777 else do
778 yield prefix
779 return (yield suffix >> p')
780 {-# INLINABLE span #-}
781
782 {-| Split a text stream in two, where the first text stream is the longest
783 consecutive group of characters that don't satisfy the predicate
784 -}
785 break
786 :: (Monad m)
787 => (Char -> Bool)
788 -> Lens' (Producer Text m r)
789 (Producer Text m (Producer Text m r))
790 break predicate = span (not . predicate)
791 {-# INLINABLE break #-}
792
793 {-| Improper lens that splits after the first group of equivalent Chars, as
794 defined by the given equivalence relation
795 -}
796 groupBy
797 :: (Monad m)
798 => (Char -> Char -> Bool)
799 -> Lens' (Producer Text m r)
800 (Producer Text m (Producer Text m r))
801 groupBy equals k p0 = fmap join (k ((go p0))) where
802 go p = do
803 x <- lift (next p)
804 case x of
805 Left r -> return (return r)
806 Right (txt, p') -> case T.uncons txt of
807 Nothing -> go p'
808 Just (c, _) -> (yield txt >> p') ^. span (equals c)
809 {-# INLINABLE groupBy #-}
810
811 -- | Improper lens that splits after the first succession of identical 'Char's
812 group :: Monad m
813 => Lens' (Producer Text m r)
814 (Producer Text m (Producer Text m r))
815 group = groupBy (==)
816 {-# INLINABLE group #-}
817
818 {-| Improper lens that splits a 'Producer' after the first word
819
820 Unlike 'words', this does not drop leading whitespace
821 -}
822 word :: (Monad m)
823 => Lens' (Producer Text m r)
824 (Producer Text m (Producer Text m r))
825 word k p0 = fmap join (k (to p0))
826 where
827 to p = do
828 p' <- p^.span isSpace
829 p'^.break isSpace
830 {-# INLINABLE word #-}
831
832
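-- | Improper lens that splits a 'Producer' at the first newline; the newline
-- itself, if any, is left at the start of the remainder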
833 line :: (Monad m)
834 => Lens' (Producer Text m r)
835 (Producer Text m (Producer Text m r))
836 line = break (== '\n')
837
838 {-# INLINABLE line #-}
839
840
841 -- | Intersperse a 'Char' in between the characters of a stream of 'Text'
842 intersperse
843 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
844 intersperse c = go0
845 where
846 go0 p = do
847 x <- lift (next p)
848 case x of
849 Left r -> return r
850 Right (txt, p') -> do
851 yield (T.intersperse c txt)
852 go1 p'
853 go1 p = do
854 x <- lift (next p)
855 case x of
856 Left r -> return r
857 Right (txt, p') -> do
858 yield (T.singleton c)
859 yield (T.intersperse c txt)
860 go1 p'
861 {-# INLINABLE intersperse #-}
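
{- For example (a pure sketch, with @Data.Text.Lazy@ imported qualified as @TL@):

>>> Text.toLazy (Text.intersperse '-' (Text.fromLazy (TL.pack "abc")))
"a-b-c"
-}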
862
863
864
865 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text'
866 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
867 packChars = Data.Profunctor.dimap to (fmap from)
868 where
869 -- to :: Monad m => Producer Char m x -> Producer Text m x
870 to p = PP.folds step id done (p^.PP.chunksOf defaultChunkSize)
871
872 step diffAs c = diffAs . (c:)
873
874 done diffAs = T.pack (diffAs [])
875
876 -- from :: Monad m => Producer Text m x -> Producer Char m x
877 from p = for p (each . T.unpack)
878 {-# INLINABLE packChars #-}
879
880
881 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
882 chunksOf
883 :: (Monad m, Integral n)
884 => n -> Lens' (Producer Text m r)
885 (FreeT (Producer Text m) m r)
886 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
887 where
888 go p = do
889 x <- next p
890 return $ case x of
891 Left r -> PP.Pure r
892 Right (txt, p') -> PP.Free $ do
893 p'' <- (yield txt >> p') ^. splitAt n
894 return $ PP.FreeT (go p'')
895 {-# INLINABLE chunksOf #-}
896
897
898 {-| Split a text stream into sub-streams delimited by characters that satisfy the
899 predicate
900 -}
901 splitsWith
902 :: (Monad m)
903 => (Char -> Bool)
904 -> Producer Text m r
905 -> PP.FreeT (Producer Text m) m r
906 splitsWith predicate p0 = PP.FreeT (go0 p0)
907 where
908 go0 p = do
909 x <- next p
910 case x of
911 Left r -> return (PP.Pure r)
912 Right (txt, p') ->
913 if (T.null txt)
914 then go0 p'
915 else return $ PP.Free $ do
916 p'' <- (yield txt >> p') ^. span (not . predicate)
917 return $ PP.FreeT (go1 p'')
918 go1 p = do
919 x <- nextChar p
920 return $ case x of
921 Left r -> PP.Pure r
922 Right (_, p') -> PP.Free $ do
923 p'' <- p' ^. span (not . predicate)
924 return $ PP.FreeT (go1 p'')
925 {-# INLINABLE splitsWith #-}
926
927 -- | Split a text stream using the given 'Char' as the delimiter
928 splits :: (Monad m)
929 => Char
930 -> Lens' (Producer Text m r)
931 (FreeT (Producer Text m) m r)
932 splits c k p =
933 fmap (PP.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
934 {-# INLINABLE splits #-}
935
936 {-| Improper lens from a stream of 'Text' to 'FreeT'-delimited groups of equivalent 'Char's, using the
937 given equivalence relation
938 -}
939 groupsBy
940 :: Monad m
941 => (Char -> Char -> Bool)
942 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
943 groupsBy equals k p0 = fmap concats (k (PP.FreeT (go p0))) where
944 go p = do x <- next p
945 case x of Left r -> return (PP.Pure r)
946 Right (bs, p') -> case T.uncons bs of
947 Nothing -> go p'
948 Just (c, _) -> do return $ PP.Free $ do
949 p'' <- (yield bs >> p')^.span (equals c)
950 return $ PP.FreeT (go p'')
951 {-# INLINABLE groupsBy #-}
952
953
954 -- | Like 'groupsBy', where the equality predicate is ('==')
955 groups
956 :: Monad m
957 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
958 groups = groupsBy (==)
959 {-# INLINABLE groups #-}
960
961
962
963 {-| Split a text stream into 'FreeT'-delimited lines
964 -}
965 lines
966 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
967 lines = Data.Profunctor.dimap _lines (fmap _unlines)
968 where
969 _lines p0 = PP.FreeT (go0 p0)
970 where
971 go0 p = do
972 x <- next p
973 case x of
974 Left r -> return (PP.Pure r)
975 Right (txt, p') ->
976 if (T.null txt)
977 then go0 p'
978 else return $ PP.Free $ go1 (yield txt >> p')
979 go1 p = do
980 p' <- p ^. break ('\n' ==)
981 return $ PP.FreeT $ do
982 x <- nextChar p'
983 case x of
984 Left r -> return $ PP.Pure r
985 Right (_, p'') -> go0 p''
986 -- _unlines
987 -- :: Monad m
988 -- => FreeT (Producer Text m) m x -> Producer Text m x
989 _unlines = PP.concats . PP.transFreeT addNewline
990
991 -- addNewline
992 -- :: Monad m => Producer Text m r -> Producer Text m r
993 addNewline p = p <* yield (T.singleton '\n')
994 {-# INLINABLE lines #-}
995
996
997
998 -- | Split a text stream into 'FreeT'-delimited words
999 words
1000 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
1001 words = Data.Profunctor.dimap go (fmap _unwords)
1002 where
1003 go p = PP.FreeT $ do
1004 x <- next (p >-> dropWhile isSpace)
1005 return $ case x of
1006 Left r -> PP.Pure r
1007 Right (bs, p') -> PP.Free $ do
1008 p'' <- (yield bs >> p') ^. break isSpace
1009 return (go p'')
1010 _unwords = PP.intercalates (yield $ T.singleton ' ')
1011
1012 {-# INLINABLE words #-}
1013
1014
1015 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1016 interspersing a text stream in between them
1017 -}
1018 intercalate
1019 :: (Monad m)
1020 => Producer Text m ()
1021 -> FreeT (Producer Text m) m r
1022 -> Producer Text m r
1023 intercalate p0 = go0
1024 where
1025 go0 f = do
1026 x <- lift (PP.runFreeT f)
1027 case x of
1028 PP.Pure r -> return r
1029 PP.Free p -> do
1030 f' <- p
1031 go1 f'
1032 go1 f = do
1033 x <- lift (PP.runFreeT f)
1034 case x of
1035 PP.Pure r -> return r
1036 PP.Free p -> do
1037 p0
1038 f' <- p
1039 go1 f'
1040 {-# INLINABLE intercalate #-}
1041
1042 {-| Join 'FreeT'-delimited lines into a text stream
1043 -}
1044 unlines
1045 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1046 unlines = go
1047 where
1048 go f = do
1049 x <- lift (PP.runFreeT f)
1050 case x of
1051 PP.Pure r -> return r
1052 PP.Free p -> do
1053 f' <- p
1054 yield $ T.singleton '\n'
1055 go f'
1056 {-# INLINABLE unlines #-}
1057
1058 {-| Join 'FreeT'-delimited words into a text stream
1059 -}
1060 unwords
1061 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1062 unwords = intercalate (yield $ T.pack " ")
1063 {-# INLINABLE unwords #-}
1064
1065 {- $parse
1066 The following parsing utilities are single-character analogs of the ones found
1067 in @pipes-parse@.
1068 -}
1069
1070 {- $reexports
1071
1072 @Data.ByteString@ re-exports the 'ByteString' type; @Data.Text@ re-exports the 'Text'
1073 type; @Data.Word@ re-exports 'Word8'; @Data.Profunctor@ re-exports the 'Profunctor' class.
1074 @Pipes.Parse@ re-exports 'Parser', 'concats', 'intercalates' and the 'FreeT' type.
1075 -}
1076
1077
1078
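-- | Resume an incremental 'PE.Decoding' against a 'Producer' of 'ByteString's,
-- yielding the decoded 'Text'; the returned 'Producer' holds whatever bytes
-- could not be (or have not yet been) decoded, followed by the rest of the input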
1079 decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1093 decode d p0 = case d of
1094 PE.Other txt bad -> do yield txt
1095 return (do yield bad
1096 p0)
1097 PE.Some txt extra dec -> do yield txt
1098 x <- lift (next p0)
1099 case x of Left r -> return (do yield extra
1100 return r)
1101 Right (chunk,p1) -> decode (dec chunk) p1
1102