1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
6 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
7 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' depending on whether it is read from or written to.
8
9 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
10 example, the following program copies a document from one file to another:
11
12 > import Pipes
13 > import qualified Pipes.Text as Text
14 > import System.IO
15 >
16 > main =
17 > withFile "inFile.txt" ReadMode $ \hIn ->
18 > withFile "outFile.txt" WriteMode $ \hOut ->
19 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
20
21 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
22
23 > import Pipes
24 > import qualified Pipes.Text as Text
25 > import Pipes.Safe
26 >
27 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
28
29 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
30 and 'stdout' pipes, as with the following \"echo\" program:
31
32 > main = runEffect $ Text.stdin >-> Text.stdout
33
34 You can also translate pure lazy 'TL.Text's to and from pipes:
35
36 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
37
38 In addition, this module provides many functions equivalent to lazy
39 'Text' functions so that you can transform or fold text streams. For
40 example, to stream only the first three lines of 'stdin' to 'stdout' you
41 might write:
42
43 > import Pipes
44 > import qualified Pipes.Text as Text
45 > import qualified Pipes.Parse as Parse
46 >
47 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
48 > where
49 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
50
51 The above program will never bring more than one chunk of text (~ 32 KB) into
52 memory, no matter how long the lines are.
53
54 Note that functions in this library are designed to operate on streams that
55 are insensitive to text boundaries. This means that they may freely split
56 text into smaller texts and /discard empty texts/. However, they will
57 /never concatenate texts/ in order to provide strict upper bounds on
58 memory usage -- with the single exception of 'concatMap'.
59 -}
60
61 module Pipes.Text (
62 -- * Producers
63 fromLazy
64 , stdin
65 , fromHandle
66 , readFile
67
68 -- * Consumers
69 , stdout
70 , toHandle
71 , writeFile
72
73 -- * Pipes
74 , map
75 , concatMap
76 , take
77 , drop
78 , takeWhile
79 , dropWhile
80 , filter
81 , scan
82 , encodeUtf8
83 , pack
84 , unpack
85 , toCaseFold
86 , toLower
87 , toUpper
88 , stripStart
89
90 -- * Folds
91 , toLazy
92 , toLazyM
93 , foldChars
94 , head
95 , last
96 , null
97 , length
98 , any
99 , all
100 , maximum
101 , minimum
102 , find
103 , index
104 , count
105
106 -- * Primitive Character Parsers
107 -- $parse
108 , nextChar
109 , drawChar
110 , unDrawChar
111 , peekChar
112 , isEndOfChars
113
114 -- * Parsing Lenses
115 , splitAt
116 , span
117 , break
118 , groupBy
119 , group
120 , word
121 , line
122
123 -- * Decoding Lenses
124 , decodeUtf8
125 , codec
126
127 -- * Codecs
128 , utf8
129 , utf16_le
130 , utf16_be
131 , utf32_le
132 , utf32_be
133
134 -- * Other Decoding/Encoding Functions
135 , decodeIso8859_1
136 , decodeAscii
137 , encodeIso8859_1
138 , encodeAscii
139
140 -- * FreeT Splitters
141 , chunksOf
142 , splitsWith
143 , splits
144 -- , groupsBy
145 -- , groups
146 , lines
147 , words
148
149 -- * Transformations
150 , intersperse
151 , packChars
152
153 -- * Joiners
154 , intercalate
155 , unlines
156 , unwords
157
158 -- * Re-exports
159 -- $reexports
160 , Decoding(..)
161 , streamDecodeUtf8
162 , decodeSomeUtf8
163 , Codec(..)
164 , TextException(..)
165 , module Data.ByteString
166 , module Data.Text
167 , module Data.Profunctor
168 , module Data.Word
169 , module Pipes.Parse
170 , module Pipes.Group
171 ) where
172
173 import Control.Exception (throwIO, try)
174 import Control.Applicative ((<*))
175 import Control.Monad (liftM, unless, join)
176 import Control.Monad.Trans.State.Strict (StateT(..), modify)
177 import Data.Monoid ((<>))
178 import qualified Data.Text as T
179 import qualified Data.Text.IO as T
180 import qualified Data.Text.Encoding as TE
181 import qualified Data.Text.Encoding.Error as TE
182 import Data.Text (Text)
183 import qualified Data.Text.Lazy as TL
184 import qualified Data.Text.Lazy.IO as TL
185 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
186 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
187 import Data.ByteString (ByteString)
188 import qualified Data.ByteString as B
189 import qualified Data.ByteString.Char8 as B8
190 import Data.Char (ord, isSpace)
191 import Data.Functor.Constant (Constant(Constant, getConstant))
192 import Data.Functor.Identity (Identity)
193 import Data.Profunctor (Profunctor)
194 import qualified Data.Profunctor
195 import qualified Data.List as List
196 import Foreign.C.Error (Errno(Errno), ePIPE)
197 import qualified GHC.IO.Exception as G
198 import Pipes
199 import qualified Pipes.ByteString as PB
200 import qualified Pipes.Text.Internal as PI
201 import Pipes.Text.Internal
202 import Pipes.Core (respond, Server')
203 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
204 import qualified Pipes.Group as PG
205 import qualified Pipes.Parse as PP
206 import Pipes.Parse (Parser)
207 import qualified Pipes.Safe.Prelude as Safe
208 import qualified Pipes.Safe as Safe
209 import Pipes.Safe (MonadSafe(..), Base(..))
210 import qualified Pipes.Prelude as P
211 import qualified System.IO as IO
212 import Data.Char (isSpace)
213 import Data.Word (Word8)
214
215 import Prelude hiding (
216 all,
217 any,
218 break,
219 concat,
220 concatMap,
221 drop,
222 dropWhile,
223 elem,
224 filter,
225 head,
226 last,
227 lines,
228 length,
229 map,
230 maximum,
231 minimum,
232 notElem,
233 null,
234 readFile,
235 span,
236 splitAt,
237 take,
238 takeWhile,
239 unlines,
240 unwords,
241 words,
242 writeFile )
243
244 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
245 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
246 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
247 {-# INLINE fromLazy #-}
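
-- A quick sketch of 'fromLazy' in use, composed with the 'toUpper' and
-- 'stdout' pipes defined below (the name @upperHello@ is just illustrative):
--
-- > upperHello :: IO ()
-- > upperHello = runEffect $ fromLazy (TL.pack "hello\n") >-> toUpper >-> stdout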
248
249 -- | Stream text from 'stdin'
250 stdin :: MonadIO m => Producer Text m ()
251 stdin = fromHandle IO.stdin
252 {-# INLINE stdin #-}
253
254 {-| Convert an 'IO.Handle' into a text stream using a chunk size
255 determined by the good sense of the text library; note that this
256 is distinctly slower than @decodeUtf8 (Pipes.ByteString.fromHandle h)@
257 but uses the system encoding and has other @Data.Text.IO@ features
258 -}
259
260 fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
261 fromHandle h = go where
262 go = do txt <- liftIO (T.hGetChunk h)
263 unless (T.null txt) ( do yield txt
264 go )
265 {-# INLINABLE fromHandle #-}
266
267
268 {-| Stream text from a file in the simple fashion of @Data.Text.IO@
269
270 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
271 MAIN = PUTSTRLN "HELLO WORLD"
272 -}
273
274 readFile :: MonadSafe m => FilePath -> Producer Text m ()
275 readFile file = Safe.withFile file IO.ReadMode fromHandle
276 {-# INLINE readFile #-}
277
278
279 {-| Stream text to 'stdout'
280
281 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
282
283 Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@
284 instead of @(source >-> stdout)@ .
285 -}
286 stdout :: MonadIO m => Consumer' Text m ()
287 stdout = go
288 where
289 go = do
290 txt <- await
291 x <- liftIO $ try (T.putStr txt)
292 case x of
293 Left (G.IOError { G.ioe_type = G.ResourceVanished
294 , G.ioe_errno = Just ioe })
295 | Errno ioe == ePIPE
296 -> return ()
297 Left e -> liftIO (throwIO e)
298 Right () -> go
299 {-# INLINABLE stdout #-}
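
-- A sketch of the faster 'for'-based alternative mentioned in the note above
-- (the name @echo@ is just illustrative):
--
-- > echo :: IO ()
-- > echo = runEffect $ for stdin (liftIO . T.putStr)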
300
301
302 {-| Stream text to an 'IO.Handle'
303
304 Note: again, for best performance, where possible use
305 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
306 -}
307 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
308 toHandle h = for cat (liftIO . T.hPutStr h)
309 {-# INLINABLE toHandle #-}
310
311 {-# RULES "p >-> toHandle h" forall p h .
312 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
313 #-}
314
315
316 -- | Stream text into a file. Uses @pipes-safe@.
317 writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
318 writeFile file = Safe.withFile file IO.WriteMode toHandle
319 {-# INLINE writeFile #-}
320
321
322 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
323
324 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
325
326 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
327 a ^. lens = getConstant (lens Constant a)
328
329
330 -- | Apply a transformation to each 'Char' in the stream
331 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
332 map f = P.map (T.map f)
333 {-# INLINABLE map #-}
334
335 {-# RULES "p >-> map f" forall p f .
336 p >-> map f = for p (\txt -> yield (T.map f txt))
337 #-}
338
339 -- | Map a function over the characters of a text stream and concatenate the results
340 concatMap
341 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
342 concatMap f = P.map (T.concatMap f)
343 {-# INLINABLE concatMap #-}
344
345 {-# RULES "p >-> concatMap f" forall p f .
346 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
347 #-}
348
349 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
350 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
351 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
352 encodeUtf8 :: Monad m => Pipe Text ByteString m r
353 encodeUtf8 = P.map TE.encodeUtf8
354 {-# INLINEABLE encodeUtf8 #-}
355
356 {-# RULES "p >-> encodeUtf8" forall p .
357 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
358 #-}
359
360 -- | Transform a Pipe of 'String's into one of 'Text' chunks
361 pack :: Monad m => Pipe String Text m r
362 pack = P.map T.pack
363 {-# INLINEABLE pack #-}
364
365 {-# RULES "p >-> pack" forall p .
366 p >-> pack = for p (\txt -> yield (T.pack txt))
367 #-}
368
369 -- | Transform a Pipe of 'Text' chunks into one of 'String's
370 unpack :: Monad m => Pipe Text String m r
371 unpack = for cat (\t -> yield (T.unpack t))
372 {-# INLINEABLE unpack #-}
373
374 {-# RULES "p >-> unpack" forall p .
375 p >-> unpack = for p (\txt -> yield (T.unpack txt))
376 #-}
377
378 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
379 -- here acting as 'Text' pipes, rather than as functions on a lazy 'Text'
380 toCaseFold :: Monad m => Pipe Text Text m ()
381 toCaseFold = P.map T.toCaseFold
382 {-# INLINEABLE toCaseFold #-}
383
384 {-# RULES "p >-> toCaseFold" forall p .
385 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
386 #-}
387
388
389 -- | lowercase incoming 'Text'
390 toLower :: Monad m => Pipe Text Text m ()
391 toLower = P.map T.toLower
392 {-# INLINEABLE toLower #-}
393
394 {-# RULES "p >-> toLower" forall p .
395 p >-> toLower = for p (\txt -> yield (T.toLower txt))
396 #-}
397
398 -- | uppercase incoming 'Text'
399 toUpper :: Monad m => Pipe Text Text m ()
400 toUpper = P.map T.toUpper
401 {-# INLINEABLE toUpper #-}
402
403 {-# RULES "p >-> toUpper" forall p .
404 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
405 #-}
406
407 -- | Remove leading white space from an incoming succession of 'Text's
408 stripStart :: Monad m => Pipe Text Text m r
409 stripStart = do
410 chunk <- await
411 let text = T.stripStart chunk
412 if T.null text
413 then stripStart
414 else do yield text
415 cat
416 {-# INLINEABLE stripStart #-}
417
418 -- | @(take n)@ only allows @n@ individual characters to pass;
419 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
420 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
421 take n0 = go n0 where
422 go n
423 | n <= 0 = return ()
424 | otherwise = do
425 txt <- await
426 let len = fromIntegral (T.length txt)
427 if (len > n)
428 then yield (T.take (fromIntegral n) txt)
429 else do
430 yield txt
431 go (n - len)
432 {-# INLINABLE take #-}
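
-- For example, a sketch restricting 'stdin' to its first three characters
-- (the name @firstThree@ is just illustrative):
--
-- > firstThree :: IO ()
-- > firstThree = runEffect $ stdin >-> take 3 >-> stdout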
433
434 -- | @(drop n)@ drops the first @n@ characters
435 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
436 drop n0 = go n0 where
437 go n
438 | n <= 0 = cat
439 | otherwise = do
440 txt <- await
441 let len = fromIntegral (T.length txt)
442 if (len >= n)
443 then do
444 yield (T.drop (fromIntegral n) txt)
445 cat
446 else go (n - len)
447 {-# INLINABLE drop #-}
448
449 -- | Take characters until they fail the predicate
450 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
451 takeWhile predicate = go
452 where
453 go = do
454 txt <- await
455 let (prefix, suffix) = T.span predicate txt
456 if (T.null suffix)
457 then do
458 yield txt
459 go
460 else yield prefix
461 {-# INLINABLE takeWhile #-}
462
463 -- | Drop characters until they fail the predicate
464 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
465 dropWhile predicate = go where
466 go = do
467 txt <- await
468 case T.findIndex (not . predicate) txt of
469 Nothing -> go
470 Just i -> do
471 yield (T.drop i txt)
472 cat
473 {-# INLINABLE dropWhile #-}
474
475 -- | Only allows 'Char's to pass if they satisfy the predicate
476 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
477 filter predicate = P.map (T.filter predicate)
478 {-# INLINABLE filter #-}
479
480 {-# RULES "p >-> filter q" forall p q .
481 p >-> filter q = for p (\txt -> yield (T.filter q txt))
482 #-}
483
484 -- | Strict left scan over the characters
485 scan
486 :: (Monad m)
487 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
488 scan step begin = do
489 yield (T.singleton begin)
490 go begin
491 where
492 go c = do
493 txt <- await
494 let txt' = T.scanl step c txt
495 c' = T.last txt'
496 yield (T.tail txt')
497 go c'
498 {-# INLINABLE scan #-}
499
500 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
501 'TL.Text'
502 -}
503 toLazy :: Producer Text Identity () -> TL.Text
504 toLazy = TL.fromChunks . P.toList
505 {-# INLINABLE toLazy #-}
506
507 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
508 'TL.Text'
509
510 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
511 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
512 immediately as they are generated instead of loading them all into memory.
513 -}
514 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
515 toLazyM = liftM TL.fromChunks . P.toListM
516 {-# INLINABLE toLazyM #-}
517
518 -- | Reduce the text stream using a strict left fold over characters
519 foldChars
520 :: Monad m
521 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
522 foldChars step begin done = P.fold (T.foldl' step) begin done
523 {-# INLINABLE foldChars #-}
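
-- A sketch of a character-level fold that counts spaces in a pure producer;
-- the name @countSpaces@ is illustrative and @runIdentity@ would have to be
-- imported from "Data.Functor.Identity":
--
-- > countSpaces :: Producer Text Identity () -> Int
-- > countSpaces = runIdentity . foldChars step 0 id
-- >   where step n c = if c == ' ' then n + 1 else n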
524
525 -- | Retrieve the first 'Char'
526 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
527 head = go
528 where
529 go p = do
530 x <- nextChar p
531 case x of
532 Left _ -> return Nothing
533 Right (c, _) -> return (Just c)
534 {-# INLINABLE head #-}
535
536 -- | Retrieve the last 'Char'
537 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
538 last = go Nothing
539 where
540 go r p = do
541 x <- next p
542 case x of
543 Left () -> return r
544 Right (txt, p') ->
545 if (T.null txt)
546 then go r p'
547 else go (Just $ T.last txt) p'
548 {-# INLINABLE last #-}
549
550 -- | Determine if the stream is empty
551 null :: (Monad m) => Producer Text m () -> m Bool
552 null = P.all T.null
553 {-# INLINABLE null #-}
554
555 -- | Count the number of characters in the stream
556 length :: (Monad m, Num n) => Producer Text m () -> m n
557 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
558 {-# INLINABLE length #-}
559
560 -- | Fold that returns whether any received 'Char's satisfy the predicate
561 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
562 any predicate = P.any (T.any predicate)
563 {-# INLINABLE any #-}
564
565 -- | Fold that returns whether all received 'Char's satisfy the predicate
566 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
567 all predicate = P.all (T.all predicate)
568 {-# INLINABLE all #-}
569
570 -- | Return the maximum 'Char' within a text stream
571 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
572 maximum = P.fold step Nothing id
573 where
574 step mc txt =
575 if (T.null txt)
576 then mc
577 else Just $ case mc of
578 Nothing -> T.maximum txt
579 Just c -> max c (T.maximum txt)
580 {-# INLINABLE maximum #-}
581
582 -- | Return the minimum 'Char' within a text stream (surely very useful!)
583 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
584 minimum = P.fold step Nothing id
585 where
586 step mc txt =
587 if (T.null txt)
588 then mc
589 else case mc of
590 Nothing -> Just (T.minimum txt)
591 Just c -> Just (min c (T.minimum txt))
592 {-# INLINABLE minimum #-}
593
594
595 -- | Find the first element in the stream that matches the predicate
596 find
597 :: (Monad m)
598 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
599 find predicate p = head (p >-> filter predicate)
600 {-# INLINABLE find #-}
601
602 -- | Index into a text stream
603 index
604 :: (Monad m, Integral a)
605 => a-> Producer Text m () -> m (Maybe Char)
606 index n p = head (p >-> drop n)
607 {-# INLINABLE index #-}
608
609
610 -- | Count occurrences of the given 'Text' in the stream (matches that span chunk boundaries are not detected)
611 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
612 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
613 {-# INLINABLE count #-}
614
615
616 {-| Consume the first character from a stream of 'Text'
617
618 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
619 succeeds with a 'Right' providing the next character and the remainder of the
620 'Producer'.
621 -}
622 nextChar
623 :: (Monad m)
624 => Producer Text m r
625 -> m (Either r (Char, Producer Text m r))
626 nextChar = go
627 where
628 go p = do
629 x <- next p
630 case x of
631 Left r -> return (Left r)
632 Right (txt, p') -> case (T.uncons txt) of
633 Nothing -> go p'
634 Just (c, txt') -> return (Right (c, yield txt' >> p'))
635 {-# INLINABLE nextChar #-}
636
637 {-| Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the
638 'Producer' is empty
639 -}
640 drawChar :: (Monad m) => Parser Text m (Maybe Char)
641 drawChar = do
642 x <- PP.draw
643 case x of
644 Nothing -> return Nothing
645 Just txt -> case (T.uncons txt) of
646 Nothing -> drawChar
647 Just (c, txt') -> do
648 PP.unDraw txt'
649 return (Just c)
650 {-# INLINABLE drawChar #-}
651
652 -- | Push back a 'Char' onto the underlying 'Producer'
653 unDrawChar :: (Monad m) => Char -> Parser Text m ()
654 unDrawChar c = modify (yield (T.singleton c) >>)
655 {-# INLINABLE unDrawChar #-}
656
657 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
658 push the 'Char' back
659
660 > peekChar = do
661 > x <- drawChar
662 > case x of
663 >         Nothing -> return ()
664 >         Just c  -> unDrawChar c
665 > return x
666 -}
667 peekChar :: (Monad m) => Parser Text m (Maybe Char)
668 peekChar = do
669 x <- drawChar
670 case x of
671 Nothing -> return ()
672 Just c -> unDrawChar c
673 return x
674 {-# INLINABLE peekChar #-}
675
676 {-| Check if the underlying 'Producer' has no more characters
677
678 Note that this will skip over empty 'Text' chunks, unlike
679 'PP.isEndOfInput' from @pipes-parse@, which would consider
680 an empty 'Text' a valid bit of input.
681
682 > isEndOfChars = liftM isNothing peekChar
683 -}
684 isEndOfChars :: (Monad m) => Parser Text m Bool
685 isEndOfChars = do
686 x <- peekChar
687 return (case x of
688 Nothing -> True
689 Just _-> False )
690 {-# INLINABLE isEndOfChars #-}
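
-- A sketch composing these primitives into a two-character parser; the name
-- @firstTwo@ is illustrative, and running it requires @evalStateT@ from
-- "Control.Monad.Trans.State.Strict", which is not imported above:
--
-- > firstTwo :: Monad m => Parser Text m (Maybe Char, Maybe Char)
-- > firstTwo = do a <- drawChar
-- >               b <- drawChar
-- >               return (a, b)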
691
692
693 {- | An improper lens from a stream of 'ByteString's expected to be UTF-8 encoded to the associated
694 stream of 'Text'; the 'Text' stream ends by returning a stream of 'ByteString's beginning at the point of any decoding failure.
695 -}
696
697 decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
698 (Producer Text m (Producer ByteString m r))
699 decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
700 (k (go B.empty PI.streamDecodeUtf8 p0)) where
701 go !carry dec0 p = do
702 x <- lift (next p)
703 case x of Left r -> return (if B.null carry
704 then return r -- all bytestring input was consumed
705 else (do yield carry -- a potentially valid fragment remains
706 return r))
707
708 Right (chunk, p') -> case dec0 chunk of
709 PI.Some text carry2 dec -> do yield text
710 go carry2 dec p'
711 PI.Other text bs -> do yield text
712 return (do yield bs -- an invalid blob remains
713 p')
714 {-# INLINABLE decodeUtf8 #-}
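
-- For instance, a sketch that views a byte stream as UTF-8 text and simply
-- drops the undecoded leftovers (the name @lenient@ is just illustrative):
--
-- > lenient :: Monad m => Producer ByteString m r -> Producer Text m ()
-- > lenient p = p ^. decodeUtf8 >> return ()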
715
716
717 -- | Splits a 'Producer' after the given number of characters
718 splitAt
719 :: (Monad m, Integral n)
720 => n
721 -> Lens' (Producer Text m r)
722 (Producer Text m (Producer Text m r))
723 splitAt n0 k p0 = fmap join (k (go n0 p0))
724 where
725 go 0 p = return p
726 go n p = do
727 x <- lift (next p)
728 case x of
729 Left r -> return (return r)
730 Right (txt, p') -> do
731 let len = fromIntegral (T.length txt)
732 if (len <= n)
733 then do
734 yield txt
735 go (n - len) p'
736 else do
737 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
738 yield prefix
739 return (yield suffix >> p')
740 {-# INLINABLE splitAt #-}
741
742
743 {-| Split a text stream in two, where the first text stream is the longest
744 consecutive group of characters that satisfy the predicate
745 -}
746 span
747 :: (Monad m)
748 => (Char -> Bool)
749 -> Lens' (Producer Text m r)
750 (Producer Text m (Producer Text m r))
751 span predicate k p0 = fmap join (k (go p0))
752 where
753 go p = do
754 x <- lift (next p)
755 case x of
756 Left r -> return (return r)
757 Right (txt, p') -> do
758 let (prefix, suffix) = T.span predicate txt
759 if (T.null suffix)
760 then do
761 yield txt
762 go p'
763 else do
764 yield prefix
765 return (yield suffix >> p')
766 {-# INLINABLE span #-}
767
768 {-| Split a text stream in two, where the first text stream is the longest
769 consecutive group of characters that don't satisfy the predicate
770 -}
771 break
772 :: (Monad m)
773 => (Char -> Bool)
774 -> Lens' (Producer Text m r)
775 (Producer Text m (Producer Text m r))
776 break predicate = span (not . predicate)
777 {-# INLINABLE break #-}
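
-- For example, a sketch that streams everything before the first colon and
-- returns the remainder (the name @upToColon@ is just illustrative):
--
-- > upToColon :: Monad m => Producer Text m r -> Producer Text m (Producer Text m r)
-- > upToColon p = p ^. break (== ':')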
778
779 {-| Improper lens that splits after the first group of equivalent Chars, as
780 defined by the given equivalence relation
781 -}
782 groupBy
783 :: (Monad m)
784 => (Char -> Char -> Bool)
785 -> Lens' (Producer Text m r)
786 (Producer Text m (Producer Text m r))
787 groupBy equals k p0 = fmap join (k ((go p0))) where
788 go p = do
789 x <- lift (next p)
790 case x of
791 Left r -> return (return r)
792 Right (txt, p') -> case T.uncons txt of
793 Nothing -> go p'
794 Just (c, _) -> (yield txt >> p') ^. span (equals c)
795 {-# INLINABLE groupBy #-}
796
797 -- | Improper lens that splits after the first succession of identical 'Char' s
798 group :: Monad m
799 => Lens' (Producer Text m r)
800 (Producer Text m (Producer Text m r))
801 group = groupBy (==)
802 {-# INLINABLE group #-}
803
804 {-| Improper lens that splits a 'Producer' after the first word
805
806 Unlike 'words', this does not drop leading whitespace
807 -}
808 word :: (Monad m)
809 => Lens' (Producer Text m r)
810 (Producer Text m (Producer Text m r))
811 word k p0 = fmap join (k (to p0))
812 where
813 to p = do
814 p' <- p^.span isSpace
815 p'^.break isSpace
816 {-# INLINABLE word #-}
817
818
819 line :: (Monad m)
820 => Lens' (Producer Text m r)
821 (Producer Text m (Producer Text m r))
822 line = break (== '\n')
823
824 {-# INLINABLE line #-}
825
826
827 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
828 intersperse
829 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
830 intersperse c = go0
831 where
832 go0 p = do
833 x <- lift (next p)
834 case x of
835 Left r -> return r
836 Right (txt, p') -> do
837 yield (T.intersperse c txt)
838 go1 p'
839 go1 p = do
840 x <- lift (next p)
841 case x of
842 Left r -> return r
843 Right (txt, p') -> do
844 yield (T.singleton c)
845 yield (T.intersperse c txt)
846 go1 p'
847 {-# INLINABLE intersperse #-}
848
849
850
851 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
852 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
853 packChars = Data.Profunctor.dimap to (fmap from)
854 where
855 -- to :: Monad m => Producer Char m x -> Producer Text m x
856 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
857
858 step diffAs c = diffAs . (c:)
859
860 done diffAs = T.pack (diffAs [])
861
862 -- from :: Monad m => Producer Text m x -> Producer Char m x
863 from p = for p (each . T.unpack)
864 {-# INLINABLE packChars #-}
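
-- A sketch of repacking a stream of 'Char's into 'Text' chunks with the
-- in-module '(^.)' (the name @packed@ is just illustrative):
--
-- > packed :: Monad m => Producer Char m x -> Producer Text m x
-- > packed p = p ^. packChars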
865
866
867 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
868 chunksOf
869 :: (Monad m, Integral n)
870 => n -> Lens' (Producer Text m r)
871 (FreeT (Producer Text m) m r)
872 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
873 where
874 go p = do
875 x <- next p
876 return $ case x of
877 Left r -> Pure r
878 Right (txt, p') -> Free $ do
879 p'' <- (yield txt >> p') ^. splitAt n
880 return $ FreeT (go p'')
881 {-# INLINABLE chunksOf #-}
882
883
884 {-| Split a text stream into sub-streams delimited by characters that satisfy the
885 predicate
886 -}
887 splitsWith
888 :: (Monad m)
889 => (Char -> Bool)
890 -> Producer Text m r
891 -> FreeT (Producer Text m) m r
892 splitsWith predicate p0 = FreeT (go0 p0)
893 where
894 go0 p = do
895 x <- next p
896 case x of
897 Left r -> return (Pure r)
898 Right (txt, p') ->
899 if (T.null txt)
900 then go0 p'
901 else return $ Free $ do
902 p'' <- (yield txt >> p') ^. span (not . predicate)
903 return $ FreeT (go1 p'')
904 go1 p = do
905 x <- nextChar p
906 return $ case x of
907 Left r -> Pure r
908 Right (_, p') -> Free $ do
909 p'' <- p' ^. span (not . predicate)
910 return $ FreeT (go1 p'')
911 {-# INLINABLE splitsWith #-}
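
-- For example, a sketch that rejoins comma-delimited fields with a single
-- space, using 'intercalates' from @Pipes.Group@ (the name @respace@ is just
-- illustrative):
--
-- > respace :: Monad m => Producer Text m r -> Producer Text m r
-- > respace p = intercalates (yield (T.singleton ' ')) (splitsWith (== ',') p)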
912
913 -- | Split a text stream using the given 'Char' as the delimiter
914 splits :: (Monad m)
915 => Char
916 -> Lens' (Producer Text m r)
917 (FreeT (Producer Text m) m r)
918 splits c k p =
919 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
920 {-# INLINABLE splits #-}
921
922 {-| Improper lens from a stream of 'Text' to 'FreeT'-delimited groups of equivalent 'Char's,
923 as determined by the given equivalence relation
924 -}
925 groupsBy
926 :: Monad m
927 => (Char -> Char -> Bool)
928 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
929 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
930 go p = do x <- next p
931 case x of Left r -> return (Pure r)
932 Right (bs, p') -> case T.uncons bs of
933 Nothing -> go p'
934 Just (c, _) -> do return $ Free $ do
935 p'' <- (yield bs >> p')^.span (equals c)
936 return $ FreeT (go p'')
937 {-# INLINABLE groupsBy #-}
938
939
940 -- | Like 'groupsBy', where the equality predicate is ('==')
941 groups
942 :: Monad m
943 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
944 groups = groupsBy (==)
945 {-# INLINABLE groups #-}
946
947
948
949 {-| Split a text stream into 'FreeT'-delimited lines
950 -}
951 lines
952 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
953 lines = Data.Profunctor.dimap _lines (fmap _unlines)
954 where
955 _lines p0 = FreeT (go0 p0)
956 where
957 go0 p = do
958 x <- next p
959 case x of
960 Left r -> return (Pure r)
961 Right (txt, p') ->
962 if (T.null txt)
963 then go0 p'
964 else return $ Free $ go1 (yield txt >> p')
965 go1 p = do
966 p' <- p ^. break ('\n' ==)
967 return $ FreeT $ do
968 x <- nextChar p'
969 case x of
970 Left r -> return $ Pure r
971 Right (_, p'') -> go0 p''
972 -- _unlines
973 -- :: Monad m
974 -- => FreeT (Producer Text m) m x -> Producer Text m x
975 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
976
977
978 {-# INLINABLE lines #-}
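
-- A sketch of the @takeLines@ idiom from the module header, assuming @takes@
-- from @Pipes.Group@ (the name @firstTwoLines@ is just illustrative):
--
-- > firstTwoLines :: Monad m => Producer Text m () -> Producer Text m ()
-- > firstTwoLines p = unlines (PG.takes 2 (p ^. lines))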
979
980
981 -- | Split a text stream into 'FreeT'-delimited words
982 words
983 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
984 words = Data.Profunctor.dimap go (fmap _unwords)
985 where
986 go p = FreeT $ do
987 x <- next (p >-> dropWhile isSpace)
988 return $ case x of
989 Left r -> Pure r
990 Right (bs, p') -> Free $ do
991 p'' <- (yield bs >> p') ^. break isSpace
992 return (go p'')
993 _unwords = PG.intercalates (yield $ T.singleton ' ')
994
995 {-# INLINABLE words #-}
996
997
998 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
999 interspersing a text stream in between them
1000 -}
1001 intercalate
1002 :: (Monad m)
1003 => Producer Text m ()
1004 -> FreeT (Producer Text m) m r
1005 -> Producer Text m r
1006 intercalate p0 = go0
1007 where
1008 go0 f = do
1009 x <- lift (runFreeT f)
1010 case x of
1011 Pure r -> return r
1012 Free p -> do
1013 f' <- p
1014 go1 f'
1015 go1 f = do
1016 x <- lift (runFreeT f)
1017 case x of
1018 Pure r -> return r
1019 Free p -> do
1020 p0
1021 f' <- p
1022 go1 f'
1023 {-# INLINABLE intercalate #-}
1024
1025 {-| Join 'FreeT'-delimited lines into a text stream
1026 -}
1027 unlines
1028 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1029 unlines = go
1030 where
1031 go f = do
1032 x <- lift (runFreeT f)
1033 case x of
1034 Pure r -> return r
1035 Free p -> do
1036 f' <- p
1037 yield $ T.singleton '\n'
1038 go f'
1039 {-# INLINABLE unlines #-}
1040
1041 {-| Join 'FreeT'-delimited words into a text stream
1042 -}
1043 unwords
1044 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1045 unwords = intercalate (yield $ T.singleton ' ')
1046 {-# INLINABLE unwords #-}
1047
1048 {- $parse
1049 The following parsing utilities are single-character analogs of the ones found
1050 in @pipes-parse@.
1051 -}
1052
1053 {- $reexports
1054
1055 @Data.Text@ re-exports the 'Text' type.
1056
1057 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
1058 -}
1059
1060 {- | Use a 'Codec' as a pipes-style 'Lens' into a byte stream; the available 'Codec' s are
1061 'utf8', 'utf16_le', 'utf16_be', 'utf32_le', 'utf32_be' . The 'Codec' concept and the
1062 individual 'Codec' definitions follow the enumerator and conduit libraries.
1063
1064 UTF-8 is handled differently in this library -- without the use of 'unsafePerformIO' and
1065 friends to catch 'Text' exceptions -- but the same @mypipe ^. codec utf8@ interface can be used.
1066 @mypipe ^. decodeUtf8@ should behave the same, but has a somewhat more direct and thus perhaps
1067 better implementation.
1068
1069 -}
1070 codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
1071 codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
1072 (k (decoder (dec B.empty) p0) ) where
1073 decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1074 decoder !d p0 = case d of
1075 PI.Other txt bad -> do yield txt
1076 return (do yield bad
1077 p0)
1078 PI.Some txt extra dec -> do yield txt
1079 x <- lift (next p0)
1080 case x of Left r -> return (do yield extra
1081 return r)
1082 Right (chunk,p1) -> decoder (dec chunk) p1
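
-- For example, a sketch decoding a UTF-16LE byte stream (the name @utf16Text@
-- is just illustrative):
--
-- > utf16Text :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
-- > utf16Text p = p ^. codec utf16_le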
1083
1084 {- | ASCII and Latin-1 encodings only represent a small fragment of 'Text'; thus we cannot
1085 use the pipes 'Lens' style to work with them. Rather, we simply define a function in
1086 each direction.
1087
1088 'encodeAscii' : reduce as much of your stream of 'Text' as is actually ASCII to a byte stream,
1089 returning the rest of the 'Text' at the first non-ASCII 'Char'
1090 -}
1091 encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1092 encodeAscii = go where
1093 go p = do echunk <- lift (next p)
1094 case echunk of
1095 Left r -> return (return r)
1096 Right (chunk, p') ->
1097 if T.null chunk
1098 then go p'
1099 else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
1100 in do yield (B8.pack (T.unpack safe))
1101 if T.null unsafe
1102 then go p'
1103 else return $ do yield unsafe
1104 p'
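
-- A sketch of 'encodeAscii' in use, streaming the ASCII prefix of a text
-- stream as bytes and simply dropping whatever non-ASCII remainder is
-- returned (the name @asciiPrefix@ is just illustrative):
--
-- > asciiPrefix :: Monad m => Producer Text m r -> Producer ByteString m ()
-- > asciiPrefix p = encodeAscii p >> return ()
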
1105 {- | Reduce as much of your stream of 'Text' as is actually ISO-8859-1 (Latin-1) to a byte stream,
1106 returning the rest of the 'Text' upon hitting the first non-Latin-1 'Char'
1107 -}
1108 encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1109 encodeIso8859_1 = go where
1110 go p = do etxt <- lift (next p)
1111 case etxt of
1112 Left r -> return (return r)
1113 Right (txt, p') ->
1114 if T.null txt
1115 then go p'
1116 else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
1117 in do yield (B8.pack (T.unpack safe))
1118 if T.null unsafe
1119 then go p'
1120 else return $ do yield unsafe
1121 p'
1122
1123 {- | Reduce a byte stream to a corresponding stream of ASCII 'Char's, returning the
1124 unused 'ByteString' upon hitting a non-ASCII byte.
1125 -}
1126 decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1127 decodeAscii = go where
1128 go p = do echunk <- lift (next p)
1129 case echunk of
1130 Left r -> return (return r)
1131 Right (chunk, p') ->
1132 if B.null chunk
1133 then go p'
1134 else let (safe, unsafe) = B.span (<= 0x7F) chunk
1135 in do yield (T.pack (B8.unpack safe))
1136 if B.null unsafe
1137 then go p'
1138 else return $ do yield unsafe
1139 p'
1140
1141 {- | Reduce a byte stream to a corresponding stream of ISO-8859-1 (Latin-1) 'Char's, returning the
1142 unused 'ByteString' upon hitting the rare un-latinizable byte.
1143 -}
1144 decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1145 decodeIso8859_1 = go where
1146 go p = do echunk <- lift (next p)
1147 case echunk of
1148 Left r -> return (return r)
1149 Right (chunk, p') ->
1150 if B.null chunk
1151 then go p'
1152 else let (safe, unsafe) = B.span (<= 0xFF) chunk
1153 in do yield (T.pack (B8.unpack safe))
1154 if B.null unsafe
1155 then go p'
1156 else return $ do yield unsafe
1157 p'
1158
1159
1160
1161
1162