1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
3 {-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
6 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
7     An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according to whether it is read from or written to.
8
9 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
10 example, the following program copies a document from one file to another:
11
12 > import Pipes
13 > import qualified Pipes.Text as Text
14 > import System.IO
15 >
16 > main =
17 > withFile "inFile.txt" ReadMode $ \hIn ->
18 > withFile "outFile.txt" WriteMode $ \hOut ->
19 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
20
21     To stream to and from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
22
23 > import Pipes
24 > import qualified Pipes.Text as Text
25 > import Pipes.Safe
26 >
27 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
28
29 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
30 and 'stdout' pipes, as with the following \"echo\" program:
31
32 > main = runEffect $ Text.stdin >-> Text.stdout
33
34 You can also translate pure lazy 'TL.Text's to and from pipes:
35
36 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
37
38 In addition, this module provides many functions equivalent to lazy
39 'Text' functions so that you can transform or fold text streams. For
40 example, to stream only the first three lines of 'stdin' to 'stdout' you
41 might write:
42
43 > import Pipes
44 > import qualified Pipes.Text as Text
45 > import qualified Pipes.Group as Group
46 > import Lens.Family (view)
47 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
48 >   where
49 >     takeLines n = Text.unlines . Group.takes' n . view Text.lines
50
51 The above program will never bring more than one chunk of text (~ 32 KB) into
52 memory, no matter how long the lines are.
53
54 Note that functions in this library are designed to operate on streams that
55 are insensitive to text boundaries. This means that they may freely split
56     text into smaller texts and /discard empty texts/. However, they will
57     /never concatenate texts/ in order to provide strict upper bounds on
58     memory usage -- with the single exception of 'concatMap'.
59 -}
60
61 module Pipes.Text (
62 -- * Producers
63 fromLazy
64 , stdin
65 , fromHandle
66 , readFile
67
68 -- * Consumers
69 , stdout
70 , toHandle
71 , writeFile
72
73 -- * Pipes
74 , map
75 , concatMap
76 , take
77 , drop
78 , takeWhile
79 , dropWhile
80 , filter
81 , scan
82 , encodeUtf8
83 , pack
84 , unpack
85 , toCaseFold
86 , toLower
87 , toUpper
88 , stripStart
89
90 -- * Folds
91 , toLazy
92 , toLazyM
93 , foldChars
94 , head
95 , last
96 , null
97 , length
98 , any
99 , all
100 , maximum
101 , minimum
102 , find
103 , index
104 , count
105
106 -- * Primitive Character Parsers
107 -- $parse
108 , nextChar
109 , drawChar
110 , unDrawChar
111 , peekChar
112 , isEndOfChars
113
114 -- * Parsing Lenses
115 , splitAt
116 , span
117 , break
118 , groupBy
119 , group
120 , word
121 , line
122
123 -- * Decoding Lenses
124 , decodeUtf8
125 , codec
126
127 -- * Codecs
128 , utf8
129 , utf16_le
130 , utf16_be
131 , utf32_le
132 , utf32_be
133
134 -- * Other Decoding/Encoding Functions
135 , decodeIso8859_1
136 , decodeAscii
137 , encodeIso8859_1
138 , encodeAscii
139
140 -- * FreeT Splitters
141 , chunksOf
142 , splitsWith
143 , splits
144 -- , groupsBy
145 -- , groups
146 , lines
147 , words
148
149 -- * Transformations
150 , intersperse
151 , packChars
152
153 -- * Joiners
154 , intercalate
155 , unlines
156 , unwords
157
158 -- * Re-exports
159 -- $reexports
160 , Decoding(..)
161 , streamDecodeUtf8
162 , decodeSomeUtf8
163 , Codec(..)
164 , TextException(..)
165 , module Data.ByteString
166 , module Data.Text
167 , module Data.Profunctor
168 , module Data.Word
169 , module Pipes.Parse
170 , module Pipes.Group
171 ) where
172
173 import Control.Exception (throwIO, try)
174 import Control.Applicative ((<*))
175 import Control.Monad (liftM, unless, join)
176 import Control.Monad.Trans.State.Strict (StateT(..), modify)
177 import Data.Monoid ((<>))
178 import qualified Data.Text as T
179 import qualified Data.Text.IO as T
180 import qualified Data.Text.Encoding as TE
181 import qualified Data.Text.Encoding.Error as TE
182 import Data.Text (Text)
183 import qualified Data.Text.Lazy as TL
184 import qualified Data.Text.Lazy.IO as TL
185 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
186 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
187 import Data.ByteString (ByteString)
188 import qualified Data.ByteString as B
189 import qualified Data.ByteString.Char8 as B8
190 import Data.Char (ord, isSpace)
191 import Data.Functor.Constant (Constant(Constant, getConstant))
192 import Data.Functor.Identity (Identity)
193 import Data.Profunctor (Profunctor)
194 import qualified Data.Profunctor
195 import qualified Data.List as List
196 import Foreign.C.Error (Errno(Errno), ePIPE)
197 import qualified GHC.IO.Exception as G
198 import Pipes
199 import qualified Pipes.ByteString as PB
200 import qualified Pipes.Text.Internal as PI
201 import Pipes.Text.Internal
202 import Pipes.Core (respond, Server')
203 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
204 import qualified Pipes.Group as PG
205 import qualified Pipes.Parse as PP
206 import Pipes.Parse (Parser)
207 import qualified Pipes.Safe.Prelude as Safe
208 import qualified Pipes.Safe as Safe
209 import Pipes.Safe (MonadSafe(..), Base(..))
210 import qualified Pipes.Prelude as P
211 import qualified System.IO as IO
213 import Data.Word (Word8)
214
215 import Prelude hiding (
216 all,
217 any,
218 break,
219 concat,
220 concatMap,
221 drop,
222 dropWhile,
223 elem,
224 filter,
225 head,
226 last,
227 lines,
228 length,
229 map,
230 maximum,
231 minimum,
232 notElem,
233 null,
234 readFile,
235 span,
236 splitAt,
237 take,
238 takeWhile,
239 unlines,
240 unwords,
241 words,
242 writeFile )
243
244 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
245 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
246 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
247 {-# INLINE fromLazy #-}
248
249 -- | Stream text from 'stdin'
250 stdin :: MonadIO m => Producer Text m ()
251 stdin = fromHandle IO.stdin
252 {-# INLINE stdin #-}
253
254 {-| Convert an 'IO.Handle' into a text stream using a chunk size
255     determined by the good sense of the text library; note that this
256     is distinctly slower than @decodeUtf8 (Pipes.ByteString.fromHandle h)@
257     but uses the system encoding and has other @Data.Text.IO@ features
258 -}
259
260 fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
261 fromHandle h = go where
262 go = do txt <- liftIO (T.hGetChunk h)
263 unless (T.null txt) ( do yield txt
264 go )
265 {-# INLINABLE fromHandle #-}
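{- For comparison with the note above, a hedged sketch of the byte-oriented
   alternative: read raw bytes with @Pipes.ByteString.fromHandle@ and decode
   them with the 'decodeUtf8' lens defined further down. @view@ is assumed from
   a lens library such as @lens-family-core@; the handle @h@ is opened by the
   caller, and the helper name is illustrative.

> import Pipes
> import Data.Text (Text)
> import Data.ByteString (ByteString)
> import qualified System.IO as IO
> import qualified Pipes.ByteString as PB
> import qualified Pipes.Text as Text
> import Lens.Family (view)
>
> -- Decoded text; the returned inner 'Producer' re-streams any bytes that
> -- could not be decoded as UTF-8.
> fromHandleUtf8 :: MonadIO m => IO.Handle -> Producer Text m (Producer ByteString m ())
> fromHandleUtf8 h = view Text.decodeUtf8 (PB.fromHandle h)
-}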
266
267
268 {-| Stream text from a file in the simple fashion of @Data.Text.IO@
269
270 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
271 MAIN = PUTSTRLN "HELLO WORLD"
272 -}
273
274 readFile :: MonadSafe m => FilePath -> Producer Text m ()
275 readFile file = Safe.withFile file IO.ReadMode fromHandle
276 {-# INLINE readFile #-}
277
278
279 {-| Stream text to 'stdout'
280
281 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
282
283     Note: For best performance, where possible use @(for source (liftIO . putStr))@
284     instead of @(source >-> stdout)@.
285 -}
286 stdout :: MonadIO m => Consumer' Text m ()
287 stdout = go
288 where
289 go = do
290 txt <- await
291 x <- liftIO $ try (T.putStr txt)
292 case x of
293 Left (G.IOError { G.ioe_type = G.ResourceVanished
294 , G.ioe_errno = Just ioe })
295 | Errno ioe == ePIPE
296 -> return ()
297 Left e -> liftIO (throwIO e)
298 Right () -> go
299 {-# INLINABLE stdout #-}
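{- A sketch of the faster alternative mentioned in the note above, writing each
   chunk directly with 'for'. The helper name is illustrative and @T@ stands for
   a qualified import of "Data.Text.IO"; unlike 'stdout', this does not
   intercept the broken-pipe exception.

> import Pipes
> import Data.Text (Text)
> import qualified Data.Text.IO as T
>
> printAll :: MonadIO m => Producer Text m r -> Effect m r
> printAll source = for source (liftIO . T.putStr)
-}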
300
301
302 {-| Stream text to an 'IO.Handle'
303
304 Note: again, for best performance, where possible use
305 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
306 -}
307 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
308 toHandle h = for cat (liftIO . T.hPutStr h)
309 {-# INLINABLE toHandle #-}
310
311 {-# RULES "p >-> toHandle h" forall p h .
312 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
313 #-}
314
315
316 -- | Stream text into a file. Uses @pipes-safe@.
317 writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
318 writeFile file = Safe.withFile file IO.WriteMode toHandle
319 {-# INLINE writeFile #-}
320
321
322 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
323
324 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
325
326 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
327 a ^. lens = getConstant (lens Constant a)
328
329
330 -- | Apply a transformation to each 'Char' in the stream
331 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
332 map f = P.map (T.map f)
333 {-# INLINABLE map #-}
334
335 {-# RULES "p >-> map f" forall p f .
336 p >-> map f = for p (\txt -> yield (T.map f txt))
337 #-}
338
339 -- | Map a function over the characters of a text stream and concatenate the results
340 concatMap
341 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
342 concatMap f = P.map (T.concatMap f)
343 {-# INLINABLE concatMap #-}
344
345 {-# RULES "p >-> concatMap f" forall p f .
346 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
347 #-}
348
349 -- | Transform a stream of 'Text' into a stream of 'ByteString's using UTF-8
350 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
351 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
352 encodeUtf8 :: Monad m => Pipe Text ByteString m r
353 encodeUtf8 = P.map TE.encodeUtf8
354 {-# INLINEABLE encodeUtf8 #-}
355
356 {-# RULES "p >-> encodeUtf8" forall p .
357 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
358 #-}
359
360 -- | Transform a Pipe of 'String's into one of 'Text' chunks
361 pack :: Monad m => Pipe String Text m r
362 pack = P.map T.pack
363 {-# INLINEABLE pack #-}
364
365 {-# RULES "p >-> pack" forall p .
366 p >-> pack = for p (\txt -> yield (T.pack txt))
367 #-}
368
369 -- | Transform a Pipe of 'Text' chunks into one of 'String's
370 unpack :: Monad m => Pipe Text String m r
371 unpack = for cat (\t -> yield (T.unpack t))
372 {-# INLINEABLE unpack #-}
373
374 {-# RULES "p >-> unpack" forall p .
375 p >-> unpack = for p (\txt -> yield (T.unpack txt))
376 #-}
377
378 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
379 -- here acting as 'Text' pipes, much as they would act on a lazy 'Text'
380 toCaseFold :: Monad m => Pipe Text Text m ()
381 toCaseFold = P.map T.toCaseFold
382 {-# INLINEABLE toCaseFold #-}
383
384 {-# RULES "p >-> toCaseFold" forall p .
385 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
386 #-}
387
388
389 -- | lowercase incoming 'Text'
390 toLower :: Monad m => Pipe Text Text m ()
391 toLower = P.map T.toLower
392 {-# INLINEABLE toLower #-}
393
394 {-# RULES "p >-> toLower" forall p .
395 p >-> toLower = for p (\txt -> yield (T.toLower txt))
396 #-}
397
398 -- | uppercase incoming 'Text'
399 toUpper :: Monad m => Pipe Text Text m ()
400 toUpper = P.map T.toUpper
401 {-# INLINEABLE toUpper #-}
402
403 {-# RULES "p >-> toUpper" forall p .
404 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
405 #-}
406
407 -- | Remove leading white space from an incoming succession of 'Text's
408 stripStart :: Monad m => Pipe Text Text m r
409 stripStart = do
410 chunk <- await
411 let text = T.stripStart chunk
412 if T.null text
413 then stripStart
414 else do yield text
415 cat
416 {-# INLINEABLE stripStart #-}
417
418 -- | @(take n)@ only allows @n@ individual characters to pass;
419 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
420 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
421 take n0 = go n0 where
422 go n
423 | n <= 0 = return ()
424 | otherwise = do
425 txt <- await
426 let len = fromIntegral (T.length txt)
427 if (len > n)
428 then yield (T.take (fromIntegral n) txt)
429 else do
430 yield txt
431 go (n - len)
432 {-# INLINABLE take #-}
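{- For instance, in an illustrative GHCi session (@TL@ abbreviating
   "Data.Text.Lazy"; the input string is arbitrary), only the first three
   characters pass, however the stream happens to be chunked:

>>> runEffect $ Text.fromLazy (TL.pack "pipes") >-> Text.take 3 >-> Text.stdout
pip
-}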
433
434 -- | @(drop n)@ drops the first @n@ characters
435 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
436 drop n0 = go n0 where
437 go n
438 | n <= 0 = cat
439 | otherwise = do
440 txt <- await
441 let len = fromIntegral (T.length txt)
442 if (len >= n)
443 then do
444 yield (T.drop (fromIntegral n) txt)
445 cat
446 else go (n - len)
447 {-# INLINABLE drop #-}
448
449 -- | Take characters until they fail the predicate
450 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
451 takeWhile predicate = go
452 where
453 go = do
454 txt <- await
455 let (prefix, suffix) = T.span predicate txt
456 if (T.null suffix)
457 then do
458 yield txt
459 go
460 else yield prefix
461 {-# INLINABLE takeWhile #-}
462
463 -- | Drop characters until they fail the predicate
464 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
465 dropWhile predicate = go where
466 go = do
467 txt <- await
468 case T.findIndex (not . predicate) txt of
469 Nothing -> go
470 Just i -> do
471 yield (T.drop i txt)
472 cat
473 {-# INLINABLE dropWhile #-}
474
475 -- | Only allows 'Char's to pass if they satisfy the predicate
476 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
477 filter predicate = P.map (T.filter predicate)
478 {-# INLINABLE filter #-}
479
480 {-# RULES "p >-> filter q" forall p q .
481 p >-> filter q = for p (\txt -> yield (T.filter q txt))
482 #-}
483
484 -- | Strict left scan over the characters
485 scan
486 :: (Monad m)
487 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
488 scan step begin = yield (T.singleton begin) >> go begin
489   where
490     go c = do
491         txt <- await
492         let txt' = T.scanl step c txt
493             c' = T.last txt'
494         yield (T.tail txt')
495         go c'
496 {-# INLINABLE scan #-}
497
498 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
499 'TL.Text'
500 -}
501 toLazy :: Producer Text Identity () -> TL.Text
502 toLazy = TL.fromChunks . P.toList
503 {-# INLINABLE toLazy #-}
504
505 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
506 'TL.Text'
507
508 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
509 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
510 immediately as they are generated instead of loading them all into memory.
511 -}
512 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
513 toLazyM = liftM TL.fromChunks . P.toListM
514 {-# INLINABLE toLazyM #-}
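{- A quick round-trip sketch for GHCi testing (@TL@ abbreviating "Data.Text.Lazy";
   the string literal is arbitrary):

>>> Text.toLazy (Text.fromLazy (TL.pack "hello"))
"hello"

>>> Text.toLazyM (Text.fromLazy (TL.pack "hello")) :: IO TL.Text
"hello"
-}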
515
516 -- | Reduce the text stream using a strict left fold over characters
517 foldChars
518 :: Monad m
519 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
520 foldChars step begin done = P.fold (T.foldl' step) begin done
521 {-# INLINABLE foldChars #-}
522
523 -- | Retrieve the first 'Char'
524 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
525 head = go
526 where
527 go p = do
528 x <- nextChar p
529 case x of
530 Left _ -> return Nothing
531 Right (c, _) -> return (Just c)
532 {-# INLINABLE head #-}
533
534 -- | Retrieve the last 'Char'
535 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
536 last = go Nothing
537 where
538 go r p = do
539 x <- next p
540 case x of
541 Left () -> return r
542 Right (txt, p') ->
543 if (T.null txt)
544 then go r p'
545 else go (Just $ T.last txt) p'
546 {-# INLINABLE last #-}
547
548 -- | Determine if the stream is empty
549 null :: (Monad m) => Producer Text m () -> m Bool
550 null = P.all T.null
551 {-# INLINABLE null #-}
552
553 -- | Count the number of characters in the stream
554 length :: (Monad m, Num n) => Producer Text m () -> m n
555 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
556 {-# INLINABLE length #-}
557
558 -- | Fold that returns whether any received 'Char's satisfy the predicate
559 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
560 any predicate = P.any (T.any predicate)
561 {-# INLINABLE any #-}
562
563 -- | Fold that returns whether all received 'Char's satisfy the predicate
564 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
565 all predicate = P.all (T.all predicate)
566 {-# INLINABLE all #-}
567
568 -- | Return the maximum 'Char' within a text stream
569 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
570 maximum = P.fold step Nothing id
571 where
572 step mc txt =
573 if (T.null txt)
574 then mc
575 else Just $ case mc of
576 Nothing -> T.maximum txt
577 Just c -> max c (T.maximum txt)
578 {-# INLINABLE maximum #-}
579
580 -- | Return the minimum 'Char' within a text stream (surely very useful!)
581 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
582 minimum = P.fold step Nothing id
583 where
584 step mc txt =
585 if (T.null txt)
586 then mc
587 else case mc of
588 Nothing -> Just (T.minimum txt)
589 Just c -> Just (min c (T.minimum txt))
590 {-# INLINABLE minimum #-}
591
592
593 -- | Find the first element in the stream that matches the predicate
594 find
595 :: (Monad m)
596 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
597 find predicate p = head (p >-> filter predicate)
598 {-# INLINABLE find #-}
599
600 -- | Index into a text stream
601 index
602 :: (Monad m, Integral a)
603     => a -> Producer Text m () -> m (Maybe Char)
604 index n p = head (p >-> drop n)
605 {-# INLINABLE index #-}
606
607
608 -- | Count the non-overlapping occurrences of the given 'Text' in the stream (matches straddling chunk boundaries are not detected)
609 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
610 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
611 {-# INLINABLE count #-}
612
613
614 {-| Consume the first character from a stream of 'Text'
615
616     'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
617 succeeds with a 'Right' providing the next character and the remainder of the
618 'Producer'.
619 -}
620 nextChar
621 :: (Monad m)
622 => Producer Text m r
623 -> m (Either r (Char, Producer Text m r))
624 nextChar = go
625 where
626 go p = do
627 x <- next p
628 case x of
629 Left r -> return (Left r)
630 Right (txt, p') -> case (T.uncons txt) of
631 Nothing -> go p'
632 Just (c, txt') -> return (Right (c, yield txt' >> p'))
633 {-# INLINABLE nextChar #-}
634
635 {-| Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the
636 'Producer' is empty
637 -}
638 drawChar :: (Monad m) => Parser Text m (Maybe Char)
639 drawChar = do
640 x <- PP.draw
641 case x of
642 Nothing -> return Nothing
643 Just txt -> case (T.uncons txt) of
644 Nothing -> drawChar
645 Just (c, txt') -> do
646 PP.unDraw txt'
647 return (Just c)
648 {-# INLINABLE drawChar #-}
649
650 -- | Push back a 'Char' onto the underlying 'Producer'
651 unDrawChar :: (Monad m) => Char -> Parser Text m ()
652 unDrawChar c = modify (yield (T.singleton c) >>)
653 {-# INLINABLE unDrawChar #-}
654
655 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
656 push the 'Char' back
657
658 > peekChar = do
659 > x <- drawChar
660 > case x of
661 >         Nothing -> return ()
662 >         Just c  -> unDrawChar c
663 > return x
664 -}
665 peekChar :: (Monad m) => Parser Text m (Maybe Char)
666 peekChar = do
667 x <- drawChar
668 case x of
669 Nothing -> return ()
670 Just c -> unDrawChar c
671 return x
672 {-# INLINABLE peekChar #-}
673
674 {-| Check if the underlying 'Producer' has no more characters
675
676 Note that this will skip over empty 'Text' chunks, unlike
677 'PP.isEndOfInput' from @pipes-parse@, which would consider
678 an empty 'Text' a valid bit of input.
679
680 > isEndOfChars = liftM isNothing peekChar
681 -}
682 isEndOfChars :: (Monad m) => Parser Text m Bool
683 isEndOfChars = do
684 x <- peekChar
685 return (case x of
686 Nothing -> True
687 Just _-> False )
688 {-# INLINABLE isEndOfChars #-}
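{- A minimal sketch of running these character parsers, using @evalStateT@ from
   "Control.Monad.Trans.State.Strict" (the same 'StateT' machinery that
   @pipes-parse@ parsers are built on); the helper name and input are illustrative:

> import Pipes
> import Data.Text (Text)
> import qualified Pipes.Text as Text
> import Control.Monad.Trans.State.Strict (evalStateT)
>
> drawFirst :: Monad m => Producer Text m () -> m (Maybe Char)
> drawFirst = evalStateT Text.drawChar
>
> -- drawFirst (Text.fromLazy (TL.pack "hi"))  ==  return (Just 'h')
-}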
689
690
691 {- | An improper lens into a stream of 'ByteString' expected to be UTF-8 encoded; the associated
692      stream of 'Text' ends by returning the remaining stream of 'ByteString's, beginning at the point of any decoding failure.
693 -}
694
695 decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
696 (Producer Text m (Producer ByteString m r))
697 decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
698 (k (go B.empty PI.streamDecodeUtf8 p0)) where
699 go !carry dec0 p = do
700 x <- lift (next p)
701 case x of Left r -> return (if B.null carry
702 then return r -- all bytestring input was consumed
703 else (do yield carry -- a potentially valid fragment remains
704 return r))
705
706 Right (chunk, p') -> case dec0 chunk of
707 PI.Some text carry2 dec -> do yield text
708 go carry2 dec p'
709 PI.Other text bs -> do yield text
710 return (do yield bs -- an invalid blob remains
711 p')
712 {-# INLINABLE decodeUtf8 #-}
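{- A hedged sketch of one way to consume the leftover bytes as well (@view@
   assumed from a lens library, @P@ a qualified import of "Pipes.Prelude"):
   decode as much UTF-8 as possible, then simply discard whatever could not be
   decoded. The helper name is illustrative.

> import Pipes
> import Data.Text (Text)
> import Data.ByteString (ByteString)
> import qualified Pipes.Prelude as P
> import qualified Pipes.Text as Text
> import Lens.Family (view)
>
> decodeLenient :: Monad m => Producer ByteString m () -> Producer Text m ()
> decodeLenient bytes = do
>     leftover <- view Text.decodeUtf8 bytes   -- yields Text until the bytes end or decoding fails
>     lift (runEffect (leftover >-> P.drain))  -- drop the undecodable remainder, if any
-}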
713
714
715 -- | Splits a 'Producer' after the given number of characters
716 splitAt
717 :: (Monad m, Integral n)
718 => n
719 -> Lens' (Producer Text m r)
720 (Producer Text m (Producer Text m r))
721 splitAt n0 k p0 = fmap join (k (go n0 p0))
722 where
723 go 0 p = return p
724 go n p = do
725 x <- lift (next p)
726 case x of
727 Left r -> return (return r)
728 Right (txt, p') -> do
729 let len = fromIntegral (T.length txt)
730 if (len <= n)
731 then do
732 yield txt
733 go (n - len) p'
734 else do
735 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
736 yield prefix
737 return (yield suffix >> p')
738 {-# INLINABLE splitAt #-}
739
740
741 {-| Split a text stream in two, where the first text stream is the longest
742     consecutive group of characters that satisfy the predicate
743 -}
744 span
745 :: (Monad m)
746 => (Char -> Bool)
747 -> Lens' (Producer Text m r)
748 (Producer Text m (Producer Text m r))
749 span predicate k p0 = fmap join (k (go p0))
750 where
751 go p = do
752 x <- lift (next p)
753 case x of
754 Left r -> return (return r)
755 Right (txt, p') -> do
756 let (prefix, suffix) = T.span predicate txt
757 if (T.null suffix)
758 then do
759 yield txt
760 go p'
761 else do
762 yield prefix
763 return (yield suffix >> p')
764 {-# INLINABLE span #-}
765
766 {-| Split a text stream in two, where the first text stream is the longest
767 consecutive group of characters that don't satisfy the predicate
768 -}
769 break
770 :: (Monad m)
771 => (Char -> Bool)
772 -> Lens' (Producer Text m r)
773 (Producer Text m (Producer Text m r))
774 break predicate = span (not . predicate)
775 {-# INLINABLE break #-}
776
777 {-| Improper lens that splits after the first group of equivalent Chars, as
778 defined by the given equivalence relation
779 -}
780 groupBy
781 :: (Monad m)
782 => (Char -> Char -> Bool)
783 -> Lens' (Producer Text m r)
784 (Producer Text m (Producer Text m r))
785 groupBy equals k p0 = fmap join (k ((go p0))) where
786 go p = do
787 x <- lift (next p)
788 case x of
789 Left r -> return (return r)
790 Right (txt, p') -> case T.uncons txt of
791 Nothing -> go p'
792 Just (c, _) -> (yield txt >> p') ^. span (equals c)
793 {-# INLINABLE groupBy #-}
794
795 -- | Improper lens that splits after the first succession of identical 'Char's
796 group :: Monad m
797 => Lens' (Producer Text m r)
798 (Producer Text m (Producer Text m r))
799 group = groupBy (==)
800 {-# INLINABLE group #-}
801
802 {-| Improper lens that splits a 'Producer' after the first word
803
804 Unlike 'words', this does not drop leading whitespace
805 -}
806 word :: (Monad m)
807 => Lens' (Producer Text m r)
808 (Producer Text m (Producer Text m r))
809 word k p0 = fmap join (k (to p0))
810 where
811 to p = do
812 p' <- p^.span isSpace
813 p'^.break isSpace
814 {-# INLINABLE word #-}
815
816
817 line :: (Monad m)
818 => Lens' (Producer Text m r)
819 (Producer Text m (Producer Text m r))
820 line = break (== '\n')
821
822 {-# INLINABLE line #-}
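{- For example (a sketch; @view@ assumed from a lens library, helper name
   illustrative): the first line of a text stream, with everything that follows
   available from the inner 'Producer'. The terminating newline, if any, is left
   at the head of that remainder.

> import Pipes
> import Data.Text (Text)
> import qualified Pipes.Text as Text
> import Lens.Family (view)
>
> firstLine :: Monad m => Producer Text m r -> Producer Text m (Producer Text m r)
> firstLine = view Text.line
-}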
823
824
825 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
826 intersperse
827 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
828 intersperse c = go0
829 where
830 go0 p = do
831 x <- lift (next p)
832 case x of
833 Left r -> return r
834 Right (txt, p') -> do
835 yield (T.intersperse c txt)
836 go1 p'
837 go1 p = do
838 x <- lift (next p)
839 case x of
840 Left r -> return r
841 Right (txt, p') -> do
842 yield (T.singleton c)
843 yield (T.intersperse c txt)
844 go1 p'
845 {-# INLINABLE intersperse #-}
846
847
848
849 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text'
850 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
851 packChars = Data.Profunctor.dimap to (fmap from)
852 where
853 -- to :: Monad m => Producer Char m x -> Producer Text m x
854 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
855
856 step diffAs c = diffAs . (c:)
857
858 done diffAs = T.pack (diffAs [])
859
860 -- from :: Monad m => Producer Text m x -> Producer Char m x
861 from p = for p (each . T.unpack)
862 {-# INLINABLE packChars #-}
863
864
865 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
866 chunksOf
867 :: (Monad m, Integral n)
868 => n -> Lens' (Producer Text m r)
869 (FreeT (Producer Text m) m r)
870 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
871 where
872 go p = do
873 x <- next p
874 return $ case x of
875 Left r -> Pure r
876 Right (txt, p') -> Free $ do
877 p'' <- (yield txt >> p') ^. splitAt n
878 return $ FreeT (go p'')
879 {-# INLINABLE chunksOf #-}
880
881
882 {-| Split a text stream into sub-streams delimited by characters that satisfy the
883 predicate
884 -}
885 splitsWith
886 :: (Monad m)
887 => (Char -> Bool)
888 -> Producer Text m r
889 -> FreeT (Producer Text m) m r
890 splitsWith predicate p0 = FreeT (go0 p0)
891 where
892 go0 p = do
893 x <- next p
894 case x of
895 Left r -> return (Pure r)
896 Right (txt, p') ->
897 if (T.null txt)
898 then go0 p'
899 else return $ Free $ do
900 p'' <- (yield txt >> p') ^. span (not . predicate)
901 return $ FreeT (go1 p'')
902 go1 p = do
903 x <- nextChar p
904 return $ case x of
905 Left r -> Pure r
906 Right (_, p') -> Free $ do
907 p'' <- p' ^. span (not . predicate)
908 return $ FreeT (go1 p'')
909 {-# INLINABLE splitsWith #-}
910
911 -- | Split a text stream using the given 'Char' as the delimiter
912 splits :: (Monad m)
913 => Char
914 -> Lens' (Producer Text m r)
915 (FreeT (Producer Text m) m r)
916 splits c k p =
917 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
918 {-# INLINABLE splits #-}
919
920 {-| Improper lens from a stream of 'Text' to 'FreeT'-delimited groups of equivalent 'Char's, using the
921 given equivalence relation
922 -}
923 groupsBy
924 :: Monad m
925 => (Char -> Char -> Bool)
926 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
927 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
928 go p = do x <- next p
929 case x of Left r -> return (Pure r)
930 Right (bs, p') -> case T.uncons bs of
931 Nothing -> go p'
932 Just (c, _) -> do return $ Free $ do
933 p'' <- (yield bs >> p')^.span (equals c)
934 return $ FreeT (go p'')
935 {-# INLINABLE groupsBy #-}
936
937
938 -- | Like 'groupsBy', where the equality predicate is ('==')
939 groups
940 :: Monad m
941 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
942 groups = groupsBy (==)
943 {-# INLINABLE groups #-}
944
945
946
947 {-| Split a text stream into 'FreeT'-delimited lines
948 -}
949 lines
950 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
951 lines = Data.Profunctor.dimap _lines (fmap _unlines)
952 where
953 _lines p0 = FreeT (go0 p0)
954 where
955 go0 p = do
956 x <- next p
957 case x of
958 Left r -> return (Pure r)
959 Right (txt, p') ->
960 if (T.null txt)
961 then go0 p'
962 else return $ Free $ go1 (yield txt >> p')
963 go1 p = do
964 p' <- p ^. break ('\n' ==)
965 return $ FreeT $ do
966 x <- nextChar p'
967 case x of
968 Left r -> return $ Pure r
969 Right (_, p'') -> go0 p''
970 -- _unlines
971 -- :: Monad m
972 -- => FreeT (Producer Text m) m x -> Producer Text m x
973 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
974
975
976 {-# INLINABLE lines #-}
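{- A sketch of the 'lines' isomorphism in use (assumptions: @view@ from a lens
   library, @PG@ a qualified import of "Pipes.Group", @T@ of "Data.Text"; the
   helper name is illustrative): fold each 'FreeT'-delimited line down to its
   character count.

> import Pipes
> import Data.Text (Text)
> import qualified Data.Text as T
> import qualified Pipes.Group as PG
> import qualified Pipes.Text as Text
> import Lens.Family (view)
>
> lineLengths :: Monad m => Producer Text m () -> Producer Int m ()
> lineLengths p = PG.folds (\n txt -> n + T.length txt) 0 id (view Text.lines p)
-}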
977
978
979 -- | Split a text stream into 'FreeT'-delimited words
980 words
981 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
982 words = Data.Profunctor.dimap go (fmap _unwords)
983 where
984 go p = FreeT $ do
985 x <- next (p >-> dropWhile isSpace)
986 return $ case x of
987 Left r -> Pure r
988 Right (bs, p') -> Free $ do
989 p'' <- (yield bs >> p') ^. break isSpace
990 return (go p'')
991 _unwords = PG.intercalates (yield $ T.singleton ' ')
992
993 {-# INLINABLE words #-}
994
995
996 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
997 interspersing a text stream in between them
998 -}
999 intercalate
1000 :: (Monad m)
1001 => Producer Text m ()
1002 -> FreeT (Producer Text m) m r
1003 -> Producer Text m r
1004 intercalate p0 = go0
1005 where
1006 go0 f = do
1007 x <- lift (runFreeT f)
1008 case x of
1009 Pure r -> return r
1010 Free p -> do
1011 f' <- p
1012 go1 f'
1013 go1 f = do
1014 x <- lift (runFreeT f)
1015 case x of
1016 Pure r -> return r
1017 Free p -> do
1018 p0
1019 f' <- p
1020 go1 f'
1021 {-# INLINABLE intercalate #-}
1022
1023 {-| Join 'FreeT'-delimited lines into a text stream
1024 -}
1025 unlines
1026 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1027 unlines = go
1028 where
1029 go f = do
1030 x <- lift (runFreeT f)
1031 case x of
1032 Pure r -> return r
1033 Free p -> do
1034 f' <- p
1035 yield $ T.singleton '\n'
1036 go f'
1037 {-# INLINABLE unlines #-}
1038
1039 {-| Join 'FreeT'-delimited words into a text stream
1040 -}
1041 unwords
1042 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1043 unwords = intercalate (yield $ T.singleton ' ')
1044 {-# INLINABLE unwords #-}
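{- Putting a splitter and a joiner together -- a hedged sketch (@view@ assumed
   from a lens library, helper name illustrative) that re-joins the
   'FreeT'-delimited words of a stream with single spaces:

> import Pipes
> import Data.Text (Text)
> import qualified Pipes.Text as Text
> import Lens.Family (view)
>
> normalizeSpaces :: Monad m => Producer Text m r -> Producer Text m r
> normalizeSpaces p = Text.unwords (view Text.words p)
-}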
1045
1046 {- $parse
1047    The following parsing utilities are single-character analogs of the ones found in
1048 @pipes-parse@.
1049 -}
1050
1051 {- $reexports
1052
1053     @Data.Text@ re-exports the 'Text' type; @Data.ByteString@ the 'ByteString' type; @Data.Word@ the 'Word8' type; and @Data.Profunctor@ the 'Profunctor' class.
1054
1055     @Pipes.Parse@ re-exports the 'Parser' type synonym; @Pipes.Group@ re-exports 'concats', 'intercalates', 'FreeT' and 'FreeF'.
1056 -}
1057
1058 {- | Use a 'Codec' as a pipes-style 'Lens' into a byte stream; the available 'Codec's are
1059      'utf8', 'utf16_le', 'utf16_be', 'utf32_le' and 'utf32_be'. The 'Codec' concept and the
1060      individual 'Codec' definitions follow the enumerator and conduit libraries.
1061
1062      UTF-8 is handled differently in this library -- without the use of 'unsafePerformIO' and
1063      friends to catch 'Text' exceptions; but the same @mypipe ^. codec utf8@ interface can be used.
1064      @mypipe ^. decodeUtf8@ should behave the same, but has a somewhat more direct and thus perhaps
1065      better implementation.
1066
1067 -}
1068 codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
1069 codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
1070 (k (decoder (dec B.empty) p0) ) where
1071 decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1072 decoder !d p0 = case d of
1073 PI.Other txt bad -> do yield txt
1074 return (do yield bad
1075 p0)
1076 PI.Some txt extra dec -> do yield txt
1077 x <- lift (next p0)
1078 case x of Left r -> return (do yield extra
1079 return r)
1080 Right (chunk,p1) -> decoder (dec chunk) p1
1081
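{- A usage sketch for 'codec' (hedged; @view@ assumed from a lens library, @PB@ a
   qualified import of "Pipes.ByteString", helper name illustrative): decode
   standard input as UTF-16LE, returning the raw bytes from the point of any
   decoding error.

> import Pipes
> import Data.Text (Text)
> import Data.ByteString (ByteString)
> import qualified Pipes.ByteString as PB
> import qualified Pipes.Text as Text
> import Lens.Family (view)
>
> utf16Stdin :: MonadIO m => Producer Text m (Producer ByteString m ())
> utf16Stdin = view (Text.codec Text.utf16_le) PB.stdin
-}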
1082 {- | ASCII and Latin-1 encodings only represent a small fragment of 'Text'; thus we cannot
1083      use the pipes 'Lens' style to work with them. Rather we simply define functions
1084      going each way.
1085
1086      'encodeAscii': Reduce as much of your stream of 'Text' as is actually ASCII to a byte stream,
1087      returning the rest of the 'Text' at the first non-ASCII 'Char'
1088 -}
1089 encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1090 encodeAscii = go where
1091 go p = do echunk <- lift (next p)
1092 case echunk of
1093 Left r -> return (return r)
1094 Right (chunk, p') ->
1095 if T.null chunk
1096 then go p'
1097 else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
1098 in do yield (B8.pack (T.unpack safe))
1099 if T.null unsafe
1100 then go p'
1101 else return $ do yield unsafe
1102 p'
1103 {- | Reduce as much of your stream of 'Text' as is actually ISO-8859-1 (Latin-1) to a byte stream,
1104      returning the rest of the 'Text' upon hitting any non-Latin-1 'Char'
1105 -}
1106 encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1107 encodeIso8859_1 = go where
1108 go p = do etxt <- lift (next p)
1109 case etxt of
1110 Left r -> return (return r)
1111 Right (txt, p') ->
1112 if T.null txt
1113 then go p'
1114 else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
1115 in do yield (B8.pack (T.unpack safe))
1116 if T.null unsafe
1117 then go p'
1118 else return $ do yield unsafe
1119 p'
1120
1121 {- | Reduce a byte stream to a corresponding stream of ASCII 'Char's, returning the
1122      unused 'ByteString' upon hitting a non-ASCII byte.
1123 -}
1124 decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1125 decodeAscii = go where
1126 go p = do echunk <- lift (next p)
1127 case echunk of
1128 Left r -> return (return r)
1129 Right (chunk, p') ->
1130 if B.null chunk
1131 then go p'
1132 else let (safe, unsafe) = B.span (<= 0x7F) chunk
1133 in do yield (T.pack (B8.unpack safe))
1134 if B.null unsafe
1135 then go p'
1136 else return $ do yield unsafe
1137 p'
1138
1139 {- | Reduce a byte stream to a corresponding stream of ISO-8859-1 (Latin-1) 'Char's, returning the
1140      unused 'ByteString' upon hitting the rare un-latinizable byte.
1141 -}
1142 decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1143 decodeIso8859_1 = go where
1144 go p = do echunk <- lift (next p)
1145 case echunk of
1146 Left r -> return (return r)
1147 Right (chunk, p') ->
1148 if B.null chunk
1149 then go p'
1150 else let (safe, unsafe) = B.span (<= 0xFF) chunk
1151 in do yield (T.pack (B8.unpack safe))
1152 if B.null unsafe
1153 then go p'
1154 else return $ do yield unsafe
1155 p'
1156
1157
1158
1159
1160