1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
2 #if __GLASGOW_HASKELL__ >= 702
3 {-# LANGUAGE Trustworthy #-}
4 #endif
5 {-| This module provides @pipes@ utilities for \"text streams\", which are
6 streams of 'Text' chunks. The individual chunks are uniformly strict, and a
7 'Producer' can be converted to and from lazy 'Text's, though doing so is generally
8 unwise. Where pipes IO replaces lazy IO, @Producer Text m r@ replaces lazy 'Text'.
9 An 'IO.Handle' can be associated with a 'Producer' or a 'Consumer', depending on whether it is read from or written to.
10
11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
12 example, the following program copies a document from one file to another:
13
14 > import Pipes
15 > import qualified Pipes.Text as Text
16 > import System.IO
17 >
18 > main =
19 > withFile "inFile.txt" ReadMode $ \hIn ->
20 > withFile "outFile.txt" WriteMode $ \hOut ->
21 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22
23 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
24
25 > import Pipes
26 > import qualified Pipes.Text as Text
27 > import Pipes.Safe
28 >
29 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
30
31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
32 and 'stdout' proxies, as with the following \"echo\" program:
33
34 > main = runEffect $ Text.stdin >-> Text.stdout
35
36 You can also translate pure lazy 'TL.Text's to and from proxies:
37
38 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
39
40 In addition, this module provides many functions equivalent to lazy
41 'Text' functions so that you can transform or fold text streams. For
42 example, to stream only the first three lines of 'stdin' to 'stdout' you
43 might write:
44
45 > import Pipes
46 > import qualified Pipes.Text as Text
47 > import qualified Pipes.Parse as Parse
48 >
49 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
50 > where
51 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
52
53 The above program will never bring more than one chunk of text (~ 32 KB) into
54 memory, no matter how long the lines are.
55
56 Note that functions in this library are designed to operate on streams that
57 are insensitive to text boundaries. This means that they may freely split
58 text into smaller texts and /discard empty texts/. However, they will
59 /never concatenate texts/ in order to provide strict upper bounds on memory
60 usage -- with the single exception of 'concatMap'.
61 -}
62
63 module Pipes.Text (
64 -- * Producers
65 fromLazy
66 , stdin
67 , fromHandle
68 , readFile
69
70 -- * Consumers
71 , stdout
72 , toHandle
73 , writeFile
74
75 -- * Pipes
76 , map
77 , concatMap
78 , take
79 , drop
80 , takeWhile
81 , dropWhile
82 , filter
83 , scan
84 , encodeUtf8
85 , pack
86 , unpack
87 , toCaseFold
88 , toLower
89 , toUpper
90 , stripStart
91
92 -- * Folds
93 , toLazy
94 , toLazyM
95 , foldChars
96 , head
97 , last
98 , null
99 , length
100 , any
101 , all
102 , maximum
103 , minimum
104 , find
105 , index
106 , count
107
108 -- * Primitive Character Parsers
109 -- $parse
110 , nextChar
111 , drawChar
112 , unDrawChar
113 , peekChar
114 , isEndOfChars
115
116 -- * Parsing Lenses
117 , splitAt
118 , span
119 , break
120 , groupBy
121 , group
122 , word
123 , line
124
125 -- * Decoding Lenses
126 , decodeUtf8
127 , codec
128
129 -- * Codecs
130 , utf8
131 , utf16_le
132 , utf16_be
133 , utf32_le
134 , utf32_be
135
136 -- * Other Decoding/Encoding Functions
137 , decodeIso8859_1
138 , decodeAscii
139 , encodeIso8859_1
140 , encodeAscii
141
142 -- * FreeT Splitters
143 , chunksOf
144 , splitsWith
145 , splits
146 -- , groupsBy
147 -- , groups
148 , lines
149 , words
150
151 -- * Transformations
152 , intersperse
153 , packChars
154
155 -- * Joiners
156 , intercalate
157 , unlines
158 , unwords
159
160 -- * Re-exports
161 -- $reexports
162 , module Data.ByteString
163 , module Data.Text
164 , module Data.Profunctor
165 , module Data.Word
166 , module Pipes.Parse
167 , module Pipes.Group
168 , module Pipes.Text.Internal
169 ) where
170
171 import Control.Exception (throwIO, try)
172 import Control.Applicative ((<*))
173 import Control.Monad (liftM, unless, join)
174 import Control.Monad.Trans.State.Strict (StateT(..), modify)
175 import Data.Monoid ((<>))
176 import qualified Data.Text as T
177 import qualified Data.Text.IO as T
178 import qualified Data.Text.Encoding as TE
179 import qualified Data.Text.Encoding.Error as TE
180 import Data.Text (Text)
181 import qualified Data.Text.Lazy as TL
182 import qualified Data.Text.Lazy.IO as TL
183 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
184 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
185 import Data.ByteString (ByteString)
186 import qualified Data.ByteString as B
187 import qualified Data.ByteString.Char8 as B8
188 import Data.Char (ord, isSpace)
189 import Data.Functor.Constant (Constant(Constant, getConstant))
190 import Data.Functor.Identity (Identity)
191 import Data.Profunctor (Profunctor)
192 import qualified Data.Profunctor
193 import qualified Data.List as List
194 import Foreign.C.Error (Errno(Errno), ePIPE)
195 import qualified GHC.IO.Exception as G
196 import Pipes
197 import qualified Pipes.ByteString as PB
198 import qualified Pipes.Text.Internal as PI
199 import Pipes.Text.Internal
200 import Pipes.Core (respond, Server')
201 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
202 import qualified Pipes.Group as PG
203 import qualified Pipes.Parse as PP
204 import Pipes.Parse (Parser)
205 import qualified Pipes.Safe.Prelude as Safe
206 import qualified Pipes.Safe as Safe
207 import Pipes.Safe (MonadSafe(..), Base(..))
208 import qualified Pipes.Prelude as P
209 import qualified System.IO as IO
211 import Data.Word (Word8)
212
213 import Prelude hiding (
214 all,
215 any,
216 break,
217 concat,
218 concatMap,
219 drop,
220 dropWhile,
221 elem,
222 filter,
223 head,
224 last,
225 lines,
226 length,
227 map,
228 maximum,
229 minimum,
230 notElem,
231 null,
232 readFile,
233 span,
234 splitAt,
235 take,
236 takeWhile,
237 unlines,
238 unwords,
239 words,
240 writeFile )
241
242 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
243 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
244 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
245 {-# INLINE fromLazy #-}
246
247 -- | Stream text from 'stdin'
248 stdin :: MonadIO m => Producer Text m ()
249 stdin = fromHandle IO.stdin
250 {-# INLINE stdin #-}
251
252 {-| Convert an 'IO.Handle' into a text stream, using a chunk size
253 determined by the good sense of the text library; note that this
254 is distinctly slower than @decodeUtf8 (Pipes.ByteString.fromHandle h)@
255 but uses the system encoding and has other @Data.Text.IO@ features.
256 -}
257
258 fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
259 fromHandle h = go where
260 go = do txt <- liftIO (T.hGetChunk h)
261 unless (T.null txt) ( do yield txt
262 go )
263 {-# INLINABLE fromHandle #-}
264
265
266 {-| Stream text from a file in the simple fashion of @Data.Text.IO@
267
268 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
269 MAIN = PUTSTRLN "HELLO WORLD"
270 -}
271
272 readFile :: MonadSafe m => FilePath -> Producer Text m ()
273 readFile file = Safe.withFile file IO.ReadMode fromHandle
274 {-# INLINE readFile #-}
275
276
277 {-| Stream text to 'stdout'
278
279 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
280
281 Note: For best performance, it is often better to use @(for source (liftIO . putStr))@
282 instead of @(source >-> stdout)@.
283 -}
284 stdout :: MonadIO m => Consumer' Text m ()
285 stdout = go
286 where
287 go = do
288 txt <- await
289 x <- liftIO $ try (T.putStr txt)
290 case x of
291 Left (G.IOError { G.ioe_type = G.ResourceVanished
292 , G.ioe_errno = Just ioe })
293 | Errno ioe == ePIPE
294 -> return ()
295 Left e -> liftIO (throwIO e)
296 Right () -> go
297 {-# INLINABLE stdout #-}
298
299
300 {-| Convert a text stream into a 'Handle'
301
302 Note: again, for best performance, where possible use
303 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
304 -}
305 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
306 toHandle h = for cat (liftIO . T.hPutStr h)
307 {-# INLINABLE toHandle #-}
308
309 {-# RULES "p >-> toHandle h" forall p h .
310 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
311 #-}
312
313
314 -- | Stream text into a file. Uses @pipes-safe@.
315 writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
316 writeFile file = Safe.withFile file IO.WriteMode toHandle
317 {-# INLINE writeFile #-}
318
319
320 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
321
322 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
323
324 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
325 a ^. lens = getConstant (lens Constant a)
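
{- The parsing and decoding lenses below are written in this plain van Laarhoven
   form, so they can also be used with the 'view' / 'zoom' / 'over' of a lens
   library (an assumption about e.g. @lens@ or @lens-family-core@, on which this
   module does not depend), or with the local '(^.)' above.  A minimal sketch
   using the module-internal '(^.)':

> -- Split off the first three characters; the outer return value is the rest.
> firstThree :: Monad m => Producer Text m r -> Producer Text m (Producer Text m r)
> firstThree p = p ^. splitAt 3
-}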
326
327
328 -- | Apply a transformation to each 'Char' in the stream
329 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
330 map f = P.map (T.map f)
331 {-# INLINABLE map #-}
332
333 {-# RULES "p >-> map f" forall p f .
334 p >-> map f = for p (\txt -> yield (T.map f txt))
335 #-}
336
337 -- | Map a function over the characters of a text stream and concatenate the results
338 concatMap
339 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
340 concatMap f = P.map (T.concatMap f)
341 {-# INLINABLE concatMap #-}
342
343 {-# RULES "p >-> concatMap f" forall p f .
344 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
345 #-}
346
347 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
348 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
349 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
350 encodeUtf8 :: Monad m => Pipe Text ByteString m r
351 encodeUtf8 = P.map TE.encodeUtf8
352 {-# INLINEABLE encodeUtf8 #-}
353
354 {-# RULES "p >-> encodeUtf8" forall p .
355 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
356 #-}
357
358 -- | Transform a Pipe of 'String's into one of 'Text' chunks
359 pack :: Monad m => Pipe String Text m r
360 pack = P.map T.pack
361 {-# INLINEABLE pack #-}
362
363 {-# RULES "p >-> pack" forall p .
364 p >-> pack = for p (\txt -> yield (T.pack txt))
365 #-}
366
367 -- | Transform a Pipe of 'Text' chunks into one of 'String's
368 unpack :: Monad m => Pipe Text String m r
369 unpack = for cat (\t -> yield (T.unpack t))
370 {-# INLINEABLE unpack #-}
371
372 {-# RULES "p >-> unpack" forall p .
373 p >-> unpack = for p (\txt -> yield (T.unpack txt))
374 #-}
375
376 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
377 -- here acting as 'Text' pipes, much as they would act on a lazy 'Text'
378 toCaseFold :: Monad m => Pipe Text Text m ()
379 toCaseFold = P.map T.toCaseFold
380 {-# INLINEABLE toCaseFold #-}
381
382 {-# RULES "p >-> toCaseFold" forall p .
383 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
384 #-}
385
386
387 -- | lowercase incoming 'Text'
388 toLower :: Monad m => Pipe Text Text m ()
389 toLower = P.map T.toLower
390 {-# INLINEABLE toLower #-}
391
392 {-# RULES "p >-> toLower" forall p .
393 p >-> toLower = for p (\txt -> yield (T.toLower txt))
394 #-}
395
396 -- | uppercase incoming 'Text'
397 toUpper :: Monad m => Pipe Text Text m ()
398 toUpper = P.map T.toUpper
399 {-# INLINEABLE toUpper #-}
400
401 {-# RULES "p >-> toUpper" forall p .
402 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
403 #-}
404
405 -- | Remove leading white space from an incoming succession of 'Text's
406 stripStart :: Monad m => Pipe Text Text m r
407 stripStart = do
408 chunk <- await
409 let text = T.stripStart chunk
410 if T.null text
411 then stripStart
412 else do yield text
413 cat
414 {-# INLINEABLE stripStart #-}
415
416 -- | @(take n)@ only allows @n@ individual characters to pass;
417 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
418 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
419 take n0 = go n0 where
420 go n
421 | n <= 0 = return ()
422 | otherwise = do
423 txt <- await
424 let len = fromIntegral (T.length txt)
425 if (len > n)
426 then yield (T.take (fromIntegral n) txt)
427 else do
428 yield txt
429 go (n - len)
430 {-# INLINABLE take #-}
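
{- A ghci sketch of the character-level behaviour (the chunking of the input
   does not affect the result):

>>> toLazy $ each [T.pack "ab", T.pack "cd", T.pack "ef"] >-> take 3
"abc"
-}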
431
432 -- | @(drop n)@ drops the first @n@ characters
433 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
434 drop n0 = go n0 where
435 go n
436 | n <= 0 = cat
437 | otherwise = do
438 txt <- await
439 let len = fromIntegral (T.length txt)
440 if (len >= n)
441 then do
442 yield (T.drop (fromIntegral n) txt)
443 cat
444 else go (n - len)
445 {-# INLINABLE drop #-}
446
447 -- | Take characters until they fail the predicate
448 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
449 takeWhile predicate = go
450 where
451 go = do
452 txt <- await
453 let (prefix, suffix) = T.span predicate txt
454 if (T.null suffix)
455 then do
456 yield txt
457 go
458 else yield prefix
459 {-# INLINABLE takeWhile #-}
460
461 -- | Drop characters until they fail the predicate
462 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
463 dropWhile predicate = go where
464 go = do
465 txt <- await
466 case T.findIndex (not . predicate) txt of
467 Nothing -> go
468 Just i -> do
469 yield (T.drop i txt)
470 cat
471 {-# INLINABLE dropWhile #-}
472
473 -- | Only allows 'Char's to pass if they satisfy the predicate
474 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
475 filter predicate = P.map (T.filter predicate)
476 {-# INLINABLE filter #-}
477
478 {-# RULES "p >-> filter q" forall p q .
479 p >-> filter q = for p (\txt -> yield (T.filter q txt))
480 #-}
481
482 -- | Strict left scan over the characters
483 scan
484 :: (Monad m)
485 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
486 scan step begin = go begin
487 where
488 go c = do
489 txt <- await
490 let txt' = T.scanl step c txt
491 c' = T.last txt'
492 yield txt'
493 go c'
494 {-# INLINABLE scan #-}
495
496 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
497 'TL.Text'
498 -}
499 toLazy :: Producer Text Identity () -> TL.Text
500 toLazy = TL.fromChunks . P.toList
501 {-# INLINABLE toLazy #-}
502
503 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
504 'TL.Text'
505
506 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
507 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
508 immediately as they are generated instead of loading them all into memory.
509 -}
510 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
511 toLazyM = liftM TL.fromChunks . P.toListM
512 {-# INLINABLE toLazyM #-}
513
514 -- | Reduce the text stream using a strict left fold over characters
515 foldChars
516 :: Monad m
517 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
518 foldChars step begin done = P.fold (T.foldl' step) begin done
519 {-# INLINABLE foldChars #-}
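
{- For example (a sketch), a strict character-level fold that counts newlines:

> countNewlines :: Monad m => Producer Text m () -> m Int
> countNewlines = foldChars (\n c -> if c == '\n' then n + 1 else n) 0 id
-}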
520
521 -- | Retrieve the first 'Char'
522 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
523 head = go
524 where
525 go p = do
526 x <- nextChar p
527 case x of
528 Left _ -> return Nothing
529 Right (c, _) -> return (Just c)
530 {-# INLINABLE head #-}
531
532 -- | Retrieve the last 'Char'
533 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
534 last = go Nothing
535 where
536 go r p = do
537 x <- next p
538 case x of
539 Left () -> return r
540 Right (txt, p') ->
541 if (T.null txt)
542 then go r p'
543 else go (Just $ T.last txt) p'
544 {-# INLINABLE last #-}
545
546 -- | Determine if the stream is empty
547 null :: (Monad m) => Producer Text m () -> m Bool
548 null = P.all T.null
549 {-# INLINABLE null #-}
550
551 -- | Count the number of characters in the stream
552 length :: (Monad m, Num n) => Producer Text m () -> m n
553 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
554 {-# INLINABLE length #-}
555
556 -- | Fold that returns whether any received 'Char' satisfies the predicate
557 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
558 any predicate = P.any (T.any predicate)
559 {-# INLINABLE any #-}
560
561 -- | Fold that returns whether all received 'Char's satisfy the predicate
562 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
563 all predicate = P.all (T.all predicate)
564 {-# INLINABLE all #-}
565
566 -- | Return the maximum 'Char' within a text stream
567 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
568 maximum = P.fold step Nothing id
569 where
570 step mc txt =
571 if (T.null txt)
572 then mc
573 else Just $ case mc of
574 Nothing -> T.maximum txt
575 Just c -> max c (T.maximum txt)
576 {-# INLINABLE maximum #-}
577
578 -- | Return the minimum 'Char' within a text stream (surely very useful!)
579 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
580 minimum = P.fold step Nothing id
581 where
582 step mc txt =
583 if (T.null txt)
584 then mc
585 else case mc of
586 Nothing -> Just (T.minimum txt)
587 Just c -> Just (min c (T.minimum txt))
588 {-# INLINABLE minimum #-}
589
590
591 -- | Find the first element in the stream that matches the predicate
592 find
593 :: (Monad m)
594 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
595 find predicate p = head (p >-> filter predicate)
596 {-# INLINABLE find #-}
597
598 -- | Index into a text stream
599 index
600 :: (Monad m, Integral a)
601 => a-> Producer Text m () -> m (Maybe Char)
602 index n p = head (p >-> drop n)
603 {-# INLINABLE index #-}
604
605
606 -- | Count the number of times the given 'Text' occurs in the stream (matching is done chunk by chunk, so occurrences that span chunk boundaries are not counted)
607 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
608 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
609 {-# INLINABLE count #-}
610
611
612 {-| Consume the first character from a stream of 'Text'
613
614 'nextChar' either fails with a 'Left' if the 'Producer' has no more characters or
615 succeeds with a 'Right' providing the next character and the remainder of the
616 'Producer'.
617 -}
618 nextChar
619 :: (Monad m)
620 => Producer Text m r
621 -> m (Either r (Char, Producer Text m r))
622 nextChar = go
623 where
624 go p = do
625 x <- next p
626 case x of
627 Left r -> return (Left r)
628 Right (txt, p') -> case (T.uncons txt) of
629 Nothing -> go p'
630 Just (c, txt') -> return (Right (c, yield txt' >> p'))
631 {-# INLINABLE nextChar #-}
632
633 {-| Draw one 'Char' from a stream of 'Text', returning 'Nothing' if the
634 'Producer' is empty
635 -}
636 drawChar :: (Monad m) => Parser Text m (Maybe Char)
637 drawChar = do
638 x <- PP.draw
639 case x of
640 Nothing -> return Nothing
641 Just txt -> case (T.uncons txt) of
642 Nothing -> drawChar
643 Just (c, txt') -> do
644 PP.unDraw txt'
645 return (Just c)
646 {-# INLINABLE drawChar #-}
647
648 -- | Push back a 'Char' onto the underlying 'Producer'
649 unDrawChar :: (Monad m) => Char -> Parser Text m ()
650 unDrawChar c = modify (yield (T.singleton c) >>)
651 {-# INLINABLE unDrawChar #-}
652
653 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
654 push the 'Char' back
655
656 > peekChar = do
657 > x <- drawChar
658 > case x of
659 > Nothing -> return ()
660 > Just c -> unDrawChar c
661 > return x
662 -}
663 peekChar :: (Monad m) => Parser Text m (Maybe Char)
664 peekChar = do
665 x <- drawChar
666 case x of
667 Nothing -> return ()
668 Just c -> unDrawChar c
669 return x
670 {-# INLINABLE peekChar #-}
671
672 {-| Check if the underlying 'Producer' has no more characters
673
674 Note that this will skip over empty 'Text' chunks, unlike
675 'PP.isEndOfInput' from @pipes-parse@, which would consider
676 an empty 'Text' a valid bit of input.
677
678 > isEndOfChars = liftM isNothing peekChar
679 -}
680 isEndOfChars :: (Monad m) => Parser Text m Bool
681 isEndOfChars = do
682 x <- peekChar
683 return (case x of
684 Nothing -> True
685 Just _-> False )
686 {-# INLINABLE isEndOfChars #-}
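
{- These parsers are run with the usual @pipes-parse@ / 'StateT' machinery.
   A ghci sketch (assuming 'evalStateT' from "Control.Monad.Trans.State.Strict"
   and 'liftM2' from "Control.Monad"):

>>> evalStateT (liftM2 (,) drawChar drawChar) (fromLazy (TL.pack "ab"))
(Just 'a',Just 'b')
-}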
687
688
689 {- | An improper lens into a stream of 'ByteString's expected to be UTF-8 encoded; the associated
690 stream of 'Text' ends by returning a stream of 'ByteString's beginning at the point of decoding failure.
691 -}
692
693 decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
694 (Producer Text m (Producer ByteString m r))
695 decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
696 (k (go B.empty PI.streamDecodeUtf8 p0)) where
697 go !carry dec0 p = do
698 x <- lift (next p)
699 case x of Left r -> return (if B.null carry
700 then return r -- all bytestring input was consumed
701 else (do yield carry -- a potentially valid fragment remains
702 return r))
703
704 Right (chunk, p') -> case dec0 chunk of
705 PI.Some text carry2 dec -> do yield text
706 go carry2 dec p'
707 PI.Other text bs -> do yield text
708 return (do yield bs -- an invalid blob remains
709 p')
710 {-# INLINABLE decodeUtf8 #-}
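
{- A sketch of typical 'decodeUtf8' use, with the module-internal '(^.)':
   decode bytes from standard input, stream the decoded 'Text' to standard
   output via 'toHandle' (whose polymorphic return type lets it compose with a
   producer that returns the leftover bytes), then echo any undecodable bytes
   verbatim.

> main :: IO ()
> main = do badBytes <- runEffect $ (PB.stdin ^. decodeUtf8) >-> toHandle IO.stdout
>           runEffect $ badBytes >-> PB.stdout
-}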
711
712
713 -- | Splits a 'Producer' after the given number of characters
714 splitAt
715 :: (Monad m, Integral n)
716 => n
717 -> Lens' (Producer Text m r)
718 (Producer Text m (Producer Text m r))
719 splitAt n0 k p0 = fmap join (k (go n0 p0))
720 where
721 go 0 p = return p
722 go n p = do
723 x <- lift (next p)
724 case x of
725 Left r -> return (return r)
726 Right (txt, p') -> do
727 let len = fromIntegral (T.length txt)
728 if (len <= n)
729 then do
730 yield txt
731 go (n - len) p'
732 else do
733 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
734 yield prefix
735 return (yield suffix >> p')
736 {-# INLINABLE splitAt #-}
737
738
739 {-| Split a text stream in two, where the first text stream is the longest
740 consecutive group of characters that satisfy the predicate
741 -}
742 span
743 :: (Monad m)
744 => (Char -> Bool)
745 -> Lens' (Producer Text m r)
746 (Producer Text m (Producer Text m r))
747 span predicate k p0 = fmap join (k (go p0))
748 where
749 go p = do
750 x <- lift (next p)
751 case x of
752 Left r -> return (return r)
753 Right (txt, p') -> do
754 let (prefix, suffix) = T.span predicate txt
755 if (T.null suffix)
756 then do
757 yield txt
758 go p'
759 else do
760 yield prefix
761 return (yield suffix >> p')
762 {-# INLINABLE span #-}
763
764 {-| Split a text stream in two, where the first text stream is the longest
765 consecutive group of characters that don't satisfy the predicate
766 -}
767 break
768 :: (Monad m)
769 => (Char -> Bool)
770 -> Lens' (Producer Text m r)
771 (Producer Text m (Producer Text m r))
772 break predicate = span (not . predicate)
773 {-# INLINABLE break #-}
774
775 {-| Improper lens that splits after the first group of equivalent 'Char's, as
776 defined by the given equivalence relation
777 -}
778 groupBy
779 :: (Monad m)
780 => (Char -> Char -> Bool)
781 -> Lens' (Producer Text m r)
782 (Producer Text m (Producer Text m r))
783 groupBy equals k p0 = fmap join (k ((go p0))) where
784 go p = do
785 x <- lift (next p)
786 case x of
787 Left r -> return (return r)
788 Right (txt, p') -> case T.uncons txt of
789 Nothing -> go p'
790 Just (c, _) -> (yield txt >> p') ^. span (equals c)
791 {-# INLINABLE groupBy #-}
792
793 -- | Improper lens that splits after the first succession of identical 'Char's
794 group :: Monad m
795 => Lens' (Producer Text m r)
796 (Producer Text m (Producer Text m r))
797 group = groupBy (==)
798 {-# INLINABLE group #-}
799
800 {-| Improper lens that splits a 'Producer' after the first word
801
802 Unlike 'words', this does not drop leading whitespace
803 -}
804 word :: (Monad m)
805 => Lens' (Producer Text m r)
806 (Producer Text m (Producer Text m r))
807 word k p0 = fmap join (k (to p0))
808 where
809 to p = do
810 p' <- p^.span isSpace
811 p'^.break isSpace
812 {-# INLINABLE word #-}
813
814
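-- | Improper lens that splits a 'Producer' before the first newline; the
--   newline itself is left at the head of the remainder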
815 line :: (Monad m)
816 => Lens' (Producer Text m r)
817 (Producer Text m (Producer Text m r))
818 line = break (== '\n')
819
820 {-# INLINABLE line #-}
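
{- A sketch: print only the first line of standard input; the trailing newline
   and everything after it remain in the leftover 'Producer', which is simply
   drained here.

> firstLine :: IO ()
> firstLine = do rest <- runEffect $ (stdin ^. line) >-> toHandle IO.stdout
>                runEffect $ rest >-> P.drain
-}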
821
822
823 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
824 intersperse
825 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
826 intersperse c = go0
827 where
828 go0 p = do
829 x <- lift (next p)
830 case x of
831 Left r -> return r
832 Right (txt, p') -> do
833 yield (T.intersperse c txt)
834 go1 p'
835 go1 p = do
836 x <- lift (next p)
837 case x of
838 Left r -> return r
839 Right (txt, p') -> do
840 yield (T.singleton c)
841 yield (T.intersperse c txt)
842 go1 p'
843 {-# INLINABLE intersperse #-}
844
845
846
847 -- | Improper isomorphism between a 'Producer' of 'Char's and a 'Producer' of 'Text' chunks
848 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
849 packChars = Data.Profunctor.dimap to (fmap from)
850 where
851 -- to :: Monad m => Producer Char m x -> Producer Text m x
852 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
853
854 step diffAs c = diffAs . (c:)
855
856 done diffAs = T.pack (diffAs [])
857
858 -- from :: Monad m => Producer Text m x -> Producer Char m x
859 from p = for p (each . T.unpack)
860 {-# INLINABLE packChars #-}
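
{- A ghci sketch, again using the module-internal '(^.)' as 'view':

>>> toLazy $ each ("hello" :: String) ^. packChars
"hello"
-}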
861
862
863 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
864 chunksOf
865 :: (Monad m, Integral n)
866 => n -> Lens' (Producer Text m r)
867 (FreeT (Producer Text m) m r)
868 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
869 where
870 go p = do
871 x <- next p
872 return $ case x of
873 Left r -> Pure r
874 Right (txt, p') -> Free $ do
875 p'' <- (yield txt >> p') ^. splitAt n
876 return $ FreeT (go p'')
877 {-# INLINABLE chunksOf #-}
878
879
880 {-| Split a text stream into sub-streams delimited by characters that satisfy the
881 predicate
882 -}
883 splitsWith
884 :: (Monad m)
885 => (Char -> Bool)
886 -> Producer Text m r
887 -> FreeT (Producer Text m) m r
888 splitsWith predicate p0 = FreeT (go0 p0)
889 where
890 go0 p = do
891 x <- next p
892 case x of
893 Left r -> return (Pure r)
894 Right (txt, p') ->
895 if (T.null txt)
896 then go0 p'
897 else return $ Free $ do
898 p'' <- (yield txt >> p') ^. span (not . predicate)
899 return $ FreeT (go1 p'')
900 go1 p = do
901 x <- nextChar p
902 return $ case x of
903 Left r -> Pure r
904 Right (_, p') -> Free $ do
905 p'' <- p' ^. span (not . predicate)
906 return $ FreeT (go1 p'')
907 {-# INLINABLE splitsWith #-}
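
{- A sketch: keep only the text before the first comma, combining 'splitsWith'
   with "Pipes.Group" (this assumes 'PG.takes', which drops the remaining
   groups without draining them):

> firstField :: Monad m => Producer Text m () -> Producer Text m ()
> firstField p = concats (PG.takes 1 (splitsWith (== ',') p))
-}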
908
909 -- | Split a text stream using the given 'Char' as the delimiter
910 splits :: (Monad m)
911 => Char
912 -> Lens' (Producer Text m r)
913 (FreeT (Producer Text m) m r)
914 splits c k p =
915 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
916 {-# INLINABLE splits #-}
917
918 {-| Improper lens from a stream of 'Text' to 'FreeT'-delimited groups of equivalent
919 'Char's, using the given equivalence relation
920 -}
921 groupsBy
922 :: Monad m
923 => (Char -> Char -> Bool)
924 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
925 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
926 go p = do x <- next p
927 case x of Left r -> return (Pure r)
928 Right (bs, p') -> case T.uncons bs of
929 Nothing -> go p'
930 Just (c, _) -> do return $ Free $ do
931 p'' <- (yield bs >> p')^.span (equals c)
932 return $ FreeT (go p'')
933 {-# INLINABLE groupsBy #-}
934
935
936 -- | Like 'groupsBy', where the equality predicate is ('==')
937 groups
938 :: Monad m
939 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
940 groups = groupsBy (==)
941 {-# INLINABLE groups #-}
942
943
944
945 {-| Split a text stream into 'FreeT'-delimited lines
946 -}
947 lines
948 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
949 lines = Data.Profunctor.dimap _lines (fmap _unlines)
950 where
951 _lines p0 = FreeT (go0 p0)
952 where
953 go0 p = do
954 x <- next p
955 case x of
956 Left r -> return (Pure r)
957 Right (txt, p') ->
958 if (T.null txt)
959 then go0 p'
960 else return $ Free $ go1 (yield txt >> p')
961 go1 p = do
962 p' <- p ^. break ('\n' ==)
963 return $ FreeT $ do
964 x <- nextChar p'
965 case x of
966 Left r -> return $ Pure r
967 Right (_, p'') -> go0 p''
968 -- _unlines
969 -- :: Monad m
970 -- => FreeT (Producer Text m) m x -> Producer Text m x
971 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
972
973
974 {-# INLINABLE lines #-}
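
{- A sketch of the @takeLines@ example from the module header, adapted to the
   lens-style 'lines' (this assumes 'PG.takes' from "Pipes.Group"; with a lens
   library one could equivalently write @over lines (PG.takes n)@):

> takeLines :: Monad m => Int -> Producer Text m () -> Producer Text m ()
> takeLines n p = unlines (PG.takes n (p ^. lines))
>
> main :: IO ()
> main = runEffect $ takeLines 3 stdin >-> toHandle IO.stdout
-}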
975
976
977 -- | Split a text stream into 'FreeT'-delimited words
978 words
979 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
980 words = Data.Profunctor.dimap go (fmap _unwords)
981 where
982 go p = FreeT $ do
983 x <- next (p >-> dropWhile isSpace)
984 return $ case x of
985 Left r -> Pure r
986 Right (bs, p') -> Free $ do
987 p'' <- (yield bs >> p') ^. break isSpace
988 return (go p'')
989 _unwords = PG.intercalates (yield $ T.singleton ' ')
990
991 {-# INLINABLE words #-}
992
993
994 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
995 interspersing a text stream in between them
996 -}
997 intercalate
998 :: (Monad m)
999 => Producer Text m ()
1000 -> FreeT (Producer Text m) m r
1001 -> Producer Text m r
1002 intercalate p0 = go0
1003 where
1004 go0 f = do
1005 x <- lift (runFreeT f)
1006 case x of
1007 Pure r -> return r
1008 Free p -> do
1009 f' <- p
1010 go1 f'
1011 go1 f = do
1012 x <- lift (runFreeT f)
1013 case x of
1014 Pure r -> return r
1015 Free p -> do
1016 p0
1017 f' <- p
1018 go1 f'
1019 {-# INLINABLE intercalate #-}
1020
1021 {-| Join 'FreeT'-delimited lines into a text stream
1022 -}
1023 unlines
1024 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1025 unlines = go
1026 where
1027 go f = do
1028 x <- lift (runFreeT f)
1029 case x of
1030 Pure r -> return r
1031 Free p -> do
1032 f' <- p
1033 yield $ T.singleton '\n'
1034 go f'
1035 {-# INLINABLE unlines #-}
1036
1037 {-| Join 'FreeT'-delimited words into a text stream
1038 -}
1039 unwords
1040 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1041 unwords = intercalate (yield $ T.singleton ' ')
1042 {-# INLINABLE unwords #-}
1043
1044 {- $parse
1045 The following parsing utilities are single-character analogs of the ones found
1046 in @pipes-parse@.
1047 -}
1048
1049 {- $reexports
1050
1051 @Data.Text@ re-exports the 'Text' type.
1052
1053 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parser' synonym.
1054 -}
1055
1056 {- | Use a 'Codec' as a pipes-style 'Lens' into a byte stream; the available 'Codec's are
1057 'utf8', 'utf16_le', 'utf16_be', 'utf32_le', 'utf32_be'. The 'Codec' concept and the
1058 individual 'Codec' definitions follow the enumerator and conduit libraries.
1059 
1060 UTF-8 is handled differently in this library -- without the use of 'unsafePerformIO' & co.
1061 to catch 'Text' exceptions; but the same @mypipe ^. codec utf8@ interface can be used.
1062 @mypipe ^. decodeUtf8@ should behave the same, but has a somewhat more direct and thus perhaps
1063 better implementation.
1064
1065 -}
1066 codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
1067 codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
1068 (k (decoder (dec B.empty) p0) ) where
1069 decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1070 decoder !d p0 = case d of
1071 PI.Other txt bad -> do yield txt
1072 return (do yield bad
1073 p0)
1074 PI.Some txt extra dec -> do yield txt
1075 x <- lift (next p0)
1076 case x of Left r -> return (do yield extra
1077 return r)
1078 Right (chunk,p1) -> decoder (dec chunk) p1
1079
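{- A sketch of 'codec' use, with the module-internal '(^.)': decode a UTF-16LE
   byte stream, returning the undecoded remainder of the byte stream.

> utf16Texts :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
> utf16Texts p = p ^. codec utf16_le
-}
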
1080 {- | ASCII and Latin-1 encodings only represent a small fragment of 'Text'; thus we cannot
1081 use the pipes 'Lens' style to work with them. Rather, we simply define functions
1082 in each direction.
1083 
1084 'encodeAscii': Reduce as much of your stream of 'Text' as is actually ASCII to a byte stream,
1085 returning the rest of the 'Text' at the first non-ASCII 'Char'
1086 -}
1087 encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1088 encodeAscii = go where
1089 go p = do echunk <- lift (next p)
1090 case echunk of
1091 Left r -> return (return r)
1092 Right (chunk, p') ->
1093 if T.null chunk
1094 then go p'
1095 else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
1096 in do yield (B8.pack (T.unpack safe))
1097 if T.null unsafe
1098 then go p'
1099 else return $ do yield unsafe
1100 p'
1101 {- | Reduce as much of your stream of 'Text' as is actually ISO-8859-1 (Latin-1) to a byte stream,
1102 returning the rest of the 'Text' upon hitting any non-Latin-1 'Char'
1103 -}
1104 encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1105 encodeIso8859_1 = go where
1106 go p = do etxt <- lift (next p)
1107 case etxt of
1108 Left r -> return (return r)
1109 Right (txt, p') ->
1110 if T.null txt
1111 then go p'
1112 else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
1113 in do yield (B8.pack (T.unpack safe))
1114 if T.null unsafe
1115 then go p'
1116 else return $ do yield unsafe
1117 p'
1118
1119 {- | Reduce a byte stream to a corresponding stream of ASCII 'Char's, returning the
1120 unused 'ByteString' upon hitting a non-ASCII byte.
1121 -}
1122 decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1123 decodeAscii = go where
1124 go p = do echunk <- lift (next p)
1125 case echunk of
1126 Left r -> return (return r)
1127 Right (chunk, p') ->
1128 if B.null chunk
1129 then go p'
1130 else let (safe, unsafe) = B.span (<= 0x7F) chunk
1131 in do yield (T.pack (B8.unpack safe))
1132 if B.null unsafe
1133 then go p'
1134 else return $ do yield unsafe
1135 p'
1136
1137 {- | Reduce a byte stream to a corresponding stream of ISO-8859-1 (Latin-1) 'Char's, returning the
1138 unused 'ByteString' upon hitting any byte that cannot be decoded.
1139 -}
1140 decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1141 decodeIso8859_1 = go where
1142 go p = do echunk <- lift (next p)
1143 case echunk of
1144 Left r -> return (return r)
1145 Right (chunk, p') ->
1146 if B.null chunk
1147 then go p'
1148 else let (safe, unsafe) = B.span (<= 0xFF) chunk
1149 in do yield (T.pack (B8.unpack safe))
1150 if B.null unsafe
1151 then go p'
1152 else return $ do yield unsafe
1153 p'
1154
1155
1156
1157
1158