]> git.immae.eu Git - github/fretlink/text-pipes.git/blob - Pipes/Text.hs
0957a7d40b7975f54bfa55cc8ce2b4ee6bddeabd
[github/fretlink/text-pipes.git] / Pipes / Text.hs
1 {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
2 #if __GLASGOW_HASKELL__ >= 702
3 {-# LANGUAGE Trustworthy #-}
4 #endif
5 {-| This module provides @pipes@ utilities for \"text streams\", which are
6 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
7 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
8 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
9 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
10
11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
12 example, the following program copies a document from one file to another:
13
14 > import Pipes
15 > import qualified Data.Text.Pipes as Text
16 > import System.IO
17 >
18 > main =
19 > withFile "inFile.txt" ReadMode $ \hIn ->
20 > withFile "outFile.txt" WriteMode $ \hOut ->
21 > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22
23 To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
24
25 > import Pipes
26 > import qualified Data.Text.Pipes as Text
27 > import Pipes.Safe
28 >
29 > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
30
31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
32 and 'stdout' proxies, as with the following \"echo\" program:
33
34 > main = runEffect $ Text.stdin >-> Text.stdout
35
36 You can also translate pure lazy 'TL.Text's to and from proxies:
37
38 > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
39
40 In addition, this module provides many functions equivalent to lazy
41 'Text' functions so that you can transform or fold text streams. For
42 example, to stream only the first three lines of 'stdin' to 'stdout' you
43 might write:
44
45 > import Pipes
46 > import qualified Pipes.Text as Text
47 > import qualified Pipes.Parse as Parse
48 >
49 > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
50 > where
51 > takeLines n = Text.unlines . Parse.takeFree n . Text.lines
52
53 The above program will never bring more than one chunk of text (~ 32 KB) into
54 memory, no matter how long the lines are.
55
56 Note that functions in this library are designed to operate on streams that
57 are insensitive to text boundaries. This means that they may freely split
58 text into smaller texts, /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
61 -}
62
63 module Pipes.Text (
64 -- * Producers
65 fromLazy
66 , stdin
67 , fromHandle
68 , readFile
69 , stdinLn
70
71 -- * Consumers
72 , stdout
73 , stdoutLn
74 , toHandle
75 , writeFile
76
77 -- * Pipes
78 , map
79 , concatMap
80 , take
81 , drop
82 , takeWhile
83 , dropWhile
84 , filter
85 , scan
86 , encodeUtf8
87 , pack
88 , unpack
89 , toCaseFold
90 , toLower
91 , toUpper
92 , stripStart
93
94 -- * Folds
95 , toLazy
96 , toLazyM
97 , foldChars
98 , head
99 , last
100 , null
101 , length
102 , any
103 , all
104 , maximum
105 , minimum
106 , find
107 , index
108 , count
109
110 -- * Primitive Character Parsers
111 -- $parse
112 , nextChar
113 , drawChar
114 , unDrawChar
115 , peekChar
116 , isEndOfChars
117
118 -- * Parsing Lenses
119 , splitAt
120 , span
121 , break
122 , groupBy
123 , group
124 , word
125 , line
126
127 -- * Decoding Lenses
128 , decodeUtf8
129 , codec
130
131 -- * Codecs
132 , utf8
133 , utf16_le
134 , utf16_be
135 , utf32_le
136 , utf32_be
137
138 -- * Other Decoding/Encoding Functions
139 , decodeIso8859_1
140 , decodeAscii
141 , encodeIso8859_1
142 , encodeAscii
143
144 -- * FreeT Splitters
145 , chunksOf
146 , splitsWith
147 , splits
148 -- , groupsBy
149 -- , groups
150 , lines
151 , words
152
153 -- * Transformations
154 , intersperse
155 , packChars
156
157 -- * Joiners
158 , intercalate
159 , unlines
160 , unwords
161
162 -- * Re-exports
163 -- $reexports
164 , module Data.ByteString
165 , module Data.Text
166 , module Data.Profunctor
167 , module Data.Word
168 , module Pipes.Parse
169 , module Pipes.Group
170 , module Pipes.Text.Internal.Codec
171 ) where
172
173 import Control.Exception (throwIO, try)
174 import Control.Applicative ((<*))
175 import Control.Monad (liftM, unless, join)
176 import Control.Monad.Trans.State.Strict (StateT(..), modify)
177 import Data.Monoid ((<>))
178 import qualified Data.Text as T
179 import qualified Data.Text.IO as T
180 import qualified Data.Text.Encoding as TE
181 import qualified Data.Text.Encoding.Error as TE
182 import Data.Text (Text)
183 import qualified Data.Text.Lazy as TL
184 import qualified Data.Text.Lazy.IO as TL
185 import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
186 import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
187 import Data.ByteString (ByteString)
188 import qualified Data.ByteString as B
189 import qualified Data.ByteString.Char8 as B8
190 import Data.Char (ord, isSpace)
191 import Data.Functor.Constant (Constant(Constant, getConstant))
192 import Data.Functor.Identity (Identity)
193 import Data.Profunctor (Profunctor)
194 import qualified Data.Profunctor
195 import qualified Data.List as List
196 import Foreign.C.Error (Errno(Errno), ePIPE)
197 import qualified GHC.IO.Exception as G
198 import Pipes
199 import qualified Pipes.ByteString as PB
200 import qualified Pipes.Text.Internal.Decoding as PE
201 import Pipes.Text.Internal.Codec
202 import Pipes.Core (respond, Server')
203 import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
204 import qualified Pipes.Group as PG
205 import qualified Pipes.Parse as PP
206 import Pipes.Parse (Parser)
207 import qualified Pipes.Safe.Prelude as Safe
208 import qualified Pipes.Safe as Safe
209 import Pipes.Safe (MonadSafe(..), Base(..))
210 import qualified Pipes.Prelude as P
211 import qualified System.IO as IO
212 import Data.Char (isSpace)
213 import Data.Word (Word8)
214
215 import Prelude hiding (
216 all,
217 any,
218 break,
219 concat,
220 concatMap,
221 drop,
222 dropWhile,
223 elem,
224 filter,
225 head,
226 last,
227 lines,
228 length,
229 map,
230 maximum,
231 minimum,
232 notElem,
233 null,
234 readFile,
235 span,
236 splitAt,
237 take,
238 takeWhile,
239 unlines,
240 unwords,
241 words,
242 writeFile )
243
244 -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
245 fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
246 fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
247 {-# INLINE fromLazy #-}
248
249 -- | Stream text from 'stdin'
250 stdin :: MonadIO m => Producer Text m ()
251 stdin = fromHandle IO.stdin
252 {-# INLINE stdin #-}
253
254 {-| Convert a 'IO.Handle' into a text stream using a text size
255 determined by the good sense of the text library; note that this
256 is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
257 but uses the system encoding and has other `Data.Text.IO` features
258 -}
259
260 fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
261 fromHandle h = go where
262 go = do txt <- liftIO (T.hGetChunk h)
263 unless (T.null txt) ( do yield txt
264 go )
265 {-# INLINABLE fromHandle#-}
266
267
268 {-| Stream text from a file in the simple fashion of @Data.Text.IO@
269
270 >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
271 MAIN = PUTSTRLN "HELLO WORLD"
272 -}
273
274 readFile :: MonadSafe m => FilePath -> Producer Text m ()
275 readFile file = Safe.withFile file IO.ReadMode fromHandle
276 {-# INLINE readFile #-}
277
278 {-| Crudely stream lines of input from stdin in the style of Pipes.Prelude.
279 This is for testing in ghci etc.; obviously it will be unsound if used to recieve
280 the contents of immense files with few newlines.
281
282 >>> let safely = runSafeT . runEffect
283 >>> safely $ for Text.stdinLn (lift . lift . print . T.length)
284 hello
285 5
286 world
287 5
288
289 -}
290 stdinLn :: MonadIO m => Producer' Text m ()
291 stdinLn = go where
292 go = do
293 eof <- liftIO (IO.hIsEOF IO.stdin)
294 unless eof $ do
295 txt <- liftIO (T.hGetLine IO.stdin)
296 yield txt
297 go
298 {-# INLINABLE stdinLn #-}
299
300 {-| Stream text to 'stdout'
301
302 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
303
304 Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@
305 instead of @(source >-> stdout)@ .
306 -}
307 stdout :: MonadIO m => Consumer' Text m ()
308 stdout = go
309 where
310 go = do
311 txt <- await
312 x <- liftIO $ try (T.putStr txt)
313 case x of
314 Left (G.IOError { G.ioe_type = G.ResourceVanished
315 , G.ioe_errno = Just ioe })
316 | Errno ioe == ePIPE
317 -> return ()
318 Left e -> liftIO (throwIO e)
319 Right () -> go
320 {-# INLINABLE stdout #-}
321
322 stdoutLn :: (MonadIO m) => Consumer' Text m ()
323 stdoutLn = go
324 where
325 go = do
326 str <- await
327 x <- liftIO $ try (T.putStrLn str)
328 case x of
329 Left (G.IOError { G.ioe_type = G.ResourceVanished
330 , G.ioe_errno = Just ioe })
331 | Errno ioe == ePIPE
332 -> return ()
333 Left e -> liftIO (throwIO e)
334 Right () -> go
335 {-# INLINABLE stdoutLn #-}
336
337 {-| Convert a text stream into a 'Handle'
338
339 Note: again, for best performance, where possible use
340 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
341 -}
342 toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
343 toHandle h = for cat (liftIO . T.hPutStr h)
344 {-# INLINABLE toHandle #-}
345
346 {-# RULES "p >-> toHandle h" forall p h .
347 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
348 #-}
349
350
351 -- | Stream text into a file. Uses @pipes-safe@.
352 writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
353 writeFile file = Safe.withFile file IO.WriteMode toHandle
354 {-# INLINE writeFile #-}
355
356
357 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
358
359 type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
360
361 (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
362 a ^. lens = getConstant (lens Constant a)
363
364
365 -- | Apply a transformation to each 'Char' in the stream
366 map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
367 map f = P.map (T.map f)
368 {-# INLINABLE map #-}
369
370 {-# RULES "p >-> map f" forall p f .
371 p >-> map f = for p (\txt -> yield (T.map f txt))
372 #-}
373
374 -- | Map a function over the characters of a text stream and concatenate the results
375 concatMap
376 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
377 concatMap f = P.map (T.concatMap f)
378 {-# INLINABLE concatMap #-}
379
380 {-# RULES "p >-> concatMap f" forall p f .
381 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
382 #-}
383
384 -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
385 -- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
386 -- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
387 encodeUtf8 :: Monad m => Pipe Text ByteString m r
388 encodeUtf8 = P.map TE.encodeUtf8
389 {-# INLINEABLE encodeUtf8 #-}
390
391 {-# RULES "p >-> encodeUtf8" forall p .
392 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
393 #-}
394
395 -- | Transform a Pipe of 'String's into one of 'Text' chunks
396 pack :: Monad m => Pipe String Text m r
397 pack = P.map T.pack
398 {-# INLINEABLE pack #-}
399
400 {-# RULES "p >-> pack" forall p .
401 p >-> pack = for p (\txt -> yield (T.pack txt))
402 #-}
403
404 -- | Transform a Pipes of 'Text' chunks into one of 'String's
405 unpack :: Monad m => Pipe Text String m r
406 unpack = for cat (\t -> yield (T.unpack t))
407 {-# INLINEABLE unpack #-}
408
409 {-# RULES "p >-> unpack" forall p .
410 p >-> unpack = for p (\txt -> yield (T.unpack txt))
411 #-}
412
413 -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
414 -- here acting as 'Text' pipes, rather as they would on a lazy text
415 toCaseFold :: Monad m => Pipe Text Text m ()
416 toCaseFold = P.map T.toCaseFold
417 {-# INLINEABLE toCaseFold #-}
418
419 {-# RULES "p >-> toCaseFold" forall p .
420 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
421 #-}
422
423
424 -- | lowercase incoming 'Text'
425 toLower :: Monad m => Pipe Text Text m ()
426 toLower = P.map T.toLower
427 {-# INLINEABLE toLower #-}
428
429 {-# RULES "p >-> toLower" forall p .
430 p >-> toLower = for p (\txt -> yield (T.toLower txt))
431 #-}
432
433 -- | uppercase incoming 'Text'
434 toUpper :: Monad m => Pipe Text Text m ()
435 toUpper = P.map T.toUpper
436 {-# INLINEABLE toUpper #-}
437
438 {-# RULES "p >-> toUpper" forall p .
439 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
440 #-}
441
442 -- | Remove leading white space from an incoming succession of 'Text's
443 stripStart :: Monad m => Pipe Text Text m r
444 stripStart = do
445 chunk <- await
446 let text = T.stripStart chunk
447 if T.null text
448 then stripStart
449 else do yield text
450 cat
451 {-# INLINEABLE stripStart #-}
452
453 -- | @(take n)@ only allows @n@ individual characters to pass;
454 -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
455 take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
456 take n0 = go n0 where
457 go n
458 | n <= 0 = return ()
459 | otherwise = do
460 txt <- await
461 let len = fromIntegral (T.length txt)
462 if (len > n)
463 then yield (T.take (fromIntegral n) txt)
464 else do
465 yield txt
466 go (n - len)
467 {-# INLINABLE take #-}
468
469 -- | @(drop n)@ drops the first @n@ characters
470 drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
471 drop n0 = go n0 where
472 go n
473 | n <= 0 = cat
474 | otherwise = do
475 txt <- await
476 let len = fromIntegral (T.length txt)
477 if (len >= n)
478 then do
479 yield (T.drop (fromIntegral n) txt)
480 cat
481 else go (n - len)
482 {-# INLINABLE drop #-}
483
484 -- | Take characters until they fail the predicate
485 takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
486 takeWhile predicate = go
487 where
488 go = do
489 txt <- await
490 let (prefix, suffix) = T.span predicate txt
491 if (T.null suffix)
492 then do
493 yield txt
494 go
495 else yield prefix
496 {-# INLINABLE takeWhile #-}
497
498 -- | Drop characters until they fail the predicate
499 dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
500 dropWhile predicate = go where
501 go = do
502 txt <- await
503 case T.findIndex (not . predicate) txt of
504 Nothing -> go
505 Just i -> do
506 yield (T.drop i txt)
507 cat
508 {-# INLINABLE dropWhile #-}
509
510 -- | Only allows 'Char's to pass if they satisfy the predicate
511 filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
512 filter predicate = P.map (T.filter predicate)
513 {-# INLINABLE filter #-}
514
515 {-# RULES "p >-> filter q" forall p q .
516 p >-> filter q = for p (\txt -> yield (T.filter q txt))
517 #-}
518
519 -- | Strict left scan over the characters
520 scan
521 :: (Monad m)
522 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
523 scan step begin = go begin
524 where
525 go c = do
526 txt <- await
527 let txt' = T.scanl step c txt
528 c' = T.last txt'
529 yield txt'
530 go c'
531 {-# INLINABLE scan #-}
532
533 {-| Fold a pure 'Producer' of strict 'Text's into a lazy
534 'TL.Text'
535 -}
536 toLazy :: Producer Text Identity () -> TL.Text
537 toLazy = TL.fromChunks . P.toList
538 {-# INLINABLE toLazy #-}
539
540 {-| Fold an effectful 'Producer' of strict 'Text's into a lazy
541 'TL.Text'
542
543 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
544 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
545 immediately as they are generated instead of loading them all into memory.
546 -}
547 toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
548 toLazyM = liftM TL.fromChunks . P.toListM
549 {-# INLINABLE toLazyM #-}
550
551 -- | Reduce the text stream using a strict left fold over characters
552 foldChars
553 :: Monad m
554 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
555 foldChars step begin done = P.fold (T.foldl' step) begin done
556 {-# INLINABLE foldChars #-}
557
558 -- | Retrieve the first 'Char'
559 head :: (Monad m) => Producer Text m () -> m (Maybe Char)
560 head = go
561 where
562 go p = do
563 x <- nextChar p
564 case x of
565 Left _ -> return Nothing
566 Right (c, _) -> return (Just c)
567 {-# INLINABLE head #-}
568
569 -- | Retrieve the last 'Char'
570 last :: (Monad m) => Producer Text m () -> m (Maybe Char)
571 last = go Nothing
572 where
573 go r p = do
574 x <- next p
575 case x of
576 Left () -> return r
577 Right (txt, p') ->
578 if (T.null txt)
579 then go r p'
580 else go (Just $ T.last txt) p'
581 {-# INLINABLE last #-}
582
583 -- | Determine if the stream is empty
584 null :: (Monad m) => Producer Text m () -> m Bool
585 null = P.all T.null
586 {-# INLINABLE null #-}
587
588 -- | Count the number of characters in the stream
589 length :: (Monad m, Num n) => Producer Text m () -> m n
590 length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
591 {-# INLINABLE length #-}
592
593 -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
594 any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
595 any predicate = P.any (T.any predicate)
596 {-# INLINABLE any #-}
597
598 -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
599 all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
600 all predicate = P.all (T.all predicate)
601 {-# INLINABLE all #-}
602
603 -- | Return the maximum 'Char' within a text stream
604 maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
605 maximum = P.fold step Nothing id
606 where
607 step mc txt =
608 if (T.null txt)
609 then mc
610 else Just $ case mc of
611 Nothing -> T.maximum txt
612 Just c -> max c (T.maximum txt)
613 {-# INLINABLE maximum #-}
614
615 -- | Return the minimum 'Char' within a text stream (surely very useful!)
616 minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
617 minimum = P.fold step Nothing id
618 where
619 step mc txt =
620 if (T.null txt)
621 then mc
622 else case mc of
623 Nothing -> Just (T.minimum txt)
624 Just c -> Just (min c (T.minimum txt))
625 {-# INLINABLE minimum #-}
626
627
628 -- | Find the first element in the stream that matches the predicate
629 find
630 :: (Monad m)
631 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
632 find predicate p = head (p >-> filter predicate)
633 {-# INLINABLE find #-}
634
635 -- | Index into a text stream
636 index
637 :: (Monad m, Integral a)
638 => a-> Producer Text m () -> m (Maybe Char)
639 index n p = head (p >-> drop n)
640 {-# INLINABLE index #-}
641
642
643 -- | Store a tally of how many segments match the given 'Text'
644 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
645 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
646 {-# INLINABLE count #-}
647
648
649 {-| Consume the first character from a stream of 'Text'
650
651 'next' either fails with a 'Left' if the 'Producer' has no more characters or
652 succeeds with a 'Right' providing the next character and the remainder of the
653 'Producer'.
654 -}
655 nextChar
656 :: (Monad m)
657 => Producer Text m r
658 -> m (Either r (Char, Producer Text m r))
659 nextChar = go
660 where
661 go p = do
662 x <- next p
663 case x of
664 Left r -> return (Left r)
665 Right (txt, p') -> case (T.uncons txt) of
666 Nothing -> go p'
667 Just (c, txt') -> return (Right (c, yield txt' >> p'))
668 {-# INLINABLE nextChar #-}
669
670 {-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
671 'Producer' is empty
672 -}
673 drawChar :: (Monad m) => Parser Text m (Maybe Char)
674 drawChar = do
675 x <- PP.draw
676 case x of
677 Nothing -> return Nothing
678 Just txt -> case (T.uncons txt) of
679 Nothing -> drawChar
680 Just (c, txt') -> do
681 PP.unDraw txt'
682 return (Just c)
683 {-# INLINABLE drawChar #-}
684
685 -- | Push back a 'Char' onto the underlying 'Producer'
686 unDrawChar :: (Monad m) => Char -> Parser Text m ()
687 unDrawChar c = modify (yield (T.singleton c) >>)
688 {-# INLINABLE unDrawChar #-}
689
690 {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
691 push the 'Char' back
692
693 > peekChar = do
694 > x <- drawChar
695 > case x of
696 > Left _ -> return ()
697 > Right c -> unDrawChar c
698 > return x
699 -}
700 peekChar :: (Monad m) => Parser Text m (Maybe Char)
701 peekChar = do
702 x <- drawChar
703 case x of
704 Nothing -> return ()
705 Just c -> unDrawChar c
706 return x
707 {-# INLINABLE peekChar #-}
708
709 {-| Check if the underlying 'Producer' has no more characters
710
711 Note that this will skip over empty 'Text' chunks, unlike
712 'PP.isEndOfInput' from @pipes-parse@, which would consider
713 an empty 'Text' a valid bit of input.
714
715 > isEndOfChars = liftM isLeft peekChar
716 -}
717 isEndOfChars :: (Monad m) => Parser Text m Bool
718 isEndOfChars = do
719 x <- peekChar
720 return (case x of
721 Nothing -> True
722 Just _-> False )
723 {-# INLINABLE isEndOfChars #-}
724
725
726 -- | An improper lens into a stream of 'ByteString' expected to be UTF-8 encoded; the associated
727 -- stream of Text ends by returning a stream of ByteStrings beginning at the point of failure.
728
729 decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
730 (Producer Text m (Producer ByteString m r))
731 decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
732 (k (go B.empty PE.streamDecodeUtf8 p0)) where
733 go !carry dec0 p = do
734 x <- lift (next p)
735 case x of Left r -> return (if B.null carry
736 then return r -- all bytestring input was consumed
737 else (do yield carry -- a potentially valid fragment remains
738 return r))
739
740 Right (chunk, p') -> case dec0 chunk of
741 PE.Some text carry2 dec -> do yield text
742 go carry2 dec p'
743 PE.Other text bs -> do yield text
744 return (do yield bs -- an invalid blob remains
745 p')
746 {-# INLINABLE decodeUtf8 #-}
747
748
749 -- | Splits a 'Producer' after the given number of characters
750 splitAt
751 :: (Monad m, Integral n)
752 => n
753 -> Lens' (Producer Text m r)
754 (Producer Text m (Producer Text m r))
755 splitAt n0 k p0 = fmap join (k (go n0 p0))
756 where
757 go 0 p = return p
758 go n p = do
759 x <- lift (next p)
760 case x of
761 Left r -> return (return r)
762 Right (txt, p') -> do
763 let len = fromIntegral (T.length txt)
764 if (len <= n)
765 then do
766 yield txt
767 go (n - len) p'
768 else do
769 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
770 yield prefix
771 return (yield suffix >> p')
772 {-# INLINABLE splitAt #-}
773
774
775 {-| Split a text stream in two, where the first text stream is the longest
776 consecutive group of text that satisfy the predicate
777 -}
778 span
779 :: (Monad m)
780 => (Char -> Bool)
781 -> Lens' (Producer Text m r)
782 (Producer Text m (Producer Text m r))
783 span predicate k p0 = fmap join (k (go p0))
784 where
785 go p = do
786 x <- lift (next p)
787 case x of
788 Left r -> return (return r)
789 Right (txt, p') -> do
790 let (prefix, suffix) = T.span predicate txt
791 if (T.null suffix)
792 then do
793 yield txt
794 go p'
795 else do
796 yield prefix
797 return (yield suffix >> p')
798 {-# INLINABLE span #-}
799
800 {-| Split a text stream in two, where the first text stream is the longest
801 consecutive group of characters that don't satisfy the predicate
802 -}
803 break
804 :: (Monad m)
805 => (Char -> Bool)
806 -> Lens' (Producer Text m r)
807 (Producer Text m (Producer Text m r))
808 break predicate = span (not . predicate)
809 {-# INLINABLE break #-}
810
811 {-| Improper lens that splits after the first group of equivalent Chars, as
812 defined by the given equivalence relation
813 -}
814 groupBy
815 :: (Monad m)
816 => (Char -> Char -> Bool)
817 -> Lens' (Producer Text m r)
818 (Producer Text m (Producer Text m r))
819 groupBy equals k p0 = fmap join (k ((go p0))) where
820 go p = do
821 x <- lift (next p)
822 case x of
823 Left r -> return (return r)
824 Right (txt, p') -> case T.uncons txt of
825 Nothing -> go p'
826 Just (c, _) -> (yield txt >> p') ^. span (equals c)
827 {-# INLINABLE groupBy #-}
828
829 -- | Improper lens that splits after the first succession of identical 'Char' s
830 group :: Monad m
831 => Lens' (Producer Text m r)
832 (Producer Text m (Producer Text m r))
833 group = groupBy (==)
834 {-# INLINABLE group #-}
835
836 {-| Improper lens that splits a 'Producer' after the first word
837
838 Unlike 'words', this does not drop leading whitespace
839 -}
840 word :: (Monad m)
841 => Lens' (Producer Text m r)
842 (Producer Text m (Producer Text m r))
843 word k p0 = fmap join (k (to p0))
844 where
845 to p = do
846 p' <- p^.span isSpace
847 p'^.break isSpace
848 {-# INLINABLE word #-}
849
850
851 line :: (Monad m)
852 => Lens' (Producer Text m r)
853 (Producer Text m (Producer Text m r))
854 line = break (== '\n')
855
856 {-# INLINABLE line #-}
857
858
859 -- | Intersperse a 'Char' in between the characters of stream of 'Text'
860 intersperse
861 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
862 intersperse c = go0
863 where
864 go0 p = do
865 x <- lift (next p)
866 case x of
867 Left r -> return r
868 Right (txt, p') -> do
869 yield (T.intersperse c txt)
870 go1 p'
871 go1 p = do
872 x <- lift (next p)
873 case x of
874 Left r -> return r
875 Right (txt, p') -> do
876 yield (T.singleton c)
877 yield (T.intersperse c txt)
878 go1 p'
879 {-# INLINABLE intersperse #-}
880
881
882
883 -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
884 packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
885 packChars = Data.Profunctor.dimap to (fmap from)
886 where
887 -- to :: Monad m => Producer Char m x -> Producer Text m x
888 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
889
890 step diffAs c = diffAs . (c:)
891
892 done diffAs = T.pack (diffAs [])
893
894 -- from :: Monad m => Producer Text m x -> Producer Char m x
895 from p = for p (each . T.unpack)
896 {-# INLINABLE packChars #-}
897
898
899 -- | Split a text stream into 'FreeT'-delimited text streams of fixed size
900 chunksOf
901 :: (Monad m, Integral n)
902 => n -> Lens' (Producer Text m r)
903 (FreeT (Producer Text m) m r)
904 chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
905 where
906 go p = do
907 x <- next p
908 return $ case x of
909 Left r -> Pure r
910 Right (txt, p') -> Free $ do
911 p'' <- (yield txt >> p') ^. splitAt n
912 return $ FreeT (go p'')
913 {-# INLINABLE chunksOf #-}
914
915
916 {-| Split a text stream into sub-streams delimited by characters that satisfy the
917 predicate
918 -}
919 splitsWith
920 :: (Monad m)
921 => (Char -> Bool)
922 -> Producer Text m r
923 -> FreeT (Producer Text m) m r
924 splitsWith predicate p0 = FreeT (go0 p0)
925 where
926 go0 p = do
927 x <- next p
928 case x of
929 Left r -> return (Pure r)
930 Right (txt, p') ->
931 if (T.null txt)
932 then go0 p'
933 else return $ Free $ do
934 p'' <- (yield txt >> p') ^. span (not . predicate)
935 return $ FreeT (go1 p'')
936 go1 p = do
937 x <- nextChar p
938 return $ case x of
939 Left r -> Pure r
940 Right (_, p') -> Free $ do
941 p'' <- p' ^. span (not . predicate)
942 return $ FreeT (go1 p'')
943 {-# INLINABLE splitsWith #-}
944
945 -- | Split a text stream using the given 'Char' as the delimiter
946 splits :: (Monad m)
947 => Char
948 -> Lens' (Producer Text m r)
949 (FreeT (Producer Text m) m r)
950 splits c k p =
951 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
952 {-# INLINABLE splits #-}
953
954 {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
955 given equivalence relation
956 -}
957 groupsBy
958 :: Monad m
959 => (Char -> Char -> Bool)
960 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
961 groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
962 go p = do x <- next p
963 case x of Left r -> return (Pure r)
964 Right (bs, p') -> case T.uncons bs of
965 Nothing -> go p'
966 Just (c, _) -> do return $ Free $ do
967 p'' <- (yield bs >> p')^.span (equals c)
968 return $ FreeT (go p'')
969 {-# INLINABLE groupsBy #-}
970
971
972 -- | Like 'groupsBy', where the equality predicate is ('==')
973 groups
974 :: Monad m
975 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
976 groups = groupsBy (==)
977 {-# INLINABLE groups #-}
978
979
980
981 {-| Split a text stream into 'FreeT'-delimited lines
982 -}
983 lines
984 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
985 lines = Data.Profunctor.dimap _lines (fmap _unlines)
986 where
987 _lines p0 = FreeT (go0 p0)
988 where
989 go0 p = do
990 x <- next p
991 case x of
992 Left r -> return (Pure r)
993 Right (txt, p') ->
994 if (T.null txt)
995 then go0 p'
996 else return $ Free $ go1 (yield txt >> p')
997 go1 p = do
998 p' <- p ^. break ('\n' ==)
999 return $ FreeT $ do
1000 x <- nextChar p'
1001 case x of
1002 Left r -> return $ Pure r
1003 Right (_, p'') -> go0 p''
1004 -- _unlines
1005 -- :: Monad m
1006 -- => FreeT (Producer Text m) m x -> Producer Text m x
1007 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
1008
1009
1010 {-# INLINABLE lines #-}
1011
1012
1013
1014 -- | Split a text stream into 'FreeT'-delimited words
1015 words
1016 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
1017 words = Data.Profunctor.dimap go (fmap _unwords)
1018 where
1019 go p = FreeT $ do
1020 x <- next (p >-> dropWhile isSpace)
1021 return $ case x of
1022 Left r -> Pure r
1023 Right (bs, p') -> Free $ do
1024 p'' <- (yield bs >> p') ^. break isSpace
1025 return (go p'')
1026 _unwords = PG.intercalates (yield $ T.singleton ' ')
1027
1028 {-# INLINABLE words #-}
1029
1030
1031 {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1032 interspersing a text stream in between them
1033 -}
1034 intercalate
1035 :: (Monad m)
1036 => Producer Text m ()
1037 -> FreeT (Producer Text m) m r
1038 -> Producer Text m r
1039 intercalate p0 = go0
1040 where
1041 go0 f = do
1042 x <- lift (runFreeT f)
1043 case x of
1044 Pure r -> return r
1045 Free p -> do
1046 f' <- p
1047 go1 f'
1048 go1 f = do
1049 x <- lift (runFreeT f)
1050 case x of
1051 Pure r -> return r
1052 Free p -> do
1053 p0
1054 f' <- p
1055 go1 f'
1056 {-# INLINABLE intercalate #-}
1057
1058 {-| Join 'FreeT'-delimited lines into a text stream
1059 -}
1060 unlines
1061 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1062 unlines = go
1063 where
1064 go f = do
1065 x <- lift (runFreeT f)
1066 case x of
1067 Pure r -> return r
1068 Free p -> do
1069 f' <- p
1070 yield $ T.singleton '\n'
1071 go f'
1072 {-# INLINABLE unlines #-}
1073
1074 {-| Join 'FreeT'-delimited words into a text stream
1075 -}
1076 unwords
1077 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1078 unwords = intercalate (yield $ T.singleton ' ')
1079 {-# INLINABLE unwords #-}
1080
1081 {- $parse
1082 The following parsing utilities are single-character analogs of the ones found
1083 @pipes-parse@.
1084 -}
1085
1086 {- $reexports
1087
1088 @Data.Text@ re-exports the 'Text' type.
1089
1090 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
1091 -}
1092
1093 codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
1094 codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
1095 (k (decoder (dec B.empty) p0) ) where
1096 decoder :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1097 decoder !d p0 = case d of
1098 PE.Other txt bad -> do yield txt
1099 return (do yield bad
1100 p0)
1101 PE.Some txt extra dec -> do yield txt
1102 x <- lift (next p0)
1103 case x of Left r -> return (do yield extra
1104 return r)
1105 Right (chunk,p1) -> decoder (dec chunk) p1
1106
1107 -- decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
1108 -- (k (go B.empty PE.streamDecodeUtf8 p0)) where
1109
1110 encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1111 encodeAscii = go where
1112 go p = do echunk <- lift (next p)
1113 case echunk of
1114 Left r -> return (return r)
1115 Right (chunk, p') ->
1116 if T.null chunk
1117 then go p'
1118 else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
1119 in do yield (B8.pack (T.unpack safe))
1120 if T.null unsafe
1121 then go p'
1122 else return $ do yield unsafe
1123 p'
1124
1125 encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1126 encodeIso8859_1 = go where
1127 go p = do etxt <- lift (next p)
1128 case etxt of
1129 Left r -> return (return r)
1130 Right (txt, p') ->
1131 if T.null txt
1132 then go p'
1133 else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
1134 in do yield (B8.pack (T.unpack safe))
1135 if T.null unsafe
1136 then go p'
1137 else return $ do yield unsafe
1138 p'
1139
1140 decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1141 decodeAscii = go where
1142 go p = do echunk <- lift (next p)
1143 case echunk of
1144 Left r -> return (return r)
1145 Right (chunk, p') ->
1146 if B.null chunk
1147 then go p'
1148 else let (safe, unsafe) = B.span (<= 0x7F) chunk
1149 in do yield (T.pack (B8.unpack safe))
1150 if B.null unsafe
1151 then go p'
1152 else return $ do yield unsafe
1153 p'
1154
1155
1156 decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1157 decodeIso8859_1 = go where
1158 go p = do echunk <- lift (next p)
1159 case echunk of
1160 Left r -> return (return r)
1161 Right (chunk, p') ->
1162 if B.null chunk
1163 then go p'
1164 else let (safe, unsafe) = B.span (<= 0xFF) chunk
1165 in do yield (T.pack (B8.unpack safe))
1166 if B.null unsafe
1167 then go p'
1168 else return $ do yield unsafe
1169 p'
1170
1171
1172
1173 {-
1174 ascii :: Codec
1175 ascii = Codec name enc (toDecoding dec) where
1176 name = T.pack "ASCII"
1177 enc text = (bytes, extra) where
1178 (safe, unsafe) = T.span (\c -> ord c <= 0x7F) text
1179 bytes = B8.pack (T.unpack safe)
1180 extra = if T.null unsafe
1181 then Nothing
1182 else Just (EncodeException ascii (T.head unsafe), unsafe)
1183
1184 dec bytes = (text, extra) where
1185 (safe, unsafe) = B.span (<= 0x7F) bytes
1186 text = T.pack (B8.unpack safe)
1187 extra = if B.null unsafe
1188 then Right B.empty
1189 else Left (DecodeException ascii (B.head unsafe), unsafe)
1190
1191 iso8859_1 :: Codec
1192 iso8859_1 = Codec name enc (toDecoding dec) where
1193 name = T.pack "ISO-8859-1"
1194 enc text = (bytes, extra) where
1195 (safe, unsafe) = T.span (\c -> ord c <= 0xFF) text
1196 bytes = B8.pack (T.unpack safe)
1197 extra = if T.null unsafe
1198 then Nothing
1199 else Just (EncodeException iso8859_1 (T.head unsafe), unsafe)
1200
1201 dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
1202 -}
1203