aboutsummaryrefslogtreecommitdiffhomepage
path: root/Pipes
diff options
context:
space:
mode:
authormichaelt <what_is_it_to_do_anything@yahoo.com>2013-10-22 18:40:23 -0400
committermichaelt <what_is_it_to_do_anything@yahoo.com>2013-10-22 18:40:23 -0400
commit7faef8bceff2440056da59920fb932b5b76f6541 (patch)
tree935ae450dad672aca4b0fda84f9993eb93ba547c /Pipes
parent31f41a5d197ca9f1a613f2dc684a9fa0467a0f2e (diff)
downloadtext-pipes-7faef8bceff2440056da59920fb932b5b76f6541.tar.gz
text-pipes-7faef8bceff2440056da59920fb932b5b76f6541.tar.zst
text-pipes-7faef8bceff2440056da59920fb932b5b76f6541.zip
new module names
Diffstat (limited to 'Pipes')
-rw-r--r--Pipes/Text.hs856
-rw-r--r--Pipes/Text/Parse.hs139
2 files changed, 995 insertions, 0 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs
new file mode 100644
index 0000000..b0d90f0
--- /dev/null
+++ b/Pipes/Text.hs
@@ -0,0 +1,856 @@
1{-# LANGUAGE RankNTypes, TypeFamilies, CPP #-}
2
3{-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 can interact lazy 'Text's and 'IO.Handle's.
6
7 To stream to or from 'IO.Handle's, use 'fromHandle' or 'toHandle'. For
8 example, the following program copies a document from one file to another:
9
10> import Pipes
11> import qualified Data.Text.Pipes as Text
12> import System.IO
13>
14> main =
15> withFile "inFile.txt" ReadMode $ \hIn ->
16> withFile "outFile.txt" WriteMode $ \hOut ->
17> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
18
19To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
20
21> import Pipes
22> import qualified Data.Text.Pipes as Text
23> import Pipes.Safe
24>
25> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
26
27 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
28 and 'stdout' proxies, as with the following \"echo\" program:
29
30> main = runEffect $ Text.stdin >-> Text.stdout
31
32 You can also translate pure lazy 'TL.Text's to and from proxies:
33
34> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
35
36 In addition, this module provides many functions equivalent to lazy
37 'Text' functions so that you can transform or fold text streams. For
38 example, to stream only the first three lines of 'stdin' to 'stdout' you
39 might write:
40
41> import Pipes
42> import qualified Pipes.Text as Text
43> import qualified Pipes.Parse as Parse
44>
45> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
46> where
47> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
48
49 The above program will never bring more than one chunk of text (~ 32 KB) into
50 memory, no matter how long the lines are.
51
52 Note that functions in this library are designed to operate on streams that
53 are insensitive to text boundaries. This means that they may freely split
54 text into smaller texts and /discard empty texts/. However, they will
55 /never concatenate texts/ in order to provide strict upper bounds on memory
56 usage.
57-}
58
59module Pipes.Text (
60 -- * Producers
61 fromLazy,
62 stdin,
63 fromHandle,
64 readFile,
65 stdinLn,
66
67 -- * Consumers
68 stdout,
69 stdoutLn,
70 toHandle,
71 writeFile,
72
73 -- * Pipes
74 map,
75 concatMap,
76 take,
77 drop,
78 takeWhile,
79 dropWhile,
80 filter,
81 scan,
82 encodeUtf8,
83 pack,
84 unpack,
85 stripStart,
86
87 -- * Folds
88 toLazy,
89 toLazyM,
90 fold,
91 head,
92 last,
93 null,
94 length,
95 any,
96 all,
97 maximum,
98 minimum,
99 find,
100 index,
101-- elemIndex,
102-- findIndex,
103 count,
104
105 -- * Splitters
106 splitAt,
107 chunksOf,
108 span,
109 break,
110 splitWith,
111 split,
112 groupBy,
113 group,
114 lines,
115 words,
116#if MIN_VERSION_text(0,11,4)
117 decodeUtf8,
118#endif
119 -- * Transformations
120 intersperse,
121
122 -- * Joiners
123 intercalate,
124 unlines,
125 unwords,
126
127 -- * Character Parsers
128 -- $parse
129 nextChar,
130 drawChar,
131 unDrawChar,
132 peekChar,
133 isEndOfChars,
134
135 -- * Re-exports
136 -- $reexports
137 module Data.Text,
138 module Pipes.Parse
139 ) where
140
141import Control.Exception (throwIO, try)
142import Control.Monad (liftM, unless)
143import Control.Monad.Trans.State.Strict (StateT)
144import qualified Data.Text as T
145import qualified Data.Text.IO as T
146import qualified Data.Text.Encoding as TE
147import Data.Text (Text)
148import qualified Data.Text.Lazy as TL
149import qualified Data.Text.Lazy.IO as TL
150import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
151import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
152import Data.ByteString (ByteString)
153import qualified Data.ByteString as B
154import Data.Char (ord)
155import Data.Functor.Identity (Identity)
156import qualified Data.List as List
157import Foreign.C.Error (Errno(Errno), ePIPE)
158import qualified GHC.IO.Exception as G
159import Pipes
160import qualified Pipes.ByteString.Parse as PBP
161import Data.Text.Pipes.Parse (
162 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
163import Pipes.Core (respond, Server')
164import qualified Pipes.Parse as PP
165import Pipes.Parse (input, concat, FreeT)
166import qualified Pipes.Safe.Prelude as Safe
167import qualified Pipes.Safe as Safe
168import Pipes.Safe (MonadSafe(..), Base(..))
169import qualified Pipes.Prelude as P
170import qualified System.IO as IO
171import Data.Char (isSpace)
172import Prelude hiding (
173 all,
174 any,
175 break,
176 concat,
177 concatMap,
178 drop,
179 dropWhile,
180 elem,
181 filter,
182 head,
183 last,
184 lines,
185 length,
186 map,
187 maximum,
188 minimum,
189 notElem,
190 null,
191 readFile,
192 span,
193 splitAt,
194 take,
195 takeWhile,
196 unlines,
197 unwords,
198 words,
199 writeFile )
200
201-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
202fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
203fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
204{-# INLINABLE fromLazy #-}
205
206-- | Stream bytes from 'stdin'
207stdin :: MonadIO m => Producer' Text m ()
208stdin = fromHandle IO.stdin
209{-# INLINABLE stdin #-}
210
211{-| Convert a 'IO.Handle' into a text stream using a text size
212 determined by the good sense of the text library.
213
214-}
215
216fromHandle :: MonadIO m => IO.Handle -> Producer' Text m ()
217fromHandle h = go where
218 go = do txt <- liftIO (T.hGetChunk h)
219 unless (T.null txt) $ do yield txt
220 go
221{-# INLINABLE fromHandle#-}
222
223{-| Stream text from a file using Pipes.Safe
224
225>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
226MAIN = PUTSTRLN "HELLO WORLD"
227-}
228
229readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m ()
230readFile file = Safe.withFile file IO.ReadMode fromHandle
231{-# INLINABLE readFile #-}
232
233{-| Stream lines of text from stdin (for testing in ghci etc.)
234
235>>> let safely = runSafeT . runEffect
236>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
237hello
2385
239world
2405
241
242-}
243stdinLn :: MonadIO m => Producer' Text m ()
244stdinLn = go where
245 go = do
246 eof <- liftIO (IO.hIsEOF IO.stdin)
247 unless eof $ do
248 txt <- liftIO (T.hGetLine IO.stdin)
249 yield txt
250 go
251
252
253{-| Stream text to 'stdout'
254
255 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
256
257 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
258 @(source >-> stdout)@ in suitable cases.
259-}
260stdout :: MonadIO m => Consumer' Text m ()
261stdout = go
262 where
263 go = do
264 txt <- await
265 x <- liftIO $ try (T.putStr txt)
266 case x of
267 Left (G.IOError { G.ioe_type = G.ResourceVanished
268 , G.ioe_errno = Just ioe })
269 | Errno ioe == ePIPE
270 -> return ()
271 Left e -> liftIO (throwIO e)
272 Right () -> go
273{-# INLINABLE stdout #-}
274
275stdoutLn :: (MonadIO m) => Consumer' Text m ()
276stdoutLn = go
277 where
278 go = do
279 str <- await
280 x <- liftIO $ try (T.putStrLn str)
281 case x of
282 Left (G.IOError { G.ioe_type = G.ResourceVanished
283 , G.ioe_errno = Just ioe })
284 | Errno ioe == ePIPE
285 -> return ()
286 Left e -> liftIO (throwIO e)
287 Right () -> go
288{-# INLINABLE stdoutLn #-}
289
290{-| Convert a text stream into a 'Handle'
291
292 Note: again, for best performance, where possible use
293 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
294-}
295toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
296toHandle h = for cat (liftIO . T.hPutStr h)
297{-# INLINABLE toHandle #-}
298
299-- | Stream text into a file. Uses @pipes-safe@.
300writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m ()
301writeFile file = Safe.withFile file IO.WriteMode toHandle
302
303-- | Apply a transformation to each 'Char' in the stream
304map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
305map f = P.map (T.map f)
306{-# INLINABLE map #-}
307
308-- | Map a function over the characters of a text stream and concatenate the results
309concatMap
310 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
311concatMap f = P.map (T.concatMap f)
312{-# INLINABLE concatMap #-}
313
314
315-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
316-- encoding
317encodeUtf8 :: Monad m => Pipe Text ByteString m r
318encodeUtf8 = P.map TE.encodeUtf8
319{-# INLINEABLE encodeUtf8 #-}
320
321--| Transform a Pipe of 'String's into one of 'Text' chunks
322pack :: Monad m => Pipe String Text m r
323pack = P.map T.pack
324{-# INLINEABLE pack #-}
325
326--| Transforma a Pipes of 'Text' chunks into one of 'String's
327unpack :: Monad m => Pipe Text String m r
328unpack = P.map T.unpack
329{-# INLINEABLE unpack #-}
330
331--| @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
332-- here acting on a 'Text' pipe, rather as they would on a lazy text
333toCaseFold :: Monad m => Pipe Text Text m ()
334toCaseFold = P.map T.toCaseFold
335{-# INLINEABLE toCaseFold #-}
336
337--| lowercase incoming 'Text'
338toLower :: Monad m => Pipe Text Text m ()
339toLower = P.map T.toLower
340{-# INLINEABLE toLower #-}
341
342--| uppercase incoming 'Text'
343toUpper :: Monad m => Pipe Text Text m ()
344toUpper = P.map T.toUpper
345{-# INLINEABLE toUpper #-}
346
347--| Remove leading white space from an incoming succession of 'Text's
348stripStart :: Monad m => Pipe Text Text m r
349stripStart = do
350 chunk <- await
351 let text = T.stripStart chunk
352 if T.null text
353 then stripStart
354 else cat
355{-# INLINEABLE stripStart #-}
356
357-- | @(take n)@ only allows @n@ individual characters to pass;
358-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
359take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
360take n0 = go n0 where
361 go n
362 | n <= 0 = return ()
363 | otherwise = do
364 txt <- await
365 let len = fromIntegral (T.length txt)
366 if (len > n)
367 then yield (T.take (fromIntegral n) txt)
368 else do
369 yield txt
370 go (n - len)
371{-# INLINABLE take #-}
372
373-- | @(drop n)@ drops the first @n@ characters
374drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
375drop n0 = go n0 where
376 go n
377 | n <= 0 = cat
378 | otherwise = do
379 txt <- await
380 let len = fromIntegral (T.length txt)
381 if (len >= n)
382 then do
383 yield (T.drop (fromIntegral n) txt)
384 cat
385 else go (n - len)
386{-# INLINABLE drop #-}
387
388-- | Take characters until they fail the predicate
389takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
390takeWhile predicate = go
391 where
392 go = do
393 txt <- await
394 let (prefix, suffix) = T.span predicate txt
395 if (T.null suffix)
396 then do
397 yield txt
398 go
399 else yield prefix
400{-# INLINABLE takeWhile #-}
401
402-- | Drop characters until they fail the predicate
403dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
404dropWhile predicate = go where
405 go = do
406 txt <- await
407 case T.findIndex (not . predicate) txt of
408 Nothing -> go
409 Just i -> do
410 yield (T.drop i txt)
411 cat
412{-# INLINABLE dropWhile #-}
413
414-- | Only allows 'Char's to pass if they satisfy the predicate
415filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
416filter predicate = P.map (T.filter predicate)
417{-# INLINABLE filter #-}
418
419
420-- | Strict left scan over the characters
421scan
422 :: (Monad m)
423 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
424scan step begin = go begin
425 where
426 go c = do
427 txt <- await
428 let txt' = T.scanl step c txt
429 c' = T.last txt'
430 yield txt'
431 go c'
432{-# INLINABLE scan #-}
433
434{-| Fold a pure 'Producer' of strict 'Text's into a lazy
435 'TL.Text'
436-}
437toLazy :: Producer Text Identity () -> TL.Text
438toLazy = TL.fromChunks . P.toList
439{-# INLINABLE toLazy #-}
440
441{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
442 'TL.Text'
443
444 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
445 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
446 immediately as they are generated instead of loading them all into memory.
447-}
448toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
449toLazyM = liftM TL.fromChunks . P.toListM
450{-# INLINABLE toLazyM #-}
451
452-- | Reduce the text stream using a strict left fold over characters
453fold
454 :: Monad m
455 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
456fold step begin done = P.fold (T.foldl' step) begin done
457{-# INLINABLE fold #-}
458
459-- | Retrieve the first 'Char'
460head :: (Monad m) => Producer Text m () -> m (Maybe Char)
461head = go
462 where
463 go p = do
464 x <- nextChar p
465 case x of
466 Left _ -> return Nothing
467 Right (c, _) -> return (Just c)
468{-# INLINABLE head #-}
469
470-- | Retrieve the last 'Char'
471last :: (Monad m) => Producer Text m () -> m (Maybe Char)
472last = go Nothing
473 where
474 go r p = do
475 x <- next p
476 case x of
477 Left () -> return r
478 Right (txt, p') ->
479 if (T.null txt)
480 then go r p'
481 else go (Just $ T.last txt) p'
482{-# INLINABLE last #-}
483
484-- | Determine if the stream is empty
485null :: (Monad m) => Producer Text m () -> m Bool
486null = P.all T.null
487{-# INLINABLE null #-}
488
489-- | Count the number of bytes
490length :: (Monad m, Num n) => Producer Text m () -> m n
491length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
492{-# INLINABLE length #-}
493
494-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
495any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
496any predicate = P.any (T.any predicate)
497{-# INLINABLE any #-}
498
499-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
500all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
501all predicate = P.all (T.all predicate)
502{-# INLINABLE all #-}
503
504-- | Return the maximum 'Char' within a byte stream
505maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
506maximum = P.fold step Nothing id
507 where
508 step mc txt =
509 if (T.null txt)
510 then mc
511 else Just $ case mc of
512 Nothing -> T.maximum txt
513 Just c -> max c (T.maximum txt)
514{-# INLINABLE maximum #-}
515
516-- | Return the minimum 'Char' within a byte stream
517minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
518minimum = P.fold step Nothing id
519 where
520 step mc txt =
521 if (T.null txt)
522 then mc
523 else case mc of
524 Nothing -> Just (T.minimum txt)
525 Just c -> Just (min c (T.minimum txt))
526{-# INLINABLE minimum #-}
527
528-- | Find the first element in the stream that matches the predicate
529find
530 :: (Monad m)
531 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
532find predicate p = head (p >-> filter predicate)
533{-# INLINABLE find #-}
534
535-- | Index into a byte stream
536index
537 :: (Monad m, Integral a)
538 => a-> Producer Text m () -> m (Maybe Char)
539index n p = head (p >-> drop n)
540{-# INLINABLE index #-}
541
542-- | Find the index of an element that matches the given 'Char'
543-- elemIndex
544-- :: (Monad m, Num n) => Char -> Producer Text m () -> m (Maybe n)
545-- elemIndex w8 = findIndex (w8 ==)
546-- {-# INLINABLE elemIndex #-}
547
548-- | Store the first index of an element that satisfies the predicate
549-- findIndex
550-- :: (Monad m, Num n)
551-- => (Char -> Bool) -> Producer Text m () -> m (Maybe n)
552-- findIndex predicate p = P.head (p >-> findIndices predicate)
553-- {-# INLINABLE findIndex #-}
554--
555-- | Store a tally of how many segments match the given 'Text'
556count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
557count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
558{-# INLINABLE count #-}
559
560#if MIN_VERSION_text(0,11,4)
561-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
562-- into a Pipe of Text
563decodeUtf8
564 :: Monad m
565 => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
566decodeUtf8 = go TE.streamDecodeUtf8
567 where go dec p = do
568 x <- lift (next p)
569 case x of
570 Left r -> return (return r)
571 Right (chunk, p') -> do
572 let TE.Some text l dec' = dec chunk
573 if B.null l
574 then do
575 yield text
576 go dec' p'
577 else return $ do
578 yield l
579 p'
580{-# INLINEABLE decodeUtf8 #-}
581#endif
582
583-- | Splits a 'Producer' after the given number of characters
584splitAt
585 :: (Monad m, Integral n)
586 => n
587 -> Producer Text m r
588 -> Producer' Text m (Producer Text m r)
589splitAt = go
590 where
591 go 0 p = return p
592 go n p = do
593 x <- lift (next p)
594 case x of
595 Left r -> return (return r)
596 Right (txt, p') -> do
597 let len = fromIntegral (T.length txt)
598 if (len <= n)
599 then do
600 yield txt
601 go (n - len) p'
602 else do
603 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
604 yield prefix
605 return (yield suffix >> p')
606{-# INLINABLE splitAt #-}
607
608-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
609chunksOf
610 :: (Monad m, Integral n)
611 => n -> Producer Text m r -> FreeT (Producer Text m) m r
612chunksOf n p0 = PP.FreeT (go p0)
613 where
614 go p = do
615 x <- next p
616 return $ case x of
617 Left r -> PP.Pure r
618 Right (txt, p') -> PP.Free $ do
619 p'' <- splitAt n (yield txt >> p')
620 return $ PP.FreeT (go p'')
621{-# INLINABLE chunksOf #-}
622
623{-| Split a text stream in two, where the first text stream is the longest
624 consecutive group of text that satisfy the predicate
625-}
626span
627 :: (Monad m)
628 => (Char -> Bool)
629 -> Producer Text m r
630 -> Producer' Text m (Producer Text m r)
631span predicate = go
632 where
633 go p = do
634 x <- lift (next p)
635 case x of
636 Left r -> return (return r)
637 Right (txt, p') -> do
638 let (prefix, suffix) = T.span predicate txt
639 if (T.null suffix)
640 then do
641 yield txt
642 go p'
643 else do
644 yield prefix
645 return (yield suffix >> p')
646{-# INLINABLE span #-}
647
648{-| Split a byte stream in two, where the first byte stream is the longest
649 consecutive group of bytes that don't satisfy the predicate
650-}
651break
652 :: (Monad m)
653 => (Char -> Bool)
654 -> Producer Text m r
655 -> Producer Text m (Producer Text m r)
656break predicate = span (not . predicate)
657{-# INLINABLE break #-}
658
659{-| Split a byte stream into sub-streams delimited by bytes that satisfy the
660 predicate
661-}
662splitWith
663 :: (Monad m)
664 => (Char -> Bool)
665 -> Producer Text m r
666 -> PP.FreeT (Producer Text m) m r
667splitWith predicate p0 = PP.FreeT (go0 p0)
668 where
669 go0 p = do
670 x <- next p
671 case x of
672 Left r -> return (PP.Pure r)
673 Right (txt, p') ->
674 if (T.null txt)
675 then go0 p'
676 else return $ PP.Free $ do
677 p'' <- span (not . predicate) (yield txt >> p')
678 return $ PP.FreeT (go1 p'')
679 go1 p = do
680 x <- nextChar p
681 return $ case x of
682 Left r -> PP.Pure r
683 Right (_, p') -> PP.Free $ do
684 p'' <- span (not . predicate) p'
685 return $ PP.FreeT (go1 p'')
686{-# INLINABLE splitWith #-}
687
688-- | Split a text stream using the given 'Char' as the delimiter
689split :: (Monad m)
690 => Char
691 -> Producer Text m r
692 -> FreeT (Producer Text m) m r
693split c = splitWith (c ==)
694{-# INLINABLE split #-}
695
696{-| Group a text stream into 'FreeT'-delimited byte streams using the supplied
697 equality predicate
698-}
699groupBy
700 :: (Monad m)
701 => (Char -> Char -> Bool)
702 -> Producer Text m r
703 -> FreeT (Producer Text m) m r
704groupBy equal p0 = PP.FreeT (go p0)
705 where
706 go p = do
707 x <- next p
708 case x of
709 Left r -> return (PP.Pure r)
710 Right (txt, p') -> case (T.uncons txt) of
711 Nothing -> go p'
712 Just (c, _) -> do
713 return $ PP.Free $ do
714 p'' <- span (equal c) (yield txt >> p')
715 return $ PP.FreeT (go p'')
716{-# INLINABLE groupBy #-}
717
718-- | Group a byte stream into 'FreeT'-delimited byte streams of identical bytes
719group
720 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
721group = groupBy (==)
722{-# INLINABLE group #-}
723
724{-| Split a byte stream into 'FreeT'-delimited lines
725
726 Note: This function is purely for demonstration purposes since it assumes a
727 particular encoding. You should prefer the 'Data.Text.Text' equivalent of
728 this function from the upcoming @pipes-text@ library.
729-}
730lines
731 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
732lines p0 = PP.FreeT (go0 p0)
733 where
734 go0 p = do
735 x <- next p
736 case x of
737 Left r -> return (PP.Pure r)
738 Right (txt, p') ->
739 if (T.null txt)
740 then go0 p'
741 else return $ PP.Free $ go1 (yield txt >> p')
742 go1 p = do
743 p' <- break ('\n' ==) p
744 return $ PP.FreeT (go2 p')
745 go2 p = do
746 x <- nextChar p
747 return $ case x of
748 Left r -> PP.Pure r
749 Right (_, p') -> PP.Free (go1 p')
750{-# INLINABLE lines #-}
751
752
753
754-- | Split a text stream into 'FreeT'-delimited words
755words
756 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
757words p0 = removeEmpty (splitWith isSpace p0)
758 where
759 removeEmpty f = PP.FreeT $ do
760 x <- PP.runFreeT f
761 case x of
762 PP.Pure r -> return (PP.Pure r)
763 PP.Free p -> do
764 y <- next p
765 case y of
766 Left f' -> PP.runFreeT (removeEmpty f')
767 Right (bs, p') -> return $ PP.Free $ do
768 yield bs
769 f' <- p'
770 return (removeEmpty f')
771{-# INLINABLE words #-}
772
773-- | Intersperse a 'Char' in between the bytes of the byte stream
774intersperse
775 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
776intersperse c = go0
777 where
778 go0 p = do
779 x <- lift (next p)
780 case x of
781 Left r -> return r
782 Right (txt, p') -> do
783 yield (T.intersperse c txt)
784 go1 p'
785 go1 p = do
786 x <- lift (next p)
787 case x of
788 Left r -> return r
789 Right (txt, p') -> do
790 yield (T.singleton c)
791 yield (T.intersperse c txt)
792 go1 p'
793{-# INLINABLE intersperse #-}
794
795{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
796 interspersing a text stream in between them
797-}
798intercalate
799 :: (Monad m)
800 => Producer Text m ()
801 -> FreeT (Producer Text m) m r
802 -> Producer Text m r
803intercalate p0 = go0
804 where
805 go0 f = do
806 x <- lift (PP.runFreeT f)
807 case x of
808 PP.Pure r -> return r
809 PP.Free p -> do
810 f' <- p
811 go1 f'
812 go1 f = do
813 x <- lift (PP.runFreeT f)
814 case x of
815 PP.Pure r -> return r
816 PP.Free p -> do
817 p0
818 f' <- p
819 go1 f'
820{-# INLINABLE intercalate #-}
821
822{-| Join 'FreeT'-delimited lines into a byte stream
823-}
824unlines
825 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
826unlines = go
827 where
828 go f = do
829 x <- lift (PP.runFreeT f)
830 case x of
831 PP.Pure r -> return r
832 PP.Free p -> do
833 f' <- p
834 yield $ T.singleton '\n'
835 go f'
836{-# INLINABLE unlines #-}
837
838{-| Join 'FreeT'-delimited words into a text stream
839-}
840unwords
841 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
842unwords = intercalate (yield $ T.pack " ")
843{-# INLINABLE unwords #-}
844
845{- $parse
846 The following parsing utilities are single-character analogs of the ones found
847 @pipes-parse@.
848-}
849
850{- $reexports
851 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
852
853 @Data.Text@ re-exports the 'Text' type.
854
855 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
856-} \ No newline at end of file
diff --git a/Pipes/Text/Parse.hs b/Pipes/Text/Parse.hs
new file mode 100644
index 0000000..8c3a13e
--- /dev/null
+++ b/Pipes/Text/Parse.hs
@@ -0,0 +1,139 @@
1-- | Parsing utilities for characterstrings, in the style of @pipes-parse@
2
3module Pipes.Text.Parse (
4 -- * Parsers
5 nextChar,
6 drawChar,
7 unDrawChar,
8 peekChar,
9 isEndOfChars,
10 take,
11 takeWhile
12 ) where
13
14import Control.Monad.Trans.State.Strict (StateT, modify)
15import qualified Data.Text as T
16import Data.Text (Text)
17
18import Pipes
19import qualified Pipes.Parse as PP
20
21import Prelude hiding (take, takeWhile)
22
23{-| Consume the first character from a 'Text' stream
24
25 'next' either fails with a 'Left' if the 'Producer' has no more characters or
26 succeeds with a 'Right' providing the next byte and the remainder of the
27 'Producer'.
28-}
29nextChar
30 :: (Monad m)
31 => Producer Text m r
32 -> m (Either r (Char, Producer Text m r))
33nextChar = go
34 where
35 go p = do
36 x <- next p
37 case x of
38 Left r -> return (Left r)
39 Right (txt, p') -> case (T.uncons txt) of
40 Nothing -> go p'
41 Just (c, txt') -> return (Right (c, yield txt' >> p'))
42{-# INLINABLE nextChar #-}
43
44{-| Draw one 'Char' from the underlying 'Producer', returning 'Left' if the
45 'Producer' is empty
46-}
47drawChar :: (Monad m) => StateT (Producer Text m r) m (Either r Char)
48drawChar = do
49 x <- PP.draw
50 case x of
51 Left r -> return (Left r)
52 Right txt -> case (T.uncons txt) of
53 Nothing -> drawChar
54 Just (c, txt') -> do
55 PP.unDraw txt'
56 return (Right c)
57{-# INLINABLE drawChar #-}
58
59-- | Push back a 'Char' onto the underlying 'Producer'
60unDrawChar :: (Monad m) => Char -> StateT (Producer Text m r) m ()
61unDrawChar c = modify (yield (T.singleton c) >>)
62{-# INLINABLE unDrawChar #-}
63
64{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
65 push the 'Char' back
66
67> peekChar = do
68> x <- drawChar
69> case x of
70> Left _ -> return ()
71> Right c -> unDrawChar c
72> return x
73-}
74peekChar :: (Monad m) => StateT (Producer Text m r) m (Either r Char)
75peekChar = do
76 x <- drawChar
77 case x of
78 Left _ -> return ()
79 Right c -> unDrawChar c
80 return x
81{-# INLINABLE peekChar #-}
82
83{-| Check if the underlying 'Producer' has no more characters
84
85 Note that this will skip over empty 'Text' chunks, unlike
86 'PP.isEndOfInput' from @pipes-parse@.
87
88> isEndOfChars = liftM isLeft peekChar
89-}
90isEndOfChars :: (Monad m) => StateT (Producer Text m r) m Bool
91isEndOfChars = do
92 x <- peekChar
93 return (case x of
94 Left _ -> True
95 Right _ -> False )
96{-# INLINABLE isEndOfChars #-}
97
98{-| @(take n)@ only allows @n@ characters to pass
99
100 Unlike 'take', this 'PP.unDraw's unused characters
101-}
102take :: (Monad m, Integral a) => a -> Pipe Text Text (StateT (Producer Text m r) m) ()
103take n0 = go n0 where
104 go n
105 | n <= 0 = return ()
106 | otherwise = do
107 txt <- await
108 let len = fromIntegral (T.length txt)
109 if (len > n)
110 then do
111 let n' = fromIntegral n
112 lift . PP.unDraw $ T.drop n' txt
113 yield $ T.take n' txt
114 else do
115 yield txt
116 go (n - len)
117{-# INLINABLE take #-}
118
119{-| Take characters until they fail the predicate
120
121 Unlike 'takeWhile', this 'PP.unDraw's unused characters
122-}
123takeWhile
124 :: (Monad m)
125 => (Char -> Bool)
126 -> Pipe Text Text (StateT (Producer Text m r) m) ()
127takeWhile predicate = go
128 where
129 go = do
130 txt <- await
131 let (prefix, suffix) = T.span predicate txt
132 if (T.null suffix)
133 then do
134 yield txt
135 go
136 else do
137 lift $ PP.unDraw suffix
138 yield prefix
139{-# INLINABLE takeWhile #-}