diff options
Diffstat (limited to 'Pipes')
-rw-r--r-- | Pipes/Text.hs | 856 | ||||
-rw-r--r-- | Pipes/Text/Parse.hs | 139 |
2 files changed, 995 insertions, 0 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs new file mode 100644 index 0000000..b0d90f0 --- /dev/null +++ b/Pipes/Text.hs | |||
@@ -0,0 +1,856 @@ | |||
1 | {-# LANGUAGE RankNTypes, TypeFamilies, CPP #-} | ||
2 | |||
3 | {-| This module provides @pipes@ utilities for \"text streams\", which are | ||
4 | streams of 'Text' chunks. The individual chunks are uniformly @strict@, but | ||
5 | can interact lazy 'Text's and 'IO.Handle's. | ||
6 | |||
7 | To stream to or from 'IO.Handle's, use 'fromHandle' or 'toHandle'. For | ||
8 | example, the following program copies a document from one file to another: | ||
9 | |||
10 | > import Pipes | ||
11 | > import qualified Data.Text.Pipes as Text | ||
12 | > import System.IO | ||
13 | > | ||
14 | > main = | ||
15 | > withFile "inFile.txt" ReadMode $ \hIn -> | ||
16 | > withFile "outFile.txt" WriteMode $ \hOut -> | ||
17 | > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut | ||
18 | |||
19 | To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe): | ||
20 | |||
21 | > import Pipes | ||
22 | > import qualified Data.Text.Pipes as Text | ||
23 | > import Pipes.Safe | ||
24 | > | ||
25 | > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt" | ||
26 | |||
27 | You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin' | ||
28 | and 'stdout' proxies, as with the following \"echo\" program: | ||
29 | |||
30 | > main = runEffect $ Text.stdin >-> Text.stdout | ||
31 | |||
32 | You can also translate pure lazy 'TL.Text's to and from proxies: | ||
33 | |||
34 | > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout | ||
35 | |||
36 | In addition, this module provides many functions equivalent to lazy | ||
37 | 'Text' functions so that you can transform or fold text streams. For | ||
38 | example, to stream only the first three lines of 'stdin' to 'stdout' you | ||
39 | might write: | ||
40 | |||
41 | > import Pipes | ||
42 | > import qualified Pipes.Text as Text | ||
43 | > import qualified Pipes.Parse as Parse | ||
44 | > | ||
45 | > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout | ||
46 | > where | ||
47 | > takeLines n = Text.unlines . Parse.takeFree n . Text.lines | ||
48 | |||
49 | The above program will never bring more than one chunk of text (~ 32 KB) into | ||
50 | memory, no matter how long the lines are. | ||
51 | |||
52 | Note that functions in this library are designed to operate on streams that | ||
53 | are insensitive to text boundaries. This means that they may freely split | ||
54 | text into smaller texts and /discard empty texts/. However, they will | ||
55 | /never concatenate texts/ in order to provide strict upper bounds on memory | ||
56 | usage. | ||
57 | -} | ||
58 | |||
59 | module Pipes.Text ( | ||
60 | -- * Producers | ||
61 | fromLazy, | ||
62 | stdin, | ||
63 | fromHandle, | ||
64 | readFile, | ||
65 | stdinLn, | ||
66 | |||
67 | -- * Consumers | ||
68 | stdout, | ||
69 | stdoutLn, | ||
70 | toHandle, | ||
71 | writeFile, | ||
72 | |||
73 | -- * Pipes | ||
74 | map, | ||
75 | concatMap, | ||
76 | take, | ||
77 | drop, | ||
78 | takeWhile, | ||
79 | dropWhile, | ||
80 | filter, | ||
81 | scan, | ||
82 | encodeUtf8, | ||
83 | pack, | ||
84 | unpack, | ||
85 | stripStart, | ||
86 | |||
87 | -- * Folds | ||
88 | toLazy, | ||
89 | toLazyM, | ||
90 | fold, | ||
91 | head, | ||
92 | last, | ||
93 | null, | ||
94 | length, | ||
95 | any, | ||
96 | all, | ||
97 | maximum, | ||
98 | minimum, | ||
99 | find, | ||
100 | index, | ||
101 | -- elemIndex, | ||
102 | -- findIndex, | ||
103 | count, | ||
104 | |||
105 | -- * Splitters | ||
106 | splitAt, | ||
107 | chunksOf, | ||
108 | span, | ||
109 | break, | ||
110 | splitWith, | ||
111 | split, | ||
112 | groupBy, | ||
113 | group, | ||
114 | lines, | ||
115 | words, | ||
116 | #if MIN_VERSION_text(0,11,4) | ||
117 | decodeUtf8, | ||
118 | #endif | ||
119 | -- * Transformations | ||
120 | intersperse, | ||
121 | |||
122 | -- * Joiners | ||
123 | intercalate, | ||
124 | unlines, | ||
125 | unwords, | ||
126 | |||
127 | -- * Character Parsers | ||
128 | -- $parse | ||
129 | nextChar, | ||
130 | drawChar, | ||
131 | unDrawChar, | ||
132 | peekChar, | ||
133 | isEndOfChars, | ||
134 | |||
135 | -- * Re-exports | ||
136 | -- $reexports | ||
137 | module Data.Text, | ||
138 | module Pipes.Parse | ||
139 | ) where | ||
140 | |||
141 | import Control.Exception (throwIO, try) | ||
142 | import Control.Monad (liftM, unless) | ||
143 | import Control.Monad.Trans.State.Strict (StateT) | ||
144 | import qualified Data.Text as T | ||
145 | import qualified Data.Text.IO as T | ||
146 | import qualified Data.Text.Encoding as TE | ||
147 | import Data.Text (Text) | ||
148 | import qualified Data.Text.Lazy as TL | ||
149 | import qualified Data.Text.Lazy.IO as TL | ||
150 | import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize) | ||
151 | import Data.ByteString.Unsafe (unsafeTake, unsafeDrop) | ||
152 | import Data.ByteString (ByteString) | ||
153 | import qualified Data.ByteString as B | ||
154 | import Data.Char (ord) | ||
155 | import Data.Functor.Identity (Identity) | ||
156 | import qualified Data.List as List | ||
157 | import Foreign.C.Error (Errno(Errno), ePIPE) | ||
158 | import qualified GHC.IO.Exception as G | ||
159 | import Pipes | ||
160 | import qualified Pipes.ByteString.Parse as PBP | ||
161 | import Data.Text.Pipes.Parse ( | ||
162 | nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) | ||
163 | import Pipes.Core (respond, Server') | ||
164 | import qualified Pipes.Parse as PP | ||
165 | import Pipes.Parse (input, concat, FreeT) | ||
166 | import qualified Pipes.Safe.Prelude as Safe | ||
167 | import qualified Pipes.Safe as Safe | ||
168 | import Pipes.Safe (MonadSafe(..), Base(..)) | ||
169 | import qualified Pipes.Prelude as P | ||
170 | import qualified System.IO as IO | ||
171 | import Data.Char (isSpace) | ||
172 | import Prelude hiding ( | ||
173 | all, | ||
174 | any, | ||
175 | break, | ||
176 | concat, | ||
177 | concatMap, | ||
178 | drop, | ||
179 | dropWhile, | ||
180 | elem, | ||
181 | filter, | ||
182 | head, | ||
183 | last, | ||
184 | lines, | ||
185 | length, | ||
186 | map, | ||
187 | maximum, | ||
188 | minimum, | ||
189 | notElem, | ||
190 | null, | ||
191 | readFile, | ||
192 | span, | ||
193 | splitAt, | ||
194 | take, | ||
195 | takeWhile, | ||
196 | unlines, | ||
197 | unwords, | ||
198 | words, | ||
199 | writeFile ) | ||
200 | |||
201 | -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's | ||
202 | fromLazy :: (Monad m) => TL.Text -> Producer' Text m () | ||
203 | fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) | ||
204 | {-# INLINABLE fromLazy #-} | ||
205 | |||
206 | -- | Stream bytes from 'stdin' | ||
207 | stdin :: MonadIO m => Producer' Text m () | ||
208 | stdin = fromHandle IO.stdin | ||
209 | {-# INLINABLE stdin #-} | ||
210 | |||
211 | {-| Convert a 'IO.Handle' into a text stream using a text size | ||
212 | determined by the good sense of the text library. | ||
213 | |||
214 | -} | ||
215 | |||
216 | fromHandle :: MonadIO m => IO.Handle -> Producer' Text m () | ||
217 | fromHandle h = go where | ||
218 | go = do txt <- liftIO (T.hGetChunk h) | ||
219 | unless (T.null txt) $ do yield txt | ||
220 | go | ||
221 | {-# INLINABLE fromHandle#-} | ||
222 | |||
223 | {-| Stream text from a file using Pipes.Safe | ||
224 | |||
225 | >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout | ||
226 | MAIN = PUTSTRLN "HELLO WORLD" | ||
227 | -} | ||
228 | |||
229 | readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m () | ||
230 | readFile file = Safe.withFile file IO.ReadMode fromHandle | ||
231 | {-# INLINABLE readFile #-} | ||
232 | |||
233 | {-| Stream lines of text from stdin (for testing in ghci etc.) | ||
234 | |||
235 | >>> let safely = runSafeT . runEffect | ||
236 | >>> safely $ for Text.stdinLn (lift . lift . print . T.length) | ||
237 | hello | ||
238 | 5 | ||
239 | world | ||
240 | 5 | ||
241 | |||
242 | -} | ||
243 | stdinLn :: MonadIO m => Producer' Text m () | ||
244 | stdinLn = go where | ||
245 | go = do | ||
246 | eof <- liftIO (IO.hIsEOF IO.stdin) | ||
247 | unless eof $ do | ||
248 | txt <- liftIO (T.hGetLine IO.stdin) | ||
249 | yield txt | ||
250 | go | ||
251 | |||
252 | |||
253 | {-| Stream text to 'stdout' | ||
254 | |||
255 | Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe. | ||
256 | |||
257 | Note: For best performance, use @(for source (liftIO . putStr))@ instead of | ||
258 | @(source >-> stdout)@ in suitable cases. | ||
259 | -} | ||
260 | stdout :: MonadIO m => Consumer' Text m () | ||
261 | stdout = go | ||
262 | where | ||
263 | go = do | ||
264 | txt <- await | ||
265 | x <- liftIO $ try (T.putStr txt) | ||
266 | case x of | ||
267 | Left (G.IOError { G.ioe_type = G.ResourceVanished | ||
268 | , G.ioe_errno = Just ioe }) | ||
269 | | Errno ioe == ePIPE | ||
270 | -> return () | ||
271 | Left e -> liftIO (throwIO e) | ||
272 | Right () -> go | ||
273 | {-# INLINABLE stdout #-} | ||
274 | |||
275 | stdoutLn :: (MonadIO m) => Consumer' Text m () | ||
276 | stdoutLn = go | ||
277 | where | ||
278 | go = do | ||
279 | str <- await | ||
280 | x <- liftIO $ try (T.putStrLn str) | ||
281 | case x of | ||
282 | Left (G.IOError { G.ioe_type = G.ResourceVanished | ||
283 | , G.ioe_errno = Just ioe }) | ||
284 | | Errno ioe == ePIPE | ||
285 | -> return () | ||
286 | Left e -> liftIO (throwIO e) | ||
287 | Right () -> go | ||
288 | {-# INLINABLE stdoutLn #-} | ||
289 | |||
290 | {-| Convert a text stream into a 'Handle' | ||
291 | |||
292 | Note: again, for best performance, where possible use | ||
293 | @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@. | ||
294 | -} | ||
295 | toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r | ||
296 | toHandle h = for cat (liftIO . T.hPutStr h) | ||
297 | {-# INLINABLE toHandle #-} | ||
298 | |||
299 | -- | Stream text into a file. Uses @pipes-safe@. | ||
300 | writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m () | ||
301 | writeFile file = Safe.withFile file IO.WriteMode toHandle | ||
302 | |||
303 | -- | Apply a transformation to each 'Char' in the stream | ||
304 | map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r | ||
305 | map f = P.map (T.map f) | ||
306 | {-# INLINABLE map #-} | ||
307 | |||
308 | -- | Map a function over the characters of a text stream and concatenate the results | ||
309 | concatMap | ||
310 | :: (Monad m) => (Char -> Text) -> Pipe Text Text m r | ||
311 | concatMap f = P.map (T.concatMap f) | ||
312 | {-# INLINABLE concatMap #-} | ||
313 | |||
314 | |||
315 | -- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8 | ||
316 | -- encoding | ||
317 | encodeUtf8 :: Monad m => Pipe Text ByteString m r | ||
318 | encodeUtf8 = P.map TE.encodeUtf8 | ||
319 | {-# INLINEABLE encodeUtf8 #-} | ||
320 | |||
321 | --| Transform a Pipe of 'String's into one of 'Text' chunks | ||
322 | pack :: Monad m => Pipe String Text m r | ||
323 | pack = P.map T.pack | ||
324 | {-# INLINEABLE pack #-} | ||
325 | |||
326 | --| Transforma a Pipes of 'Text' chunks into one of 'String's | ||
327 | unpack :: Monad m => Pipe Text String m r | ||
328 | unpack = P.map T.unpack | ||
329 | {-# INLINEABLE unpack #-} | ||
330 | |||
331 | --| @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility, | ||
332 | -- here acting on a 'Text' pipe, rather as they would on a lazy text | ||
333 | toCaseFold :: Monad m => Pipe Text Text m () | ||
334 | toCaseFold = P.map T.toCaseFold | ||
335 | {-# INLINEABLE toCaseFold #-} | ||
336 | |||
337 | --| lowercase incoming 'Text' | ||
338 | toLower :: Monad m => Pipe Text Text m () | ||
339 | toLower = P.map T.toLower | ||
340 | {-# INLINEABLE toLower #-} | ||
341 | |||
342 | --| uppercase incoming 'Text' | ||
343 | toUpper :: Monad m => Pipe Text Text m () | ||
344 | toUpper = P.map T.toUpper | ||
345 | {-# INLINEABLE toUpper #-} | ||
346 | |||
347 | --| Remove leading white space from an incoming succession of 'Text's | ||
348 | stripStart :: Monad m => Pipe Text Text m r | ||
349 | stripStart = do | ||
350 | chunk <- await | ||
351 | let text = T.stripStart chunk | ||
352 | if T.null text | ||
353 | then stripStart | ||
354 | else cat | ||
355 | {-# INLINEABLE stripStart #-} | ||
356 | |||
357 | -- | @(take n)@ only allows @n@ individual characters to pass; | ||
358 | -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass. | ||
359 | take :: (Monad m, Integral a) => a -> Pipe Text Text m () | ||
360 | take n0 = go n0 where | ||
361 | go n | ||
362 | | n <= 0 = return () | ||
363 | | otherwise = do | ||
364 | txt <- await | ||
365 | let len = fromIntegral (T.length txt) | ||
366 | if (len > n) | ||
367 | then yield (T.take (fromIntegral n) txt) | ||
368 | else do | ||
369 | yield txt | ||
370 | go (n - len) | ||
371 | {-# INLINABLE take #-} | ||
372 | |||
373 | -- | @(drop n)@ drops the first @n@ characters | ||
374 | drop :: (Monad m, Integral a) => a -> Pipe Text Text m r | ||
375 | drop n0 = go n0 where | ||
376 | go n | ||
377 | | n <= 0 = cat | ||
378 | | otherwise = do | ||
379 | txt <- await | ||
380 | let len = fromIntegral (T.length txt) | ||
381 | if (len >= n) | ||
382 | then do | ||
383 | yield (T.drop (fromIntegral n) txt) | ||
384 | cat | ||
385 | else go (n - len) | ||
386 | {-# INLINABLE drop #-} | ||
387 | |||
388 | -- | Take characters until they fail the predicate | ||
389 | takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m () | ||
390 | takeWhile predicate = go | ||
391 | where | ||
392 | go = do | ||
393 | txt <- await | ||
394 | let (prefix, suffix) = T.span predicate txt | ||
395 | if (T.null suffix) | ||
396 | then do | ||
397 | yield txt | ||
398 | go | ||
399 | else yield prefix | ||
400 | {-# INLINABLE takeWhile #-} | ||
401 | |||
402 | -- | Drop characters until they fail the predicate | ||
403 | dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r | ||
404 | dropWhile predicate = go where | ||
405 | go = do | ||
406 | txt <- await | ||
407 | case T.findIndex (not . predicate) txt of | ||
408 | Nothing -> go | ||
409 | Just i -> do | ||
410 | yield (T.drop i txt) | ||
411 | cat | ||
412 | {-# INLINABLE dropWhile #-} | ||
413 | |||
414 | -- | Only allows 'Char's to pass if they satisfy the predicate | ||
415 | filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r | ||
416 | filter predicate = P.map (T.filter predicate) | ||
417 | {-# INLINABLE filter #-} | ||
418 | |||
419 | |||
420 | -- | Strict left scan over the characters | ||
421 | scan | ||
422 | :: (Monad m) | ||
423 | => (Char -> Char -> Char) -> Char -> Pipe Text Text m r | ||
424 | scan step begin = go begin | ||
425 | where | ||
426 | go c = do | ||
427 | txt <- await | ||
428 | let txt' = T.scanl step c txt | ||
429 | c' = T.last txt' | ||
430 | yield txt' | ||
431 | go c' | ||
432 | {-# INLINABLE scan #-} | ||
433 | |||
434 | {-| Fold a pure 'Producer' of strict 'Text's into a lazy | ||
435 | 'TL.Text' | ||
436 | -} | ||
437 | toLazy :: Producer Text Identity () -> TL.Text | ||
438 | toLazy = TL.fromChunks . P.toList | ||
439 | {-# INLINABLE toLazy #-} | ||
440 | |||
441 | {-| Fold an effectful 'Producer' of strict 'Text's into a lazy | ||
442 | 'TL.Text' | ||
443 | |||
444 | Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for | ||
445 | simple testing purposes. Idiomatic @pipes@ style consumes the chunks | ||
446 | immediately as they are generated instead of loading them all into memory. | ||
447 | -} | ||
448 | toLazyM :: (Monad m) => Producer Text m () -> m TL.Text | ||
449 | toLazyM = liftM TL.fromChunks . P.toListM | ||
450 | {-# INLINABLE toLazyM #-} | ||
451 | |||
452 | -- | Reduce the text stream using a strict left fold over characters | ||
453 | fold | ||
454 | :: Monad m | ||
455 | => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r | ||
456 | fold step begin done = P.fold (T.foldl' step) begin done | ||
457 | {-# INLINABLE fold #-} | ||
458 | |||
459 | -- | Retrieve the first 'Char' | ||
460 | head :: (Monad m) => Producer Text m () -> m (Maybe Char) | ||
461 | head = go | ||
462 | where | ||
463 | go p = do | ||
464 | x <- nextChar p | ||
465 | case x of | ||
466 | Left _ -> return Nothing | ||
467 | Right (c, _) -> return (Just c) | ||
468 | {-# INLINABLE head #-} | ||
469 | |||
470 | -- | Retrieve the last 'Char' | ||
471 | last :: (Monad m) => Producer Text m () -> m (Maybe Char) | ||
472 | last = go Nothing | ||
473 | where | ||
474 | go r p = do | ||
475 | x <- next p | ||
476 | case x of | ||
477 | Left () -> return r | ||
478 | Right (txt, p') -> | ||
479 | if (T.null txt) | ||
480 | then go r p' | ||
481 | else go (Just $ T.last txt) p' | ||
482 | {-# INLINABLE last #-} | ||
483 | |||
484 | -- | Determine if the stream is empty | ||
485 | null :: (Monad m) => Producer Text m () -> m Bool | ||
486 | null = P.all T.null | ||
487 | {-# INLINABLE null #-} | ||
488 | |||
489 | -- | Count the number of bytes | ||
490 | length :: (Monad m, Num n) => Producer Text m () -> m n | ||
491 | length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id | ||
492 | {-# INLINABLE length #-} | ||
493 | |||
494 | -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate | ||
495 | any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool | ||
496 | any predicate = P.any (T.any predicate) | ||
497 | {-# INLINABLE any #-} | ||
498 | |||
499 | -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate | ||
500 | all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool | ||
501 | all predicate = P.all (T.all predicate) | ||
502 | {-# INLINABLE all #-} | ||
503 | |||
504 | -- | Return the maximum 'Char' within a byte stream | ||
505 | maximum :: (Monad m) => Producer Text m () -> m (Maybe Char) | ||
506 | maximum = P.fold step Nothing id | ||
507 | where | ||
508 | step mc txt = | ||
509 | if (T.null txt) | ||
510 | then mc | ||
511 | else Just $ case mc of | ||
512 | Nothing -> T.maximum txt | ||
513 | Just c -> max c (T.maximum txt) | ||
514 | {-# INLINABLE maximum #-} | ||
515 | |||
516 | -- | Return the minimum 'Char' within a byte stream | ||
517 | minimum :: (Monad m) => Producer Text m () -> m (Maybe Char) | ||
518 | minimum = P.fold step Nothing id | ||
519 | where | ||
520 | step mc txt = | ||
521 | if (T.null txt) | ||
522 | then mc | ||
523 | else case mc of | ||
524 | Nothing -> Just (T.minimum txt) | ||
525 | Just c -> Just (min c (T.minimum txt)) | ||
526 | {-# INLINABLE minimum #-} | ||
527 | |||
528 | -- | Find the first element in the stream that matches the predicate | ||
529 | find | ||
530 | :: (Monad m) | ||
531 | => (Char -> Bool) -> Producer Text m () -> m (Maybe Char) | ||
532 | find predicate p = head (p >-> filter predicate) | ||
533 | {-# INLINABLE find #-} | ||
534 | |||
535 | -- | Index into a byte stream | ||
536 | index | ||
537 | :: (Monad m, Integral a) | ||
538 | => a-> Producer Text m () -> m (Maybe Char) | ||
539 | index n p = head (p >-> drop n) | ||
540 | {-# INLINABLE index #-} | ||
541 | |||
542 | -- | Find the index of an element that matches the given 'Char' | ||
543 | -- elemIndex | ||
544 | -- :: (Monad m, Num n) => Char -> Producer Text m () -> m (Maybe n) | ||
545 | -- elemIndex w8 = findIndex (w8 ==) | ||
546 | -- {-# INLINABLE elemIndex #-} | ||
547 | |||
548 | -- | Store the first index of an element that satisfies the predicate | ||
549 | -- findIndex | ||
550 | -- :: (Monad m, Num n) | ||
551 | -- => (Char -> Bool) -> Producer Text m () -> m (Maybe n) | ||
552 | -- findIndex predicate p = P.head (p >-> findIndices predicate) | ||
553 | -- {-# INLINABLE findIndex #-} | ||
554 | -- | ||
555 | -- | Store a tally of how many segments match the given 'Text' | ||
556 | count :: (Monad m, Num n) => Text -> Producer Text m () -> m n | ||
557 | count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) | ||
558 | {-# INLINABLE count #-} | ||
559 | |||
560 | #if MIN_VERSION_text(0,11,4) | ||
561 | -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded | ||
562 | -- into a Pipe of Text | ||
563 | decodeUtf8 | ||
564 | :: Monad m | ||
565 | => Producer ByteString m r -> Producer Text m (Producer ByteString m r) | ||
566 | decodeUtf8 = go TE.streamDecodeUtf8 | ||
567 | where go dec p = do | ||
568 | x <- lift (next p) | ||
569 | case x of | ||
570 | Left r -> return (return r) | ||
571 | Right (chunk, p') -> do | ||
572 | let TE.Some text l dec' = dec chunk | ||
573 | if B.null l | ||
574 | then do | ||
575 | yield text | ||
576 | go dec' p' | ||
577 | else return $ do | ||
578 | yield l | ||
579 | p' | ||
580 | {-# INLINEABLE decodeUtf8 #-} | ||
581 | #endif | ||
582 | |||
583 | -- | Splits a 'Producer' after the given number of characters | ||
584 | splitAt | ||
585 | :: (Monad m, Integral n) | ||
586 | => n | ||
587 | -> Producer Text m r | ||
588 | -> Producer' Text m (Producer Text m r) | ||
589 | splitAt = go | ||
590 | where | ||
591 | go 0 p = return p | ||
592 | go n p = do | ||
593 | x <- lift (next p) | ||
594 | case x of | ||
595 | Left r -> return (return r) | ||
596 | Right (txt, p') -> do | ||
597 | let len = fromIntegral (T.length txt) | ||
598 | if (len <= n) | ||
599 | then do | ||
600 | yield txt | ||
601 | go (n - len) p' | ||
602 | else do | ||
603 | let (prefix, suffix) = T.splitAt (fromIntegral n) txt | ||
604 | yield prefix | ||
605 | return (yield suffix >> p') | ||
606 | {-# INLINABLE splitAt #-} | ||
607 | |||
608 | -- | Split a text stream into 'FreeT'-delimited text streams of fixed size | ||
609 | chunksOf | ||
610 | :: (Monad m, Integral n) | ||
611 | => n -> Producer Text m r -> FreeT (Producer Text m) m r | ||
612 | chunksOf n p0 = PP.FreeT (go p0) | ||
613 | where | ||
614 | go p = do | ||
615 | x <- next p | ||
616 | return $ case x of | ||
617 | Left r -> PP.Pure r | ||
618 | Right (txt, p') -> PP.Free $ do | ||
619 | p'' <- splitAt n (yield txt >> p') | ||
620 | return $ PP.FreeT (go p'') | ||
621 | {-# INLINABLE chunksOf #-} | ||
622 | |||
623 | {-| Split a text stream in two, where the first text stream is the longest | ||
624 | consecutive group of text that satisfy the predicate | ||
625 | -} | ||
626 | span | ||
627 | :: (Monad m) | ||
628 | => (Char -> Bool) | ||
629 | -> Producer Text m r | ||
630 | -> Producer' Text m (Producer Text m r) | ||
631 | span predicate = go | ||
632 | where | ||
633 | go p = do | ||
634 | x <- lift (next p) | ||
635 | case x of | ||
636 | Left r -> return (return r) | ||
637 | Right (txt, p') -> do | ||
638 | let (prefix, suffix) = T.span predicate txt | ||
639 | if (T.null suffix) | ||
640 | then do | ||
641 | yield txt | ||
642 | go p' | ||
643 | else do | ||
644 | yield prefix | ||
645 | return (yield suffix >> p') | ||
646 | {-# INLINABLE span #-} | ||
647 | |||
648 | {-| Split a byte stream in two, where the first byte stream is the longest | ||
649 | consecutive group of bytes that don't satisfy the predicate | ||
650 | -} | ||
651 | break | ||
652 | :: (Monad m) | ||
653 | => (Char -> Bool) | ||
654 | -> Producer Text m r | ||
655 | -> Producer Text m (Producer Text m r) | ||
656 | break predicate = span (not . predicate) | ||
657 | {-# INLINABLE break #-} | ||
658 | |||
659 | {-| Split a byte stream into sub-streams delimited by bytes that satisfy the | ||
660 | predicate | ||
661 | -} | ||
662 | splitWith | ||
663 | :: (Monad m) | ||
664 | => (Char -> Bool) | ||
665 | -> Producer Text m r | ||
666 | -> PP.FreeT (Producer Text m) m r | ||
667 | splitWith predicate p0 = PP.FreeT (go0 p0) | ||
668 | where | ||
669 | go0 p = do | ||
670 | x <- next p | ||
671 | case x of | ||
672 | Left r -> return (PP.Pure r) | ||
673 | Right (txt, p') -> | ||
674 | if (T.null txt) | ||
675 | then go0 p' | ||
676 | else return $ PP.Free $ do | ||
677 | p'' <- span (not . predicate) (yield txt >> p') | ||
678 | return $ PP.FreeT (go1 p'') | ||
679 | go1 p = do | ||
680 | x <- nextChar p | ||
681 | return $ case x of | ||
682 | Left r -> PP.Pure r | ||
683 | Right (_, p') -> PP.Free $ do | ||
684 | p'' <- span (not . predicate) p' | ||
685 | return $ PP.FreeT (go1 p'') | ||
686 | {-# INLINABLE splitWith #-} | ||
687 | |||
688 | -- | Split a text stream using the given 'Char' as the delimiter | ||
689 | split :: (Monad m) | ||
690 | => Char | ||
691 | -> Producer Text m r | ||
692 | -> FreeT (Producer Text m) m r | ||
693 | split c = splitWith (c ==) | ||
694 | {-# INLINABLE split #-} | ||
695 | |||
696 | {-| Group a text stream into 'FreeT'-delimited byte streams using the supplied | ||
697 | equality predicate | ||
698 | -} | ||
699 | groupBy | ||
700 | :: (Monad m) | ||
701 | => (Char -> Char -> Bool) | ||
702 | -> Producer Text m r | ||
703 | -> FreeT (Producer Text m) m r | ||
704 | groupBy equal p0 = PP.FreeT (go p0) | ||
705 | where | ||
706 | go p = do | ||
707 | x <- next p | ||
708 | case x of | ||
709 | Left r -> return (PP.Pure r) | ||
710 | Right (txt, p') -> case (T.uncons txt) of | ||
711 | Nothing -> go p' | ||
712 | Just (c, _) -> do | ||
713 | return $ PP.Free $ do | ||
714 | p'' <- span (equal c) (yield txt >> p') | ||
715 | return $ PP.FreeT (go p'') | ||
716 | {-# INLINABLE groupBy #-} | ||
717 | |||
718 | -- | Group a byte stream into 'FreeT'-delimited byte streams of identical bytes | ||
719 | group | ||
720 | :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r | ||
721 | group = groupBy (==) | ||
722 | {-# INLINABLE group #-} | ||
723 | |||
724 | {-| Split a byte stream into 'FreeT'-delimited lines | ||
725 | |||
726 | Note: This function is purely for demonstration purposes since it assumes a | ||
727 | particular encoding. You should prefer the 'Data.Text.Text' equivalent of | ||
728 | this function from the upcoming @pipes-text@ library. | ||
729 | -} | ||
730 | lines | ||
731 | :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r | ||
732 | lines p0 = PP.FreeT (go0 p0) | ||
733 | where | ||
734 | go0 p = do | ||
735 | x <- next p | ||
736 | case x of | ||
737 | Left r -> return (PP.Pure r) | ||
738 | Right (txt, p') -> | ||
739 | if (T.null txt) | ||
740 | then go0 p' | ||
741 | else return $ PP.Free $ go1 (yield txt >> p') | ||
742 | go1 p = do | ||
743 | p' <- break ('\n' ==) p | ||
744 | return $ PP.FreeT (go2 p') | ||
745 | go2 p = do | ||
746 | x <- nextChar p | ||
747 | return $ case x of | ||
748 | Left r -> PP.Pure r | ||
749 | Right (_, p') -> PP.Free (go1 p') | ||
750 | {-# INLINABLE lines #-} | ||
751 | |||
752 | |||
753 | |||
754 | -- | Split a text stream into 'FreeT'-delimited words | ||
755 | words | ||
756 | :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r | ||
757 | words p0 = removeEmpty (splitWith isSpace p0) | ||
758 | where | ||
759 | removeEmpty f = PP.FreeT $ do | ||
760 | x <- PP.runFreeT f | ||
761 | case x of | ||
762 | PP.Pure r -> return (PP.Pure r) | ||
763 | PP.Free p -> do | ||
764 | y <- next p | ||
765 | case y of | ||
766 | Left f' -> PP.runFreeT (removeEmpty f') | ||
767 | Right (bs, p') -> return $ PP.Free $ do | ||
768 | yield bs | ||
769 | f' <- p' | ||
770 | return (removeEmpty f') | ||
771 | {-# INLINABLE words #-} | ||
772 | |||
773 | -- | Intersperse a 'Char' in between the bytes of the byte stream | ||
774 | intersperse | ||
775 | :: (Monad m) => Char -> Producer Text m r -> Producer Text m r | ||
776 | intersperse c = go0 | ||
777 | where | ||
778 | go0 p = do | ||
779 | x <- lift (next p) | ||
780 | case x of | ||
781 | Left r -> return r | ||
782 | Right (txt, p') -> do | ||
783 | yield (T.intersperse c txt) | ||
784 | go1 p' | ||
785 | go1 p = do | ||
786 | x <- lift (next p) | ||
787 | case x of | ||
788 | Left r -> return r | ||
789 | Right (txt, p') -> do | ||
790 | yield (T.singleton c) | ||
791 | yield (T.intersperse c txt) | ||
792 | go1 p' | ||
793 | {-# INLINABLE intersperse #-} | ||
794 | |||
795 | {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after | ||
796 | interspersing a text stream in between them | ||
797 | -} | ||
798 | intercalate | ||
799 | :: (Monad m) | ||
800 | => Producer Text m () | ||
801 | -> FreeT (Producer Text m) m r | ||
802 | -> Producer Text m r | ||
803 | intercalate p0 = go0 | ||
804 | where | ||
805 | go0 f = do | ||
806 | x <- lift (PP.runFreeT f) | ||
807 | case x of | ||
808 | PP.Pure r -> return r | ||
809 | PP.Free p -> do | ||
810 | f' <- p | ||
811 | go1 f' | ||
812 | go1 f = do | ||
813 | x <- lift (PP.runFreeT f) | ||
814 | case x of | ||
815 | PP.Pure r -> return r | ||
816 | PP.Free p -> do | ||
817 | p0 | ||
818 | f' <- p | ||
819 | go1 f' | ||
820 | {-# INLINABLE intercalate #-} | ||
821 | |||
822 | {-| Join 'FreeT'-delimited lines into a byte stream | ||
823 | -} | ||
824 | unlines | ||
825 | :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r | ||
826 | unlines = go | ||
827 | where | ||
828 | go f = do | ||
829 | x <- lift (PP.runFreeT f) | ||
830 | case x of | ||
831 | PP.Pure r -> return r | ||
832 | PP.Free p -> do | ||
833 | f' <- p | ||
834 | yield $ T.singleton '\n' | ||
835 | go f' | ||
836 | {-# INLINABLE unlines #-} | ||
837 | |||
838 | {-| Join 'FreeT'-delimited words into a text stream | ||
839 | -} | ||
840 | unwords | ||
841 | :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r | ||
842 | unwords = intercalate (yield $ T.pack " ") | ||
843 | {-# INLINABLE unwords #-} | ||
844 | |||
845 | {- $parse | ||
846 | The following parsing utilities are single-character analogs of the ones found | ||
847 | @pipes-parse@. | ||
848 | -} | ||
849 | |||
850 | {- $reexports | ||
851 | @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'. | ||
852 | |||
853 | @Data.Text@ re-exports the 'Text' type. | ||
854 | |||
855 | @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type). | ||
856 | -} \ No newline at end of file | ||
diff --git a/Pipes/Text/Parse.hs b/Pipes/Text/Parse.hs new file mode 100644 index 0000000..8c3a13e --- /dev/null +++ b/Pipes/Text/Parse.hs | |||
@@ -0,0 +1,139 @@ | |||
1 | -- | Parsing utilities for characterstrings, in the style of @pipes-parse@ | ||
2 | |||
3 | module Pipes.Text.Parse ( | ||
4 | -- * Parsers | ||
5 | nextChar, | ||
6 | drawChar, | ||
7 | unDrawChar, | ||
8 | peekChar, | ||
9 | isEndOfChars, | ||
10 | take, | ||
11 | takeWhile | ||
12 | ) where | ||
13 | |||
14 | import Control.Monad.Trans.State.Strict (StateT, modify) | ||
15 | import qualified Data.Text as T | ||
16 | import Data.Text (Text) | ||
17 | |||
18 | import Pipes | ||
19 | import qualified Pipes.Parse as PP | ||
20 | |||
21 | import Prelude hiding (take, takeWhile) | ||
22 | |||
23 | {-| Consume the first character from a 'Text' stream | ||
24 | |||
25 | 'next' either fails with a 'Left' if the 'Producer' has no more characters or | ||
26 | succeeds with a 'Right' providing the next byte and the remainder of the | ||
27 | 'Producer'. | ||
28 | -} | ||
29 | nextChar | ||
30 | :: (Monad m) | ||
31 | => Producer Text m r | ||
32 | -> m (Either r (Char, Producer Text m r)) | ||
33 | nextChar = go | ||
34 | where | ||
35 | go p = do | ||
36 | x <- next p | ||
37 | case x of | ||
38 | Left r -> return (Left r) | ||
39 | Right (txt, p') -> case (T.uncons txt) of | ||
40 | Nothing -> go p' | ||
41 | Just (c, txt') -> return (Right (c, yield txt' >> p')) | ||
42 | {-# INLINABLE nextChar #-} | ||
43 | |||
44 | {-| Draw one 'Char' from the underlying 'Producer', returning 'Left' if the | ||
45 | 'Producer' is empty | ||
46 | -} | ||
47 | drawChar :: (Monad m) => StateT (Producer Text m r) m (Either r Char) | ||
48 | drawChar = do | ||
49 | x <- PP.draw | ||
50 | case x of | ||
51 | Left r -> return (Left r) | ||
52 | Right txt -> case (T.uncons txt) of | ||
53 | Nothing -> drawChar | ||
54 | Just (c, txt') -> do | ||
55 | PP.unDraw txt' | ||
56 | return (Right c) | ||
57 | {-# INLINABLE drawChar #-} | ||
58 | |||
59 | -- | Push back a 'Char' onto the underlying 'Producer' | ||
60 | unDrawChar :: (Monad m) => Char -> StateT (Producer Text m r) m () | ||
61 | unDrawChar c = modify (yield (T.singleton c) >>) | ||
62 | {-# INLINABLE unDrawChar #-} | ||
63 | |||
64 | {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to | ||
65 | push the 'Char' back | ||
66 | |||
67 | > peekChar = do | ||
68 | > x <- drawChar | ||
69 | > case x of | ||
70 | > Left _ -> return () | ||
71 | > Right c -> unDrawChar c | ||
72 | > return x | ||
73 | -} | ||
74 | peekChar :: (Monad m) => StateT (Producer Text m r) m (Either r Char) | ||
75 | peekChar = do | ||
76 | x <- drawChar | ||
77 | case x of | ||
78 | Left _ -> return () | ||
79 | Right c -> unDrawChar c | ||
80 | return x | ||
81 | {-# INLINABLE peekChar #-} | ||
82 | |||
83 | {-| Check if the underlying 'Producer' has no more characters | ||
84 | |||
85 | Note that this will skip over empty 'Text' chunks, unlike | ||
86 | 'PP.isEndOfInput' from @pipes-parse@. | ||
87 | |||
88 | > isEndOfChars = liftM isLeft peekChar | ||
89 | -} | ||
90 | isEndOfChars :: (Monad m) => StateT (Producer Text m r) m Bool | ||
91 | isEndOfChars = do | ||
92 | x <- peekChar | ||
93 | return (case x of | ||
94 | Left _ -> True | ||
95 | Right _ -> False ) | ||
96 | {-# INLINABLE isEndOfChars #-} | ||
97 | |||
98 | {-| @(take n)@ only allows @n@ characters to pass | ||
99 | |||
100 | Unlike 'take', this 'PP.unDraw's unused characters | ||
101 | -} | ||
102 | take :: (Monad m, Integral a) => a -> Pipe Text Text (StateT (Producer Text m r) m) () | ||
103 | take n0 = go n0 where | ||
104 | go n | ||
105 | | n <= 0 = return () | ||
106 | | otherwise = do | ||
107 | txt <- await | ||
108 | let len = fromIntegral (T.length txt) | ||
109 | if (len > n) | ||
110 | then do | ||
111 | let n' = fromIntegral n | ||
112 | lift . PP.unDraw $ T.drop n' txt | ||
113 | yield $ T.take n' txt | ||
114 | else do | ||
115 | yield txt | ||
116 | go (n - len) | ||
117 | {-# INLINABLE take #-} | ||
118 | |||
119 | {-| Take characters until they fail the predicate | ||
120 | |||
121 | Unlike 'takeWhile', this 'PP.unDraw's unused characters | ||
122 | -} | ||
123 | takeWhile | ||
124 | :: (Monad m) | ||
125 | => (Char -> Bool) | ||
126 | -> Pipe Text Text (StateT (Producer Text m r) m) () | ||
127 | takeWhile predicate = go | ||
128 | where | ||
129 | go = do | ||
130 | txt <- await | ||
131 | let (prefix, suffix) = T.span predicate txt | ||
132 | if (T.null suffix) | ||
133 | then do | ||
134 | yield txt | ||
135 | go | ||
136 | else do | ||
137 | lift $ PP.unDraw suffix | ||
138 | yield prefix | ||
139 | {-# INLINABLE takeWhile #-} | ||