diff options
author | michaelt <what_is_it_to_do_anything@yahoo.com> | 2013-10-22 18:40:23 -0400 |
---|---|---|
committer | michaelt <what_is_it_to_do_anything@yahoo.com> | 2013-10-22 18:40:23 -0400 |
commit | 7faef8bceff2440056da59920fb932b5b76f6541 (patch) | |
tree | 935ae450dad672aca4b0fda84f9993eb93ba547c /Data/Text | |
parent | 31f41a5d197ca9f1a613f2dc684a9fa0467a0f2e (diff) | |
download | text-pipes-7faef8bceff2440056da59920fb932b5b76f6541.tar.gz text-pipes-7faef8bceff2440056da59920fb932b5b76f6541.tar.zst text-pipes-7faef8bceff2440056da59920fb932b5b76f6541.zip |
new module names
Diffstat (limited to 'Data/Text')
-rw-r--r-- | Data/Text/Pipes.hs | 809 | ||||
-rw-r--r-- | Data/Text/Pipes/Parse.hs | 139 |
2 files changed, 0 insertions, 948 deletions
diff --git a/Data/Text/Pipes.hs b/Data/Text/Pipes.hs deleted file mode 100644 index 3063aff..0000000 --- a/Data/Text/Pipes.hs +++ /dev/null | |||
@@ -1,809 +0,0 @@ | |||
1 | {-# LANGUAGE RankNTypes, TypeFamilies, CPP #-} | ||
2 | |||
3 | {-| This module provides @pipes@ utilities for \"text streams\", which are | ||
4 | streams of 'Text' chunks. The individual chunks are uniformly @strict@, but | ||
5 | can interact lazy 'Text's and 'IO.Handle's. | ||
6 | |||
7 | To stream to or from 'IO.Handle's, use 'fromHandle' or 'toHandle'. For | ||
8 | example, the following program copies a document from one file to another: | ||
9 | |||
10 | > import Pipes | ||
11 | > import qualified Data.Text.Pipes as Text | ||
12 | > import System.IO | ||
13 | > | ||
14 | > main = | ||
15 | > withFile "inFile.txt" ReadMode $ \hIn -> | ||
16 | > withFile "outFile.txt" WriteMode $ \hOut -> | ||
17 | > runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut | ||
18 | |||
19 | To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe): | ||
20 | |||
21 | > import Pipes | ||
22 | > import qualified Data.Text.Pipes as Text | ||
23 | > import Pipes.Safe | ||
24 | > | ||
25 | > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt" | ||
26 | |||
27 | You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin' | ||
28 | and 'stdout' proxies, as with the following \"echo\" program: | ||
29 | |||
30 | > main = runEffect $ Text.stdin >-> Text.stdout | ||
31 | |||
32 | You can also translate pure lazy 'TL.Text's to and from proxies: | ||
33 | |||
34 | > main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout | ||
35 | |||
36 | In addition, this module provides many functions equivalent to lazy | ||
37 | 'Text' functions so that you can transform or fold text streams. For | ||
38 | example, to stream only the first three lines of 'stdin' to 'stdout' you | ||
39 | might write: | ||
40 | |||
41 | > import Pipes | ||
42 | > import qualified Pipes.Text as Text | ||
43 | > import qualified Pipes.Parse as Parse | ||
44 | > | ||
45 | > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout | ||
46 | > where | ||
47 | > takeLines n = Text.unlines . Parse.takeFree n . Text.lines | ||
48 | |||
49 | The above program will never bring more than one chunk of text (~ 32 KB) into | ||
50 | memory, no matter how long the lines are. | ||
51 | |||
52 | Note that functions in this library are designed to operate on streams that | ||
53 | are insensitive to text boundaries. This means that they may freely split | ||
54 | text into smaller texts and /discard empty texts/. However, they will | ||
55 | /never concatenate texts/ in order to provide strict upper bounds on memory | ||
56 | usage. | ||
57 | -} | ||
58 | |||
59 | module Data.Text.Pipes ( | ||
60 | -- * Producers | ||
61 | fromLazy, | ||
62 | stdin, | ||
63 | fromHandle, | ||
64 | readFile, | ||
65 | stdinLn, | ||
66 | |||
67 | -- * Consumers | ||
68 | stdout, | ||
69 | stdoutLn, | ||
70 | toHandle, | ||
71 | writeFile, | ||
72 | |||
73 | -- * Pipes | ||
74 | map, | ||
75 | concatMap, | ||
76 | take, | ||
77 | drop, | ||
78 | takeWhile, | ||
79 | dropWhile, | ||
80 | filter, | ||
81 | scan, | ||
82 | |||
83 | -- * Folds | ||
84 | toLazy, | ||
85 | toLazyM, | ||
86 | fold, | ||
87 | head, | ||
88 | last, | ||
89 | null, | ||
90 | length, | ||
91 | any, | ||
92 | all, | ||
93 | maximum, | ||
94 | minimum, | ||
95 | find, | ||
96 | index, | ||
97 | -- elemIndex, | ||
98 | -- findIndex, | ||
99 | count, | ||
100 | |||
101 | -- * Splitters | ||
102 | splitAt, | ||
103 | chunksOf, | ||
104 | span, | ||
105 | break, | ||
106 | splitWith, | ||
107 | split, | ||
108 | groupBy, | ||
109 | group, | ||
110 | lines, | ||
111 | words, | ||
112 | #if MIN_VERSION_text(0,11,4) | ||
113 | decodeUtf8, | ||
114 | #endif | ||
115 | -- * Transformations | ||
116 | intersperse, | ||
117 | |||
118 | -- * Joiners | ||
119 | intercalate, | ||
120 | unlines, | ||
121 | unwords, | ||
122 | |||
123 | -- * Character Parsers | ||
124 | -- $parse | ||
125 | nextChar, | ||
126 | drawChar, | ||
127 | unDrawChar, | ||
128 | peekChar, | ||
129 | isEndOfChars, | ||
130 | |||
131 | -- * Re-exports | ||
132 | -- $reexports | ||
133 | module Data.Text, | ||
134 | module Pipes.Parse | ||
135 | ) where | ||
136 | |||
137 | import Control.Exception (throwIO, try) | ||
138 | import Control.Monad (liftM, unless) | ||
139 | import Control.Monad.Trans.State.Strict (StateT) | ||
140 | import qualified Data.Text as T | ||
141 | import qualified Data.Text.IO as T | ||
142 | import qualified Data.Text.Encoding as TE | ||
143 | import Data.Text (Text) | ||
144 | import qualified Data.Text.Lazy as TL | ||
145 | import qualified Data.Text.Lazy.IO as TL | ||
146 | import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize) | ||
147 | import Data.ByteString.Unsafe (unsafeTake, unsafeDrop) | ||
148 | import Data.ByteString (ByteString) | ||
149 | import qualified Data.ByteString as B | ||
150 | import Data.Char (ord) | ||
151 | import Data.Functor.Identity (Identity) | ||
152 | import qualified Data.List as List | ||
153 | import Foreign.C.Error (Errno(Errno), ePIPE) | ||
154 | import qualified GHC.IO.Exception as G | ||
155 | import Pipes | ||
156 | import qualified Pipes.ByteString.Parse as PBP | ||
157 | import Data.Text.Pipes.Parse ( | ||
158 | nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) | ||
159 | import Pipes.Core (respond, Server') | ||
160 | import qualified Pipes.Parse as PP | ||
161 | import Pipes.Parse (input, concat, FreeT) | ||
162 | import qualified Pipes.Safe.Prelude as Safe | ||
163 | import qualified Pipes.Safe as Safe | ||
164 | import Pipes.Safe (MonadSafe(..), Base(..)) | ||
165 | import qualified Pipes.Prelude as P | ||
166 | import qualified System.IO as IO | ||
167 | import Data.Char (isSpace) | ||
168 | import Prelude hiding ( | ||
169 | all, | ||
170 | any, | ||
171 | break, | ||
172 | concat, | ||
173 | concatMap, | ||
174 | drop, | ||
175 | dropWhile, | ||
176 | elem, | ||
177 | filter, | ||
178 | head, | ||
179 | last, | ||
180 | lines, | ||
181 | length, | ||
182 | map, | ||
183 | maximum, | ||
184 | minimum, | ||
185 | notElem, | ||
186 | null, | ||
187 | readFile, | ||
188 | span, | ||
189 | splitAt, | ||
190 | take, | ||
191 | takeWhile, | ||
192 | unlines, | ||
193 | unwords, | ||
194 | words, | ||
195 | writeFile ) | ||
196 | |||
197 | -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's | ||
198 | fromLazy :: (Monad m) => TL.Text -> Producer' Text m () | ||
199 | fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) | ||
200 | {-# INLINABLE fromLazy #-} | ||
201 | |||
202 | -- | Stream bytes from 'stdin' | ||
203 | stdin :: MonadIO m => Producer' Text m () | ||
204 | stdin = fromHandle IO.stdin | ||
205 | {-# INLINABLE stdin #-} | ||
206 | |||
207 | {-| Convert a 'IO.Handle' into a text stream using a text size | ||
208 | determined by the good sense of the text library. | ||
209 | |||
210 | -} | ||
211 | |||
212 | fromHandle :: MonadIO m => IO.Handle -> Producer' Text m () | ||
213 | fromHandle h = go where | ||
214 | go = do txt <- liftIO (T.hGetChunk h) | ||
215 | unless (T.null txt) $ do yield txt | ||
216 | go | ||
217 | {-# INLINABLE fromHandle#-} | ||
218 | |||
219 | {-| Stream text from a file using Pipes.Safe | ||
220 | |||
221 | >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout | ||
222 | MAIN = PUTSTRLN "HELLO WORLD" | ||
223 | -} | ||
224 | |||
225 | readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m () | ||
226 | readFile file = Safe.withFile file IO.ReadMode fromHandle | ||
227 | {-# INLINABLE readFile #-} | ||
228 | |||
229 | {-| Stream lines of text from stdin (for testing in ghci etc.) | ||
230 | |||
231 | >>> let safely = runSafeT . runEffect | ||
232 | >>> safely $ for Text.stdinLn (lift . lift . print . T.length) | ||
233 | hello | ||
234 | 5 | ||
235 | world | ||
236 | 5 | ||
237 | |||
238 | -} | ||
239 | stdinLn :: MonadIO m => Producer' Text m () | ||
240 | stdinLn = go where | ||
241 | go = do | ||
242 | eof <- liftIO (IO.hIsEOF IO.stdin) | ||
243 | unless eof $ do | ||
244 | txt <- liftIO (T.hGetLine IO.stdin) | ||
245 | yield txt | ||
246 | go | ||
247 | |||
248 | |||
249 | {-| Stream text to 'stdout' | ||
250 | |||
251 | Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe. | ||
252 | |||
253 | Note: For best performance, use @(for source (liftIO . putStr))@ instead of | ||
254 | @(source >-> stdout)@ in suitable cases. | ||
255 | -} | ||
256 | stdout :: MonadIO m => Consumer' Text m () | ||
257 | stdout = go | ||
258 | where | ||
259 | go = do | ||
260 | txt <- await | ||
261 | x <- liftIO $ try (T.putStr txt) | ||
262 | case x of | ||
263 | Left (G.IOError { G.ioe_type = G.ResourceVanished | ||
264 | , G.ioe_errno = Just ioe }) | ||
265 | | Errno ioe == ePIPE | ||
266 | -> return () | ||
267 | Left e -> liftIO (throwIO e) | ||
268 | Right () -> go | ||
269 | {-# INLINABLE stdout #-} | ||
270 | |||
271 | stdoutLn :: (MonadIO m) => Consumer' Text m () | ||
272 | stdoutLn = go | ||
273 | where | ||
274 | go = do | ||
275 | str <- await | ||
276 | x <- liftIO $ try (T.putStrLn str) | ||
277 | case x of | ||
278 | Left (G.IOError { G.ioe_type = G.ResourceVanished | ||
279 | , G.ioe_errno = Just ioe }) | ||
280 | | Errno ioe == ePIPE | ||
281 | -> return () | ||
282 | Left e -> liftIO (throwIO e) | ||
283 | Right () -> go | ||
284 | {-# INLINABLE stdoutLn #-} | ||
285 | |||
286 | {-| Convert a text stream into a 'Handle' | ||
287 | |||
288 | Note: again, for best performance, where possible use | ||
289 | @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@. | ||
290 | -} | ||
291 | toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r | ||
292 | toHandle h = for cat (liftIO . T.hPutStr h) | ||
293 | {-# INLINABLE toHandle #-} | ||
294 | |||
295 | -- | Stream text into a file. Uses @pipes-safe@. | ||
296 | writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m () | ||
297 | writeFile file = Safe.withFile file IO.WriteMode toHandle | ||
298 | |||
299 | -- | Apply a transformation to each 'Char' in the stream | ||
300 | map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r | ||
301 | map f = P.map (T.map f) | ||
302 | {-# INLINABLE map #-} | ||
303 | |||
304 | -- | Map a function over the characters of a text stream and concatenate the results | ||
305 | concatMap | ||
306 | :: (Monad m) => (Char -> Text) -> Pipe Text Text m r | ||
307 | concatMap f = P.map (T.concatMap f) | ||
308 | {-# INLINABLE concatMap #-} | ||
309 | |||
310 | -- | @(take n)@ only allows @n@ individual characters to pass; | ||
311 | -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass. | ||
312 | take :: (Monad m, Integral a) => a -> Pipe Text Text m () | ||
313 | take n0 = go n0 where | ||
314 | go n | ||
315 | | n <= 0 = return () | ||
316 | | otherwise = do | ||
317 | txt <- await | ||
318 | let len = fromIntegral (T.length txt) | ||
319 | if (len > n) | ||
320 | then yield (T.take (fromIntegral n) txt) | ||
321 | else do | ||
322 | yield txt | ||
323 | go (n - len) | ||
324 | {-# INLINABLE take #-} | ||
325 | |||
326 | -- | @(drop n)@ drops the first @n@ characters | ||
327 | drop :: (Monad m, Integral a) => a -> Pipe Text Text m r | ||
328 | drop n0 = go n0 where | ||
329 | go n | ||
330 | | n <= 0 = cat | ||
331 | | otherwise = do | ||
332 | txt <- await | ||
333 | let len = fromIntegral (T.length txt) | ||
334 | if (len >= n) | ||
335 | then do | ||
336 | yield (T.drop (fromIntegral n) txt) | ||
337 | cat | ||
338 | else go (n - len) | ||
339 | {-# INLINABLE drop #-} | ||
340 | |||
341 | -- | Take characters until they fail the predicate | ||
342 | takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m () | ||
343 | takeWhile predicate = go | ||
344 | where | ||
345 | go = do | ||
346 | txt <- await | ||
347 | let (prefix, suffix) = T.span predicate txt | ||
348 | if (T.null suffix) | ||
349 | then do | ||
350 | yield txt | ||
351 | go | ||
352 | else yield prefix | ||
353 | {-# INLINABLE takeWhile #-} | ||
354 | |||
355 | -- | Drop characters until they fail the predicate | ||
356 | dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r | ||
357 | dropWhile predicate = go where | ||
358 | go = do | ||
359 | txt <- await | ||
360 | case T.findIndex (not . predicate) txt of | ||
361 | Nothing -> go | ||
362 | Just i -> do | ||
363 | yield (T.drop i txt) | ||
364 | cat | ||
365 | {-# INLINABLE dropWhile #-} | ||
366 | |||
367 | -- | Only allows 'Char's to pass if they satisfy the predicate | ||
368 | filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r | ||
369 | filter predicate = P.map (T.filter predicate) | ||
370 | {-# INLINABLE filter #-} | ||
371 | |||
372 | |||
373 | -- | Strict left scan over the characters | ||
374 | scan | ||
375 | :: (Monad m) | ||
376 | => (Char -> Char -> Char) -> Char -> Pipe Text Text m r | ||
377 | scan step begin = go begin | ||
378 | where | ||
379 | go c = do | ||
380 | txt <- await | ||
381 | let txt' = T.scanl step c txt | ||
382 | c' = T.last txt' | ||
383 | yield txt' | ||
384 | go c' | ||
385 | {-# INLINABLE scan #-} | ||
386 | |||
387 | {-| Fold a pure 'Producer' of strict 'Text's into a lazy | ||
388 | 'TL.Text' | ||
389 | -} | ||
390 | toLazy :: Producer Text Identity () -> TL.Text | ||
391 | toLazy = TL.fromChunks . P.toList | ||
392 | {-# INLINABLE toLazy #-} | ||
393 | |||
394 | {-| Fold an effectful 'Producer' of strict 'Text's into a lazy | ||
395 | 'TL.Text' | ||
396 | |||
397 | Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for | ||
398 | simple testing purposes. Idiomatic @pipes@ style consumes the chunks | ||
399 | immediately as they are generated instead of loading them all into memory. | ||
400 | -} | ||
401 | toLazyM :: (Monad m) => Producer Text m () -> m TL.Text | ||
402 | toLazyM = liftM TL.fromChunks . P.toListM | ||
403 | {-# INLINABLE toLazyM #-} | ||
404 | |||
405 | -- | Reduce the text stream using a strict left fold over characters | ||
406 | fold | ||
407 | :: Monad m | ||
408 | => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r | ||
409 | fold step begin done = P.fold (T.foldl' step) begin done | ||
410 | {-# INLINABLE fold #-} | ||
411 | |||
412 | -- | Retrieve the first 'Char' | ||
413 | head :: (Monad m) => Producer Text m () -> m (Maybe Char) | ||
414 | head = go | ||
415 | where | ||
416 | go p = do | ||
417 | x <- nextChar p | ||
418 | case x of | ||
419 | Left _ -> return Nothing | ||
420 | Right (c, _) -> return (Just c) | ||
421 | {-# INLINABLE head #-} | ||
422 | |||
423 | -- | Retrieve the last 'Char' | ||
424 | last :: (Monad m) => Producer Text m () -> m (Maybe Char) | ||
425 | last = go Nothing | ||
426 | where | ||
427 | go r p = do | ||
428 | x <- next p | ||
429 | case x of | ||
430 | Left () -> return r | ||
431 | Right (txt, p') -> | ||
432 | if (T.null txt) | ||
433 | then go r p' | ||
434 | else go (Just $ T.last txt) p' | ||
435 | {-# INLINABLE last #-} | ||
436 | |||
437 | -- | Determine if the stream is empty | ||
438 | null :: (Monad m) => Producer Text m () -> m Bool | ||
439 | null = P.all T.null | ||
440 | {-# INLINABLE null #-} | ||
441 | |||
442 | -- | Count the number of bytes | ||
443 | length :: (Monad m, Num n) => Producer Text m () -> m n | ||
444 | length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id | ||
445 | {-# INLINABLE length #-} | ||
446 | |||
447 | -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate | ||
448 | any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool | ||
449 | any predicate = P.any (T.any predicate) | ||
450 | {-# INLINABLE any #-} | ||
451 | |||
452 | -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate | ||
453 | all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool | ||
454 | all predicate = P.all (T.all predicate) | ||
455 | {-# INLINABLE all #-} | ||
456 | |||
457 | -- | Return the maximum 'Char' within a byte stream | ||
458 | maximum :: (Monad m) => Producer Text m () -> m (Maybe Char) | ||
459 | maximum = P.fold step Nothing id | ||
460 | where | ||
461 | step mc txt = | ||
462 | if (T.null txt) | ||
463 | then mc | ||
464 | else Just $ case mc of | ||
465 | Nothing -> T.maximum txt | ||
466 | Just c -> max c (T.maximum txt) | ||
467 | {-# INLINABLE maximum #-} | ||
468 | |||
469 | -- | Return the minimum 'Char' within a byte stream | ||
470 | minimum :: (Monad m) => Producer Text m () -> m (Maybe Char) | ||
471 | minimum = P.fold step Nothing id | ||
472 | where | ||
473 | step mc txt = | ||
474 | if (T.null txt) | ||
475 | then mc | ||
476 | else case mc of | ||
477 | Nothing -> Just (T.minimum txt) | ||
478 | Just c -> Just (min c (T.minimum txt)) | ||
479 | {-# INLINABLE minimum #-} | ||
480 | |||
481 | -- | Find the first element in the stream that matches the predicate | ||
482 | find | ||
483 | :: (Monad m) | ||
484 | => (Char -> Bool) -> Producer Text m () -> m (Maybe Char) | ||
485 | find predicate p = head (p >-> filter predicate) | ||
486 | {-# INLINABLE find #-} | ||
487 | |||
488 | -- | Index into a byte stream | ||
489 | index | ||
490 | :: (Monad m, Integral a) | ||
491 | => a-> Producer Text m () -> m (Maybe Char) | ||
492 | index n p = head (p >-> drop n) | ||
493 | {-# INLINABLE index #-} | ||
494 | |||
495 | -- | Find the index of an element that matches the given 'Char' | ||
496 | -- elemIndex | ||
497 | -- :: (Monad m, Num n) => Char -> Producer Text m () -> m (Maybe n) | ||
498 | -- elemIndex w8 = findIndex (w8 ==) | ||
499 | -- {-# INLINABLE elemIndex #-} | ||
500 | |||
501 | -- | Store the first index of an element that satisfies the predicate | ||
502 | -- findIndex | ||
503 | -- :: (Monad m, Num n) | ||
504 | -- => (Char -> Bool) -> Producer Text m () -> m (Maybe n) | ||
505 | -- findIndex predicate p = P.head (p >-> findIndices predicate) | ||
506 | -- {-# INLINABLE findIndex #-} | ||
507 | -- | ||
508 | -- | Store a tally of how many segments match the given 'Text' | ||
509 | count :: (Monad m, Num n) => Text -> Producer Text m () -> m n | ||
510 | count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) | ||
511 | {-# INLINABLE count #-} | ||
512 | |||
513 | #if MIN_VERSION_text(0,11,4) | ||
514 | -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded | ||
515 | -- into a Pipe of Text | ||
516 | decodeUtf8 | ||
517 | :: Monad m | ||
518 | => Producer ByteString m r -> Producer Text m (Producer ByteString m r) | ||
519 | decodeUtf8 = go TE.streamDecodeUtf8 | ||
520 | where go dec p = do | ||
521 | x <- lift (next p) | ||
522 | case x of | ||
523 | Left r -> return (return r) | ||
524 | Right (chunk, p') -> do | ||
525 | let TE.Some text l dec' = dec chunk | ||
526 | if B.null l | ||
527 | then do | ||
528 | yield text | ||
529 | go dec' p' | ||
530 | else return $ do | ||
531 | yield l | ||
532 | p' | ||
533 | {-# INLINEABLE decodeUtf8 #-} | ||
534 | #endif | ||
535 | |||
536 | -- | Splits a 'Producer' after the given number of characters | ||
537 | splitAt | ||
538 | :: (Monad m, Integral n) | ||
539 | => n | ||
540 | -> Producer Text m r | ||
541 | -> Producer' Text m (Producer Text m r) | ||
542 | splitAt = go | ||
543 | where | ||
544 | go 0 p = return p | ||
545 | go n p = do | ||
546 | x <- lift (next p) | ||
547 | case x of | ||
548 | Left r -> return (return r) | ||
549 | Right (txt, p') -> do | ||
550 | let len = fromIntegral (T.length txt) | ||
551 | if (len <= n) | ||
552 | then do | ||
553 | yield txt | ||
554 | go (n - len) p' | ||
555 | else do | ||
556 | let (prefix, suffix) = T.splitAt (fromIntegral n) txt | ||
557 | yield prefix | ||
558 | return (yield suffix >> p') | ||
559 | {-# INLINABLE splitAt #-} | ||
560 | |||
561 | -- | Split a text stream into 'FreeT'-delimited text streams of fixed size | ||
562 | chunksOf | ||
563 | :: (Monad m, Integral n) | ||
564 | => n -> Producer Text m r -> FreeT (Producer Text m) m r | ||
565 | chunksOf n p0 = PP.FreeT (go p0) | ||
566 | where | ||
567 | go p = do | ||
568 | x <- next p | ||
569 | return $ case x of | ||
570 | Left r -> PP.Pure r | ||
571 | Right (txt, p') -> PP.Free $ do | ||
572 | p'' <- splitAt n (yield txt >> p') | ||
573 | return $ PP.FreeT (go p'') | ||
574 | {-# INLINABLE chunksOf #-} | ||
575 | |||
576 | {-| Split a text stream in two, where the first text stream is the longest | ||
577 | consecutive group of text that satisfy the predicate | ||
578 | -} | ||
579 | span | ||
580 | :: (Monad m) | ||
581 | => (Char -> Bool) | ||
582 | -> Producer Text m r | ||
583 | -> Producer' Text m (Producer Text m r) | ||
584 | span predicate = go | ||
585 | where | ||
586 | go p = do | ||
587 | x <- lift (next p) | ||
588 | case x of | ||
589 | Left r -> return (return r) | ||
590 | Right (txt, p') -> do | ||
591 | let (prefix, suffix) = T.span predicate txt | ||
592 | if (T.null suffix) | ||
593 | then do | ||
594 | yield txt | ||
595 | go p' | ||
596 | else do | ||
597 | yield prefix | ||
598 | return (yield suffix >> p') | ||
599 | {-# INLINABLE span #-} | ||
600 | |||
601 | {-| Split a byte stream in two, where the first byte stream is the longest | ||
602 | consecutive group of bytes that don't satisfy the predicate | ||
603 | -} | ||
604 | break | ||
605 | :: (Monad m) | ||
606 | => (Char -> Bool) | ||
607 | -> Producer Text m r | ||
608 | -> Producer Text m (Producer Text m r) | ||
609 | break predicate = span (not . predicate) | ||
610 | {-# INLINABLE break #-} | ||
611 | |||
612 | {-| Split a byte stream into sub-streams delimited by bytes that satisfy the | ||
613 | predicate | ||
614 | -} | ||
615 | splitWith | ||
616 | :: (Monad m) | ||
617 | => (Char -> Bool) | ||
618 | -> Producer Text m r | ||
619 | -> PP.FreeT (Producer Text m) m r | ||
620 | splitWith predicate p0 = PP.FreeT (go0 p0) | ||
621 | where | ||
622 | go0 p = do | ||
623 | x <- next p | ||
624 | case x of | ||
625 | Left r -> return (PP.Pure r) | ||
626 | Right (txt, p') -> | ||
627 | if (T.null txt) | ||
628 | then go0 p' | ||
629 | else return $ PP.Free $ do | ||
630 | p'' <- span (not . predicate) (yield txt >> p') | ||
631 | return $ PP.FreeT (go1 p'') | ||
632 | go1 p = do | ||
633 | x <- nextChar p | ||
634 | return $ case x of | ||
635 | Left r -> PP.Pure r | ||
636 | Right (_, p') -> PP.Free $ do | ||
637 | p'' <- span (not . predicate) p' | ||
638 | return $ PP.FreeT (go1 p'') | ||
639 | {-# INLINABLE splitWith #-} | ||
640 | |||
641 | -- | Split a text stream using the given 'Char' as the delimiter | ||
642 | split :: (Monad m) | ||
643 | => Char | ||
644 | -> Producer Text m r | ||
645 | -> FreeT (Producer Text m) m r | ||
646 | split c = splitWith (c ==) | ||
647 | {-# INLINABLE split #-} | ||
648 | |||
649 | {-| Group a text stream into 'FreeT'-delimited byte streams using the supplied | ||
650 | equality predicate | ||
651 | -} | ||
652 | groupBy | ||
653 | :: (Monad m) | ||
654 | => (Char -> Char -> Bool) | ||
655 | -> Producer Text m r | ||
656 | -> FreeT (Producer Text m) m r | ||
657 | groupBy equal p0 = PP.FreeT (go p0) | ||
658 | where | ||
659 | go p = do | ||
660 | x <- next p | ||
661 | case x of | ||
662 | Left r -> return (PP.Pure r) | ||
663 | Right (txt, p') -> case (T.uncons txt) of | ||
664 | Nothing -> go p' | ||
665 | Just (c, _) -> do | ||
666 | return $ PP.Free $ do | ||
667 | p'' <- span (equal c) (yield txt >> p') | ||
668 | return $ PP.FreeT (go p'') | ||
669 | {-# INLINABLE groupBy #-} | ||
670 | |||
671 | -- | Group a byte stream into 'FreeT'-delimited byte streams of identical bytes | ||
672 | group | ||
673 | :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r | ||
674 | group = groupBy (==) | ||
675 | {-# INLINABLE group #-} | ||
676 | |||
677 | {-| Split a byte stream into 'FreeT'-delimited lines | ||
678 | |||
679 | Note: This function is purely for demonstration purposes since it assumes a | ||
680 | particular encoding. You should prefer the 'Data.Text.Text' equivalent of | ||
681 | this function from the upcoming @pipes-text@ library. | ||
682 | -} | ||
683 | lines | ||
684 | :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r | ||
685 | lines p0 = PP.FreeT (go0 p0) | ||
686 | where | ||
687 | go0 p = do | ||
688 | x <- next p | ||
689 | case x of | ||
690 | Left r -> return (PP.Pure r) | ||
691 | Right (txt, p') -> | ||
692 | if (T.null txt) | ||
693 | then go0 p' | ||
694 | else return $ PP.Free $ go1 (yield txt >> p') | ||
695 | go1 p = do | ||
696 | p' <- break ('\n' ==) p | ||
697 | return $ PP.FreeT (go2 p') | ||
698 | go2 p = do | ||
699 | x <- nextChar p | ||
700 | return $ case x of | ||
701 | Left r -> PP.Pure r | ||
702 | Right (_, p') -> PP.Free (go1 p') | ||
703 | {-# INLINABLE lines #-} | ||
704 | |||
705 | |||
706 | |||
707 | -- | Split a text stream into 'FreeT'-delimited words | ||
708 | words | ||
709 | :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r | ||
710 | words p0 = removeEmpty (splitWith isSpace p0) | ||
711 | where | ||
712 | removeEmpty f = PP.FreeT $ do | ||
713 | x <- PP.runFreeT f | ||
714 | case x of | ||
715 | PP.Pure r -> return (PP.Pure r) | ||
716 | PP.Free p -> do | ||
717 | y <- next p | ||
718 | case y of | ||
719 | Left f' -> PP.runFreeT (removeEmpty f') | ||
720 | Right (bs, p') -> return $ PP.Free $ do | ||
721 | yield bs | ||
722 | f' <- p' | ||
723 | return (removeEmpty f') | ||
724 | {-# INLINABLE words #-} | ||
725 | |||
726 | -- | Intersperse a 'Char' in between the bytes of the byte stream | ||
727 | intersperse | ||
728 | :: (Monad m) => Char -> Producer Text m r -> Producer Text m r | ||
729 | intersperse c = go0 | ||
730 | where | ||
731 | go0 p = do | ||
732 | x <- lift (next p) | ||
733 | case x of | ||
734 | Left r -> return r | ||
735 | Right (txt, p') -> do | ||
736 | yield (T.intersperse c txt) | ||
737 | go1 p' | ||
738 | go1 p = do | ||
739 | x <- lift (next p) | ||
740 | case x of | ||
741 | Left r -> return r | ||
742 | Right (txt, p') -> do | ||
743 | yield (T.singleton c) | ||
744 | yield (T.intersperse c txt) | ||
745 | go1 p' | ||
746 | {-# INLINABLE intersperse #-} | ||
747 | |||
748 | {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after | ||
749 | interspersing a text stream in between them | ||
750 | -} | ||
751 | intercalate | ||
752 | :: (Monad m) | ||
753 | => Producer Text m () | ||
754 | -> FreeT (Producer Text m) m r | ||
755 | -> Producer Text m r | ||
756 | intercalate p0 = go0 | ||
757 | where | ||
758 | go0 f = do | ||
759 | x <- lift (PP.runFreeT f) | ||
760 | case x of | ||
761 | PP.Pure r -> return r | ||
762 | PP.Free p -> do | ||
763 | f' <- p | ||
764 | go1 f' | ||
765 | go1 f = do | ||
766 | x <- lift (PP.runFreeT f) | ||
767 | case x of | ||
768 | PP.Pure r -> return r | ||
769 | PP.Free p -> do | ||
770 | p0 | ||
771 | f' <- p | ||
772 | go1 f' | ||
773 | {-# INLINABLE intercalate #-} | ||
774 | |||
775 | {-| Join 'FreeT'-delimited lines into a byte stream | ||
776 | -} | ||
777 | unlines | ||
778 | :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r | ||
779 | unlines = go | ||
780 | where | ||
781 | go f = do | ||
782 | x <- lift (PP.runFreeT f) | ||
783 | case x of | ||
784 | PP.Pure r -> return r | ||
785 | PP.Free p -> do | ||
786 | f' <- p | ||
787 | yield $ T.singleton '\n' | ||
788 | go f' | ||
789 | {-# INLINABLE unlines #-} | ||
790 | |||
791 | {-| Join 'FreeT'-delimited words into a text stream | ||
792 | -} | ||
793 | unwords | ||
794 | :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r | ||
795 | unwords = intercalate (yield $ T.pack " ") | ||
796 | {-# INLINABLE unwords #-} | ||
797 | |||
798 | {- $parse | ||
799 | The following parsing utilities are single-character analogs of the ones found | ||
800 | @pipes-parse@. | ||
801 | -} | ||
802 | |||
803 | {- $reexports | ||
804 | @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'. | ||
805 | |||
806 | @Data.Text@ re-exports the 'Text' type. | ||
807 | |||
808 | @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type). | ||
809 | -} \ No newline at end of file | ||
diff --git a/Data/Text/Pipes/Parse.hs b/Data/Text/Pipes/Parse.hs deleted file mode 100644 index 675c7aa..0000000 --- a/Data/Text/Pipes/Parse.hs +++ /dev/null | |||
@@ -1,139 +0,0 @@ | |||
1 | -- | Parsing utilities for characterstrings, in the style of @pipes-parse@ | ||
2 | |||
3 | module Data.Text.Pipes.Parse ( | ||
4 | -- * Parsers | ||
5 | nextChar, | ||
6 | drawChar, | ||
7 | unDrawChar, | ||
8 | peekChar, | ||
9 | isEndOfChars, | ||
10 | take, | ||
11 | takeWhile | ||
12 | ) where | ||
13 | |||
14 | import Control.Monad.Trans.State.Strict (StateT, modify) | ||
15 | import qualified Data.Text as T | ||
16 | import Data.Text (Text) | ||
17 | |||
18 | import Pipes | ||
19 | import qualified Pipes.Parse as PP | ||
20 | |||
21 | import Prelude hiding (take, takeWhile) | ||
22 | |||
23 | {-| Consume the first character from a 'Text' stream | ||
24 | |||
25 | 'next' either fails with a 'Left' if the 'Producer' has no more characters or | ||
26 | succeeds with a 'Right' providing the next byte and the remainder of the | ||
27 | 'Producer'. | ||
28 | -} | ||
29 | nextChar | ||
30 | :: (Monad m) | ||
31 | => Producer Text m r | ||
32 | -> m (Either r (Char, Producer Text m r)) | ||
33 | nextChar = go | ||
34 | where | ||
35 | go p = do | ||
36 | x <- next p | ||
37 | case x of | ||
38 | Left r -> return (Left r) | ||
39 | Right (txt, p') -> case (T.uncons txt) of | ||
40 | Nothing -> go p' | ||
41 | Just (c, txt') -> return (Right (c, yield txt' >> p')) | ||
42 | {-# INLINABLE nextChar #-} | ||
43 | |||
44 | {-| Draw one 'Char' from the underlying 'Producer', returning 'Left' if the | ||
45 | 'Producer' is empty | ||
46 | -} | ||
47 | drawChar :: (Monad m) => StateT (Producer Text m r) m (Either r Char) | ||
48 | drawChar = do | ||
49 | x <- PP.draw | ||
50 | case x of | ||
51 | Left r -> return (Left r) | ||
52 | Right txt -> case (T.uncons txt) of | ||
53 | Nothing -> drawChar | ||
54 | Just (c, txt') -> do | ||
55 | PP.unDraw txt' | ||
56 | return (Right c) | ||
57 | {-# INLINABLE drawChar #-} | ||
58 | |||
59 | -- | Push back a 'Char' onto the underlying 'Producer' | ||
60 | unDrawChar :: (Monad m) => Char -> StateT (Producer Text m r) m () | ||
61 | unDrawChar c = modify (yield (T.singleton c) >>) | ||
62 | {-# INLINABLE unDrawChar #-} | ||
63 | |||
64 | {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to | ||
65 | push the 'Char' back | ||
66 | |||
67 | > peekChar = do | ||
68 | > x <- drawChar | ||
69 | > case x of | ||
70 | > Left _ -> return () | ||
71 | > Right c -> unDrawChar c | ||
72 | > return x | ||
73 | -} | ||
74 | peekChar :: (Monad m) => StateT (Producer Text m r) m (Either r Char) | ||
75 | peekChar = do | ||
76 | x <- drawChar | ||
77 | case x of | ||
78 | Left _ -> return () | ||
79 | Right c -> unDrawChar c | ||
80 | return x | ||
81 | {-# INLINABLE peekChar #-} | ||
82 | |||
83 | {-| Check if the underlying 'Producer' has no more characters | ||
84 | |||
85 | Note that this will skip over empty 'Text' chunks, unlike | ||
86 | 'PP.isEndOfInput' from @pipes-parse@. | ||
87 | |||
88 | > isEndOfChars = liftM isLeft peekChar | ||
89 | -} | ||
90 | isEndOfChars :: (Monad m) => StateT (Producer Text m r) m Bool | ||
91 | isEndOfChars = do | ||
92 | x <- peekChar | ||
93 | return (case x of | ||
94 | Left _ -> True | ||
95 | Right _ -> False ) | ||
96 | {-# INLINABLE isEndOfChars #-} | ||
97 | |||
98 | {-| @(take n)@ only allows @n@ characters to pass | ||
99 | |||
100 | Unlike 'take', this 'PP.unDraw's unused characters | ||
101 | -} | ||
102 | take :: (Monad m, Integral a) => a -> Pipe Text Text (StateT (Producer Text m r) m) () | ||
103 | take n0 = go n0 where | ||
104 | go n | ||
105 | | n <= 0 = return () | ||
106 | | otherwise = do | ||
107 | txt <- await | ||
108 | let len = fromIntegral (T.length txt) | ||
109 | if (len > n) | ||
110 | then do | ||
111 | let n' = fromIntegral n | ||
112 | lift . PP.unDraw $ T.drop n' txt | ||
113 | yield $ T.take n' txt | ||
114 | else do | ||
115 | yield txt | ||
116 | go (n - len) | ||
117 | {-# INLINABLE take #-} | ||
118 | |||
119 | {-| Take characters until they fail the predicate | ||
120 | |||
121 | Unlike 'takeWhile', this 'PP.unDraw's unused characters | ||
122 | -} | ||
123 | takeWhile | ||
124 | :: (Monad m) | ||
125 | => (Char -> Bool) | ||
126 | -> Pipe Text Text (StateT (Producer Text m r) m) () | ||
127 | takeWhile predicate = go | ||
128 | where | ||
129 | go = do | ||
130 | txt <- await | ||
131 | let (prefix, suffix) = T.span predicate txt | ||
132 | if (T.null suffix) | ||
133 | then do | ||
134 | yield txt | ||
135 | go | ||
136 | else do | ||
137 | lift $ PP.unDraw suffix | ||
138 | yield prefix | ||
139 | {-# INLINABLE takeWhile #-} | ||