aboutsummaryrefslogtreecommitdiffhomepage
path: root/Data
diff options
context:
space:
mode:
authormichaelt <what_is_it_to_do_anything@yahoo.com>2013-10-22 18:22:53 -0400
committermichaelt <what_is_it_to_do_anything@yahoo.com>2013-10-22 18:22:53 -0400
commit31f41a5d197ca9f1a613f2dc684a9fa0467a0f2e (patch)
treec948830b3577eec95cfe956df28da2eaf423fe70 /Data
parent13a43263ca34ae5124f630b5b5cefdde831596b3 (diff)
downloadtext-pipes-31f41a5d197ca9f1a613f2dc684a9fa0467a0f2e.tar.gz
text-pipes-31f41a5d197ca9f1a613f2dc684a9fa0467a0f2e.tar.zst
text-pipes-31f41a5d197ca9f1a613f2dc684a9fa0467a0f2e.zip
documentation, etc
Diffstat (limited to 'Data')
-rw-r--r--Data/Text/Pipes.hs428
-rw-r--r--Data/Text/Pipes/Parse.hs139
2 files changed, 316 insertions, 251 deletions
diff --git a/Data/Text/Pipes.hs b/Data/Text/Pipes.hs
index e918585..3063aff 100644
--- a/Data/Text/Pipes.hs
+++ b/Data/Text/Pipes.hs
@@ -1,61 +1,58 @@
1{-# LANGUAGE RankNTypes, TypeFamilies #-} 1{-# LANGUAGE RankNTypes, TypeFamilies, CPP #-}
2 2
3{-| This module provides @pipes@ utilities for \"text streams\", which are 3{-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of strict 'Text' chunks. Use text streams to interact 4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 with both 'IO.Handle's and lazy 'Text's. 5 can interact lazy 'Text's and 'IO.Handle's.
6 6
7 To stream to or from 'IO.Handle's, use 'fromHandle' or 'toHandle'. For 7 To stream to or from 'IO.Handle's, use 'fromHandle' or 'toHandle'. For
8 example, the following program copies data from one file to another: 8 example, the following program copies a document from one file to another:
9 9
10> import Pipes 10> import Pipes
11> import qualified Data.Text.Pipes as P 11> import qualified Data.Text.Pipes as Text
12> import System.IO 12> import System.IO
13> 13>
14> main = 14> main =
15> withFile "inFile.txt" ReadMode $ \hIn -> 15> withFile "inFile.txt" ReadMode $ \hIn ->
16> withFile "outFile.txt" WriteMode $ \hOut -> 16> withFile "outFile.txt" WriteMode $ \hOut ->
17> runEffect $ P.fromHandle hIn >-> P.toHandle hOut 17> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
18
19To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
18 20
19 The following is the more Prelude-like and uses Pipes.Safe:
20
21> import Pipes 21> import Pipes
22> import qualified Data.Text.Pipes as P 22> import qualified Data.Text.Pipes as Text
23> import Pipes.Safe 23> import Pipes.Safe
24> 24>
25> main = runSafeT $ runEffect $ P.readFile "inFile.txt" >-> P.writeFile "outFile.txt" 25> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
26
27 26
28 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin' 27 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
29 and 'stdout' proxies, like in the following \"echo\" program: 28 and 'stdout' proxies, as with the following \"echo\" program:
30 29
31> main = runEffect $ P.stdin >-> P.stdout 30> main = runEffect $ Text.stdin >-> Text.stdout
32 31
33 You can also translate pure lazy 'TL.Text's to and from proxies: 32 You can also translate pure lazy 'TL.Text's to and from proxies:
34 33
35> import qualified Data.Text.Lazy as TL 34> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
36>
37> main = runEffect $ P.fromLazy (TL.pack "Hello, world!\n") >-> P.stdout
38 35
39 In addition, this module provides many functions equivalent to lazy 36 In addition, this module provides many functions equivalent to lazy
40 'Text' functions so that you can transform or fold byte streams. For 37 'Text' functions so that you can transform or fold text streams. For
41 example, to stream only the first three lines of 'stdin' to 'stdout' you 38 example, to stream only the first three lines of 'stdin' to 'stdout' you
42 would write: 39 might write:
43 40
44> import Pipes 41> import Pipes
45> import qualified Pipes.Text as PT 42> import qualified Pipes.Text as Text
46> import qualified Pipes.Parse as PP 43> import qualified Pipes.Parse as Parse
47> 44>
48> main = runEffect $ takeLines 3 PB.stdin >-> PT.stdout 45> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
49> where 46> where
50> takeLines n = PB.unlines . PP.takeFree n . PT.lines 47> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
51 48
52 The above program will never bring more than one chunk (~ 32 KB) into 49 The above program will never bring more than one chunk of text (~ 32 KB) into
53 memory, no matter how long the lines are. 50 memory, no matter how long the lines are.
54 51
55 Note that functions in this library are designed to operate on streams that 52 Note that functions in this library are designed to operate on streams that
56 are insensitive to chunk boundaries. This means that they may freely split 53 are insensitive to text boundaries. This means that they may freely split
57 chunks into smaller chunks and /discard empty chunks/. However, they will 54 text into smaller texts and /discard empty texts/. However, they will
58 /never concatenate chunks/ in order to provide strict upper bounds on memory 55 /never concatenate texts/ in order to provide strict upper bounds on memory
59 usage. 56 usage.
60-} 57-}
61 58
@@ -66,12 +63,6 @@ module Data.Text.Pipes (
66 fromHandle, 63 fromHandle,
67 readFile, 64 readFile,
68 stdinLn, 65 stdinLn,
69-- hGetSome,
70-- hGet,
71
72 -- * Servers
73-- hGetSomeN,
74-- hGetN,
75 66
76 -- * Consumers 67 -- * Consumers
77 stdout, 68 stdout,
@@ -87,8 +78,6 @@ module Data.Text.Pipes (
87 takeWhile, 78 takeWhile,
88 dropWhile, 79 dropWhile,
89 filter, 80 filter,
90-- elemIndices,
91-- findIndices,
92 scan, 81 scan,
93 82
94 -- * Folds 83 -- * Folds
@@ -103,13 +92,11 @@ module Data.Text.Pipes (
103 all, 92 all,
104 maximum, 93 maximum,
105 minimum, 94 minimum,
106-- elem,
107-- notElem,
108 find, 95 find,
109 index, 96 index,
110-- elemIndex, 97-- elemIndex,
111-- findIndex, 98-- findIndex,
112-- count, 99 count,
113 100
114 -- * Splitters 101 -- * Splitters
115 splitAt, 102 splitAt,
@@ -122,28 +109,28 @@ module Data.Text.Pipes (
122 group, 109 group,
123 lines, 110 lines,
124 words, 111 words,
125 112#if MIN_VERSION_text(0,11,4)
113 decodeUtf8,
114#endif
126 -- * Transformations 115 -- * Transformations
127 intersperse, 116 intersperse,
128 117
129 -- * Joiners 118 -- * Joiners
130 intercalate, 119 intercalate,
131 unlines, 120 unlines,
132 unwords, 121 unwords,
133 122
134 -- * Low-level Parsers 123 -- * Character Parsers
135 -- $parse 124 -- $parse
136 nextByte, 125 nextChar,
137 drawByte, 126 drawChar,
138 unDrawByte, 127 unDrawChar,
139 peekByte, 128 peekChar,
140 isEndOfBytes, 129 isEndOfChars,
141-- takeWhile',
142 130
143 -- * Re-exports 131 -- * Re-exports
144 -- $reexports 132 -- $reexports
145 module Data.Text, 133 module Data.Text,
146-- module Data.Word,
147 module Pipes.Parse 134 module Pipes.Parse
148 ) where 135 ) where
149 136
@@ -152,11 +139,14 @@ import Control.Monad (liftM, unless)
152import Control.Monad.Trans.State.Strict (StateT) 139import Control.Monad.Trans.State.Strict (StateT)
153import qualified Data.Text as T 140import qualified Data.Text as T
154import qualified Data.Text.IO as T 141import qualified Data.Text.IO as T
142import qualified Data.Text.Encoding as TE
155import Data.Text (Text) 143import Data.Text (Text)
156import qualified Data.Text.Lazy as TL 144import qualified Data.Text.Lazy as TL
157import qualified Data.Text.Lazy.IO as TL 145import qualified Data.Text.Lazy.IO as TL
158import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize) 146import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
159import Data.ByteString.Unsafe (unsafeTake, unsafeDrop) 147import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
148import Data.ByteString (ByteString)
149import qualified Data.ByteString as B
160import Data.Char (ord) 150import Data.Char (ord)
161import Data.Functor.Identity (Identity) 151import Data.Functor.Identity (Identity)
162import qualified Data.List as List 152import qualified Data.List as List
@@ -164,8 +154,8 @@ import Foreign.C.Error (Errno(Errno), ePIPE)
164import qualified GHC.IO.Exception as G 154import qualified GHC.IO.Exception as G
165import Pipes 155import Pipes
166import qualified Pipes.ByteString.Parse as PBP 156import qualified Pipes.ByteString.Parse as PBP
167import Pipes.ByteString.Parse ( 157import Data.Text.Pipes.Parse (
168 nextByte, drawByte, unDrawByte, peekByte, isEndOfBytes ) 158 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
169import Pipes.Core (respond, Server') 159import Pipes.Core (respond, Server')
170import qualified Pipes.Parse as PP 160import qualified Pipes.Parse as PP
171import Pipes.Parse (input, concat, FreeT) 161import Pipes.Parse (input, concat, FreeT)
@@ -214,8 +204,11 @@ stdin :: MonadIO m => Producer' Text m ()
214stdin = fromHandle IO.stdin 204stdin = fromHandle IO.stdin
215{-# INLINABLE stdin #-} 205{-# INLINABLE stdin #-}
216 206
217-- | Convert a 'IO.Handle' into a text stream using a chunk size 207{-| Convert a 'IO.Handle' into a text stream using a text size
218-- determined by the good sense of the text library. 208 determined by the good sense of the text library.
209
210-}
211
219fromHandle :: MonadIO m => IO.Handle -> Producer' Text m () 212fromHandle :: MonadIO m => IO.Handle -> Producer' Text m ()
220fromHandle h = go where 213fromHandle h = go where
221 go = do txt <- liftIO (T.hGetChunk h) 214 go = do txt <- liftIO (T.hGetChunk h)
@@ -225,21 +218,26 @@ fromHandle h = go where
225 218
226{-| Stream text from a file using Pipes.Safe 219{-| Stream text from a file using Pipes.Safe
227 220
228>>> runSafeT $ runEffect $ readFile "README.md" >-> map toUpper >-> hoist lift stdout 221>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
229TEXT-PIPES 222MAIN = PUTSTRLN "HELLO WORLD"
230==========
231TEXT PIPES, SOMEHOW TO BE FUSED WITH `PIPES-TEXT`.
232...
233>>>
234-} 223-}
235 224
236readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m () 225readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m ()
237readFile file = Safe.withFile file IO.ReadMode fromHandle 226readFile file = Safe.withFile file IO.ReadMode fromHandle
238{-# INLINABLE readFile #-} 227{-# INLINABLE readFile #-}
239 228
229{-| Stream lines of text from stdin (for testing in ghci etc.)
230
231>>> let safely = runSafeT . runEffect
232>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
233hello
2345
235world
2365
237
238-}
240stdinLn :: MonadIO m => Producer' Text m () 239stdinLn :: MonadIO m => Producer' Text m ()
241stdinLn = go 240stdinLn = go where
242 where
243 go = do 241 go = do
244 eof <- liftIO (IO.hIsEOF IO.stdin) 242 eof <- liftIO (IO.hIsEOF IO.stdin)
245 unless eof $ do 243 unless eof $ do
@@ -247,56 +245,13 @@ stdinLn = go
247 yield txt 245 yield txt
248 go 246 go
249 247
250{-| Convert a handle into a byte stream using a fixed chunk size
251 248
252 'hGet' waits until exactly the requested number of bytes are available for 249{-| Stream text to 'stdout'
253 each chunk.
254-}
255-- hGet :: MonadIO m => Int -> IO.Handle -> Producer' Text m ()
256-- hGet size h = go where
257-- go = do
258-- eof <- liftIO (IO.hIsEOF h)
259-- if eof
260-- then return ()
261-- else do
262-- bs <- liftIO (T.hGet h size)
263-- yield bs
264-- go
265-- {-# INLINABLE hGet #-}
266
267{-| Like 'hGetSome', except you can vary the maximum chunk size for each request
268-}
269-- hGetSomeN :: MonadIO m => IO.Handle -> Int -> Server' Int Text m ()
270-- hGetSomeN h = go where
271-- go size = do
272-- eof <- liftIO (IO.hIsEOF h)
273-- if eof
274-- then return ()
275-- else do
276-- bs <- liftIO (T.hGetSome h size)
277-- size2 <- respond bs
278-- go size2
279-- {-# INLINABLE hGetSomeN #-}
280--
281-- -- | Like 'hGet', except you can vary the chunk size for each request
282-- hGetN :: MonadIO m => IO.Handle -> Int -> Server' Int Text m ()
283-- hGetN h = go where
284-- go size = do
285-- eof <- liftIO (IO.hIsEOF h)
286-- if eof
287-- then return ()
288-- else do
289-- bs <- liftIO (T.hGet h size)
290-- size2 <- respond bs
291-- go size2
292-- {-# INLINABLE hGetN #-}
293
294{-| Stream bytes to 'stdout'
295 250
296 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe. 251 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
297 252
298 Note: For best performance, use @(for source (liftIO . putStr))@ instead of 253 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
299 @(source >-> stdout)@. 254 @(source >-> stdout)@ in suitable cases.
300-} 255-}
301stdout :: MonadIO m => Consumer' Text m () 256stdout :: MonadIO m => Consumer' Text m ()
302stdout = go 257stdout = go
@@ -328,15 +283,16 @@ stdoutLn = go
328 Right () -> go 283 Right () -> go
329{-# INLINABLE stdoutLn #-} 284{-# INLINABLE stdoutLn #-}
330 285
331{-| Convert a byte stream into a 'Handle' 286{-| Convert a text stream into a 'Handle'
332 287
333 Note: For best performance, use @(for source (liftIO . hPutStr handle))@ 288 Note: again, for best performance, where possible use
334 instead of @(source >-> toHandle handle)@. 289 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
335-} 290-}
336toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r 291toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
337toHandle h = for cat (liftIO . T.hPutStr h) 292toHandle h = for cat (liftIO . T.hPutStr h)
338{-# INLINABLE toHandle #-} 293{-# INLINABLE toHandle #-}
339 294
295-- | Stream text into a file. Uses @pipes-safe@.
340writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m () 296writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m ()
341writeFile file = Safe.withFile file IO.WriteMode toHandle 297writeFile file = Safe.withFile file IO.WriteMode toHandle
342 298
@@ -345,65 +301,66 @@ map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
345map f = P.map (T.map f) 301map f = P.map (T.map f)
346{-# INLINABLE map #-} 302{-# INLINABLE map #-}
347 303
348-- | Map a function over the byte stream and concatenate the results 304-- | Map a function over the characters of a text stream and concatenate the results
349concatMap 305concatMap
350 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r 306 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
351concatMap f = P.map (T.concatMap f) 307concatMap f = P.map (T.concatMap f)
352{-# INLINABLE concatMap #-} 308{-# INLINABLE concatMap #-}
353 309
354-- | @(take n)@ only allows @n@ bytes to pass 310-- | @(take n)@ only allows @n@ individual characters to pass;
311-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
355take :: (Monad m, Integral a) => a -> Pipe Text Text m () 312take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
356take n0 = go n0 where 313take n0 = go n0 where
357 go n 314 go n
358 | n <= 0 = return () 315 | n <= 0 = return ()
359 | otherwise = do 316 | otherwise = do
360 bs <- await 317 txt <- await
361 let len = fromIntegral (T.length bs) 318 let len = fromIntegral (T.length txt)
362 if (len > n) 319 if (len > n)
363 then yield (T.take (fromIntegral n) bs) 320 then yield (T.take (fromIntegral n) txt)
364 else do 321 else do
365 yield bs 322 yield txt
366 go (n - len) 323 go (n - len)
367{-# INLINABLE take #-} 324{-# INLINABLE take #-}
368 325
369-- | @(dropD n)@ drops the first @n@ bytes 326-- | @(drop n)@ drops the first @n@ characters
370drop :: (Monad m, Integral a) => a -> Pipe Text Text m r 327drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
371drop n0 = go n0 where 328drop n0 = go n0 where
372 go n 329 go n
373 | n <= 0 = cat 330 | n <= 0 = cat
374 | otherwise = do 331 | otherwise = do
375 bs <- await 332 txt <- await
376 let len = fromIntegral (T.length bs) 333 let len = fromIntegral (T.length txt)
377 if (len >= n) 334 if (len >= n)
378 then do 335 then do
379 yield (T.drop (fromIntegral n) bs) 336 yield (T.drop (fromIntegral n) txt)
380 cat 337 cat
381 else go (n - len) 338 else go (n - len)
382{-# INLINABLE drop #-} 339{-# INLINABLE drop #-}
383 340
384-- | Take bytes until they fail the predicate 341-- | Take characters until they fail the predicate
385takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m () 342takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
386takeWhile predicate = go 343takeWhile predicate = go
387 where 344 where
388 go = do 345 go = do
389 bs <- await 346 txt <- await
390 let (prefix, suffix) = T.span predicate bs 347 let (prefix, suffix) = T.span predicate txt
391 if (T.null suffix) 348 if (T.null suffix)
392 then do 349 then do
393 yield bs 350 yield txt
394 go 351 go
395 else yield prefix 352 else yield prefix
396{-# INLINABLE takeWhile #-} 353{-# INLINABLE takeWhile #-}
397 354
398-- | Drop bytes until they fail the predicate 355-- | Drop characters until they fail the predicate
399dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r 356dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
400dropWhile predicate = go where 357dropWhile predicate = go where
401 go = do 358 go = do
402 bs <- await 359 txt <- await
403 case T.findIndex (not . predicate) bs of 360 case T.findIndex (not . predicate) txt of
404 Nothing -> go 361 Nothing -> go
405 Just i -> do 362 Just i -> do
406 yield (T.drop i bs) 363 yield (T.drop i txt)
407 cat 364 cat
408{-# INLINABLE dropWhile #-} 365{-# INLINABLE dropWhile #-}
409 366
@@ -412,33 +369,19 @@ filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
412filter predicate = P.map (T.filter predicate) 369filter predicate = P.map (T.filter predicate)
413{-# INLINABLE filter #-} 370{-# INLINABLE filter #-}
414 371
415-- | Stream all indices whose elements match the given 'Char' 372
416-- elemIndices :: (Monad m, Num n) => Char -> Pipe Text n m r 373-- | Strict left scan over the characters
417-- elemIndices w8 = findIndices (w8 ==)
418-- {-# INLINABLE elemIndices #-}
419
420-- | Stream all indices whose elements satisfy the given predicate
421-- findIndices :: (Monad m, Num n) => (Char -> Bool) -> Pipe Text n m r
422-- findIndices predicate = go 0
423-- where
424-- go n = do
425-- bs <- await
426-- each $ List.map (\i -> n + fromIntegral i) (T.findIndices predicate bs)
427-- go $! n + fromIntegral (T.length bs)
428-- {-# INLINABLE findIndices #-}
429
430-- | Strict left scan over the bytes
431scan 374scan
432 :: (Monad m) 375 :: (Monad m)
433 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r 376 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
434scan step begin = go begin 377scan step begin = go begin
435 where 378 where
436 go w8 = do 379 go c = do
437 bs <- await 380 txt <- await
438 let bs' = T.scanl step w8 bs 381 let txt' = T.scanl step c txt
439 w8' = T.last bs' 382 c' = T.last txt'
440 yield bs' 383 yield txt'
441 go w8' 384 go c'
442{-# INLINABLE scan #-} 385{-# INLINABLE scan #-}
443 386
444{-| Fold a pure 'Producer' of strict 'Text's into a lazy 387{-| Fold a pure 'Producer' of strict 'Text's into a lazy
@@ -459,11 +402,11 @@ toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
459toLazyM = liftM TL.fromChunks . P.toListM 402toLazyM = liftM TL.fromChunks . P.toListM
460{-# INLINABLE toLazyM #-} 403{-# INLINABLE toLazyM #-}
461 404
462-- | Reduce the stream of bytes using a strict left fold 405-- | Reduce the text stream using a strict left fold over characters
463fold 406fold
464 :: Monad m 407 :: Monad m
465 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r 408 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
466fold step begin done = P.fold (\x bs -> T.foldl' step x bs) begin done 409fold step begin done = P.fold (T.foldl' step) begin done
467{-# INLINABLE fold #-} 410{-# INLINABLE fold #-}
468 411
469-- | Retrieve the first 'Char' 412-- | Retrieve the first 'Char'
@@ -474,7 +417,7 @@ head = go
474 x <- nextChar p 417 x <- nextChar p
475 case x of 418 case x of
476 Left _ -> return Nothing 419 Left _ -> return Nothing
477 Right (w8, _) -> return (Just w8) 420 Right (c, _) -> return (Just c)
478{-# INLINABLE head #-} 421{-# INLINABLE head #-}
479 422
480-- | Retrieve the last 'Char' 423-- | Retrieve the last 'Char'
@@ -485,12 +428,10 @@ last = go Nothing
485 x <- next p 428 x <- next p
486 case x of 429 case x of
487 Left () -> return r 430 Left () -> return r
488 Right (bs, p') -> 431 Right (txt, p') ->
489 if (T.null bs) 432 if (T.null txt)
490 then go r p' 433 then go r p'
491 else go (Just $ T.last bs) p' 434 else go (Just $ T.last txt) p'
492 -- TODO: Change this to 'unsafeLast' when bytestring-0.10.2.0
493 -- becomes more widespread
494{-# INLINABLE last #-} 435{-# INLINABLE last #-}
495 436
496-- | Determine if the stream is empty 437-- | Determine if the stream is empty
@@ -500,7 +441,7 @@ null = P.all T.null
500 441
501-- | Count the number of bytes 442-- | Count the number of bytes
502length :: (Monad m, Num n) => Producer Text m () -> m n 443length :: (Monad m, Num n) => Producer Text m () -> m n
503length = P.fold (\n bs -> n + fromIntegral (T.length bs)) 0 id 444length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
504{-# INLINABLE length #-} 445{-# INLINABLE length #-}
505 446
506-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate 447-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
@@ -517,38 +458,26 @@ all predicate = P.all (T.all predicate)
517maximum :: (Monad m) => Producer Text m () -> m (Maybe Char) 458maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
518maximum = P.fold step Nothing id 459maximum = P.fold step Nothing id
519 where 460 where
520 step mw8 bs = 461 step mc txt =
521 if (T.null bs) 462 if (T.null txt)
522 then mw8 463 then mc
523 else Just $ case mw8 of 464 else Just $ case mc of
524 Nothing -> T.maximum bs 465 Nothing -> T.maximum txt
525 Just w8 -> max w8 (T.maximum bs) 466 Just c -> max c (T.maximum txt)
526{-# INLINABLE maximum #-} 467{-# INLINABLE maximum #-}
527 468
528-- | Return the minimum 'Char' within a byte stream 469-- | Return the minimum 'Char' within a byte stream
529minimum :: (Monad m) => Producer Text m () -> m (Maybe Char) 470minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
530minimum = P.fold step Nothing id 471minimum = P.fold step Nothing id
531 where 472 where
532 step mw8 bs = 473 step mc txt =
533 if (T.null bs) 474 if (T.null txt)
534 then mw8 475 then mc
535 else case mw8 of 476 else case mc of
536 Nothing -> Just (T.minimum bs) 477 Nothing -> Just (T.minimum txt)
537 Just w8 -> Just (min w8 (T.minimum bs)) 478 Just c -> Just (min c (T.minimum txt))
538{-# INLINABLE minimum #-} 479{-# INLINABLE minimum #-}
539 480
540-- | Determine whether any element in the byte stream matches the given 'Char'
541-- elem :: (Monad m) => Char -> Producer Text m () -> m Bool
542-- elem w8 = P.any (T.elem w8)
543-- {-# INLINABLE elem #-}
544--
545-- {-| Determine whether all elements in the byte stream do not match the given
546-- 'Char'
547-- -}
548-- notElem :: (Monad m) => Char -> Producer Text m () -> m Bool
549-- notElem w8 = P.all (T.notElem w8)
550-- {-# INLINABLE notElem #-}
551
552-- | Find the first element in the stream that matches the predicate 481-- | Find the first element in the stream that matches the predicate
553find 482find
554 :: (Monad m) 483 :: (Monad m)
@@ -576,12 +505,35 @@ index n p = head (p >-> drop n)
576-- findIndex predicate p = P.head (p >-> findIndices predicate) 505-- findIndex predicate p = P.head (p >-> findIndices predicate)
577-- {-# INLINABLE findIndex #-} 506-- {-# INLINABLE findIndex #-}
578-- 507--
579-- -- | Store a tally of how many elements match the given 'Char' 508-- | Store a tally of how many segments match the given 'Text'
580-- count :: (Monad m, Num n) => Char -> Producer Text m () -> m n 509count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
581-- count w8 p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count w8)) 510count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
582-- {-# INLINABLE count #-} 511{-# INLINABLE count #-}
583 512
584-- | Splits a 'Producer' after the given number of bytes 513#if MIN_VERSION_text(0,11,4)
514-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
515-- into a Pipe of Text
516decodeUtf8
517 :: Monad m
518 => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
519decodeUtf8 = go TE.streamDecodeUtf8
520 where go dec p = do
521 x <- lift (next p)
522 case x of
523 Left r -> return (return r)
524 Right (chunk, p') -> do
525 let TE.Some text l dec' = dec chunk
526 if B.null l
527 then do
528 yield text
529 go dec' p'
530 else return $ do
531 yield l
532 p'
533{-# INLINEABLE decodeUtf8 #-}
534#endif
535
536-- | Splits a 'Producer' after the given number of characters
585splitAt 537splitAt
586 :: (Monad m, Integral n) 538 :: (Monad m, Integral n)
587 => n 539 => n
@@ -594,19 +546,19 @@ splitAt = go
594 x <- lift (next p) 546 x <- lift (next p)
595 case x of 547 case x of
596 Left r -> return (return r) 548 Left r -> return (return r)
597 Right (bs, p') -> do 549 Right (txt, p') -> do
598 let len = fromIntegral (T.length bs) 550 let len = fromIntegral (T.length txt)
599 if (len <= n) 551 if (len <= n)
600 then do 552 then do
601 yield bs 553 yield txt
602 go (n - len) p' 554 go (n - len) p'
603 else do 555 else do
604 let (prefix, suffix) = T.splitAt (fromIntegral n) bs 556 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
605 yield prefix 557 yield prefix
606 return (yield suffix >> p') 558 return (yield suffix >> p')
607{-# INLINABLE splitAt #-} 559{-# INLINABLE splitAt #-}
608 560
609-- | Split a byte stream into 'FreeT'-delimited byte streams of fixed size 561-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
610chunksOf 562chunksOf
611 :: (Monad m, Integral n) 563 :: (Monad m, Integral n)
612 => n -> Producer Text m r -> FreeT (Producer Text m) m r 564 => n -> Producer Text m r -> FreeT (Producer Text m) m r
@@ -616,13 +568,13 @@ chunksOf n p0 = PP.FreeT (go p0)
616 x <- next p 568 x <- next p
617 return $ case x of 569 return $ case x of
618 Left r -> PP.Pure r 570 Left r -> PP.Pure r
619 Right (bs, p') -> PP.Free $ do 571 Right (txt, p') -> PP.Free $ do
620 p'' <- splitAt n (yield bs >> p') 572 p'' <- splitAt n (yield txt >> p')
621 return $ PP.FreeT (go p'') 573 return $ PP.FreeT (go p'')
622{-# INLINABLE chunksOf #-} 574{-# INLINABLE chunksOf #-}
623 575
624{-| Split a byte stream in two, where the first byte stream is the longest 576{-| Split a text stream in two, where the first text stream is the longest
625 consecutive group of bytes that satisfy the predicate 577 consecutive group of text that satisfy the predicate
626-} 578-}
627span 579span
628 :: (Monad m) 580 :: (Monad m)
@@ -635,11 +587,11 @@ span predicate = go
635 x <- lift (next p) 587 x <- lift (next p)
636 case x of 588 case x of
637 Left r -> return (return r) 589 Left r -> return (return r)
638 Right (bs, p') -> do 590 Right (txt, p') -> do
639 let (prefix, suffix) = T.span predicate bs 591 let (prefix, suffix) = T.span predicate txt
640 if (T.null suffix) 592 if (T.null suffix)
641 then do 593 then do
642 yield bs 594 yield txt
643 go p' 595 go p'
644 else do 596 else do
645 yield prefix 597 yield prefix
@@ -671,11 +623,11 @@ splitWith predicate p0 = PP.FreeT (go0 p0)
671 x <- next p 623 x <- next p
672 case x of 624 case x of
673 Left r -> return (PP.Pure r) 625 Left r -> return (PP.Pure r)
674 Right (bs, p') -> 626 Right (txt, p') ->
675 if (T.null bs) 627 if (T.null txt)
676 then go0 p' 628 then go0 p'
677 else return $ PP.Free $ do 629 else return $ PP.Free $ do
678 p'' <- span (not . predicate) (yield bs >> p') 630 p'' <- span (not . predicate) (yield txt >> p')
679 return $ PP.FreeT (go1 p'') 631 return $ PP.FreeT (go1 p'')
680 go1 p = do 632 go1 p = do
681 x <- nextChar p 633 x <- nextChar p
@@ -686,15 +638,15 @@ splitWith predicate p0 = PP.FreeT (go0 p0)
686 return $ PP.FreeT (go1 p'') 638 return $ PP.FreeT (go1 p'')
687{-# INLINABLE splitWith #-} 639{-# INLINABLE splitWith #-}
688 640
689-- | Split a byte stream using the given 'Char' as the delimiter 641-- | Split a text stream using the given 'Char' as the delimiter
690split :: (Monad m) 642split :: (Monad m)
691 => Char 643 => Char
692 -> Producer Text m r 644 -> Producer Text m r
693 -> FreeT (Producer Text m) m r 645 -> FreeT (Producer Text m) m r
694split w8 = splitWith (w8 ==) 646split c = splitWith (c ==)
695{-# INLINABLE split #-} 647{-# INLINABLE split #-}
696 648
697{-| Group a byte stream into 'FreeT'-delimited byte streams using the supplied 649{-| Group a text stream into 'FreeT'-delimited byte streams using the supplied
698 equality predicate 650 equality predicate
699-} 651-}
700groupBy 652groupBy
@@ -708,11 +660,11 @@ groupBy equal p0 = PP.FreeT (go p0)
708 x <- next p 660 x <- next p
709 case x of 661 case x of
710 Left r -> return (PP.Pure r) 662 Left r -> return (PP.Pure r)
711 Right (bs, p') -> case (T.uncons bs) of 663 Right (txt, p') -> case (T.uncons txt) of
712 Nothing -> go p' 664 Nothing -> go p'
713 Just (w8, _) -> do 665 Just (c, _) -> do
714 return $ PP.Free $ do 666 return $ PP.Free $ do
715 p'' <- span (equal w8) (yield bs >> p') 667 p'' <- span (equal c) (yield txt >> p')
716 return $ PP.FreeT (go p'') 668 return $ PP.FreeT (go p'')
717{-# INLINABLE groupBy #-} 669{-# INLINABLE groupBy #-}
718 670
@@ -736,10 +688,10 @@ lines p0 = PP.FreeT (go0 p0)
736 x <- next p 688 x <- next p
737 case x of 689 case x of
738 Left r -> return (PP.Pure r) 690 Left r -> return (PP.Pure r)
739 Right (bs, p') -> 691 Right (txt, p') ->
740 if (T.null bs) 692 if (T.null txt)
741 then go0 p' 693 then go0 p'
742 else return $ PP.Free $ go1 (yield bs >> p') 694 else return $ PP.Free $ go1 (yield txt >> p')
743 go1 p = do 695 go1 p = do
744 p' <- break ('\n' ==) p 696 p' <- break ('\n' ==) p
745 return $ PP.FreeT (go2 p') 697 return $ PP.FreeT (go2 p')
@@ -749,13 +701,10 @@ lines p0 = PP.FreeT (go0 p0)
749 Left r -> PP.Pure r 701 Left r -> PP.Pure r
750 Right (_, p') -> PP.Free (go1 p') 702 Right (_, p') -> PP.Free (go1 p')
751{-# INLINABLE lines #-} 703{-# INLINABLE lines #-}
752nextChar = undefined
753{-| Split a byte stream into 'FreeT'-delimited words
754 704
755 Note: This function is purely for demonstration purposes since it assumes a 705
756 particular encoding. You should prefer the 'Data.Text.Text' equivalent of 706
757 this function from the upcoming @pipes-text@ library. 707-- | Split a text stream into 'FreeT'-delimited words
758-}
759words 708words
760 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r 709 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
761words p0 = removeEmpty (splitWith isSpace p0) 710words p0 = removeEmpty (splitWith isSpace p0)
@@ -777,27 +726,27 @@ words p0 = removeEmpty (splitWith isSpace p0)
777-- | Intersperse a 'Char' in between the bytes of the byte stream 726-- | Intersperse a 'Char' in between the bytes of the byte stream
778intersperse 727intersperse
779 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r 728 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
780intersperse w8 = go0 729intersperse c = go0
781 where 730 where
782 go0 p = do 731 go0 p = do
783 x <- lift (next p) 732 x <- lift (next p)
784 case x of 733 case x of
785 Left r -> return r 734 Left r -> return r
786 Right (bs, p') -> do 735 Right (txt, p') -> do
787 yield (T.intersperse w8 bs) 736 yield (T.intersperse c txt)
788 go1 p' 737 go1 p'
789 go1 p = do 738 go1 p = do
790 x <- lift (next p) 739 x <- lift (next p)
791 case x of 740 case x of
792 Left r -> return r 741 Left r -> return r
793 Right (bs, p') -> do 742 Right (txt, p') -> do
794 yield (T.singleton w8) 743 yield (T.singleton c)
795 yield (T.intersperse w8 bs) 744 yield (T.intersperse c txt)
796 go1 p' 745 go1 p'
797{-# INLINABLE intersperse #-} 746{-# INLINABLE intersperse #-}
798 747
799{-| 'intercalate' concatenates the 'FreeT'-delimited byte streams after 748{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
800 interspersing a byte stream in between them 749 interspersing a text stream in between them
801-} 750-}
802intercalate 751intercalate
803 :: (Monad m) 752 :: (Monad m)
@@ -824,10 +773,6 @@ intercalate p0 = go0
824{-# INLINABLE intercalate #-} 773{-# INLINABLE intercalate #-}
825 774
826{-| Join 'FreeT'-delimited lines into a byte stream 775{-| Join 'FreeT'-delimited lines into a byte stream
827
828 Note: This function is purely for demonstration purposes since it assumes a
829 particular encoding. You should prefer the 'Data.Text.Text' equivalent of
830 this function from the upcoming @pipes-text@ library.
831-} 776-}
832unlines 777unlines
833 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r 778 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
@@ -843,11 +788,7 @@ unlines = go
843 go f' 788 go f'
844{-# INLINABLE unlines #-} 789{-# INLINABLE unlines #-}
845 790
846{-| Join 'FreeT'-delimited words into a byte stream 791{-| Join 'FreeT'-delimited words into a text stream
847
848 Note: This function is purely for demonstration purposes since it assumes a
849 particular encoding. You should prefer the 'Data.Text.Text' equivalent of
850 this function from the upcoming @pipes-text@ library.
851-} 792-}
852unwords 793unwords
853 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r 794 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
@@ -855,29 +796,14 @@ unwords = intercalate (yield $ T.pack " ")
855{-# INLINABLE unwords #-} 796{-# INLINABLE unwords #-}
856 797
857{- $parse 798{- $parse
858 The following parsing utilities are single-byte analogs of the ones found 799 The following parsing utilities are single-character analogs of the ones found
859 in @pipes-parse@. 800 @pipes-parse@.
860-} 801-}
861 802
862{-| Take bytes until they fail the predicate
863
864 Unlike 'takeWhile', this 'PP.unDraw's unused bytes
865-}
866-- takeWhile'
867-- :: (Monad m)
868-- => (Char -> Bool)
869-- -> Pipe Text Text (StateT (Producer Text m r) m) ()
870-- takeWhile' = PBP.takeWhile
871-- {-# INLINABLE takeWhile' #-}
872-- {-# DEPRECATED takeWhile' "Use Pipes.Text.Parse.takeWhile instead" #-}
873
874{- $reexports 803{- $reexports
875 "Pipes.Text.Parse" re-exports 'nextByte', 'drawByte', 'unDrawByte', 804 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
876 'peekByte', and 'isEndOfBytes'.
877 805
878 @Data.Text@ re-exports the 'Text' type. 806 @Data.Text@ re-exports the 'Text' type.
879 807
880 @Data.Word@ re-exports the 'Char' type.
881
882 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type). 808 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
883-} \ No newline at end of file 809-} \ No newline at end of file
diff --git a/Data/Text/Pipes/Parse.hs b/Data/Text/Pipes/Parse.hs
new file mode 100644
index 0000000..675c7aa
--- /dev/null
+++ b/Data/Text/Pipes/Parse.hs
@@ -0,0 +1,139 @@
1-- | Parsing utilities for characterstrings, in the style of @pipes-parse@
2
3module Data.Text.Pipes.Parse (
4 -- * Parsers
5 nextChar,
6 drawChar,
7 unDrawChar,
8 peekChar,
9 isEndOfChars,
10 take,
11 takeWhile
12 ) where
13
14import Control.Monad.Trans.State.Strict (StateT, modify)
15import qualified Data.Text as T
16import Data.Text (Text)
17
18import Pipes
19import qualified Pipes.Parse as PP
20
21import Prelude hiding (take, takeWhile)
22
23{-| Consume the first character from a 'Text' stream
24
25 'next' either fails with a 'Left' if the 'Producer' has no more characters or
26 succeeds with a 'Right' providing the next byte and the remainder of the
27 'Producer'.
28-}
29nextChar
30 :: (Monad m)
31 => Producer Text m r
32 -> m (Either r (Char, Producer Text m r))
33nextChar = go
34 where
35 go p = do
36 x <- next p
37 case x of
38 Left r -> return (Left r)
39 Right (txt, p') -> case (T.uncons txt) of
40 Nothing -> go p'
41 Just (c, txt') -> return (Right (c, yield txt' >> p'))
42{-# INLINABLE nextChar #-}
43
44{-| Draw one 'Char' from the underlying 'Producer', returning 'Left' if the
45 'Producer' is empty
46-}
47drawChar :: (Monad m) => StateT (Producer Text m r) m (Either r Char)
48drawChar = do
49 x <- PP.draw
50 case x of
51 Left r -> return (Left r)
52 Right txt -> case (T.uncons txt) of
53 Nothing -> drawChar
54 Just (c, txt') -> do
55 PP.unDraw txt'
56 return (Right c)
57{-# INLINABLE drawChar #-}
58
59-- | Push back a 'Char' onto the underlying 'Producer'
60unDrawChar :: (Monad m) => Char -> StateT (Producer Text m r) m ()
61unDrawChar c = modify (yield (T.singleton c) >>)
62{-# INLINABLE unDrawChar #-}
63
64{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
65 push the 'Char' back
66
67> peekChar = do
68> x <- drawChar
69> case x of
70> Left _ -> return ()
71> Right c -> unDrawChar c
72> return x
73-}
74peekChar :: (Monad m) => StateT (Producer Text m r) m (Either r Char)
75peekChar = do
76 x <- drawChar
77 case x of
78 Left _ -> return ()
79 Right c -> unDrawChar c
80 return x
81{-# INLINABLE peekChar #-}
82
83{-| Check if the underlying 'Producer' has no more characters
84
85 Note that this will skip over empty 'Text' chunks, unlike
86 'PP.isEndOfInput' from @pipes-parse@.
87
88> isEndOfChars = liftM isLeft peekChar
89-}
90isEndOfChars :: (Monad m) => StateT (Producer Text m r) m Bool
91isEndOfChars = do
92 x <- peekChar
93 return (case x of
94 Left _ -> True
95 Right _ -> False )
96{-# INLINABLE isEndOfChars #-}
97
98{-| @(take n)@ only allows @n@ characters to pass
99
100 Unlike 'take', this 'PP.unDraw's unused characters
101-}
102take :: (Monad m, Integral a) => a -> Pipe Text Text (StateT (Producer Text m r) m) ()
103take n0 = go n0 where
104 go n
105 | n <= 0 = return ()
106 | otherwise = do
107 txt <- await
108 let len = fromIntegral (T.length txt)
109 if (len > n)
110 then do
111 let n' = fromIntegral n
112 lift . PP.unDraw $ T.drop n' txt
113 yield $ T.take n' txt
114 else do
115 yield txt
116 go (n - len)
117{-# INLINABLE take #-}
118
119{-| Take characters until they fail the predicate
120
121 Unlike 'takeWhile', this 'PP.unDraw's unused characters
122-}
123takeWhile
124 :: (Monad m)
125 => (Char -> Bool)
126 -> Pipe Text Text (StateT (Producer Text m r) m) ()
127takeWhile predicate = go
128 where
129 go = do
130 txt <- await
131 let (prefix, suffix) = T.span predicate txt
132 if (T.null suffix)
133 then do
134 yield txt
135 go
136 else do
137 lift $ PP.unDraw suffix
138 yield prefix
139{-# INLINABLE takeWhile #-}