]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
missing case in decodeUtf8
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
c9d1c945 1{-# LANGUAGE RankNTypes, TypeFamilies, NoMonomorphismRestriction #-}
91727d11 2
13a43263 3{-| This module provides @pipes@ utilities for \"text streams\", which are
31f41a5d 4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
63ea9ffd 5 a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
6 be associated with a 'Producer' or 'Consumer' according as it is read or written to.
91727d11 7
63ea9ffd 8 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
31f41a5d 9 example, the following program copies a document from one file to another:
91727d11 10
11> import Pipes
31f41a5d 12> import qualified Data.Text.Pipes as Text
91727d11 13> import System.IO
14>
15> main =
16> withFile "inFile.txt" ReadMode $ \hIn ->
17> withFile "outFile.txt" WriteMode $ \hOut ->
31f41a5d 18> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
19
20To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
91727d11 21
13a43263 22> import Pipes
31f41a5d 23> import qualified Data.Text.Pipes as Text
13a43263 24> import Pipes.Safe
25>
31f41a5d 26> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
13a43263 27
91727d11 28 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
31f41a5d 29 and 'stdout' proxies, as with the following \"echo\" program:
91727d11 30
31f41a5d 31> main = runEffect $ Text.stdin >-> Text.stdout
91727d11 32
33 You can also translate pure lazy 'TL.Text's to and from proxies:
34
31f41a5d 35> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
91727d11 36
37 In addition, this module provides many functions equivalent to lazy
31f41a5d 38 'Text' functions so that you can transform or fold text streams. For
91727d11 39 example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 40 might write:
91727d11 41
42> import Pipes
31f41a5d 43> import qualified Pipes.Text as Text
44> import qualified Pipes.Parse as Parse
91727d11 45>
31f41a5d 46> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
91727d11 47> where
31f41a5d 48> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
91727d11 49
31f41a5d 50 The above program will never bring more than one chunk of text (~ 32 KB) into
91727d11 51 memory, no matter how long the lines are.
52
53 Note that functions in this library are designed to operate on streams that
31f41a5d 54 are insensitive to text boundaries. This means that they may freely split
55 text into smaller texts and /discard empty texts/. However, they will
56 /never concatenate texts/ in order to provide strict upper bounds on memory
91727d11 57 usage.
58-}
59
7faef8bc 60module Pipes.Text (
91727d11 61 -- * Producers
62 fromLazy,
63 stdin,
64 fromHandle,
65 readFile,
66 stdinLn,
91727d11 67
68 -- * Consumers
69 stdout,
70 stdoutLn,
71 toHandle,
72 writeFile,
73
74 -- * Pipes
75 map,
76 concatMap,
77 take,
78 drop,
79 takeWhile,
80 dropWhile,
81 filter,
91727d11 82 scan,
7faef8bc 83 encodeUtf8,
84 pack,
85 unpack,
1d2434b5 86 toCaseFold,
87 toLower,
88 toUpper,
7faef8bc 89 stripStart,
91727d11 90
91 -- * Folds
92 toLazy,
93 toLazyM,
94 fold,
95 head,
96 last,
97 null,
98 length,
99 any,
100 all,
101 maximum,
102 minimum,
91727d11 103 find,
104 index,
31f41a5d 105 count,
91727d11 106
107 -- * Splitters
108 splitAt,
109 chunksOf,
110 span,
111 break,
112 splitWith,
113 split,
114 groupBy,
115 group,
116 lines,
117 words,
31f41a5d 118 decodeUtf8,
91727d11 119 -- * Transformations
120 intersperse,
31f41a5d 121
91727d11 122 -- * Joiners
123 intercalate,
124 unlines,
125 unwords,
126
31f41a5d 127 -- * Character Parsers
91727d11 128 -- $parse
31f41a5d 129 nextChar,
130 drawChar,
131 unDrawChar,
132 peekChar,
133 isEndOfChars,
91727d11 134
135 -- * Re-exports
136 -- $reexports
137 module Data.Text,
91727d11 138 module Pipes.Parse
139 ) where
140
141import Control.Exception (throwIO, try)
142import Control.Monad (liftM, unless)
acc6868f 143import Control.Monad.Trans.State.Strict (StateT(..))
1b4f5326 144import Data.Monoid ((<>))
91727d11 145import qualified Data.Text as T
146import qualified Data.Text.IO as T
31f41a5d 147import qualified Data.Text.Encoding as TE
63ea9ffd 148import qualified Data.Text.Encoding.Error as TE
91727d11 149import Data.Text (Text)
150import qualified Data.Text.Lazy as TL
151import qualified Data.Text.Lazy.IO as TL
152import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
153import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
31f41a5d 154import Data.ByteString (ByteString)
155import qualified Data.ByteString as B
cf10d6f1 156import Data.Char (ord, isSpace)
91727d11 157import Data.Functor.Identity (Identity)
158import qualified Data.List as List
159import Foreign.C.Error (Errno(Errno), ePIPE)
160import qualified GHC.IO.Exception as G
161import Pipes
5e3f5409 162import qualified Pipes.ByteString as PB
91727d11 163import qualified Pipes.ByteString.Parse as PBP
8c482809 164import qualified Pipes.Text.Internal as PE
c0343bc9 165import Pipes.Text.Parse (
31f41a5d 166 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
91727d11 167import Pipes.Core (respond, Server')
168import qualified Pipes.Parse as PP
169import Pipes.Parse (input, concat, FreeT)
170import qualified Pipes.Safe.Prelude as Safe
171import qualified Pipes.Safe as Safe
172import Pipes.Safe (MonadSafe(..), Base(..))
173import qualified Pipes.Prelude as P
174import qualified System.IO as IO
175import Data.Char (isSpace)
63ea9ffd 176import Data.Word (Word8)
91727d11 177import Prelude hiding (
178 all,
179 any,
180 break,
181 concat,
182 concatMap,
183 drop,
184 dropWhile,
185 elem,
186 filter,
187 head,
188 last,
189 lines,
190 length,
191 map,
192 maximum,
193 minimum,
194 notElem,
195 null,
196 readFile,
197 span,
198 splitAt,
199 take,
200 takeWhile,
201 unlines,
202 unwords,
203 words,
204 writeFile )
205
206-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
207fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
208fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
209{-# INLINABLE fromLazy #-}
210
62e8521c 211-- | Stream text from 'stdin'
c9d1c945 212stdin :: MonadIO m => Producer Text m (Producer ByteString m ())
91727d11 213stdin = fromHandle IO.stdin
214{-# INLINABLE stdin #-}
215
31f41a5d 216{-| Convert a 'IO.Handle' into a text stream using a text size
217 determined by the good sense of the text library.
31f41a5d 218-}
219
c9d1c945 220fromHandle :: MonadIO m => IO.Handle -> Producer Text m (Producer ByteString m ())
221fromHandle h = decodeUtf8 (PB.fromHandle h)
8853a440 222{-# INLINE fromHandle#-}
c9d1c945 223
6f6f9974 224{-| Stream text from a file using Pipes.Safe
225
31f41a5d 226>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
227MAIN = PUTSTRLN "HELLO WORLD"
6f6f9974 228-}
229
c9d1c945 230readFile :: (MonadSafe m) => FilePath -> Producer Text m (Producer ByteString m ())
91727d11 231readFile file = Safe.withFile file IO.ReadMode fromHandle
232{-# INLINABLE readFile #-}
233
31f41a5d 234{-| Stream lines of text from stdin (for testing in ghci etc.)
235
236>>> let safely = runSafeT . runEffect
237>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
238hello
2395
240world
2415
242
243-}
91727d11 244stdinLn :: MonadIO m => Producer' Text m ()
31f41a5d 245stdinLn = go where
91727d11 246 go = do
247 eof <- liftIO (IO.hIsEOF IO.stdin)
248 unless eof $ do
249 txt <- liftIO (T.hGetLine IO.stdin)
250 yield txt
251 go
252
91727d11 253
31f41a5d 254{-| Stream text to 'stdout'
91727d11 255
256 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
257
258 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
31f41a5d 259 @(source >-> stdout)@ in suitable cases.
91727d11 260-}
261stdout :: MonadIO m => Consumer' Text m ()
262stdout = go
263 where
264 go = do
265 txt <- await
266 x <- liftIO $ try (T.putStr txt)
267 case x of
268 Left (G.IOError { G.ioe_type = G.ResourceVanished
269 , G.ioe_errno = Just ioe })
270 | Errno ioe == ePIPE
271 -> return ()
272 Left e -> liftIO (throwIO e)
273 Right () -> go
274{-# INLINABLE stdout #-}
275
276stdoutLn :: (MonadIO m) => Consumer' Text m ()
277stdoutLn = go
278 where
279 go = do
280 str <- await
281 x <- liftIO $ try (T.putStrLn str)
282 case x of
283 Left (G.IOError { G.ioe_type = G.ResourceVanished
284 , G.ioe_errno = Just ioe })
285 | Errno ioe == ePIPE
286 -> return ()
287 Left e -> liftIO (throwIO e)
288 Right () -> go
289{-# INLINABLE stdoutLn #-}
290
31f41a5d 291{-| Convert a text stream into a 'Handle'
91727d11 292
31f41a5d 293 Note: again, for best performance, where possible use
294 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
91727d11 295-}
296toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
297toHandle h = for cat (liftIO . T.hPutStr h)
298{-# INLINABLE toHandle #-}
299
d4732515 300{-# RULES "p >-> toHandle h" forall p h .
ff38b9f0 301 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
d4732515 302 #-}
303
304
31f41a5d 305-- | Stream text into a file. Uses @pipes-safe@.
c9d1c945 306writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
91727d11 307writeFile file = Safe.withFile file IO.WriteMode toHandle
308
309-- | Apply a transformation to each 'Char' in the stream
310map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
311map f = P.map (T.map f)
312{-# INLINABLE map #-}
313
ff38b9f0 314{-# RULES "p >-> map f" forall p f .
315 p >-> map f = for p (\txt -> yield (T.map f txt))
316 #-}
317
31f41a5d 318-- | Map a function over the characters of a text stream and concatenate the results
91727d11 319concatMap
320 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
321concatMap f = P.map (T.concatMap f)
322{-# INLINABLE concatMap #-}
323
ff38b9f0 324{-# RULES "p >-> concatMap f" forall p f .
325 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
326 #-}
7faef8bc 327
328-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
a02a69ad 329-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
330-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
7faef8bc 331encodeUtf8 :: Monad m => Pipe Text ByteString m r
332encodeUtf8 = P.map TE.encodeUtf8
333{-# INLINEABLE encodeUtf8 #-}
334
ff38b9f0 335{-# RULES "p >-> encodeUtf8" forall p .
336 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
337 #-}
338
c0343bc9 339-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 340pack :: Monad m => Pipe String Text m r
341pack = P.map T.pack
342{-# INLINEABLE pack #-}
343
ff38b9f0 344{-# RULES "p >-> pack" forall p .
345 p >-> pack = for p (\txt -> yield (T.pack txt))
346 #-}
347
348-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 349unpack :: Monad m => Pipe Text String m r
d4732515 350unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 351{-# INLINEABLE unpack #-}
352
ff38b9f0 353{-# RULES "p >-> unpack" forall p .
354 p >-> unpack = for p (\txt -> yield (T.unpack txt))
355 #-}
d4732515 356
c0343bc9 357-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
7faef8bc 358-- here acting on a 'Text' pipe, rather as they would on a lazy text
359toCaseFold :: Monad m => Pipe Text Text m ()
360toCaseFold = P.map T.toCaseFold
361{-# INLINEABLE toCaseFold #-}
362
ff38b9f0 363{-# RULES "p >-> toCaseFold" forall p .
364 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
365 #-}
366
367
c0343bc9 368-- | lowercase incoming 'Text'
7faef8bc 369toLower :: Monad m => Pipe Text Text m ()
370toLower = P.map T.toLower
371{-# INLINEABLE toLower #-}
372
ff38b9f0 373{-# RULES "p >-> toLower" forall p .
374 p >-> toLower = for p (\txt -> yield (T.toLower txt))
375 #-}
376
c0343bc9 377-- | uppercase incoming 'Text'
7faef8bc 378toUpper :: Monad m => Pipe Text Text m ()
379toUpper = P.map T.toUpper
380{-# INLINEABLE toUpper #-}
381
ff38b9f0 382{-# RULES "p >-> toUpper" forall p .
383 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
384 #-}
385
c0343bc9 386-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 387stripStart :: Monad m => Pipe Text Text m r
388stripStart = do
389 chunk <- await
390 let text = T.stripStart chunk
391 if T.null text
392 then stripStart
393 else cat
394{-# INLINEABLE stripStart #-}
395
31f41a5d 396-- | @(take n)@ only allows @n@ individual characters to pass;
397-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 398take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
399take n0 = go n0 where
400 go n
401 | n <= 0 = return ()
402 | otherwise = do
31f41a5d 403 txt <- await
404 let len = fromIntegral (T.length txt)
91727d11 405 if (len > n)
31f41a5d 406 then yield (T.take (fromIntegral n) txt)
91727d11 407 else do
31f41a5d 408 yield txt
91727d11 409 go (n - len)
410{-# INLINABLE take #-}
411
31f41a5d 412-- | @(drop n)@ drops the first @n@ characters
91727d11 413drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
414drop n0 = go n0 where
415 go n
416 | n <= 0 = cat
417 | otherwise = do
31f41a5d 418 txt <- await
419 let len = fromIntegral (T.length txt)
91727d11 420 if (len >= n)
421 then do
31f41a5d 422 yield (T.drop (fromIntegral n) txt)
91727d11 423 cat
424 else go (n - len)
425{-# INLINABLE drop #-}
426
31f41a5d 427-- | Take characters until they fail the predicate
91727d11 428takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
429takeWhile predicate = go
430 where
431 go = do
31f41a5d 432 txt <- await
433 let (prefix, suffix) = T.span predicate txt
91727d11 434 if (T.null suffix)
435 then do
31f41a5d 436 yield txt
91727d11 437 go
438 else yield prefix
439{-# INLINABLE takeWhile #-}
440
31f41a5d 441-- | Drop characters until they fail the predicate
91727d11 442dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
443dropWhile predicate = go where
444 go = do
31f41a5d 445 txt <- await
446 case T.findIndex (not . predicate) txt of
91727d11 447 Nothing -> go
448 Just i -> do
31f41a5d 449 yield (T.drop i txt)
91727d11 450 cat
451{-# INLINABLE dropWhile #-}
452
453-- | Only allows 'Char's to pass if they satisfy the predicate
454filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
455filter predicate = P.map (T.filter predicate)
456{-# INLINABLE filter #-}
457
ff38b9f0 458{-# RULES "p >-> filter q" forall p q .
459 p >-> filter q = for p (\txt -> yield (T.filter q txt))
460 #-}
461
31f41a5d 462-- | Strict left scan over the characters
91727d11 463scan
464 :: (Monad m)
465 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
466scan step begin = go begin
467 where
31f41a5d 468 go c = do
469 txt <- await
470 let txt' = T.scanl step c txt
471 c' = T.last txt'
472 yield txt'
473 go c'
91727d11 474{-# INLINABLE scan #-}
475
476{-| Fold a pure 'Producer' of strict 'Text's into a lazy
477 'TL.Text'
478-}
479toLazy :: Producer Text Identity () -> TL.Text
480toLazy = TL.fromChunks . P.toList
481{-# INLINABLE toLazy #-}
482
483{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
484 'TL.Text'
485
486 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
487 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
488 immediately as they are generated instead of loading them all into memory.
489-}
490toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
491toLazyM = liftM TL.fromChunks . P.toListM
492{-# INLINABLE toLazyM #-}
493
31f41a5d 494-- | Reduce the text stream using a strict left fold over characters
91727d11 495fold
496 :: Monad m
497 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
31f41a5d 498fold step begin done = P.fold (T.foldl' step) begin done
91727d11 499{-# INLINABLE fold #-}
500
501-- | Retrieve the first 'Char'
502head :: (Monad m) => Producer Text m () -> m (Maybe Char)
503head = go
504 where
505 go p = do
506 x <- nextChar p
507 case x of
508 Left _ -> return Nothing
31f41a5d 509 Right (c, _) -> return (Just c)
91727d11 510{-# INLINABLE head #-}
511
512-- | Retrieve the last 'Char'
513last :: (Monad m) => Producer Text m () -> m (Maybe Char)
514last = go Nothing
515 where
516 go r p = do
517 x <- next p
518 case x of
519 Left () -> return r
31f41a5d 520 Right (txt, p') ->
521 if (T.null txt)
91727d11 522 then go r p'
31f41a5d 523 else go (Just $ T.last txt) p'
91727d11 524{-# INLINABLE last #-}
525
526-- | Determine if the stream is empty
527null :: (Monad m) => Producer Text m () -> m Bool
528null = P.all T.null
529{-# INLINABLE null #-}
530
62e8521c 531-- | Count the number of characters in the stream
91727d11 532length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 533length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 534{-# INLINABLE length #-}
535
536-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
537any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
538any predicate = P.any (T.any predicate)
539{-# INLINABLE any #-}
540
541-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
542all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
543all predicate = P.all (T.all predicate)
544{-# INLINABLE all #-}
545
62e8521c 546-- | Return the maximum 'Char' within a text stream
91727d11 547maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
548maximum = P.fold step Nothing id
549 where
31f41a5d 550 step mc txt =
551 if (T.null txt)
552 then mc
553 else Just $ case mc of
554 Nothing -> T.maximum txt
555 Just c -> max c (T.maximum txt)
91727d11 556{-# INLINABLE maximum #-}
557
62e8521c 558-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 559minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
560minimum = P.fold step Nothing id
561 where
31f41a5d 562 step mc txt =
563 if (T.null txt)
564 then mc
565 else case mc of
566 Nothing -> Just (T.minimum txt)
567 Just c -> Just (min c (T.minimum txt))
91727d11 568{-# INLINABLE minimum #-}
569
91727d11 570-- | Find the first element in the stream that matches the predicate
571find
572 :: (Monad m)
573 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
574find predicate p = head (p >-> filter predicate)
575{-# INLINABLE find #-}
576
62e8521c 577-- | Index into a text stream
91727d11 578index
579 :: (Monad m, Integral a)
580 => a-> Producer Text m () -> m (Maybe Char)
581index n p = head (p >-> drop n)
582{-# INLINABLE index #-}
583
63ea9ffd 584
31f41a5d 585-- | Store a tally of how many segments match the given 'Text'
586count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
587count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
588{-# INLINABLE count #-}
589
31f41a5d 590-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
591-- into a Pipe of Text
8c482809 592
c9d1c945 593decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1b4f5326 594decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
595 go carry dec0 p = do
596 x <- lift (next p)
597 case x of Left r -> return (do yield carry
598 return r)
599 Right (chunk, p') -> case dec0 chunk of
600 PE.Some text carry2 dec -> do yield text
601 go carry2 dec p'
602 PE.Other text bs -> do yield text
603 return (do yield bs
604 p')
31f41a5d 605
606-- | Splits a 'Producer' after the given number of characters
91727d11 607splitAt
608 :: (Monad m, Integral n)
609 => n
610 -> Producer Text m r
611 -> Producer' Text m (Producer Text m r)
612splitAt = go
613 where
614 go 0 p = return p
615 go n p = do
616 x <- lift (next p)
617 case x of
618 Left r -> return (return r)
31f41a5d 619 Right (txt, p') -> do
620 let len = fromIntegral (T.length txt)
91727d11 621 if (len <= n)
622 then do
31f41a5d 623 yield txt
91727d11 624 go (n - len) p'
625 else do
31f41a5d 626 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 627 yield prefix
628 return (yield suffix >> p')
629{-# INLINABLE splitAt #-}
630
31f41a5d 631-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
91727d11 632chunksOf
633 :: (Monad m, Integral n)
634 => n -> Producer Text m r -> FreeT (Producer Text m) m r
635chunksOf n p0 = PP.FreeT (go p0)
636 where
637 go p = do
638 x <- next p
639 return $ case x of
640 Left r -> PP.Pure r
31f41a5d 641 Right (txt, p') -> PP.Free $ do
642 p'' <- splitAt n (yield txt >> p')
91727d11 643 return $ PP.FreeT (go p'')
644{-# INLINABLE chunksOf #-}
645
31f41a5d 646{-| Split a text stream in two, where the first text stream is the longest
647 consecutive group of text that satisfy the predicate
91727d11 648-}
649span
650 :: (Monad m)
651 => (Char -> Bool)
652 -> Producer Text m r
653 -> Producer' Text m (Producer Text m r)
654span predicate = go
655 where
656 go p = do
657 x <- lift (next p)
658 case x of
659 Left r -> return (return r)
31f41a5d 660 Right (txt, p') -> do
661 let (prefix, suffix) = T.span predicate txt
91727d11 662 if (T.null suffix)
663 then do
31f41a5d 664 yield txt
91727d11 665 go p'
666 else do
667 yield prefix
668 return (yield suffix >> p')
669{-# INLINABLE span #-}
670
62e8521c 671{-| Split a text stream in two, where the first text stream is the longest
672 consecutive group of characters that don't satisfy the predicate
91727d11 673-}
674break
675 :: (Monad m)
676 => (Char -> Bool)
677 -> Producer Text m r
678 -> Producer Text m (Producer Text m r)
679break predicate = span (not . predicate)
680{-# INLINABLE break #-}
681
62e8521c 682{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 683 predicate
684-}
685splitWith
686 :: (Monad m)
687 => (Char -> Bool)
688 -> Producer Text m r
689 -> PP.FreeT (Producer Text m) m r
690splitWith predicate p0 = PP.FreeT (go0 p0)
691 where
692 go0 p = do
693 x <- next p
694 case x of
695 Left r -> return (PP.Pure r)
31f41a5d 696 Right (txt, p') ->
697 if (T.null txt)
91727d11 698 then go0 p'
699 else return $ PP.Free $ do
31f41a5d 700 p'' <- span (not . predicate) (yield txt >> p')
91727d11 701 return $ PP.FreeT (go1 p'')
702 go1 p = do
703 x <- nextChar p
704 return $ case x of
705 Left r -> PP.Pure r
706 Right (_, p') -> PP.Free $ do
707 p'' <- span (not . predicate) p'
708 return $ PP.FreeT (go1 p'')
709{-# INLINABLE splitWith #-}
710
31f41a5d 711-- | Split a text stream using the given 'Char' as the delimiter
91727d11 712split :: (Monad m)
713 => Char
714 -> Producer Text m r
715 -> FreeT (Producer Text m) m r
31f41a5d 716split c = splitWith (c ==)
91727d11 717{-# INLINABLE split #-}
718
62e8521c 719{-| Group a text stream into 'FreeT'-delimited text streams using the supplied
91727d11 720 equality predicate
721-}
722groupBy
723 :: (Monad m)
724 => (Char -> Char -> Bool)
725 -> Producer Text m r
726 -> FreeT (Producer Text m) m r
727groupBy equal p0 = PP.FreeT (go p0)
728 where
729 go p = do
730 x <- next p
731 case x of
732 Left r -> return (PP.Pure r)
31f41a5d 733 Right (txt, p') -> case (T.uncons txt) of
91727d11 734 Nothing -> go p'
31f41a5d 735 Just (c, _) -> do
91727d11 736 return $ PP.Free $ do
31f41a5d 737 p'' <- span (equal c) (yield txt >> p')
91727d11 738 return $ PP.FreeT (go p'')
739{-# INLINABLE groupBy #-}
740
62e8521c 741-- | Group a text stream into 'FreeT'-delimited text streams of identical characters
91727d11 742group
743 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
744group = groupBy (==)
745{-# INLINABLE group #-}
746
62e8521c 747{-| Split a text stream into 'FreeT'-delimited lines
91727d11 748-}
749lines
750 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
751lines p0 = PP.FreeT (go0 p0)
752 where
753 go0 p = do
754 x <- next p
755 case x of
756 Left r -> return (PP.Pure r)
31f41a5d 757 Right (txt, p') ->
758 if (T.null txt)
91727d11 759 then go0 p'
31f41a5d 760 else return $ PP.Free $ go1 (yield txt >> p')
91727d11 761 go1 p = do
762 p' <- break ('\n' ==) p
b4d21c02 763 return $ PP.FreeT $ do
764 x <- nextChar p'
765 case x of
766 Left r -> return $ PP.Pure r
767 Right (_, p'') -> go0 p''
91727d11 768{-# INLINABLE lines #-}
91727d11 769
31f41a5d 770
771
772-- | Split a text stream into 'FreeT'-delimited words
91727d11 773words
774 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
cf10d6f1 775words = go
91727d11 776 where
cf10d6f1 777 go p = PP.FreeT $ do
778 x <- next (p >-> dropWhile isSpace)
779 return $ case x of
780 Left r -> PP.Pure r
781 Right (bs, p') -> PP.Free $ do
782 p'' <- break isSpace (yield bs >> p')
783 return (go p'')
91727d11 784{-# INLINABLE words #-}
785
cf10d6f1 786
62e8521c 787-- | Intersperse a 'Char' in between the characters of the text stream
91727d11 788intersperse
789 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
31f41a5d 790intersperse c = go0
91727d11 791 where
792 go0 p = do
793 x <- lift (next p)
794 case x of
795 Left r -> return r
31f41a5d 796 Right (txt, p') -> do
797 yield (T.intersperse c txt)
91727d11 798 go1 p'
799 go1 p = do
800 x <- lift (next p)
801 case x of
802 Left r -> return r
31f41a5d 803 Right (txt, p') -> do
804 yield (T.singleton c)
805 yield (T.intersperse c txt)
91727d11 806 go1 p'
807{-# INLINABLE intersperse #-}
808
31f41a5d 809{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
810 interspersing a text stream in between them
91727d11 811-}
812intercalate
813 :: (Monad m)
814 => Producer Text m ()
815 -> FreeT (Producer Text m) m r
816 -> Producer Text m r
817intercalate p0 = go0
818 where
819 go0 f = do
820 x <- lift (PP.runFreeT f)
821 case x of
822 PP.Pure r -> return r
823 PP.Free p -> do
824 f' <- p
825 go1 f'
826 go1 f = do
827 x <- lift (PP.runFreeT f)
828 case x of
829 PP.Pure r -> return r
830 PP.Free p -> do
831 p0
832 f' <- p
833 go1 f'
834{-# INLINABLE intercalate #-}
835
62e8521c 836{-| Join 'FreeT'-delimited lines into a text stream
91727d11 837-}
838unlines
839 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
840unlines = go
841 where
842 go f = do
843 x <- lift (PP.runFreeT f)
844 case x of
845 PP.Pure r -> return r
846 PP.Free p -> do
847 f' <- p
848 yield $ T.singleton '\n'
849 go f'
850{-# INLINABLE unlines #-}
851
31f41a5d 852{-| Join 'FreeT'-delimited words into a text stream
91727d11 853-}
854unwords
855 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
856unwords = intercalate (yield $ T.pack " ")
857{-# INLINABLE unwords #-}
858
859{- $parse
31f41a5d 860 The following parsing utilities are single-character analogs of the ones found
861 @pipes-parse@.
91727d11 862-}
863
91727d11 864{- $reexports
31f41a5d 865 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
91727d11 866
867 @Data.Text@ re-exports the 'Text' type.
868
91727d11 869 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
870-}