]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
Gabriel's improved 'words'
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
31f41a5d 1{-# LANGUAGE RankNTypes, TypeFamilies, CPP #-}
91727d11 2
13a43263 3{-| This module provides @pipes@ utilities for \"text streams\", which are
31f41a5d 4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
63ea9ffd 5 a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
6 be associated with a 'Producer' or 'Consumer' according as it is read or written to.
91727d11 7
63ea9ffd 8 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
31f41a5d 9 example, the following program copies a document from one file to another:
91727d11 10
11> import Pipes
31f41a5d 12> import qualified Data.Text.Pipes as Text
91727d11 13> import System.IO
14>
15> main =
16> withFile "inFile.txt" ReadMode $ \hIn ->
17> withFile "outFile.txt" WriteMode $ \hOut ->
31f41a5d 18> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
19
20To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
91727d11 21
13a43263 22> import Pipes
31f41a5d 23> import qualified Data.Text.Pipes as Text
13a43263 24> import Pipes.Safe
25>
31f41a5d 26> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
13a43263 27
91727d11 28 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
31f41a5d 29 and 'stdout' proxies, as with the following \"echo\" program:
91727d11 30
31f41a5d 31> main = runEffect $ Text.stdin >-> Text.stdout
91727d11 32
33 You can also translate pure lazy 'TL.Text's to and from proxies:
34
31f41a5d 35> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
91727d11 36
37 In addition, this module provides many functions equivalent to lazy
31f41a5d 38 'Text' functions so that you can transform or fold text streams. For
91727d11 39 example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 40 might write:
91727d11 41
42> import Pipes
31f41a5d 43> import qualified Pipes.Text as Text
44> import qualified Pipes.Parse as Parse
91727d11 45>
31f41a5d 46> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
91727d11 47> where
31f41a5d 48> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
91727d11 49
31f41a5d 50 The above program will never bring more than one chunk of text (~ 32 KB) into
91727d11 51 memory, no matter how long the lines are.
52
53 Note that functions in this library are designed to operate on streams that
31f41a5d 54 are insensitive to text boundaries. This means that they may freely split
55 text into smaller texts and /discard empty texts/. However, they will
56 /never concatenate texts/ in order to provide strict upper bounds on memory
91727d11 57 usage.
58-}
59
7faef8bc 60module Pipes.Text (
91727d11 61 -- * Producers
62 fromLazy,
63 stdin,
64 fromHandle,
65 readFile,
66 stdinLn,
91727d11 67
68 -- * Consumers
69 stdout,
70 stdoutLn,
71 toHandle,
72 writeFile,
73
74 -- * Pipes
75 map,
76 concatMap,
77 take,
78 drop,
79 takeWhile,
80 dropWhile,
81 filter,
91727d11 82 scan,
7faef8bc 83 encodeUtf8,
a02a69ad 84#if MIN_VERSION_text(0,11,4)
85 pipeDecodeUtf8,
86 pipeDecodeUtf8With,
87#endif
7faef8bc 88 pack,
89 unpack,
1d2434b5 90 toCaseFold,
91 toLower,
92 toUpper,
7faef8bc 93 stripStart,
91727d11 94
95 -- * Folds
96 toLazy,
97 toLazyM,
98 fold,
99 head,
100 last,
101 null,
102 length,
103 any,
104 all,
105 maximum,
106 minimum,
91727d11 107 find,
108 index,
31f41a5d 109 count,
91727d11 110
111 -- * Splitters
112 splitAt,
113 chunksOf,
114 span,
115 break,
116 splitWith,
117 split,
118 groupBy,
119 group,
120 lines,
121 words,
31f41a5d 122#if MIN_VERSION_text(0,11,4)
123 decodeUtf8,
63ea9ffd 124 decodeUtf8With,
31f41a5d 125#endif
91727d11 126 -- * Transformations
127 intersperse,
31f41a5d 128
91727d11 129 -- * Joiners
130 intercalate,
131 unlines,
132 unwords,
133
31f41a5d 134 -- * Character Parsers
91727d11 135 -- $parse
31f41a5d 136 nextChar,
137 drawChar,
138 unDrawChar,
139 peekChar,
140 isEndOfChars,
91727d11 141
142 -- * Re-exports
143 -- $reexports
144 module Data.Text,
91727d11 145 module Pipes.Parse
146 ) where
147
148import Control.Exception (throwIO, try)
149import Control.Monad (liftM, unless)
acc6868f 150import Control.Monad.Trans.State.Strict (StateT(..))
91727d11 151import qualified Data.Text as T
152import qualified Data.Text.IO as T
31f41a5d 153import qualified Data.Text.Encoding as TE
63ea9ffd 154import qualified Data.Text.Encoding.Error as TE
91727d11 155import Data.Text (Text)
156import qualified Data.Text.Lazy as TL
157import qualified Data.Text.Lazy.IO as TL
158import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
159import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
31f41a5d 160import Data.ByteString (ByteString)
161import qualified Data.ByteString as B
cf10d6f1 162import Data.Char (ord, isSpace)
91727d11 163import Data.Functor.Identity (Identity)
164import qualified Data.List as List
165import Foreign.C.Error (Errno(Errno), ePIPE)
166import qualified GHC.IO.Exception as G
167import Pipes
168import qualified Pipes.ByteString.Parse as PBP
c0343bc9 169import Pipes.Text.Parse (
31f41a5d 170 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
91727d11 171import Pipes.Core (respond, Server')
172import qualified Pipes.Parse as PP
173import Pipes.Parse (input, concat, FreeT)
174import qualified Pipes.Safe.Prelude as Safe
175import qualified Pipes.Safe as Safe
176import Pipes.Safe (MonadSafe(..), Base(..))
177import qualified Pipes.Prelude as P
178import qualified System.IO as IO
179import Data.Char (isSpace)
63ea9ffd 180import Data.Word (Word8)
91727d11 181import Prelude hiding (
182 all,
183 any,
184 break,
185 concat,
186 concatMap,
187 drop,
188 dropWhile,
189 elem,
190 filter,
191 head,
192 last,
193 lines,
194 length,
195 map,
196 maximum,
197 minimum,
198 notElem,
199 null,
200 readFile,
201 span,
202 splitAt,
203 take,
204 takeWhile,
205 unlines,
206 unwords,
207 words,
208 writeFile )
209
210-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
211fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
212fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
213{-# INLINABLE fromLazy #-}
214
62e8521c 215-- | Stream text from 'stdin'
91727d11 216stdin :: MonadIO m => Producer' Text m ()
217stdin = fromHandle IO.stdin
218{-# INLINABLE stdin #-}
219
31f41a5d 220{-| Convert a 'IO.Handle' into a text stream using a text size
221 determined by the good sense of the text library.
222
223-}
224
91727d11 225fromHandle :: MonadIO m => IO.Handle -> Producer' Text m ()
226fromHandle h = go where
227 go = do txt <- liftIO (T.hGetChunk h)
228 unless (T.null txt) $ do yield txt
229 go
230{-# INLINABLE fromHandle#-}
231
6f6f9974 232{-| Stream text from a file using Pipes.Safe
233
31f41a5d 234>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
235MAIN = PUTSTRLN "HELLO WORLD"
6f6f9974 236-}
237
91727d11 238readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m ()
239readFile file = Safe.withFile file IO.ReadMode fromHandle
240{-# INLINABLE readFile #-}
241
31f41a5d 242{-| Stream lines of text from stdin (for testing in ghci etc.)
243
244>>> let safely = runSafeT . runEffect
245>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
246hello
2475
248world
2495
250
251-}
91727d11 252stdinLn :: MonadIO m => Producer' Text m ()
31f41a5d 253stdinLn = go where
91727d11 254 go = do
255 eof <- liftIO (IO.hIsEOF IO.stdin)
256 unless eof $ do
257 txt <- liftIO (T.hGetLine IO.stdin)
258 yield txt
259 go
260
91727d11 261
31f41a5d 262{-| Stream text to 'stdout'
91727d11 263
264 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
265
266 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
31f41a5d 267 @(source >-> stdout)@ in suitable cases.
91727d11 268-}
269stdout :: MonadIO m => Consumer' Text m ()
270stdout = go
271 where
272 go = do
273 txt <- await
274 x <- liftIO $ try (T.putStr txt)
275 case x of
276 Left (G.IOError { G.ioe_type = G.ResourceVanished
277 , G.ioe_errno = Just ioe })
278 | Errno ioe == ePIPE
279 -> return ()
280 Left e -> liftIO (throwIO e)
281 Right () -> go
282{-# INLINABLE stdout #-}
283
284stdoutLn :: (MonadIO m) => Consumer' Text m ()
285stdoutLn = go
286 where
287 go = do
288 str <- await
289 x <- liftIO $ try (T.putStrLn str)
290 case x of
291 Left (G.IOError { G.ioe_type = G.ResourceVanished
292 , G.ioe_errno = Just ioe })
293 | Errno ioe == ePIPE
294 -> return ()
295 Left e -> liftIO (throwIO e)
296 Right () -> go
297{-# INLINABLE stdoutLn #-}
298
31f41a5d 299{-| Convert a text stream into a 'Handle'
91727d11 300
31f41a5d 301 Note: again, for best performance, where possible use
302 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
91727d11 303-}
304toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
305toHandle h = for cat (liftIO . T.hPutStr h)
306{-# INLINABLE toHandle #-}
307
31f41a5d 308-- | Stream text into a file. Uses @pipes-safe@.
91727d11 309writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m ()
310writeFile file = Safe.withFile file IO.WriteMode toHandle
311
312-- | Apply a transformation to each 'Char' in the stream
313map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
314map f = P.map (T.map f)
315{-# INLINABLE map #-}
316
31f41a5d 317-- | Map a function over the characters of a text stream and concatenate the results
91727d11 318concatMap
319 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
320concatMap f = P.map (T.concatMap f)
321{-# INLINABLE concatMap #-}
322
7faef8bc 323
324-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
a02a69ad 325-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
326-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
7faef8bc 327encodeUtf8 :: Monad m => Pipe Text ByteString m r
328encodeUtf8 = P.map TE.encodeUtf8
329{-# INLINEABLE encodeUtf8 #-}
330
c0343bc9 331-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 332pack :: Monad m => Pipe String Text m r
333pack = P.map T.pack
334{-# INLINEABLE pack #-}
335
c0343bc9 336-- | Transforma a Pipes of 'Text' chunks into one of 'String's
7faef8bc 337unpack :: Monad m => Pipe Text String m r
338unpack = P.map T.unpack
339{-# INLINEABLE unpack #-}
340
c0343bc9 341-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
7faef8bc 342-- here acting on a 'Text' pipe, rather as they would on a lazy text
343toCaseFold :: Monad m => Pipe Text Text m ()
344toCaseFold = P.map T.toCaseFold
345{-# INLINEABLE toCaseFold #-}
346
c0343bc9 347-- | lowercase incoming 'Text'
7faef8bc 348toLower :: Monad m => Pipe Text Text m ()
349toLower = P.map T.toLower
350{-# INLINEABLE toLower #-}
351
c0343bc9 352-- | uppercase incoming 'Text'
7faef8bc 353toUpper :: Monad m => Pipe Text Text m ()
354toUpper = P.map T.toUpper
355{-# INLINEABLE toUpper #-}
356
c0343bc9 357-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 358stripStart :: Monad m => Pipe Text Text m r
359stripStart = do
360 chunk <- await
361 let text = T.stripStart chunk
362 if T.null text
363 then stripStart
364 else cat
365{-# INLINEABLE stripStart #-}
366
31f41a5d 367-- | @(take n)@ only allows @n@ individual characters to pass;
368-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 369take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
370take n0 = go n0 where
371 go n
372 | n <= 0 = return ()
373 | otherwise = do
31f41a5d 374 txt <- await
375 let len = fromIntegral (T.length txt)
91727d11 376 if (len > n)
31f41a5d 377 then yield (T.take (fromIntegral n) txt)
91727d11 378 else do
31f41a5d 379 yield txt
91727d11 380 go (n - len)
381{-# INLINABLE take #-}
382
31f41a5d 383-- | @(drop n)@ drops the first @n@ characters
91727d11 384drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
385drop n0 = go n0 where
386 go n
387 | n <= 0 = cat
388 | otherwise = do
31f41a5d 389 txt <- await
390 let len = fromIntegral (T.length txt)
91727d11 391 if (len >= n)
392 then do
31f41a5d 393 yield (T.drop (fromIntegral n) txt)
91727d11 394 cat
395 else go (n - len)
396{-# INLINABLE drop #-}
397
31f41a5d 398-- | Take characters until they fail the predicate
91727d11 399takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
400takeWhile predicate = go
401 where
402 go = do
31f41a5d 403 txt <- await
404 let (prefix, suffix) = T.span predicate txt
91727d11 405 if (T.null suffix)
406 then do
31f41a5d 407 yield txt
91727d11 408 go
409 else yield prefix
410{-# INLINABLE takeWhile #-}
411
31f41a5d 412-- | Drop characters until they fail the predicate
91727d11 413dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
414dropWhile predicate = go where
415 go = do
31f41a5d 416 txt <- await
417 case T.findIndex (not . predicate) txt of
91727d11 418 Nothing -> go
419 Just i -> do
31f41a5d 420 yield (T.drop i txt)
91727d11 421 cat
422{-# INLINABLE dropWhile #-}
423
424-- | Only allows 'Char's to pass if they satisfy the predicate
425filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
426filter predicate = P.map (T.filter predicate)
427{-# INLINABLE filter #-}
428
31f41a5d 429
430-- | Strict left scan over the characters
91727d11 431scan
432 :: (Monad m)
433 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
434scan step begin = go begin
435 where
31f41a5d 436 go c = do
437 txt <- await
438 let txt' = T.scanl step c txt
439 c' = T.last txt'
440 yield txt'
441 go c'
91727d11 442{-# INLINABLE scan #-}
443
444{-| Fold a pure 'Producer' of strict 'Text's into a lazy
445 'TL.Text'
446-}
447toLazy :: Producer Text Identity () -> TL.Text
448toLazy = TL.fromChunks . P.toList
449{-# INLINABLE toLazy #-}
450
451{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
452 'TL.Text'
453
454 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
455 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
456 immediately as they are generated instead of loading them all into memory.
457-}
458toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
459toLazyM = liftM TL.fromChunks . P.toListM
460{-# INLINABLE toLazyM #-}
461
31f41a5d 462-- | Reduce the text stream using a strict left fold over characters
91727d11 463fold
464 :: Monad m
465 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
31f41a5d 466fold step begin done = P.fold (T.foldl' step) begin done
91727d11 467{-# INLINABLE fold #-}
468
469-- | Retrieve the first 'Char'
470head :: (Monad m) => Producer Text m () -> m (Maybe Char)
471head = go
472 where
473 go p = do
474 x <- nextChar p
475 case x of
476 Left _ -> return Nothing
31f41a5d 477 Right (c, _) -> return (Just c)
91727d11 478{-# INLINABLE head #-}
479
480-- | Retrieve the last 'Char'
481last :: (Monad m) => Producer Text m () -> m (Maybe Char)
482last = go Nothing
483 where
484 go r p = do
485 x <- next p
486 case x of
487 Left () -> return r
31f41a5d 488 Right (txt, p') ->
489 if (T.null txt)
91727d11 490 then go r p'
31f41a5d 491 else go (Just $ T.last txt) p'
91727d11 492{-# INLINABLE last #-}
493
494-- | Determine if the stream is empty
495null :: (Monad m) => Producer Text m () -> m Bool
496null = P.all T.null
497{-# INLINABLE null #-}
498
62e8521c 499-- | Count the number of characters in the stream
91727d11 500length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 501length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 502{-# INLINABLE length #-}
503
504-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
505any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
506any predicate = P.any (T.any predicate)
507{-# INLINABLE any #-}
508
509-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
510all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
511all predicate = P.all (T.all predicate)
512{-# INLINABLE all #-}
513
62e8521c 514-- | Return the maximum 'Char' within a text stream
91727d11 515maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
516maximum = P.fold step Nothing id
517 where
31f41a5d 518 step mc txt =
519 if (T.null txt)
520 then mc
521 else Just $ case mc of
522 Nothing -> T.maximum txt
523 Just c -> max c (T.maximum txt)
91727d11 524{-# INLINABLE maximum #-}
525
62e8521c 526-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 527minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
528minimum = P.fold step Nothing id
529 where
31f41a5d 530 step mc txt =
531 if (T.null txt)
532 then mc
533 else case mc of
534 Nothing -> Just (T.minimum txt)
535 Just c -> Just (min c (T.minimum txt))
91727d11 536{-# INLINABLE minimum #-}
537
91727d11 538-- | Find the first element in the stream that matches the predicate
539find
540 :: (Monad m)
541 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
542find predicate p = head (p >-> filter predicate)
543{-# INLINABLE find #-}
544
62e8521c 545-- | Index into a text stream
91727d11 546index
547 :: (Monad m, Integral a)
548 => a-> Producer Text m () -> m (Maybe Char)
549index n p = head (p >-> drop n)
550{-# INLINABLE index #-}
551
63ea9ffd 552
31f41a5d 553-- | Store a tally of how many segments match the given 'Text'
554count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
555count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
556{-# INLINABLE count #-}
557
558#if MIN_VERSION_text(0,11,4)
559-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
560-- into a Pipe of Text
561decodeUtf8
562 :: Monad m
563 => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
564decodeUtf8 = go TE.streamDecodeUtf8
565 where go dec p = do
566 x <- lift (next p)
567 case x of
568 Left r -> return (return r)
569 Right (chunk, p') -> do
570 let TE.Some text l dec' = dec chunk
571 if B.null l
572 then do
573 yield text
574 go dec' p'
575 else return $ do
576 yield l
577 p'
578{-# INLINEABLE decodeUtf8 #-}
63ea9ffd 579
580-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
581-- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@
582-- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\"
583decodeUtf8With
584 :: Monad m
585 => TE.OnDecodeError
586 -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
587decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
588 where go dec p = do
589 x <- lift (next p)
590 case x of
591 Left r -> return (return r)
592 Right (chunk, p') -> do
593 let TE.Some text l dec' = dec chunk
594 if B.null l
595 then do
596 yield text
597 go dec' p'
598 else return $ do
599 yield l
600 p'
601{-# INLINEABLE decodeUtf8With #-}
a02a69ad 602
603-- | A simple pipe from 'ByteString' to 'Text'; a decoding error will arise
604-- with any chunk that contains a sequence of bytes that is unreadable. Otherwise
605-- only few bytes will only be moved from one chunk to the next before decoding.
606pipeDecodeUtf8 :: Monad m => Pipe ByteString Text m r
607pipeDecodeUtf8 = go TE.streamDecodeUtf8
608 where go dec = do chunk <- await
609 case dec chunk of
610 TE.Some text l dec' -> do yield text
611 go dec'
612{-# INLINEABLE pipeDecodeUtf8 #-}
613
614-- | A simple pipe from 'ByteString' to 'Text' using a replacement function.
615pipeDecodeUtf8With
616 :: Monad m
617 => TE.OnDecodeError
618 -> Pipe ByteString Text m r
619pipeDecodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
620 where go dec = do chunk <- await
621 case dec chunk of
622 TE.Some text l dec' -> do yield text
623 go dec'
624{-# INLINEABLE pipeDecodeUtf8With #-}
31f41a5d 625#endif
626
627-- | Splits a 'Producer' after the given number of characters
91727d11 628splitAt
629 :: (Monad m, Integral n)
630 => n
631 -> Producer Text m r
632 -> Producer' Text m (Producer Text m r)
633splitAt = go
634 where
635 go 0 p = return p
636 go n p = do
637 x <- lift (next p)
638 case x of
639 Left r -> return (return r)
31f41a5d 640 Right (txt, p') -> do
641 let len = fromIntegral (T.length txt)
91727d11 642 if (len <= n)
643 then do
31f41a5d 644 yield txt
91727d11 645 go (n - len) p'
646 else do
31f41a5d 647 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 648 yield prefix
649 return (yield suffix >> p')
650{-# INLINABLE splitAt #-}
651
31f41a5d 652-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
91727d11 653chunksOf
654 :: (Monad m, Integral n)
655 => n -> Producer Text m r -> FreeT (Producer Text m) m r
656chunksOf n p0 = PP.FreeT (go p0)
657 where
658 go p = do
659 x <- next p
660 return $ case x of
661 Left r -> PP.Pure r
31f41a5d 662 Right (txt, p') -> PP.Free $ do
663 p'' <- splitAt n (yield txt >> p')
91727d11 664 return $ PP.FreeT (go p'')
665{-# INLINABLE chunksOf #-}
666
31f41a5d 667{-| Split a text stream in two, where the first text stream is the longest
668 consecutive group of text that satisfy the predicate
91727d11 669-}
670span
671 :: (Monad m)
672 => (Char -> Bool)
673 -> Producer Text m r
674 -> Producer' Text m (Producer Text m r)
675span predicate = go
676 where
677 go p = do
678 x <- lift (next p)
679 case x of
680 Left r -> return (return r)
31f41a5d 681 Right (txt, p') -> do
682 let (prefix, suffix) = T.span predicate txt
91727d11 683 if (T.null suffix)
684 then do
31f41a5d 685 yield txt
91727d11 686 go p'
687 else do
688 yield prefix
689 return (yield suffix >> p')
690{-# INLINABLE span #-}
691
62e8521c 692{-| Split a text stream in two, where the first text stream is the longest
693 consecutive group of characters that don't satisfy the predicate
91727d11 694-}
695break
696 :: (Monad m)
697 => (Char -> Bool)
698 -> Producer Text m r
699 -> Producer Text m (Producer Text m r)
700break predicate = span (not . predicate)
701{-# INLINABLE break #-}
702
62e8521c 703{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 704 predicate
705-}
706splitWith
707 :: (Monad m)
708 => (Char -> Bool)
709 -> Producer Text m r
710 -> PP.FreeT (Producer Text m) m r
711splitWith predicate p0 = PP.FreeT (go0 p0)
712 where
713 go0 p = do
714 x <- next p
715 case x of
716 Left r -> return (PP.Pure r)
31f41a5d 717 Right (txt, p') ->
718 if (T.null txt)
91727d11 719 then go0 p'
720 else return $ PP.Free $ do
31f41a5d 721 p'' <- span (not . predicate) (yield txt >> p')
91727d11 722 return $ PP.FreeT (go1 p'')
723 go1 p = do
724 x <- nextChar p
725 return $ case x of
726 Left r -> PP.Pure r
727 Right (_, p') -> PP.Free $ do
728 p'' <- span (not . predicate) p'
729 return $ PP.FreeT (go1 p'')
730{-# INLINABLE splitWith #-}
731
31f41a5d 732-- | Split a text stream using the given 'Char' as the delimiter
91727d11 733split :: (Monad m)
734 => Char
735 -> Producer Text m r
736 -> FreeT (Producer Text m) m r
31f41a5d 737split c = splitWith (c ==)
91727d11 738{-# INLINABLE split #-}
739
62e8521c 740{-| Group a text stream into 'FreeT'-delimited text streams using the supplied
91727d11 741 equality predicate
742-}
743groupBy
744 :: (Monad m)
745 => (Char -> Char -> Bool)
746 -> Producer Text m r
747 -> FreeT (Producer Text m) m r
748groupBy equal p0 = PP.FreeT (go p0)
749 where
750 go p = do
751 x <- next p
752 case x of
753 Left r -> return (PP.Pure r)
31f41a5d 754 Right (txt, p') -> case (T.uncons txt) of
91727d11 755 Nothing -> go p'
31f41a5d 756 Just (c, _) -> do
91727d11 757 return $ PP.Free $ do
31f41a5d 758 p'' <- span (equal c) (yield txt >> p')
91727d11 759 return $ PP.FreeT (go p'')
760{-# INLINABLE groupBy #-}
761
62e8521c 762-- | Group a text stream into 'FreeT'-delimited text streams of identical characters
91727d11 763group
764 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
765group = groupBy (==)
766{-# INLINABLE group #-}
767
62e8521c 768{-| Split a text stream into 'FreeT'-delimited lines
91727d11 769-}
770lines
771 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
772lines p0 = PP.FreeT (go0 p0)
773 where
774 go0 p = do
775 x <- next p
776 case x of
777 Left r -> return (PP.Pure r)
31f41a5d 778 Right (txt, p') ->
779 if (T.null txt)
91727d11 780 then go0 p'
31f41a5d 781 else return $ PP.Free $ go1 (yield txt >> p')
91727d11 782 go1 p = do
783 p' <- break ('\n' ==) p
784 return $ PP.FreeT (go2 p')
785 go2 p = do
786 x <- nextChar p
787 return $ case x of
788 Left r -> PP.Pure r
789 Right (_, p') -> PP.Free (go1 p')
790{-# INLINABLE lines #-}
91727d11 791
31f41a5d 792
793
794-- | Split a text stream into 'FreeT'-delimited words
91727d11 795words
796 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
cf10d6f1 797words = go
91727d11 798 where
cf10d6f1 799 go p = PP.FreeT $ do
800 x <- next (p >-> dropWhile isSpace)
801 return $ case x of
802 Left r -> PP.Pure r
803 Right (bs, p') -> PP.Free $ do
804 p'' <- break isSpace (yield bs >> p')
805 return (go p'')
91727d11 806{-# INLINABLE words #-}
807
cf10d6f1 808
62e8521c 809-- | Intersperse a 'Char' in between the characters of the text stream
91727d11 810intersperse
811 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
31f41a5d 812intersperse c = go0
91727d11 813 where
814 go0 p = do
815 x <- lift (next p)
816 case x of
817 Left r -> return r
31f41a5d 818 Right (txt, p') -> do
819 yield (T.intersperse c txt)
91727d11 820 go1 p'
821 go1 p = do
822 x <- lift (next p)
823 case x of
824 Left r -> return r
31f41a5d 825 Right (txt, p') -> do
826 yield (T.singleton c)
827 yield (T.intersperse c txt)
91727d11 828 go1 p'
829{-# INLINABLE intersperse #-}
830
31f41a5d 831{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
832 interspersing a text stream in between them
91727d11 833-}
834intercalate
835 :: (Monad m)
836 => Producer Text m ()
837 -> FreeT (Producer Text m) m r
838 -> Producer Text m r
839intercalate p0 = go0
840 where
841 go0 f = do
842 x <- lift (PP.runFreeT f)
843 case x of
844 PP.Pure r -> return r
845 PP.Free p -> do
846 f' <- p
847 go1 f'
848 go1 f = do
849 x <- lift (PP.runFreeT f)
850 case x of
851 PP.Pure r -> return r
852 PP.Free p -> do
853 p0
854 f' <- p
855 go1 f'
856{-# INLINABLE intercalate #-}
857
62e8521c 858{-| Join 'FreeT'-delimited lines into a text stream
91727d11 859-}
860unlines
861 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
862unlines = go
863 where
864 go f = do
865 x <- lift (PP.runFreeT f)
866 case x of
867 PP.Pure r -> return r
868 PP.Free p -> do
869 f' <- p
870 yield $ T.singleton '\n'
871 go f'
872{-# INLINABLE unlines #-}
873
31f41a5d 874{-| Join 'FreeT'-delimited words into a text stream
91727d11 875-}
876unwords
877 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
878unwords = intercalate (yield $ T.pack " ")
879{-# INLINABLE unwords #-}
880
881{- $parse
31f41a5d 882 The following parsing utilities are single-character analogs of the ones found
883 @pipes-parse@.
91727d11 884-}
885
91727d11 886{- $reexports
31f41a5d 887 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
91727d11 888
889 @Data.Text@ re-exports the 'Text' type.
890
91727d11 891 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
892-}