]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
renamed fold foldChars and began updating documentation
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
ca6f90a0 1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-}
64e03122 2#if __GLASGOW_HASKELL__ >= 702
3{-# LANGUAGE Trustworthy #-}
4#endif
13a43263 5{-| This module provides @pipes@ utilities for \"text streams\", which are
64e03122 6 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
7 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
8 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
9 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
91727d11 10
63ea9ffd 11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
31f41a5d 12 example, the following program copies a document from one file to another:
91727d11 13
14> import Pipes
31f41a5d 15> import qualified Data.Text.Pipes as Text
91727d11 16> import System.IO
17>
18> main =
19> withFile "inFile.txt" ReadMode $ \hIn ->
20> withFile "outFile.txt" WriteMode $ \hOut ->
31f41a5d 21> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22
23To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
91727d11 24
13a43263 25> import Pipes
31f41a5d 26> import qualified Data.Text.Pipes as Text
13a43263 27> import Pipes.Safe
28>
31f41a5d 29> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
13a43263 30
91727d11 31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
31f41a5d 32 and 'stdout' proxies, as with the following \"echo\" program:
91727d11 33
31f41a5d 34> main = runEffect $ Text.stdin >-> Text.stdout
91727d11 35
36 You can also translate pure lazy 'TL.Text's to and from proxies:
37
31f41a5d 38> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
91727d11 39
40 In addition, this module provides many functions equivalent to lazy
31f41a5d 41 'Text' functions so that you can transform or fold text streams. For
91727d11 42 example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 43 might write:
91727d11 44
45> import Pipes
31f41a5d 46> import qualified Pipes.Text as Text
47> import qualified Pipes.Parse as Parse
91727d11 48>
31f41a5d 49> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
91727d11 50> where
31f41a5d 51> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
91727d11 52
31f41a5d 53 The above program will never bring more than one chunk of text (~ 32 KB) into
91727d11 54 memory, no matter how long the lines are.
55
56 Note that functions in this library are designed to operate on streams that
31f41a5d 57 are insensitive to text boundaries. This means that they may freely split
64e03122 58 text into smaller texts, /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
91727d11 61-}
62
7faef8bc 63module Pipes.Text (
91727d11 64 -- * Producers
65 fromLazy,
66 stdin,
67 fromHandle,
68 readFile,
69 stdinLn,
91727d11 70
71 -- * Consumers
72 stdout,
73 stdoutLn,
74 toHandle,
75 writeFile,
76
77 -- * Pipes
78 map,
79 concatMap,
80 take,
81 drop,
82 takeWhile,
83 dropWhile,
84 filter,
91727d11 85 scan,
7faef8bc 86 encodeUtf8,
87 pack,
88 unpack,
1d2434b5 89 toCaseFold,
90 toLower,
91 toUpper,
7faef8bc 92 stripStart,
91727d11 93
94 -- * Folds
95 toLazy,
96 toLazyM,
64e03122 97 foldChars,
91727d11 98 head,
99 last,
100 null,
101 length,
102 any,
103 all,
104 maximum,
105 minimum,
91727d11 106 find,
107 index,
31f41a5d 108 count,
91727d11 109
110 -- * Splitters
111 splitAt,
112 chunksOf,
113 span,
114 break,
115 splitWith,
116 split,
117 groupBy,
118 group,
119 lines,
120 words,
31f41a5d 121 decodeUtf8,
64e03122 122 decode,
91727d11 123 -- * Transformations
124 intersperse,
31f41a5d 125
91727d11 126 -- * Joiners
127 intercalate,
128 unlines,
129 unwords,
130
31f41a5d 131 -- * Character Parsers
91727d11 132 -- $parse
31f41a5d 133 nextChar,
134 drawChar,
135 unDrawChar,
136 peekChar,
137 isEndOfChars,
91727d11 138
139 -- * Re-exports
140 -- $reexports
141 module Data.Text,
91727d11 142 module Pipes.Parse
143 ) where
144
145import Control.Exception (throwIO, try)
64e03122 146import Control.Monad (liftM, unless, join)
acc6868f 147import Control.Monad.Trans.State.Strict (StateT(..))
ca6f90a0 148import Data.Monoid ((<>))
91727d11 149import qualified Data.Text as T
150import qualified Data.Text.IO as T
31f41a5d 151import qualified Data.Text.Encoding as TE
63ea9ffd 152import qualified Data.Text.Encoding.Error as TE
91727d11 153import Data.Text (Text)
154import qualified Data.Text.Lazy as TL
155import qualified Data.Text.Lazy.IO as TL
156import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
157import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
31f41a5d 158import Data.ByteString (ByteString)
159import qualified Data.ByteString as B
cf10d6f1 160import Data.Char (ord, isSpace)
91727d11 161import Data.Functor.Identity (Identity)
162import qualified Data.List as List
163import Foreign.C.Error (Errno(Errno), ePIPE)
164import qualified GHC.IO.Exception as G
165import Pipes
5e3f5409 166import qualified Pipes.ByteString as PB
64e03122 167import qualified Pipes.ByteString as PBP
ca6f90a0 168import qualified Pipes.Text.Internal as PE
64e03122 169import Pipes.Text.Internal (Codec(..))
c0343bc9 170import Pipes.Text.Parse (
31f41a5d 171 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
91727d11 172import Pipes.Core (respond, Server')
173import qualified Pipes.Parse as PP
64e03122 174import Pipes.Parse ( FreeT)
91727d11 175import qualified Pipes.Safe.Prelude as Safe
176import qualified Pipes.Safe as Safe
177import Pipes.Safe (MonadSafe(..), Base(..))
178import qualified Pipes.Prelude as P
179import qualified System.IO as IO
180import Data.Char (isSpace)
63ea9ffd 181import Data.Word (Word8)
91727d11 182import Prelude hiding (
183 all,
184 any,
185 break,
186 concat,
187 concatMap,
188 drop,
189 dropWhile,
190 elem,
191 filter,
192 head,
193 last,
194 lines,
195 length,
196 map,
197 maximum,
198 minimum,
199 notElem,
200 null,
201 readFile,
202 span,
203 splitAt,
204 take,
205 takeWhile,
206 unlines,
207 unwords,
208 words,
209 writeFile )
210
211-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
212fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
213fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 214{-# INLINE fromLazy #-}
91727d11 215
62e8521c 216-- | Stream text from 'stdin'
ca6f90a0 217stdin :: MonadIO m => Producer Text m ()
91727d11 218stdin = fromHandle IO.stdin
ca6f90a0 219{-# INLINE stdin #-}
91727d11 220
31f41a5d 221{-| Convert a 'IO.Handle' into a text stream using a text size
ca6f90a0 222 determined by the good sense of the text library; note that this
223 is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
224 but uses the system encoding and has other `Data.Text.IO` features
31f41a5d 225-}
226
ca6f90a0 227fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
228fromHandle h = go where
229 go = do txt <- liftIO (T.hGetChunk h)
230 unless (T.null txt) $ do yield txt
231 go
91727d11 232{-# INLINABLE fromHandle#-}
ca6f90a0 233
234
235{-| Stream text from a file in the simple fashion of @Data.Text.IO@
6f6f9974 236
31f41a5d 237>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
238MAIN = PUTSTRLN "HELLO WORLD"
6f6f9974 239-}
240
ca6f90a0 241readFile :: MonadSafe m => FilePath -> Producer Text m ()
91727d11 242readFile file = Safe.withFile file IO.ReadMode fromHandle
ca6f90a0 243{-# INLINE readFile #-}
91727d11 244
31f41a5d 245{-| Stream lines of text from stdin (for testing in ghci etc.)
246
247>>> let safely = runSafeT . runEffect
248>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
249hello
2505
251world
2525
253
254-}
91727d11 255stdinLn :: MonadIO m => Producer' Text m ()
31f41a5d 256stdinLn = go where
91727d11 257 go = do
258 eof <- liftIO (IO.hIsEOF IO.stdin)
259 unless eof $ do
260 txt <- liftIO (T.hGetLine IO.stdin)
261 yield txt
262 go
ca6f90a0 263{-# INLINABLE stdinLn #-}
91727d11 264
31f41a5d 265{-| Stream text to 'stdout'
91727d11 266
267 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
268
269 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
31f41a5d 270 @(source >-> stdout)@ in suitable cases.
91727d11 271-}
272stdout :: MonadIO m => Consumer' Text m ()
273stdout = go
274 where
275 go = do
276 txt <- await
277 x <- liftIO $ try (T.putStr txt)
278 case x of
279 Left (G.IOError { G.ioe_type = G.ResourceVanished
280 , G.ioe_errno = Just ioe })
281 | Errno ioe == ePIPE
282 -> return ()
283 Left e -> liftIO (throwIO e)
284 Right () -> go
285{-# INLINABLE stdout #-}
286
287stdoutLn :: (MonadIO m) => Consumer' Text m ()
288stdoutLn = go
289 where
290 go = do
291 str <- await
292 x <- liftIO $ try (T.putStrLn str)
293 case x of
294 Left (G.IOError { G.ioe_type = G.ResourceVanished
295 , G.ioe_errno = Just ioe })
296 | Errno ioe == ePIPE
297 -> return ()
298 Left e -> liftIO (throwIO e)
299 Right () -> go
300{-# INLINABLE stdoutLn #-}
301
31f41a5d 302{-| Convert a text stream into a 'Handle'
91727d11 303
31f41a5d 304 Note: again, for best performance, where possible use
305 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
91727d11 306-}
307toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
308toHandle h = for cat (liftIO . T.hPutStr h)
309{-# INLINABLE toHandle #-}
310
d4732515 311{-# RULES "p >-> toHandle h" forall p h .
ff38b9f0 312 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
d4732515 313 #-}
314
315
31f41a5d 316-- | Stream text into a file. Uses @pipes-safe@.
ca6f90a0 317writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
91727d11 318writeFile file = Safe.withFile file IO.WriteMode toHandle
ca6f90a0 319{-# INLINE writeFile #-}
91727d11 320
321-- | Apply a transformation to each 'Char' in the stream
322map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
323map f = P.map (T.map f)
324{-# INLINABLE map #-}
325
ff38b9f0 326{-# RULES "p >-> map f" forall p f .
327 p >-> map f = for p (\txt -> yield (T.map f txt))
328 #-}
329
31f41a5d 330-- | Map a function over the characters of a text stream and concatenate the results
91727d11 331concatMap
332 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
333concatMap f = P.map (T.concatMap f)
334{-# INLINABLE concatMap #-}
335
ff38b9f0 336{-# RULES "p >-> concatMap f" forall p f .
337 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
338 #-}
7faef8bc 339
340-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
a02a69ad 341-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
342-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
7faef8bc 343encodeUtf8 :: Monad m => Pipe Text ByteString m r
344encodeUtf8 = P.map TE.encodeUtf8
345{-# INLINEABLE encodeUtf8 #-}
346
ff38b9f0 347{-# RULES "p >-> encodeUtf8" forall p .
348 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
349 #-}
350
c0343bc9 351-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 352pack :: Monad m => Pipe String Text m r
353pack = P.map T.pack
354{-# INLINEABLE pack #-}
355
ff38b9f0 356{-# RULES "p >-> pack" forall p .
357 p >-> pack = for p (\txt -> yield (T.pack txt))
358 #-}
359
360-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 361unpack :: Monad m => Pipe Text String m r
d4732515 362unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 363{-# INLINEABLE unpack #-}
364
ff38b9f0 365{-# RULES "p >-> unpack" forall p .
366 p >-> unpack = for p (\txt -> yield (T.unpack txt))
367 #-}
d4732515 368
c0343bc9 369-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
7faef8bc 370-- here acting on a 'Text' pipe, rather as they would on a lazy text
371toCaseFold :: Monad m => Pipe Text Text m ()
372toCaseFold = P.map T.toCaseFold
373{-# INLINEABLE toCaseFold #-}
374
ff38b9f0 375{-# RULES "p >-> toCaseFold" forall p .
376 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
377 #-}
378
379
c0343bc9 380-- | lowercase incoming 'Text'
7faef8bc 381toLower :: Monad m => Pipe Text Text m ()
382toLower = P.map T.toLower
383{-# INLINEABLE toLower #-}
384
ff38b9f0 385{-# RULES "p >-> toLower" forall p .
386 p >-> toLower = for p (\txt -> yield (T.toLower txt))
387 #-}
388
c0343bc9 389-- | uppercase incoming 'Text'
7faef8bc 390toUpper :: Monad m => Pipe Text Text m ()
391toUpper = P.map T.toUpper
392{-# INLINEABLE toUpper #-}
393
ff38b9f0 394{-# RULES "p >-> toUpper" forall p .
395 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
396 #-}
397
c0343bc9 398-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 399stripStart :: Monad m => Pipe Text Text m r
400stripStart = do
401 chunk <- await
402 let text = T.stripStart chunk
403 if T.null text
404 then stripStart
405 else cat
406{-# INLINEABLE stripStart #-}
407
31f41a5d 408-- | @(take n)@ only allows @n@ individual characters to pass;
409-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 410take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
411take n0 = go n0 where
412 go n
413 | n <= 0 = return ()
414 | otherwise = do
31f41a5d 415 txt <- await
416 let len = fromIntegral (T.length txt)
91727d11 417 if (len > n)
31f41a5d 418 then yield (T.take (fromIntegral n) txt)
91727d11 419 else do
31f41a5d 420 yield txt
91727d11 421 go (n - len)
422{-# INLINABLE take #-}
423
31f41a5d 424-- | @(drop n)@ drops the first @n@ characters
91727d11 425drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
426drop n0 = go n0 where
427 go n
428 | n <= 0 = cat
429 | otherwise = do
31f41a5d 430 txt <- await
431 let len = fromIntegral (T.length txt)
91727d11 432 if (len >= n)
433 then do
31f41a5d 434 yield (T.drop (fromIntegral n) txt)
91727d11 435 cat
436 else go (n - len)
437{-# INLINABLE drop #-}
438
31f41a5d 439-- | Take characters until they fail the predicate
91727d11 440takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
441takeWhile predicate = go
442 where
443 go = do
31f41a5d 444 txt <- await
445 let (prefix, suffix) = T.span predicate txt
91727d11 446 if (T.null suffix)
447 then do
31f41a5d 448 yield txt
91727d11 449 go
450 else yield prefix
451{-# INLINABLE takeWhile #-}
452
31f41a5d 453-- | Drop characters until they fail the predicate
91727d11 454dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
455dropWhile predicate = go where
456 go = do
31f41a5d 457 txt <- await
458 case T.findIndex (not . predicate) txt of
91727d11 459 Nothing -> go
460 Just i -> do
31f41a5d 461 yield (T.drop i txt)
91727d11 462 cat
463{-# INLINABLE dropWhile #-}
464
465-- | Only allows 'Char's to pass if they satisfy the predicate
466filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
467filter predicate = P.map (T.filter predicate)
468{-# INLINABLE filter #-}
469
ff38b9f0 470{-# RULES "p >-> filter q" forall p q .
471 p >-> filter q = for p (\txt -> yield (T.filter q txt))
472 #-}
473
31f41a5d 474-- | Strict left scan over the characters
91727d11 475scan
476 :: (Monad m)
477 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
478scan step begin = go begin
479 where
31f41a5d 480 go c = do
481 txt <- await
482 let txt' = T.scanl step c txt
483 c' = T.last txt'
484 yield txt'
485 go c'
91727d11 486{-# INLINABLE scan #-}
487
488{-| Fold a pure 'Producer' of strict 'Text's into a lazy
489 'TL.Text'
490-}
491toLazy :: Producer Text Identity () -> TL.Text
492toLazy = TL.fromChunks . P.toList
493{-# INLINABLE toLazy #-}
494
495{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
496 'TL.Text'
497
498 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
499 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
500 immediately as they are generated instead of loading them all into memory.
501-}
502toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
503toLazyM = liftM TL.fromChunks . P.toListM
504{-# INLINABLE toLazyM #-}
505
31f41a5d 506-- | Reduce the text stream using a strict left fold over characters
64e03122 507foldChars
91727d11 508 :: Monad m
509 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 510foldChars step begin done = P.fold (T.foldl' step) begin done
91727d11 511{-# INLINABLE fold #-}
512
513-- | Retrieve the first 'Char'
514head :: (Monad m) => Producer Text m () -> m (Maybe Char)
515head = go
516 where
517 go p = do
518 x <- nextChar p
519 case x of
520 Left _ -> return Nothing
31f41a5d 521 Right (c, _) -> return (Just c)
91727d11 522{-# INLINABLE head #-}
523
524-- | Retrieve the last 'Char'
525last :: (Monad m) => Producer Text m () -> m (Maybe Char)
526last = go Nothing
527 where
528 go r p = do
529 x <- next p
530 case x of
531 Left () -> return r
31f41a5d 532 Right (txt, p') ->
533 if (T.null txt)
91727d11 534 then go r p'
31f41a5d 535 else go (Just $ T.last txt) p'
91727d11 536{-# INLINABLE last #-}
537
538-- | Determine if the stream is empty
539null :: (Monad m) => Producer Text m () -> m Bool
540null = P.all T.null
541{-# INLINABLE null #-}
542
62e8521c 543-- | Count the number of characters in the stream
91727d11 544length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 545length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 546{-# INLINABLE length #-}
547
548-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
549any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
550any predicate = P.any (T.any predicate)
551{-# INLINABLE any #-}
552
553-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
554all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
555all predicate = P.all (T.all predicate)
556{-# INLINABLE all #-}
557
62e8521c 558-- | Return the maximum 'Char' within a text stream
91727d11 559maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
560maximum = P.fold step Nothing id
561 where
31f41a5d 562 step mc txt =
563 if (T.null txt)
564 then mc
565 else Just $ case mc of
566 Nothing -> T.maximum txt
567 Just c -> max c (T.maximum txt)
91727d11 568{-# INLINABLE maximum #-}
569
62e8521c 570-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 571minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
572minimum = P.fold step Nothing id
573 where
31f41a5d 574 step mc txt =
575 if (T.null txt)
576 then mc
577 else case mc of
578 Nothing -> Just (T.minimum txt)
579 Just c -> Just (min c (T.minimum txt))
91727d11 580{-# INLINABLE minimum #-}
581
91727d11 582-- | Find the first element in the stream that matches the predicate
583find
584 :: (Monad m)
585 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
586find predicate p = head (p >-> filter predicate)
587{-# INLINABLE find #-}
588
62e8521c 589-- | Index into a text stream
91727d11 590index
591 :: (Monad m, Integral a)
592 => a-> Producer Text m () -> m (Maybe Char)
593index n p = head (p >-> drop n)
594{-# INLINABLE index #-}
595
63ea9ffd 596
31f41a5d 597-- | Store a tally of how many segments match the given 'Text'
598count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
599count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
600{-# INLINABLE count #-}
601
ca6f90a0 602-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text
603-- returning a Pipe of ByteStrings that begins at the point of failure.
604
605decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
606decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
607 go !carry dec0 p = do
608 x <- lift (next p)
609 case x of Left r -> if B.null carry
610 then return (return r) -- all bytestrinput was consumed
611 else return (do yield carry -- a potentially valid fragment remains
612 return r)
613
614 Right (chunk, p') -> case dec0 chunk of
615 PE.Some text carry2 dec -> do yield text
616 go carry2 dec p'
617 PE.Other text bs -> do yield text
618 return (do yield bs -- an invalid blob remains
619 p')
620{-# INLINABLE decodeUtf8 #-}
621
31f41a5d 622
623-- | Splits a 'Producer' after the given number of characters
91727d11 624splitAt
625 :: (Monad m, Integral n)
626 => n
627 -> Producer Text m r
628 -> Producer' Text m (Producer Text m r)
629splitAt = go
630 where
631 go 0 p = return p
632 go n p = do
633 x <- lift (next p)
634 case x of
635 Left r -> return (return r)
31f41a5d 636 Right (txt, p') -> do
637 let len = fromIntegral (T.length txt)
91727d11 638 if (len <= n)
639 then do
31f41a5d 640 yield txt
91727d11 641 go (n - len) p'
642 else do
31f41a5d 643 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 644 yield prefix
645 return (yield suffix >> p')
646{-# INLINABLE splitAt #-}
647
31f41a5d 648-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
91727d11 649chunksOf
650 :: (Monad m, Integral n)
651 => n -> Producer Text m r -> FreeT (Producer Text m) m r
652chunksOf n p0 = PP.FreeT (go p0)
653 where
654 go p = do
655 x <- next p
656 return $ case x of
657 Left r -> PP.Pure r
31f41a5d 658 Right (txt, p') -> PP.Free $ do
659 p'' <- splitAt n (yield txt >> p')
91727d11 660 return $ PP.FreeT (go p'')
661{-# INLINABLE chunksOf #-}
662
31f41a5d 663{-| Split a text stream in two, where the first text stream is the longest
664 consecutive group of text that satisfy the predicate
91727d11 665-}
666span
667 :: (Monad m)
668 => (Char -> Bool)
669 -> Producer Text m r
670 -> Producer' Text m (Producer Text m r)
671span predicate = go
672 where
673 go p = do
674 x <- lift (next p)
675 case x of
676 Left r -> return (return r)
31f41a5d 677 Right (txt, p') -> do
678 let (prefix, suffix) = T.span predicate txt
91727d11 679 if (T.null suffix)
680 then do
31f41a5d 681 yield txt
91727d11 682 go p'
683 else do
684 yield prefix
685 return (yield suffix >> p')
686{-# INLINABLE span #-}
687
62e8521c 688{-| Split a text stream in two, where the first text stream is the longest
689 consecutive group of characters that don't satisfy the predicate
91727d11 690-}
691break
692 :: (Monad m)
693 => (Char -> Bool)
694 -> Producer Text m r
695 -> Producer Text m (Producer Text m r)
696break predicate = span (not . predicate)
697{-# INLINABLE break #-}
698
62e8521c 699{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 700 predicate
701-}
702splitWith
703 :: (Monad m)
704 => (Char -> Bool)
705 -> Producer Text m r
706 -> PP.FreeT (Producer Text m) m r
707splitWith predicate p0 = PP.FreeT (go0 p0)
708 where
709 go0 p = do
710 x <- next p
711 case x of
712 Left r -> return (PP.Pure r)
31f41a5d 713 Right (txt, p') ->
714 if (T.null txt)
91727d11 715 then go0 p'
716 else return $ PP.Free $ do
31f41a5d 717 p'' <- span (not . predicate) (yield txt >> p')
91727d11 718 return $ PP.FreeT (go1 p'')
719 go1 p = do
720 x <- nextChar p
721 return $ case x of
722 Left r -> PP.Pure r
723 Right (_, p') -> PP.Free $ do
724 p'' <- span (not . predicate) p'
725 return $ PP.FreeT (go1 p'')
726{-# INLINABLE splitWith #-}
727
31f41a5d 728-- | Split a text stream using the given 'Char' as the delimiter
91727d11 729split :: (Monad m)
730 => Char
731 -> Producer Text m r
732 -> FreeT (Producer Text m) m r
31f41a5d 733split c = splitWith (c ==)
91727d11 734{-# INLINABLE split #-}
735
62e8521c 736{-| Group a text stream into 'FreeT'-delimited text streams using the supplied
91727d11 737 equality predicate
738-}
739groupBy
740 :: (Monad m)
741 => (Char -> Char -> Bool)
742 -> Producer Text m r
743 -> FreeT (Producer Text m) m r
744groupBy equal p0 = PP.FreeT (go p0)
745 where
746 go p = do
747 x <- next p
748 case x of
749 Left r -> return (PP.Pure r)
31f41a5d 750 Right (txt, p') -> case (T.uncons txt) of
91727d11 751 Nothing -> go p'
31f41a5d 752 Just (c, _) -> do
91727d11 753 return $ PP.Free $ do
31f41a5d 754 p'' <- span (equal c) (yield txt >> p')
91727d11 755 return $ PP.FreeT (go p'')
756{-# INLINABLE groupBy #-}
757
62e8521c 758-- | Group a text stream into 'FreeT'-delimited text streams of identical characters
91727d11 759group
760 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
761group = groupBy (==)
762{-# INLINABLE group #-}
763
62e8521c 764{-| Split a text stream into 'FreeT'-delimited lines
91727d11 765-}
766lines
767 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
768lines p0 = PP.FreeT (go0 p0)
769 where
770 go0 p = do
771 x <- next p
772 case x of
773 Left r -> return (PP.Pure r)
31f41a5d 774 Right (txt, p') ->
775 if (T.null txt)
91727d11 776 then go0 p'
31f41a5d 777 else return $ PP.Free $ go1 (yield txt >> p')
91727d11 778 go1 p = do
779 p' <- break ('\n' ==) p
b4d21c02 780 return $ PP.FreeT $ do
781 x <- nextChar p'
782 case x of
783 Left r -> return $ PP.Pure r
784 Right (_, p'') -> go0 p''
91727d11 785{-# INLINABLE lines #-}
91727d11 786
31f41a5d 787
788
789-- | Split a text stream into 'FreeT'-delimited words
91727d11 790words
791 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
cf10d6f1 792words = go
91727d11 793 where
cf10d6f1 794 go p = PP.FreeT $ do
795 x <- next (p >-> dropWhile isSpace)
796 return $ case x of
797 Left r -> PP.Pure r
798 Right (bs, p') -> PP.Free $ do
799 p'' <- break isSpace (yield bs >> p')
800 return (go p'')
91727d11 801{-# INLINABLE words #-}
802
cf10d6f1 803
62e8521c 804-- | Intersperse a 'Char' in between the characters of the text stream
91727d11 805intersperse
806 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
31f41a5d 807intersperse c = go0
91727d11 808 where
809 go0 p = do
810 x <- lift (next p)
811 case x of
812 Left r -> return r
31f41a5d 813 Right (txt, p') -> do
814 yield (T.intersperse c txt)
91727d11 815 go1 p'
816 go1 p = do
817 x <- lift (next p)
818 case x of
819 Left r -> return r
31f41a5d 820 Right (txt, p') -> do
821 yield (T.singleton c)
822 yield (T.intersperse c txt)
91727d11 823 go1 p'
824{-# INLINABLE intersperse #-}
825
31f41a5d 826{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
827 interspersing a text stream in between them
91727d11 828-}
829intercalate
830 :: (Monad m)
831 => Producer Text m ()
832 -> FreeT (Producer Text m) m r
833 -> Producer Text m r
834intercalate p0 = go0
835 where
836 go0 f = do
837 x <- lift (PP.runFreeT f)
838 case x of
839 PP.Pure r -> return r
840 PP.Free p -> do
841 f' <- p
842 go1 f'
843 go1 f = do
844 x <- lift (PP.runFreeT f)
845 case x of
846 PP.Pure r -> return r
847 PP.Free p -> do
848 p0
849 f' <- p
850 go1 f'
851{-# INLINABLE intercalate #-}
852
62e8521c 853{-| Join 'FreeT'-delimited lines into a text stream
91727d11 854-}
855unlines
856 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
857unlines = go
858 where
859 go f = do
860 x <- lift (PP.runFreeT f)
861 case x of
862 PP.Pure r -> return r
863 PP.Free p -> do
864 f' <- p
865 yield $ T.singleton '\n'
866 go f'
867{-# INLINABLE unlines #-}
868
31f41a5d 869{-| Join 'FreeT'-delimited words into a text stream
91727d11 870-}
871unwords
872 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
873unwords = intercalate (yield $ T.pack " ")
874{-# INLINABLE unwords #-}
875
876{- $parse
31f41a5d 877 The following parsing utilities are single-character analogs of the ones found
878 @pipes-parse@.
91727d11 879-}
880
91727d11 881{- $reexports
31f41a5d 882 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
91727d11 883
884 @Data.Text@ re-exports the 'Text' type.
885
91727d11 886 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
64e03122 887-}
888
889
890
891decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
892-- decode codec = go B.empty where
893-- go extra p0 =
894-- do x <- lift (next p0)
895-- case x of Right (chunk, p) ->
896-- do let (text, stuff) = codecDecode codec (B.append extra chunk)
897-- yield text
898-- case stuff of Right extra' -> go extra' p
899-- Left (exc,bs) -> do yield text
900-- return (do yield bs
901-- p)
902-- Left r -> return (do yield extra
903-- return r)
904
905decode d p0 = case d of
906 PE.Other txt bad -> do yield txt
907 return (do yield bad
908 p0)
909 PE.Some txt extra dec -> do yield txt
910 x <- lift (next p0)
911 case x of Left r -> return (do yield extra
912 return r)
913 Right (chunk,p1) -> decode (dec chunk) p1
914
915-- go !carry dec0 p = do
916-- x <- lift (next p)
917-- case x of Left r -> if B.null carry
918-- then return (return r) -- all bytestrinput was consumed
919-- else return (do yield carry -- a potentially valid fragment remains
920-- return r)
921--
922-- Right (chunk, p') -> case dec0 chunk of
923-- PE.Some text carry2 dec -> do yield text
924-- go carry2 dec p'
925-- PE.Other text bs -> do yield text
926-- return (do yield bs -- an invalid blob remains
927-- p')
928-- {-# INLINABLE decodeUtf8 #-}