]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
reformatted exports fwiw
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
1677dc12 1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
64e03122 2#if __GLASGOW_HASKELL__ >= 702
3{-# LANGUAGE Trustworthy #-}
4#endif
13a43263 5{-| This module provides @pipes@ utilities for \"text streams\", which are
64e03122 6 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
7 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
8 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
9 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
91727d11 10
63ea9ffd 11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
31f41a5d 12 example, the following program copies a document from one file to another:
91727d11 13
14> import Pipes
31f41a5d 15> import qualified Data.Text.Pipes as Text
91727d11 16> import System.IO
17>
18> main =
19> withFile "inFile.txt" ReadMode $ \hIn ->
20> withFile "outFile.txt" WriteMode $ \hOut ->
31f41a5d 21> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22
23To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
91727d11 24
13a43263 25> import Pipes
31f41a5d 26> import qualified Data.Text.Pipes as Text
13a43263 27> import Pipes.Safe
28>
31f41a5d 29> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
13a43263 30
91727d11 31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
31f41a5d 32 and 'stdout' proxies, as with the following \"echo\" program:
91727d11 33
31f41a5d 34> main = runEffect $ Text.stdin >-> Text.stdout
91727d11 35
36 You can also translate pure lazy 'TL.Text's to and from proxies:
37
31f41a5d 38> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
91727d11 39
40 In addition, this module provides many functions equivalent to lazy
31f41a5d 41 'Text' functions so that you can transform or fold text streams. For
91727d11 42 example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 43 might write:
91727d11 44
45> import Pipes
31f41a5d 46> import qualified Pipes.Text as Text
47> import qualified Pipes.Parse as Parse
91727d11 48>
31f41a5d 49> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
91727d11 50> where
31f41a5d 51> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
91727d11 52
31f41a5d 53 The above program will never bring more than one chunk of text (~ 32 KB) into
91727d11 54 memory, no matter how long the lines are.
55
56 Note that functions in this library are designed to operate on streams that
31f41a5d 57 are insensitive to text boundaries. This means that they may freely split
64e03122 58 text into smaller texts, /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
91727d11 61-}
62
7faef8bc 63module Pipes.Text (
91727d11 64 -- * Producers
1677dc12 65 fromLazy
66 , stdin
67 , fromHandle
68 , readFile
69 , stdinLn
91727d11 70
71 -- * Consumers
1677dc12 72 , stdout
73 , stdoutLn
74 , toHandle
75 , writeFile
91727d11 76
77 -- * Pipes
1677dc12 78 , map
79 , concatMap
80 , take
81 , drop
82 , takeWhile
83 , dropWhile
84 , filter
85 , scan
86 , encodeUtf8
87 , pack
88 , unpack
89 , toCaseFold
90 , toLower
91 , toUpper
92 , stripStart
91727d11 93
94 -- * Folds
1677dc12 95 , toLazy
96 , toLazyM
97 , foldChars
98 , head
99 , last
100 , null
101 , length
102 , any
103 , all
104 , maximum
105 , minimum
106 , find
107 , index
108 , count
109
110 -- * Primitive Character Parsers
111 -- $parse
112 , nextChar
113 , drawChar
114 , unDrawChar
115 , peekChar
116 , isEndOfChars,
117
118 -- * Parsing Lenses
119 splitAt
120 , span
121 , break
122 , groupBy
123 , group
124 -- , word
125 -- , line
126 , decodeUtf8
127 , decode
128
129 -- * FreeT Splitters
130 , chunksOf
131 , splitsWith
132 , split
133-- , groupsBy
134-- , groups
135 , lines
136 , words
137
91727d11 138
91727d11 139 -- * Transformations
1677dc12 140 , intersperse
141-- , packChars
31f41a5d 142
91727d11 143 -- * Joiners
1677dc12 144 , intercalate
145 , unlines
146 , unwords
147 -- * Re-exports
91727d11 148 -- $reexports
1677dc12 149 , module Data.ByteString
150 , module Data.Text
151 , module Data.Profunctor
152 , module Data.Word
153 , module Pipes.Parse
91727d11 154 ) where
155
156import Control.Exception (throwIO, try)
64e03122 157import Control.Monad (liftM, unless, join)
acc6868f 158import Control.Monad.Trans.State.Strict (StateT(..))
ca6f90a0 159import Data.Monoid ((<>))
91727d11 160import qualified Data.Text as T
161import qualified Data.Text.IO as T
31f41a5d 162import qualified Data.Text.Encoding as TE
63ea9ffd 163import qualified Data.Text.Encoding.Error as TE
91727d11 164import Data.Text (Text)
165import qualified Data.Text.Lazy as TL
166import qualified Data.Text.Lazy.IO as TL
167import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
168import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
31f41a5d 169import Data.ByteString (ByteString)
170import qualified Data.ByteString as B
cf10d6f1 171import Data.Char (ord, isSpace)
1677dc12 172import Data.Functor.Constant (Constant(Constant, getConstant))
91727d11 173import Data.Functor.Identity (Identity)
1677dc12 174import Data.Profunctor (Profunctor)
175import qualified Data.Profunctor
91727d11 176import qualified Data.List as List
177import Foreign.C.Error (Errno(Errno), ePIPE)
178import qualified GHC.IO.Exception as G
179import Pipes
5e3f5409 180import qualified Pipes.ByteString as PB
ca6f90a0 181import qualified Pipes.Text.Internal as PE
64e03122 182import Pipes.Text.Internal (Codec(..))
1677dc12 183import Pipes.Text.Parse (nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
91727d11 184import Pipes.Core (respond, Server')
185import qualified Pipes.Parse as PP
1677dc12 186import Pipes.Parse (Parser, concats, intercalates, FreeT)
91727d11 187import qualified Pipes.Safe.Prelude as Safe
188import qualified Pipes.Safe as Safe
189import Pipes.Safe (MonadSafe(..), Base(..))
190import qualified Pipes.Prelude as P
191import qualified System.IO as IO
192import Data.Char (isSpace)
63ea9ffd 193import Data.Word (Word8)
1677dc12 194
91727d11 195import Prelude hiding (
196 all,
197 any,
198 break,
199 concat,
200 concatMap,
201 drop,
202 dropWhile,
203 elem,
204 filter,
205 head,
206 last,
207 lines,
208 length,
209 map,
210 maximum,
211 minimum,
212 notElem,
213 null,
214 readFile,
215 span,
216 splitAt,
217 take,
218 takeWhile,
219 unlines,
220 unwords,
221 words,
222 writeFile )
223
224-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
225fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
226fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 227{-# INLINE fromLazy #-}
91727d11 228
62e8521c 229-- | Stream text from 'stdin'
ca6f90a0 230stdin :: MonadIO m => Producer Text m ()
91727d11 231stdin = fromHandle IO.stdin
ca6f90a0 232{-# INLINE stdin #-}
91727d11 233
31f41a5d 234{-| Convert a 'IO.Handle' into a text stream using a text size
ca6f90a0 235 determined by the good sense of the text library; note that this
236 is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
237 but uses the system encoding and has other `Data.Text.IO` features
31f41a5d 238-}
239
ca6f90a0 240fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
241fromHandle h = go where
242 go = do txt <- liftIO (T.hGetChunk h)
243 unless (T.null txt) $ do yield txt
244 go
91727d11 245{-# INLINABLE fromHandle#-}
ca6f90a0 246
247
248{-| Stream text from a file in the simple fashion of @Data.Text.IO@
6f6f9974 249
31f41a5d 250>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
251MAIN = PUTSTRLN "HELLO WORLD"
6f6f9974 252-}
253
ca6f90a0 254readFile :: MonadSafe m => FilePath -> Producer Text m ()
91727d11 255readFile file = Safe.withFile file IO.ReadMode fromHandle
ca6f90a0 256{-# INLINE readFile #-}
91727d11 257
31f41a5d 258{-| Stream lines of text from stdin (for testing in ghci etc.)
259
260>>> let safely = runSafeT . runEffect
261>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
262hello
2635
264world
2655
266
267-}
91727d11 268stdinLn :: MonadIO m => Producer' Text m ()
31f41a5d 269stdinLn = go where
91727d11 270 go = do
271 eof <- liftIO (IO.hIsEOF IO.stdin)
272 unless eof $ do
273 txt <- liftIO (T.hGetLine IO.stdin)
274 yield txt
275 go
ca6f90a0 276{-# INLINABLE stdinLn #-}
91727d11 277
31f41a5d 278{-| Stream text to 'stdout'
91727d11 279
280 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
281
282 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
31f41a5d 283 @(source >-> stdout)@ in suitable cases.
91727d11 284-}
285stdout :: MonadIO m => Consumer' Text m ()
286stdout = go
287 where
288 go = do
289 txt <- await
290 x <- liftIO $ try (T.putStr txt)
291 case x of
292 Left (G.IOError { G.ioe_type = G.ResourceVanished
293 , G.ioe_errno = Just ioe })
294 | Errno ioe == ePIPE
295 -> return ()
296 Left e -> liftIO (throwIO e)
297 Right () -> go
298{-# INLINABLE stdout #-}
299
300stdoutLn :: (MonadIO m) => Consumer' Text m ()
301stdoutLn = go
302 where
303 go = do
304 str <- await
305 x <- liftIO $ try (T.putStrLn str)
306 case x of
307 Left (G.IOError { G.ioe_type = G.ResourceVanished
308 , G.ioe_errno = Just ioe })
309 | Errno ioe == ePIPE
310 -> return ()
311 Left e -> liftIO (throwIO e)
312 Right () -> go
313{-# INLINABLE stdoutLn #-}
314
31f41a5d 315{-| Convert a text stream into a 'Handle'
91727d11 316
31f41a5d 317 Note: again, for best performance, where possible use
318 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
91727d11 319-}
320toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
321toHandle h = for cat (liftIO . T.hPutStr h)
322{-# INLINABLE toHandle #-}
323
d4732515 324{-# RULES "p >-> toHandle h" forall p h .
ff38b9f0 325 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
d4732515 326 #-}
327
328
31f41a5d 329-- | Stream text into a file. Uses @pipes-safe@.
ca6f90a0 330writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
91727d11 331writeFile file = Safe.withFile file IO.WriteMode toHandle
ca6f90a0 332{-# INLINE writeFile #-}
91727d11 333
1677dc12 334
335type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
336
337type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
338
339(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
340a ^. lens = getConstant (lens Constant a)
341
342
91727d11 343-- | Apply a transformation to each 'Char' in the stream
344map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
345map f = P.map (T.map f)
346{-# INLINABLE map #-}
347
ff38b9f0 348{-# RULES "p >-> map f" forall p f .
349 p >-> map f = for p (\txt -> yield (T.map f txt))
350 #-}
351
31f41a5d 352-- | Map a function over the characters of a text stream and concatenate the results
91727d11 353concatMap
354 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
355concatMap f = P.map (T.concatMap f)
356{-# INLINABLE concatMap #-}
357
ff38b9f0 358{-# RULES "p >-> concatMap f" forall p f .
359 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
360 #-}
7faef8bc 361
362-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
a02a69ad 363-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
364-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
7faef8bc 365encodeUtf8 :: Monad m => Pipe Text ByteString m r
366encodeUtf8 = P.map TE.encodeUtf8
367{-# INLINEABLE encodeUtf8 #-}
368
ff38b9f0 369{-# RULES "p >-> encodeUtf8" forall p .
370 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
371 #-}
372
c0343bc9 373-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 374pack :: Monad m => Pipe String Text m r
375pack = P.map T.pack
376{-# INLINEABLE pack #-}
377
ff38b9f0 378{-# RULES "p >-> pack" forall p .
379 p >-> pack = for p (\txt -> yield (T.pack txt))
380 #-}
381
382-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 383unpack :: Monad m => Pipe Text String m r
d4732515 384unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 385{-# INLINEABLE unpack #-}
386
ff38b9f0 387{-# RULES "p >-> unpack" forall p .
388 p >-> unpack = for p (\txt -> yield (T.unpack txt))
389 #-}
d4732515 390
c0343bc9 391-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
7faef8bc 392-- here acting on a 'Text' pipe, rather as they would on a lazy text
393toCaseFold :: Monad m => Pipe Text Text m ()
394toCaseFold = P.map T.toCaseFold
395{-# INLINEABLE toCaseFold #-}
396
ff38b9f0 397{-# RULES "p >-> toCaseFold" forall p .
398 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
399 #-}
400
401
c0343bc9 402-- | lowercase incoming 'Text'
7faef8bc 403toLower :: Monad m => Pipe Text Text m ()
404toLower = P.map T.toLower
405{-# INLINEABLE toLower #-}
406
ff38b9f0 407{-# RULES "p >-> toLower" forall p .
408 p >-> toLower = for p (\txt -> yield (T.toLower txt))
409 #-}
410
c0343bc9 411-- | uppercase incoming 'Text'
7faef8bc 412toUpper :: Monad m => Pipe Text Text m ()
413toUpper = P.map T.toUpper
414{-# INLINEABLE toUpper #-}
415
ff38b9f0 416{-# RULES "p >-> toUpper" forall p .
417 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
418 #-}
419
c0343bc9 420-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 421stripStart :: Monad m => Pipe Text Text m r
422stripStart = do
423 chunk <- await
424 let text = T.stripStart chunk
425 if T.null text
426 then stripStart
427 else cat
428{-# INLINEABLE stripStart #-}
429
31f41a5d 430-- | @(take n)@ only allows @n@ individual characters to pass;
431-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 432take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
433take n0 = go n0 where
434 go n
435 | n <= 0 = return ()
436 | otherwise = do
31f41a5d 437 txt <- await
438 let len = fromIntegral (T.length txt)
91727d11 439 if (len > n)
31f41a5d 440 then yield (T.take (fromIntegral n) txt)
91727d11 441 else do
31f41a5d 442 yield txt
91727d11 443 go (n - len)
444{-# INLINABLE take #-}
445
31f41a5d 446-- | @(drop n)@ drops the first @n@ characters
91727d11 447drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
448drop n0 = go n0 where
449 go n
450 | n <= 0 = cat
451 | otherwise = do
31f41a5d 452 txt <- await
453 let len = fromIntegral (T.length txt)
91727d11 454 if (len >= n)
455 then do
31f41a5d 456 yield (T.drop (fromIntegral n) txt)
91727d11 457 cat
458 else go (n - len)
459{-# INLINABLE drop #-}
460
31f41a5d 461-- | Take characters until they fail the predicate
91727d11 462takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
463takeWhile predicate = go
464 where
465 go = do
31f41a5d 466 txt <- await
467 let (prefix, suffix) = T.span predicate txt
91727d11 468 if (T.null suffix)
469 then do
31f41a5d 470 yield txt
91727d11 471 go
472 else yield prefix
473{-# INLINABLE takeWhile #-}
474
31f41a5d 475-- | Drop characters until they fail the predicate
91727d11 476dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
477dropWhile predicate = go where
478 go = do
31f41a5d 479 txt <- await
480 case T.findIndex (not . predicate) txt of
91727d11 481 Nothing -> go
482 Just i -> do
31f41a5d 483 yield (T.drop i txt)
91727d11 484 cat
485{-# INLINABLE dropWhile #-}
486
487-- | Only allows 'Char's to pass if they satisfy the predicate
488filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
489filter predicate = P.map (T.filter predicate)
490{-# INLINABLE filter #-}
491
ff38b9f0 492{-# RULES "p >-> filter q" forall p q .
493 p >-> filter q = for p (\txt -> yield (T.filter q txt))
494 #-}
495
31f41a5d 496-- | Strict left scan over the characters
91727d11 497scan
498 :: (Monad m)
499 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
500scan step begin = go begin
501 where
31f41a5d 502 go c = do
503 txt <- await
504 let txt' = T.scanl step c txt
505 c' = T.last txt'
506 yield txt'
507 go c'
91727d11 508{-# INLINABLE scan #-}
509
510{-| Fold a pure 'Producer' of strict 'Text's into a lazy
511 'TL.Text'
512-}
513toLazy :: Producer Text Identity () -> TL.Text
514toLazy = TL.fromChunks . P.toList
515{-# INLINABLE toLazy #-}
516
517{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
518 'TL.Text'
519
520 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
521 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
522 immediately as they are generated instead of loading them all into memory.
523-}
524toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
525toLazyM = liftM TL.fromChunks . P.toListM
526{-# INLINABLE toLazyM #-}
527
31f41a5d 528-- | Reduce the text stream using a strict left fold over characters
64e03122 529foldChars
91727d11 530 :: Monad m
531 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 532foldChars step begin done = P.fold (T.foldl' step) begin done
1677dc12 533{-# INLINABLE foldChars #-}
91727d11 534
535-- | Retrieve the first 'Char'
536head :: (Monad m) => Producer Text m () -> m (Maybe Char)
537head = go
538 where
539 go p = do
540 x <- nextChar p
541 case x of
542 Left _ -> return Nothing
31f41a5d 543 Right (c, _) -> return (Just c)
91727d11 544{-# INLINABLE head #-}
545
546-- | Retrieve the last 'Char'
547last :: (Monad m) => Producer Text m () -> m (Maybe Char)
548last = go Nothing
549 where
550 go r p = do
551 x <- next p
552 case x of
553 Left () -> return r
31f41a5d 554 Right (txt, p') ->
555 if (T.null txt)
91727d11 556 then go r p'
31f41a5d 557 else go (Just $ T.last txt) p'
91727d11 558{-# INLINABLE last #-}
559
560-- | Determine if the stream is empty
561null :: (Monad m) => Producer Text m () -> m Bool
562null = P.all T.null
563{-# INLINABLE null #-}
564
62e8521c 565-- | Count the number of characters in the stream
91727d11 566length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 567length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 568{-# INLINABLE length #-}
569
570-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
571any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
572any predicate = P.any (T.any predicate)
573{-# INLINABLE any #-}
574
575-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
576all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
577all predicate = P.all (T.all predicate)
578{-# INLINABLE all #-}
579
62e8521c 580-- | Return the maximum 'Char' within a text stream
91727d11 581maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
582maximum = P.fold step Nothing id
583 where
31f41a5d 584 step mc txt =
585 if (T.null txt)
586 then mc
587 else Just $ case mc of
588 Nothing -> T.maximum txt
589 Just c -> max c (T.maximum txt)
91727d11 590{-# INLINABLE maximum #-}
591
62e8521c 592-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 593minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
594minimum = P.fold step Nothing id
595 where
31f41a5d 596 step mc txt =
597 if (T.null txt)
598 then mc
599 else case mc of
600 Nothing -> Just (T.minimum txt)
601 Just c -> Just (min c (T.minimum txt))
91727d11 602{-# INLINABLE minimum #-}
603
1677dc12 604
91727d11 605-- | Find the first element in the stream that matches the predicate
606find
607 :: (Monad m)
608 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
609find predicate p = head (p >-> filter predicate)
610{-# INLINABLE find #-}
611
62e8521c 612-- | Index into a text stream
91727d11 613index
614 :: (Monad m, Integral a)
615 => a-> Producer Text m () -> m (Maybe Char)
616index n p = head (p >-> drop n)
617{-# INLINABLE index #-}
618
63ea9ffd 619
31f41a5d 620-- | Store a tally of how many segments match the given 'Text'
621count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
622count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
623{-# INLINABLE count #-}
624
ca6f90a0 625-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text
626-- returning a Pipe of ByteStrings that begins at the point of failure.
627
628decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
629decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
630 go !carry dec0 p = do
631 x <- lift (next p)
632 case x of Left r -> if B.null carry
633 then return (return r) -- all bytestrinput was consumed
634 else return (do yield carry -- a potentially valid fragment remains
635 return r)
636
637 Right (chunk, p') -> case dec0 chunk of
638 PE.Some text carry2 dec -> do yield text
639 go carry2 dec p'
640 PE.Other text bs -> do yield text
641 return (do yield bs -- an invalid blob remains
642 p')
643{-# INLINABLE decodeUtf8 #-}
644
31f41a5d 645
646-- | Splits a 'Producer' after the given number of characters
91727d11 647splitAt
648 :: (Monad m, Integral n)
649 => n
650 -> Producer Text m r
651 -> Producer' Text m (Producer Text m r)
652splitAt = go
653 where
654 go 0 p = return p
655 go n p = do
656 x <- lift (next p)
657 case x of
658 Left r -> return (return r)
31f41a5d 659 Right (txt, p') -> do
660 let len = fromIntegral (T.length txt)
91727d11 661 if (len <= n)
662 then do
31f41a5d 663 yield txt
91727d11 664 go (n - len) p'
665 else do
31f41a5d 666 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 667 yield prefix
668 return (yield suffix >> p')
669{-# INLINABLE splitAt #-}
670
31f41a5d 671-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
91727d11 672chunksOf
673 :: (Monad m, Integral n)
674 => n -> Producer Text m r -> FreeT (Producer Text m) m r
675chunksOf n p0 = PP.FreeT (go p0)
676 where
677 go p = do
678 x <- next p
679 return $ case x of
680 Left r -> PP.Pure r
31f41a5d 681 Right (txt, p') -> PP.Free $ do
682 p'' <- splitAt n (yield txt >> p')
91727d11 683 return $ PP.FreeT (go p'')
684{-# INLINABLE chunksOf #-}
685
31f41a5d 686{-| Split a text stream in two, where the first text stream is the longest
687 consecutive group of text that satisfy the predicate
91727d11 688-}
689span
690 :: (Monad m)
691 => (Char -> Bool)
692 -> Producer Text m r
693 -> Producer' Text m (Producer Text m r)
694span predicate = go
695 where
696 go p = do
697 x <- lift (next p)
698 case x of
699 Left r -> return (return r)
31f41a5d 700 Right (txt, p') -> do
701 let (prefix, suffix) = T.span predicate txt
91727d11 702 if (T.null suffix)
703 then do
31f41a5d 704 yield txt
91727d11 705 go p'
706 else do
707 yield prefix
708 return (yield suffix >> p')
709{-# INLINABLE span #-}
710
62e8521c 711{-| Split a text stream in two, where the first text stream is the longest
712 consecutive group of characters that don't satisfy the predicate
91727d11 713-}
714break
715 :: (Monad m)
716 => (Char -> Bool)
717 -> Producer Text m r
718 -> Producer Text m (Producer Text m r)
719break predicate = span (not . predicate)
720{-# INLINABLE break #-}
721
62e8521c 722{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 723 predicate
724-}
1677dc12 725splitsWith
91727d11 726 :: (Monad m)
727 => (Char -> Bool)
728 -> Producer Text m r
729 -> PP.FreeT (Producer Text m) m r
1677dc12 730splitsWith predicate p0 = PP.FreeT (go0 p0)
91727d11 731 where
732 go0 p = do
733 x <- next p
734 case x of
735 Left r -> return (PP.Pure r)
31f41a5d 736 Right (txt, p') ->
737 if (T.null txt)
91727d11 738 then go0 p'
739 else return $ PP.Free $ do
31f41a5d 740 p'' <- span (not . predicate) (yield txt >> p')
91727d11 741 return $ PP.FreeT (go1 p'')
742 go1 p = do
743 x <- nextChar p
744 return $ case x of
745 Left r -> PP.Pure r
746 Right (_, p') -> PP.Free $ do
747 p'' <- span (not . predicate) p'
748 return $ PP.FreeT (go1 p'')
1677dc12 749{-# INLINABLE splitsWith #-}
91727d11 750
31f41a5d 751-- | Split a text stream using the given 'Char' as the delimiter
91727d11 752split :: (Monad m)
753 => Char
754 -> Producer Text m r
755 -> FreeT (Producer Text m) m r
1677dc12 756split c = splitsWith (c ==)
91727d11 757{-# INLINABLE split #-}
758
62e8521c 759{-| Group a text stream into 'FreeT'-delimited text streams using the supplied
91727d11 760 equality predicate
761-}
762groupBy
763 :: (Monad m)
764 => (Char -> Char -> Bool)
765 -> Producer Text m r
766 -> FreeT (Producer Text m) m r
767groupBy equal p0 = PP.FreeT (go p0)
768 where
769 go p = do
770 x <- next p
771 case x of
772 Left r -> return (PP.Pure r)
31f41a5d 773 Right (txt, p') -> case (T.uncons txt) of
91727d11 774 Nothing -> go p'
31f41a5d 775 Just (c, _) -> do
91727d11 776 return $ PP.Free $ do
31f41a5d 777 p'' <- span (equal c) (yield txt >> p')
91727d11 778 return $ PP.FreeT (go p'')
779{-# INLINABLE groupBy #-}
780
62e8521c 781-- | Group a text stream into 'FreeT'-delimited text streams of identical characters
91727d11 782group
783 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
784group = groupBy (==)
785{-# INLINABLE group #-}
786
62e8521c 787{-| Split a text stream into 'FreeT'-delimited lines
91727d11 788-}
789lines
790 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
791lines p0 = PP.FreeT (go0 p0)
792 where
793 go0 p = do
794 x <- next p
795 case x of
796 Left r -> return (PP.Pure r)
31f41a5d 797 Right (txt, p') ->
798 if (T.null txt)
91727d11 799 then go0 p'
31f41a5d 800 else return $ PP.Free $ go1 (yield txt >> p')
91727d11 801 go1 p = do
802 p' <- break ('\n' ==) p
b4d21c02 803 return $ PP.FreeT $ do
804 x <- nextChar p'
805 case x of
806 Left r -> return $ PP.Pure r
807 Right (_, p'') -> go0 p''
91727d11 808{-# INLINABLE lines #-}
91727d11 809
31f41a5d 810
811
812-- | Split a text stream into 'FreeT'-delimited words
91727d11 813words
814 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
cf10d6f1 815words = go
91727d11 816 where
cf10d6f1 817 go p = PP.FreeT $ do
818 x <- next (p >-> dropWhile isSpace)
819 return $ case x of
820 Left r -> PP.Pure r
821 Right (bs, p') -> PP.Free $ do
822 p'' <- break isSpace (yield bs >> p')
823 return (go p'')
91727d11 824{-# INLINABLE words #-}
825
cf10d6f1 826
62e8521c 827-- | Intersperse a 'Char' in between the characters of the text stream
91727d11 828intersperse
829 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
31f41a5d 830intersperse c = go0
91727d11 831 where
832 go0 p = do
833 x <- lift (next p)
834 case x of
835 Left r -> return r
31f41a5d 836 Right (txt, p') -> do
837 yield (T.intersperse c txt)
91727d11 838 go1 p'
839 go1 p = do
840 x <- lift (next p)
841 case x of
842 Left r -> return r
31f41a5d 843 Right (txt, p') -> do
844 yield (T.singleton c)
845 yield (T.intersperse c txt)
91727d11 846 go1 p'
847{-# INLINABLE intersperse #-}
848
31f41a5d 849{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
850 interspersing a text stream in between them
91727d11 851-}
852intercalate
853 :: (Monad m)
854 => Producer Text m ()
855 -> FreeT (Producer Text m) m r
856 -> Producer Text m r
857intercalate p0 = go0
858 where
859 go0 f = do
860 x <- lift (PP.runFreeT f)
861 case x of
862 PP.Pure r -> return r
863 PP.Free p -> do
864 f' <- p
865 go1 f'
866 go1 f = do
867 x <- lift (PP.runFreeT f)
868 case x of
869 PP.Pure r -> return r
870 PP.Free p -> do
871 p0
872 f' <- p
873 go1 f'
874{-# INLINABLE intercalate #-}
875
62e8521c 876{-| Join 'FreeT'-delimited lines into a text stream
91727d11 877-}
878unlines
879 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
880unlines = go
881 where
882 go f = do
883 x <- lift (PP.runFreeT f)
884 case x of
885 PP.Pure r -> return r
886 PP.Free p -> do
887 f' <- p
888 yield $ T.singleton '\n'
889 go f'
890{-# INLINABLE unlines #-}
891
31f41a5d 892{-| Join 'FreeT'-delimited words into a text stream
91727d11 893-}
894unwords
895 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
896unwords = intercalate (yield $ T.pack " ")
897{-# INLINABLE unwords #-}
898
899{- $parse
31f41a5d 900 The following parsing utilities are single-character analogs of the ones found
901 @pipes-parse@.
91727d11 902-}
903
91727d11 904{- $reexports
31f41a5d 905 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
91727d11 906
907 @Data.Text@ re-exports the 'Text' type.
908
91727d11 909 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
64e03122 910-}
911
912
913
914decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
915-- decode codec = go B.empty where
916-- go extra p0 =
917-- do x <- lift (next p0)
918-- case x of Right (chunk, p) ->
919-- do let (text, stuff) = codecDecode codec (B.append extra chunk)
920-- yield text
921-- case stuff of Right extra' -> go extra' p
922-- Left (exc,bs) -> do yield text
923-- return (do yield bs
924-- p)
925-- Left r -> return (do yield extra
926-- return r)
927
928decode d p0 = case d of
929 PE.Other txt bad -> do yield txt
930 return (do yield bad
931 p0)
932 PE.Some txt extra dec -> do yield txt
933 x <- lift (next p0)
934 case x of Left r -> return (do yield extra
935 return r)
936 Right (chunk,p1) -> decode (dec chunk) p1
937
938-- go !carry dec0 p = do
939-- x <- lift (next p)
940-- case x of Left r -> if B.null carry
941-- then return (return r) -- all bytestrinput was consumed
942-- else return (do yield carry -- a potentially valid fragment remains
943-- return r)
944--
945-- Right (chunk, p') -> case dec0 chunk of
946-- PE.Some text carry2 dec -> do yield text
947-- go carry2 dec p'
948-- PE.Other text bs -> do yield text
949-- return (do yield bs -- an invalid blob remains
950-- p')
951-- {-# INLINABLE decodeUtf8 #-}