]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
lensification under way
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
1677dc12 1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
64e03122 2#if __GLASGOW_HASKELL__ >= 702
3{-# LANGUAGE Trustworthy #-}
4#endif
13a43263 5{-| This module provides @pipes@ utilities for \"text streams\", which are
64e03122 6 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
7 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
8 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
9 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
91727d11 10
63ea9ffd 11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
31f41a5d 12 example, the following program copies a document from one file to another:
91727d11 13
14> import Pipes
31f41a5d 15> import qualified Data.Text.Pipes as Text
91727d11 16> import System.IO
17>
18> main =
19> withFile "inFile.txt" ReadMode $ \hIn ->
20> withFile "outFile.txt" WriteMode $ \hOut ->
31f41a5d 21> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22
23To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
91727d11 24
13a43263 25> import Pipes
31f41a5d 26> import qualified Data.Text.Pipes as Text
13a43263 27> import Pipes.Safe
28>
31f41a5d 29> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
13a43263 30
91727d11 31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
31f41a5d 32 and 'stdout' proxies, as with the following \"echo\" program:
91727d11 33
31f41a5d 34> main = runEffect $ Text.stdin >-> Text.stdout
91727d11 35
36 You can also translate pure lazy 'TL.Text's to and from proxies:
37
31f41a5d 38> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
91727d11 39
40 In addition, this module provides many functions equivalent to lazy
31f41a5d 41 'Text' functions so that you can transform or fold text streams. For
91727d11 42 example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 43 might write:
91727d11 44
45> import Pipes
31f41a5d 46> import qualified Pipes.Text as Text
47> import qualified Pipes.Parse as Parse
91727d11 48>
31f41a5d 49> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
91727d11 50> where
31f41a5d 51> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
91727d11 52
31f41a5d 53 The above program will never bring more than one chunk of text (~ 32 KB) into
91727d11 54 memory, no matter how long the lines are.
55
56 Note that functions in this library are designed to operate on streams that
31f41a5d 57 are insensitive to text boundaries. This means that they may freely split
64e03122 58 text into smaller texts, /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
91727d11 61-}
62
7faef8bc 63module Pipes.Text (
91727d11 64 -- * Producers
1677dc12 65 fromLazy
66 , stdin
67 , fromHandle
68 , readFile
69 , stdinLn
91727d11 70
71 -- * Consumers
1677dc12 72 , stdout
73 , stdoutLn
74 , toHandle
75 , writeFile
91727d11 76
77 -- * Pipes
1677dc12 78 , map
79 , concatMap
80 , take
81 , drop
82 , takeWhile
83 , dropWhile
84 , filter
85 , scan
86 , encodeUtf8
87 , pack
88 , unpack
89 , toCaseFold
90 , toLower
91 , toUpper
92 , stripStart
91727d11 93
94 -- * Folds
1677dc12 95 , toLazy
96 , toLazyM
97 , foldChars
98 , head
99 , last
100 , null
101 , length
102 , any
103 , all
104 , maximum
105 , minimum
106 , find
107 , index
108 , count
109
110 -- * Primitive Character Parsers
111 -- $parse
112 , nextChar
113 , drawChar
114 , unDrawChar
115 , peekChar
9e9bb0ce 116 , isEndOfChars
1677dc12 117
118 -- * Parsing Lenses
9e9bb0ce 119 , splitAt
1677dc12 120 , span
121 , break
122 , groupBy
123 , group
9e9bb0ce 124 , word
125 , line
1677dc12 126 , decodeUtf8
127 , decode
128
129 -- * FreeT Splitters
130 , chunksOf
131 , splitsWith
132 , split
133-- , groupsBy
134-- , groups
135 , lines
136 , words
137
91727d11 138
91727d11 139 -- * Transformations
1677dc12 140 , intersperse
9e9bb0ce 141 , packChars
31f41a5d 142
91727d11 143 -- * Joiners
1677dc12 144 , intercalate
145 , unlines
146 , unwords
9e9bb0ce 147
1677dc12 148 -- * Re-exports
91727d11 149 -- $reexports
1677dc12 150 , module Data.ByteString
151 , module Data.Text
152 , module Data.Profunctor
153 , module Data.Word
154 , module Pipes.Parse
91727d11 155 ) where
156
157import Control.Exception (throwIO, try)
64e03122 158import Control.Monad (liftM, unless, join)
9e9bb0ce 159import Control.Monad.Trans.State.Strict (StateT(..), modify)
ca6f90a0 160import Data.Monoid ((<>))
91727d11 161import qualified Data.Text as T
162import qualified Data.Text.IO as T
31f41a5d 163import qualified Data.Text.Encoding as TE
63ea9ffd 164import qualified Data.Text.Encoding.Error as TE
91727d11 165import Data.Text (Text)
166import qualified Data.Text.Lazy as TL
167import qualified Data.Text.Lazy.IO as TL
168import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
169import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
31f41a5d 170import Data.ByteString (ByteString)
171import qualified Data.ByteString as B
cf10d6f1 172import Data.Char (ord, isSpace)
1677dc12 173import Data.Functor.Constant (Constant(Constant, getConstant))
91727d11 174import Data.Functor.Identity (Identity)
1677dc12 175import Data.Profunctor (Profunctor)
176import qualified Data.Profunctor
91727d11 177import qualified Data.List as List
178import Foreign.C.Error (Errno(Errno), ePIPE)
179import qualified GHC.IO.Exception as G
180import Pipes
5e3f5409 181import qualified Pipes.ByteString as PB
ca6f90a0 182import qualified Pipes.Text.Internal as PE
64e03122 183import Pipes.Text.Internal (Codec(..))
9e9bb0ce 184-- import Pipes.Text.Parse (nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
185
91727d11 186import Pipes.Core (respond, Server')
187import qualified Pipes.Parse as PP
9e9bb0ce 188import Pipes.Parse (Parser, concats, intercalates, FreeT(..))
91727d11 189import qualified Pipes.Safe.Prelude as Safe
190import qualified Pipes.Safe as Safe
191import Pipes.Safe (MonadSafe(..), Base(..))
192import qualified Pipes.Prelude as P
193import qualified System.IO as IO
194import Data.Char (isSpace)
63ea9ffd 195import Data.Word (Word8)
1677dc12 196
91727d11 197import Prelude hiding (
198 all,
199 any,
200 break,
201 concat,
202 concatMap,
203 drop,
204 dropWhile,
205 elem,
206 filter,
207 head,
208 last,
209 lines,
210 length,
211 map,
212 maximum,
213 minimum,
214 notElem,
215 null,
216 readFile,
217 span,
218 splitAt,
219 take,
220 takeWhile,
221 unlines,
222 unwords,
223 words,
224 writeFile )
225
226-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
227fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
228fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 229{-# INLINE fromLazy #-}
91727d11 230
62e8521c 231-- | Stream text from 'stdin'
ca6f90a0 232stdin :: MonadIO m => Producer Text m ()
91727d11 233stdin = fromHandle IO.stdin
ca6f90a0 234{-# INLINE stdin #-}
91727d11 235
31f41a5d 236{-| Convert a 'IO.Handle' into a text stream using a text size
ca6f90a0 237 determined by the good sense of the text library; note that this
238 is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
239 but uses the system encoding and has other `Data.Text.IO` features
31f41a5d 240-}
241
ca6f90a0 242fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
243fromHandle h = go where
244 go = do txt <- liftIO (T.hGetChunk h)
245 unless (T.null txt) $ do yield txt
246 go
91727d11 247{-# INLINABLE fromHandle#-}
ca6f90a0 248
249
250{-| Stream text from a file in the simple fashion of @Data.Text.IO@
6f6f9974 251
31f41a5d 252>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
253MAIN = PUTSTRLN "HELLO WORLD"
6f6f9974 254-}
255
ca6f90a0 256readFile :: MonadSafe m => FilePath -> Producer Text m ()
91727d11 257readFile file = Safe.withFile file IO.ReadMode fromHandle
ca6f90a0 258{-# INLINE readFile #-}
91727d11 259
31f41a5d 260{-| Stream lines of text from stdin (for testing in ghci etc.)
261
262>>> let safely = runSafeT . runEffect
263>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
264hello
2655
266world
2675
268
269-}
91727d11 270stdinLn :: MonadIO m => Producer' Text m ()
31f41a5d 271stdinLn = go where
91727d11 272 go = do
273 eof <- liftIO (IO.hIsEOF IO.stdin)
274 unless eof $ do
275 txt <- liftIO (T.hGetLine IO.stdin)
276 yield txt
277 go
ca6f90a0 278{-# INLINABLE stdinLn #-}
91727d11 279
31f41a5d 280{-| Stream text to 'stdout'
91727d11 281
282 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
283
284 Note: For best performance, use @(for source (liftIO . putStr))@ instead of
31f41a5d 285 @(source >-> stdout)@ in suitable cases.
91727d11 286-}
287stdout :: MonadIO m => Consumer' Text m ()
288stdout = go
289 where
290 go = do
291 txt <- await
292 x <- liftIO $ try (T.putStr txt)
293 case x of
294 Left (G.IOError { G.ioe_type = G.ResourceVanished
295 , G.ioe_errno = Just ioe })
296 | Errno ioe == ePIPE
297 -> return ()
298 Left e -> liftIO (throwIO e)
299 Right () -> go
300{-# INLINABLE stdout #-}
301
302stdoutLn :: (MonadIO m) => Consumer' Text m ()
303stdoutLn = go
304 where
305 go = do
306 str <- await
307 x <- liftIO $ try (T.putStrLn str)
308 case x of
309 Left (G.IOError { G.ioe_type = G.ResourceVanished
310 , G.ioe_errno = Just ioe })
311 | Errno ioe == ePIPE
312 -> return ()
313 Left e -> liftIO (throwIO e)
314 Right () -> go
315{-# INLINABLE stdoutLn #-}
316
31f41a5d 317{-| Convert a text stream into a 'Handle'
91727d11 318
31f41a5d 319 Note: again, for best performance, where possible use
320 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
91727d11 321-}
322toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
323toHandle h = for cat (liftIO . T.hPutStr h)
324{-# INLINABLE toHandle #-}
325
d4732515 326{-# RULES "p >-> toHandle h" forall p h .
ff38b9f0 327 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
d4732515 328 #-}
329
330
31f41a5d 331-- | Stream text into a file. Uses @pipes-safe@.
ca6f90a0 332writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
91727d11 333writeFile file = Safe.withFile file IO.WriteMode toHandle
ca6f90a0 334{-# INLINE writeFile #-}
91727d11 335
1677dc12 336
337type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
338
339type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
340
341(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
342a ^. lens = getConstant (lens Constant a)
343
344
91727d11 345-- | Apply a transformation to each 'Char' in the stream
346map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
347map f = P.map (T.map f)
348{-# INLINABLE map #-}
349
ff38b9f0 350{-# RULES "p >-> map f" forall p f .
351 p >-> map f = for p (\txt -> yield (T.map f txt))
352 #-}
353
31f41a5d 354-- | Map a function over the characters of a text stream and concatenate the results
91727d11 355concatMap
356 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
357concatMap f = P.map (T.concatMap f)
358{-# INLINABLE concatMap #-}
359
ff38b9f0 360{-# RULES "p >-> concatMap f" forall p f .
361 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
362 #-}
7faef8bc 363
364-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
a02a69ad 365-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
366-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
7faef8bc 367encodeUtf8 :: Monad m => Pipe Text ByteString m r
368encodeUtf8 = P.map TE.encodeUtf8
369{-# INLINEABLE encodeUtf8 #-}
370
ff38b9f0 371{-# RULES "p >-> encodeUtf8" forall p .
372 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
373 #-}
374
c0343bc9 375-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 376pack :: Monad m => Pipe String Text m r
377pack = P.map T.pack
378{-# INLINEABLE pack #-}
379
ff38b9f0 380{-# RULES "p >-> pack" forall p .
381 p >-> pack = for p (\txt -> yield (T.pack txt))
382 #-}
383
384-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 385unpack :: Monad m => Pipe Text String m r
d4732515 386unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 387{-# INLINEABLE unpack #-}
388
ff38b9f0 389{-# RULES "p >-> unpack" forall p .
390 p >-> unpack = for p (\txt -> yield (T.unpack txt))
391 #-}
d4732515 392
c0343bc9 393-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utility,
7faef8bc 394-- here acting on a 'Text' pipe, rather as they would on a lazy text
395toCaseFold :: Monad m => Pipe Text Text m ()
396toCaseFold = P.map T.toCaseFold
397{-# INLINEABLE toCaseFold #-}
398
ff38b9f0 399{-# RULES "p >-> toCaseFold" forall p .
400 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
401 #-}
402
403
c0343bc9 404-- | lowercase incoming 'Text'
7faef8bc 405toLower :: Monad m => Pipe Text Text m ()
406toLower = P.map T.toLower
407{-# INLINEABLE toLower #-}
408
ff38b9f0 409{-# RULES "p >-> toLower" forall p .
410 p >-> toLower = for p (\txt -> yield (T.toLower txt))
411 #-}
412
c0343bc9 413-- | uppercase incoming 'Text'
7faef8bc 414toUpper :: Monad m => Pipe Text Text m ()
415toUpper = P.map T.toUpper
416{-# INLINEABLE toUpper #-}
417
ff38b9f0 418{-# RULES "p >-> toUpper" forall p .
419 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
420 #-}
421
c0343bc9 422-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 423stripStart :: Monad m => Pipe Text Text m r
424stripStart = do
425 chunk <- await
426 let text = T.stripStart chunk
427 if T.null text
428 then stripStart
429 else cat
430{-# INLINEABLE stripStart #-}
431
31f41a5d 432-- | @(take n)@ only allows @n@ individual characters to pass;
433-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 434take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
435take n0 = go n0 where
436 go n
437 | n <= 0 = return ()
438 | otherwise = do
31f41a5d 439 txt <- await
440 let len = fromIntegral (T.length txt)
91727d11 441 if (len > n)
31f41a5d 442 then yield (T.take (fromIntegral n) txt)
91727d11 443 else do
31f41a5d 444 yield txt
91727d11 445 go (n - len)
446{-# INLINABLE take #-}
447
31f41a5d 448-- | @(drop n)@ drops the first @n@ characters
91727d11 449drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
450drop n0 = go n0 where
451 go n
452 | n <= 0 = cat
453 | otherwise = do
31f41a5d 454 txt <- await
455 let len = fromIntegral (T.length txt)
91727d11 456 if (len >= n)
457 then do
31f41a5d 458 yield (T.drop (fromIntegral n) txt)
91727d11 459 cat
460 else go (n - len)
461{-# INLINABLE drop #-}
462
31f41a5d 463-- | Take characters until they fail the predicate
91727d11 464takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
465takeWhile predicate = go
466 where
467 go = do
31f41a5d 468 txt <- await
469 let (prefix, suffix) = T.span predicate txt
91727d11 470 if (T.null suffix)
471 then do
31f41a5d 472 yield txt
91727d11 473 go
474 else yield prefix
475{-# INLINABLE takeWhile #-}
476
31f41a5d 477-- | Drop characters until they fail the predicate
91727d11 478dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
479dropWhile predicate = go where
480 go = do
31f41a5d 481 txt <- await
482 case T.findIndex (not . predicate) txt of
91727d11 483 Nothing -> go
484 Just i -> do
31f41a5d 485 yield (T.drop i txt)
91727d11 486 cat
487{-# INLINABLE dropWhile #-}
488
489-- | Only allows 'Char's to pass if they satisfy the predicate
490filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
491filter predicate = P.map (T.filter predicate)
492{-# INLINABLE filter #-}
493
ff38b9f0 494{-# RULES "p >-> filter q" forall p q .
495 p >-> filter q = for p (\txt -> yield (T.filter q txt))
496 #-}
497
31f41a5d 498-- | Strict left scan over the characters
91727d11 499scan
500 :: (Monad m)
501 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
502scan step begin = go begin
503 where
31f41a5d 504 go c = do
505 txt <- await
506 let txt' = T.scanl step c txt
507 c' = T.last txt'
508 yield txt'
509 go c'
91727d11 510{-# INLINABLE scan #-}
511
512{-| Fold a pure 'Producer' of strict 'Text's into a lazy
513 'TL.Text'
514-}
515toLazy :: Producer Text Identity () -> TL.Text
516toLazy = TL.fromChunks . P.toList
517{-# INLINABLE toLazy #-}
518
519{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
520 'TL.Text'
521
522 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
523 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
524 immediately as they are generated instead of loading them all into memory.
525-}
526toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
527toLazyM = liftM TL.fromChunks . P.toListM
528{-# INLINABLE toLazyM #-}
529
31f41a5d 530-- | Reduce the text stream using a strict left fold over characters
64e03122 531foldChars
91727d11 532 :: Monad m
533 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 534foldChars step begin done = P.fold (T.foldl' step) begin done
1677dc12 535{-# INLINABLE foldChars #-}
91727d11 536
537-- | Retrieve the first 'Char'
538head :: (Monad m) => Producer Text m () -> m (Maybe Char)
539head = go
540 where
541 go p = do
542 x <- nextChar p
543 case x of
544 Left _ -> return Nothing
31f41a5d 545 Right (c, _) -> return (Just c)
91727d11 546{-# INLINABLE head #-}
547
548-- | Retrieve the last 'Char'
549last :: (Monad m) => Producer Text m () -> m (Maybe Char)
550last = go Nothing
551 where
552 go r p = do
553 x <- next p
554 case x of
555 Left () -> return r
31f41a5d 556 Right (txt, p') ->
557 if (T.null txt)
91727d11 558 then go r p'
31f41a5d 559 else go (Just $ T.last txt) p'
91727d11 560{-# INLINABLE last #-}
561
562-- | Determine if the stream is empty
563null :: (Monad m) => Producer Text m () -> m Bool
564null = P.all T.null
565{-# INLINABLE null #-}
566
62e8521c 567-- | Count the number of characters in the stream
91727d11 568length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 569length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 570{-# INLINABLE length #-}
571
572-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
573any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
574any predicate = P.any (T.any predicate)
575{-# INLINABLE any #-}
576
577-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
578all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
579all predicate = P.all (T.all predicate)
580{-# INLINABLE all #-}
581
62e8521c 582-- | Return the maximum 'Char' within a text stream
91727d11 583maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
584maximum = P.fold step Nothing id
585 where
31f41a5d 586 step mc txt =
587 if (T.null txt)
588 then mc
589 else Just $ case mc of
590 Nothing -> T.maximum txt
591 Just c -> max c (T.maximum txt)
91727d11 592{-# INLINABLE maximum #-}
593
62e8521c 594-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 595minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
596minimum = P.fold step Nothing id
597 where
31f41a5d 598 step mc txt =
599 if (T.null txt)
600 then mc
601 else case mc of
602 Nothing -> Just (T.minimum txt)
603 Just c -> Just (min c (T.minimum txt))
91727d11 604{-# INLINABLE minimum #-}
605
1677dc12 606
91727d11 607-- | Find the first element in the stream that matches the predicate
608find
609 :: (Monad m)
610 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
611find predicate p = head (p >-> filter predicate)
612{-# INLINABLE find #-}
613
62e8521c 614-- | Index into a text stream
91727d11 615index
616 :: (Monad m, Integral a)
617 => a-> Producer Text m () -> m (Maybe Char)
618index n p = head (p >-> drop n)
619{-# INLINABLE index #-}
620
63ea9ffd 621
31f41a5d 622-- | Store a tally of how many segments match the given 'Text'
623count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
624count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
625{-# INLINABLE count #-}
626
9e9bb0ce 627
628{-| Consume the first character from a stream of 'Text'
629
630 'next' either fails with a 'Left' if the 'Producer' has no more characters or
631 succeeds with a 'Right' providing the next character and the remainder of the
632 'Producer'.
633-}
634nextChar
635 :: (Monad m)
636 => Producer Text m r
637 -> m (Either r (Char, Producer Text m r))
638nextChar = go
639 where
640 go p = do
641 x <- next p
642 case x of
643 Left r -> return (Left r)
644 Right (txt, p') -> case (T.uncons txt) of
645 Nothing -> go p'
646 Just (c, txt') -> return (Right (c, yield txt' >> p'))
647{-# INLINABLE nextChar #-}
648
649{-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
650 'Producer' is empty
651-}
652drawChar :: (Monad m) => Parser Text m (Maybe Char)
653drawChar = do
654 x <- PP.draw
655 case x of
656 Nothing -> return Nothing
657 Just txt -> case (T.uncons txt) of
658 Nothing -> drawChar
659 Just (c, txt') -> do
660 PP.unDraw txt'
661 return (Just c)
662{-# INLINABLE drawChar #-}
663
664-- | Push back a 'Char' onto the underlying 'Producer'
665unDrawChar :: (Monad m) => Char -> Parser Text m ()
666unDrawChar c = modify (yield (T.singleton c) >>)
667{-# INLINABLE unDrawChar #-}
668
669{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
670 push the 'Char' back
671
672> peekChar = do
673> x <- drawChar
674> case x of
675> Left _ -> return ()
676> Right c -> unDrawChar c
677> return x
678-}
679peekChar :: (Monad m) => Parser Text m (Maybe Char)
680peekChar = do
681 x <- drawChar
682 case x of
683 Nothing -> return ()
684 Just c -> unDrawChar c
685 return x
686{-# INLINABLE peekChar #-}
687
688{-| Check if the underlying 'Producer' has no more characters
689
690 Note that this will skip over empty 'Text' chunks, unlike
691 'PP.isEndOfInput' from @pipes-parse@, which would consider
692 an empty 'Text' a valid bit of input.
693
694> isEndOfChars = liftM isLeft peekChar
695-}
696isEndOfChars :: (Monad m) => Parser Text m Bool
697isEndOfChars = do
698 x <- peekChar
699 return (case x of
700 Nothing -> True
701 Just _-> False )
702{-# INLINABLE isEndOfChars #-}
703
704
705
706
707
ca6f90a0 708-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text
709-- returning a Pipe of ByteStrings that begins at the point of failure.
710
9e9bb0ce 711decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
712 (Producer Text m (Producer ByteString m r))
713decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
714 (k (go B.empty PE.streamDecodeUtf8 p0)) where
ca6f90a0 715 go !carry dec0 p = do
716 x <- lift (next p)
717 case x of Left r -> if B.null carry
718 then return (return r) -- all bytestrinput was consumed
719 else return (do yield carry -- a potentially valid fragment remains
720 return r)
721
722 Right (chunk, p') -> case dec0 chunk of
723 PE.Some text carry2 dec -> do yield text
724 go carry2 dec p'
725 PE.Other text bs -> do yield text
726 return (do yield bs -- an invalid blob remains
727 p')
728{-# INLINABLE decodeUtf8 #-}
729
31f41a5d 730
731-- | Splits a 'Producer' after the given number of characters
91727d11 732splitAt
733 :: (Monad m, Integral n)
734 => n
9e9bb0ce 735 -> Lens' (Producer Text m r)
736 (Producer Text m (Producer Text m r))
737splitAt n0 k p0 = fmap join (k (go n0 p0))
91727d11 738 where
739 go 0 p = return p
740 go n p = do
741 x <- lift (next p)
742 case x of
743 Left r -> return (return r)
31f41a5d 744 Right (txt, p') -> do
745 let len = fromIntegral (T.length txt)
91727d11 746 if (len <= n)
747 then do
31f41a5d 748 yield txt
91727d11 749 go (n - len) p'
750 else do
31f41a5d 751 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 752 yield prefix
753 return (yield suffix >> p')
754{-# INLINABLE splitAt #-}
755
31f41a5d 756-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
91727d11 757chunksOf
758 :: (Monad m, Integral n)
9e9bb0ce 759 => n -> Lens' (Producer Text m r)
760 (FreeT (Producer Text m) m r)
761chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
91727d11 762 where
763 go p = do
764 x <- next p
765 return $ case x of
766 Left r -> PP.Pure r
31f41a5d 767 Right (txt, p') -> PP.Free $ do
9e9bb0ce 768 p'' <- (yield txt >> p') ^. splitAt n
91727d11 769 return $ PP.FreeT (go p'')
770{-# INLINABLE chunksOf #-}
771
31f41a5d 772{-| Split a text stream in two, where the first text stream is the longest
773 consecutive group of text that satisfy the predicate
91727d11 774-}
775span
776 :: (Monad m)
777 => (Char -> Bool)
9e9bb0ce 778 -> Lens' (Producer Text m r)
779 (Producer Text m (Producer Text m r))
780span predicate k p0 = fmap join (k (go p0))
91727d11 781 where
782 go p = do
783 x <- lift (next p)
784 case x of
785 Left r -> return (return r)
31f41a5d 786 Right (txt, p') -> do
787 let (prefix, suffix) = T.span predicate txt
91727d11 788 if (T.null suffix)
789 then do
31f41a5d 790 yield txt
91727d11 791 go p'
792 else do
793 yield prefix
794 return (yield suffix >> p')
795{-# INLINABLE span #-}
796
62e8521c 797{-| Split a text stream in two, where the first text stream is the longest
798 consecutive group of characters that don't satisfy the predicate
91727d11 799-}
800break
801 :: (Monad m)
802 => (Char -> Bool)
9e9bb0ce 803 -> Lens' (Producer Text m r)
804 (Producer Text m (Producer Text m r))
91727d11 805break predicate = span (not . predicate)
806{-# INLINABLE break #-}
807
9e9bb0ce 808{-| Improper lens that splits after the first group of equivalent Chars, as
809 defined by the given equivalence relation
810-}
811groupBy
812 :: (Monad m)
813 => (Char -> Char -> Bool)
814 -> Lens' (Producer Text m r)
815 (Producer Text m (Producer Text m r))
816groupBy equals k p0 = fmap join (k ((go p0))) where
817 go p = do
818 x <- lift (next p)
819 case x of
820 Left r -> return (return r)
821 Right (txt, p') -> case T.uncons txt of
822 Nothing -> go p'
823 Just (c, _) -> (yield txt >> p') ^. span (equals c)
824{-# INLINABLE groupBy #-}
825
826-- | Improper lens that splits after the first succession of identical 'Char' s
827group :: Monad m
828 => Lens' (Producer Text m r)
829 (Producer Text m (Producer Text m r))
830group = groupBy (==)
831{-# INLINABLE group #-}
832
833{-| Improper lens that splits a 'Producer' after the first word
834
835 Unlike 'words', this does not drop leading whitespace
836-}
837word :: (Monad m)
838 => Lens' (Producer Text m r)
839 (Producer Text m (Producer Text m r))
840word k p0 = fmap join (k (to p0))
841 where
842 to p = do
843 p' <- p^.span isSpace
844 p'^.break isSpace
845{-# INLINABLE word #-}
846
847
848line :: (Monad m)
849 => Lens' (Producer Text m r)
850 (Producer Text m (Producer Text m r))
851line = break (== '\n')
852
853{-# INLINABLE line #-}
854
855
856-- | Intersperse a 'Char' in between the characters of stream of 'Text'
857intersperse
858 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
859intersperse c = go0
860 where
861 go0 p = do
862 x <- lift (next p)
863 case x of
864 Left r -> return r
865 Right (txt, p') -> do
866 yield (T.intersperse c txt)
867 go1 p'
868 go1 p = do
869 x <- lift (next p)
870 case x of
871 Left r -> return r
872 Right (txt, p') -> do
873 yield (T.singleton c)
874 yield (T.intersperse c txt)
875 go1 p'
876{-# INLINABLE intersperse #-}
877
878
879
880-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
881packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
882packChars = Data.Profunctor.dimap to (fmap from)
883 where
884 -- to :: Monad m => Producer Char m x -> Producer Text m x
885 to p = PP.folds step id done (p^.PP.chunksOf defaultChunkSize)
886
887 step diffAs c = diffAs . (c:)
888
889 done diffAs = T.pack (diffAs [])
890
891 -- from :: Monad m => Producer Text m x -> Producer Char m x
892 from p = for p (each . T.unpack)
893{-# INLINABLE packChars #-}
894
62e8521c 895{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 896 predicate
897-}
1677dc12 898splitsWith
91727d11 899 :: (Monad m)
900 => (Char -> Bool)
901 -> Producer Text m r
902 -> PP.FreeT (Producer Text m) m r
1677dc12 903splitsWith predicate p0 = PP.FreeT (go0 p0)
91727d11 904 where
905 go0 p = do
906 x <- next p
907 case x of
908 Left r -> return (PP.Pure r)
31f41a5d 909 Right (txt, p') ->
910 if (T.null txt)
91727d11 911 then go0 p'
912 else return $ PP.Free $ do
9e9bb0ce 913 p'' <- (yield txt >> p') ^. span (not . predicate)
91727d11 914 return $ PP.FreeT (go1 p'')
915 go1 p = do
916 x <- nextChar p
917 return $ case x of
918 Left r -> PP.Pure r
919 Right (_, p') -> PP.Free $ do
9e9bb0ce 920 p'' <- p' ^. span (not . predicate)
91727d11 921 return $ PP.FreeT (go1 p'')
1677dc12 922{-# INLINABLE splitsWith #-}
91727d11 923
31f41a5d 924-- | Split a text stream using the given 'Char' as the delimiter
91727d11 925split :: (Monad m)
926 => Char
927 -> Producer Text m r
928 -> FreeT (Producer Text m) m r
1677dc12 929split c = splitsWith (c ==)
91727d11 930{-# INLINABLE split #-}
931
91727d11 932
62e8521c 933{-| Split a text stream into 'FreeT'-delimited lines
91727d11 934-}
935lines
936 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
937lines p0 = PP.FreeT (go0 p0)
938 where
939 go0 p = do
940 x <- next p
941 case x of
942 Left r -> return (PP.Pure r)
31f41a5d 943 Right (txt, p') ->
944 if (T.null txt)
91727d11 945 then go0 p'
31f41a5d 946 else return $ PP.Free $ go1 (yield txt >> p')
91727d11 947 go1 p = do
9e9bb0ce 948 p' <- p ^. break ('\n' ==)
b4d21c02 949 return $ PP.FreeT $ do
950 x <- nextChar p'
951 case x of
952 Left r -> return $ PP.Pure r
953 Right (_, p'') -> go0 p''
91727d11 954{-# INLINABLE lines #-}
91727d11 955
31f41a5d 956
957
958-- | Split a text stream into 'FreeT'-delimited words
91727d11 959words
960 :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r
cf10d6f1 961words = go
91727d11 962 where
cf10d6f1 963 go p = PP.FreeT $ do
964 x <- next (p >-> dropWhile isSpace)
965 return $ case x of
966 Left r -> PP.Pure r
967 Right (bs, p') -> PP.Free $ do
9e9bb0ce 968 p'' <- (yield bs >> p') ^. break isSpace
cf10d6f1 969 return (go p'')
91727d11 970{-# INLINABLE words #-}
971
cf10d6f1 972
9e9bb0ce 973
974
91727d11 975
31f41a5d 976{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
977 interspersing a text stream in between them
91727d11 978-}
979intercalate
980 :: (Monad m)
981 => Producer Text m ()
982 -> FreeT (Producer Text m) m r
983 -> Producer Text m r
984intercalate p0 = go0
985 where
986 go0 f = do
987 x <- lift (PP.runFreeT f)
988 case x of
989 PP.Pure r -> return r
990 PP.Free p -> do
991 f' <- p
992 go1 f'
993 go1 f = do
994 x <- lift (PP.runFreeT f)
995 case x of
996 PP.Pure r -> return r
997 PP.Free p -> do
998 p0
999 f' <- p
1000 go1 f'
1001{-# INLINABLE intercalate #-}
1002
62e8521c 1003{-| Join 'FreeT'-delimited lines into a text stream
91727d11 1004-}
1005unlines
1006 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1007unlines = go
1008 where
1009 go f = do
1010 x <- lift (PP.runFreeT f)
1011 case x of
1012 PP.Pure r -> return r
1013 PP.Free p -> do
1014 f' <- p
1015 yield $ T.singleton '\n'
1016 go f'
1017{-# INLINABLE unlines #-}
1018
31f41a5d 1019{-| Join 'FreeT'-delimited words into a text stream
91727d11 1020-}
1021unwords
1022 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1023unwords = intercalate (yield $ T.pack " ")
1024{-# INLINABLE unwords #-}
1025
1026{- $parse
31f41a5d 1027 The following parsing utilities are single-character analogs of the ones found
1028 @pipes-parse@.
91727d11 1029-}
1030
91727d11 1031{- $reexports
31f41a5d 1032 @Pipes.Text.Parse@ re-exports 'nextChar', 'drawChar', 'unDrawChar', 'peekChar', and 'isEndOfChars'.
91727d11 1033
1034 @Data.Text@ re-exports the 'Text' type.
1035
91727d11 1036 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
64e03122 1037-}
1038
1039
1040
1041decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1042-- decode codec = go B.empty where
1043-- go extra p0 =
1044-- do x <- lift (next p0)
1045-- case x of Right (chunk, p) ->
1046-- do let (text, stuff) = codecDecode codec (B.append extra chunk)
1047-- yield text
1048-- case stuff of Right extra' -> go extra' p
1049-- Left (exc,bs) -> do yield text
1050-- return (do yield bs
1051-- p)
1052-- Left r -> return (do yield extra
1053-- return r)
1054
1055decode d p0 = case d of
1056 PE.Other txt bad -> do yield txt
1057 return (do yield bad
1058 p0)
1059 PE.Some txt extra dec -> do yield txt
1060 x <- lift (next p0)
1061 case x of Left r -> return (do yield extra
1062 return r)
1063 Right (chunk,p1) -> decode (dec chunk) p1
1064
1065-- go !carry dec0 p = do
1066-- x <- lift (next p)
1067-- case x of Left r -> if B.null carry
1068-- then return (return r) -- all bytestrinput was consumed
1069-- else return (do yield carry -- a potentially valid fragment remains
1070-- return r)
1071--
1072-- Right (chunk, p') -> case dec0 chunk of
1073-- PE.Some text carry2 dec -> do yield text
1074-- go carry2 dec p'
1075-- PE.Other text bs -> do yield text
1076-- return (do yield bs -- an invalid blob remains
1077-- p')
1078-- {-# INLINABLE decodeUtf8 #-}