]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
scrapped stdinLn etc. Improved haddocks
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
1677dc12 1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
64e03122 2#if __GLASGOW_HASKELL__ >= 702
3{-# LANGUAGE Trustworthy #-}
4#endif
13a43263 5{-| This module provides @pipes@ utilities for \"text streams\", which are
64e03122 6 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
7 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
8 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
9 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
91727d11 10
63ea9ffd 11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
31f41a5d 12 example, the following program copies a document from one file to another:
91727d11 13
14> import Pipes
31f41a5d 15> import qualified Data.Text.Pipes as Text
91727d11 16> import System.IO
17>
18> main =
19> withFile "inFile.txt" ReadMode $ \hIn ->
20> withFile "outFile.txt" WriteMode $ \hOut ->
31f41a5d 21> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22
23To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
91727d11 24
13a43263 25> import Pipes
31f41a5d 26> import qualified Data.Text.Pipes as Text
13a43263 27> import Pipes.Safe
28>
31f41a5d 29> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
13a43263 30
91727d11 31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
31f41a5d 32 and 'stdout' proxies, as with the following \"echo\" program:
91727d11 33
31f41a5d 34> main = runEffect $ Text.stdin >-> Text.stdout
91727d11 35
36 You can also translate pure lazy 'TL.Text's to and from proxies:
37
31f41a5d 38> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
91727d11 39
40 In addition, this module provides many functions equivalent to lazy
31f41a5d 41 'Text' functions so that you can transform or fold text streams. For
91727d11 42 example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 43 might write:
91727d11 44
45> import Pipes
31f41a5d 46> import qualified Pipes.Text as Text
47> import qualified Pipes.Parse as Parse
91727d11 48>
31f41a5d 49> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
91727d11 50> where
31f41a5d 51> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
91727d11 52
31f41a5d 53 The above program will never bring more than one chunk of text (~ 32 KB) into
91727d11 54 memory, no matter how long the lines are.
55
56 Note that functions in this library are designed to operate on streams that
31f41a5d 57 are insensitive to text boundaries. This means that they may freely split
64e03122 58 text into smaller texts, /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
91727d11 61-}
62
7faef8bc 63module Pipes.Text (
91727d11 64 -- * Producers
1677dc12 65 fromLazy
66 , stdin
67 , fromHandle
68 , readFile
91727d11 69
70 -- * Consumers
1677dc12 71 , stdout
1677dc12 72 , toHandle
73 , writeFile
91727d11 74
75 -- * Pipes
1677dc12 76 , map
77 , concatMap
78 , take
79 , drop
80 , takeWhile
81 , dropWhile
82 , filter
83 , scan
84 , encodeUtf8
85 , pack
86 , unpack
87 , toCaseFold
88 , toLower
89 , toUpper
90 , stripStart
91727d11 91
92 -- * Folds
1677dc12 93 , toLazy
94 , toLazyM
95 , foldChars
96 , head
97 , last
98 , null
99 , length
100 , any
101 , all
102 , maximum
103 , minimum
104 , find
105 , index
106 , count
107
108 -- * Primitive Character Parsers
109 -- $parse
110 , nextChar
111 , drawChar
112 , unDrawChar
113 , peekChar
9e9bb0ce 114 , isEndOfChars
1677dc12 115
116 -- * Parsing Lenses
9e9bb0ce 117 , splitAt
1677dc12 118 , span
119 , break
120 , groupBy
121 , group
9e9bb0ce 122 , word
123 , line
90189414 124
125 -- * Decoding Lenses
1677dc12 126 , decodeUtf8
90189414 127 , codec
128
129 -- * Codecs
130 , utf8
131 , utf16_le
132 , utf16_be
133 , utf32_le
134 , utf32_be
135
136 -- * Other Decoding/Encoding Functions
137 , decodeIso8859_1
138 , decodeAscii
139 , encodeIso8859_1
140 , encodeAscii
1677dc12 141
142 -- * FreeT Splitters
143 , chunksOf
144 , splitsWith
0f8c6f1b 145 , splits
1677dc12 146-- , groupsBy
147-- , groups
148 , lines
149 , words
150
91727d11 151 -- * Transformations
1677dc12 152 , intersperse
9e9bb0ce 153 , packChars
31f41a5d 154
91727d11 155 -- * Joiners
1677dc12 156 , intercalate
157 , unlines
158 , unwords
9e9bb0ce 159
1677dc12 160 -- * Re-exports
91727d11 161 -- $reexports
1677dc12 162 , module Data.ByteString
163 , module Data.Text
164 , module Data.Profunctor
165 , module Data.Word
166 , module Pipes.Parse
7ed76745 167 , module Pipes.Group
7c9f2b8b 168 , module Pipes.Text.Internal
91727d11 169 ) where
170
171import Control.Exception (throwIO, try)
0f8c6f1b 172import Control.Applicative ((<*))
64e03122 173import Control.Monad (liftM, unless, join)
9e9bb0ce 174import Control.Monad.Trans.State.Strict (StateT(..), modify)
ca6f90a0 175import Data.Monoid ((<>))
91727d11 176import qualified Data.Text as T
177import qualified Data.Text.IO as T
31f41a5d 178import qualified Data.Text.Encoding as TE
63ea9ffd 179import qualified Data.Text.Encoding.Error as TE
91727d11 180import Data.Text (Text)
181import qualified Data.Text.Lazy as TL
182import qualified Data.Text.Lazy.IO as TL
183import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
184import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
31f41a5d 185import Data.ByteString (ByteString)
186import qualified Data.ByteString as B
90189414 187import qualified Data.ByteString.Char8 as B8
cf10d6f1 188import Data.Char (ord, isSpace)
1677dc12 189import Data.Functor.Constant (Constant(Constant, getConstant))
91727d11 190import Data.Functor.Identity (Identity)
1677dc12 191import Data.Profunctor (Profunctor)
192import qualified Data.Profunctor
91727d11 193import qualified Data.List as List
194import Foreign.C.Error (Errno(Errno), ePIPE)
195import qualified GHC.IO.Exception as G
196import Pipes
5e3f5409 197import qualified Pipes.ByteString as PB
7c9f2b8b 198import qualified Pipes.Text.Internal as PI
199import Pipes.Text.Internal
91727d11 200import Pipes.Core (respond, Server')
7fc48f7c 201import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
7ed76745 202import qualified Pipes.Group as PG
91727d11 203import qualified Pipes.Parse as PP
7ed76745 204import Pipes.Parse (Parser)
91727d11 205import qualified Pipes.Safe.Prelude as Safe
206import qualified Pipes.Safe as Safe
207import Pipes.Safe (MonadSafe(..), Base(..))
208import qualified Pipes.Prelude as P
209import qualified System.IO as IO
210import Data.Char (isSpace)
63ea9ffd 211import Data.Word (Word8)
1677dc12 212
91727d11 213import Prelude hiding (
214 all,
215 any,
216 break,
217 concat,
218 concatMap,
219 drop,
220 dropWhile,
221 elem,
222 filter,
223 head,
224 last,
225 lines,
226 length,
227 map,
228 maximum,
229 minimum,
230 notElem,
231 null,
232 readFile,
233 span,
234 splitAt,
235 take,
236 takeWhile,
237 unlines,
238 unwords,
239 words,
240 writeFile )
241
242-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
243fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
244fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 245{-# INLINE fromLazy #-}
91727d11 246
62e8521c 247-- | Stream text from 'stdin'
ca6f90a0 248stdin :: MonadIO m => Producer Text m ()
91727d11 249stdin = fromHandle IO.stdin
ca6f90a0 250{-# INLINE stdin #-}
91727d11 251
31f41a5d 252{-| Convert a 'IO.Handle' into a text stream using a text size
ca6f90a0 253 determined by the good sense of the text library; note that this
254 is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
255 but uses the system encoding and has other `Data.Text.IO` features
31f41a5d 256-}
257
ca6f90a0 258fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
259fromHandle h = go where
260 go = do txt <- liftIO (T.hGetChunk h)
4cbc92cc 261 unless (T.null txt) ( do yield txt
262 go )
91727d11 263{-# INLINABLE fromHandle#-}
ca6f90a0 264
265
266{-| Stream text from a file in the simple fashion of @Data.Text.IO@
6f6f9974 267
31f41a5d 268>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
269MAIN = PUTSTRLN "HELLO WORLD"
6f6f9974 270-}
271
ca6f90a0 272readFile :: MonadSafe m => FilePath -> Producer Text m ()
91727d11 273readFile file = Safe.withFile file IO.ReadMode fromHandle
ca6f90a0 274{-# INLINE readFile #-}
91727d11 275
91727d11 276
31f41a5d 277{-| Stream text to 'stdout'
91727d11 278
279 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
280
4cbc92cc 281 Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@
282 instead of @(source >-> stdout)@ .
91727d11 283-}
284stdout :: MonadIO m => Consumer' Text m ()
285stdout = go
286 where
287 go = do
288 txt <- await
289 x <- liftIO $ try (T.putStr txt)
290 case x of
291 Left (G.IOError { G.ioe_type = G.ResourceVanished
292 , G.ioe_errno = Just ioe })
293 | Errno ioe == ePIPE
294 -> return ()
295 Left e -> liftIO (throwIO e)
296 Right () -> go
297{-# INLINABLE stdout #-}
298
91727d11 299
31f41a5d 300{-| Convert a text stream into a 'Handle'
91727d11 301
31f41a5d 302 Note: again, for best performance, where possible use
303 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
91727d11 304-}
305toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
306toHandle h = for cat (liftIO . T.hPutStr h)
307{-# INLINABLE toHandle #-}
308
d4732515 309{-# RULES "p >-> toHandle h" forall p h .
ff38b9f0 310 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
d4732515 311 #-}
312
313
31f41a5d 314-- | Stream text into a file. Uses @pipes-safe@.
ca6f90a0 315writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
91727d11 316writeFile file = Safe.withFile file IO.WriteMode toHandle
ca6f90a0 317{-# INLINE writeFile #-}
91727d11 318
1677dc12 319
320type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
321
322type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
323
324(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
325a ^. lens = getConstant (lens Constant a)
326
327
91727d11 328-- | Apply a transformation to each 'Char' in the stream
329map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
330map f = P.map (T.map f)
331{-# INLINABLE map #-}
332
ff38b9f0 333{-# RULES "p >-> map f" forall p f .
334 p >-> map f = for p (\txt -> yield (T.map f txt))
335 #-}
336
31f41a5d 337-- | Map a function over the characters of a text stream and concatenate the results
91727d11 338concatMap
339 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
340concatMap f = P.map (T.concatMap f)
341{-# INLINABLE concatMap #-}
342
ff38b9f0 343{-# RULES "p >-> concatMap f" forall p f .
344 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
345 #-}
7faef8bc 346
347-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
a02a69ad 348-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
349-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
7faef8bc 350encodeUtf8 :: Monad m => Pipe Text ByteString m r
351encodeUtf8 = P.map TE.encodeUtf8
352{-# INLINEABLE encodeUtf8 #-}
353
ff38b9f0 354{-# RULES "p >-> encodeUtf8" forall p .
355 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
356 #-}
357
c0343bc9 358-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 359pack :: Monad m => Pipe String Text m r
360pack = P.map T.pack
361{-# INLINEABLE pack #-}
362
ff38b9f0 363{-# RULES "p >-> pack" forall p .
364 p >-> pack = for p (\txt -> yield (T.pack txt))
365 #-}
366
367-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 368unpack :: Monad m => Pipe Text String m r
d4732515 369unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 370{-# INLINEABLE unpack #-}
371
ff38b9f0 372{-# RULES "p >-> unpack" forall p .
373 p >-> unpack = for p (\txt -> yield (T.unpack txt))
374 #-}
d4732515 375
b0d86a59 376-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
377-- here acting as 'Text' pipes, rather as they would on a lazy text
7faef8bc 378toCaseFold :: Monad m => Pipe Text Text m ()
379toCaseFold = P.map T.toCaseFold
380{-# INLINEABLE toCaseFold #-}
381
ff38b9f0 382{-# RULES "p >-> toCaseFold" forall p .
383 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
384 #-}
385
386
c0343bc9 387-- | lowercase incoming 'Text'
7faef8bc 388toLower :: Monad m => Pipe Text Text m ()
389toLower = P.map T.toLower
390{-# INLINEABLE toLower #-}
391
ff38b9f0 392{-# RULES "p >-> toLower" forall p .
393 p >-> toLower = for p (\txt -> yield (T.toLower txt))
394 #-}
395
c0343bc9 396-- | uppercase incoming 'Text'
7faef8bc 397toUpper :: Monad m => Pipe Text Text m ()
398toUpper = P.map T.toUpper
399{-# INLINEABLE toUpper #-}
400
ff38b9f0 401{-# RULES "p >-> toUpper" forall p .
402 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
403 #-}
404
c0343bc9 405-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 406stripStart :: Monad m => Pipe Text Text m r
407stripStart = do
408 chunk <- await
409 let text = T.stripStart chunk
410 if T.null text
411 then stripStart
b0d86a59 412 else do yield text
413 cat
7faef8bc 414{-# INLINEABLE stripStart #-}
415
31f41a5d 416-- | @(take n)@ only allows @n@ individual characters to pass;
417-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 418take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
419take n0 = go n0 where
420 go n
421 | n <= 0 = return ()
422 | otherwise = do
31f41a5d 423 txt <- await
424 let len = fromIntegral (T.length txt)
91727d11 425 if (len > n)
31f41a5d 426 then yield (T.take (fromIntegral n) txt)
91727d11 427 else do
31f41a5d 428 yield txt
91727d11 429 go (n - len)
430{-# INLINABLE take #-}
431
31f41a5d 432-- | @(drop n)@ drops the first @n@ characters
91727d11 433drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
434drop n0 = go n0 where
435 go n
436 | n <= 0 = cat
437 | otherwise = do
31f41a5d 438 txt <- await
439 let len = fromIntegral (T.length txt)
91727d11 440 if (len >= n)
441 then do
31f41a5d 442 yield (T.drop (fromIntegral n) txt)
91727d11 443 cat
444 else go (n - len)
445{-# INLINABLE drop #-}
446
31f41a5d 447-- | Take characters until they fail the predicate
91727d11 448takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
449takeWhile predicate = go
450 where
451 go = do
31f41a5d 452 txt <- await
453 let (prefix, suffix) = T.span predicate txt
91727d11 454 if (T.null suffix)
455 then do
31f41a5d 456 yield txt
91727d11 457 go
458 else yield prefix
459{-# INLINABLE takeWhile #-}
460
31f41a5d 461-- | Drop characters until they fail the predicate
91727d11 462dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
463dropWhile predicate = go where
464 go = do
31f41a5d 465 txt <- await
466 case T.findIndex (not . predicate) txt of
91727d11 467 Nothing -> go
468 Just i -> do
31f41a5d 469 yield (T.drop i txt)
91727d11 470 cat
471{-# INLINABLE dropWhile #-}
472
473-- | Only allows 'Char's to pass if they satisfy the predicate
474filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
475filter predicate = P.map (T.filter predicate)
476{-# INLINABLE filter #-}
477
ff38b9f0 478{-# RULES "p >-> filter q" forall p q .
479 p >-> filter q = for p (\txt -> yield (T.filter q txt))
480 #-}
481
31f41a5d 482-- | Strict left scan over the characters
91727d11 483scan
484 :: (Monad m)
485 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
486scan step begin = go begin
487 where
31f41a5d 488 go c = do
489 txt <- await
490 let txt' = T.scanl step c txt
491 c' = T.last txt'
492 yield txt'
493 go c'
91727d11 494{-# INLINABLE scan #-}
495
496{-| Fold a pure 'Producer' of strict 'Text's into a lazy
497 'TL.Text'
498-}
499toLazy :: Producer Text Identity () -> TL.Text
500toLazy = TL.fromChunks . P.toList
501{-# INLINABLE toLazy #-}
502
503{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
504 'TL.Text'
505
506 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
507 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
508 immediately as they are generated instead of loading them all into memory.
509-}
510toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
511toLazyM = liftM TL.fromChunks . P.toListM
512{-# INLINABLE toLazyM #-}
513
31f41a5d 514-- | Reduce the text stream using a strict left fold over characters
64e03122 515foldChars
91727d11 516 :: Monad m
517 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 518foldChars step begin done = P.fold (T.foldl' step) begin done
1677dc12 519{-# INLINABLE foldChars #-}
91727d11 520
521-- | Retrieve the first 'Char'
522head :: (Monad m) => Producer Text m () -> m (Maybe Char)
523head = go
524 where
525 go p = do
526 x <- nextChar p
527 case x of
528 Left _ -> return Nothing
31f41a5d 529 Right (c, _) -> return (Just c)
91727d11 530{-# INLINABLE head #-}
531
532-- | Retrieve the last 'Char'
533last :: (Monad m) => Producer Text m () -> m (Maybe Char)
534last = go Nothing
535 where
536 go r p = do
537 x <- next p
538 case x of
539 Left () -> return r
31f41a5d 540 Right (txt, p') ->
541 if (T.null txt)
91727d11 542 then go r p'
31f41a5d 543 else go (Just $ T.last txt) p'
91727d11 544{-# INLINABLE last #-}
545
546-- | Determine if the stream is empty
547null :: (Monad m) => Producer Text m () -> m Bool
548null = P.all T.null
549{-# INLINABLE null #-}
550
62e8521c 551-- | Count the number of characters in the stream
91727d11 552length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 553length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 554{-# INLINABLE length #-}
555
556-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
557any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
558any predicate = P.any (T.any predicate)
559{-# INLINABLE any #-}
560
561-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
562all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
563all predicate = P.all (T.all predicate)
564{-# INLINABLE all #-}
565
62e8521c 566-- | Return the maximum 'Char' within a text stream
91727d11 567maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
568maximum = P.fold step Nothing id
569 where
31f41a5d 570 step mc txt =
571 if (T.null txt)
572 then mc
573 else Just $ case mc of
574 Nothing -> T.maximum txt
575 Just c -> max c (T.maximum txt)
91727d11 576{-# INLINABLE maximum #-}
577
62e8521c 578-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 579minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
580minimum = P.fold step Nothing id
581 where
31f41a5d 582 step mc txt =
583 if (T.null txt)
584 then mc
585 else case mc of
586 Nothing -> Just (T.minimum txt)
587 Just c -> Just (min c (T.minimum txt))
91727d11 588{-# INLINABLE minimum #-}
589
1677dc12 590
91727d11 591-- | Find the first element in the stream that matches the predicate
592find
593 :: (Monad m)
594 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
595find predicate p = head (p >-> filter predicate)
596{-# INLINABLE find #-}
597
62e8521c 598-- | Index into a text stream
91727d11 599index
600 :: (Monad m, Integral a)
601 => a-> Producer Text m () -> m (Maybe Char)
602index n p = head (p >-> drop n)
603{-# INLINABLE index #-}
604
63ea9ffd 605
31f41a5d 606-- | Store a tally of how many segments match the given 'Text'
607count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
608count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
609{-# INLINABLE count #-}
610
9e9bb0ce 611
612{-| Consume the first character from a stream of 'Text'
613
614 'next' either fails with a 'Left' if the 'Producer' has no more characters or
615 succeeds with a 'Right' providing the next character and the remainder of the
616 'Producer'.
617-}
618nextChar
619 :: (Monad m)
620 => Producer Text m r
621 -> m (Either r (Char, Producer Text m r))
622nextChar = go
623 where
624 go p = do
625 x <- next p
626 case x of
627 Left r -> return (Left r)
628 Right (txt, p') -> case (T.uncons txt) of
629 Nothing -> go p'
630 Just (c, txt') -> return (Right (c, yield txt' >> p'))
631{-# INLINABLE nextChar #-}
632
633{-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
634 'Producer' is empty
635-}
636drawChar :: (Monad m) => Parser Text m (Maybe Char)
637drawChar = do
638 x <- PP.draw
639 case x of
640 Nothing -> return Nothing
641 Just txt -> case (T.uncons txt) of
642 Nothing -> drawChar
643 Just (c, txt') -> do
644 PP.unDraw txt'
645 return (Just c)
646{-# INLINABLE drawChar #-}
647
648-- | Push back a 'Char' onto the underlying 'Producer'
649unDrawChar :: (Monad m) => Char -> Parser Text m ()
650unDrawChar c = modify (yield (T.singleton c) >>)
651{-# INLINABLE unDrawChar #-}
652
653{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
654 push the 'Char' back
655
656> peekChar = do
657> x <- drawChar
658> case x of
659> Left _ -> return ()
660> Right c -> unDrawChar c
661> return x
662-}
663peekChar :: (Monad m) => Parser Text m (Maybe Char)
664peekChar = do
665 x <- drawChar
666 case x of
667 Nothing -> return ()
668 Just c -> unDrawChar c
669 return x
670{-# INLINABLE peekChar #-}
671
672{-| Check if the underlying 'Producer' has no more characters
673
674 Note that this will skip over empty 'Text' chunks, unlike
675 'PP.isEndOfInput' from @pipes-parse@, which would consider
676 an empty 'Text' a valid bit of input.
677
678> isEndOfChars = liftM isLeft peekChar
679-}
680isEndOfChars :: (Monad m) => Parser Text m Bool
681isEndOfChars = do
682 x <- peekChar
683 return (case x of
684 Nothing -> True
685 Just _-> False )
686{-# INLINABLE isEndOfChars #-}
687
688
167f8805 689{- | An improper lens into a stream of 'ByteString' expected to be UTF-8 encoded; the associated
690 stream of Text ends by returning a stream of ByteStrings beginning at the point of failure.
691 -}
ca6f90a0 692
9e9bb0ce 693decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
694 (Producer Text m (Producer ByteString m r))
695decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
7c9f2b8b 696 (k (go B.empty PI.streamDecodeUtf8 p0)) where
ca6f90a0 697 go !carry dec0 p = do
698 x <- lift (next p)
4cbc92cc 699 case x of Left r -> return (if B.null carry
700 then return r -- all bytestring input was consumed
701 else (do yield carry -- a potentially valid fragment remains
702 return r))
ca6f90a0 703
704 Right (chunk, p') -> case dec0 chunk of
7c9f2b8b 705 PI.Some text carry2 dec -> do yield text
ca6f90a0 706 go carry2 dec p'
7c9f2b8b 707 PI.Other text bs -> do yield text
ca6f90a0 708 return (do yield bs -- an invalid blob remains
709 p')
710{-# INLINABLE decodeUtf8 #-}
711
31f41a5d 712
713-- | Splits a 'Producer' after the given number of characters
91727d11 714splitAt
715 :: (Monad m, Integral n)
716 => n
9e9bb0ce 717 -> Lens' (Producer Text m r)
718 (Producer Text m (Producer Text m r))
719splitAt n0 k p0 = fmap join (k (go n0 p0))
91727d11 720 where
721 go 0 p = return p
722 go n p = do
723 x <- lift (next p)
724 case x of
725 Left r -> return (return r)
31f41a5d 726 Right (txt, p') -> do
727 let len = fromIntegral (T.length txt)
91727d11 728 if (len <= n)
729 then do
31f41a5d 730 yield txt
91727d11 731 go (n - len) p'
732 else do
31f41a5d 733 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 734 yield prefix
735 return (yield suffix >> p')
736{-# INLINABLE splitAt #-}
737
91727d11 738
31f41a5d 739{-| Split a text stream in two, where the first text stream is the longest
740 consecutive group of text that satisfy the predicate
91727d11 741-}
742span
743 :: (Monad m)
744 => (Char -> Bool)
9e9bb0ce 745 -> Lens' (Producer Text m r)
746 (Producer Text m (Producer Text m r))
747span predicate k p0 = fmap join (k (go p0))
91727d11 748 where
749 go p = do
750 x <- lift (next p)
751 case x of
752 Left r -> return (return r)
31f41a5d 753 Right (txt, p') -> do
754 let (prefix, suffix) = T.span predicate txt
91727d11 755 if (T.null suffix)
756 then do
31f41a5d 757 yield txt
91727d11 758 go p'
759 else do
760 yield prefix
761 return (yield suffix >> p')
762{-# INLINABLE span #-}
763
62e8521c 764{-| Split a text stream in two, where the first text stream is the longest
765 consecutive group of characters that don't satisfy the predicate
91727d11 766-}
767break
768 :: (Monad m)
769 => (Char -> Bool)
9e9bb0ce 770 -> Lens' (Producer Text m r)
771 (Producer Text m (Producer Text m r))
91727d11 772break predicate = span (not . predicate)
773{-# INLINABLE break #-}
774
9e9bb0ce 775{-| Improper lens that splits after the first group of equivalent Chars, as
776 defined by the given equivalence relation
777-}
778groupBy
779 :: (Monad m)
780 => (Char -> Char -> Bool)
781 -> Lens' (Producer Text m r)
782 (Producer Text m (Producer Text m r))
783groupBy equals k p0 = fmap join (k ((go p0))) where
784 go p = do
785 x <- lift (next p)
786 case x of
787 Left r -> return (return r)
788 Right (txt, p') -> case T.uncons txt of
789 Nothing -> go p'
790 Just (c, _) -> (yield txt >> p') ^. span (equals c)
791{-# INLINABLE groupBy #-}
792
793-- | Improper lens that splits after the first succession of identical 'Char' s
794group :: Monad m
795 => Lens' (Producer Text m r)
796 (Producer Text m (Producer Text m r))
797group = groupBy (==)
798{-# INLINABLE group #-}
799
800{-| Improper lens that splits a 'Producer' after the first word
801
802 Unlike 'words', this does not drop leading whitespace
803-}
804word :: (Monad m)
805 => Lens' (Producer Text m r)
806 (Producer Text m (Producer Text m r))
807word k p0 = fmap join (k (to p0))
808 where
809 to p = do
810 p' <- p^.span isSpace
811 p'^.break isSpace
812{-# INLINABLE word #-}
813
814
815line :: (Monad m)
816 => Lens' (Producer Text m r)
817 (Producer Text m (Producer Text m r))
818line = break (== '\n')
819
820{-# INLINABLE line #-}
821
822
823-- | Intersperse a 'Char' in between the characters of stream of 'Text'
824intersperse
825 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
826intersperse c = go0
827 where
828 go0 p = do
829 x <- lift (next p)
830 case x of
831 Left r -> return r
832 Right (txt, p') -> do
833 yield (T.intersperse c txt)
834 go1 p'
835 go1 p = do
836 x <- lift (next p)
837 case x of
838 Left r -> return r
839 Right (txt, p') -> do
840 yield (T.singleton c)
841 yield (T.intersperse c txt)
842 go1 p'
843{-# INLINABLE intersperse #-}
844
845
846
847-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
848packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
849packChars = Data.Profunctor.dimap to (fmap from)
850 where
851 -- to :: Monad m => Producer Char m x -> Producer Text m x
7ed76745 852 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
9e9bb0ce 853
854 step diffAs c = diffAs . (c:)
855
856 done diffAs = T.pack (diffAs [])
857
858 -- from :: Monad m => Producer Text m x -> Producer Char m x
859 from p = for p (each . T.unpack)
860{-# INLINABLE packChars #-}
861
0f8c6f1b 862
863-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
864chunksOf
865 :: (Monad m, Integral n)
866 => n -> Lens' (Producer Text m r)
867 (FreeT (Producer Text m) m r)
868chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
869 where
870 go p = do
871 x <- next p
872 return $ case x of
7ed76745 873 Left r -> Pure r
874 Right (txt, p') -> Free $ do
0f8c6f1b 875 p'' <- (yield txt >> p') ^. splitAt n
7ed76745 876 return $ FreeT (go p'')
0f8c6f1b 877{-# INLINABLE chunksOf #-}
878
879
62e8521c 880{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 881 predicate
882-}
1677dc12 883splitsWith
91727d11 884 :: (Monad m)
885 => (Char -> Bool)
886 -> Producer Text m r
7ed76745 887 -> FreeT (Producer Text m) m r
888splitsWith predicate p0 = FreeT (go0 p0)
91727d11 889 where
890 go0 p = do
891 x <- next p
892 case x of
7ed76745 893 Left r -> return (Pure r)
31f41a5d 894 Right (txt, p') ->
895 if (T.null txt)
91727d11 896 then go0 p'
7ed76745 897 else return $ Free $ do
9e9bb0ce 898 p'' <- (yield txt >> p') ^. span (not . predicate)
7ed76745 899 return $ FreeT (go1 p'')
91727d11 900 go1 p = do
901 x <- nextChar p
902 return $ case x of
7ed76745 903 Left r -> Pure r
904 Right (_, p') -> Free $ do
9e9bb0ce 905 p'' <- p' ^. span (not . predicate)
7ed76745 906 return $ FreeT (go1 p'')
1677dc12 907{-# INLINABLE splitsWith #-}
91727d11 908
31f41a5d 909-- | Split a text stream using the given 'Char' as the delimiter
0f8c6f1b 910splits :: (Monad m)
91727d11 911 => Char
0f8c6f1b 912 -> Lens' (Producer Text m r)
913 (FreeT (Producer Text m) m r)
914splits c k p =
7ed76745 915 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
0f8c6f1b 916{-# INLINABLE splits #-}
917
918{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
919 given equivalence relation
920-}
921groupsBy
922 :: Monad m
923 => (Char -> Char -> Bool)
924 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
7ed76745 925groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
0f8c6f1b 926 go p = do x <- next p
7ed76745 927 case x of Left r -> return (Pure r)
0f8c6f1b 928 Right (bs, p') -> case T.uncons bs of
929 Nothing -> go p'
7ed76745 930 Just (c, _) -> do return $ Free $ do
0f8c6f1b 931 p'' <- (yield bs >> p')^.span (equals c)
7ed76745 932 return $ FreeT (go p'')
0f8c6f1b 933{-# INLINABLE groupsBy #-}
934
935
936-- | Like 'groupsBy', where the equality predicate is ('==')
937groups
938 :: Monad m
939 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
940groups = groupsBy (==)
941{-# INLINABLE groups #-}
942
91727d11 943
91727d11 944
62e8521c 945{-| Split a text stream into 'FreeT'-delimited lines
91727d11 946-}
947lines
0f8c6f1b 948 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
949lines = Data.Profunctor.dimap _lines (fmap _unlines)
91727d11 950 where
7ed76745 951 _lines p0 = FreeT (go0 p0)
0f8c6f1b 952 where
953 go0 p = do
954 x <- next p
955 case x of
7ed76745 956 Left r -> return (Pure r)
0f8c6f1b 957 Right (txt, p') ->
958 if (T.null txt)
959 then go0 p'
7ed76745 960 else return $ Free $ go1 (yield txt >> p')
0f8c6f1b 961 go1 p = do
962 p' <- p ^. break ('\n' ==)
7ed76745 963 return $ FreeT $ do
0f8c6f1b 964 x <- nextChar p'
965 case x of
7ed76745 966 Left r -> return $ Pure r
0f8c6f1b 967 Right (_, p'') -> go0 p''
968 -- _unlines
969 -- :: Monad m
970 -- => FreeT (Producer Text m) m x -> Producer Text m x
7fc48f7c 971 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
972
0f8c6f1b 973
91727d11 974{-# INLINABLE lines #-}
91727d11 975
31f41a5d 976
31f41a5d 977-- | Split a text stream into 'FreeT'-delimited words
91727d11 978words
0f8c6f1b 979 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
980words = Data.Profunctor.dimap go (fmap _unwords)
91727d11 981 where
7ed76745 982 go p = FreeT $ do
cf10d6f1 983 x <- next (p >-> dropWhile isSpace)
984 return $ case x of
7ed76745 985 Left r -> Pure r
986 Right (bs, p') -> Free $ do
9e9bb0ce 987 p'' <- (yield bs >> p') ^. break isSpace
cf10d6f1 988 return (go p'')
7ed76745 989 _unwords = PG.intercalates (yield $ T.singleton ' ')
0f8c6f1b 990
91727d11 991{-# INLINABLE words #-}
992
cf10d6f1 993
31f41a5d 994{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
995 interspersing a text stream in between them
91727d11 996-}
997intercalate
998 :: (Monad m)
999 => Producer Text m ()
1000 -> FreeT (Producer Text m) m r
1001 -> Producer Text m r
1002intercalate p0 = go0
1003 where
1004 go0 f = do
7ed76745 1005 x <- lift (runFreeT f)
91727d11 1006 case x of
7ed76745 1007 Pure r -> return r
1008 Free p -> do
91727d11 1009 f' <- p
1010 go1 f'
1011 go1 f = do
7ed76745 1012 x <- lift (runFreeT f)
91727d11 1013 case x of
7ed76745 1014 Pure r -> return r
1015 Free p -> do
91727d11 1016 p0
1017 f' <- p
1018 go1 f'
1019{-# INLINABLE intercalate #-}
1020
62e8521c 1021{-| Join 'FreeT'-delimited lines into a text stream
91727d11 1022-}
1023unlines
1024 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1025unlines = go
1026 where
1027 go f = do
7ed76745 1028 x <- lift (runFreeT f)
91727d11 1029 case x of
7ed76745 1030 Pure r -> return r
1031 Free p -> do
91727d11 1032 f' <- p
1033 yield $ T.singleton '\n'
1034 go f'
1035{-# INLINABLE unlines #-}
1036
31f41a5d 1037{-| Join 'FreeT'-delimited words into a text stream
91727d11 1038-}
1039unwords
1040 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
7fc48f7c 1041unwords = intercalate (yield $ T.singleton ' ')
91727d11 1042{-# INLINABLE unwords #-}
1043
1044{- $parse
31f41a5d 1045 The following parsing utilities are single-character analogs of the ones found
1046 @pipes-parse@.
91727d11 1047-}
1048
91727d11 1049{- $reexports
91727d11 1050
1051 @Data.Text@ re-exports the 'Text' type.
1052
0f8c6f1b 1053 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
64e03122 1054-}
1055
167f8805 1056{- | Use a 'Codec' as a pipes-style 'Lens' into a byte stream; the available 'Codec' s are
1057 'utf8', 'utf16_le', 'utf16_be', 'utf32_le', 'utf32_be' . The 'Codec' concept and the
1058 individual 'Codec' definitions follow the enumerator and conduit libraries.
1059
1060 Utf8 is handled differently in this library -- without the use of 'unsafePerformIO' &co
1061 to catch 'Text' exceptions; but the same 'mypipe ^. codec utf8' interface can be used.
1062 'mypipe ^. decodeUtf8' should be the same, but has a somewhat more direct and thus perhaps
1063 better implementation.
1064
1065 -}
90189414 1066codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
1067codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
1068 (k (decoder (dec B.empty) p0) ) where
7c9f2b8b 1069 decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
90189414 1070 decoder !d p0 = case d of
7c9f2b8b 1071 PI.Other txt bad -> do yield txt
90189414 1072 return (do yield bad
1073 p0)
7c9f2b8b 1074 PI.Some txt extra dec -> do yield txt
90189414 1075 x <- lift (next p0)
1076 case x of Left r -> return (do yield extra
1077 return r)
1078 Right (chunk,p1) -> decoder (dec chunk) p1
1079
167f8805 1080{- | ascii and latin encodings only represent a small fragment of 'Text'; thus we cannot
1081 use the pipes 'Lens' style to work with them. Rather we simply define functions
1082 each way.
90189414 1083
167f8805 1084 'encodeAscii' : Reduce as much of your stream of 'Text' actually is ascii to a byte stream,
1085 returning the rest of the 'Text' at the first non-ascii 'Char'
1086-}
90189414 1087encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1088encodeAscii = go where
1089 go p = do echunk <- lift (next p)
1090 case echunk of
1091 Left r -> return (return r)
1092 Right (chunk, p') ->
1093 if T.null chunk
1094 then go p'
1095 else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
1096 in do yield (B8.pack (T.unpack safe))
1097 if T.null unsafe
1098 then go p'
1099 else return $ do yield unsafe
1100 p'
167f8805 1101{- | Reduce as much of your stream of 'Text' actually is iso8859 or latin1 to a byte stream,
1102 returning the rest of the 'Text' upon hitting any non-latin 'Char'
1103 -}
90189414 1104encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1105encodeIso8859_1 = go where
1106 go p = do etxt <- lift (next p)
1107 case etxt of
1108 Left r -> return (return r)
1109 Right (txt, p') ->
1110 if T.null txt
1111 then go p'
1112 else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
1113 in do yield (B8.pack (T.unpack safe))
1114 if T.null unsafe
1115 then go p'
1116 else return $ do yield unsafe
1117 p'
1118
167f8805 1119{- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
1120 unused 'ByteString' upon hitting an un-ascii byte.
1121 -}
90189414 1122decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1123decodeAscii = go where
1124 go p = do echunk <- lift (next p)
1125 case echunk of
1126 Left r -> return (return r)
1127 Right (chunk, p') ->
1128 if B.null chunk
1129 then go p'
1130 else let (safe, unsafe) = B.span (<= 0x7F) chunk
1131 in do yield (T.pack (B8.unpack safe))
1132 if B.null unsafe
1133 then go p'
1134 else return $ do yield unsafe
1135 p'
1136
167f8805 1137{- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
1138 unused 'ByteString' upon hitting the rare un-latinizable byte.
1139 -}
90189414 1140decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1141decodeIso8859_1 = go where
1142 go p = do echunk <- lift (next p)
1143 case echunk of
1144 Left r -> return (return r)
1145 Right (chunk, p') ->
1146 if B.null chunk
1147 then go p'
1148 else let (safe, unsafe) = B.span (<= 0xFF) chunk
1149 in do yield (T.pack (B8.unpack safe))
1150 if B.null unsafe
1151 then go p'
1152 else return $ do yield unsafe
1153 p'
1154
1155
1156
167f8805 1157
90189414 1158