]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
Fixed `scan`
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
9667f797
GG
1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
2
13a43263 3{-| This module provides @pipes@ utilities for \"text streams\", which are
64e03122 4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
6 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
7 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
91727d11 8
63ea9ffd 9 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
31f41a5d 10 example, the following program copies a document from one file to another:
91727d11 11
12> import Pipes
31f41a5d 13> import qualified Data.Text.Pipes as Text
91727d11 14> import System.IO
15>
16> main =
17> withFile "inFile.txt" ReadMode $ \hIn ->
18> withFile "outFile.txt" WriteMode $ \hOut ->
31f41a5d 19> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
20
21To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
91727d11 22
13a43263 23> import Pipes
31f41a5d 24> import qualified Data.Text.Pipes as Text
13a43263 25> import Pipes.Safe
26>
31f41a5d 27> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
13a43263 28
91727d11 29 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
c8027236 30 and 'stdout' pipes, as with the following \"echo\" program:
91727d11 31
31f41a5d 32> main = runEffect $ Text.stdin >-> Text.stdout
91727d11 33
c8027236 34 You can also translate pure lazy 'TL.Text's to and from pipes:
91727d11 35
31f41a5d 36> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
91727d11 37
38 In addition, this module provides many functions equivalent to lazy
31f41a5d 39 'Text' functions so that you can transform or fold text streams. For
91727d11 40 example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 41 might write:
91727d11 42
43> import Pipes
31f41a5d 44> import qualified Pipes.Text as Text
45> import qualified Pipes.Parse as Parse
91727d11 46>
31f41a5d 47> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
91727d11 48> where
31f41a5d 49> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
91727d11 50
31f41a5d 51 The above program will never bring more than one chunk of text (~ 32 KB) into
91727d11 52 memory, no matter how long the lines are.
53
54 Note that functions in this library are designed to operate on streams that
31f41a5d 55 are insensitive to text boundaries. This means that they may freely split
64e03122 56 text into smaller texts, /discard empty texts/. However, apart from the
57 special case of 'concatMap', they will /never concatenate texts/ in order
58 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
91727d11 59-}
60
7faef8bc 61module Pipes.Text (
91727d11 62 -- * Producers
1677dc12 63 fromLazy
64 , stdin
65 , fromHandle
66 , readFile
91727d11 67
68 -- * Consumers
1677dc12 69 , stdout
1677dc12 70 , toHandle
71 , writeFile
91727d11 72
73 -- * Pipes
1677dc12 74 , map
75 , concatMap
76 , take
77 , drop
78 , takeWhile
79 , dropWhile
80 , filter
81 , scan
82 , encodeUtf8
83 , pack
84 , unpack
85 , toCaseFold
86 , toLower
87 , toUpper
88 , stripStart
91727d11 89
90 -- * Folds
1677dc12 91 , toLazy
92 , toLazyM
93 , foldChars
94 , head
95 , last
96 , null
97 , length
98 , any
99 , all
100 , maximum
101 , minimum
102 , find
103 , index
104 , count
105
106 -- * Primitive Character Parsers
107 -- $parse
108 , nextChar
109 , drawChar
110 , unDrawChar
111 , peekChar
9e9bb0ce 112 , isEndOfChars
1677dc12 113
114 -- * Parsing Lenses
9e9bb0ce 115 , splitAt
1677dc12 116 , span
117 , break
118 , groupBy
119 , group
9e9bb0ce 120 , word
121 , line
90189414 122
123 -- * Decoding Lenses
1677dc12 124 , decodeUtf8
90189414 125 , codec
126
127 -- * Codecs
128 , utf8
129 , utf16_le
130 , utf16_be
131 , utf32_le
132 , utf32_be
133
134 -- * Other Decoding/Encoding Functions
135 , decodeIso8859_1
136 , decodeAscii
137 , encodeIso8859_1
138 , encodeAscii
1677dc12 139
140 -- * FreeT Splitters
141 , chunksOf
142 , splitsWith
0f8c6f1b 143 , splits
1677dc12 144-- , groupsBy
145-- , groups
146 , lines
147 , words
148
91727d11 149 -- * Transformations
1677dc12 150 , intersperse
9e9bb0ce 151 , packChars
31f41a5d 152
91727d11 153 -- * Joiners
1677dc12 154 , intercalate
155 , unlines
156 , unwords
9e9bb0ce 157
1677dc12 158 -- * Re-exports
91727d11 159 -- $reexports
a008fca5 160 , Decoding(..)
161 , streamDecodeUtf8
162 , decodeSomeUtf8
163 , Codec(..)
164 , TextException(..)
1677dc12 165 , module Data.ByteString
166 , module Data.Text
167 , module Data.Profunctor
168 , module Data.Word
169 , module Pipes.Parse
7ed76745 170 , module Pipes.Group
91727d11 171 ) where
172
173import Control.Exception (throwIO, try)
0f8c6f1b 174import Control.Applicative ((<*))
64e03122 175import Control.Monad (liftM, unless, join)
9e9bb0ce 176import Control.Monad.Trans.State.Strict (StateT(..), modify)
ca6f90a0 177import Data.Monoid ((<>))
91727d11 178import qualified Data.Text as T
179import qualified Data.Text.IO as T
31f41a5d 180import qualified Data.Text.Encoding as TE
63ea9ffd 181import qualified Data.Text.Encoding.Error as TE
91727d11 182import Data.Text (Text)
183import qualified Data.Text.Lazy as TL
184import qualified Data.Text.Lazy.IO as TL
185import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
186import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
31f41a5d 187import Data.ByteString (ByteString)
188import qualified Data.ByteString as B
90189414 189import qualified Data.ByteString.Char8 as B8
cf10d6f1 190import Data.Char (ord, isSpace)
1677dc12 191import Data.Functor.Constant (Constant(Constant, getConstant))
91727d11 192import Data.Functor.Identity (Identity)
1677dc12 193import Data.Profunctor (Profunctor)
194import qualified Data.Profunctor
91727d11 195import qualified Data.List as List
196import Foreign.C.Error (Errno(Errno), ePIPE)
197import qualified GHC.IO.Exception as G
198import Pipes
5e3f5409 199import qualified Pipes.ByteString as PB
7c9f2b8b 200import qualified Pipes.Text.Internal as PI
201import Pipes.Text.Internal
91727d11 202import Pipes.Core (respond, Server')
7fc48f7c 203import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
7ed76745 204import qualified Pipes.Group as PG
91727d11 205import qualified Pipes.Parse as PP
7ed76745 206import Pipes.Parse (Parser)
91727d11 207import qualified Pipes.Safe.Prelude as Safe
208import qualified Pipes.Safe as Safe
209import Pipes.Safe (MonadSafe(..), Base(..))
210import qualified Pipes.Prelude as P
211import qualified System.IO as IO
212import Data.Char (isSpace)
63ea9ffd 213import Data.Word (Word8)
1677dc12 214
91727d11 215import Prelude hiding (
216 all,
217 any,
218 break,
219 concat,
220 concatMap,
221 drop,
222 dropWhile,
223 elem,
224 filter,
225 head,
226 last,
227 lines,
228 length,
229 map,
230 maximum,
231 minimum,
232 notElem,
233 null,
234 readFile,
235 span,
236 splitAt,
237 take,
238 takeWhile,
239 unlines,
240 unwords,
241 words,
242 writeFile )
243
244-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
245fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
246fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 247{-# INLINE fromLazy #-}
91727d11 248
62e8521c 249-- | Stream text from 'stdin'
ca6f90a0 250stdin :: MonadIO m => Producer Text m ()
91727d11 251stdin = fromHandle IO.stdin
ca6f90a0 252{-# INLINE stdin #-}
91727d11 253
31f41a5d 254{-| Convert a 'IO.Handle' into a text stream using a text size
ca6f90a0 255 determined by the good sense of the text library; note that this
256 is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
257 but uses the system encoding and has other `Data.Text.IO` features
31f41a5d 258-}
259
ca6f90a0 260fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
261fromHandle h = go where
262 go = do txt <- liftIO (T.hGetChunk h)
4cbc92cc 263 unless (T.null txt) ( do yield txt
264 go )
91727d11 265{-# INLINABLE fromHandle#-}
ca6f90a0 266
267
268{-| Stream text from a file in the simple fashion of @Data.Text.IO@
6f6f9974 269
31f41a5d 270>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
271MAIN = PUTSTRLN "HELLO WORLD"
6f6f9974 272-}
273
ca6f90a0 274readFile :: MonadSafe m => FilePath -> Producer Text m ()
91727d11 275readFile file = Safe.withFile file IO.ReadMode fromHandle
ca6f90a0 276{-# INLINE readFile #-}
91727d11 277
91727d11 278
31f41a5d 279{-| Stream text to 'stdout'
91727d11 280
281 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
282
4cbc92cc 283 Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@
284 instead of @(source >-> stdout)@ .
91727d11 285-}
286stdout :: MonadIO m => Consumer' Text m ()
287stdout = go
288 where
289 go = do
290 txt <- await
291 x <- liftIO $ try (T.putStr txt)
292 case x of
293 Left (G.IOError { G.ioe_type = G.ResourceVanished
294 , G.ioe_errno = Just ioe })
295 | Errno ioe == ePIPE
296 -> return ()
297 Left e -> liftIO (throwIO e)
298 Right () -> go
299{-# INLINABLE stdout #-}
300
91727d11 301
31f41a5d 302{-| Convert a text stream into a 'Handle'
91727d11 303
31f41a5d 304 Note: again, for best performance, where possible use
305 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
91727d11 306-}
307toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
308toHandle h = for cat (liftIO . T.hPutStr h)
309{-# INLINABLE toHandle #-}
310
d4732515 311{-# RULES "p >-> toHandle h" forall p h .
ff38b9f0 312 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
d4732515 313 #-}
314
315
31f41a5d 316-- | Stream text into a file. Uses @pipes-safe@.
ca6f90a0 317writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
91727d11 318writeFile file = Safe.withFile file IO.WriteMode toHandle
ca6f90a0 319{-# INLINE writeFile #-}
91727d11 320
1677dc12 321
322type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
323
324type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
325
326(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
327a ^. lens = getConstant (lens Constant a)
328
329
91727d11 330-- | Apply a transformation to each 'Char' in the stream
331map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
332map f = P.map (T.map f)
333{-# INLINABLE map #-}
334
ff38b9f0 335{-# RULES "p >-> map f" forall p f .
336 p >-> map f = for p (\txt -> yield (T.map f txt))
337 #-}
338
31f41a5d 339-- | Map a function over the characters of a text stream and concatenate the results
91727d11 340concatMap
341 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
342concatMap f = P.map (T.concatMap f)
343{-# INLINABLE concatMap #-}
344
ff38b9f0 345{-# RULES "p >-> concatMap f" forall p f .
346 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
347 #-}
7faef8bc 348
349-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
a02a69ad 350-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
351-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
7faef8bc 352encodeUtf8 :: Monad m => Pipe Text ByteString m r
353encodeUtf8 = P.map TE.encodeUtf8
354{-# INLINEABLE encodeUtf8 #-}
355
ff38b9f0 356{-# RULES "p >-> encodeUtf8" forall p .
357 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
358 #-}
359
c0343bc9 360-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 361pack :: Monad m => Pipe String Text m r
362pack = P.map T.pack
363{-# INLINEABLE pack #-}
364
ff38b9f0 365{-# RULES "p >-> pack" forall p .
366 p >-> pack = for p (\txt -> yield (T.pack txt))
367 #-}
368
369-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 370unpack :: Monad m => Pipe Text String m r
d4732515 371unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 372{-# INLINEABLE unpack #-}
373
ff38b9f0 374{-# RULES "p >-> unpack" forall p .
375 p >-> unpack = for p (\txt -> yield (T.unpack txt))
376 #-}
d4732515 377
b0d86a59 378-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
379-- here acting as 'Text' pipes, rather as they would on a lazy text
7faef8bc 380toCaseFold :: Monad m => Pipe Text Text m ()
381toCaseFold = P.map T.toCaseFold
382{-# INLINEABLE toCaseFold #-}
383
ff38b9f0 384{-# RULES "p >-> toCaseFold" forall p .
385 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
386 #-}
387
388
c0343bc9 389-- | lowercase incoming 'Text'
7faef8bc 390toLower :: Monad m => Pipe Text Text m ()
391toLower = P.map T.toLower
392{-# INLINEABLE toLower #-}
393
ff38b9f0 394{-# RULES "p >-> toLower" forall p .
395 p >-> toLower = for p (\txt -> yield (T.toLower txt))
396 #-}
397
c0343bc9 398-- | uppercase incoming 'Text'
7faef8bc 399toUpper :: Monad m => Pipe Text Text m ()
400toUpper = P.map T.toUpper
401{-# INLINEABLE toUpper #-}
402
ff38b9f0 403{-# RULES "p >-> toUpper" forall p .
404 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
405 #-}
406
c0343bc9 407-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 408stripStart :: Monad m => Pipe Text Text m r
409stripStart = do
410 chunk <- await
411 let text = T.stripStart chunk
412 if T.null text
413 then stripStart
b0d86a59 414 else do yield text
415 cat
7faef8bc 416{-# INLINEABLE stripStart #-}
417
31f41a5d 418-- | @(take n)@ only allows @n@ individual characters to pass;
419-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 420take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
421take n0 = go n0 where
422 go n
423 | n <= 0 = return ()
424 | otherwise = do
31f41a5d 425 txt <- await
426 let len = fromIntegral (T.length txt)
91727d11 427 if (len > n)
31f41a5d 428 then yield (T.take (fromIntegral n) txt)
91727d11 429 else do
31f41a5d 430 yield txt
91727d11 431 go (n - len)
432{-# INLINABLE take #-}
433
31f41a5d 434-- | @(drop n)@ drops the first @n@ characters
91727d11 435drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
436drop n0 = go n0 where
437 go n
438 | n <= 0 = cat
439 | otherwise = do
31f41a5d 440 txt <- await
441 let len = fromIntegral (T.length txt)
91727d11 442 if (len >= n)
443 then do
31f41a5d 444 yield (T.drop (fromIntegral n) txt)
91727d11 445 cat
446 else go (n - len)
447{-# INLINABLE drop #-}
448
31f41a5d 449-- | Take characters until they fail the predicate
91727d11 450takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
451takeWhile predicate = go
452 where
453 go = do
31f41a5d 454 txt <- await
455 let (prefix, suffix) = T.span predicate txt
91727d11 456 if (T.null suffix)
457 then do
31f41a5d 458 yield txt
91727d11 459 go
460 else yield prefix
461{-# INLINABLE takeWhile #-}
462
31f41a5d 463-- | Drop characters until they fail the predicate
91727d11 464dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
465dropWhile predicate = go where
466 go = do
31f41a5d 467 txt <- await
468 case T.findIndex (not . predicate) txt of
91727d11 469 Nothing -> go
470 Just i -> do
31f41a5d 471 yield (T.drop i txt)
91727d11 472 cat
473{-# INLINABLE dropWhile #-}
474
475-- | Only allows 'Char's to pass if they satisfy the predicate
476filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
477filter predicate = P.map (T.filter predicate)
478{-# INLINABLE filter #-}
479
ff38b9f0 480{-# RULES "p >-> filter q" forall p q .
481 p >-> filter q = for p (\txt -> yield (T.filter q txt))
482 #-}
483
31f41a5d 484-- | Strict left scan over the characters
91727d11 485scan
486 :: (Monad m)
487 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
11645cdc
GG
488scan step begin = do
489 yield (T.singleton begin)
490 go begin
91727d11 491 where
31f41a5d 492 go c = do
493 txt <- await
494 let txt' = T.scanl step c txt
495 c' = T.last txt'
11645cdc 496 yield (T.tail txt')
31f41a5d 497 go c'
91727d11 498{-# INLINABLE scan #-}
499
500{-| Fold a pure 'Producer' of strict 'Text's into a lazy
501 'TL.Text'
502-}
503toLazy :: Producer Text Identity () -> TL.Text
504toLazy = TL.fromChunks . P.toList
505{-# INLINABLE toLazy #-}
506
507{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
508 'TL.Text'
509
510 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
511 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
512 immediately as they are generated instead of loading them all into memory.
513-}
514toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
515toLazyM = liftM TL.fromChunks . P.toListM
516{-# INLINABLE toLazyM #-}
517
31f41a5d 518-- | Reduce the text stream using a strict left fold over characters
64e03122 519foldChars
91727d11 520 :: Monad m
521 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 522foldChars step begin done = P.fold (T.foldl' step) begin done
1677dc12 523{-# INLINABLE foldChars #-}
91727d11 524
525-- | Retrieve the first 'Char'
526head :: (Monad m) => Producer Text m () -> m (Maybe Char)
527head = go
528 where
529 go p = do
530 x <- nextChar p
531 case x of
532 Left _ -> return Nothing
31f41a5d 533 Right (c, _) -> return (Just c)
91727d11 534{-# INLINABLE head #-}
535
536-- | Retrieve the last 'Char'
537last :: (Monad m) => Producer Text m () -> m (Maybe Char)
538last = go Nothing
539 where
540 go r p = do
541 x <- next p
542 case x of
543 Left () -> return r
31f41a5d 544 Right (txt, p') ->
545 if (T.null txt)
91727d11 546 then go r p'
31f41a5d 547 else go (Just $ T.last txt) p'
91727d11 548{-# INLINABLE last #-}
549
550-- | Determine if the stream is empty
551null :: (Monad m) => Producer Text m () -> m Bool
552null = P.all T.null
553{-# INLINABLE null #-}
554
62e8521c 555-- | Count the number of characters in the stream
91727d11 556length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 557length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 558{-# INLINABLE length #-}
559
560-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
561any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
562any predicate = P.any (T.any predicate)
563{-# INLINABLE any #-}
564
565-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
566all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
567all predicate = P.all (T.all predicate)
568{-# INLINABLE all #-}
569
62e8521c 570-- | Return the maximum 'Char' within a text stream
91727d11 571maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
572maximum = P.fold step Nothing id
573 where
31f41a5d 574 step mc txt =
575 if (T.null txt)
576 then mc
577 else Just $ case mc of
578 Nothing -> T.maximum txt
579 Just c -> max c (T.maximum txt)
91727d11 580{-# INLINABLE maximum #-}
581
62e8521c 582-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 583minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
584minimum = P.fold step Nothing id
585 where
31f41a5d 586 step mc txt =
587 if (T.null txt)
588 then mc
589 else case mc of
590 Nothing -> Just (T.minimum txt)
591 Just c -> Just (min c (T.minimum txt))
91727d11 592{-# INLINABLE minimum #-}
593
1677dc12 594
91727d11 595-- | Find the first element in the stream that matches the predicate
596find
597 :: (Monad m)
598 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
599find predicate p = head (p >-> filter predicate)
600{-# INLINABLE find #-}
601
62e8521c 602-- | Index into a text stream
91727d11 603index
604 :: (Monad m, Integral a)
605 => a-> Producer Text m () -> m (Maybe Char)
606index n p = head (p >-> drop n)
607{-# INLINABLE index #-}
608
63ea9ffd 609
31f41a5d 610-- | Store a tally of how many segments match the given 'Text'
611count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
612count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
613{-# INLINABLE count #-}
614
9e9bb0ce 615
616{-| Consume the first character from a stream of 'Text'
617
618 'next' either fails with a 'Left' if the 'Producer' has no more characters or
619 succeeds with a 'Right' providing the next character and the remainder of the
620 'Producer'.
621-}
622nextChar
623 :: (Monad m)
624 => Producer Text m r
625 -> m (Either r (Char, Producer Text m r))
626nextChar = go
627 where
628 go p = do
629 x <- next p
630 case x of
631 Left r -> return (Left r)
632 Right (txt, p') -> case (T.uncons txt) of
633 Nothing -> go p'
634 Just (c, txt') -> return (Right (c, yield txt' >> p'))
635{-# INLINABLE nextChar #-}
636
637{-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
638 'Producer' is empty
639-}
640drawChar :: (Monad m) => Parser Text m (Maybe Char)
641drawChar = do
642 x <- PP.draw
643 case x of
644 Nothing -> return Nothing
645 Just txt -> case (T.uncons txt) of
646 Nothing -> drawChar
647 Just (c, txt') -> do
648 PP.unDraw txt'
649 return (Just c)
650{-# INLINABLE drawChar #-}
651
652-- | Push back a 'Char' onto the underlying 'Producer'
653unDrawChar :: (Monad m) => Char -> Parser Text m ()
654unDrawChar c = modify (yield (T.singleton c) >>)
655{-# INLINABLE unDrawChar #-}
656
657{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
658 push the 'Char' back
659
660> peekChar = do
661> x <- drawChar
662> case x of
663> Left _ -> return ()
664> Right c -> unDrawChar c
665> return x
666-}
667peekChar :: (Monad m) => Parser Text m (Maybe Char)
668peekChar = do
669 x <- drawChar
670 case x of
671 Nothing -> return ()
672 Just c -> unDrawChar c
673 return x
674{-# INLINABLE peekChar #-}
675
676{-| Check if the underlying 'Producer' has no more characters
677
678 Note that this will skip over empty 'Text' chunks, unlike
679 'PP.isEndOfInput' from @pipes-parse@, which would consider
680 an empty 'Text' a valid bit of input.
681
682> isEndOfChars = liftM isLeft peekChar
683-}
684isEndOfChars :: (Monad m) => Parser Text m Bool
685isEndOfChars = do
686 x <- peekChar
687 return (case x of
688 Nothing -> True
689 Just _-> False )
690{-# INLINABLE isEndOfChars #-}
691
692
167f8805 693{- | An improper lens into a stream of 'ByteString' expected to be UTF-8 encoded; the associated
694 stream of Text ends by returning a stream of ByteStrings beginning at the point of failure.
695 -}
ca6f90a0 696
9e9bb0ce 697decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
698 (Producer Text m (Producer ByteString m r))
699decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
7c9f2b8b 700 (k (go B.empty PI.streamDecodeUtf8 p0)) where
ca6f90a0 701 go !carry dec0 p = do
702 x <- lift (next p)
4cbc92cc 703 case x of Left r -> return (if B.null carry
704 then return r -- all bytestring input was consumed
705 else (do yield carry -- a potentially valid fragment remains
706 return r))
ca6f90a0 707
708 Right (chunk, p') -> case dec0 chunk of
7c9f2b8b 709 PI.Some text carry2 dec -> do yield text
ca6f90a0 710 go carry2 dec p'
7c9f2b8b 711 PI.Other text bs -> do yield text
ca6f90a0 712 return (do yield bs -- an invalid blob remains
713 p')
714{-# INLINABLE decodeUtf8 #-}
715
31f41a5d 716
717-- | Splits a 'Producer' after the given number of characters
91727d11 718splitAt
719 :: (Monad m, Integral n)
720 => n
9e9bb0ce 721 -> Lens' (Producer Text m r)
722 (Producer Text m (Producer Text m r))
723splitAt n0 k p0 = fmap join (k (go n0 p0))
91727d11 724 where
725 go 0 p = return p
726 go n p = do
727 x <- lift (next p)
728 case x of
729 Left r -> return (return r)
31f41a5d 730 Right (txt, p') -> do
731 let len = fromIntegral (T.length txt)
91727d11 732 if (len <= n)
733 then do
31f41a5d 734 yield txt
91727d11 735 go (n - len) p'
736 else do
31f41a5d 737 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 738 yield prefix
739 return (yield suffix >> p')
740{-# INLINABLE splitAt #-}
741
91727d11 742
31f41a5d 743{-| Split a text stream in two, where the first text stream is the longest
744 consecutive group of text that satisfy the predicate
91727d11 745-}
746span
747 :: (Monad m)
748 => (Char -> Bool)
9e9bb0ce 749 -> Lens' (Producer Text m r)
750 (Producer Text m (Producer Text m r))
751span predicate k p0 = fmap join (k (go p0))
91727d11 752 where
753 go p = do
754 x <- lift (next p)
755 case x of
756 Left r -> return (return r)
31f41a5d 757 Right (txt, p') -> do
758 let (prefix, suffix) = T.span predicate txt
91727d11 759 if (T.null suffix)
760 then do
31f41a5d 761 yield txt
91727d11 762 go p'
763 else do
764 yield prefix
765 return (yield suffix >> p')
766{-# INLINABLE span #-}
767
62e8521c 768{-| Split a text stream in two, where the first text stream is the longest
769 consecutive group of characters that don't satisfy the predicate
91727d11 770-}
771break
772 :: (Monad m)
773 => (Char -> Bool)
9e9bb0ce 774 -> Lens' (Producer Text m r)
775 (Producer Text m (Producer Text m r))
91727d11 776break predicate = span (not . predicate)
777{-# INLINABLE break #-}
778
9e9bb0ce 779{-| Improper lens that splits after the first group of equivalent Chars, as
780 defined by the given equivalence relation
781-}
782groupBy
783 :: (Monad m)
784 => (Char -> Char -> Bool)
785 -> Lens' (Producer Text m r)
786 (Producer Text m (Producer Text m r))
787groupBy equals k p0 = fmap join (k ((go p0))) where
788 go p = do
789 x <- lift (next p)
790 case x of
791 Left r -> return (return r)
792 Right (txt, p') -> case T.uncons txt of
793 Nothing -> go p'
794 Just (c, _) -> (yield txt >> p') ^. span (equals c)
795{-# INLINABLE groupBy #-}
796
797-- | Improper lens that splits after the first succession of identical 'Char' s
798group :: Monad m
799 => Lens' (Producer Text m r)
800 (Producer Text m (Producer Text m r))
801group = groupBy (==)
802{-# INLINABLE group #-}
803
804{-| Improper lens that splits a 'Producer' after the first word
805
806 Unlike 'words', this does not drop leading whitespace
807-}
808word :: (Monad m)
809 => Lens' (Producer Text m r)
810 (Producer Text m (Producer Text m r))
811word k p0 = fmap join (k (to p0))
812 where
813 to p = do
814 p' <- p^.span isSpace
815 p'^.break isSpace
816{-# INLINABLE word #-}
817
818
819line :: (Monad m)
820 => Lens' (Producer Text m r)
821 (Producer Text m (Producer Text m r))
822line = break (== '\n')
823
824{-# INLINABLE line #-}
825
826
827-- | Intersperse a 'Char' in between the characters of stream of 'Text'
828intersperse
829 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
830intersperse c = go0
831 where
832 go0 p = do
833 x <- lift (next p)
834 case x of
835 Left r -> return r
836 Right (txt, p') -> do
837 yield (T.intersperse c txt)
838 go1 p'
839 go1 p = do
840 x <- lift (next p)
841 case x of
842 Left r -> return r
843 Right (txt, p') -> do
844 yield (T.singleton c)
845 yield (T.intersperse c txt)
846 go1 p'
847{-# INLINABLE intersperse #-}
848
849
850
851-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
852packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
853packChars = Data.Profunctor.dimap to (fmap from)
854 where
855 -- to :: Monad m => Producer Char m x -> Producer Text m x
7ed76745 856 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
9e9bb0ce 857
858 step diffAs c = diffAs . (c:)
859
860 done diffAs = T.pack (diffAs [])
861
862 -- from :: Monad m => Producer Text m x -> Producer Char m x
863 from p = for p (each . T.unpack)
864{-# INLINABLE packChars #-}
865
0f8c6f1b 866
867-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
868chunksOf
869 :: (Monad m, Integral n)
870 => n -> Lens' (Producer Text m r)
871 (FreeT (Producer Text m) m r)
872chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
873 where
874 go p = do
875 x <- next p
876 return $ case x of
7ed76745 877 Left r -> Pure r
878 Right (txt, p') -> Free $ do
0f8c6f1b 879 p'' <- (yield txt >> p') ^. splitAt n
7ed76745 880 return $ FreeT (go p'')
0f8c6f1b 881{-# INLINABLE chunksOf #-}
882
883
62e8521c 884{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 885 predicate
886-}
1677dc12 887splitsWith
91727d11 888 :: (Monad m)
889 => (Char -> Bool)
890 -> Producer Text m r
7ed76745 891 -> FreeT (Producer Text m) m r
892splitsWith predicate p0 = FreeT (go0 p0)
91727d11 893 where
894 go0 p = do
895 x <- next p
896 case x of
7ed76745 897 Left r -> return (Pure r)
31f41a5d 898 Right (txt, p') ->
899 if (T.null txt)
91727d11 900 then go0 p'
7ed76745 901 else return $ Free $ do
9e9bb0ce 902 p'' <- (yield txt >> p') ^. span (not . predicate)
7ed76745 903 return $ FreeT (go1 p'')
91727d11 904 go1 p = do
905 x <- nextChar p
906 return $ case x of
7ed76745 907 Left r -> Pure r
908 Right (_, p') -> Free $ do
9e9bb0ce 909 p'' <- p' ^. span (not . predicate)
7ed76745 910 return $ FreeT (go1 p'')
1677dc12 911{-# INLINABLE splitsWith #-}
91727d11 912
31f41a5d 913-- | Split a text stream using the given 'Char' as the delimiter
0f8c6f1b 914splits :: (Monad m)
91727d11 915 => Char
0f8c6f1b 916 -> Lens' (Producer Text m r)
917 (FreeT (Producer Text m) m r)
918splits c k p =
7ed76745 919 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
0f8c6f1b 920{-# INLINABLE splits #-}
921
922{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
923 given equivalence relation
924-}
925groupsBy
926 :: Monad m
927 => (Char -> Char -> Bool)
928 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
7ed76745 929groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
0f8c6f1b 930 go p = do x <- next p
7ed76745 931 case x of Left r -> return (Pure r)
0f8c6f1b 932 Right (bs, p') -> case T.uncons bs of
933 Nothing -> go p'
7ed76745 934 Just (c, _) -> do return $ Free $ do
0f8c6f1b 935 p'' <- (yield bs >> p')^.span (equals c)
7ed76745 936 return $ FreeT (go p'')
0f8c6f1b 937{-# INLINABLE groupsBy #-}
938
939
940-- | Like 'groupsBy', where the equality predicate is ('==')
941groups
942 :: Monad m
943 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
944groups = groupsBy (==)
945{-# INLINABLE groups #-}
946
91727d11 947
91727d11 948
62e8521c 949{-| Split a text stream into 'FreeT'-delimited lines
91727d11 950-}
951lines
0f8c6f1b 952 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
953lines = Data.Profunctor.dimap _lines (fmap _unlines)
91727d11 954 where
7ed76745 955 _lines p0 = FreeT (go0 p0)
0f8c6f1b 956 where
957 go0 p = do
958 x <- next p
959 case x of
7ed76745 960 Left r -> return (Pure r)
0f8c6f1b 961 Right (txt, p') ->
962 if (T.null txt)
963 then go0 p'
7ed76745 964 else return $ Free $ go1 (yield txt >> p')
0f8c6f1b 965 go1 p = do
966 p' <- p ^. break ('\n' ==)
7ed76745 967 return $ FreeT $ do
0f8c6f1b 968 x <- nextChar p'
969 case x of
7ed76745 970 Left r -> return $ Pure r
0f8c6f1b 971 Right (_, p'') -> go0 p''
972 -- _unlines
973 -- :: Monad m
974 -- => FreeT (Producer Text m) m x -> Producer Text m x
7fc48f7c 975 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
976
0f8c6f1b 977
91727d11 978{-# INLINABLE lines #-}
91727d11 979
31f41a5d 980
31f41a5d 981-- | Split a text stream into 'FreeT'-delimited words
91727d11 982words
0f8c6f1b 983 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
984words = Data.Profunctor.dimap go (fmap _unwords)
91727d11 985 where
7ed76745 986 go p = FreeT $ do
cf10d6f1 987 x <- next (p >-> dropWhile isSpace)
988 return $ case x of
7ed76745 989 Left r -> Pure r
990 Right (bs, p') -> Free $ do
9e9bb0ce 991 p'' <- (yield bs >> p') ^. break isSpace
cf10d6f1 992 return (go p'')
7ed76745 993 _unwords = PG.intercalates (yield $ T.singleton ' ')
0f8c6f1b 994
91727d11 995{-# INLINABLE words #-}
996
cf10d6f1 997
31f41a5d 998{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
999 interspersing a text stream in between them
91727d11 1000-}
1001intercalate
1002 :: (Monad m)
1003 => Producer Text m ()
1004 -> FreeT (Producer Text m) m r
1005 -> Producer Text m r
1006intercalate p0 = go0
1007 where
1008 go0 f = do
7ed76745 1009 x <- lift (runFreeT f)
91727d11 1010 case x of
7ed76745 1011 Pure r -> return r
1012 Free p -> do
91727d11 1013 f' <- p
1014 go1 f'
1015 go1 f = do
7ed76745 1016 x <- lift (runFreeT f)
91727d11 1017 case x of
7ed76745 1018 Pure r -> return r
1019 Free p -> do
91727d11 1020 p0
1021 f' <- p
1022 go1 f'
1023{-# INLINABLE intercalate #-}
1024
62e8521c 1025{-| Join 'FreeT'-delimited lines into a text stream
91727d11 1026-}
1027unlines
1028 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1029unlines = go
1030 where
1031 go f = do
7ed76745 1032 x <- lift (runFreeT f)
91727d11 1033 case x of
7ed76745 1034 Pure r -> return r
1035 Free p -> do
91727d11 1036 f' <- p
1037 yield $ T.singleton '\n'
1038 go f'
1039{-# INLINABLE unlines #-}
1040
31f41a5d 1041{-| Join 'FreeT'-delimited words into a text stream
91727d11 1042-}
1043unwords
1044 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
7fc48f7c 1045unwords = intercalate (yield $ T.singleton ' ')
91727d11 1046{-# INLINABLE unwords #-}
1047
1048{- $parse
31f41a5d 1049 The following parsing utilities are single-character analogs of the ones found
1050 @pipes-parse@.
91727d11 1051-}
1052
91727d11 1053{- $reexports
91727d11 1054
1055 @Data.Text@ re-exports the 'Text' type.
1056
0f8c6f1b 1057 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
64e03122 1058-}
1059
167f8805 1060{- | Use a 'Codec' as a pipes-style 'Lens' into a byte stream; the available 'Codec' s are
1061 'utf8', 'utf16_le', 'utf16_be', 'utf32_le', 'utf32_be' . The 'Codec' concept and the
1062 individual 'Codec' definitions follow the enumerator and conduit libraries.
1063
1064 Utf8 is handled differently in this library -- without the use of 'unsafePerformIO' &co
1065 to catch 'Text' exceptions; but the same 'mypipe ^. codec utf8' interface can be used.
1066 'mypipe ^. decodeUtf8' should be the same, but has a somewhat more direct and thus perhaps
1067 better implementation.
1068
1069 -}
90189414 1070codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
1071codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
1072 (k (decoder (dec B.empty) p0) ) where
7c9f2b8b 1073 decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
90189414 1074 decoder !d p0 = case d of
7c9f2b8b 1075 PI.Other txt bad -> do yield txt
90189414 1076 return (do yield bad
1077 p0)
7c9f2b8b 1078 PI.Some txt extra dec -> do yield txt
90189414 1079 x <- lift (next p0)
1080 case x of Left r -> return (do yield extra
1081 return r)
1082 Right (chunk,p1) -> decoder (dec chunk) p1
1083
167f8805 1084{- | ascii and latin encodings only represent a small fragment of 'Text'; thus we cannot
1085 use the pipes 'Lens' style to work with them. Rather we simply define functions
1086 each way.
90189414 1087
167f8805 1088 'encodeAscii' : Reduce as much of your stream of 'Text' actually is ascii to a byte stream,
1089 returning the rest of the 'Text' at the first non-ascii 'Char'
1090-}
90189414 1091encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1092encodeAscii = go where
1093 go p = do echunk <- lift (next p)
1094 case echunk of
1095 Left r -> return (return r)
1096 Right (chunk, p') ->
1097 if T.null chunk
1098 then go p'
1099 else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
1100 in do yield (B8.pack (T.unpack safe))
1101 if T.null unsafe
1102 then go p'
1103 else return $ do yield unsafe
1104 p'
167f8805 1105{- | Reduce as much of your stream of 'Text' actually is iso8859 or latin1 to a byte stream,
1106 returning the rest of the 'Text' upon hitting any non-latin 'Char'
1107 -}
90189414 1108encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1109encodeIso8859_1 = go where
1110 go p = do etxt <- lift (next p)
1111 case etxt of
1112 Left r -> return (return r)
1113 Right (txt, p') ->
1114 if T.null txt
1115 then go p'
1116 else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
1117 in do yield (B8.pack (T.unpack safe))
1118 if T.null unsafe
1119 then go p'
1120 else return $ do yield unsafe
1121 p'
1122
167f8805 1123{- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
1124 unused 'ByteString' upon hitting an un-ascii byte.
1125 -}
90189414 1126decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1127decodeAscii = go where
1128 go p = do echunk <- lift (next p)
1129 case echunk of
1130 Left r -> return (return r)
1131 Right (chunk, p') ->
1132 if B.null chunk
1133 then go p'
1134 else let (safe, unsafe) = B.span (<= 0x7F) chunk
1135 in do yield (T.pack (B8.unpack safe))
1136 if B.null unsafe
1137 then go p'
1138 else return $ do yield unsafe
1139 p'
1140
167f8805 1141{- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
1142 unused 'ByteString' upon hitting the rare un-latinizable byte.
1143 -}
90189414 1144decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1145decodeIso8859_1 = go where
1146 go p = do echunk <- lift (next p)
1147 case echunk of
1148 Left r -> return (return r)
1149 Right (chunk, p') ->
1150 if B.null chunk
1151 then go p'
1152 else let (safe, unsafe) = B.span (<= 0xFF) chunk
1153 in do yield (T.pack (B8.unpack safe))
1154 if B.null unsafe
1155 then go p'
1156 else return $ do yield unsafe
1157 p'
1158
1159
1160
167f8805 1161
c8027236 1162