]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - Pipes/Text.hs
verbose project description added.
[github/fretlink/text-pipes.git] / Pipes / Text.hs
CommitLineData
1677dc12 1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
64e03122 2#if __GLASGOW_HASKELL__ >= 702
3{-# LANGUAGE Trustworthy #-}
4#endif
13a43263 5{-| This module provides @pipes@ utilities for \"text streams\", which are
64e03122 6 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
7 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
8 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
9 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
91727d11 10
63ea9ffd 11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
31f41a5d 12 example, the following program copies a document from one file to another:
91727d11 13
14> import Pipes
31f41a5d 15> import qualified Data.Text.Pipes as Text
91727d11 16> import System.IO
17>
18> main =
19> withFile "inFile.txt" ReadMode $ \hIn ->
20> withFile "outFile.txt" WriteMode $ \hOut ->
31f41a5d 21> runEffect $ Text.fromHandle hIn >-> Text.toHandle hOut
22
23To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
91727d11 24
13a43263 25> import Pipes
31f41a5d 26> import qualified Data.Text.Pipes as Text
13a43263 27> import Pipes.Safe
28>
31f41a5d 29> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
13a43263 30
91727d11 31 You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
31f41a5d 32 and 'stdout' proxies, as with the following \"echo\" program:
91727d11 33
31f41a5d 34> main = runEffect $ Text.stdin >-> Text.stdout
91727d11 35
36 You can also translate pure lazy 'TL.Text's to and from proxies:
37
31f41a5d 38> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
91727d11 39
40 In addition, this module provides many functions equivalent to lazy
31f41a5d 41 'Text' functions so that you can transform or fold text streams. For
91727d11 42 example, to stream only the first three lines of 'stdin' to 'stdout' you
31f41a5d 43 might write:
91727d11 44
45> import Pipes
31f41a5d 46> import qualified Pipes.Text as Text
47> import qualified Pipes.Parse as Parse
91727d11 48>
31f41a5d 49> main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout
91727d11 50> where
31f41a5d 51> takeLines n = Text.unlines . Parse.takeFree n . Text.lines
91727d11 52
31f41a5d 53 The above program will never bring more than one chunk of text (~ 32 KB) into
91727d11 54 memory, no matter how long the lines are.
55
56 Note that functions in this library are designed to operate on streams that
31f41a5d 57 are insensitive to text boundaries. This means that they may freely split
64e03122 58 text into smaller texts, /discard empty texts/. However, apart from the
59 special case of 'concatMap', they will /never concatenate texts/ in order
60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
91727d11 61-}
62
7faef8bc 63module Pipes.Text (
91727d11 64 -- * Producers
1677dc12 65 fromLazy
66 , stdin
67 , fromHandle
68 , readFile
69 , stdinLn
91727d11 70
71 -- * Consumers
1677dc12 72 , stdout
73 , stdoutLn
74 , toHandle
75 , writeFile
91727d11 76
77 -- * Pipes
1677dc12 78 , map
79 , concatMap
80 , take
81 , drop
82 , takeWhile
83 , dropWhile
84 , filter
85 , scan
86 , encodeUtf8
87 , pack
88 , unpack
89 , toCaseFold
90 , toLower
91 , toUpper
92 , stripStart
91727d11 93
94 -- * Folds
1677dc12 95 , toLazy
96 , toLazyM
97 , foldChars
98 , head
99 , last
100 , null
101 , length
102 , any
103 , all
104 , maximum
105 , minimum
106 , find
107 , index
108 , count
109
110 -- * Primitive Character Parsers
111 -- $parse
112 , nextChar
113 , drawChar
114 , unDrawChar
115 , peekChar
9e9bb0ce 116 , isEndOfChars
1677dc12 117
118 -- * Parsing Lenses
9e9bb0ce 119 , splitAt
1677dc12 120 , span
121 , break
122 , groupBy
123 , group
9e9bb0ce 124 , word
125 , line
90189414 126
127 -- * Decoding Lenses
1677dc12 128 , decodeUtf8
90189414 129 , codec
130
131 -- * Codecs
132 , utf8
133 , utf16_le
134 , utf16_be
135 , utf32_le
136 , utf32_be
137
138 -- * Other Decoding/Encoding Functions
139 , decodeIso8859_1
140 , decodeAscii
141 , encodeIso8859_1
142 , encodeAscii
1677dc12 143
144 -- * FreeT Splitters
145 , chunksOf
146 , splitsWith
0f8c6f1b 147 , splits
1677dc12 148-- , groupsBy
149-- , groups
150 , lines
151 , words
152
91727d11 153 -- * Transformations
1677dc12 154 , intersperse
9e9bb0ce 155 , packChars
31f41a5d 156
91727d11 157 -- * Joiners
1677dc12 158 , intercalate
159 , unlines
160 , unwords
9e9bb0ce 161
1677dc12 162 -- * Re-exports
91727d11 163 -- $reexports
1677dc12 164 , module Data.ByteString
165 , module Data.Text
166 , module Data.Profunctor
167 , module Data.Word
168 , module Pipes.Parse
7ed76745 169 , module Pipes.Group
409759e8 170 , module Pipes.Text.Internal.Codec
91727d11 171 ) where
172
173import Control.Exception (throwIO, try)
0f8c6f1b 174import Control.Applicative ((<*))
64e03122 175import Control.Monad (liftM, unless, join)
9e9bb0ce 176import Control.Monad.Trans.State.Strict (StateT(..), modify)
ca6f90a0 177import Data.Monoid ((<>))
91727d11 178import qualified Data.Text as T
179import qualified Data.Text.IO as T
31f41a5d 180import qualified Data.Text.Encoding as TE
63ea9ffd 181import qualified Data.Text.Encoding.Error as TE
91727d11 182import Data.Text (Text)
183import qualified Data.Text.Lazy as TL
184import qualified Data.Text.Lazy.IO as TL
185import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
186import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
31f41a5d 187import Data.ByteString (ByteString)
188import qualified Data.ByteString as B
90189414 189import qualified Data.ByteString.Char8 as B8
cf10d6f1 190import Data.Char (ord, isSpace)
1677dc12 191import Data.Functor.Constant (Constant(Constant, getConstant))
91727d11 192import Data.Functor.Identity (Identity)
1677dc12 193import Data.Profunctor (Profunctor)
194import qualified Data.Profunctor
91727d11 195import qualified Data.List as List
196import Foreign.C.Error (Errno(Errno), ePIPE)
197import qualified GHC.IO.Exception as G
198import Pipes
5e3f5409 199import qualified Pipes.ByteString as PB
409759e8 200import qualified Pipes.Text.Internal.Decoding as PE
201import Pipes.Text.Internal.Codec
91727d11 202import Pipes.Core (respond, Server')
7fc48f7c 203import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
7ed76745 204import qualified Pipes.Group as PG
91727d11 205import qualified Pipes.Parse as PP
7ed76745 206import Pipes.Parse (Parser)
91727d11 207import qualified Pipes.Safe.Prelude as Safe
208import qualified Pipes.Safe as Safe
209import Pipes.Safe (MonadSafe(..), Base(..))
210import qualified Pipes.Prelude as P
211import qualified System.IO as IO
212import Data.Char (isSpace)
63ea9ffd 213import Data.Word (Word8)
1677dc12 214
91727d11 215import Prelude hiding (
216 all,
217 any,
218 break,
219 concat,
220 concatMap,
221 drop,
222 dropWhile,
223 elem,
224 filter,
225 head,
226 last,
227 lines,
228 length,
229 map,
230 maximum,
231 minimum,
232 notElem,
233 null,
234 readFile,
235 span,
236 splitAt,
237 take,
238 takeWhile,
239 unlines,
240 unwords,
241 words,
242 writeFile )
243
244-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
245fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
246fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
ca6f90a0 247{-# INLINE fromLazy #-}
91727d11 248
62e8521c 249-- | Stream text from 'stdin'
ca6f90a0 250stdin :: MonadIO m => Producer Text m ()
91727d11 251stdin = fromHandle IO.stdin
ca6f90a0 252{-# INLINE stdin #-}
91727d11 253
31f41a5d 254{-| Convert a 'IO.Handle' into a text stream using a text size
ca6f90a0 255 determined by the good sense of the text library; note that this
256 is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
257 but uses the system encoding and has other `Data.Text.IO` features
31f41a5d 258-}
259
ca6f90a0 260fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
261fromHandle h = go where
262 go = do txt <- liftIO (T.hGetChunk h)
4cbc92cc 263 unless (T.null txt) ( do yield txt
264 go )
91727d11 265{-# INLINABLE fromHandle#-}
ca6f90a0 266
267
268{-| Stream text from a file in the simple fashion of @Data.Text.IO@
6f6f9974 269
31f41a5d 270>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
271MAIN = PUTSTRLN "HELLO WORLD"
6f6f9974 272-}
273
ca6f90a0 274readFile :: MonadSafe m => FilePath -> Producer Text m ()
91727d11 275readFile file = Safe.withFile file IO.ReadMode fromHandle
ca6f90a0 276{-# INLINE readFile #-}
91727d11 277
4cbc92cc 278{-| Crudely stream lines of input from stdin in the style of Pipes.Prelude.
279 This is for testing in ghci etc.; obviously it will be unsound if used to recieve
280 the contents of immense files with few newlines.
31f41a5d 281
282>>> let safely = runSafeT . runEffect
283>>> safely $ for Text.stdinLn (lift . lift . print . T.length)
284hello
2855
286world
2875
288
289-}
91727d11 290stdinLn :: MonadIO m => Producer' Text m ()
31f41a5d 291stdinLn = go where
91727d11 292 go = do
293 eof <- liftIO (IO.hIsEOF IO.stdin)
294 unless eof $ do
295 txt <- liftIO (T.hGetLine IO.stdin)
296 yield txt
297 go
ca6f90a0 298{-# INLINABLE stdinLn #-}
91727d11 299
31f41a5d 300{-| Stream text to 'stdout'
91727d11 301
302 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
303
4cbc92cc 304 Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@
305 instead of @(source >-> stdout)@ .
91727d11 306-}
307stdout :: MonadIO m => Consumer' Text m ()
308stdout = go
309 where
310 go = do
311 txt <- await
312 x <- liftIO $ try (T.putStr txt)
313 case x of
314 Left (G.IOError { G.ioe_type = G.ResourceVanished
315 , G.ioe_errno = Just ioe })
316 | Errno ioe == ePIPE
317 -> return ()
318 Left e -> liftIO (throwIO e)
319 Right () -> go
320{-# INLINABLE stdout #-}
321
322stdoutLn :: (MonadIO m) => Consumer' Text m ()
323stdoutLn = go
324 where
325 go = do
326 str <- await
327 x <- liftIO $ try (T.putStrLn str)
328 case x of
329 Left (G.IOError { G.ioe_type = G.ResourceVanished
330 , G.ioe_errno = Just ioe })
331 | Errno ioe == ePIPE
332 -> return ()
333 Left e -> liftIO (throwIO e)
334 Right () -> go
335{-# INLINABLE stdoutLn #-}
336
31f41a5d 337{-| Convert a text stream into a 'Handle'
91727d11 338
31f41a5d 339 Note: again, for best performance, where possible use
340 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
91727d11 341-}
342toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
343toHandle h = for cat (liftIO . T.hPutStr h)
344{-# INLINABLE toHandle #-}
345
d4732515 346{-# RULES "p >-> toHandle h" forall p h .
ff38b9f0 347 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
d4732515 348 #-}
349
350
31f41a5d 351-- | Stream text into a file. Uses @pipes-safe@.
ca6f90a0 352writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
91727d11 353writeFile file = Safe.withFile file IO.WriteMode toHandle
ca6f90a0 354{-# INLINE writeFile #-}
91727d11 355
1677dc12 356
357type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
358
359type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a)
360
361(^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b
362a ^. lens = getConstant (lens Constant a)
363
364
91727d11 365-- | Apply a transformation to each 'Char' in the stream
366map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
367map f = P.map (T.map f)
368{-# INLINABLE map #-}
369
ff38b9f0 370{-# RULES "p >-> map f" forall p f .
371 p >-> map f = for p (\txt -> yield (T.map f txt))
372 #-}
373
31f41a5d 374-- | Map a function over the characters of a text stream and concatenate the results
91727d11 375concatMap
376 :: (Monad m) => (Char -> Text) -> Pipe Text Text m r
377concatMap f = P.map (T.concatMap f)
378{-# INLINABLE concatMap #-}
379
ff38b9f0 380{-# RULES "p >-> concatMap f" forall p f .
381 p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt))
382 #-}
7faef8bc 383
384-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
a02a69ad 385-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
386-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
7faef8bc 387encodeUtf8 :: Monad m => Pipe Text ByteString m r
388encodeUtf8 = P.map TE.encodeUtf8
389{-# INLINEABLE encodeUtf8 #-}
390
ff38b9f0 391{-# RULES "p >-> encodeUtf8" forall p .
392 p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
393 #-}
394
c0343bc9 395-- | Transform a Pipe of 'String's into one of 'Text' chunks
7faef8bc 396pack :: Monad m => Pipe String Text m r
397pack = P.map T.pack
398{-# INLINEABLE pack #-}
399
ff38b9f0 400{-# RULES "p >-> pack" forall p .
401 p >-> pack = for p (\txt -> yield (T.pack txt))
402 #-}
403
404-- | Transform a Pipes of 'Text' chunks into one of 'String's
7faef8bc 405unpack :: Monad m => Pipe Text String m r
d4732515 406unpack = for cat (\t -> yield (T.unpack t))
7faef8bc 407{-# INLINEABLE unpack #-}
408
ff38b9f0 409{-# RULES "p >-> unpack" forall p .
410 p >-> unpack = for p (\txt -> yield (T.unpack txt))
411 #-}
d4732515 412
b0d86a59 413-- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities,
414-- here acting as 'Text' pipes, rather as they would on a lazy text
7faef8bc 415toCaseFold :: Monad m => Pipe Text Text m ()
416toCaseFold = P.map T.toCaseFold
417{-# INLINEABLE toCaseFold #-}
418
ff38b9f0 419{-# RULES "p >-> toCaseFold" forall p .
420 p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt))
421 #-}
422
423
c0343bc9 424-- | lowercase incoming 'Text'
7faef8bc 425toLower :: Monad m => Pipe Text Text m ()
426toLower = P.map T.toLower
427{-# INLINEABLE toLower #-}
428
ff38b9f0 429{-# RULES "p >-> toLower" forall p .
430 p >-> toLower = for p (\txt -> yield (T.toLower txt))
431 #-}
432
c0343bc9 433-- | uppercase incoming 'Text'
7faef8bc 434toUpper :: Monad m => Pipe Text Text m ()
435toUpper = P.map T.toUpper
436{-# INLINEABLE toUpper #-}
437
ff38b9f0 438{-# RULES "p >-> toUpper" forall p .
439 p >-> toUpper = for p (\txt -> yield (T.toUpper txt))
440 #-}
441
c0343bc9 442-- | Remove leading white space from an incoming succession of 'Text's
7faef8bc 443stripStart :: Monad m => Pipe Text Text m r
444stripStart = do
445 chunk <- await
446 let text = T.stripStart chunk
447 if T.null text
448 then stripStart
b0d86a59 449 else do yield text
450 cat
7faef8bc 451{-# INLINEABLE stripStart #-}
452
31f41a5d 453-- | @(take n)@ only allows @n@ individual characters to pass;
454-- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass.
91727d11 455take :: (Monad m, Integral a) => a -> Pipe Text Text m ()
456take n0 = go n0 where
457 go n
458 | n <= 0 = return ()
459 | otherwise = do
31f41a5d 460 txt <- await
461 let len = fromIntegral (T.length txt)
91727d11 462 if (len > n)
31f41a5d 463 then yield (T.take (fromIntegral n) txt)
91727d11 464 else do
31f41a5d 465 yield txt
91727d11 466 go (n - len)
467{-# INLINABLE take #-}
468
31f41a5d 469-- | @(drop n)@ drops the first @n@ characters
91727d11 470drop :: (Monad m, Integral a) => a -> Pipe Text Text m r
471drop n0 = go n0 where
472 go n
473 | n <= 0 = cat
474 | otherwise = do
31f41a5d 475 txt <- await
476 let len = fromIntegral (T.length txt)
91727d11 477 if (len >= n)
478 then do
31f41a5d 479 yield (T.drop (fromIntegral n) txt)
91727d11 480 cat
481 else go (n - len)
482{-# INLINABLE drop #-}
483
31f41a5d 484-- | Take characters until they fail the predicate
91727d11 485takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m ()
486takeWhile predicate = go
487 where
488 go = do
31f41a5d 489 txt <- await
490 let (prefix, suffix) = T.span predicate txt
91727d11 491 if (T.null suffix)
492 then do
31f41a5d 493 yield txt
91727d11 494 go
495 else yield prefix
496{-# INLINABLE takeWhile #-}
497
31f41a5d 498-- | Drop characters until they fail the predicate
91727d11 499dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
500dropWhile predicate = go where
501 go = do
31f41a5d 502 txt <- await
503 case T.findIndex (not . predicate) txt of
91727d11 504 Nothing -> go
505 Just i -> do
31f41a5d 506 yield (T.drop i txt)
91727d11 507 cat
508{-# INLINABLE dropWhile #-}
509
510-- | Only allows 'Char's to pass if they satisfy the predicate
511filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r
512filter predicate = P.map (T.filter predicate)
513{-# INLINABLE filter #-}
514
ff38b9f0 515{-# RULES "p >-> filter q" forall p q .
516 p >-> filter q = for p (\txt -> yield (T.filter q txt))
517 #-}
518
31f41a5d 519-- | Strict left scan over the characters
91727d11 520scan
521 :: (Monad m)
522 => (Char -> Char -> Char) -> Char -> Pipe Text Text m r
523scan step begin = go begin
524 where
31f41a5d 525 go c = do
526 txt <- await
527 let txt' = T.scanl step c txt
528 c' = T.last txt'
529 yield txt'
530 go c'
91727d11 531{-# INLINABLE scan #-}
532
533{-| Fold a pure 'Producer' of strict 'Text's into a lazy
534 'TL.Text'
535-}
536toLazy :: Producer Text Identity () -> TL.Text
537toLazy = TL.fromChunks . P.toList
538{-# INLINABLE toLazy #-}
539
540{-| Fold an effectful 'Producer' of strict 'Text's into a lazy
541 'TL.Text'
542
543 Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for
544 simple testing purposes. Idiomatic @pipes@ style consumes the chunks
545 immediately as they are generated instead of loading them all into memory.
546-}
547toLazyM :: (Monad m) => Producer Text m () -> m TL.Text
548toLazyM = liftM TL.fromChunks . P.toListM
549{-# INLINABLE toLazyM #-}
550
31f41a5d 551-- | Reduce the text stream using a strict left fold over characters
64e03122 552foldChars
91727d11 553 :: Monad m
554 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
64e03122 555foldChars step begin done = P.fold (T.foldl' step) begin done
1677dc12 556{-# INLINABLE foldChars #-}
91727d11 557
558-- | Retrieve the first 'Char'
559head :: (Monad m) => Producer Text m () -> m (Maybe Char)
560head = go
561 where
562 go p = do
563 x <- nextChar p
564 case x of
565 Left _ -> return Nothing
31f41a5d 566 Right (c, _) -> return (Just c)
91727d11 567{-# INLINABLE head #-}
568
569-- | Retrieve the last 'Char'
570last :: (Monad m) => Producer Text m () -> m (Maybe Char)
571last = go Nothing
572 where
573 go r p = do
574 x <- next p
575 case x of
576 Left () -> return r
31f41a5d 577 Right (txt, p') ->
578 if (T.null txt)
91727d11 579 then go r p'
31f41a5d 580 else go (Just $ T.last txt) p'
91727d11 581{-# INLINABLE last #-}
582
583-- | Determine if the stream is empty
584null :: (Monad m) => Producer Text m () -> m Bool
585null = P.all T.null
586{-# INLINABLE null #-}
587
62e8521c 588-- | Count the number of characters in the stream
91727d11 589length :: (Monad m, Num n) => Producer Text m () -> m n
31f41a5d 590length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id
91727d11 591{-# INLINABLE length #-}
592
593-- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate
594any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
595any predicate = P.any (T.any predicate)
596{-# INLINABLE any #-}
597
598-- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate
599all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool
600all predicate = P.all (T.all predicate)
601{-# INLINABLE all #-}
602
62e8521c 603-- | Return the maximum 'Char' within a text stream
91727d11 604maximum :: (Monad m) => Producer Text m () -> m (Maybe Char)
605maximum = P.fold step Nothing id
606 where
31f41a5d 607 step mc txt =
608 if (T.null txt)
609 then mc
610 else Just $ case mc of
611 Nothing -> T.maximum txt
612 Just c -> max c (T.maximum txt)
91727d11 613{-# INLINABLE maximum #-}
614
62e8521c 615-- | Return the minimum 'Char' within a text stream (surely very useful!)
91727d11 616minimum :: (Monad m) => Producer Text m () -> m (Maybe Char)
617minimum = P.fold step Nothing id
618 where
31f41a5d 619 step mc txt =
620 if (T.null txt)
621 then mc
622 else case mc of
623 Nothing -> Just (T.minimum txt)
624 Just c -> Just (min c (T.minimum txt))
91727d11 625{-# INLINABLE minimum #-}
626
1677dc12 627
91727d11 628-- | Find the first element in the stream that matches the predicate
629find
630 :: (Monad m)
631 => (Char -> Bool) -> Producer Text m () -> m (Maybe Char)
632find predicate p = head (p >-> filter predicate)
633{-# INLINABLE find #-}
634
62e8521c 635-- | Index into a text stream
91727d11 636index
637 :: (Monad m, Integral a)
638 => a-> Producer Text m () -> m (Maybe Char)
639index n p = head (p >-> drop n)
640{-# INLINABLE index #-}
641
63ea9ffd 642
31f41a5d 643-- | Store a tally of how many segments match the given 'Text'
644count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
645count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
646{-# INLINABLE count #-}
647
9e9bb0ce 648
649{-| Consume the first character from a stream of 'Text'
650
651 'next' either fails with a 'Left' if the 'Producer' has no more characters or
652 succeeds with a 'Right' providing the next character and the remainder of the
653 'Producer'.
654-}
655nextChar
656 :: (Monad m)
657 => Producer Text m r
658 -> m (Either r (Char, Producer Text m r))
659nextChar = go
660 where
661 go p = do
662 x <- next p
663 case x of
664 Left r -> return (Left r)
665 Right (txt, p') -> case (T.uncons txt) of
666 Nothing -> go p'
667 Just (c, txt') -> return (Right (c, yield txt' >> p'))
668{-# INLINABLE nextChar #-}
669
670{-| Draw one 'Char' from a stream of 'Text', returning 'Left' if the
671 'Producer' is empty
672-}
673drawChar :: (Monad m) => Parser Text m (Maybe Char)
674drawChar = do
675 x <- PP.draw
676 case x of
677 Nothing -> return Nothing
678 Just txt -> case (T.uncons txt) of
679 Nothing -> drawChar
680 Just (c, txt') -> do
681 PP.unDraw txt'
682 return (Just c)
683{-# INLINABLE drawChar #-}
684
685-- | Push back a 'Char' onto the underlying 'Producer'
686unDrawChar :: (Monad m) => Char -> Parser Text m ()
687unDrawChar c = modify (yield (T.singleton c) >>)
688{-# INLINABLE unDrawChar #-}
689
690{-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to
691 push the 'Char' back
692
693> peekChar = do
694> x <- drawChar
695> case x of
696> Left _ -> return ()
697> Right c -> unDrawChar c
698> return x
699-}
700peekChar :: (Monad m) => Parser Text m (Maybe Char)
701peekChar = do
702 x <- drawChar
703 case x of
704 Nothing -> return ()
705 Just c -> unDrawChar c
706 return x
707{-# INLINABLE peekChar #-}
708
709{-| Check if the underlying 'Producer' has no more characters
710
711 Note that this will skip over empty 'Text' chunks, unlike
712 'PP.isEndOfInput' from @pipes-parse@, which would consider
713 an empty 'Text' a valid bit of input.
714
715> isEndOfChars = liftM isLeft peekChar
716-}
717isEndOfChars :: (Monad m) => Parser Text m Bool
718isEndOfChars = do
719 x <- peekChar
720 return (case x of
721 Nothing -> True
722 Just _-> False )
723{-# INLINABLE isEndOfChars #-}
724
725
4cbc92cc 726-- | An improper lens into a stream of 'ByteString' expected to be UTF-8 encoded; the associated
727-- stream of Text ends by returning a stream of ByteStrings beginning at the point of failure.
ca6f90a0 728
9e9bb0ce 729decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
730 (Producer Text m (Producer ByteString m r))
731decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
732 (k (go B.empty PE.streamDecodeUtf8 p0)) where
ca6f90a0 733 go !carry dec0 p = do
734 x <- lift (next p)
4cbc92cc 735 case x of Left r -> return (if B.null carry
736 then return r -- all bytestring input was consumed
737 else (do yield carry -- a potentially valid fragment remains
738 return r))
ca6f90a0 739
740 Right (chunk, p') -> case dec0 chunk of
741 PE.Some text carry2 dec -> do yield text
742 go carry2 dec p'
743 PE.Other text bs -> do yield text
744 return (do yield bs -- an invalid blob remains
745 p')
746{-# INLINABLE decodeUtf8 #-}
747
31f41a5d 748
749-- | Splits a 'Producer' after the given number of characters
91727d11 750splitAt
751 :: (Monad m, Integral n)
752 => n
9e9bb0ce 753 -> Lens' (Producer Text m r)
754 (Producer Text m (Producer Text m r))
755splitAt n0 k p0 = fmap join (k (go n0 p0))
91727d11 756 where
757 go 0 p = return p
758 go n p = do
759 x <- lift (next p)
760 case x of
761 Left r -> return (return r)
31f41a5d 762 Right (txt, p') -> do
763 let len = fromIntegral (T.length txt)
91727d11 764 if (len <= n)
765 then do
31f41a5d 766 yield txt
91727d11 767 go (n - len) p'
768 else do
31f41a5d 769 let (prefix, suffix) = T.splitAt (fromIntegral n) txt
91727d11 770 yield prefix
771 return (yield suffix >> p')
772{-# INLINABLE splitAt #-}
773
91727d11 774
31f41a5d 775{-| Split a text stream in two, where the first text stream is the longest
776 consecutive group of text that satisfy the predicate
91727d11 777-}
778span
779 :: (Monad m)
780 => (Char -> Bool)
9e9bb0ce 781 -> Lens' (Producer Text m r)
782 (Producer Text m (Producer Text m r))
783span predicate k p0 = fmap join (k (go p0))
91727d11 784 where
785 go p = do
786 x <- lift (next p)
787 case x of
788 Left r -> return (return r)
31f41a5d 789 Right (txt, p') -> do
790 let (prefix, suffix) = T.span predicate txt
91727d11 791 if (T.null suffix)
792 then do
31f41a5d 793 yield txt
91727d11 794 go p'
795 else do
796 yield prefix
797 return (yield suffix >> p')
798{-# INLINABLE span #-}
799
62e8521c 800{-| Split a text stream in two, where the first text stream is the longest
801 consecutive group of characters that don't satisfy the predicate
91727d11 802-}
803break
804 :: (Monad m)
805 => (Char -> Bool)
9e9bb0ce 806 -> Lens' (Producer Text m r)
807 (Producer Text m (Producer Text m r))
91727d11 808break predicate = span (not . predicate)
809{-# INLINABLE break #-}
810
9e9bb0ce 811{-| Improper lens that splits after the first group of equivalent Chars, as
812 defined by the given equivalence relation
813-}
814groupBy
815 :: (Monad m)
816 => (Char -> Char -> Bool)
817 -> Lens' (Producer Text m r)
818 (Producer Text m (Producer Text m r))
819groupBy equals k p0 = fmap join (k ((go p0))) where
820 go p = do
821 x <- lift (next p)
822 case x of
823 Left r -> return (return r)
824 Right (txt, p') -> case T.uncons txt of
825 Nothing -> go p'
826 Just (c, _) -> (yield txt >> p') ^. span (equals c)
827{-# INLINABLE groupBy #-}
828
829-- | Improper lens that splits after the first succession of identical 'Char' s
830group :: Monad m
831 => Lens' (Producer Text m r)
832 (Producer Text m (Producer Text m r))
833group = groupBy (==)
834{-# INLINABLE group #-}
835
836{-| Improper lens that splits a 'Producer' after the first word
837
838 Unlike 'words', this does not drop leading whitespace
839-}
840word :: (Monad m)
841 => Lens' (Producer Text m r)
842 (Producer Text m (Producer Text m r))
843word k p0 = fmap join (k (to p0))
844 where
845 to p = do
846 p' <- p^.span isSpace
847 p'^.break isSpace
848{-# INLINABLE word #-}
849
850
851line :: (Monad m)
852 => Lens' (Producer Text m r)
853 (Producer Text m (Producer Text m r))
854line = break (== '\n')
855
856{-# INLINABLE line #-}
857
858
859-- | Intersperse a 'Char' in between the characters of stream of 'Text'
860intersperse
861 :: (Monad m) => Char -> Producer Text m r -> Producer Text m r
862intersperse c = go0
863 where
864 go0 p = do
865 x <- lift (next p)
866 case x of
867 Left r -> return r
868 Right (txt, p') -> do
869 yield (T.intersperse c txt)
870 go1 p'
871 go1 p = do
872 x <- lift (next p)
873 case x of
874 Left r -> return r
875 Right (txt, p') -> do
876 yield (T.singleton c)
877 yield (T.intersperse c txt)
878 go1 p'
879{-# INLINABLE intersperse #-}
880
881
882
883-- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's
884packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x)
885packChars = Data.Profunctor.dimap to (fmap from)
886 where
887 -- to :: Monad m => Producer Char m x -> Producer Text m x
7ed76745 888 to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
9e9bb0ce 889
890 step diffAs c = diffAs . (c:)
891
892 done diffAs = T.pack (diffAs [])
893
894 -- from :: Monad m => Producer Text m x -> Producer Char m x
895 from p = for p (each . T.unpack)
896{-# INLINABLE packChars #-}
897
0f8c6f1b 898
899-- | Split a text stream into 'FreeT'-delimited text streams of fixed size
900chunksOf
901 :: (Monad m, Integral n)
902 => n -> Lens' (Producer Text m r)
903 (FreeT (Producer Text m) m r)
904chunksOf n k p0 = fmap concats (k (FreeT (go p0)))
905 where
906 go p = do
907 x <- next p
908 return $ case x of
7ed76745 909 Left r -> Pure r
910 Right (txt, p') -> Free $ do
0f8c6f1b 911 p'' <- (yield txt >> p') ^. splitAt n
7ed76745 912 return $ FreeT (go p'')
0f8c6f1b 913{-# INLINABLE chunksOf #-}
914
915
62e8521c 916{-| Split a text stream into sub-streams delimited by characters that satisfy the
91727d11 917 predicate
918-}
1677dc12 919splitsWith
91727d11 920 :: (Monad m)
921 => (Char -> Bool)
922 -> Producer Text m r
7ed76745 923 -> FreeT (Producer Text m) m r
924splitsWith predicate p0 = FreeT (go0 p0)
91727d11 925 where
926 go0 p = do
927 x <- next p
928 case x of
7ed76745 929 Left r -> return (Pure r)
31f41a5d 930 Right (txt, p') ->
931 if (T.null txt)
91727d11 932 then go0 p'
7ed76745 933 else return $ Free $ do
9e9bb0ce 934 p'' <- (yield txt >> p') ^. span (not . predicate)
7ed76745 935 return $ FreeT (go1 p'')
91727d11 936 go1 p = do
937 x <- nextChar p
938 return $ case x of
7ed76745 939 Left r -> Pure r
940 Right (_, p') -> Free $ do
9e9bb0ce 941 p'' <- p' ^. span (not . predicate)
7ed76745 942 return $ FreeT (go1 p'')
1677dc12 943{-# INLINABLE splitsWith #-}
91727d11 944
31f41a5d 945-- | Split a text stream using the given 'Char' as the delimiter
0f8c6f1b 946splits :: (Monad m)
91727d11 947 => Char
0f8c6f1b 948 -> Lens' (Producer Text m r)
949 (FreeT (Producer Text m) m r)
950splits c k p =
7ed76745 951 fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
0f8c6f1b 952{-# INLINABLE splits #-}
953
954{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the
955 given equivalence relation
956-}
957groupsBy
958 :: Monad m
959 => (Char -> Char -> Bool)
960 -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
7ed76745 961groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
0f8c6f1b 962 go p = do x <- next p
7ed76745 963 case x of Left r -> return (Pure r)
0f8c6f1b 964 Right (bs, p') -> case T.uncons bs of
965 Nothing -> go p'
7ed76745 966 Just (c, _) -> do return $ Free $ do
0f8c6f1b 967 p'' <- (yield bs >> p')^.span (equals c)
7ed76745 968 return $ FreeT (go p'')
0f8c6f1b 969{-# INLINABLE groupsBy #-}
970
971
972-- | Like 'groupsBy', where the equality predicate is ('==')
973groups
974 :: Monad m
975 => Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
976groups = groupsBy (==)
977{-# INLINABLE groups #-}
978
91727d11 979
91727d11 980
62e8521c 981{-| Split a text stream into 'FreeT'-delimited lines
91727d11 982-}
983lines
0f8c6f1b 984 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
985lines = Data.Profunctor.dimap _lines (fmap _unlines)
91727d11 986 where
7ed76745 987 _lines p0 = FreeT (go0 p0)
0f8c6f1b 988 where
989 go0 p = do
990 x <- next p
991 case x of
7ed76745 992 Left r -> return (Pure r)
0f8c6f1b 993 Right (txt, p') ->
994 if (T.null txt)
995 then go0 p'
7ed76745 996 else return $ Free $ go1 (yield txt >> p')
0f8c6f1b 997 go1 p = do
998 p' <- p ^. break ('\n' ==)
7ed76745 999 return $ FreeT $ do
0f8c6f1b 1000 x <- nextChar p'
1001 case x of
7ed76745 1002 Left r -> return $ Pure r
0f8c6f1b 1003 Right (_, p'') -> go0 p''
1004 -- _unlines
1005 -- :: Monad m
1006 -- => FreeT (Producer Text m) m x -> Producer Text m x
7fc48f7c 1007 _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
1008
0f8c6f1b 1009
91727d11 1010{-# INLINABLE lines #-}
91727d11 1011
31f41a5d 1012
1013
1014-- | Split a text stream into 'FreeT'-delimited words
91727d11 1015words
0f8c6f1b 1016 :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
1017words = Data.Profunctor.dimap go (fmap _unwords)
91727d11 1018 where
7ed76745 1019 go p = FreeT $ do
cf10d6f1 1020 x <- next (p >-> dropWhile isSpace)
1021 return $ case x of
7ed76745 1022 Left r -> Pure r
1023 Right (bs, p') -> Free $ do
9e9bb0ce 1024 p'' <- (yield bs >> p') ^. break isSpace
cf10d6f1 1025 return (go p'')
7ed76745 1026 _unwords = PG.intercalates (yield $ T.singleton ' ')
0f8c6f1b 1027
91727d11 1028{-# INLINABLE words #-}
1029
cf10d6f1 1030
31f41a5d 1031{-| 'intercalate' concatenates the 'FreeT'-delimited text streams after
1032 interspersing a text stream in between them
91727d11 1033-}
1034intercalate
1035 :: (Monad m)
1036 => Producer Text m ()
1037 -> FreeT (Producer Text m) m r
1038 -> Producer Text m r
1039intercalate p0 = go0
1040 where
1041 go0 f = do
7ed76745 1042 x <- lift (runFreeT f)
91727d11 1043 case x of
7ed76745 1044 Pure r -> return r
1045 Free p -> do
91727d11 1046 f' <- p
1047 go1 f'
1048 go1 f = do
7ed76745 1049 x <- lift (runFreeT f)
91727d11 1050 case x of
7ed76745 1051 Pure r -> return r
1052 Free p -> do
91727d11 1053 p0
1054 f' <- p
1055 go1 f'
1056{-# INLINABLE intercalate #-}
1057
62e8521c 1058{-| Join 'FreeT'-delimited lines into a text stream
91727d11 1059-}
1060unlines
1061 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
1062unlines = go
1063 where
1064 go f = do
7ed76745 1065 x <- lift (runFreeT f)
91727d11 1066 case x of
7ed76745 1067 Pure r -> return r
1068 Free p -> do
91727d11 1069 f' <- p
1070 yield $ T.singleton '\n'
1071 go f'
1072{-# INLINABLE unlines #-}
1073
31f41a5d 1074{-| Join 'FreeT'-delimited words into a text stream
91727d11 1075-}
1076unwords
1077 :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
7fc48f7c 1078unwords = intercalate (yield $ T.singleton ' ')
91727d11 1079{-# INLINABLE unwords #-}
1080
1081{- $parse
31f41a5d 1082 The following parsing utilities are single-character analogs of the ones found
1083 @pipes-parse@.
91727d11 1084-}
1085
91727d11 1086{- $reexports
91727d11 1087
1088 @Data.Text@ re-exports the 'Text' type.
1089
0f8c6f1b 1090 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
64e03122 1091-}
1092
90189414 1093codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
1094codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
1095 (k (decoder (dec B.empty) p0) ) where
1096 decoder :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1097 decoder !d p0 = case d of
1098 PE.Other txt bad -> do yield txt
1099 return (do yield bad
1100 p0)
1101 PE.Some txt extra dec -> do yield txt
1102 x <- lift (next p0)
1103 case x of Left r -> return (do yield extra
1104 return r)
1105 Right (chunk,p1) -> decoder (dec chunk) p1
1106
1107-- decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
1108-- (k (go B.empty PE.streamDecodeUtf8 p0)) where
1109
1110encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1111encodeAscii = go where
1112 go p = do echunk <- lift (next p)
1113 case echunk of
1114 Left r -> return (return r)
1115 Right (chunk, p') ->
1116 if T.null chunk
1117 then go p'
1118 else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
1119 in do yield (B8.pack (T.unpack safe))
1120 if T.null unsafe
1121 then go p'
1122 else return $ do yield unsafe
1123 p'
1124
1125encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1126encodeIso8859_1 = go where
1127 go p = do etxt <- lift (next p)
1128 case etxt of
1129 Left r -> return (return r)
1130 Right (txt, p') ->
1131 if T.null txt
1132 then go p'
1133 else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
1134 in do yield (B8.pack (T.unpack safe))
1135 if T.null unsafe
1136 then go p'
1137 else return $ do yield unsafe
1138 p'
1139
1140decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1141decodeAscii = go where
1142 go p = do echunk <- lift (next p)
1143 case echunk of
1144 Left r -> return (return r)
1145 Right (chunk, p') ->
1146 if B.null chunk
1147 then go p'
1148 else let (safe, unsafe) = B.span (<= 0x7F) chunk
1149 in do yield (T.pack (B8.unpack safe))
1150 if B.null unsafe
1151 then go p'
1152 else return $ do yield unsafe
1153 p'
1154
1155
1156decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1157decodeIso8859_1 = go where
1158 go p = do echunk <- lift (next p)
1159 case echunk of
1160 Left r -> return (return r)
1161 Right (chunk, p') ->
1162 if B.null chunk
1163 then go p'
1164 else let (safe, unsafe) = B.span (<= 0xFF) chunk
1165 in do yield (T.pack (B8.unpack safe))
1166 if B.null unsafe
1167 then go p'
1168 else return $ do yield unsafe
1169 p'
1170
1171
1172
1173{-
1174 ascii :: Codec
1175 ascii = Codec name enc (toDecoding dec) where
1176 name = T.pack "ASCII"
1177 enc text = (bytes, extra) where
1178 (safe, unsafe) = T.span (\c -> ord c <= 0x7F) text
1179 bytes = B8.pack (T.unpack safe)
1180 extra = if T.null unsafe
1181 then Nothing
1182 else Just (EncodeException ascii (T.head unsafe), unsafe)
1183
1184 dec bytes = (text, extra) where
1185 (safe, unsafe) = B.span (<= 0x7F) bytes
1186 text = T.pack (B8.unpack safe)
1187 extra = if B.null unsafe
1188 then Right B.empty
1189 else Left (DecodeException ascii (B.head unsafe), unsafe)
1190
1191 iso8859_1 :: Codec
1192 iso8859_1 = Codec name enc (toDecoding dec) where
1193 name = T.pack "ISO-8859-1"
1194 enc text = (bytes, extra) where
1195 (safe, unsafe) = T.span (\c -> ord c <= 0xFF) text
1196 bytes = B8.pack (T.unpack safe)
1197 extra = if T.null unsafe
1198 then Nothing
1199 else Just (EncodeException iso8859_1 (T.head unsafe), unsafe)
1200
1201 dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
1202-}
1203