diff options
Diffstat (limited to 'Pipes/Text.hs')
-rw-r--r-- | Pipes/Text.hs | 148 |
1 files changed, 42 insertions, 106 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs index a5859a3..cf493e9 100644 --- a/Pipes/Text.hs +++ b/Pipes/Text.hs | |||
@@ -1,4 +1,4 @@ | |||
1 | {-# LANGUAGE RankNTypes, TypeFamilies, CPP #-} | 1 | {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-} |
2 | 2 | ||
3 | {-| This module provides @pipes@ utilities for \"text streams\", which are | 3 | {-| This module provides @pipes@ utilities for \"text streams\", which are |
4 | streams of 'Text' chunks. The individual chunks are uniformly @strict@, but | 4 | streams of 'Text' chunks. The individual chunks are uniformly @strict@, but |
@@ -81,10 +81,6 @@ module Pipes.Text ( | |||
81 | filter, | 81 | filter, |
82 | scan, | 82 | scan, |
83 | encodeUtf8, | 83 | encodeUtf8, |
84 | #if MIN_VERSION_text(0,11,4) | ||
85 | pipeDecodeUtf8, | ||
86 | pipeDecodeUtf8With, | ||
87 | #endif | ||
88 | pack, | 84 | pack, |
89 | unpack, | 85 | unpack, |
90 | toCaseFold, | 86 | toCaseFold, |
@@ -119,10 +115,7 @@ module Pipes.Text ( | |||
119 | group, | 115 | group, |
120 | lines, | 116 | lines, |
121 | words, | 117 | words, |
122 | #if MIN_VERSION_text(0,11,4) | ||
123 | decodeUtf8, | 118 | decodeUtf8, |
124 | decodeUtf8With, | ||
125 | #endif | ||
126 | -- * Transformations | 119 | -- * Transformations |
127 | intersperse, | 120 | intersperse, |
128 | 121 | ||
@@ -148,6 +141,7 @@ module Pipes.Text ( | |||
148 | import Control.Exception (throwIO, try) | 141 | import Control.Exception (throwIO, try) |
149 | import Control.Monad (liftM, unless) | 142 | import Control.Monad (liftM, unless) |
150 | import Control.Monad.Trans.State.Strict (StateT(..)) | 143 | import Control.Monad.Trans.State.Strict (StateT(..)) |
144 | import Data.Monoid ((<>)) | ||
151 | import qualified Data.Text as T | 145 | import qualified Data.Text as T |
152 | import qualified Data.Text.IO as T | 146 | import qualified Data.Text.IO as T |
153 | import qualified Data.Text.Encoding as TE | 147 | import qualified Data.Text.Encoding as TE |
@@ -167,6 +161,7 @@ import qualified GHC.IO.Exception as G | |||
167 | import Pipes | 161 | import Pipes |
168 | import qualified Pipes.ByteString as PB | 162 | import qualified Pipes.ByteString as PB |
169 | import qualified Pipes.ByteString.Parse as PBP | 163 | import qualified Pipes.ByteString.Parse as PBP |
164 | import qualified Pipes.Text.Internal as PE | ||
170 | import Pipes.Text.Parse ( | 165 | import Pipes.Text.Parse ( |
171 | nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) | 166 | nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) |
172 | import Pipes.Core (respond, Server') | 167 | import Pipes.Core (respond, Server') |
@@ -211,48 +206,36 @@ import Prelude hiding ( | |||
211 | -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's | 206 | -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's |
212 | fromLazy :: (Monad m) => TL.Text -> Producer' Text m () | 207 | fromLazy :: (Monad m) => TL.Text -> Producer' Text m () |
213 | fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) | 208 | fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) |
214 | {-# INLINABLE fromLazy #-} | 209 | {-# INLINE fromLazy #-} |
215 | 210 | ||
216 | -- | Stream text from 'stdin' | 211 | -- | Stream text from 'stdin' |
217 | stdin :: MonadIO m => Producer' Text m () | 212 | stdin :: MonadIO m => Producer Text m () |
218 | stdin = fromHandle IO.stdin | 213 | stdin = fromHandle IO.stdin |
219 | {-# INLINABLE stdin #-} | 214 | {-# INLINE stdin #-} |
220 | 215 | ||
221 | {-| Convert a 'IO.Handle' into a text stream using a text size | 216 | {-| Convert a 'IO.Handle' into a text stream using a text size |
222 | determined by the good sense of the text library. | 217 | determined by the good sense of the text library; note that this |
223 | 218 | is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@ | |
219 | but uses the system encoding and has other `Data.Text.IO` features | ||
224 | -} | 220 | -} |
225 | 221 | ||
226 | fromHandle :: MonadIO m => IO.Handle -> Producer' Text m () | 222 | fromHandle :: MonadIO m => IO.Handle -> Producer Text m () |
227 | #if MIN_VERSION_text(0,11,4) | 223 | fromHandle h = go where |
228 | fromHandle h = go TE.streamDecodeUtf8 where | 224 | go = do txt <- liftIO (T.hGetChunk h) |
229 | act = B.hGetSome h defaultChunkSize | 225 | unless (T.null txt) $ do yield txt |
230 | go dec = do chunk <- liftIO act | 226 | go |
231 | case dec chunk of | ||
232 | TE.Some text _ dec' -> do yield text | ||
233 | unless (B.null chunk) (go dec') | ||
234 | {-# INLINE fromHandle#-} | ||
235 | -- bytestring fromHandle + streamDecodeUtf8 is 3 times as fast as | ||
236 | -- the dedicated Text IO function 'hGetChunk' ; | ||
237 | -- this way "runEffect $ PT.fromHandle hIn >-> PT.toHandle hOut" | ||
238 | -- runs the same as the conduit equivalent, only slightly slower | ||
239 | -- than "runEffect $ PB.fromHandle hIn >-> PB.toHandle hOut" | ||
240 | #else | ||
241 | fromHandle h = go where | ||
242 | go = do txt <- liftIO (T.hGetChunk h) | ||
243 | unless (T.null txt) $ do yield txt | ||
244 | go | ||
245 | {-# INLINABLE fromHandle#-} | 227 | {-# INLINABLE fromHandle#-} |
246 | #endif | 228 | |
247 | {-| Stream text from a file using Pipes.Safe | 229 | |
230 | {-| Stream text from a file in the simple fashion of @Data.Text.IO@ | ||
248 | 231 | ||
249 | >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout | 232 | >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout |
250 | MAIN = PUTSTRLN "HELLO WORLD" | 233 | MAIN = PUTSTRLN "HELLO WORLD" |
251 | -} | 234 | -} |
252 | 235 | ||
253 | readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m () | 236 | readFile :: MonadSafe m => FilePath -> Producer Text m () |
254 | readFile file = Safe.withFile file IO.ReadMode fromHandle | 237 | readFile file = Safe.withFile file IO.ReadMode fromHandle |
255 | {-# INLINABLE readFile #-} | 238 | {-# INLINE readFile #-} |
256 | 239 | ||
257 | {-| Stream lines of text from stdin (for testing in ghci etc.) | 240 | {-| Stream lines of text from stdin (for testing in ghci etc.) |
258 | 241 | ||
@@ -272,7 +255,7 @@ stdinLn = go where | |||
272 | txt <- liftIO (T.hGetLine IO.stdin) | 255 | txt <- liftIO (T.hGetLine IO.stdin) |
273 | yield txt | 256 | yield txt |
274 | go | 257 | go |
275 | 258 | {-# INLINABLE stdinLn #-} | |
276 | 259 | ||
277 | {-| Stream text to 'stdout' | 260 | {-| Stream text to 'stdout' |
278 | 261 | ||
@@ -326,8 +309,9 @@ toHandle h = for cat (liftIO . T.hPutStr h) | |||
326 | 309 | ||
327 | 310 | ||
328 | -- | Stream text into a file. Uses @pipes-safe@. | 311 | -- | Stream text into a file. Uses @pipes-safe@. |
329 | writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m () | 312 | writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m () |
330 | writeFile file = Safe.withFile file IO.WriteMode toHandle | 313 | writeFile file = Safe.withFile file IO.WriteMode toHandle |
314 | {-# INLINE writeFile #-} | ||
331 | 315 | ||
332 | -- | Apply a transformation to each 'Char' in the stream | 316 | -- | Apply a transformation to each 'Char' in the stream |
333 | map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r | 317 | map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r |
@@ -610,74 +594,26 @@ count :: (Monad m, Num n) => Text -> Producer Text m () -> m n | |||
610 | count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) | 594 | count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) |
611 | {-# INLINABLE count #-} | 595 | {-# INLINABLE count #-} |
612 | 596 | ||
613 | #if MIN_VERSION_text(0,11,4) | 597 | -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text |
614 | -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded | 598 | -- returning a Pipe of ByteStrings that begins at the point of failure. |
615 | -- into a Pipe of Text | 599 | |
616 | decodeUtf8 | 600 | decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r) |
617 | :: Monad m | 601 | decodeUtf8 = go B.empty PE.streamDecodeUtf8 where |
618 | => Producer ByteString m r -> Producer Text m (Producer ByteString m r) | 602 | go !carry dec0 p = do |
619 | decodeUtf8 = go TE.streamDecodeUtf8 | 603 | x <- lift (next p) |
620 | where go dec p = do | 604 | case x of Left r -> if B.null carry |
621 | x <- lift (next p) | 605 | then return (return r) -- all bytestrinput was consumed |
622 | case x of | 606 | else return (do yield carry -- a potentially valid fragment remains |
623 | Left r -> return (return r) | 607 | return r) |
624 | Right (chunk, p') -> do | 608 | |
625 | let TE.Some text l dec' = dec chunk | 609 | Right (chunk, p') -> case dec0 chunk of |
626 | if B.null l | 610 | PE.Some text carry2 dec -> do yield text |
627 | then do | 611 | go carry2 dec p' |
628 | yield text | 612 | PE.Other text bs -> do yield text |
629 | go dec' p' | 613 | return (do yield bs -- an invalid blob remains |
630 | else return $ do | 614 | p') |
631 | yield l | 615 | {-# INLINABLE decodeUtf8 #-} |
632 | p' | 616 | |
633 | {-# INLINEABLE decodeUtf8 #-} | ||
634 | |||
635 | -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded | ||
636 | -- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@ | ||
637 | -- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\" | ||
638 | decodeUtf8With | ||
639 | :: Monad m | ||
640 | => TE.OnDecodeError | ||
641 | -> Producer ByteString m r -> Producer Text m (Producer ByteString m r) | ||
642 | decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr) | ||
643 | where go dec p = do | ||
644 | x <- lift (next p) | ||
645 | case x of | ||
646 | Left r -> return (return r) | ||
647 | Right (chunk, p') -> do | ||
648 | let TE.Some text l dec' = dec chunk | ||
649 | if B.null l | ||
650 | then do | ||
651 | yield text | ||
652 | go dec' p' | ||
653 | else return $ do | ||
654 | yield l | ||
655 | p' | ||
656 | {-# INLINEABLE decodeUtf8With #-} | ||
657 | |||
658 | -- | A simple pipe from 'ByteString' to 'Text'; a decoding error will arise | ||
659 | -- with any chunk that contains a sequence of bytes that is unreadable. Otherwise | ||
660 | -- only few bytes will only be moved from one chunk to the next before decoding. | ||
661 | pipeDecodeUtf8 :: Monad m => Pipe ByteString Text m r | ||
662 | pipeDecodeUtf8 = go TE.streamDecodeUtf8 | ||
663 | where go dec = do chunk <- await | ||
664 | case dec chunk of | ||
665 | TE.Some text l dec' -> do yield text | ||
666 | go dec' | ||
667 | {-# INLINEABLE pipeDecodeUtf8 #-} | ||
668 | |||
669 | -- | A simple pipe from 'ByteString' to 'Text' using a replacement function. | ||
670 | pipeDecodeUtf8With | ||
671 | :: Monad m | ||
672 | => TE.OnDecodeError | ||
673 | -> Pipe ByteString Text m r | ||
674 | pipeDecodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr) | ||
675 | where go dec = do chunk <- await | ||
676 | case dec chunk of | ||
677 | TE.Some text l dec' -> do yield text | ||
678 | go dec' | ||
679 | {-# INLINEABLE pipeDecodeUtf8With #-} | ||
680 | #endif | ||
681 | 617 | ||
682 | -- | Splits a 'Producer' after the given number of characters | 618 | -- | Splits a 'Producer' after the given number of characters |
683 | splitAt | 619 | splitAt |