diff options
author | michaelt <what_is_it_to_do_anything@yahoo.com> | 2013-10-30 16:51:43 -0400 |
---|---|---|
committer | michaelt <what_is_it_to_do_anything@yahoo.com> | 2013-10-30 16:51:43 -0400 |
commit | 63ea9ffd3b32d1b4816e5b1e183d942df3d0de33 (patch) | |
tree | b3e1dfb87f23b56fb437ffba6cb742c33891da47 /Pipes | |
parent | acc6868f63bdbede411874f4cfdbbb2d4bfa41da (diff) | |
download | text-pipes-63ea9ffd3b32d1b4816e5b1e183d942df3d0de33.tar.gz text-pipes-63ea9ffd3b32d1b4816e5b1e183d942df3d0de33.tar.zst text-pipes-63ea9ffd3b32d1b4816e5b1e183d942df3d0de33.zip |
decodeUtf8With
Diffstat (limited to 'Pipes')
-rw-r--r-- | Pipes/Text.hs | 47 |
1 files changed, 30 insertions, 17 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs index 1092491..bdd706a 100644 --- a/Pipes/Text.hs +++ b/Pipes/Text.hs | |||
@@ -2,9 +2,10 @@ | |||
2 | 2 | ||
3 | {-| This module provides @pipes@ utilities for \"text streams\", which are | 3 | {-| This module provides @pipes@ utilities for \"text streams\", which are |
4 | streams of 'Text' chunks. The individual chunks are uniformly @strict@, but | 4 | streams of 'Text' chunks. The individual chunks are uniformly @strict@, but |
5 | can interact lazy 'Text's and 'IO.Handle's. | 5 | a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can |
6 | be associated with a 'Producer' or 'Consumer' according to whether it is read from or written to. | |
6 | 7 | ||
7 | To stream to or from 'IO.Handle's, use 'fromHandle' or 'toHandle'. For | 8 | To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For |
8 | example, the following program copies a document from one file to another: | 9 | example, the following program copies a document from one file to another: |
9 | 10 | ||
10 | > import Pipes | 11 | > import Pipes |
@@ -101,8 +102,6 @@ module Pipes.Text ( | |||
101 | minimum, | 102 | minimum, |
102 | find, | 103 | find, |
103 | index, | 104 | index, |
104 | -- elemIndex, | ||
105 | -- findIndex, | ||
106 | count, | 105 | count, |
107 | 106 | ||
108 | -- * Splitters | 107 | -- * Splitters |
@@ -118,6 +117,7 @@ module Pipes.Text ( | |||
118 | words, | 117 | words, |
119 | #if MIN_VERSION_text(0,11,4) | 118 | #if MIN_VERSION_text(0,11,4) |
120 | decodeUtf8, | 119 | decodeUtf8, |
120 | decodeUtf8With, | ||
121 | #endif | 121 | #endif |
122 | -- * Transformations | 122 | -- * Transformations |
123 | intersperse, | 123 | intersperse, |
@@ -147,6 +147,7 @@ import Control.Monad.Trans.State.Strict (StateT(..)) | |||
147 | import qualified Data.Text as T | 147 | import qualified Data.Text as T |
148 | import qualified Data.Text.IO as T | 148 | import qualified Data.Text.IO as T |
149 | import qualified Data.Text.Encoding as TE | 149 | import qualified Data.Text.Encoding as TE |
150 | import qualified Data.Text.Encoding.Error as TE | ||
150 | import Data.Text (Text) | 151 | import Data.Text (Text) |
151 | import qualified Data.Text.Lazy as TL | 152 | import qualified Data.Text.Lazy as TL |
152 | import qualified Data.Text.Lazy.IO as TL | 153 | import qualified Data.Text.Lazy.IO as TL |
@@ -172,6 +173,7 @@ import Pipes.Safe (MonadSafe(..), Base(..)) | |||
172 | import qualified Pipes.Prelude as P | 173 | import qualified Pipes.Prelude as P |
173 | import qualified System.IO as IO | 174 | import qualified System.IO as IO |
174 | import Data.Char (isSpace) | 175 | import Data.Char (isSpace) |
176 | import Data.Word (Word8) | ||
175 | import Prelude hiding ( | 177 | import Prelude hiding ( |
176 | all, | 178 | all, |
177 | any, | 179 | any, |
@@ -542,19 +544,7 @@ index | |||
542 | index n p = head (p >-> drop n) | 544 | index n p = head (p >-> drop n) |
543 | {-# INLINABLE index #-} | 545 | {-# INLINABLE index #-} |
544 | 546 | ||
545 | -- | Find the index of an element that matches the given 'Char' | 547 | |
546 | -- elemIndex | ||
547 | -- :: (Monad m, Num n) => Char -> Producer Text m () -> m (Maybe n) | ||
548 | -- elemIndex w8 = findIndex (w8 ==) | ||
549 | -- {-# INLINABLE elemIndex #-} | ||
550 | |||
551 | -- | Store the first index of an element that satisfies the predicate | ||
552 | -- findIndex | ||
553 | -- :: (Monad m, Num n) | ||
554 | -- => (Char -> Bool) -> Producer Text m () -> m (Maybe n) | ||
555 | -- findIndex predicate p = P.head (p >-> findIndices predicate) | ||
556 | -- {-# INLINABLE findIndex #-} | ||
557 | -- | ||
558 | -- | Store a tally of how many segments match the given 'Text' | 548 | -- | Store a tally of how many segments match the given 'Text' |
559 | count :: (Monad m, Num n) => Text -> Producer Text m () -> m n | 549 | count :: (Monad m, Num n) => Text -> Producer Text m () -> m n |
560 | count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) | 550 | count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) |
@@ -581,6 +571,29 @@ decodeUtf8 = go TE.streamDecodeUtf8 | |||
581 | yield l | 571 | yield l |
582 | p' | 572 | p' |
583 | {-# INLINEABLE decodeUtf8 #-} | 573 | {-# INLINEABLE decodeUtf8 #-} |
574 | |||
575 | -- | Transform a 'Producer' of 'ByteString's expected to be UTF-8 encoded | |
576 | -- into a 'Producer' of 'Text', using a replacement function of type @String -> Maybe Word8 -> Maybe Char@, | |
577 | -- e.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\" (U+FFFD). | |
578 | decodeUtf8With | ||
579 | :: Monad m | ||
580 | => TE.OnDecodeError | ||
581 | -> Producer ByteString m r -> Producer Text m (Producer ByteString m r) | ||
582 | decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr) | ||
583 | where go dec p = do | ||
584 | x <- lift (next p) | ||
585 | case x of | ||
586 | Left r -> return (return r) | ||
587 | Right (chunk, p') -> do | ||
588 | let TE.Some text l dec' = dec chunk | ||
589 | if B.null l | ||
590 | then do | ||
591 | yield text | ||
592 | go dec' p' | ||
593 | else return $ do | ||
594 | yield l | ||
595 | p' | ||
596 | {-# INLINEABLE decodeUtf8With #-} | ||
584 | #endif | 597 | #endif |
585 | 598 | ||
586 | -- | Splits a 'Producer' after the given number of characters | 599 | -- | Splits a 'Producer' after the given number of characters |