about summary refs log tree commit diff homepage
path: root/Pipes
diff options
context:
space:
mode:
author michaelt <what_is_it_to_do_anything@yahoo.com> 2013-10-30 16:51:43 -0400
committer michaelt <what_is_it_to_do_anything@yahoo.com> 2013-10-30 16:51:43 -0400
commit 63ea9ffd3b32d1b4816e5b1e183d942df3d0de33 (patch)
tree b3e1dfb87f23b56fb437ffba6cb742c33891da47 /Pipes
parent acc6868f63bdbede411874f4cfdbbb2d4bfa41da (diff)
download text-pipes-63ea9ffd3b32d1b4816e5b1e183d942df3d0de33.tar.gz
text-pipes-63ea9ffd3b32d1b4816e5b1e183d942df3d0de33.tar.zst
text-pipes-63ea9ffd3b32d1b4816e5b1e183d942df3d0de33.zip
decodeUtf8With
Diffstat (limited to 'Pipes')
-rw-r--r-- Pipes/Text.hs 47
1 files changed, 30 insertions, 17 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs
index 1092491..bdd706a 100644
--- a/Pipes/Text.hs
+++ b/Pipes/Text.hs
@@ -2,9 +2,10 @@
2 2
3{-| This module provides @pipes@ utilities for \"text streams\", which are 3{-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but 4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 can interact lazy 'Text's and 'IO.Handle's. 5 a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
6 be associated with a 'Producer' or 'Consumer' according as it is read or written to.
6 7
7 To stream to or from 'IO.Handle's, use 'fromHandle' or 'toHandle'. For 8 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
8 example, the following program copies a document from one file to another: 9 example, the following program copies a document from one file to another:
9 10
10> import Pipes 11> import Pipes
@@ -101,8 +102,6 @@ module Pipes.Text (
101 minimum, 102 minimum,
102 find, 103 find,
103 index, 104 index,
104-- elemIndex,
105-- findIndex,
106 count, 105 count,
107 106
108 -- * Splitters 107 -- * Splitters
@@ -118,6 +117,7 @@ module Pipes.Text (
118 words, 117 words,
119#if MIN_VERSION_text(0,11,4) 118#if MIN_VERSION_text(0,11,4)
120 decodeUtf8, 119 decodeUtf8,
120 decodeUtf8With,
121#endif 121#endif
122 -- * Transformations 122 -- * Transformations
123 intersperse, 123 intersperse,
@@ -147,6 +147,7 @@ import Control.Monad.Trans.State.Strict (StateT(..))
147import qualified Data.Text as T 147import qualified Data.Text as T
148import qualified Data.Text.IO as T 148import qualified Data.Text.IO as T
149import qualified Data.Text.Encoding as TE 149import qualified Data.Text.Encoding as TE
150import qualified Data.Text.Encoding.Error as TE
150import Data.Text (Text) 151import Data.Text (Text)
151import qualified Data.Text.Lazy as TL 152import qualified Data.Text.Lazy as TL
152import qualified Data.Text.Lazy.IO as TL 153import qualified Data.Text.Lazy.IO as TL
@@ -172,6 +173,7 @@ import Pipes.Safe (MonadSafe(..), Base(..))
172import qualified Pipes.Prelude as P 173import qualified Pipes.Prelude as P
173import qualified System.IO as IO 174import qualified System.IO as IO
174import Data.Char (isSpace) 175import Data.Char (isSpace)
176import Data.Word (Word8)
175import Prelude hiding ( 177import Prelude hiding (
176 all, 178 all,
177 any, 179 any,
@@ -542,19 +544,7 @@ index
542index n p = head (p >-> drop n) 544index n p = head (p >-> drop n)
543{-# INLINABLE index #-} 545{-# INLINABLE index #-}
544 546
545-- | Find the index of an element that matches the given 'Char' 547
546-- elemIndex
547-- :: (Monad m, Num n) => Char -> Producer Text m () -> m (Maybe n)
548-- elemIndex w8 = findIndex (w8 ==)
549-- {-# INLINABLE elemIndex #-}
550
551-- | Store the first index of an element that satisfies the predicate
552-- findIndex
553-- :: (Monad m, Num n)
554-- => (Char -> Bool) -> Producer Text m () -> m (Maybe n)
555-- findIndex predicate p = P.head (p >-> findIndices predicate)
556-- {-# INLINABLE findIndex #-}
557--
558-- | Store a tally of how many segments match the given 'Text' 548-- | Store a tally of how many segments match the given 'Text'
559count :: (Monad m, Num n) => Text -> Producer Text m () -> m n 549count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
560count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) 550count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
@@ -581,6 +571,29 @@ decodeUtf8 = go TE.streamDecodeUtf8
581 yield l 571 yield l
582 p' 572 p'
583{-# INLINEABLE decodeUtf8 #-} 573{-# INLINEABLE decodeUtf8 #-}
574
575-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
576-- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@
577-- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\"
578decodeUtf8With
579 :: Monad m
580 => TE.OnDecodeError
581 -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
582decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
583 where go dec p = do
584 x <- lift (next p)
585 case x of
586 Left r -> return (return r)
587 Right (chunk, p') -> do
588 let TE.Some text l dec' = dec chunk
589 if B.null l
590 then do
591 yield text
592 go dec' p'
593 else return $ do
594 yield l
595 p'
596{-# INLINEABLE decodeUtf8With #-}
584#endif 597#endif
585 598
586-- | Splits a 'Producer' after the given number of characters 599-- | Splits a 'Producer' after the given number of characters