1 files changed, 30 insertions, 17 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs
index 1092491..bdd706a 100644
--- a/Pipes/Text.hs
+++ b/Pipes/Text.hs
@@ -2,9 +2,10 @@
 {-| This module provides @pipes@ utilities for \"text streams\", which are
    streams of 'Text' chunks.  The individual chunks are uniformly @strict@, but 
-    can interact lazy 'Text's  and 'IO.Handle's.
+    a 'Producer' can be converted to and from lazy 'Text's, and an 'IO.Handle' can
+    be associated with a 'Producer' when it is read from, or with a 'Consumer' when it is written to.
-    To stream to or from 'IO.Handle's, use 'fromHandle' or 'toHandle'.  For
+    To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'.  For
    example, the following program copies a document from one file to another:
 > import Pipes
@@ -101,8 +102,6 @@ module Pipes.Text  (
    minimum,
    find,
    index,
--    elemIndex,
--    findIndex,
    count,
    -- * Splitters
@@ -118,6 +117,7 @@ module Pipes.Text  (
    words,
 #if MIN_VERSION_text(0,11,4)
    decodeUtf8,
+    decodeUtf8With,
 #endif
    -- * Transformations
    intersperse,
@@ -147,6 +147,7 @@ import Control.Monad.Trans.State.Strict (StateT(..))
 import qualified Data.Text as T
 import qualified Data.Text.IO as T
 import qualified Data.Text.Encoding as TE
+import qualified Data.Text.Encoding.Error as TE
 import Data.Text (Text)
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.IO as TL
@@ -172,6 +173,7 @@ import Pipes.Safe (MonadSafe(..), Base(..))
 import qualified Pipes.Prelude as P
 import qualified System.IO as IO
 import Data.Char (isSpace)
+import Data.Word (Word8)
 import Prelude hiding (
    all,
    any,
@@ -542,19 +544,7 @@ index
 index n p = head (p >-> drop n)
 {-# INLINABLE index #-}
-- | Find the index of an element that matches the given 'Char'
-- elemIndex
--     :: (Monad m, Num n) => Char -> Producer Text m () -> m (Maybe n)
-- elemIndex w8 = findIndex (w8 ==)
-- {-# INLINABLE elemIndex #-}
-- | Store the first index of an element that satisfies the predicate
-- findIndex
--     :: (Monad m, Num n)
--     => (Char -> Bool) -> Producer Text m () -> m (Maybe n)
-- findIndex predicate p = P.head (p >-> findIndices predicate)
-- {-# INLINABLE findIndex #-}
-- 
 -- | Store a tally of how many segments match the given 'Text'
 count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
 count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
@@ -581,6 +571,29 @@ decodeUtf8 = go TE.streamDecodeUtf8
                          yield l
                          p'
 {-# INLINEABLE decodeUtf8 #-}
+-- | Decode a 'Producer' of UTF-8 encoded 'ByteString' chunks into a 'Producer'
+-- of 'Text', handling invalid input with the given replacement function of
+-- type @String -> Maybe Word8 -> Maybe Char@.
+-- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\"
+--
+-- A multi-byte character that is split across two chunks is decoded normally:
+-- the stream decoder carries the pending bytes over into the next chunk.
+--
+-- The returned 'Producer' yields the bytes of an incomplete character that
+-- were still pending when the input ended (it yields nothing if the input
+-- ended on a character boundary) and then returns the original result.
+decodeUtf8With
+  :: Monad m
+  => TE.OnDecodeError
+  -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+decodeUtf8With onErr = go B.empty (TE.streamDecodeUtf8With onErr)
+  where
+    -- @pending@ is the undecoded tail of the previous chunk.  The continuation
+    -- @dec@ already accounts for those bytes, so they are tracked here only to
+    -- hand them back to the caller if the input ends before they are completed.
+    go pending dec p = do
+        x <- lift (next p)
+        case x of
+            Left r -> return $
+                if B.null pending
+                    then return r
+                    else yield pending >> return r
+            Right (chunk, p') -> do
+                let TE.Some text pending' dec' = dec chunk
+                -- Always emit what was decoded: a non-empty remainder only
+                -- means this chunk ended in the middle of a character, not
+                -- that decoding failed.
+                yield text
+                go pending' dec' p'
+{-# INLINEABLE decodeUtf8With #-}
 #endif
 -- | Splits a 'Producer' after the given number of characters

diff --git a/Pipes/Text.hs b/Pipes/Text.hs index 1092491..bdd706a 100644 --- a/Pipes/Text.hs +++ b/Pipes/Text.hs
@@ -2,9 +2,10 @@
2		2
3	{-\| This module provides @pipes@ utilities for \"text streams\", which are	3	{-\| This module provides @pipes@ utilities for \"text streams\", which are
4	streams of 'Text' chunks. The individual chunks are uniformly @strict@, but	4	streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5	can interact lazy 'Text's and 'IO.Handle's.	5	a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
		6	be associated with a 'Producer' or 'Consumer' according as it is read or written to.
6		7
7	To stream to or from 'IO.Handle's, use 'fromHandle' or 'toHandle'. For	8	To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
8	example, the following program copies a document from one file to another:	9	example, the following program copies a document from one file to another:
9		10
10	> import Pipes	11	> import Pipes
@@ -101,8 +102,6 @@ module Pipes.Text (
101	minimum,	102	minimum,
102	find,	103	find,
103	index,	104	index,
104	-- elemIndex,
105	-- findIndex,
106	count,	105	count,
107		106
108	-- * Splitters	107	-- * Splitters
@@ -118,6 +117,7 @@ module Pipes.Text (
118	words,	117	words,
119	#if MIN_VERSION_text(0,11,4)	118	#if MIN_VERSION_text(0,11,4)
120	decodeUtf8,	119	decodeUtf8,
		120	decodeUtf8With,
121	#endif	121	#endif
122	-- * Transformations	122	-- * Transformations
123	intersperse,	123	intersperse,
@@ -147,6 +147,7 @@ import Control.Monad.Trans.State.Strict (StateT(..))
147	import qualified Data.Text as T	147	import qualified Data.Text as T
148	import qualified Data.Text.IO as T	148	import qualified Data.Text.IO as T
149	import qualified Data.Text.Encoding as TE	149	import qualified Data.Text.Encoding as TE
		150	import qualified Data.Text.Encoding.Error as TE
150	import Data.Text (Text)	151	import Data.Text (Text)
151	import qualified Data.Text.Lazy as TL	152	import qualified Data.Text.Lazy as TL
152	import qualified Data.Text.Lazy.IO as TL	153	import qualified Data.Text.Lazy.IO as TL
@@ -172,6 +173,7 @@ import Pipes.Safe (MonadSafe(..), Base(..))
172	import qualified Pipes.Prelude as P	173	import qualified Pipes.Prelude as P
173	import qualified System.IO as IO	174	import qualified System.IO as IO
174	import Data.Char (isSpace)	175	import Data.Char (isSpace)
		176	import Data.Word (Word8)
175	import Prelude hiding (	177	import Prelude hiding (
176	all,	178	all,
177	any,	179	any,
@@ -542,19 +544,7 @@ index
542	index n p = head (p >-> drop n)	544	index n p = head (p >-> drop n)
543	{-# INLINABLE index #-}	545	{-# INLINABLE index #-}
544		546
545	-- \| Find the index of an element that matches the given 'Char'	547
546	-- elemIndex
547	-- :: (Monad m, Num n) => Char -> Producer Text m () -> m (Maybe n)
548	-- elemIndex w8 = findIndex (w8 ==)
549	-- {-# INLINABLE elemIndex #-}
550
551	-- \| Store the first index of an element that satisfies the predicate
552	-- findIndex
553	-- :: (Monad m, Num n)
554	-- => (Char -> Bool) -> Producer Text m () -> m (Maybe n)
555	-- findIndex predicate p = P.head (p >-> findIndices predicate)
556	-- {-# INLINABLE findIndex #-}
557	--
558	-- \| Store a tally of how many segments match the given 'Text'	548	-- \| Store a tally of how many segments match the given 'Text'
559	count :: (Monad m, Num n) => Text -> Producer Text m () -> m n	549	count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
560	count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))	550	count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
@@ -581,6 +571,29 @@ decodeUtf8 = go TE.streamDecodeUtf8
581	yield l	571	yield l
582	p'	572	p'
583	{-# INLINEABLE decodeUtf8 #-}	573	{-# INLINEABLE decodeUtf8 #-}
		574
		575	-- \| Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
		576	-- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@
		577	-- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\"
		578	decodeUtf8With
		579	:: Monad m
		580	=> TE.OnDecodeError
		581	-> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
		582	decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
		583	where go dec p = do
		584	x <- lift (next p)
		585	case x of
		586	Left r -> return (return r)
		587	Right (chunk, p') -> do
		588	let TE.Some text l dec' = dec chunk
		589	if B.null l
		590	then do
		591	yield text
		592	go dec' p'
		593	else return $ do
		594	yield l
		595	p'
		596	{-# INLINEABLE decodeUtf8With #-}
584	#endif	597	#endif
585		598
586	-- \| Splits a 'Producer' after the given number of characters	599	-- \| Splits a 'Producer' after the given number of characters