aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authormichaelt <what_is_it_to_do_anything@yahoo.com>2014-01-25 21:42:54 -0500
committermichaelt <what_is_it_to_do_anything@yahoo.com>2014-01-25 21:42:54 -0500
commit64e03122e6ecc4898cb1b193cdcf3b26d3e71b14 (patch)
tree63b707950efcd92db00ac6979b792b9f30627e06
parent7ded3267a3b62ff896ea22262549f9511273c45f (diff)
downloadtext-pipes-64e03122e6ecc4898cb1b193cdcf3b26d3e71b14.tar.gz
text-pipes-64e03122e6ecc4898cb1b193cdcf3b26d3e71b14.tar.zst
text-pipes-64e03122e6ecc4898cb1b193cdcf3b26d3e71b14.zip
renamed fold foldChars and began updating documentation
-rw-r--r--Pipes/Text.hs74
-rw-r--r--Pipes/Text/Internal.hs264
-rw-r--r--Pipes/Text/Parse.hs18
-rw-r--r--pipes-text.cabal4
-rw-r--r--test/Test.hs17
5 files changed, 318 insertions, 59 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs
index cf493e9..99e4ed6 100644
--- a/Pipes/Text.hs
+++ b/Pipes/Text.hs
@@ -1,9 +1,12 @@
1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-} 1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-}
2 2#if __GLASGOW_HASKELL__ >= 702
3{-# LANGUAGE Trustworthy #-}
4#endif
3{-| This module provides @pipes@ utilities for \"text streams\", which are 5{-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but 6 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
5 a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can 7 a 'Producer' can be converted to and from lazy 'Text's, though this is generally
6 be associated with a 'Producer' or 'Consumer' according as it is read or written to. 8 unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
9 An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
7 10
8 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For 11 To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
9 example, the following program copies a document from one file to another: 12 example, the following program copies a document from one file to another:
@@ -52,9 +55,9 @@ To stream from files, the following is perhaps more Prelude-like (note that it u
52 55
53 Note that functions in this library are designed to operate on streams that 56 Note that functions in this library are designed to operate on streams that
54 are insensitive to text boundaries. This means that they may freely split 57 are insensitive to text boundaries. This means that they may freely split
55 text into smaller texts and /discard empty texts/. However, they will 58 text into smaller texts, /discard empty texts/. However, apart from the
56 /never concatenate texts/ in order to provide strict upper bounds on memory 59 special case of 'concatMap', they will /never concatenate texts/ in order
57 usage. 60 to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
58-} 61-}
59 62
60module Pipes.Text ( 63module Pipes.Text (
@@ -91,7 +94,7 @@ module Pipes.Text (
91 -- * Folds 94 -- * Folds
92 toLazy, 95 toLazy,
93 toLazyM, 96 toLazyM,
94 fold, 97 foldChars,
95 head, 98 head,
96 last, 99 last,
97 null, 100 null,
@@ -116,6 +119,7 @@ module Pipes.Text (
116 lines, 119 lines,
117 words, 120 words,
118 decodeUtf8, 121 decodeUtf8,
122 decode,
119 -- * Transformations 123 -- * Transformations
120 intersperse, 124 intersperse,
121 125
@@ -139,7 +143,7 @@ module Pipes.Text (
139 ) where 143 ) where
140 144
141import Control.Exception (throwIO, try) 145import Control.Exception (throwIO, try)
142import Control.Monad (liftM, unless) 146import Control.Monad (liftM, unless, join)
143import Control.Monad.Trans.State.Strict (StateT(..)) 147import Control.Monad.Trans.State.Strict (StateT(..))
144import Data.Monoid ((<>)) 148import Data.Monoid ((<>))
145import qualified Data.Text as T 149import qualified Data.Text as T
@@ -160,13 +164,14 @@ import Foreign.C.Error (Errno(Errno), ePIPE)
160import qualified GHC.IO.Exception as G 164import qualified GHC.IO.Exception as G
161import Pipes 165import Pipes
162import qualified Pipes.ByteString as PB 166import qualified Pipes.ByteString as PB
163import qualified Pipes.ByteString.Parse as PBP 167import qualified Pipes.ByteString as PBP
164import qualified Pipes.Text.Internal as PE 168import qualified Pipes.Text.Internal as PE
169import Pipes.Text.Internal (Codec(..))
165import Pipes.Text.Parse ( 170import Pipes.Text.Parse (
166 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) 171 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
167import Pipes.Core (respond, Server') 172import Pipes.Core (respond, Server')
168import qualified Pipes.Parse as PP 173import qualified Pipes.Parse as PP
169import Pipes.Parse (input, concat, FreeT) 174import Pipes.Parse ( FreeT)
170import qualified Pipes.Safe.Prelude as Safe 175import qualified Pipes.Safe.Prelude as Safe
171import qualified Pipes.Safe as Safe 176import qualified Pipes.Safe as Safe
172import Pipes.Safe (MonadSafe(..), Base(..)) 177import Pipes.Safe (MonadSafe(..), Base(..))
@@ -499,10 +504,10 @@ toLazyM = liftM TL.fromChunks . P.toListM
499{-# INLINABLE toLazyM #-} 504{-# INLINABLE toLazyM #-}
500 505
501-- | Reduce the text stream using a strict left fold over characters 506-- | Reduce the text stream using a strict left fold over characters
502fold 507foldChars
503 :: Monad m 508 :: Monad m
504 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r 509 => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
505fold step begin done = P.fold (T.foldl' step) begin done 510foldChars step begin done = P.fold (T.foldl' step) begin done
506{-# INLINABLE fold #-} 511{-# INLINABLE fold #-}
507 512
508-- | Retrieve the first 'Char' 513-- | Retrieve the first 'Char'
@@ -879,4 +884,45 @@ unwords = intercalate (yield $ T.pack " ")
879 @Data.Text@ re-exports the 'Text' type. 884 @Data.Text@ re-exports the 'Text' type.
880 885
881 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type). 886 @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
882-} \ No newline at end of file 887-}
888
889
890
891decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
892-- decode codec = go B.empty where
893-- go extra p0 =
894-- do x <- lift (next p0)
895-- case x of Right (chunk, p) ->
896-- do let (text, stuff) = codecDecode codec (B.append extra chunk)
897-- yield text
898-- case stuff of Right extra' -> go extra' p
899-- Left (exc,bs) -> do yield text
900-- return (do yield bs
901-- p)
902-- Left r -> return (do yield extra
903-- return r)
904
905decode d p0 = case d of
906 PE.Other txt bad -> do yield txt
907 return (do yield bad
908 p0)
909 PE.Some txt extra dec -> do yield txt
910 x <- lift (next p0)
911 case x of Left r -> return (do yield extra
912 return r)
913 Right (chunk,p1) -> decode (dec chunk) p1
914
915-- go !carry dec0 p = do
916-- x <- lift (next p)
917-- case x of Left r -> if B.null carry
918-- then return (return r) -- all bytestrinput was consumed
919-- else return (do yield carry -- a potentially valid fragment remains
920-- return r)
921--
922-- Right (chunk, p') -> case dec0 chunk of
923-- PE.Some text carry2 dec -> do yield text
924-- go carry2 dec p'
925-- PE.Other text bs -> do yield text
926-- return (do yield bs -- an invalid blob remains
927-- p')
928-- {-# INLINABLE decodeUtf8 #-}
diff --git a/Pipes/Text/Internal.hs b/Pipes/Text/Internal.hs
index 7e5b044..76c2f4f 100644
--- a/Pipes/Text/Internal.hs
+++ b/Pipes/Text/Internal.hs
@@ -1,5 +1,7 @@
1{-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface, GeneralizedNewtypeDeriving, MagicHash, 1{-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface #-}
2 UnliftedFFITypes #-} 2{-# LANGUAGE GeneralizedNewtypeDeriving, MagicHash, UnliftedFFITypes #-}
3{-# LANGUAGE DeriveDataTypeable, RankNTypes #-}
4
3-- This module lifts assorted materials from Brian O'Sullivan's text package 5-- This module lifts assorted materials from Brian O'Sullivan's text package
4-- especially Data.Text.Encoding in order to define a pipes-appropriate 6-- especially Data.Text.Encoding in order to define a pipes-appropriate
5-- streamDecodeUtf8 7-- streamDecodeUtf8
@@ -7,13 +9,20 @@ module Pipes.Text.Internal
7 ( Decoding(..) 9 ( Decoding(..)
8 , streamDecodeUtf8 10 , streamDecodeUtf8
9 , decodeSomeUtf8 11 , decodeSomeUtf8
12 , Codec(..)
13 , TextException(..)
14 , utf8
10 ) where 15 ) where
11import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO) 16import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO)
12import Control.Monad.ST (ST, runST) 17import Control.Monad.ST (ST, runST)
13import Data.Bits ((.&.)) 18import Data.Bits ((.&.))
14import Data.ByteString as B 19import Data.ByteString as B
20import Data.ByteString (ByteString)
15import Data.ByteString.Internal as B 21import Data.ByteString.Internal as B
16import qualified Data.Text as T (null) 22import Data.ByteString.Char8 as B8
23import Data.Text (Text)
24import qualified Data.Text as T
25import qualified Data.Text.Encoding as TE
17import Data.Text.Encoding.Error () 26import Data.Text.Encoding.Error ()
18import Data.Text.Internal (Text, textP) 27import Data.Text.Internal (Text, textP)
19import Foreign.C.Types (CSize) 28import Foreign.C.Types (CSize)
@@ -24,9 +33,226 @@ import Foreign.Storable (Storable, peek, poke)
24import GHC.Base (Char(..), Int(..), MutableByteArray#, ord#, iShiftRA#) 33import GHC.Base (Char(..), Int(..), MutableByteArray#, ord#, iShiftRA#)
25import GHC.Word (Word8, Word32) 34import GHC.Word (Word8, Word32)
26import qualified Data.Text.Array as A 35import qualified Data.Text.Array as A
27 36import Data.Word (Word8, Word16)
37import System.IO.Unsafe (unsafePerformIO)
38import qualified Control.Exception as Exc
39import Data.Bits ((.&.), (.|.), shiftL)
40import Data.Typeable
41import Control.Arrow (first)
42import Data.Maybe (catMaybes)
28#include "pipes_text_cbits.h" 43#include "pipes_text_cbits.h"
29 44
45
46-- | A specific character encoding.
47--
48-- Since 0.3.0
49data Codec = Codec
50 { codecName :: Text
51 , codecEncode :: Text -> (ByteString, Maybe (TextException, Text))
52 , codecDecode :: ByteString -> Decoding -- (Text, Either (TextException, ByteString) ByteString)
53 }
54
55instance Show Codec where
56 showsPrec d c = showParen (d > 10) $ showString "Codec " . shows (codecName c)
57
58-- Since 0.3.0
59data TextException = DecodeException Codec Word8
60 | EncodeException Codec Char
61 | LengthExceeded Int
62 | TextException Exc.SomeException
63 deriving (Show, Typeable)
64instance Exc.Exception TextException
65
66toDecoding :: (ByteString -> (Text, Either (TextException, ByteString) ByteString))
67 -> (ByteString -> Decoding)
68toDecoding op = loop B.empty where
69 loop extra bs0 = case op (B.append extra bs0) of
70 (txt, Right bs) -> Some txt bs (loop bs)
71 (txt, Left (_,bs)) -> Other txt bs
72
73
74splitSlowly :: (ByteString -> Text)
75 -> ByteString
76 -> (Text, Either (TextException, ByteString) ByteString)
77splitSlowly dec bytes = valid where
78 valid:_ = catMaybes $ Prelude.map decFirst $ splits (B.length bytes)
79 splits 0 = [(B.empty, bytes)]
80 splits n = B.splitAt n bytes : splits (n - 1)
81 decFirst (a, b) = case tryEvaluate (dec a) of
82 Left _ -> Nothing
83 Right text -> let trouble = case tryEvaluate (dec b) of
84 Left exc -> Left (TextException exc, b)
85 Right _ -> Right B.empty
86 in Just (text, trouble)
87 -- this case shouldn't occur,
88 -- since splitSlowly is only called
89 -- when parsing failed somewhere
90
91utf8 :: Codec
92utf8 = Codec name enc (toDecoding dec) where
93 name = T.pack "UTF-8"
94 enc text = (TE.encodeUtf8 text, Nothing)
95 dec bytes = case decodeSomeUtf8 bytes of
96 (t,b) -> (t, Right b)
97
98-- -- Whether the given byte is a continuation byte.
99-- isContinuation byte = byte .&. 0xC0 == 0x80
100--
101-- -- The number of continuation bytes needed by the given
102-- -- non-continuation byte. Returns -1 for an illegal UTF-8
103-- -- non-continuation byte and the whole split quickly must fail so
104-- -- as the input is passed to TE.decodeUtf8, which will issue a
105-- -- suitable error.
106-- required x0
107-- | x0 .&. 0x80 == 0x00 = 0
108-- | x0 .&. 0xE0 == 0xC0 = 1
109-- | x0 .&. 0xF0 == 0xE0 = 2
110-- | x0 .&. 0xF8 == 0xF0 = 3
111-- | otherwise = -1
112--
113-- splitQuickly bytes
114-- | B.null l || req == -1 = Nothing
115-- | req == B.length r = Just (TE.decodeUtf8 bytes, B.empty)
116-- | otherwise = Just (TE.decodeUtf8 l', r')
117-- where
118-- (l, r) = B.spanEnd isContinuation bytes
119-- req = required (B.last l)
120-- l' = B.init l
121-- r' = B.cons (B.last l) r
122
123-- |
124-- Since 0.3.0
125utf16_le :: Codec
126utf16_le = Codec name enc (toDecoding dec) where
127 name = T.pack "UTF-16-LE"
128 enc text = (TE.encodeUtf16LE text, Nothing)
129 dec bytes = case splitQuickly bytes of
130 Just (text, extra) -> (text, Right extra)
131 Nothing -> splitSlowly TE.decodeUtf16LE bytes
132
133 splitQuickly bytes = maybeDecode (loop 0) where
134 maxN = B.length bytes
135
136 loop n | n == maxN = decodeAll
137 | (n + 1) == maxN = decodeTo n
138 loop n = let
139 req = utf16Required
140 (B.index bytes n)
141 (B.index bytes (n + 1))
142 decodeMore = loop $! n + req
143 in if n + req > maxN
144 then decodeTo n
145 else decodeMore
146
147 decodeTo n = first TE.decodeUtf16LE (B.splitAt n bytes)
148 decodeAll = (TE.decodeUtf16LE bytes, B.empty)
149
150-- |
151-- Since 0.3.0
152utf16_be :: Codec
153utf16_be = Codec name enc (toDecoding dec) where
154 name = T.pack "UTF-16-BE"
155 enc text = (TE.encodeUtf16BE text, Nothing)
156 dec bytes = case splitQuickly bytes of
157 Just (text, extra) -> (text, Right extra)
158 Nothing -> splitSlowly TE.decodeUtf16BE bytes
159
160 splitQuickly bytes = maybeDecode (loop 0) where
161 maxN = B.length bytes
162
163 loop n | n == maxN = decodeAll
164 | (n + 1) == maxN = decodeTo n
165 loop n = let
166 req = utf16Required
167 (B.index bytes (n + 1))
168 (B.index bytes n)
169 decodeMore = loop $! n + req
170 in if n + req > maxN
171 then decodeTo n
172 else decodeMore
173
174 decodeTo n = first TE.decodeUtf16BE (B.splitAt n bytes)
175 decodeAll = (TE.decodeUtf16BE bytes, B.empty)
176
177utf16Required :: Word8 -> Word8 -> Int
178utf16Required x0 x1 = if x >= 0xD800 && x <= 0xDBFF then 4 else 2 where
179 x :: Word16
180 x = (fromIntegral x1 `shiftL` 8) .|. fromIntegral x0
181
182-- |
183-- Since 0.3.0
184utf32_le :: Codec
185utf32_le = Codec name enc (toDecoding dec) where
186 name = T.pack "UTF-32-LE"
187 enc text = (TE.encodeUtf32LE text, Nothing)
188 dec bs = case utf32SplitBytes TE.decodeUtf32LE bs of
189 Just (text, extra) -> (text, Right extra)
190 Nothing -> splitSlowly TE.decodeUtf32LE bs
191
192-- |
193-- Since 0.3.0
194utf32_be :: Codec
195utf32_be = Codec name enc (toDecoding dec) where
196 name = T.pack "UTF-32-BE"
197 enc text = (TE.encodeUtf32BE text, Nothing)
198 dec bs = case utf32SplitBytes TE.decodeUtf32BE bs of
199 Just (text, extra) -> (text, Right extra)
200 Nothing -> splitSlowly TE.decodeUtf32BE bs
201
202utf32SplitBytes :: (ByteString -> Text)
203 -> ByteString
204 -> Maybe (Text, ByteString)
205utf32SplitBytes dec bytes = split where
206 split = maybeDecode (dec toDecode, extra)
207 len = B.length bytes
208 lenExtra = mod len 4
209
210 lenToDecode = len - lenExtra
211 (toDecode, extra) = if lenExtra == 0
212 then (bytes, B.empty)
213 else B.splitAt lenToDecode bytes
214
215-- |
216-- Since 0.3.0
217ascii :: Codec
218ascii = Codec name enc (toDecoding dec) where
219 name = T.pack "ASCII"
220 enc text = (bytes, extra) where
221 (safe, unsafe) = T.span (\c -> ord c <= 0x7F) text
222 bytes = B8.pack (T.unpack safe)
223 extra = if T.null unsafe
224 then Nothing
225 else Just (EncodeException ascii (T.head unsafe), unsafe)
226
227 dec bytes = (text, extra) where
228 (safe, unsafe) = B.span (<= 0x7F) bytes
229 text = T.pack (B8.unpack safe)
230 extra = if B.null unsafe
231 then Right B.empty
232 else Left (DecodeException ascii (B.head unsafe), unsafe)
233
234-- |
235-- Since 0.3.0
236iso8859_1 :: Codec
237iso8859_1 = Codec name enc (toDecoding dec) where
238 name = T.pack "ISO-8859-1"
239 enc text = (bytes, extra) where
240 (safe, unsafe) = T.span (\c -> ord c <= 0xFF) text
241 bytes = B8.pack (T.unpack safe)
242 extra = if T.null unsafe
243 then Nothing
244 else Just (EncodeException iso8859_1 (T.head unsafe), unsafe)
245
246 dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
247
248tryEvaluate :: a -> Either Exc.SomeException a
249tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate
250
251maybeDecode :: (a, b) -> Maybe (a, b)
252maybeDecode (a, b) = case tryEvaluate a of
253 Left _ -> Nothing
254 Right _ -> Just (a, b)
255
30-- | A stream oriented decoding result. 256-- | A stream oriented decoding result.
31data Decoding = Some Text ByteString (ByteString -> Decoding) 257data Decoding = Some Text ByteString (ByteString -> Decoding)
32 | Other Text ByteString 258 | Other Text ByteString
@@ -103,36 +329,6 @@ decodeSomeUtf8 bs@(PS fp off len) = runST $ do
103 return $! (chunkText, remaining) 329 return $! (chunkText, remaining)
104{-# INLINE decodeSomeUtf8 #-} 330{-# INLINE decodeSomeUtf8 #-}
105 331
106-- decodeSomeUtf8 :: ByteString -> (Text, ByteString)
107-- decodeSomeUtf8 bs@(PS fp off len) =
108-- runST $ do marray <- A.new (len+1)
109-- unsafeIOToST (decodeChunkToBuffer marray)
110--
111-- where
112-- decodeChunkToBuffer :: A.MArray s -> IO (Text, ByteString)
113-- decodeChunkToBuffer dest = withForeignPtr fp $ \ptr ->
114-- with (0::CSize) $ \destOffPtr ->
115-- with (0::CodePoint) $ \codepointPtr ->
116-- with (0::DecoderState) $ \statePtr ->
117-- with nullPtr $ \curPtrPtr ->
118-- do let end = ptr `plusPtr` (off + len)
119-- curPtr = ptr `plusPtr` off
120-- poke curPtrPtr curPtr
121-- c_decode_utf8_with_state (A.maBA dest) destOffPtr curPtrPtr end codepointPtr statePtr
122-- state <- peek statePtr
123-- lastPtr <- peek curPtrPtr
124-- codepoint <- peek codepointPtr
125-- n <- peek destOffPtr
126-- chunkText <- unsafeSTToIO $ do arr <- A.unsafeFreeze dest
127-- return $! textP arr 0 (fromIntegral n)
128-- let left = lastPtr `minusPtr` curPtr
129-- remaining = B.drop left bs
130-- return $! (chunkText, remaining)
131-- {-# INLINE decodeChunkToBuffer #-}
132-- {-# INLINE decodeSomeUtf8 #-}
133
134
135
136mkText :: A.MArray s -> CSize -> IO Text 332mkText :: A.MArray s -> CSize -> IO Text
137mkText dest n = unsafeSTToIO $ do arr <- A.unsafeFreeze dest 333mkText dest n = unsafeSTToIO $ do arr <- A.unsafeFreeze dest
138 return $! textP arr 0 (fromIntegral n) 334 return $! textP arr 0 (fromIntegral n)
diff --git a/Pipes/Text/Parse.hs b/Pipes/Text/Parse.hs
index ed0afa1..9cabaa6 100644
--- a/Pipes/Text/Parse.hs
+++ b/Pipes/Text/Parse.hs
@@ -44,16 +44,16 @@ nextChar = go
44{-| Draw one 'Char' from the underlying 'Producer', returning 'Left' if the 44{-| Draw one 'Char' from the underlying 'Producer', returning 'Left' if the
45 'Producer' is empty 45 'Producer' is empty
46-} 46-}
47drawChar :: (Monad m) => StateT (Producer Text m r) m (Either r Char) 47drawChar :: (Monad m) => StateT (Producer Text m r) m (Maybe Char)
48drawChar = do 48drawChar = do
49 x <- PP.draw 49 x <- PP.draw
50 case x of 50 case x of
51 Left r -> return (Left r) 51 Nothing -> return Nothing
52 Right txt -> case (T.uncons txt) of 52 Just txt -> case (T.uncons txt) of
53 Nothing -> drawChar 53 Nothing -> drawChar
54 Just (c, txt') -> do 54 Just (c, txt') -> do
55 PP.unDraw txt' 55 PP.unDraw txt'
56 return (Right c) 56 return (Just c)
57{-# INLINABLE drawChar #-} 57{-# INLINABLE drawChar #-}
58 58
59-- | Push back a 'Char' onto the underlying 'Producer' 59-- | Push back a 'Char' onto the underlying 'Producer'
@@ -71,12 +71,12 @@ unDrawChar c = modify (yield (T.singleton c) >>)
71> Right c -> unDrawChar c 71> Right c -> unDrawChar c
72> return x 72> return x
73-} 73-}
74peekChar :: (Monad m) => StateT (Producer Text m r) m (Either r Char) 74peekChar :: (Monad m) => StateT (Producer Text m r) m (Maybe Char)
75peekChar = do 75peekChar = do
76 x <- drawChar 76 x <- drawChar
77 case x of 77 case x of
78 Left _ -> return () 78 Nothing -> return ()
79 Right c -> unDrawChar c 79 Just c -> unDrawChar c
80 return x 80 return x
81{-# INLINABLE peekChar #-} 81{-# INLINABLE peekChar #-}
82 82
@@ -91,8 +91,8 @@ isEndOfChars :: (Monad m) => StateT (Producer Text m r) m Bool
91isEndOfChars = do 91isEndOfChars = do
92 x <- peekChar 92 x <- peekChar
93 return (case x of 93 return (case x of
94 Left _ -> True 94 Nothing -> True
95 Right _ -> False ) 95 Just _-> False )
96{-# INLINABLE isEndOfChars #-} 96{-# INLINABLE isEndOfChars #-}
97 97
98{-| @(take n)@ only allows @n@ characters to pass 98{-| @(take n)@ only allows @n@ characters to pass
diff --git a/pipes-text.cabal b/pipes-text.cabal
index b4388be..4e77d1b 100644
--- a/pipes-text.cabal
+++ b/pipes-text.cabal
@@ -20,9 +20,9 @@ library
20 build-depends: base >= 4 && < 5 , 20 build-depends: base >= 4 && < 5 ,
21 transformers >= 0.2.0.0 && < 0.4, 21 transformers >= 0.2.0.0 && < 0.4,
22 pipes >=4.0 && < 4.2, 22 pipes >=4.0 && < 4.2,
23 pipes-parse >=2.0 && < 2.2, 23 pipes-parse >=2.0 && < 3.1,
24 pipes-safe, 24 pipes-safe,
25 pipes-bytestring >= 1.0 && < 1.2, 25 pipes-bytestring >= 1.0 && < 2.1,
26 transformers >= 0.3 && < 0.4, 26 transformers >= 0.3 && < 0.4,
27 text >=0.11 && < 0.12, 27 text >=0.11 && < 0.12,
28 bytestring >=0.10 && < 0.11, 28 bytestring >=0.10 && < 0.11,
diff --git a/test/Test.hs b/test/Test.hs
index 373bafb..7832f76 100644
--- a/test/Test.hs
+++ b/test/Test.hs
@@ -31,6 +31,7 @@ tests = testGroup "stream_decode" [
31 -- testProperty "t_utf8_incr_valid" t_utf8_incr_valid, 31 -- testProperty "t_utf8_incr_valid" t_utf8_incr_valid,
32 testProperty "t_utf8_incr_mixed" t_utf8_incr_mixed , 32 testProperty "t_utf8_incr_mixed" t_utf8_incr_mixed ,
33 testProperty "t_utf8_incr_pipe" t_utf8_incr_pipe, 33 testProperty "t_utf8_incr_pipe" t_utf8_incr_pipe,
34 testProperty "t_utf8_incr_decoding" t_utf8_incr_decoding,
34 testProperty "t_utf8_dec_some" t_utf8_dec_some] 35 testProperty "t_utf8_dec_some" t_utf8_dec_some]
35 36
36t_utf8_incr_valid = do 37t_utf8_incr_valid = do
@@ -83,6 +84,22 @@ t_utf8_incr_pipe = do
83 appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append 84 appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append
84 85
85-- 86--
87t_utf8_incr_decoding = do
88 Positive m <- arbitrary
89 Positive n <- arbitrary
90 txt <- genUnicode
91 let chunkSize = mod n 7 + 1
92 bytesLength = mod 10 m
93 forAll (vector bytesLength) $
94 (BL.toStrict . BP.toLazy . roundtrip . P.each . chunk chunkSize . appendBytes txt)
95 `eq`
96 appendBytes txt
97 where
98 roundtrip :: Monad m => P.Producer B.ByteString m r -> P.Producer B.ByteString m r
99 roundtrip p = join (TP.decode utf8_start p P.>-> TP.encodeUtf8)
100 chunk n bs = let (a,b) = B.splitAt n bs in if B.null a then [] else a : chunk n b
101 appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append
102 utf8_start = PE.Some T.empty B.empty (PE.codecDecode PE.utf8)
86t_utf8_dec_some = do 103t_utf8_dec_some = do
87 Positive m <- arbitrary 104 Positive m <- arbitrary
88 txt <- genUnicode 105 txt <- genUnicode