diff options
author | michaelt <what_is_it_to_do_anything@yahoo.com> | 2013-12-25 22:25:07 -0500 |
---|---|---|
committer | michaelt <what_is_it_to_do_anything@yahoo.com> | 2013-12-25 22:25:07 -0500 |
commit | c9d1c945a4343d756533b85060c35c04be0c8b02 (patch) | |
tree | 0c31ae2e13002c1311ae3cc1e15608750c5a0a9c | |
parent | 8c48280926efffc0ca52a5d9ca796d639d053379 (diff) | |
download | text-pipes-c9d1c945a4343d756533b85060c35c04be0c8b02.tar.gz text-pipes-c9d1c945a4343d756533b85060c35c04be0c8b02.tar.zst text-pipes-c9d1c945a4343d756533b85060c35c04be0c8b02.zip |
scrap character replacement; simplify
-rw-r--r-- | Pipes/Text.hs | 96 | ||||
-rw-r--r-- | Pipes/Text/Internal.hs | 118 | ||||
-rw-r--r-- | test/Test.hs | 60 |
3 files changed, 103 insertions, 171 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs index 6845dd3..d62aee7 100644 --- a/Pipes/Text.hs +++ b/Pipes/Text.hs | |||
@@ -1,4 +1,4 @@ | |||
1 | {-# LANGUAGE RankNTypes, TypeFamilies, CPP #-} | 1 | {-# LANGUAGE RankNTypes, TypeFamilies, NoMonomorphismRestriction #-} |
2 | 2 | ||
3 | {-| This module provides @pipes@ utilities for \"text streams\", which are | 3 | {-| This module provides @pipes@ utilities for \"text streams\", which are |
4 | streams of 'Text' chunks. The individual chunks are uniformly @strict@, but | 4 | streams of 'Text' chunks. The individual chunks are uniformly @strict@, but |
@@ -116,7 +116,6 @@ module Pipes.Text ( | |||
116 | lines, | 116 | lines, |
117 | words, | 117 | words, |
118 | decodeUtf8, | 118 | decodeUtf8, |
119 | decodeUtf8With, | ||
120 | -- * Transformations | 119 | -- * Transformations |
121 | intersperse, | 120 | intersperse, |
122 | 121 | ||
@@ -209,7 +208,7 @@ fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) | |||
209 | {-# INLINABLE fromLazy #-} | 208 | {-# INLINABLE fromLazy #-} |
210 | 209 | ||
211 | -- | Stream text from 'stdin' | 210 | -- | Stream text from 'stdin' |
212 | stdin :: MonadIO m => Producer' Text m (Producer ByteString m ()) | 211 | stdin :: MonadIO m => Producer Text m (Producer ByteString m ()) |
213 | stdin = fromHandle IO.stdin | 212 | stdin = fromHandle IO.stdin |
214 | {-# INLINABLE stdin #-} | 213 | {-# INLINABLE stdin #-} |
215 | 214 | ||
@@ -217,52 +216,17 @@ stdin = fromHandle IO.stdin | |||
217 | determined by the good sense of the text library. | 216 | determined by the good sense of the text library. |
218 | -} | 217 | -} |
219 | 218 | ||
220 | fromHandle :: MonadIO m => IO.Handle -> Producer' Text m (Producer ByteString m ()) | 219 | fromHandle :: MonadIO m => IO.Handle -> Producer Text m (Producer ByteString m ()) |
221 | -- TODO: this should perhaps just be `decodeUtf8 (PB.fromHandle h)` | 220 | fromHandle h = decodeUtf8 (PB.fromHandle h) |
222 | -- if only so that mistakes can be concentrated in one place. | ||
223 | -- This modifies something that was faster on an earlier iteration. | ||
224 | -- Note also that the `text` replacement system is being ignored; | ||
225 | -- with a replacement scheme one could have `Producer Text m ()` | ||
226 | -- the relation to the replacement business needs to be thought out. | ||
227 | -- The complicated type seems overmuch for the toy stdin above | ||
228 | fromHandle h = go PE.streamDecodeUtf8 B.empty where | ||
229 | act = B.hGetSome h defaultChunkSize | ||
230 | go dec old = do chunk <- liftIO act | ||
231 | if B.null chunk | ||
232 | then if B.null old then return (return ()) | ||
233 | else return (yield old >> return ()) | ||
234 | else case dec chunk of | ||
235 | PE.Some text bs dec' -> | ||
236 | if T.null text then go dec' (B.append old bs) | ||
237 | else do yield text | ||
238 | go dec' B.empty | ||
239 | PE.Other text bs -> | ||
240 | if T.null text then return (do yield old | ||
241 | yield bs | ||
242 | PB.fromHandle h) | ||
243 | else do yield text | ||
244 | return (do yield bs | ||
245 | PB.fromHandle h) | ||
246 | {-# INLINE fromHandle#-} | 221 | {-# INLINE fromHandle#-} |
247 | -- bytestring fromHandle + streamDecodeUtf8 is 3 times as fast as | 222 | |
248 | -- the dedicated Text IO function 'hGetChunk' ; | ||
249 | -- this way "runEffect $ PT.fromHandle hIn >-> PT.toHandle hOut" | ||
250 | -- runs the same as the conduit equivalent, only slightly slower | ||
251 | -- than "runEffect $ PB.fromHandle hIn >-> PB.toHandle hOut" | ||
252 | -- #else | ||
253 | -- fromHandle h = go where | ||
254 | -- go = do txt <- liftIO (T.hGetChunk h) | ||
255 | -- unless (T.null txt) $ do yield txt | ||
256 | -- go | ||
257 | -- {-# INLINABLE fromHandle#-} | ||
258 | -- #endif | ||
259 | {-| Stream text from a file using Pipes.Safe | 223 | {-| Stream text from a file using Pipes.Safe |
260 | 224 | ||
261 | >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout | 225 | >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout |
262 | MAIN = PUTSTRLN "HELLO WORLD" | 226 | MAIN = PUTSTRLN "HELLO WORLD" |
263 | -} | 227 | -} |
264 | 228 | ||
265 | readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m (Producer ByteString m ()) | 229 | readFile :: (MonadSafe m) => FilePath -> Producer Text m (Producer ByteString m ()) |
266 | readFile file = Safe.withFile file IO.ReadMode fromHandle | 230 | readFile file = Safe.withFile file IO.ReadMode fromHandle |
267 | {-# INLINABLE readFile #-} | 231 | {-# INLINABLE readFile #-} |
268 | 232 | ||
@@ -338,7 +302,7 @@ toHandle h = for cat (liftIO . T.hPutStr h) | |||
338 | 302 | ||
339 | 303 | ||
340 | -- | Stream text into a file. Uses @pipes-safe@. | 304 | -- | Stream text into a file. Uses @pipes-safe@. |
341 | writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m () | 305 | writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m () |
342 | writeFile file = Safe.withFile file IO.WriteMode toHandle | 306 | writeFile file = Safe.withFile file IO.WriteMode toHandle |
343 | 307 | ||
344 | -- | Apply a transformation to each 'Char' in the stream | 308 | -- | Apply a transformation to each 'Char' in the stream |
@@ -624,42 +588,18 @@ count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) | |||
624 | 588 | ||
625 | -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded | 589 | -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded |
626 | -- into a Pipe of Text | 590 | -- into a Pipe of Text |
627 | decodeUtf8 | ||
628 | :: Monad m | ||
629 | => Producer ByteString m r -> Producer Text m (Producer ByteString m r) | ||
630 | decodeUtf8 = decodeUtf8With Nothing | ||
631 | {-# INLINEABLE decodeUtf8 #-} | ||
632 | |||
633 | -- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded | ||
634 | -- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@ | ||
635 | -- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\" | ||
636 | decodeUtf8With | ||
637 | :: Monad m | ||
638 | => Maybe TE.OnDecodeError | ||
639 | -> Producer ByteString m r -> Producer Text m (Producer ByteString m r) | ||
640 | decodeUtf8With onErr = go (PE.streamDecodeUtf8With onErr) B.empty where | ||
641 | go dec old p = do | ||
642 | x <- lift (next p) | ||
643 | case x of | ||
644 | Left r -> if B.null old then return (return r) | ||
645 | else return (do yield old | ||
646 | return r) | ||
647 | Right (chunk, p') -> | ||
648 | case dec chunk of | ||
649 | PE.Some text l dec' -> | ||
650 | if T.null text then go dec' (B.append old l) p' | ||
651 | else do yield text | ||
652 | go dec' B.empty p' | ||
653 | PE.Other text bs -> | ||
654 | if T.null text then return (do yield old | ||
655 | yield bs | ||
656 | p') | ||
657 | else do yield text | ||
658 | return (do yield bs | ||
659 | p') | ||
660 | {-# INLINEABLE decodeUtf8With #-} | ||
661 | |||
662 | 591 | ||
592 | decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r) | ||
593 | decodeUtf8 = go PE.streamDecodeUtf8 where | ||
594 | go dec0 p = do | ||
595 | x <- lift (next p) | ||
596 | case x of Left r -> return (return r) | ||
597 | Right (chunk, p') -> | ||
598 | case dec0 chunk of PE.Some text _ dec -> do yield text | ||
599 | go dec p' | ||
600 | PE.Other text bs -> do yield text | ||
601 | return (do yield bs | ||
602 | p') | ||
663 | 603 | ||
664 | -- | Splits a 'Producer' after the given number of characters | 604 | -- | Splits a 'Producer' after the given number of characters |
665 | splitAt | 605 | splitAt |
diff --git a/Pipes/Text/Internal.hs b/Pipes/Text/Internal.hs index 05d9887..73d6fa4 100644 --- a/Pipes/Text/Internal.hs +++ b/Pipes/Text/Internal.hs | |||
@@ -1,11 +1,10 @@ | |||
1 | {-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface, GeneralizedNewtypeDeriving, MagicHash, | 1 | {-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface, GeneralizedNewtypeDeriving, MagicHash, |
2 | UnliftedFFITypes #-} | 2 | UnliftedFFITypes #-} |
3 | -- This module lifts material from Brian O'Sullivan's text package | 3 | -- This module lifts assorted materials from Brian O'Sullivan's text package |
4 | -- especially Data.Text.Encoding in order to define a pipes-appropriate | 4 | -- especially Data.Text.Encoding in order to define a pipes-appropriate |
5 | -- streamDecodeUtf8 | 5 | -- streamDecodeUtf8 |
6 | module Pipes.Text.Internal | 6 | module Pipes.Text.Internal |
7 | ( Decoding(..) | 7 | ( Decoding(..) |
8 | , streamDecodeUtf8With | ||
9 | , streamDecodeUtf8 | 8 | , streamDecodeUtf8 |
10 | ) where | 9 | ) where |
11 | 10 | ||
@@ -20,6 +19,7 @@ import Data.Bits ((.&.)) | |||
20 | import Data.ByteString as B | 19 | import Data.ByteString as B |
21 | import Data.ByteString.Internal as B | 20 | import Data.ByteString.Internal as B |
22 | import Data.Text () | 21 | import Data.Text () |
22 | import qualified Data.Text as T | ||
23 | import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode) | 23 | import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode) |
24 | import Data.Text.Internal (Text(..), safe, textP) | 24 | import Data.Text.Internal (Text(..), safe, textP) |
25 | import Data.Word (Word8, Word32) | 25 | import Data.Word (Word8, Word32) |
@@ -56,94 +56,52 @@ instance Show Decoding where | |||
56 | newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable) | 56 | newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable) |
57 | newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable) | 57 | newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable) |
58 | 58 | ||
59 | -- | Decode, in a stream oriented way, a 'ByteString' containing UTF-8 | ||
60 | -- encoded text that is known to be valid. | ||
61 | -- | ||
62 | -- If the input contains any invalid UTF-8 data, an exception will be | ||
63 | -- thrown (either by this function or a continuation) that cannot be | ||
64 | -- caught in pure code. For more control over the handling of invalid | ||
65 | -- data, use 'streamDecodeUtf8With'. | ||
66 | streamDecodeUtf8 :: ByteString -> Decoding | 59 | streamDecodeUtf8 :: ByteString -> Decoding |
67 | streamDecodeUtf8 = streamDecodeUtf8With (Just strictDecode) | 60 | streamDecodeUtf8 = decodeChunk B.empty 0 0 |
68 | 61 | ||
69 | -- | Decode, in a stream oriented way, a 'ByteString' containing UTF-8 | 62 | decodeChunk :: ByteString -> CodePoint -> DecoderState -> ByteString -> Decoding |
70 | -- encoded text. | 63 | decodeChunk old codepoint0 state0 bs@(PS fp off len) = |
71 | streamDecodeUtf8With :: Maybe OnDecodeError -> ByteString -> Decoding | 64 | runST $ do marray <- A.new (len+1) |
72 | streamDecodeUtf8With mErr = case mErr of | 65 | unsafeIOToST (decodeChunkToBuffer marray) |
73 | Nothing -> decodeWith False strictDecode | ||
74 | Just onErr -> decodeWith True onErr | ||
75 | where | ||
76 | -- We create a slightly larger than necessary buffer to accommodate a | ||
77 | -- potential surrogate pair started in the last buffer | ||
78 | decodeWith replace onErr = decodeChunk 0 0 | ||
79 | where | ||
80 | decodeChunk :: CodePoint -> DecoderState -> ByteString -> Decoding | ||
81 | decodeChunk codepoint0 state0 bs@(PS fp off len) = | ||
82 | runST $ (unsafeIOToST . decodeChunkToBuffer) =<< A.new (len+1) | ||
83 | where | 66 | where |
84 | decodeChunkToBuffer :: A.MArray s -> IO Decoding | 67 | decodeChunkToBuffer :: A.MArray s -> IO Decoding |
85 | decodeChunkToBuffer dest = withForeignPtr fp $ \ptr -> | 68 | decodeChunkToBuffer dest = withForeignPtr fp $ \ptr -> |
86 | with (0::CSize) $ \destOffPtr -> | 69 | with (0::CSize) $ \destOffPtr -> |
87 | with codepoint0 $ \codepointPtr -> | 70 | with codepoint0 $ \codepointPtr -> |
88 | with state0 $ \statePtr -> | 71 | with state0 $ \statePtr -> |
89 | with nullPtr $ \curPtrPtr -> | 72 | with nullPtr $ \curPtrPtr -> |
90 | let end = ptr `plusPtr` (off + len) | 73 | do let end = ptr `plusPtr` (off + len) |
91 | loop curPtr = do | 74 | curPtr = ptr `plusPtr` off |
92 | poke curPtrPtr curPtr | 75 | poke curPtrPtr curPtr |
93 | curPtr' <- c_decode_utf8_with_state (A.maBA dest) destOffPtr | 76 | c_decode_utf8_with_state (A.maBA dest) destOffPtr curPtrPtr end codepointPtr statePtr |
94 | curPtrPtr end codepointPtr statePtr | 77 | state <- peek statePtr |
95 | state <- peek statePtr | 78 | lastPtr <- peek curPtrPtr |
96 | case state of | 79 | codepoint <- peek codepointPtr |
97 | UTF8_REJECT -> | 80 | n <- peek destOffPtr |
98 | -- We encountered an encoding error | 81 | chunkText <- mkText dest n |
99 | if replace | 82 | let left = lastPtr `minusPtr` curPtr |
100 | then do | 83 | remaining = B.drop left bs |
101 | x <- peek curPtr' | 84 | accum = if T.null chunkText then B.append old remaining else remaining |
102 | case onErr desc (Just x) of | 85 | return $ case state of |
103 | Nothing -> loop $ curPtr' `plusPtr` 1 | 86 | UTF8_REJECT -> Other chunkText accum -- We encountered an encoding error |
104 | Just c -> do | 87 | _ -> Some chunkText accum (decodeChunk accum codepoint state) |
105 | destOff <- peek destOffPtr | 88 | |
106 | w <- unsafeSTToIO $ | 89 | |
107 | unsafeWrite dest (fromIntegral destOff) (safe c) | 90 | mkText :: A.MArray s -> CSize -> IO Text |
108 | poke destOffPtr (destOff + fromIntegral w) | 91 | mkText dest n = unsafeSTToIO $ do arr <- A.unsafeFreeze dest |
109 | poke statePtr 0 | 92 | return $! textP arr 0 (fromIntegral n) |
110 | loop $ curPtr' `plusPtr` 1 | ||
111 | else do | ||
112 | n <- peek destOffPtr | ||
113 | chunkText <- unsafeSTToIO $ do | ||
114 | arr <- A.unsafeFreeze dest | ||
115 | return $! textP arr 0 (fromIntegral n) | ||
116 | lastPtr <- peek curPtrPtr | ||
117 | let left = lastPtr `minusPtr` curPtr | ||
118 | return $ Other chunkText (B.drop left bs) | ||
119 | _ -> do | ||
120 | -- We encountered the end of the buffer while decoding | ||
121 | n <- peek destOffPtr | ||
122 | codepoint <- peek codepointPtr | ||
123 | chunkText <- unsafeSTToIO $ do | ||
124 | arr <- A.unsafeFreeze dest | ||
125 | return $! textP arr 0 (fromIntegral n) | ||
126 | lastPtr <- peek curPtrPtr | ||
127 | let left = lastPtr `minusPtr` curPtr | ||
128 | return $ Some chunkText (B.drop left bs) | ||
129 | (decodeChunk codepoint state) | ||
130 | in loop (ptr `plusPtr` off) | ||
131 | desc = "Data.Text.Encoding.streamDecodeUtf8With: Invalid UTF-8 stream" | ||
132 | 93 | ||
133 | ord :: Char -> Int | 94 | ord :: Char -> Int |
134 | ord (C# c#) = I# (ord# c#) | 95 | ord (C# c#) = I# (ord# c#) |
135 | {-# INLINE ord #-} | 96 | {-# INLINE ord #-} |
136 | 97 | ||
137 | |||
138 | unsafeWrite :: A.MArray s -> Int -> Char -> ST s Int | 98 | unsafeWrite :: A.MArray s -> Int -> Char -> ST s Int |
139 | unsafeWrite marr i c | 99 | unsafeWrite marr i c |
140 | | n < 0x10000 = do | 100 | | n < 0x10000 = do A.unsafeWrite marr i (fromIntegral n) |
141 | A.unsafeWrite marr i (fromIntegral n) | 101 | return 1 |
142 | return 1 | 102 | | otherwise = do A.unsafeWrite marr i lo |
143 | | otherwise = do | 103 | A.unsafeWrite marr (i+1) hi |
144 | A.unsafeWrite marr i lo | 104 | return 2 |
145 | A.unsafeWrite marr (i+1) hi | ||
146 | return 2 | ||
147 | where n = ord c | 105 | where n = ord c |
148 | m = n - 0x10000 | 106 | m = n - 0x10000 |
149 | lo = fromIntegral $ (m `shiftR` 10) + 0xD800 | 107 | lo = fromIntegral $ (m `shiftR` 10) + 0xD800 |
diff --git a/test/Test.hs b/test/Test.hs index 1579f2b..66351d1 100644 --- a/test/Test.hs +++ b/test/Test.hs | |||
@@ -8,6 +8,7 @@ import Test.Framework.Providers.QuickCheck2 (testProperty) | |||
8 | import Control.Exception (catch) | 8 | import Control.Exception (catch) |
9 | import Data.Char (chr, isDigit, isHexDigit, isLower, isSpace, isUpper, ord) | 9 | import Data.Char (chr, isDigit, isHexDigit, isLower, isSpace, isUpper, ord) |
10 | import Data.Monoid (Monoid(..)) | 10 | import Data.Monoid (Monoid(..)) |
11 | import Control.Monad | ||
11 | import Data.String (fromString) | 12 | import Data.String (fromString) |
12 | import Data.Text.Encoding.Error | 13 | import Data.Text.Encoding.Error |
13 | import qualified Data.List as L | 14 | import qualified Data.List as L |
@@ -19,15 +20,20 @@ import qualified Data.Text as T | |||
19 | import qualified Data.Text.Lazy as TL | 20 | import qualified Data.Text.Lazy as TL |
20 | import qualified Data.Text.Encoding as E | 21 | import qualified Data.Text.Encoding as E |
21 | import qualified Pipes.Text.Internal as PE | 22 | import qualified Pipes.Text.Internal as PE |
23 | import qualified Pipes.Text as TP | ||
24 | import qualified Pipes.ByteString as BP | ||
25 | import qualified Pipes as P | ||
22 | 26 | ||
27 | |||
28 | import Debug.Trace | ||
23 | main :: IO () | 29 | main :: IO () |
24 | main = defaultMain [tests] | 30 | main = defaultMain [tests] |
25 | -- >>> :main -a 10000 | 31 | -- >>> :main -a 10000 |
26 | 32 | ||
27 | tests = testGroup "stream_decode" [ | 33 | tests = testGroup "stream_decode" [ |
28 | 34 | -- testProperty "t_utf8_incr_valid" t_utf8_incr_valid, | |
29 | testProperty "t_utf8_incr_valid" t_utf8_incr_valid, | 35 | testProperty "t_utf8_incr_mixed" t_utf8_incr_mixed, |
30 | testProperty "t_utf8_incr_mixed" t_utf8_incr_mixed] | 36 | testProperty "t_utf8_incr_pipe" t_utf8_incr_pipe] |
31 | 37 | ||
32 | t_utf8_incr_valid = do | 38 | t_utf8_incr_valid = do |
33 | Positive n <- arbitrary | 39 | Positive n <- arbitrary |
@@ -43,18 +49,46 @@ t_utf8_incr_valid = do | |||
43 | PE.Some t _ f' -> t : feedChunksOf n f' b | 49 | PE.Some t _ f' -> t : feedChunksOf n f' b |
44 | _ -> [] | 50 | _ -> [] |
45 | 51 | ||
46 | t_utf8_incr_mixed = do | 52 | t_utf8_incr_mixed = do |
47 | Positive n <- arbitrary | 53 | Positive n <- arbitrary |
48 | txt <- genUnicode | 54 | txt <- genUnicode |
49 | forAll (vector 9) $ (roundtrip . chunk (mod n 7 + 1) . appendBytes txt) `eq` appendBytes txt | 55 | let chunkSize = mod n 7 + 1 |
56 | forAll (vector 9) $ | ||
57 | (roundtrip . chunk chunkSize . appendBytes txt) `eq` (appendBytes txt) | ||
50 | where | 58 | where |
51 | roundtrip :: [B.ByteString] -> B.ByteString | 59 | roundtrip :: [B.ByteString] -> B.ByteString |
52 | roundtrip bss = go (PE.streamDecodeUtf8With Nothing) B.empty B.empty bss where | 60 | roundtrip bss = go PE.streamDecodeUtf8 B.empty bss where |
53 | go dec acc old [] = acc <> old | 61 | go dec acc [] = acc |
54 | go dec acc old (bs:bss) = case dec bs of | 62 | go dec acc [bs] = case dec bs of |
55 | PE.Some t new dec' -> if T.null t then go dec' (acc <> E.encodeUtf8 t) (old <> new) bss | 63 | PE.Some t l dec' -> acc <> E.encodeUtf8 t <> l |
56 | else go dec' (acc <> E.encodeUtf8 t) new bss | 64 | PE.Other t bs' -> acc <> E.encodeUtf8 t <> bs' |
57 | PE.Other t bs' -> if T.null t then acc <> old <> bs <> B.concat bss | 65 | go dec acc (bs:bss) = case dec bs of |
58 | else acc <> E.encodeUtf8 t <> bs' <> B.concat bss | 66 | PE.Some t l dec' -> go dec' (acc <> E.encodeUtf8 t) bss |
67 | PE.Other t bs' -> acc <> E.encodeUtf8 t <> bs' <> B.concat bss | ||
68 | chunk n bs = let (a,b) = B.splitAt n bs in if B.null a then [] else a : chunk n b | ||
69 | appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append | ||
70 | |||
71 | |||
72 | |||
73 | |||
74 | t_utf8_incr_pipe = do | ||
75 | Positive m <- arbitrary | ||
76 | Positive n <- arbitrary | ||
77 | txt <- genUnicode | ||
78 | let chunkSize = mod n 7 + 1 | ||
79 | bytesLength = mod 20 m | ||
80 | forAll (vector bytesLength) $ | ||
81 | (BL.toStrict . BP.toLazy . roundtrip . P.each . chunk chunkSize . appendBytes txt) | ||
82 | `eq` | ||
83 | appendBytes txt | ||
84 | where | ||
85 | roundtrip :: Monad m => P.Producer B.ByteString m r -> P.Producer B.ByteString m r | ||
86 | roundtrip p = do pbs <- TP.decodeUtf8 p P.>-> TP.encodeUtf8 | ||
87 | pbs | ||
59 | chunk n bs = let (a,b) = B.splitAt n bs in if B.null a then [] else a : chunk n b | 88 | chunk n bs = let (a,b) = B.splitAt n bs in if B.null a then [] else a : chunk n b |
60 | appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append | 89 | appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append |
90 | |||
91 | |||
92 | |||
93 | |||
94 | |||