From 8c48280926efffc0ca52a5d9ca796d639d053379 Mon Sep 17 00:00:00 2001 From: michaelt Date: Mon, 23 Dec 2013 13:02:49 -0500 Subject: variant using text internals in place of text streamDecodeUtf8 --- test/Test.hs | 60 ++++++++++++++++++++++++++++++++ test/Utils.hs | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+) create mode 100644 test/Test.hs create mode 100644 test/Utils.hs (limited to 'test') diff --git a/test/Test.hs b/test/Test.hs new file mode 100644 index 0000000..1579f2b --- /dev/null +++ b/test/Test.hs @@ -0,0 +1,60 @@ +import Utils + +import Test.QuickCheck +import Test.QuickCheck.Monadic +import Test.Framework (Test, testGroup, defaultMain) +import Test.Framework.Providers.QuickCheck2 (testProperty) + +import Control.Exception (catch) +import Data.Char (chr, isDigit, isHexDigit, isLower, isSpace, isUpper, ord) +import Data.Monoid (Monoid(..)) +import Data.String (fromString) +import Data.Text.Encoding.Error +import qualified Data.List as L + +import qualified Data.Bits as Bits (shiftL, shiftR) +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Encoding as E +import qualified Pipes.Text.Internal as PE + +main :: IO () +main = defaultMain [tests] +-- >>> :main -a 10000 + +tests = testGroup "stream_decode" [ + + testProperty "t_utf8_incr_valid" t_utf8_incr_valid, + testProperty "t_utf8_incr_mixed" t_utf8_incr_mixed] + +t_utf8_incr_valid = do + Positive n <- arbitrary + forAll genUnicode $ recode n `eq` id + where recode n = T.concat . feedChunksOf n PE.streamDecodeUtf8 . E.encodeUtf8 + feedChunksOf :: Int -> (B.ByteString -> PE.Decoding) -> B.ByteString + -> [T.Text] + feedChunksOf n f bs + | B.null bs = [] + | otherwise = let (a,b) = B.splitAt n bs + PE.Some t _ f' = f a + in case f a of + PE.Some t _ f' -> t : feedChunksOf n f' b + _ -> [] + +t_utf8_incr_mixed = do + Positive n <- arbitrary + txt <- genUnicode + forAll (vector 9) $ (roundtrip . chunk (mod n 7 + 1) . appendBytes txt) `eq` appendBytes txt + where + roundtrip :: [B.ByteString] -> B.ByteString + roundtrip bss = go (PE.streamDecodeUtf8With Nothing) B.empty B.empty bss where + go dec acc old [] = acc <> old + go dec acc old (bs:bss) = case dec bs of + PE.Some t new dec' -> if T.null t then go dec' (acc <> E.encodeUtf8 t) (old <> new) bss + else go dec' (acc <> E.encodeUtf8 t) new bss + PE.Other t bs' -> if T.null t then acc <> old <> bs <> B.concat bss + else acc <> E.encodeUtf8 t <> bs' <> B.concat bss + chunk n bs = let (a,b) = B.splitAt n bs in if B.null a then [] else a : chunk n b + appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append diff --git a/test/Utils.hs b/test/Utils.hs new file mode 100644 index 0000000..75cd1db --- /dev/null +++ b/test/Utils.hs @@ -0,0 +1,109 @@ +{-#LANGUAGE ScopedTypeVariables#-} +module Utils where +import Control.Exception (SomeException, bracket, bracket_, evaluate, try) +import System.IO.Unsafe (unsafePerformIO) +import Debug.Trace (trace) +import Data.Bits ((.&.)) +import Data.Char (chr) +import Data.String (IsString, fromString) +import System.Random (Random (..), RandomGen) +import Test.QuickCheck hiding ((.&.)) +import Test.QuickCheck.Monadic (assert, monadicIO, run) +import qualified Data.ByteString as B +import Pipes.Text.Internal + + + + + +-- Ensure that two potentially bottom values (in the sense of crashing +-- for some inputs, not looping infinitely) either both crash, or both +-- give comparable results for some input. +(=^=) :: (Eq a, Show a) => a -> a -> Bool +i =^= j = unsafePerformIO $ do + x <- try (evaluate i) + y <- try (evaluate j) + case (x,y) of + (Left (_ :: SomeException), Left (_ :: SomeException)) + -> return True + (Right a, Right b) -> return (a == b) + e -> trace ("*** Divergence: " ++ show e) return False +infix 4 =^= +{-# NOINLINE (=^=) #-} + +-- Do two functions give the same answer? +eq :: (Eq a, Show a) => (t -> a) -> (t -> a) -> t -> Bool +eq a b s = a s =^= b s + +-- What about with the RHS packed? +-- eqP :: (Eq a, Show a, Stringy s) => +-- (String -> a) -> (s -> a) -> String -> Word8 -> Bool +-- eqP f g s w = eql "orig" (f s) (g t) && +-- eql "mini" (f s) (g mini) && +-- eql "head" (f sa) (g ta) && +-- eql "tail" (f sb) (g tb) +-- where t = packS s +-- mini = packSChunkSize 10 s +-- (sa,sb) = splitAt m s +-- (ta,tb) = splitAtS m t +-- l = length s +-- m | l == 0 = n +-- | otherwise = n `mod` l +-- n = fromIntegral w +-- eql d a b +-- | a =^= b = True +-- | otherwise = trace (d ++ ": " ++ show a ++ " /= " ++ show b) False + + +instance Arbitrary B.ByteString where + arbitrary = B.pack `fmap` arbitrary + +genUnicode :: IsString a => Gen a +genUnicode = fmap fromString string where + string = sized $ \n -> + do k <- choose (0,n) + sequence [ char | _ <- [1..k] ] + + excluding :: [a -> Bool] -> Gen a -> Gen a + excluding bad gen = loop + where + loop = do + x <- gen + if or (map ($ x) bad) + then loop + else return x + + reserved = [lowSurrogate, highSurrogate, noncharacter] + lowSurrogate c = c >= 0xDC00 && c <= 0xDFFF + highSurrogate c = c >= 0xD800 && c <= 0xDBFF + noncharacter c = masked == 0xFFFE || masked == 0xFFFF + where + masked = c .&. 0xFFFF + + ascii = choose (0,0x7F) + plane0 = choose (0xF0, 0xFFFF) + plane1 = oneof [ choose (0x10000, 0x10FFF) + , choose (0x11000, 0x11FFF) + , choose (0x12000, 0x12FFF) + , choose (0x13000, 0x13FFF) + , choose (0x1D000, 0x1DFFF) + , choose (0x1F000, 0x1FFFF) + ] + plane2 = oneof [ choose (0x20000, 0x20FFF) + , choose (0x21000, 0x21FFF) + , choose (0x22000, 0x22FFF) + , choose (0x23000, 0x23FFF) + , choose (0x24000, 0x24FFF) + , choose (0x25000, 0x25FFF) + , choose (0x26000, 0x26FFF) + , choose (0x27000, 0x27FFF) + , choose (0x28000, 0x28FFF) + , choose (0x29000, 0x29FFF) + , choose (0x2A000, 0x2AFFF) + , choose (0x2B000, 0x2BFFF) + , choose (0x2F000, 0x2FFFF) + ] + plane14 = choose (0xE0000, 0xE0FFF) + planes = [ascii, plane0, plane1, plane2, plane14] + + char = chr `fmap` excluding reserved (oneof planes) -- cgit v1.2.3