aboutsummaryrefslogtreecommitdiffhomepage
path: root/test/Test.hs
blob: 7832f760e963c35a665ca43f7bedc0ee707f5e23 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import Utils

import Test.QuickCheck
import Test.QuickCheck.Monadic
import Test.Framework (Test, testGroup, defaultMain)
import Test.Framework.Providers.QuickCheck2 (testProperty)

import Control.Exception (catch)
import Data.Char (chr, isDigit, isHexDigit, isLower, isSpace, isUpper, ord)
import Data.Monoid (Monoid(..))
import Control.Monad
import Data.String (fromString)
import Data.Text.Encoding.Error
import qualified Data.List as L

import qualified Data.Bits as Bits (shiftL, shiftR)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Encoding as E
import qualified Pipes.Text.Internal as PE
import qualified Pipes.Text as TP
import qualified Pipes.ByteString as BP 
import qualified Pipes as P 

main :: IO ()
main = defaultMain [tests]
-- >>> :main  -a 10000
tests = testGroup "stream_decode" [
  -- testProperty "t_utf8_incr_valid" t_utf8_incr_valid,
  testProperty "t_utf8_incr_mixed" t_utf8_incr_mixed ,
  testProperty "t_utf8_incr_pipe" t_utf8_incr_pipe,
  testProperty "t_utf8_incr_decoding" t_utf8_incr_decoding,
  testProperty "t_utf8_dec_some" t_utf8_dec_some]

t_utf8_incr_valid  = do
        Positive n <- arbitrary
        forAll genUnicode $ recode n `eq` id
    where recode n = T.concat . feedChunksOf n PE.streamDecodeUtf8 . E.encodeUtf8
          feedChunksOf :: Int -> (B.ByteString -> PE.Decoding) -> B.ByteString
                       -> [T.Text]
          feedChunksOf n f bs
            | B.null bs  = []
            | otherwise  = let (a,b) = B.splitAt n bs
                               PE.Some t _ f' = f a
                           in case f a of 
                              PE.Some t _ f' -> t : feedChunksOf n f' b
                              _             -> []

t_utf8_incr_mixed  = do
       Positive n <- arbitrary
       txt <- genUnicode
       let chunkSize = mod n 7 + 1
       forAll (vector 9) $ 
              (roundtrip . chunk chunkSize . appendBytes txt) `eq` (appendBytes txt)
    where 
    roundtrip :: [B.ByteString] -> B.ByteString
    roundtrip bss = go PE.streamDecodeUtf8 B.empty bss where    
       go dec acc [] = acc   
       go dec acc [bs]  = case dec bs of 
          PE.Some t l dec' -> acc <> E.encodeUtf8 t <> l
          PE.Other t bs'   -> acc <> E.encodeUtf8 t <> bs' 
       go dec acc (bs:bss) = case dec bs of 
         PE.Some t l dec' -> go dec' (acc <> E.encodeUtf8 t) bss
         PE.Other t bs'   -> acc <> E.encodeUtf8 t <> bs' <> B.concat bss
    chunk n bs = let (a,b) = B.splitAt n bs in if B.null a then [] else a : chunk n b
    appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append

t_utf8_incr_pipe  = do    
       Positive  m <- arbitrary
       Positive n  <- arbitrary  
       txt         <- genUnicode
       let chunkSize = mod n 7 + 1
           bytesLength = mod 10 m
       forAll (vector bytesLength) $ 
              (BL.toStrict . BP.toLazy . roundtrip . P.each . chunk chunkSize . appendBytes txt) 
              `eq` 
              appendBytes txt
    where 
    roundtrip :: Monad m => P.Producer B.ByteString m r -> P.Producer B.ByteString m r
    roundtrip p = join (TP.decodeUtf8 p P.>-> TP.encodeUtf8) 
    chunk n bs = let (a,b) = B.splitAt n bs in if B.null a then [] else a : chunk n b
    appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append

--
t_utf8_incr_decoding  = do    
       Positive  m <- arbitrary
       Positive n  <- arbitrary  
       txt         <- genUnicode
       let chunkSize = mod n 7 + 1
           bytesLength = mod 10 m
       forAll (vector bytesLength) $ 
              (BL.toStrict . BP.toLazy . roundtrip . P.each . chunk chunkSize . appendBytes txt) 
              `eq` 
              appendBytes txt
    where 
    roundtrip :: Monad m => P.Producer B.ByteString m r -> P.Producer B.ByteString m r
    roundtrip p = join (TP.decode utf8_start p P.>-> TP.encodeUtf8) 
    chunk n bs = let (a,b) = B.splitAt n bs in if B.null a then [] else a : chunk n b
    appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append
    utf8_start = PE.Some T.empty B.empty (PE.codecDecode PE.utf8)
t_utf8_dec_some = do    
       Positive  m <- arbitrary
       txt         <- genUnicode
       let bytesLength = mod 10 m :: Int
       forAll (vector bytesLength) $ 
              (roundtrip . appendBytes txt) 
              `eq` 
              appendBytes txt
    where 
    roundtrip bs = case PE.decodeSomeUtf8 bs of
                        (txt,bys) -> E.encodeUtf8 txt <> bys
    appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append