Use clunky Data.Text.IO when bytestring is not explicit

author michaelt <what_is_it_to_do_anything@yahoo.com>

Wed, 15 Jan 2014 03:05:12 +0000 (22:05 -0500)

committer michaelt <what_is_it_to_do_anything@yahoo.com>

Wed, 15 Jan 2014 03:05:12 +0000 (22:05 -0500)
author michaelt <what_is_it_to_do_anything@yahoo.com>
Wed, 15 Jan 2014 03:05:12 +0000 (22:05 -0500)
committer michaelt <what_is_it_to_do_anything@yahoo.com>
Wed, 15 Jan 2014 03:05:12 +0000 (22:05 -0500)
diff --git a/Pipes/Text.hs b/Pipes/Text.hs

index 74d2023a7265552404cf4ee656e2559084362f66..cf493e9ab27b90d3c52d46f8296024d7ed8225c7 100644 (file)
--- a/Pipes/Text.hs
+++ b/Pipes/Text.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE RankNTypes, TypeFamilies #-}
+{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-}
  
  {-| This module provides @pipes@ utilities for \"text streams\", which are
      streams of 'Text' chunks.  The individual chunks are uniformly @strict@, but 
  
  {-| This module provides @pipes@ utilities for \"text streams\", which are
      streams of 'Text' chunks.  The individual chunks are uniformly @strict@, but 
@@ -206,30 +206,36 @@ import Prelude hiding (
  -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
  fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
  fromLazy  = foldrChunks (\e a -> yield e >> a) (return ()) 
  -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
  fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
  fromLazy  = foldrChunks (\e a -> yield e >> a) (return ()) 
-{-# INLINABLE fromLazy #-}
+{-# INLINE fromLazy #-}
  
  -- | Stream text from 'stdin'
  
  -- | Stream text from 'stdin'
-stdin :: MonadIO m => Producer Text m (Producer ByteString m ())
+stdin :: MonadIO m => Producer Text m ()
  stdin = fromHandle IO.stdin
  stdin = fromHandle IO.stdin
-{-# INLINABLE stdin #-}
+{-# INLINE stdin #-}
  
  {-| Convert a 'IO.Handle' into a text stream using a text size 
  
  {-| Convert a 'IO.Handle' into a text stream using a text size 
-    determined by the good sense of the text library. 
+    determined by the good sense of the text library; note that this
+    is distinctly slower than @decodeUtf8 (Pipes.ByteString.fromHandle h)@
+    but uses the system encoding and has other `Data.Text.IO` features
  -}
  
  -}
  
-fromHandle :: MonadIO m => IO.Handle -> Producer Text m (Producer ByteString m ())
-fromHandle h = decodeUtf8 (PB.fromHandle h)
-{-# INLINE fromHandle#-}
+fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
+fromHandle h =  go where
+      go = do txt <- liftIO (T.hGetChunk h)
+              unless (T.null txt) $ do yield txt
+                                       go
+{-# INLINABLE fromHandle#-}
  
  
-{-| Stream text from a file using Pipes.Safe
+
+{-| Stream text from a file in the simple fashion of @Data.Text.IO@ 
  
  >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
  MAIN = PUTSTRLN "HELLO WORLD"
  -}
  
  
  >>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
  MAIN = PUTSTRLN "HELLO WORLD"
  -}
  
-readFile :: (MonadSafe m) => FilePath -> Producer Text m (Producer ByteString m ())
+readFile :: MonadSafe m => FilePath -> Producer Text m ()
  readFile file = Safe.withFile file IO.ReadMode fromHandle
  readFile file = Safe.withFile file IO.ReadMode fromHandle
-{-# INLINABLE readFile #-}
+{-# INLINE readFile #-}
  
  {-| Stream lines of text from stdin (for testing in ghci etc.) 
  
  
  {-| Stream lines of text from stdin (for testing in ghci etc.) 
  
@@ -249,7 +255,7 @@ stdinLn = go where
              txt <- liftIO (T.hGetLine IO.stdin)
              yield txt
              go
              txt <- liftIO (T.hGetLine IO.stdin)
              yield txt
              go
-
+{-# INLINABLE stdinLn #-}
  
  {-| Stream text to 'stdout'
  
  
  {-| Stream text to 'stdout'
  
@@ -305,6 +311,7 @@ toHandle h = for cat (liftIO . T.hPutStr h)
  -- | Stream text into a file. Uses @pipes-safe@.
  writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
  writeFile file = Safe.withFile file IO.WriteMode toHandle
  -- | Stream text into a file. Uses @pipes-safe@.
  writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
  writeFile file = Safe.withFile file IO.WriteMode toHandle
+{-# INLINE writeFile #-}
  
  -- | Apply a transformation to each 'Char' in the stream
  map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
  
  -- | Apply a transformation to each 'Char' in the stream
  map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
@@ -592,10 +599,10 @@ count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
  
  decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
  decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
  
  decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
  decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
-  go carry dec0 p = do 
+  go !carry dec0 p = do 
       x <- lift (next p) 
       case x of Left r -> if B.null carry 
       x <- lift (next p) 
       case x of Left r -> if B.null carry 
-                           then return (return r)      -- all input was consumed
+                           then return (return r)      -- all bytestring input was consumed
                             else return (do yield carry -- a potentially valid fragment remains
                                             return r)
                                             
                             else return (do yield carry -- a potentially valid fragment remains
                                             return r)
                                             
@@ -605,6 +612,8 @@ decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
                     PE.Other text bs -> do yield text 
                                            return (do yield bs -- an invalid blob remains
                                                       p')
                     PE.Other text bs -> do yield text 
                                            return (do yield bs -- an invalid blob remains
                                                       p')
+{-# INLINABLE decodeUtf8 #-}
+
  
  -- | Splits a 'Producer' after the given number of characters
  splitAt
  
  -- | Splits a 'Producer' after the given number of characters
  splitAt
diff --git a/Pipes/Text/Internal.hs b/Pipes/Text/Internal.hs

index 73d6fa487e4b4fb4491c7f680fd4dbc0f91b037c..7e5b04491dba0c7fa2638a400f5b34eb076e1057 100644 (file)
--- a/Pipes/Text/Internal.hs
+++ b/Pipes/Text/Internal.hs
@@ -6,36 +6,25 @@
  module Pipes.Text.Internal 
      ( Decoding(..)
      , streamDecodeUtf8
  module Pipes.Text.Internal 
      ( Decoding(..)
      , streamDecodeUtf8
+    , decodeSomeUtf8
      ) where
      ) where
-
-import Control.Exception (evaluate, try)
-#if __GLASGOW_HASKELL__ >= 702
  import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO)
  import Control.Monad.ST (ST, runST)
  import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO)
  import Control.Monad.ST (ST, runST)
-#else
-import Control.Monad.ST (unsafeIOToST, unsafeSTToIO, ST, runST)
-#endif
  import Data.Bits ((.&.))
  import Data.Bits ((.&.))
-import Data.ByteString as B
-import Data.ByteString.Internal as B
-import Data.Text ()
-import qualified Data.Text as T
-import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
-import Data.Text.Internal (Text(..), safe, textP)
-import Data.Word (Word8, Word32)
+import Data.ByteString as B 
+import Data.ByteString.Internal as B 
+import qualified Data.Text as T (null)
+import Data.Text.Encoding.Error ()
+import Data.Text.Internal (Text, textP)
  import Foreign.C.Types (CSize)
  import Foreign.ForeignPtr (withForeignPtr)
  import Foreign.Marshal.Utils (with)
  import Foreign.Ptr (Ptr, minusPtr, nullPtr, plusPtr)
  import Foreign.Storable (Storable, peek, poke)
  import Foreign.C.Types (CSize)
  import Foreign.ForeignPtr (withForeignPtr)
  import Foreign.Marshal.Utils (with)
  import Foreign.Ptr (Ptr, minusPtr, nullPtr, plusPtr)
  import Foreign.Storable (Storable, peek, poke)
-import GHC.Base hiding (ord)
-import GHC.Word
+import GHC.Base  (Char(..), Int(..), MutableByteArray#, ord#, iShiftRA#)
+import GHC.Word (Word8, Word32)
  import qualified Data.Text.Array as A
  import qualified Data.Text.Array as A
-import GHC.Exts (Char(..), Int(..), chr#, ord#, word2Int#)
-import GHC.Word (Word8(..), Word16(..), Word32(..))
-
-import Data.Text.Unsafe (unsafeDupablePerformIO)
-
+    
  #include "pipes_text_cbits.h"
  
  -- | A stream oriented decoding result.
  #include "pipes_text_cbits.h"
  
  -- | A stream oriented decoding result.
@@ -52,44 +41,102 @@ instance Show Decoding where
                                  showChar ' ' . showsPrec prec' bs .
                                  showString " _"
        where prec = 10; prec' = prec + 1
                                  showChar ' ' . showsPrec prec' bs .
                                  showString " _"
        where prec = 10; prec' = prec + 1
-      
+
  newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable)
  newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable)
  
  streamDecodeUtf8 :: ByteString -> Decoding
  newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable)
  newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable)
  
  streamDecodeUtf8 :: ByteString -> Decoding
-streamDecodeUtf8 = decodeChunk B.empty 0 0
+streamDecodeUtf8 = decodeChunkUtf8 B.empty 0 0 
+  where
+  decodeChunkUtf8 :: ByteString -> CodePoint -> DecoderState -> ByteString -> Decoding
+  decodeChunkUtf8 old codepoint0 state0 bs@(PS fp off len) = 
+                    runST $ do marray <- A.new (len+1) 
+                               unsafeIOToST (decodeChunkToBuffer marray)
+     where
+     decodeChunkToBuffer :: A.MArray s -> IO Decoding
+     decodeChunkToBuffer dest = withForeignPtr fp $ \ptr ->
+       with (0::CSize) $ \destOffPtr ->
+       with codepoint0 $ \codepointPtr ->
+       with state0     $ \statePtr ->
+       with nullPtr    $ \curPtrPtr ->
+         do let end = ptr `plusPtr` (off + len)
+                curPtr = ptr `plusPtr` off
+            poke curPtrPtr curPtr
+            c_decode_utf8_with_state (A.maBA dest) destOffPtr curPtrPtr end codepointPtr statePtr
+            state <- peek statePtr
+            lastPtr <- peek curPtrPtr
+            codepoint <- peek codepointPtr
+            n <- peek destOffPtr
+            chunkText <- mkText dest n
+            let left      = lastPtr `minusPtr` curPtr
+                remaining = B.drop left bs
+                accum = if T.null chunkText then B.append old remaining  else remaining 
+            return $! case state of 
+              UTF8_REJECT -> Other chunkText accum -- We encountered an encoding error
+              _ ->           Some  chunkText accum (decodeChunkUtf8 accum codepoint state)
+     {-# INLINE decodeChunkToBuffer #-}
+  {-# INLINE decodeChunkUtf8 #-}
+{-# INLINE streamDecodeUtf8 #-}
  
  
-decodeChunk :: ByteString -> CodePoint -> DecoderState -> ByteString -> Decoding
-decodeChunk old codepoint0 state0 bs@(PS fp off len) = 
-                  runST $ do marray <- A.new (len+1) 
-                             unsafeIOToST (decodeChunkToBuffer marray)
-   where
-   decodeChunkToBuffer :: A.MArray s -> IO Decoding
-   decodeChunkToBuffer dest = withForeignPtr fp $ \ptr ->
-     with (0::CSize) $ \destOffPtr ->
-     with codepoint0 $ \codepointPtr ->
-     with state0     $ \statePtr ->
-     with nullPtr    $ \curPtrPtr ->
+decodeSomeUtf8 :: ByteString -> (Text, ByteString)
+decodeSomeUtf8 bs@(PS fp off len) = runST $ do 
+  dest <- A.new (len+1) 
+  unsafeIOToST $ 
+     withForeignPtr fp $ \ptr ->
+     with (0::CSize)        $ \destOffPtr ->
+     with (0::CodePoint)    $ \codepointPtr ->
+     with (0::DecoderState) $ \statePtr ->
+     with nullPtr           $ \curPtrPtr ->
         do let end = ptr `plusPtr` (off + len)
                curPtr = ptr `plusPtr` off
            poke curPtrPtr curPtr
         do let end = ptr `plusPtr` (off + len)
                curPtr = ptr `plusPtr` off
            poke curPtrPtr curPtr
-          c_decode_utf8_with_state (A.maBA dest) destOffPtr curPtrPtr end codepointPtr statePtr
+          c_decode_utf8_with_state (A.maBA dest) destOffPtr 
+                                   curPtrPtr end codepointPtr statePtr
            state <- peek statePtr
            lastPtr <- peek curPtrPtr
            codepoint <- peek codepointPtr
            n <- peek destOffPtr
            state <- peek statePtr
            lastPtr <- peek curPtrPtr
            codepoint <- peek codepointPtr
            n <- peek destOffPtr
-          chunkText <- mkText dest n
+          chunkText <- unsafeSTToIO $ do arr <- A.unsafeFreeze dest
+                                         return $! textP arr 0 (fromIntegral n)
            let left      = lastPtr `minusPtr` curPtr
                remaining = B.drop left bs
            let left      = lastPtr `minusPtr` curPtr
                remaining = B.drop left bs
-              accum = if T.null chunkText then B.append old remaining  else remaining 
-          return $ case state of 
-            UTF8_REJECT -> Other chunkText accum -- We encountered an encoding error
-            _ ->           Some  chunkText accum (decodeChunk accum codepoint state)
+          return $! (chunkText, remaining)
+{-# INLINE decodeSomeUtf8 #-}
+
+-- decodeSomeUtf8 :: ByteString -> (Text, ByteString)
+-- decodeSomeUtf8 bs@(PS fp off len) = 
+--                   runST $ do marray <- A.new (len+1) 
+--                              unsafeIOToST (decodeChunkToBuffer marray)
+--   
+--      where
+--      decodeChunkToBuffer :: A.MArray s -> IO (Text, ByteString)
+--      decodeChunkToBuffer dest = withForeignPtr fp $ \ptr ->
+--        with (0::CSize)        $ \destOffPtr ->
+--        with (0::CodePoint)    $ \codepointPtr ->
+--        with (0::DecoderState) $ \statePtr ->
+--        with nullPtr           $ \curPtrPtr ->
+--          do let end = ptr `plusPtr` (off + len)
+--                 curPtr = ptr `plusPtr` off
+--             poke curPtrPtr curPtr
+--             c_decode_utf8_with_state (A.maBA dest) destOffPtr curPtrPtr end codepointPtr statePtr
+--             state <- peek statePtr
+--             lastPtr <- peek curPtrPtr
+--             codepoint <- peek codepointPtr
+--             n <- peek destOffPtr
+--             chunkText <- unsafeSTToIO $ do arr <- A.unsafeFreeze dest
+--                                            return $! textP arr 0 (fromIntegral n)
+--             let left      = lastPtr `minusPtr` curPtr
+--                 remaining = B.drop left bs
+--             return $! (chunkText, remaining)
+--      {-# INLINE decodeChunkToBuffer #-}
+-- {-# INLINE decodeSomeUtf8 #-}
+
  
  
  mkText :: A.MArray s -> CSize -> IO Text
  mkText dest n =  unsafeSTToIO $ do arr <- A.unsafeFreeze dest
                                     return $! textP arr 0 (fromIntegral n)
  
  
  mkText :: A.MArray s -> CSize -> IO Text
  mkText dest n =  unsafeSTToIO $ do arr <- A.unsafeFreeze dest
                                     return $! textP arr 0 (fromIntegral n)
+{-# INLINE mkText #-}
  
  ord :: Char -> Int
  ord (C# c#) = I# (ord# c#)
  
  ord :: Char -> Int
  ord (C# c#) = I# (ord# c#)
@@ -107,6 +154,7 @@ unsafeWrite marr i c
            lo = fromIntegral $ (m `shiftR` 10) + 0xD800
            hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
            shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#)
            lo = fromIntegral $ (m `shiftR` 10) + 0xD800
            hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
            shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#)
+          {-# INLINE shiftR #-}
  {-# INLINE unsafeWrite #-}
  
  foreign import ccall unsafe "_hs_pipes_text_decode_utf8_state" c_decode_utf8_with_state
  {-# INLINE unsafeWrite #-}
  
  foreign import ccall unsafe "_hs_pipes_text_decode_utf8_state" c_decode_utf8_with_state
diff --git a/bench/IO.hs b/bench/IO.hs

new file mode 100644 (file)

index 0000000..b3a52f6
--- /dev/null
+++ b/bench/IO.hs
@@ -0,0 +1,20 @@
+import qualified Data.Text.IO as T
+import qualified Data.Text as T
+import qualified Data.Text.Lazy.IO as TL
+import qualified Data.Text.Lazy as TL
+
+import Pipes
+import qualified Pipes.Text as TP
+import qualified Pipes.ByteString as BP
+import Pipes.Safe
+
+-- Benchmark entry point; currently runs the plain Data.Text.IO baseline.
+main :: IO ()
+main = textaction
+big :: FilePath  -- input file read by both actions
+big = "../../examples/txt/words2.txt"
+-- Baseline (textaction) and pipes-based (pipeaction) ways of echoing 'big'.
+textaction, pipeaction :: IO ()
+textaction = T.readFile big >>= T.putStrLn
+-- TP.readFile needs a MonadSafe base monad (IO is not one), so run it under
+pipeaction = runSafeT $ runEffect $ for (TP.readFile big) (lift . lift . T.putStrLn) -- runSafeT; each lift crosses one layer (Proxy, then SafeT)
diff --git a/cbits/cbits.c b/cbits/cbits.c

index e0fdfd5a11a622ef68fd7c7106bbcf0a336ea13d..c11645b3bb067ca6158eeb98f2e101f34a3bf589 100644 (file)
--- a/cbits/cbits.c
+++ b/cbits/cbits.c
@@ -79,30 +79,38 @@ decode(uint32_t *state, uint32_t* codep, uint32_t byte) {
   *      state0 != UTF8_ACCEPT, UTF8_REJECT
   *
   */
   *      state0 != UTF8_ACCEPT, UTF8_REJECT
   *
   */
-const uint8_t *
-_hs_pipes_text_decode_utf8_state(uint16_t *const dest, size_t *destoff,
-                           const uint8_t **const src,
-                           const uint8_t *const srcend,
-                           uint32_t *codepoint0, uint32_t *state0)
+
+ #if defined(__GNUC__) || defined(__clang__)
+ static inline uint8_t const *
+ _hs_pipes_text_decode_utf8_int(uint16_t *const dest, size_t *destoff,
+                        const uint8_t const **src, const uint8_t const *srcend,
+                        uint32_t *codepoint0, uint32_t *state0)
+   __attribute((always_inline));
+ #endif
+
+static inline uint8_t const *
+_hs_pipes_text_decode_utf8_int(uint16_t *const dest, size_t *destoff,
+                        const uint8_t const **src, const uint8_t const *srcend,
+                        uint32_t *codepoint0, uint32_t *state0)
  {
  {
-  uint16_t *d = dest + *destoff;
-  const uint8_t *s = *src, *last = *src;
-  uint32_t state = *state0;
-  uint32_t codepoint = *codepoint0;
+ uint16_t *d = dest + *destoff;
+ const uint8_t *s = *src, *last = *src;
+ uint32_t state = *state0;
+ uint32_t codepoint = *codepoint0;
  
  
-  while (s < srcend) {
+ while (s < srcend) {
  #if defined(__i386__) || defined(__x86_64__)
  #if defined(__i386__) || defined(__x86_64__)
-    /*
-     * This code will only work on a little-endian system that
-     * supports unaligned loads.
-     *
-     * It gives a substantial speed win on data that is purely or
-     * partly ASCII (e.g. HTML), at only a slight cost on purely
-     * non-ASCII text.
-     */
-
-    if (state == UTF8_ACCEPT) {
-      while (s < srcend - 4) {
+   /*
+    * This code will only work on a little-endian system that
+    * supports unaligned loads.
+    *
+    * It gives a substantial speed win on data that is purely or
+    * partly ASCII (e.g. HTML), at only a slight cost on purely
+    * non-ASCII text.
+    */
+
+   if (state == UTF8_ACCEPT) {
+     while (s < srcend - 4) {
         codepoint = *((uint32_t *) s);
         if ((codepoint & 0x80808080) != 0)
           break;
         codepoint = *((uint32_t *) s);
         if ((codepoint & 0x80808080) != 0)
           break;
@@ -117,35 +125,44 @@ _hs_pipes_text_decode_utf8_state(uint16_t *const dest, size_t *destoff,
         *d++ = (uint16_t) ((codepoint >> 8) & 0xff);
         *d++ = (uint16_t) ((codepoint >> 16) & 0xff);
         *d++ = (uint16_t) ((codepoint >> 24) & 0xff);
         *d++ = (uint16_t) ((codepoint >> 8) & 0xff);
         *d++ = (uint16_t) ((codepoint >> 16) & 0xff);
         *d++ = (uint16_t) ((codepoint >> 24) & 0xff);
-      }
-      last = s;
-    }
+     }
+     last = s;
+   }
  #endif
  
  #endif
  
-    if (decode(&state, &codepoint, *s++) != UTF8_ACCEPT) {
-      if (state != UTF8_REJECT)
+   if (decode(&state, &codepoint, *s++) != UTF8_ACCEPT) {
+     if (state != UTF8_REJECT)
         continue;
         continue;
-      break;
-    }
-
-    if (codepoint <= 0xffff)
-      *d++ = (uint16_t) codepoint;
-    else {
-      *d++ = (uint16_t) (0xD7C0 + (codepoint >> 10));
-      *d++ = (uint16_t) (0xDC00 + (codepoint & 0x3FF));
-    }
-    last = s;
-  }
-
-  /* Invalid encoding, back up to the errant character */
-  if (state == UTF8_REJECT)
-    s -= 1;
-
-  *destoff = d - dest;
-  *codepoint0 = codepoint;
-  *state0 = state;
-  *src = last;
-
-  return s;
+     break;
+   }
+
+   if (codepoint <= 0xffff)
+     *d++ = (uint16_t) codepoint;
+   else {
+     *d++ = (uint16_t) (0xD7C0 + (codepoint >> 10));
+     *d++ = (uint16_t) (0xDC00 + (codepoint & 0x3FF));
+   }
+   last = s;
+ }
+
+ *destoff = d - dest;
+ *codepoint0 = codepoint;
+ *state0 = state;
+ *src = last;
+
+ return s;
+}
+
+uint8_t const *
+_hs_pipes_text_decode_utf8_state(uint16_t *const dest, size_t *destoff,
+                          const uint8_t const **src,
+                          const uint8_t const *srcend,
+                          uint32_t *codepoint0, uint32_t *state0)
+{
+ uint8_t const *ret = _hs_pipes_text_decode_utf8_int(dest, destoff, src, srcend,
+                                               codepoint0, state0);
+ if (*state0 == UTF8_REJECT)
+   ret -=1;
+ return ret;
  }
  
  }
  
diff --git a/pipes-text.cabal b/pipes-text.cabal

index 86fbab821593bcfcbd1a259ba815e280317ec753..b4388bef3b4c639841a2f9c81be5e1a2d48ef187 100644 (file)
--- a/pipes-text.cabal
+++ b/pipes-text.cabal
@@ -25,6 +25,9 @@ library
                         pipes-bytestring >= 1.0 && < 1.2,
                         transformers >= 0.3     && < 0.4,
                         text >=0.11             && < 0.12,
                         pipes-bytestring >= 1.0 && < 1.2,
                         transformers >= 0.3     && < 0.4,
                         text >=0.11             && < 0.12,
-                       bytestring >=0.10       && < 0.11
+                       bytestring >=0.10       && < 0.11,
+                       vector,
+                       void
    -- hs-source-dirs:      
    -- hs-source-dirs:      
-  default-language:    Haskell2010
-\ No newline at end of file
+  default-language:    Haskell2010
+  ghc-options: -O2 
diff --git a/test/Test.hs b/test/Test.hs

index f2bf17b43c09a8651bbbfd91b3ee910845ec7c37..373bafb861f99e7bed7390e4192df9eef369bb7c 100644 (file)
--- a/test/Test.hs
+++ b/test/Test.hs
@@ -27,11 +27,11 @@ import qualified Pipes as P
  main :: IO ()
  main = defaultMain [tests]
  -- >>> :main  -a 10000
  main :: IO ()
  main = defaultMain [tests]
  -- >>> :main  -a 10000
-
  tests = testGroup "stream_decode" [
    -- testProperty "t_utf8_incr_valid" t_utf8_incr_valid,
    testProperty "t_utf8_incr_mixed" t_utf8_incr_mixed ,
  tests = testGroup "stream_decode" [
    -- testProperty "t_utf8_incr_valid" t_utf8_incr_valid,
    testProperty "t_utf8_incr_mixed" t_utf8_incr_mixed ,
-  testProperty "t_utf8_incr_pipe" t_utf8_incr_pipe]
+  testProperty "t_utf8_incr_pipe" t_utf8_incr_pipe,
+  testProperty "t_utf8_dec_some" t_utf8_dec_some]
  
  t_utf8_incr_valid  = do
          Positive n <- arbitrary
  
  t_utf8_incr_valid  = do
          Positive n <- arbitrary
@@ -82,6 +82,19 @@ t_utf8_incr_pipe  = do
      chunk n bs = let (a,b) = B.splitAt n bs in if B.null a then [] else a : chunk n b
      appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append
  
      chunk n bs = let (a,b) = B.splitAt n bs in if B.null a then [] else a : chunk n b
      appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append
  
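+-- Re-encoding the text from 'PE.decodeSomeUtf8' and appending its leftover bytes must reproduce the input.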
+t_utf8_dec_some = do    
+       Positive  m <- arbitrary
+       txt         <- genUnicode
+       let bytesLength = mod 10 m :: Int
+       forAll (vector bytesLength) $ 
+              (roundtrip . appendBytes txt) 
+              `eq` 
+              appendBytes txt
+    where 
+    roundtrip bs = case PE.decodeSomeUtf8 bs of
+                        (txt,bys) -> E.encodeUtf8 txt <> bys
+    appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts ; (<>) = B.append
author	michaelt <what_is_it_to_do_anything@yahoo.com>
	Wed, 15 Jan 2014 03:05:12 +0000 (22:05 -0500)
committer	michaelt <what_is_it_to_do_anything@yahoo.com>
	Wed, 15 Jan 2014 03:05:12 +0000 (22:05 -0500)
Pipes/Text.hs		patch \| blob \| blame \| history
Pipes/Text/Internal.hs		patch \| blob \| blame \| history
bench/IO.hs	[new file with mode: 0644]	patch \| blob
cbits/cbits.c		patch \| blob \| blame \| history
pipes-text.cabal		patch \| blob \| blame \| history
test/Test.hs		patch \| blob \| blame \| history