use new text-stream-decoding

author michaelt <what_is_it_to_do_anything@yahoo.com>

Sat, 15 Feb 2014 15:01:48 +0000 (10:01 -0500)

committer michaelt <what_is_it_to_do_anything@yahoo.com>

Sat, 15 Feb 2014 15:01:48 +0000 (10:01 -0500)
author michaelt <what_is_it_to_do_anything@yahoo.com>
Sat, 15 Feb 2014 15:01:48 +0000 (10:01 -0500)
committer michaelt <what_is_it_to_do_anything@yahoo.com>
Sat, 15 Feb 2014 15:01:48 +0000 (10:01 -0500)
diff --git a/Pipes/Text.hs b/Pipes/Text.hs

index 4b2d2b04261f9dfe8d565b5b04acfc7be95c67c6..8221c01ff160336f29009ee67ccdb904d9bc8bd3 100644 (file)
--- a/Pipes/Text.hs
+++ b/Pipes/Text.hs
@@ -10,7 +10,8 @@
      example, the following program copies a document from one file to another:
  
  > import Pipes
-> import qualified Data.Text.Pipes as Text
+> import qualified Pipes.Text as Text
+> import qualified Pipes.Text.IO as Text
  > import System.IO
  >
  > main =
@@ -21,7 +22,8 @@
  To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
  
  > import Pipes
-> import qualified Data.Text.Pipes as Text
+> import qualified Pipes.Text as Text
+> import qualified Pipes.Text.IO as Text
  > import Pipes.Safe
  >
  > main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
@@ -61,14 +63,14 @@ To stream from files, the following is perhaps more Prelude-like (note that it u
  module Pipes.Text  (
      -- * Producers
        fromLazy
-    , stdin
-    , fromHandle
-    , readFile
+    -- , stdin
+    -- , fromHandle
+    -- , readFile
  
      -- * Consumers
-    , stdout
-    , toHandle
-    , writeFile
+    -- , stdout
+    -- , toHandle
+    -- , writeFile
  
      -- * Pipes
      , map
@@ -79,7 +81,7 @@ module Pipes.Text  (
      , dropWhile
      , filter
      , scan
-    , encodeUtf8
+--    , encodeUtf8
      , pack
      , unpack
      , toCaseFold
@@ -120,22 +122,22 @@ module Pipes.Text  (
      , word
      , line
      
-    -- * Decoding Lenses 
-    , decodeUtf8
-    , codec
-    
-    -- * Codecs
-    , utf8
-    , utf16_le
-    , utf16_be
-    , utf32_le
-    , utf32_be
-    
-    -- * Other Decoding/Encoding Functions
-    , decodeIso8859_1
-    , decodeAscii
-    , encodeIso8859_1
-    , encodeAscii
+    -- -- * Decoding Lenses 
+    -- , decodeUtf8
+    -- , codec
+    -- 
+    -- -- * Codecs
+    -- , utf8
+    -- , utf16_le
+    -- , utf16_be
+    -- , utf32_le
+    -- , utf32_be
+    -- 
+    -- -- * Other Decoding/Encoding Functions
+    -- , decodeIso8859_1
+    -- , decodeAscii
+    -- , encodeIso8859_1
+    -- , encodeAscii
  
      -- * FreeT Splitters
      , chunksOf
@@ -157,11 +159,9 @@ module Pipes.Text  (
  
     -- * Re-exports
      -- $reexports
-    , Decoding(..)
-    , streamDecodeUtf8
-    , decodeSomeUtf8
-    , Codec(..)
-    , TextException(..)
+    -- , DecodeResult(..)
+    -- , Codec
+    -- , TextException(..)
      , module Data.ByteString
      , module Data.Text
      , module Data.Profunctor
@@ -170,7 +170,6 @@ module Pipes.Text  (
      , module Pipes.Group
      ) where
  
-import Control.Exception (throwIO, try)
  import Control.Applicative ((<*)) 
  import Control.Monad (liftM, unless, join)
  import Control.Monad.Trans.State.Strict (StateT(..), modify)
@@ -193,24 +192,20 @@ import Data.Functor.Identity (Identity)
  import Data.Profunctor (Profunctor)
  import qualified Data.Profunctor
  import qualified Data.List as List
-import Foreign.C.Error (Errno(Errno), ePIPE)
-import qualified GHC.IO.Exception as G
  import Pipes
  import qualified Pipes.ByteString as PB
-import qualified Pipes.Text.Internal as PI
-import Pipes.Text.Internal 
+-- import Pipes.Text.Decoding
  import Pipes.Core (respond, Server')
  import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
  import qualified Pipes.Group as PG
  import qualified Pipes.Parse as PP
  import Pipes.Parse (Parser)
-import qualified Pipes.Safe.Prelude as Safe
-import qualified Pipes.Safe as Safe
-import Pipes.Safe (MonadSafe(..), Base(..))
+
  import qualified Pipes.Prelude as P
  import qualified System.IO as IO
  import Data.Char (isSpace)
  import Data.Word (Word8)
+import Data.Text.StreamDecoding
  
  import Prelude hiding (
      all,
@@ -246,78 +241,6 @@ fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
  fromLazy  = foldrChunks (\e a -> yield e >> a) (return ()) 
  {-# INLINE fromLazy #-}
  
--- | Stream text from 'stdin'
-stdin :: MonadIO m => Producer Text m ()
-stdin = fromHandle IO.stdin
-{-# INLINE stdin #-}
-
-{-| Convert a 'IO.Handle' into a text stream using a text size 
-    determined by the good sense of the text library; note that this
-    is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
-    but uses the system encoding and has other `Data.Text.IO` features
--}
-
-fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
-fromHandle h =  go where
-      go = do txt <- liftIO (T.hGetChunk h)
-              unless (T.null txt) ( do yield txt
-                                       go )
-{-# INLINABLE fromHandle#-}
-
-
-{-| Stream text from a file in the simple fashion of @Data.Text.IO@ 
-
->>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
-MAIN = PUTSTRLN "HELLO WORLD"
--}
-
-readFile :: MonadSafe m => FilePath -> Producer Text m ()
-readFile file = Safe.withFile file IO.ReadMode fromHandle
-{-# INLINE readFile #-}
-
-
-{-| Stream text to 'stdout'
-
-    Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
-
-    Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@ 
-    instead of @(source >-> stdout)@ .
--}
-stdout :: MonadIO m => Consumer' Text m ()
-stdout = go
-  where
-    go = do
-        txt <- await
-        x  <- liftIO $ try (T.putStr txt)
-        case x of
-            Left (G.IOError { G.ioe_type  = G.ResourceVanished
-                            , G.ioe_errno = Just ioe })
-                 | Errno ioe == ePIPE
-                     -> return ()
-            Left  e  -> liftIO (throwIO e)
-            Right () -> go
-{-# INLINABLE stdout #-}
-
-
-{-| Convert a text stream into a 'Handle'
-
-    Note: again, for best performance, where possible use 
-    @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
--}
-toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
-toHandle h = for cat (liftIO . T.hPutStr h)
-{-# INLINABLE toHandle #-}
-
-{-# RULES "p >-> toHandle h" forall p h .
-        p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
-  #-}
-
-
--- | Stream text into a file. Uses @pipes-safe@.
-writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
-writeFile file = Safe.withFile file IO.WriteMode toHandle
-{-# INLINE writeFile #-}
-
  
  type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
  
@@ -690,28 +613,6 @@ isEndOfChars = do
  {-# INLINABLE isEndOfChars #-}
  
  
-{- | An improper lens into a stream of 'ByteString' expected to be UTF-8 encoded; the associated
-   stream of Text ends by returning a stream of ByteStrings beginning at the point of failure. 
-   -}
-
-decodeUtf8 :: Monad m => Lens' (Producer ByteString m r) 
-                               (Producer Text m (Producer ByteString m r))
-decodeUtf8 k p0 = fmap (\p -> join  (for p (yield . TE.encodeUtf8))) 
-                       (k (go B.empty PI.streamDecodeUtf8 p0)) where
-  go !carry dec0 p = do 
-     x <- lift (next p) 
-     case x of Left r -> return (if B.null carry 
-                                    then return r -- all bytestring input was consumed
-                                    else (do yield carry -- a potentially valid fragment remains
-                                             return r))
-                                           
-               Right (chunk, p') -> case dec0 chunk of 
-                   PI.Some text carry2 dec -> do yield text
-                                                 go carry2 dec p'
-                   PI.Other text bs -> do yield text 
-                                          return (do yield bs -- an invalid blob remains
-                                                     p')
-{-# INLINABLE decodeUtf8 #-}
  
  
  -- | Splits a 'Producer' after the given number of characters
@@ -1057,106 +958,4 @@ unwords = intercalate (yield $ T.singleton ' ')
      @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym. 
  -}
  
-{- | Use a 'Codec' as a pipes-style 'Lens' into a byte stream; the available 'Codec' s are
-     'utf8', 'utf16_le', 'utf16_be', 'utf32_le', 'utf32_be' . The 'Codec' concept and the 
-     individual 'Codec' definitions follow the enumerator and conduit libraries. 
-     
-     Utf8 is handled differently in this library -- without the use of 'unsafePerformIO' &co 
-     to catch 'Text' exceptions; but the same 'mypipe ^. codec utf8' interface can be used.
-     'mypipe ^. decodeUtf8' should be the same, but has a somewhat more direct and thus perhaps
-     better implementation.  
-
-     -}
-codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
-codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc))) 
-                                     (k (decoder (dec B.empty) p0) ) where 
-  decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
-  decoder !d p0 = case d of 
-      PI.Other txt bad      -> do yield txt
-                                  return (do yield bad
-                                             p0)
-      PI.Some txt extra dec -> do yield txt
-                                  x <- lift (next p0)
-                                  case x of Left r -> return (do yield extra
-                                                                 return r)
-                                            Right (chunk,p1) -> decoder (dec chunk) p1
-
-{- | ascii and latin encodings only represent a small fragment of 'Text'; thus we cannot
-     use the pipes 'Lens' style to work with them. Rather we simply define functions 
-     each way. 
-
-     'encodeAscii' : Reduce as much of your stream of 'Text' actually is ascii to a byte stream,
-     returning the rest of the 'Text' at the first non-ascii 'Char'
--}
-encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
-encodeAscii = go where
-  go p = do echunk <- lift (next p)
-            case echunk of 
-              Left r -> return (return r)
-              Right (chunk, p') -> 
-                 if T.null chunk 
-                   then go p'
-                   else let (safe, unsafe)  = T.span (\c -> ord c <= 0x7F) chunk
-                        in do yield (B8.pack (T.unpack safe))
-                              if T.null unsafe
-                                then go p'
-                                else return $ do yield unsafe 
-                                                 p'
-{- | Reduce as much of your stream of 'Text' actually is iso8859 or latin1 to a byte stream,
-     returning the rest of the 'Text' upon hitting any non-latin 'Char'
-   -}
-encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
-encodeIso8859_1 = go where
-  go p = do etxt <- lift (next p)
-            case etxt of 
-              Left r -> return (return r)
-              Right (txt, p') -> 
-                 if T.null txt 
-                   then go p'
-                   else let (safe, unsafe)  = T.span (\c -> ord c <= 0xFF) txt
-                        in do yield (B8.pack (T.unpack safe))
-                              if T.null unsafe
-                                then go p'
-                                else return $ do yield unsafe 
-                                                 p'
-
-{- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
-     unused 'ByteString' upon hitting an un-ascii byte.
-   -}
-decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
-decodeAscii = go where
-  go p = do echunk <- lift (next p)
-            case echunk of 
-              Left r -> return (return r)
-              Right (chunk, p') -> 
-                 if B.null chunk 
-                   then go p'
-                   else let (safe, unsafe)  = B.span (<= 0x7F) chunk
-                        in do yield (T.pack (B8.unpack safe))
-                              if B.null unsafe
-                                then go p'
-                                else return $ do yield unsafe 
-                                                 p'
-
-{- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
-     unused 'ByteString' upon hitting the rare un-latinizable byte.
-     -}
-decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
-decodeIso8859_1 = go where
-  go p = do echunk <- lift (next p)
-            case echunk of 
-              Left r -> return (return r)
-              Right (chunk, p') -> 
-                 if B.null chunk 
-                   then go p'
-                   else let (safe, unsafe)  = B.span (<= 0xFF) chunk
-                        in do yield (T.pack (B8.unpack safe))
-                              if B.null unsafe
-                                then go p'
-                                else return $ do yield unsafe 
-                                                 p'
-
-
-
-
-                                            
+
diff --git a/Pipes/Text/Encoding.hs b/Pipes/Text/Encoding.hs

new file mode 100644 (file)

index 0000000..2bb5807
--- /dev/null
+++ b/Pipes/Text/Encoding.hs
@@ -0,0 +1,205 @@
+
+{-# LANGUAGE RankNTypes, BangPatterns #-}
+-- |
+-- Copyright: 2014 Michael Thompson
+--
+-- This module uses the stream decoding functions from the text-stream-decoding package
+-- to define pipes decoding functions and lenses.
+
+module Pipes.Text.Encoding
+    ( DecodeResult (..)
+    , Codec
+    , decodeUtf8
+    , decodeUtf8Pure
+    , decodeUtf16LE
+    , decodeUtf16BE
+    , decodeUtf32LE
+    , decodeUtf32BE
+    , utf8
+    , utf8Pure
+    , utf16LE
+    , utf16BE
+    , utf32LE
+    , utf32BE
+    , encodeAscii
+    , decodeAscii
+    , encodeIso8859_1
+    , decodeIso8859_1
+    ) 
+    where
+
+import Data.Char (ord)
+import Data.ByteString as B 
+import Data.ByteString (ByteString)
+import Data.ByteString.Internal as B 
+import Data.ByteString.Char8 as B8
+import Data.Text (Text)
+import qualified Data.Text as T 
+import qualified Data.Text.Encoding as TE 
+import Data.Text.StreamDecoding
+import GHC.Word (Word8, Word32)
+import Data.Word (Word8, Word16)
+import Control.Monad
+import Pipes
+import Pipes.Core
+
+
+
+{- | A 'Codec' is just an improper lens into a byte stream that is expected to contain text.
+    They are named in accordance with the expected encoding, 'utf8', 'utf16LE' etc.
+    The stream of text they 'see' in a bytestream ends by returning the original byte stream 
+    beginning at the point of failure, or the empty bytestream with its return value.
+   -}
+type Codec  = forall f m r . (Functor f , Monad m ) => 
+     (Producer Text m (Producer ByteString m r) -> f (Producer Text m (Producer ByteString m r)))
+     -> Producer ByteString m r -> f (Producer ByteString m r )
+
+decodeStream :: Monad m 
+       => (B.ByteString -> DecodeResult) 
+       -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+decodeStream = loop where
+  loop dec0 p = 
+    do x <- lift (next p) 
+       case x of Left r -> return (return r)
+                 Right (chunk, p') -> case dec0 chunk of 
+                    DecodeResultSuccess text dec -> do yield text
+                                                       loop dec p'
+                    DecodeResultFailure text bs -> do yield text 
+                                                      return (do yield bs 
+                                                                 p')
+{-# INLINABLE decodeStream#-}
+
+decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+decodeUtf8 = decodeStream streamUtf8
+{-# INLINE decodeUtf8 #-}
+
+decodeUtf8Pure :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+decodeUtf8Pure = decodeStream streamUtf8Pure
+{-# INLINE decodeUtf8Pure #-}
+
+decodeUtf16LE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+decodeUtf16LE = decodeStream streamUtf16LE
+{-# INLINE decodeUtf16LE #-}
+
+decodeUtf16BE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+decodeUtf16BE = decodeStream streamUtf16BE
+{-# INLINE decodeUtf16BE #-}
+
+decodeUtf32LE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+decodeUtf32LE = decodeStream streamUtf32LE
+{-# INLINE decodeUtf32LE #-}
+
+decodeUtf32BE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+decodeUtf32BE = decodeStream streamUtf32BE
+{-# INLINE decodeUtf32BE #-}
+
+mkCodec :: (forall r m . Monad m => 
+           Producer ByteString m r -> Producer Text m (Producer ByteString m r ))
+        -> (Text -> ByteString)
+        -> Codec
+mkCodec dec enc = \k p0 -> fmap (\p -> join (for p (yield . enc)))  (k (dec p0))
+
+
+{- | An improper lens into a byte stream expected to be UTF-8 encoded; the associated
+   text stream ends by returning the original bytestream beginning at the point of failure,
+   or the empty bytestring for a well-encoded text. 
+   -}
+
+utf8 :: Codec
+utf8 = mkCodec decodeUtf8 TE.encodeUtf8
+
+utf8Pure :: Codec
+utf8Pure = mkCodec decodeUtf8Pure TE.encodeUtf8
+
+utf16LE :: Codec
+utf16LE = mkCodec decodeUtf16LE TE.encodeUtf16LE
+
+utf16BE :: Codec
+utf16BE = mkCodec decodeUtf16BE TE.encodeUtf16BE
+
+utf32LE :: Codec
+utf32LE = mkCodec decodeUtf32LE TE.encodeUtf32LE
+
+utf32BE :: Codec
+utf32BE = mkCodec decodeUtf32BE TE.encodeUtf32BE
+
+
+{- | ascii and latin encodings only use a small number of the characters 'Text'
+     recognizes; thus we cannot use the pipes 'Lens' style to work with them. 
+     Rather we simply define functions each way. 
+
+     'encodeAscii' : Reduce as much of your stream of 'Text' actually is ascii to a byte stream,
+     returning the rest of the 'Text' at the first non-ascii 'Char'
+-}
+
+encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
+encodeAscii = go where
+  go p = do e <- lift (next p)
+            case e of 
+              Left r -> return (return r)
+              Right (chunk, p') -> 
+                 if T.null chunk 
+                   then go p'
+                   else let (safe, unsafe)  = T.span (\c -> ord c <= 0x7F) chunk
+                        in do yield (B8.pack (T.unpack safe))
+                              if T.null unsafe
+                                then go p'
+                                else return $ do yield unsafe 
+                                                 p'
+                                                 
+{- | Reduce as much of your stream of 'Text' actually is iso8859 or latin1 to a byte stream,
+     returning the rest of the 'Text' upon hitting any non-latin 'Char'
+   -}
+encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
+encodeIso8859_1 = go where
+  go p = do e <- lift (next p)
+            case e of 
+              Left r -> return (return r)
+              Right (txt, p') -> 
+                 if T.null txt 
+                   then go p'
+                   else let (safe, unsafe)  = T.span (\c -> ord c <= 0xFF) txt
+                        in do yield (B8.pack (T.unpack safe))
+                              if T.null unsafe
+                                then go p'
+                                else return $ do yield unsafe 
+                                                 p'
+
+{- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
+     unused 'ByteString' upon hitting an un-ascii byte.
+   -}
+decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+decodeAscii = go where
+  go p = do e <- lift (next p)
+            case e of 
+              Left r -> return (return r)
+              Right (chunk, p') -> 
+                 if B.null chunk 
+                   then go p'
+                   else let (safe, unsafe) = B.span (<= 0x7F) chunk
+                        in do yield (T.pack (B8.unpack safe))
+                              if B.null unsafe
+                                then go p'
+                                else return (do yield unsafe 
+                                                p')
+
+{- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
+     unused 'ByteString' upon hitting the rare un-latinizable byte.
+     -}
+decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+decodeIso8859_1 = go where
+  go p = do e <- lift (next p)
+            case e of 
+              Left r -> return (return r)
+              Right (chunk, p') -> 
+                 if B.null chunk 
+                    then go p'
+                    else do let (safe, unsafe) = B.span (<= 0xFF) chunk
+                            yield (T.pack (B8.unpack safe))
+                            if B.null unsafe 
+                               then go p'
+                               else return (do yield unsafe 
+                                               p')
+
+
+
diff --git a/Pipes/Text/IO.hs b/Pipes/Text/IO.hs

new file mode 100644 (file)

index 0000000..3c9ac98
--- /dev/null
+++ b/Pipes/Text/IO.hs
@@ -0,0 +1,96 @@
+{-#LANGUAGE RankNTypes#-}
+
+module Pipes.Text.IO 
+   ( stdin
+   , stdout
+   , fromHandle
+   , toHandle
+   , readFile
+   , writeFile
+   ) where
+
+import qualified System.IO as IO
+import Control.Exception (throwIO, try)
+import Foreign.C.Error (Errno(Errno), ePIPE)
+import qualified GHC.IO.Exception as G
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.IO as T
+import Pipes
+import qualified Pipes.Safe.Prelude as Safe
+import qualified Pipes.Safe as Safe
+import Pipes.Safe (MonadSafe(..), Base(..))
+import Prelude hiding (readFile, writeFile)
+
+-- | Stream text from 'stdin'
+stdin :: MonadIO m => Producer Text m ()
+stdin = fromHandle IO.stdin
+{-# INLINE stdin #-}
+
+{-| Convert a 'IO.Handle' into a text stream using a text size 
+    determined by the good sense of the text library; note that this
+    is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
+    but uses the system encoding and has other `Data.Text.IO` features
+-}
+
+fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
+fromHandle h =  go where
+      go = do txt <- liftIO (T.hGetChunk h)
+              if T.null txt then return ()
+                            else do yield txt
+                                    go 
+{-# INLINABLE fromHandle#-}
+
+
+{-| Stream text from a file in the simple fashion of @Data.Text.IO@ 
+
+>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
+MAIN = PUTSTRLN "HELLO WORLD"
+-}
+
+readFile :: MonadSafe m => FilePath -> Producer Text m ()
+readFile file = Safe.withFile file IO.ReadMode fromHandle
+{-# INLINE readFile #-}
+
+
+{-| Stream text to 'stdout'
+
+    Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
+
+    Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@ 
+    instead of @(source >-> stdout)@ .
+-}
+stdout :: MonadIO m => Consumer' Text m ()
+stdout = go
+  where
+    go = do
+        txt <- await
+        x  <- liftIO $ try (T.putStr txt)
+        case x of
+            Left (G.IOError { G.ioe_type  = G.ResourceVanished
+                            , G.ioe_errno = Just ioe })
+                 | Errno ioe == ePIPE
+                     -> return ()
+            Left  e  -> liftIO (throwIO e)
+            Right () -> go
+{-# INLINABLE stdout #-}
+
+
+{-| Convert a text stream into a 'Handle'
+
+    Note: again, for best performance, where possible use 
+    @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
+-}
+toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
+toHandle h = for cat (liftIO . T.hPutStr h)
+{-# INLINABLE toHandle #-}
+
+{-# RULES "p >-> toHandle h" forall p h .
+        p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
+  #-}
+
+
+-- | Stream text into a file. Uses @pipes-safe@.
+writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
+writeFile file = Safe.withFile file IO.WriteMode toHandle
+{-# INLINE writeFile #-}
diff --git a/Pipes/Text/Internal.hs b/Pipes/Text/Internal.hs

deleted file mode 100644 (file)

index 582ef14..0000000
--- a/Pipes/Text/Internal.hs
+++ /dev/null
@@ -1,7 +0,0 @@
-module Pipes.Text.Internal
-    (module Pipes.Text.Internal.Codec
-    , module Pipes.Text.Internal.Decoding
-    ) where
-
-import Pipes.Text.Internal.Codec
-import Pipes.Text.Internal.Decoding
-\ No newline at end of file
diff --git a/Pipes/Text/Internal/Codec.hs b/Pipes/Text/Internal/Codec.hs

deleted file mode 100644 (file)

index 075a152..0000000
--- a/Pipes/Text/Internal/Codec.hs
+++ /dev/null
@@ -1,216 +0,0 @@
-
-{-# LANGUAGE DeriveDataTypeable, RankNTypes, BangPatterns #-}
--- |
--- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin
--- License: MIT
---  This Parts of this code were taken from enumerator and conduits, and adapted for pipes
-
--- This module follows the model of the enumerator and conduits libraries, and defines
--- 'Codec' s for various encodings. Note that we do not export a 'Codec' for ascii and 
--- iso8859_1. A 'Lens' in the sense of the pipes library cannot be defined for these, so
--- special functions appear in @Pipes.Text@
-
-
-module Pipes.Text.Internal.Codec
-    ( Codec(..)
-    , TextException(..)
-    , utf8
-    , utf16_le
-    , utf16_be
-    , utf32_le
-    , utf32_be
-    ) where
-
-import Data.Bits ((.&.))
-import Data.Char (ord)
-import Data.ByteString as B 
-import Data.ByteString (ByteString)
-import Data.ByteString.Internal as B 
-import Data.ByteString.Char8 as B8
-import Data.Text (Text)
-import qualified Data.Text as T 
-import qualified Data.Text.Encoding as TE 
-import Data.Text.Encoding.Error ()
-import GHC.Word (Word8, Word32)
-import qualified Data.Text.Array as A
-import Data.Word (Word8, Word16)
-import System.IO.Unsafe (unsafePerformIO)
-import qualified Control.Exception as Exc
-import Data.Bits ((.&.), (.|.), shiftL)
-import Data.Typeable
-import Control.Arrow (first)
-import Data.Maybe (catMaybes)
-import Pipes.Text.Internal.Decoding
-import Pipes
--- | A specific character encoding.
-
-data Codec = Codec
-  { codecName :: Text
-  , codecEncode :: Text -> (ByteString, Maybe (TextException, Text))
-  , codecDecode :: ByteString -> Decoding 
-  }
-
-instance Show Codec where
-    showsPrec d c = showParen (d > 10) $ 
-                    showString "Codec " . shows (codecName c)
-
-data TextException = DecodeException Codec Word8
-                   | EncodeException Codec Char
-                   | LengthExceeded Int
-                   | TextException Exc.SomeException
-    deriving (Show, Typeable)
-instance Exc.Exception TextException
-
-
-toDecoding :: (ByteString -> (Text, Either (TextException, ByteString) ByteString))
-           -> (ByteString -> Decoding)
-toDecoding op = loop B.empty where
-  loop !extra bs0 = case op (B.append extra bs0) of
-                      (txt, Right bs) -> Some txt bs (loop bs)
-                      (txt, Left (_,bs)) -> Other txt bs
--- To do: toDecoding should be inlined in each of the 'Codec' definitions
--- or else Codec changed to the conduit/enumerator definition.  We have
--- altered it to use 'streamDecodeUtf8'
-
-splitSlowly :: (ByteString -> Text)
-            -> ByteString 
-            -> (Text, Either (TextException, ByteString) ByteString)
-splitSlowly dec bytes = valid where
-    valid:_ = catMaybes $ Prelude.map decFirst $ splits (B.length bytes)
-    splits 0 = [(B.empty, bytes)]
-    splits n = B.splitAt n bytes : splits (n - 1)
-    decFirst (a, b) = case tryEvaluate (dec a) of
-        Left _ -> Nothing
-        Right text -> let trouble = case tryEvaluate (dec b) of
-                            Left exc -> Left (TextException exc, b)
-                            Right _  -> Right B.empty 
-                      in Just (text, trouble) -- this case shouldn't occur, 
-                                      -- since splitSlowly is only called
-                                      -- when parsing failed somewhere
-
-utf8 :: Codec
-utf8 = Codec name enc (toDecoding dec) where
-    name = T.pack "UTF-8"
-    enc text = (TE.encodeUtf8 text, Nothing)
-    dec bytes = case decodeSomeUtf8 bytes of (t,b) -> (t, Right b)
-
---     -- Whether the given byte is a continuation byte.
---     isContinuation byte = byte .&. 0xC0 == 0x80
--- 
---     -- The number of continuation bytes needed by the given
---     -- non-continuation byte. Returns -1 for an illegal UTF-8
---     -- non-continuation byte and the whole split quickly must fail so
---     -- as the input is passed to TE.decodeUtf8, which will issue a
---     -- suitable error.
---     required x0
---         | x0 .&. 0x80 == 0x00 = 0
---         | x0 .&. 0xE0 == 0xC0 = 1
---         | x0 .&. 0xF0 == 0xE0 = 2
---         | x0 .&. 0xF8 == 0xF0 = 3
---         | otherwise           = -1
--- 
---     splitQuickly bytes
---         | B.null l || req == -1 = Nothing
---         | req == B.length r = Just (TE.decodeUtf8 bytes, B.empty)
---         | otherwise = Just (TE.decodeUtf8 l', r')
---       where
---         (l, r) = B.spanEnd isContinuation bytes
---         req = required (B.last l)
---         l' = B.init l
---         r' = B.cons (B.last l) r
-
-
-utf16_le :: Codec
-utf16_le = Codec name enc (toDecoding dec) where
-    name = T.pack "UTF-16-LE"
-    enc text = (TE.encodeUtf16LE text, Nothing)
-    dec bytes = case splitQuickly bytes of
-        Just (text, extra) -> (text, Right extra)
-        Nothing -> splitSlowly TE.decodeUtf16LE bytes
-
-    splitQuickly bytes = maybeDecode (loop 0) where
-        maxN = B.length bytes
-
-        loop n |  n      == maxN = decodeAll
-               | (n + 1) == maxN = decodeTo n
-        loop n = let
-            req = utf16Required
-                (B.index bytes n)
-                (B.index bytes (n + 1))
-            decodeMore = loop $! n + req
-            in if n + req > maxN
-                then decodeTo n
-                else decodeMore
-
-        decodeTo n = first TE.decodeUtf16LE (B.splitAt n bytes)
-        decodeAll = (TE.decodeUtf16LE bytes, B.empty)
-
-utf16_be :: Codec
-utf16_be = Codec name enc (toDecoding dec) where
-    name = T.pack "UTF-16-BE"
-    enc text = (TE.encodeUtf16BE text, Nothing)
-    dec bytes = case splitQuickly bytes of
-        Just (text, extra) -> (text, Right extra)
-        Nothing -> splitSlowly TE.decodeUtf16BE bytes
-
-    splitQuickly bytes = maybeDecode (loop 0) where
-        maxN = B.length bytes
-
-        loop n |  n      == maxN = decodeAll
-               | (n + 1) == maxN = decodeTo n
-        loop n = let
-            req = utf16Required
-                (B.index bytes (n + 1))
-                (B.index bytes n)
-            decodeMore = loop $! n + req
-            in if n + req > maxN
-                then decodeTo n
-                else decodeMore
-
-        decodeTo n = first TE.decodeUtf16BE (B.splitAt n bytes)
-        decodeAll = (TE.decodeUtf16BE bytes, B.empty)
-
-utf16Required :: Word8 -> Word8 -> Int
-utf16Required x0 x1 = if x >= 0xD800 && x <= 0xDBFF then 4 else 2 where
-    x :: Word16
-    x = (fromIntegral x1 `shiftL` 8) .|. fromIntegral x0
-
-
-utf32_le :: Codec
-utf32_le = Codec name enc (toDecoding dec) where
-    name = T.pack "UTF-32-LE"
-    enc text = (TE.encodeUtf32LE text, Nothing)
-    dec bs = case utf32SplitBytes TE.decodeUtf32LE bs of
-        Just (text, extra) -> (text, Right extra)
-        Nothing -> splitSlowly TE.decodeUtf32LE bs
-
-
-utf32_be :: Codec
-utf32_be = Codec name enc (toDecoding dec) where
-    name = T.pack "UTF-32-BE"
-    enc text = (TE.encodeUtf32BE text, Nothing)
-    dec bs = case utf32SplitBytes TE.decodeUtf32BE bs of
-        Just (text, extra) -> (text, Right extra)
-        Nothing -> splitSlowly TE.decodeUtf32BE bs
-
-utf32SplitBytes :: (ByteString -> Text)
-                -> ByteString
-                -> Maybe (Text, ByteString)
-utf32SplitBytes dec bytes = split where
-    split = maybeDecode (dec toDecode, extra)
-    len = B.length bytes
-    lenExtra = mod len 4
-
-    lenToDecode = len - lenExtra
-    (toDecode, extra) = if lenExtra == 0
-        then (bytes, B.empty)
-        else B.splitAt lenToDecode bytes
-
-
-tryEvaluate :: a -> Either Exc.SomeException a
-tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate
-
-maybeDecode :: (a, b) -> Maybe (a, b)
-maybeDecode (a, b) = case tryEvaluate a of
-    Left _ -> Nothing
-    Right _ -> Just (a, b)
diff --git a/Pipes/Text/Internal/Decoding.hs b/Pipes/Text/Internal/Decoding.hs

deleted file mode 100644 (file)

index b5d928a..0000000
--- a/Pipes/Text/Internal/Decoding.hs
+++ /dev/null
@@ -1,154 +0,0 @@
-{-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving, MagicHash, UnliftedFFITypes #-}
-{-# LANGUAGE DeriveDataTypeable, RankNTypes #-}
-
--- This module lifts assorted materials from Brian O'Sullivan's text package 
--- especially @Data.Text.Encoding@ in order to define a pipes-appropriate
--- 'streamDecodeUtf8'
-
-module Pipes.Text.Internal.Decoding 
-    ( Decoding(..)
-    , streamDecodeUtf8
-    , decodeSomeUtf8
-    ) where
-import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO)
-import Control.Monad.ST (ST, runST)
-import Data.Bits ((.&.))
-import Data.ByteString as B 
-import Data.ByteString (ByteString)
-import Data.ByteString.Internal as B 
-import Data.ByteString.Char8 as B8
-import Data.Text (Text)
-import qualified Data.Text as T 
-import qualified Data.Text.Encoding as TE 
-import Data.Text.Encoding.Error ()
-import Data.Text.Internal (Text, textP)
-import Foreign.C.Types (CSize)
-import Foreign.ForeignPtr (withForeignPtr)
-import Foreign.Marshal.Utils (with)
-import Foreign.Ptr (Ptr, minusPtr, nullPtr, plusPtr)
-import Foreign.Storable (Storable, peek, poke)
-import GHC.Base  (Char(..), Int(..), MutableByteArray#, ord#, iShiftRA#)
-import GHC.Word (Word8, Word32)
-import qualified Data.Text.Array as A
-import Data.Word (Word8, Word16)
-import System.IO.Unsafe (unsafePerformIO)
-import qualified Control.Exception as Exc
-import Data.Bits ((.&.), (.|.), shiftL)
-import Data.Typeable
-import Control.Arrow (first)
-import Data.Maybe (catMaybes)
-#include "pipes_text_cbits.h"
-
-
-
---  A stream oriented decoding result. Distinct from the similar type in Data.Text.Encoding
-
-data Decoding = Some Text ByteString (ByteString -> Decoding) 
-               -- Text, continuation and any undecoded fragment.
-              | Other Text ByteString  
-              --  Text followed by an undecodable ByteString
-              
-instance Show Decoding where
-    showsPrec d (Some t bs _) = showParen (d > prec) $
-                                showString "Some " . showsPrec prec' t .
-                                showChar ' ' . showsPrec prec' bs .
-                                showString " _"
-      where prec = 10; prec' = prec + 1
-    showsPrec d (Other t bs)  = showParen (d > prec) $
-                                showString "Other " . showsPrec prec' t .
-                                showChar ' ' . showsPrec prec' bs .
-                                showString " _"
-      where prec = 10; prec' = prec + 1
-
-newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable)
-newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable)
-
---  Resolve a 'ByteString' into 'Text' and a continuation that can handle further 'ByteStrings'. 
-streamDecodeUtf8 :: ByteString -> Decoding
-streamDecodeUtf8 = decodeChunkUtf8 B.empty 0 0 
-  where
-  decodeChunkUtf8 :: ByteString -> CodePoint -> DecoderState -> ByteString -> Decoding
-  decodeChunkUtf8 old codepoint0 state0 bs@(PS fp off len) = 
-                    runST $ do marray <- A.new (len+1) 
-                               unsafeIOToST (decodeChunkToBuffer marray)
-     where
-     decodeChunkToBuffer :: A.MArray s -> IO Decoding
-     decodeChunkToBuffer dest = withForeignPtr fp $ \ptr ->
-       with (0::CSize) $ \destOffPtr ->
-       with codepoint0 $ \codepointPtr ->
-       with state0     $ \statePtr ->
-       with nullPtr    $ \curPtrPtr ->
-         do let end = ptr `plusPtr` (off + len)
-                curPtr = ptr `plusPtr` off
-            poke curPtrPtr curPtr
-            c_decode_utf8_with_state (A.maBA dest) destOffPtr curPtrPtr end codepointPtr statePtr
-            state <- peek statePtr
-            lastPtr <- peek curPtrPtr
-            codepoint <- peek codepointPtr
-            n <- peek destOffPtr
-            chunkText <- mkText dest n
-            let left      = lastPtr `minusPtr` curPtr
-                remaining = B.drop left bs
-                accum = if T.null chunkText then B.append old remaining  else remaining 
-            return $! case state of 
-              UTF8_REJECT -> Other chunkText accum -- We encountered an encoding error
-              _ ->           Some  chunkText accum (decodeChunkUtf8 accum codepoint state)
-     {-# INLINE decodeChunkToBuffer #-}
-  {-# INLINE decodeChunkUtf8 #-}
-{-# INLINE streamDecodeUtf8 #-}
-
---  Resolve a ByteString into an initial segment of intelligible 'Text' and whatever is unintelligble
-decodeSomeUtf8 :: ByteString -> (Text, ByteString)
-decodeSomeUtf8 bs@(PS fp off len) = runST $ do 
-  dest <- A.new (len+1) 
-  unsafeIOToST $ 
-     withForeignPtr fp $ \ptr ->
-     with (0::CSize)        $ \destOffPtr ->
-     with (0::CodePoint)    $ \codepointPtr ->
-     with (0::DecoderState) $ \statePtr ->
-     with nullPtr           $ \curPtrPtr ->
-       do let end = ptr `plusPtr` (off + len)
-              curPtr = ptr `plusPtr` off
-          poke curPtrPtr curPtr
-          c_decode_utf8_with_state (A.maBA dest) destOffPtr 
-                                   curPtrPtr end codepointPtr statePtr
-          state <- peek statePtr
-          lastPtr <- peek curPtrPtr
-          codepoint <- peek codepointPtr
-          n <- peek destOffPtr
-          chunkText <- unsafeSTToIO $ do arr <- A.unsafeFreeze dest
-                                         return $! textP arr 0 (fromIntegral n)
-          let left      = lastPtr `minusPtr` curPtr
-              remaining = B.drop left bs
-          return $! (chunkText, remaining)
-{-# INLINE decodeSomeUtf8 #-}
-
-mkText :: A.MArray s -> CSize -> IO Text
-mkText dest n =  unsafeSTToIO $ do arr <- A.unsafeFreeze dest
-                                   return $! textP arr 0 (fromIntegral n)
-{-# INLINE mkText #-}
-
-ord :: Char -> Int
-ord (C# c#) = I# (ord# c#)
-{-# INLINE ord #-}
-
-unsafeWrite :: A.MArray s -> Int -> Char -> ST s Int
-unsafeWrite marr i c
-    | n < 0x10000 = do A.unsafeWrite marr i (fromIntegral n)
-                       return 1
-    | otherwise   = do A.unsafeWrite marr i lo
-                       A.unsafeWrite marr (i+1) hi
-                       return 2
-    where n = ord c
-          m = n - 0x10000
-          lo = fromIntegral $ (m `shiftR` 10) + 0xD800
-          hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
-          shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#)
-          {-# INLINE shiftR #-}
-{-# INLINE unsafeWrite #-}
-
-foreign import ccall unsafe "_hs_pipes_text_decode_utf8_state" c_decode_utf8_with_state
-    :: MutableByteArray# s -> Ptr CSize
-    -> Ptr (Ptr Word8) -> Ptr Word8
-    -> Ptr CodePoint -> Ptr DecoderState -> IO (Ptr Word8)
-\ No newline at end of file
diff --git a/cbits/cbits.c b/cbits/cbits.c

deleted file mode 100644 (file)

index c11645b..0000000
--- a/cbits/cbits.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2011 Bryan O'Sullivan <bos@serpentine.com>.
- *
- * Portions copyright (c) 2008-2010 Björn Höhrmann <bjoern@hoehrmann.de>.
- *
- * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
- */
-
-#include <string.h>
-#include <stdint.h>
-#include <stdio.h>
-#include "pipes_text_cbits.h"
-
-
-
-#define UTF8_ACCEPT 0
-#define UTF8_REJECT 12
-
-static const uint8_t utf8d[] = {
-  /*
-   * The first part of the table maps bytes to character classes that
-   * to reduce the size of the transition table and create bitmasks.
-   */
-   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
-
-  /*
-   * The second part is a transition table that maps a combination of
-   * a state of the automaton and a character class to a state.
-   */
-   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
-  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
-  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
-  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
-  12,36,12,12,12,12,12,12,12,12,12,12,
-};
-
-static inline uint32_t
-decode(uint32_t *state, uint32_t* codep, uint32_t byte) {
-  uint32_t type = utf8d[byte];
-
-  *codep = (*state != UTF8_ACCEPT) ?
-    (byte & 0x3fu) | (*codep << 6) :
-    (0xff >> type) & (byte);
-
-  return *state = utf8d[256 + *state + type];
-}
-
-/*
- * A best-effort decoder. Runs until it hits either end of input or
- * the start of an invalid byte sequence.
- *
- * At exit, we update *destoff with the next offset to write to, *src
- * with the next source location past the last one successfully
- * decoded, and return the next source location to read from.
- *
- * Moreover, we expose the internal decoder state (state0 and
- * codepoint0), allowing one to restart the decoder after it
- * terminates (say, due to a partial codepoint).
- *
- * In particular, there are a few possible outcomes,
- *
- *   1) We decoded the buffer entirely:
- *      In this case we return srcend
- *      state0 == UTF8_ACCEPT
- *
- *   2) We met an invalid encoding
- *      In this case we return the address of the first invalid byte
- *      state0 == UTF8_REJECT
- *
- *   3) We reached the end of the buffer while decoding a codepoint
- *      In this case we return a pointer to the first byte of the partial codepoint
- *      state0 != UTF8_ACCEPT, UTF8_REJECT
- *
- */
-
- #if defined(__GNUC__) || defined(__clang__)
- static inline uint8_t const *
- _hs_pipes_text_decode_utf8_int(uint16_t *const dest, size_t *destoff,
-                        const uint8_t const **src, const uint8_t const *srcend,
-                        uint32_t *codepoint0, uint32_t *state0)
-   __attribute((always_inline));
- #endif
-
-static inline uint8_t const *
-_hs_pipes_text_decode_utf8_int(uint16_t *const dest, size_t *destoff,
-                        const uint8_t const **src, const uint8_t const *srcend,
-                        uint32_t *codepoint0, uint32_t *state0)
-{
- uint16_t *d = dest + *destoff;
- const uint8_t *s = *src, *last = *src;
- uint32_t state = *state0;
- uint32_t codepoint = *codepoint0;
-
- while (s < srcend) {
-#if defined(__i386__) || defined(__x86_64__)
-   /*
-    * This code will only work on a little-endian system that
-    * supports unaligned loads.
-    *
-    * It gives a substantial speed win on data that is purely or
-    * partly ASCII (e.g. HTML), at only a slight cost on purely
-    * non-ASCII text.
-    */
-
-   if (state == UTF8_ACCEPT) {
-     while (s < srcend - 4) {
-       codepoint = *((uint32_t *) s);
-       if ((codepoint & 0x80808080) != 0)
-         break;
-       s += 4;
-
-       /*
-        * Tried 32-bit stores here, but the extra bit-twiddling
-        * slowed the code down.
-        */
-
-       *d++ = (uint16_t) (codepoint & 0xff);
-       *d++ = (uint16_t) ((codepoint >> 8) & 0xff);
-       *d++ = (uint16_t) ((codepoint >> 16) & 0xff);
-       *d++ = (uint16_t) ((codepoint >> 24) & 0xff);
-     }
-     last = s;
-   }
-#endif
-
-   if (decode(&state, &codepoint, *s++) != UTF8_ACCEPT) {
-     if (state != UTF8_REJECT)
-       continue;
-     break;
-   }
-
-   if (codepoint <= 0xffff)
-     *d++ = (uint16_t) codepoint;
-   else {
-     *d++ = (uint16_t) (0xD7C0 + (codepoint >> 10));
-     *d++ = (uint16_t) (0xDC00 + (codepoint & 0x3FF));
-   }
-   last = s;
- }
-
- *destoff = d - dest;
- *codepoint0 = codepoint;
- *state0 = state;
- *src = last;
-
- return s;
-}
-
-uint8_t const *
-_hs_pipes_text_decode_utf8_state(uint16_t *const dest, size_t *destoff,
-                          const uint8_t const **src,
-                          const uint8_t const *srcend,
-                          uint32_t *codepoint0, uint32_t *state0)
-{
- uint8_t const *ret = _hs_pipes_text_decode_utf8_int(dest, destoff, src, srcend,
-                                               codepoint0, state0);
- if (*state0 == UTF8_REJECT)
-   ret -=1;
- return ret;
-}
-
diff --git a/include/pipes_text_cbits.h b/include/pipes_text_cbits.h

deleted file mode 100644 (file)

index b9ab670..0000000
--- a/include/pipes_text_cbits.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * Copyright (c) 2013 Bryan O'Sullivan <bos@serpentine.com>.
- */
-
-#ifndef _pipes_text_cbits_h
-#define _pipes_text_cbits_h
-
-#define UTF8_ACCEPT 0
-#define UTF8_REJECT 12
-
-#endif
diff --git a/pipes-text.cabal b/pipes-text.cabal

index a1d57bb3c914752e211c565d6db84ea62fba1394..017d41c3ddb719047a6fa039dd99d22bf4e86866 100644 (file)
--- a/pipes-text.cabal
+++ b/pipes-text.cabal
@@ -1,5 +1,5 @@
  name:                pipes-text
-version:             0.0.0.6
+version:             0.0.0.7
  synopsis:            Text pipes.
  description:         Many of the pipes and other operations defined here mirror those in
                       the `pipes-bytestring` library. Folds like `length` and grouping 
@@ -36,23 +36,28 @@ source-repository head
      type: git
      location: https://github.com/michaelt/text-pipes
  
+flag noio
+  default: False
+  Description: Use a version of text earlier than 0.11.3
  
  library
-  c-sources:    cbits/cbits.c
-  include-dirs: include
-  exposed-modules:     Pipes.Text, Pipes.Text.Internal
-  other-modules:       Pipes.Text.Internal.Decoding, Pipes.Text.Internal.Codec
-  other-extensions:    RankNTypes
-  build-depends:       base         >= 4       && < 5  ,
-                       bytestring >=0.10       && < 0.11,
-                       text >=0.11.3           && < 1.2,
-                       profunctors  >= 3.1.1   && < 4.1 ,
-                       pipes >=4.0             && < 4.2,
-                       pipes-group  >= 1.0.0   && < 1.1 ,
-                       pipes-parse >=2.0       && < 3.1,
+  exposed-modules:     Pipes.Text, Pipes.Text.Encoding
+  build-depends:       base >= 4                  && < 5  ,
+                       bytestring >= 0.9                  ,
+                       text >=0.11.3              && < 1.2,
+                       text-stream-decode >= 0.1  && < 0.2,  
+                       profunctors  >= 3.1.1      && < 4.1,
+                       pipes >=4.0                && < 4.2,
+                       pipes-group  >= 1.0.0      && < 1.1,
+                       pipes-parse >=2.0          && < 3.1,
                         pipes-safe, 
-                       pipes-bytestring >= 1.0 && < 2.1,
-                       transformers >= 0.2.0.0 && < 0.4
-  -- hs-source-dirs:      
+                       pipes-bytestring >= 1.0    && < 2.1,
+                       transformers >= 0.2.0.0    && < 0.4
+  other-extensions:    RankNTypes
    default-language:    Haskell2010
-  ghc-options: -O2 
+  ghc-options: -O2
+
+  if !flag(noio)
+    exposed-modules:   Pipes.Text.IO
+    build-depends:     text >=0.11.3              && < 1.2
+
author	michaelt <what_is_it_to_do_anything@yahoo.com>
	Sat, 15 Feb 2014 15:01:48 +0000 (10:01 -0500)
committer	michaelt <what_is_it_to_do_anything@yahoo.com>
	Sat, 15 Feb 2014 15:01:48 +0000 (10:01 -0500)
Pipes/Text.hs		patch \| blob \| blame \| history
Pipes/Text/Encoding.hs	[new file with mode: 0644]	patch \| blob
Pipes/Text/IO.hs	[new file with mode: 0644]	patch \| blob
Pipes/Text/Internal.hs	[deleted file]	patch \| blob \| blame \| history
Pipes/Text/Internal/Codec.hs	[deleted file]	patch \| blob \| blame \| history
Pipes/Text/Internal/Decoding.hs	[deleted file]	patch \| blob \| blame \| history
cbits/cbits.c	[deleted file]	patch \| blob \| blame \| history
include/pipes_text_cbits.h	[deleted file]	patch \| blob \| blame \| history
pipes-text.cabal		patch \| blob \| blame \| history