, module Data.Word
, module Pipes.Parse
, module Pipes.Group
- , module Pipes.Text.Internal.Codec
+ , module Pipes.Text.Internal
) where
import Control.Exception (throwIO, try)
import qualified GHC.IO.Exception as G
import Pipes
import qualified Pipes.ByteString as PB
-import qualified Pipes.Text.Internal.Decoding as PE
-import Pipes.Text.Internal.Codec
+import qualified Pipes.Text.Internal as PI
+import Pipes.Text.Internal
import Pipes.Core (respond, Server')
import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
import qualified Pipes.Group as PG
decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
(Producer Text m (Producer ByteString m r))
decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
- (k (go B.empty PE.streamDecodeUtf8 p0)) where
+ (k (go B.empty PI.streamDecodeUtf8 p0)) where
go !carry dec0 p = do
x <- lift (next p)
case x of Left r -> return (if B.null carry
return r))
Right (chunk, p') -> case dec0 chunk of
- PE.Some text carry2 dec -> do yield text
+ PI.Some text carry2 dec -> do yield text
go carry2 dec p'
- PE.Other text bs -> do yield text
+ PI.Other text bs -> do yield text
return (do yield bs -- an invalid blob remains
p')
{-# INLINABLE decodeUtf8 #-}
codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
(k (decoder (dec B.empty) p0) ) where
- decoder :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+ decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decoder !d p0 = case d of
- PE.Other txt bad -> do yield txt
+ PI.Other txt bad -> do yield txt
return (do yield bad
p0)
- PE.Some txt extra dec -> do yield txt
+ PI.Some txt extra dec -> do yield txt
x <- lift (next p0)
case x of Left r -> return (do yield extra
return r)
Right (chunk,p1) -> decoder (dec chunk) p1
-- decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
--- (k (go B.empty PE.streamDecodeUtf8 p0)) where
+-- (k (go B.empty PI.streamDecodeUtf8 p0)) where
encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
encodeAscii = go where
-{-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving, MagicHash, UnliftedFFITypes #-}
-{-# LANGUAGE DeriveDataTypeable, RankNTypes #-}
-
--- This module lifts assorted materials from Brian O'Sullivan's text package
--- especially Data.Text.Encoding in order to define a pipes-appropriate
--- streamDecodeUtf8
-module Pipes.Text.Internal
+module Pipes.Text.Internal
( Decoding(..)
, streamDecodeUtf8
, decodeSomeUtf8
+ , Codec(..)
+ , TextException(..)
+ , utf8
+ , utf16_le
+ , utf16_be
+ , utf32_le
+ , utf32_be
) where
-import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO)
-import Control.Monad.ST (ST, runST)
-import Data.Bits ((.&.))
-import Data.ByteString as B
-import Data.ByteString (ByteString)
-import Data.ByteString.Internal as B
-import Data.ByteString.Char8 as B8
-import Data.Text (Text)
-import qualified Data.Text as T
-import qualified Data.Text.Encoding as TE
-import Data.Text.Encoding.Error ()
-import Data.Text.Internal (Text, textP)
-import Foreign.C.Types (CSize)
-import Foreign.ForeignPtr (withForeignPtr)
-import Foreign.Marshal.Utils (with)
-import Foreign.Ptr (Ptr, minusPtr, nullPtr, plusPtr)
-import Foreign.Storable (Storable, peek, poke)
-import GHC.Base (Char(..), Int(..), MutableByteArray#, ord#, iShiftRA#)
-import GHC.Word (Word8, Word32)
-import qualified Data.Text.Array as A
-import Data.Word (Word8, Word16)
-import System.IO.Unsafe (unsafePerformIO)
-import qualified Control.Exception as Exc
-import Data.Bits ((.&.), (.|.), shiftL)
-import Data.Typeable
-import Control.Arrow (first)
-import Data.Maybe (catMaybes)
-#include "pipes_text_cbits.h"
-
-
-
--- | A stream oriented decoding result.
-data Decoding = Some Text ByteString (ByteString -> Decoding)
- | Other Text ByteString
-instance Show Decoding where
- showsPrec d (Some t bs _) = showParen (d > prec) $
- showString "Some " . showsPrec prec' t .
- showChar ' ' . showsPrec prec' bs .
- showString " _"
- where prec = 10; prec' = prec + 1
- showsPrec d (Other t bs) = showParen (d > prec) $
- showString "Other " . showsPrec prec' t .
- showChar ' ' . showsPrec prec' bs .
- showString " _"
- where prec = 10; prec' = prec + 1
-
-newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable)
-newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable)
-
-streamDecodeUtf8 :: ByteString -> Decoding
-streamDecodeUtf8 = decodeChunkUtf8 B.empty 0 0
- where
- decodeChunkUtf8 :: ByteString -> CodePoint -> DecoderState -> ByteString -> Decoding
- decodeChunkUtf8 old codepoint0 state0 bs@(PS fp off len) =
- runST $ do marray <- A.new (len+1)
- unsafeIOToST (decodeChunkToBuffer marray)
- where
- decodeChunkToBuffer :: A.MArray s -> IO Decoding
- decodeChunkToBuffer dest = withForeignPtr fp $ \ptr ->
- with (0::CSize) $ \destOffPtr ->
- with codepoint0 $ \codepointPtr ->
- with state0 $ \statePtr ->
- with nullPtr $ \curPtrPtr ->
- do let end = ptr `plusPtr` (off + len)
- curPtr = ptr `plusPtr` off
- poke curPtrPtr curPtr
- c_decode_utf8_with_state (A.maBA dest) destOffPtr curPtrPtr end codepointPtr statePtr
- state <- peek statePtr
- lastPtr <- peek curPtrPtr
- codepoint <- peek codepointPtr
- n <- peek destOffPtr
- chunkText <- mkText dest n
- let left = lastPtr `minusPtr` curPtr
- remaining = B.drop left bs
- accum = if T.null chunkText then B.append old remaining else remaining
- return $! case state of
- UTF8_REJECT -> Other chunkText accum -- We encountered an encoding error
- _ -> Some chunkText accum (decodeChunkUtf8 accum codepoint state)
- {-# INLINE decodeChunkToBuffer #-}
- {-# INLINE decodeChunkUtf8 #-}
-{-# INLINE streamDecodeUtf8 #-}
-
-decodeSomeUtf8 :: ByteString -> (Text, ByteString)
-decodeSomeUtf8 bs@(PS fp off len) = runST $ do
- dest <- A.new (len+1)
- unsafeIOToST $
- withForeignPtr fp $ \ptr ->
- with (0::CSize) $ \destOffPtr ->
- with (0::CodePoint) $ \codepointPtr ->
- with (0::DecoderState) $ \statePtr ->
- with nullPtr $ \curPtrPtr ->
- do let end = ptr `plusPtr` (off + len)
- curPtr = ptr `plusPtr` off
- poke curPtrPtr curPtr
- c_decode_utf8_with_state (A.maBA dest) destOffPtr
- curPtrPtr end codepointPtr statePtr
- state <- peek statePtr
- lastPtr <- peek curPtrPtr
- codepoint <- peek codepointPtr
- n <- peek destOffPtr
- chunkText <- unsafeSTToIO $ do arr <- A.unsafeFreeze dest
- return $! textP arr 0 (fromIntegral n)
- let left = lastPtr `minusPtr` curPtr
- remaining = B.drop left bs
- return $! (chunkText, remaining)
-{-# INLINE decodeSomeUtf8 #-}
-
-mkText :: A.MArray s -> CSize -> IO Text
-mkText dest n = unsafeSTToIO $ do arr <- A.unsafeFreeze dest
- return $! textP arr 0 (fromIntegral n)
-{-# INLINE mkText #-}
-
-ord :: Char -> Int
-ord (C# c#) = I# (ord# c#)
-{-# INLINE ord #-}
-
-unsafeWrite :: A.MArray s -> Int -> Char -> ST s Int
-unsafeWrite marr i c
- | n < 0x10000 = do A.unsafeWrite marr i (fromIntegral n)
- return 1
- | otherwise = do A.unsafeWrite marr i lo
- A.unsafeWrite marr (i+1) hi
- return 2
- where n = ord c
- m = n - 0x10000
- lo = fromIntegral $ (m `shiftR` 10) + 0xD800
- hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
- shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#)
- {-# INLINE shiftR #-}
-{-# INLINE unsafeWrite #-}
-foreign import ccall unsafe "_hs_pipes_text_decode_utf8_state" c_decode_utf8_with_state
- :: MutableByteArray# s -> Ptr CSize
- -> Ptr (Ptr Word8) -> Ptr Word8
- -> Ptr CodePoint -> Ptr DecoderState -> IO (Ptr Word8)
\ No newline at end of file
+import Pipes.Text.Internal.Decoding
+import Pipes.Text.Internal.Codec
\ No newline at end of file