aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authormichaelt <what_is_it_to_do_anything@yahoo.com>2014-02-04 23:46:30 -0500
committermichaelt <what_is_it_to_do_anything@yahoo.com>2014-02-04 23:46:30 -0500
commit7c9f2b8b11f51b2a4e9dcd6e8168e34f1135d6e2 (patch)
tree8123141a2f8ab16885456713788f6c5fa9defecf
parent11b2cbe97317e97e5bf532df2d897f96985178cc (diff)
downloadtext-pipes-7c9f2b8b11f51b2a4e9dcd6e8168e34f1135d6e2.tar.gz
text-pipes-7c9f2b8b11f51b2a4e9dcd6e8168e34f1135d6e2.tar.zst
text-pipes-7c9f2b8b11f51b2a4e9dcd6e8168e34f1135d6e2.zip
special Internal module
-rw-r--r--LICENSE2
-rw-r--r--Pipes/Text.hs20
-rw-r--r--Pipes/Text/Internal.hs152
-rw-r--r--pipes-text.cabal6
4 files changed, 24 insertions, 156 deletions
diff --git a/LICENSE b/LICENSE
index 3b4771d..f307de2 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
1Copyright (c) 2013, Gabriel Gonzalez, Tobias Florek, Michael Thompson 1Copyright (c) 2013-14, Gabriel Gonzalez, Tobias Florek, Michael Thompson
2 2
3All rights reserved. 3All rights reserved.
4 4
diff --git a/Pipes/Text.hs b/Pipes/Text.hs
index 0957a7d..796f672 100644
--- a/Pipes/Text.hs
+++ b/Pipes/Text.hs
@@ -167,7 +167,7 @@ module Pipes.Text (
167 , module Data.Word 167 , module Data.Word
168 , module Pipes.Parse 168 , module Pipes.Parse
169 , module Pipes.Group 169 , module Pipes.Group
170 , module Pipes.Text.Internal.Codec 170 , module Pipes.Text.Internal
171 ) where 171 ) where
172 172
173import Control.Exception (throwIO, try) 173import Control.Exception (throwIO, try)
@@ -197,8 +197,8 @@ import Foreign.C.Error (Errno(Errno), ePIPE)
197import qualified GHC.IO.Exception as G 197import qualified GHC.IO.Exception as G
198import Pipes 198import Pipes
199import qualified Pipes.ByteString as PB 199import qualified Pipes.ByteString as PB
200import qualified Pipes.Text.Internal.Decoding as PE 200import qualified Pipes.Text.Internal as PI
201import Pipes.Text.Internal.Codec 201import Pipes.Text.Internal
202import Pipes.Core (respond, Server') 202import Pipes.Core (respond, Server')
203import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..)) 203import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
204import qualified Pipes.Group as PG 204import qualified Pipes.Group as PG
@@ -729,7 +729,7 @@ isEndOfChars = do
729decodeUtf8 :: Monad m => Lens' (Producer ByteString m r) 729decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
730 (Producer Text m (Producer ByteString m r)) 730 (Producer Text m (Producer ByteString m r))
731decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8))) 731decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
732 (k (go B.empty PE.streamDecodeUtf8 p0)) where 732 (k (go B.empty PI.streamDecodeUtf8 p0)) where
733 go !carry dec0 p = do 733 go !carry dec0 p = do
734 x <- lift (next p) 734 x <- lift (next p)
735 case x of Left r -> return (if B.null carry 735 case x of Left r -> return (if B.null carry
@@ -738,9 +738,9 @@ decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
738 return r)) 738 return r))
739 739
740 Right (chunk, p') -> case dec0 chunk of 740 Right (chunk, p') -> case dec0 chunk of
741 PE.Some text carry2 dec -> do yield text 741 PI.Some text carry2 dec -> do yield text
742 go carry2 dec p' 742 go carry2 dec p'
743 PE.Other text bs -> do yield text 743 PI.Other text bs -> do yield text
744 return (do yield bs -- an invalid blob remains 744 return (do yield bs -- an invalid blob remains
745 p') 745 p')
746{-# INLINABLE decodeUtf8 #-} 746{-# INLINABLE decodeUtf8 #-}
@@ -1093,19 +1093,19 @@ unwords = intercalate (yield $ T.singleton ' ')
1093codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r)) 1093codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
1094codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc))) 1094codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
1095 (k (decoder (dec B.empty) p0) ) where 1095 (k (decoder (dec B.empty) p0) ) where
1096 decoder :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r) 1096 decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1097 decoder !d p0 = case d of 1097 decoder !d p0 = case d of
1098 PE.Other txt bad -> do yield txt 1098 PI.Other txt bad -> do yield txt
1099 return (do yield bad 1099 return (do yield bad
1100 p0) 1100 p0)
1101 PE.Some txt extra dec -> do yield txt 1101 PI.Some txt extra dec -> do yield txt
1102 x <- lift (next p0) 1102 x <- lift (next p0)
1103 case x of Left r -> return (do yield extra 1103 case x of Left r -> return (do yield extra
1104 return r) 1104 return r)
1105 Right (chunk,p1) -> decoder (dec chunk) p1 1105 Right (chunk,p1) -> decoder (dec chunk) p1
1106 1106
1107-- decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8))) 1107-- decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
1108-- (k (go B.empty PE.streamDecodeUtf8 p0)) where 1108-- (k (go B.empty PI.streamDecodeUtf8 p0)) where
1109 1109
1110encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r) 1110encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1111encodeAscii = go where 1111encodeAscii = go where
diff --git a/Pipes/Text/Internal.hs b/Pipes/Text/Internal.hs
index bcee278..2530b23 100644
--- a/Pipes/Text/Internal.hs
+++ b/Pipes/Text/Internal.hs
@@ -1,147 +1,15 @@
1{-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface #-} 1module Pipes.Text.Internal
2{-# LANGUAGE GeneralizedNewtypeDeriving, MagicHash, UnliftedFFITypes #-}
3{-# LANGUAGE DeriveDataTypeable, RankNTypes #-}
4
5-- This module lifts assorted materials from Brian O'Sullivan's text package
6-- especially Data.Text.Encoding in order to define a pipes-appropriate
7-- streamDecodeUtf8
8module Pipes.Text.Internal
9 ( Decoding(..) 2 ( Decoding(..)
10 , streamDecodeUtf8 3 , streamDecodeUtf8
11 , decodeSomeUtf8 4 , decodeSomeUtf8
5 , Codec(..)
6 , TextException(..)
7 , utf8
8 , utf16_le
9 , utf16_be
10 , utf32_le
11 , utf32_be
12 ) where 12 ) where
13import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO)
14import Control.Monad.ST (ST, runST)
15import Data.Bits ((.&.))
16import Data.ByteString as B
17import Data.ByteString (ByteString)
18import Data.ByteString.Internal as B
19import Data.ByteString.Char8 as B8
20import Data.Text (Text)
21import qualified Data.Text as T
22import qualified Data.Text.Encoding as TE
23import Data.Text.Encoding.Error ()
24import Data.Text.Internal (Text, textP)
25import Foreign.C.Types (CSize)
26import Foreign.ForeignPtr (withForeignPtr)
27import Foreign.Marshal.Utils (with)
28import Foreign.Ptr (Ptr, minusPtr, nullPtr, plusPtr)
29import Foreign.Storable (Storable, peek, poke)
30import GHC.Base (Char(..), Int(..), MutableByteArray#, ord#, iShiftRA#)
31import GHC.Word (Word8, Word32)
32import qualified Data.Text.Array as A
33import Data.Word (Word8, Word16)
34import System.IO.Unsafe (unsafePerformIO)
35import qualified Control.Exception as Exc
36import Data.Bits ((.&.), (.|.), shiftL)
37import Data.Typeable
38import Control.Arrow (first)
39import Data.Maybe (catMaybes)
40#include "pipes_text_cbits.h"
41
42
43
44-- | A stream oriented decoding result.
45data Decoding = Some Text ByteString (ByteString -> Decoding)
46 | Other Text ByteString
47instance Show Decoding where
48 showsPrec d (Some t bs _) = showParen (d > prec) $
49 showString "Some " . showsPrec prec' t .
50 showChar ' ' . showsPrec prec' bs .
51 showString " _"
52 where prec = 10; prec' = prec + 1
53 showsPrec d (Other t bs) = showParen (d > prec) $
54 showString "Other " . showsPrec prec' t .
55 showChar ' ' . showsPrec prec' bs .
56 showString " _"
57 where prec = 10; prec' = prec + 1
58
59newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable)
60newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable)
61
62streamDecodeUtf8 :: ByteString -> Decoding
63streamDecodeUtf8 = decodeChunkUtf8 B.empty 0 0
64 where
65 decodeChunkUtf8 :: ByteString -> CodePoint -> DecoderState -> ByteString -> Decoding
66 decodeChunkUtf8 old codepoint0 state0 bs@(PS fp off len) =
67 runST $ do marray <- A.new (len+1)
68 unsafeIOToST (decodeChunkToBuffer marray)
69 where
70 decodeChunkToBuffer :: A.MArray s -> IO Decoding
71 decodeChunkToBuffer dest = withForeignPtr fp $ \ptr ->
72 with (0::CSize) $ \destOffPtr ->
73 with codepoint0 $ \codepointPtr ->
74 with state0 $ \statePtr ->
75 with nullPtr $ \curPtrPtr ->
76 do let end = ptr `plusPtr` (off + len)
77 curPtr = ptr `plusPtr` off
78 poke curPtrPtr curPtr
79 c_decode_utf8_with_state (A.maBA dest) destOffPtr curPtrPtr end codepointPtr statePtr
80 state <- peek statePtr
81 lastPtr <- peek curPtrPtr
82 codepoint <- peek codepointPtr
83 n <- peek destOffPtr
84 chunkText <- mkText dest n
85 let left = lastPtr `minusPtr` curPtr
86 remaining = B.drop left bs
87 accum = if T.null chunkText then B.append old remaining else remaining
88 return $! case state of
89 UTF8_REJECT -> Other chunkText accum -- We encountered an encoding error
90 _ -> Some chunkText accum (decodeChunkUtf8 accum codepoint state)
91 {-# INLINE decodeChunkToBuffer #-}
92 {-# INLINE decodeChunkUtf8 #-}
93{-# INLINE streamDecodeUtf8 #-}
94
95decodeSomeUtf8 :: ByteString -> (Text, ByteString)
96decodeSomeUtf8 bs@(PS fp off len) = runST $ do
97 dest <- A.new (len+1)
98 unsafeIOToST $
99 withForeignPtr fp $ \ptr ->
100 with (0::CSize) $ \destOffPtr ->
101 with (0::CodePoint) $ \codepointPtr ->
102 with (0::DecoderState) $ \statePtr ->
103 with nullPtr $ \curPtrPtr ->
104 do let end = ptr `plusPtr` (off + len)
105 curPtr = ptr `plusPtr` off
106 poke curPtrPtr curPtr
107 c_decode_utf8_with_state (A.maBA dest) destOffPtr
108 curPtrPtr end codepointPtr statePtr
109 state <- peek statePtr
110 lastPtr <- peek curPtrPtr
111 codepoint <- peek codepointPtr
112 n <- peek destOffPtr
113 chunkText <- unsafeSTToIO $ do arr <- A.unsafeFreeze dest
114 return $! textP arr 0 (fromIntegral n)
115 let left = lastPtr `minusPtr` curPtr
116 remaining = B.drop left bs
117 return $! (chunkText, remaining)
118{-# INLINE decodeSomeUtf8 #-}
119
120mkText :: A.MArray s -> CSize -> IO Text
121mkText dest n = unsafeSTToIO $ do arr <- A.unsafeFreeze dest
122 return $! textP arr 0 (fromIntegral n)
123{-# INLINE mkText #-}
124
125ord :: Char -> Int
126ord (C# c#) = I# (ord# c#)
127{-# INLINE ord #-}
128
129unsafeWrite :: A.MArray s -> Int -> Char -> ST s Int
130unsafeWrite marr i c
131 | n < 0x10000 = do A.unsafeWrite marr i (fromIntegral n)
132 return 1
133 | otherwise = do A.unsafeWrite marr i lo
134 A.unsafeWrite marr (i+1) hi
135 return 2
136 where n = ord c
137 m = n - 0x10000
138 lo = fromIntegral $ (m `shiftR` 10) + 0xD800
139 hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
140 shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#)
141 {-# INLINE shiftR #-}
142{-# INLINE unsafeWrite #-}
143 13
144foreign import ccall unsafe "_hs_pipes_text_decode_utf8_state" c_decode_utf8_with_state 14import Pipes.Text.Internal.Decoding
145 :: MutableByteArray# s -> Ptr CSize 15import Pipes.Text.Internal.Codec \ No newline at end of file
146 -> Ptr (Ptr Word8) -> Ptr Word8
147 -> Ptr CodePoint -> Ptr DecoderState -> IO (Ptr Word8) \ No newline at end of file
diff --git a/pipes-text.cabal b/pipes-text.cabal
index 1333f92..9af2a09 100644
--- a/pipes-text.cabal
+++ b/pipes-text.cabal
@@ -1,5 +1,5 @@
1name: pipes-text 1name: pipes-text
2version: 0.0.1.0 2version: 0.0.0.0
3synopsis: Text pipes. 3synopsis: Text pipes.
4description: Many of the pipes and other operations defined here mirror those in 4description: Many of the pipes and other operations defined here mirror those in
5 the `pipes-bytestring` library. Folds like `length` and grouping 5 the `pipes-bytestring` library. Folds like `length` and grouping
@@ -34,8 +34,8 @@ extra-source-files: README.md
34library 34library
35 c-sources: cbits/cbits.c 35 c-sources: cbits/cbits.c
36 include-dirs: include 36 include-dirs: include
37 exposed-modules: Pipes.Text, Pipes.Text.Internal.Decoding, Pipes.Text.Internal.Codec 37 exposed-modules: Pipes.Text, Pipes.Text.Internal
38 -- other-modules: 38 other-modules: Pipes.Text.Internal.Decoding, Pipes.Text.Internal.Codec
39 other-extensions: RankNTypes 39 other-extensions: RankNTypes
40 build-depends: base >= 4 && < 5 , 40 build-depends: base >= 4 && < 5 ,
41 bytestring >=0.10 && < 0.11, 41 bytestring >=0.10 && < 0.11,