left codec module

author: michaelt <what_is_it_to_do_anything@yahoo.com> 2014-02-12 19:08:07 -0500
committer: michaelt <what_is_it_to_do_anything@yahoo.com> 2014-02-12 19:08:07 -0500
commit: fd27d0c43e2995437cf1f3e2d6f292597371ebb0 (patch)
tree: 2d10fe6c24967e2200a2c5ca9d122c63f28610d7 /Pipes
parent: 2a3b4d7143728858d4e3dc401bdc369346da9027 (diff)
download: text-pipes-fd27d0c43e2995437cf1f3e2d6f292597371ebb0.tar.gz
text-pipes-fd27d0c43e2995437cf1f3e2d6f292597371ebb0.tar.zst
text-pipes-fd27d0c43e2995437cf1f3e2d6f292597371ebb0.zip
1 files changed, 0 insertions, 215 deletions
diff --git a/Pipes/Text/Codec.hs b/Pipes/Text/Codec.hs
deleted file mode 100644
index 070b0d9..0000000
--- a/Pipes/Text/Codec.hs
+++ /dev/null
@@ -1,215 +0,0 @@
-{-# LANGUAGE DeriveDataTypeable, RankNTypes, BangPatterns #-}
-- |
-- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin
-- License: MIT
--
-- Parts of this code were taken from enumerator and conduits, and adapted for pipes.
-module Pipes.Text.Codec
-    ( Decoding(..)
-    , streamDecodeUtf8
-    , decodeSomeUtf8
-    , Codec(..)
-    , TextException(..)
-    , utf8
-    , utf16_le
-    , utf16_be
-    , utf32_le
-    , utf32_be
-    ) where
-import Data.Bits ((.&.))
-import Data.Char (ord)
-import Data.ByteString as B 
-import Data.ByteString (ByteString)
-import Data.ByteString.Internal as B 
-import Data.ByteString.Char8 as B8
-import Data.Text (Text)
-import qualified Data.Text as T 
-import qualified Data.Text.Encoding as TE 
-import Data.Text.Encoding.Error ()
-import GHC.Word (Word8, Word32)
-import qualified Data.Text.Array as A
-import Data.Word (Word8, Word16)
-import System.IO.Unsafe (unsafePerformIO)
-import qualified Control.Exception as Exc
-import Data.Bits ((.&.), (.|.), shiftL)
-import Data.Typeable
-import Control.Arrow (first)
-import Data.Maybe (catMaybes)
-import Pipes.Text.Internal
-import Pipes
-- | A specific character encoding.
--
-- Since 0.3.0
-data Codec = Codec
-  { codecName :: Text
-  , codecEncode :: Text -> (ByteString, Maybe (TextException, Text))
-  , codecDecode :: ByteString -> Decoding -- (Text, Either (TextException, ByteString) ByteString)
-  }
-instance Show Codec where
-    showsPrec d c = showParen (d > 10) $ 
-                    showString "Codec " . shows (codecName c)
-data TextException = DecodeException Codec Word8
-                   | EncodeException Codec Char
-                   | LengthExceeded Int
-                   | TextException Exc.SomeException
-    deriving (Show, Typeable)
-instance Exc.Exception TextException
-toDecoding :: (ByteString -> (Text, Either (TextException, ByteString) ByteString))
-           -> (ByteString -> Decoding)
-toDecoding op = loop B.empty where
-  loop !extra bs0 = case op (B.append extra bs0) of
-                      (txt, Right bs) -> Some txt bs (loop bs)
-                      (txt, Left (_,bs)) -> Other txt bs
-- To do: toDecoding should be inlined in each of the 'Codec' definitions
-- or else Codec changed to the conduit/enumerator definition.  We have
-- altered it to use 'streamDecodeUtf8'
-splitSlowly :: (ByteString -> Text)
-            -> ByteString 
-            -> (Text, Either (TextException, ByteString) ByteString)
-splitSlowly dec bytes = valid where
-    valid:_ = catMaybes $ Prelude.map decFirst $ splits (B.length bytes)
-    splits 0 = [(B.empty, bytes)]
-    splits n = B.splitAt n bytes : splits (n - 1)
-    decFirst (a, b) = case tryEvaluate (dec a) of
-        Left _ -> Nothing
-        Right text -> let trouble = case tryEvaluate (dec b) of
-                            Left exc -> Left (TextException exc, b)
-                            Right _  -> Right B.empty 
-                      in Just (text, trouble) -- this case shouldn't occur, 
-                                      -- since splitSlowly is only called
-                                      -- when parsing failed somewhere
-utf8 :: Codec
-utf8 = Codec name enc (toDecoding dec) where
-    name = T.pack "UTF-8"
-    enc text = (TE.encodeUtf8 text, Nothing)
-    dec bytes = case decodeSomeUtf8 bytes of (t,b) -> (t, Right b)
--     -- Whether the given byte is a continuation byte.
--     isContinuation byte = byte .&. 0xC0 == 0x80
-- 
--     -- The number of continuation bytes needed by the given
--     -- non-continuation byte. Returns -1 for an illegal UTF-8
--     -- non-continuation byte and the whole split quickly must fail so
--     -- as the input is passed to TE.decodeUtf8, which will issue a
--     -- suitable error.
--     required x0
--         | x0 .&. 0x80 == 0x00 = 0
--         | x0 .&. 0xE0 == 0xC0 = 1
--         | x0 .&. 0xF0 == 0xE0 = 2
--         | x0 .&. 0xF8 == 0xF0 = 3
--         | otherwise           = -1
-- 
--     splitQuickly bytes
--         | B.null l || req == -1 = Nothing
--         | req == B.length r = Just (TE.decodeUtf8 bytes, B.empty)
--         | otherwise = Just (TE.decodeUtf8 l', r')
--       where
--         (l, r) = B.spanEnd isContinuation bytes
--         req = required (B.last l)
--         l' = B.init l
--         r' = B.cons (B.last l) r
-utf16_le :: Codec
-utf16_le = Codec name enc (toDecoding dec) where
-    name = T.pack "UTF-16-LE"
-    enc text = (TE.encodeUtf16LE text, Nothing)
-    dec bytes = case splitQuickly bytes of
-        Just (text, extra) -> (text, Right extra)
-        Nothing -> splitSlowly TE.decodeUtf16LE bytes
-    splitQuickly bytes = maybeDecode (loop 0) where
-        maxN = B.length bytes
-        loop n |  n      == maxN = decodeAll
-               | (n + 1) == maxN = decodeTo n
-        loop n = let
-            req = utf16Required
-                (B.index bytes n)
-                (B.index bytes (n + 1))
-            decodeMore = loop $! n + req
-            in if n + req > maxN
-                then decodeTo n
-                else decodeMore
-        decodeTo n = first TE.decodeUtf16LE (B.splitAt n bytes)
-        decodeAll = (TE.decodeUtf16LE bytes, B.empty)
-utf16_be :: Codec
-utf16_be = Codec name enc (toDecoding dec) where
-    name = T.pack "UTF-16-BE"
-    enc text = (TE.encodeUtf16BE text, Nothing)
-    dec bytes = case splitQuickly bytes of
-        Just (text, extra) -> (text, Right extra)
-        Nothing -> splitSlowly TE.decodeUtf16BE bytes
-    splitQuickly bytes = maybeDecode (loop 0) where
-        maxN = B.length bytes
-        loop n |  n      == maxN = decodeAll
-               | (n + 1) == maxN = decodeTo n
-        loop n = let
-            req = utf16Required
-                (B.index bytes (n + 1))
-                (B.index bytes n)
-            decodeMore = loop $! n + req
-            in if n + req > maxN
-                then decodeTo n
-                else decodeMore
-        decodeTo n = first TE.decodeUtf16BE (B.splitAt n bytes)
-        decodeAll = (TE.decodeUtf16BE bytes, B.empty)
-utf16Required :: Word8 -> Word8 -> Int
-utf16Required x0 x1 = if x >= 0xD800 && x <= 0xDBFF then 4 else 2 where
-    x :: Word16
-    x = (fromIntegral x1 `shiftL` 8) .|. fromIntegral x0
-utf32_le :: Codec
-utf32_le = Codec name enc (toDecoding dec) where
-    name = T.pack "UTF-32-LE"
-    enc text = (TE.encodeUtf32LE text, Nothing)
-    dec bs = case utf32SplitBytes TE.decodeUtf32LE bs of
-        Just (text, extra) -> (text, Right extra)
-        Nothing -> splitSlowly TE.decodeUtf32LE bs
-utf32_be :: Codec
-utf32_be = Codec name enc (toDecoding dec) where
-    name = T.pack "UTF-32-BE"
-    enc text = (TE.encodeUtf32BE text, Nothing)
-    dec bs = case utf32SplitBytes TE.decodeUtf32BE bs of
-        Just (text, extra) -> (text, Right extra)
-        Nothing -> splitSlowly TE.decodeUtf32BE bs
-utf32SplitBytes :: (ByteString -> Text)
-                -> ByteString
-                -> Maybe (Text, ByteString)
-utf32SplitBytes dec bytes = split where
-    split = maybeDecode (dec toDecode, extra)
-    len = B.length bytes
-    lenExtra = mod len 4
-    lenToDecode = len - lenExtra
-    (toDecode, extra) = if lenExtra == 0
-        then (bytes, B.empty)
-        else B.splitAt lenToDecode bytes
-tryEvaluate :: a -> Either Exc.SomeException a
-tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate
-maybeDecode :: (a, b) -> Maybe (a, b)
-maybeDecode (a, b) = case tryEvaluate a of
-    Left _ -> Nothing
-    Right _ -> Just (a, b)
author	michaelt <what_is_it_to_do_anything@yahoo.com>	2014-02-12 19:08:07 -0500
committer	michaelt <what_is_it_to_do_anything@yahoo.com>	2014-02-12 19:08:07 -0500
commit	fd27d0c43e2995437cf1f3e2d6f292597371ebb0 (patch)
tree	2d10fe6c24967e2200a2c5ca9d122c63f28610d7 /Pipes
parent	2a3b4d7143728858d4e3dc401bdc369346da9027 (diff)
download	text-pipes-fd27d0c43e2995437cf1f3e2d6f292597371ebb0.tar.gz text-pipes-fd27d0c43e2995437cf1f3e2d6f292597371ebb0.tar.zst text-pipes-fd27d0c43e2995437cf1f3e2d6f292597371ebb0.zip

diff --git a/Pipes/Text/Codec.hs b/Pipes/Text/Codec.hs deleted file mode 100644 index 070b0d9..0000000 --- a/Pipes/Text/Codec.hs +++ /dev/null
@@ -1,215 +0,0 @@
1
2	{-# LANGUAGE DeriveDataTypeable, RankNTypes, BangPatterns #-}
3	-- \|
4	-- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin
5	-- License: MIT
6	--
7	-- Parts of this code were taken from enumerator and conduits, and adapted for pipes.
8
9	module Pipes.Text.Codec
10	( Decoding(..)
11	, streamDecodeUtf8
12	, decodeSomeUtf8
13	, Codec(..)
14	, TextException(..)
15	, utf8
16	, utf16_le
17	, utf16_be
18	, utf32_le
19	, utf32_be
20	) where
21
22	import Data.Bits ((.&.))
23	import Data.Char (ord)
24	import Data.ByteString as B
25	import Data.ByteString (ByteString)
26	import Data.ByteString.Internal as B
27	import Data.ByteString.Char8 as B8
28	import Data.Text (Text)
29	import qualified Data.Text as T
30	import qualified Data.Text.Encoding as TE
31	import Data.Text.Encoding.Error ()
32	import GHC.Word (Word8, Word32)
33	import qualified Data.Text.Array as A
34	import Data.Word (Word8, Word16)
35	import System.IO.Unsafe (unsafePerformIO)
36	import qualified Control.Exception as Exc
37	import Data.Bits ((.&.), (.\|.), shiftL)
38	import Data.Typeable
39	import Control.Arrow (first)
40	import Data.Maybe (catMaybes)
41	import Pipes.Text.Internal
42	import Pipes
43	-- \| A specific character encoding.
44	--
45	-- Since 0.3.0
46	data Codec = Codec
47	{ codecName :: Text
48	, codecEncode :: Text -> (ByteString, Maybe (TextException, Text))
49	, codecDecode :: ByteString -> Decoding -- (Text, Either (TextException, ByteString) ByteString)
50	}
51
52	instance Show Codec where
53	showsPrec d c = showParen (d > 10) $
54	showString "Codec " . shows (codecName c)
55
56	data TextException = DecodeException Codec Word8
57	\| EncodeException Codec Char
58	\| LengthExceeded Int
59	\| TextException Exc.SomeException
60	deriving (Show, Typeable)
61	instance Exc.Exception TextException
62
63
64	toDecoding :: (ByteString -> (Text, Either (TextException, ByteString) ByteString))
65	-> (ByteString -> Decoding)
66	toDecoding op = loop B.empty where
67	loop !extra bs0 = case op (B.append extra bs0) of
68	(txt, Right bs) -> Some txt bs (loop bs)
69	(txt, Left (_,bs)) -> Other txt bs
70	-- To do: toDecoding should be inlined in each of the 'Codec' definitions
71	-- or else Codec changed to the conduit/enumerator definition. We have
72	-- altered it to use 'streamDecodeUtf8'
73
74	splitSlowly :: (ByteString -> Text)
75	-> ByteString
76	-> (Text, Either (TextException, ByteString) ByteString)
77	splitSlowly dec bytes = valid where
78	valid:_ = catMaybes $ Prelude.map decFirst $ splits (B.length bytes)
79	splits 0 = [(B.empty, bytes)]
80	splits n = B.splitAt n bytes : splits (n - 1)
81	decFirst (a, b) = case tryEvaluate (dec a) of
82	Left _ -> Nothing
83	Right text -> let trouble = case tryEvaluate (dec b) of
84	Left exc -> Left (TextException exc, b)
85	Right _ -> Right B.empty
86	in Just (text, trouble) -- this case shouldn't occur,
87	-- since splitSlowly is only called
88	-- when parsing failed somewhere
89
90	utf8 :: Codec
91	utf8 = Codec name enc (toDecoding dec) where
92	name = T.pack "UTF-8"
93	enc text = (TE.encodeUtf8 text, Nothing)
94	dec bytes = case decodeSomeUtf8 bytes of (t,b) -> (t, Right b)
95
96	-- -- Whether the given byte is a continuation byte.
97	-- isContinuation byte = byte .&. 0xC0 == 0x80
98	--
99	-- -- The number of continuation bytes needed by the given
100	-- -- non-continuation byte. Returns -1 for an illegal UTF-8
101	-- -- non-continuation byte and the whole split quickly must fail so
102	-- -- as the input is passed to TE.decodeUtf8, which will issue a
103	-- -- suitable error.
104	-- required x0
105	-- \| x0 .&. 0x80 == 0x00 = 0
106	-- \| x0 .&. 0xE0 == 0xC0 = 1
107	-- \| x0 .&. 0xF0 == 0xE0 = 2
108	-- \| x0 .&. 0xF8 == 0xF0 = 3
109	-- \| otherwise = -1
110	--
111	-- splitQuickly bytes
112	-- \| B.null l \|\| req == -1 = Nothing
113	-- \| req == B.length r = Just (TE.decodeUtf8 bytes, B.empty)
114	-- \| otherwise = Just (TE.decodeUtf8 l', r')
115	-- where
116	-- (l, r) = B.spanEnd isContinuation bytes
117	-- req = required (B.last l)
118	-- l' = B.init l
119	-- r' = B.cons (B.last l) r
120
121
122	utf16_le :: Codec
123	utf16_le = Codec name enc (toDecoding dec) where
124	name = T.pack "UTF-16-LE"
125	enc text = (TE.encodeUtf16LE text, Nothing)
126	dec bytes = case splitQuickly bytes of
127	Just (text, extra) -> (text, Right extra)
128	Nothing -> splitSlowly TE.decodeUtf16LE bytes
129
130	splitQuickly bytes = maybeDecode (loop 0) where
131	maxN = B.length bytes
132
133	loop n \| n == maxN = decodeAll
134	\| (n + 1) == maxN = decodeTo n
135	loop n = let
136	req = utf16Required
137	(B.index bytes n)
138	(B.index bytes (n + 1))
139	decodeMore = loop $! n + req
140	in if n + req > maxN
141	then decodeTo n
142	else decodeMore
143
144	decodeTo n = first TE.decodeUtf16LE (B.splitAt n bytes)
145	decodeAll = (TE.decodeUtf16LE bytes, B.empty)
146
147	utf16_be :: Codec
148	utf16_be = Codec name enc (toDecoding dec) where
149	name = T.pack "UTF-16-BE"
150	enc text = (TE.encodeUtf16BE text, Nothing)
151	dec bytes = case splitQuickly bytes of
152	Just (text, extra) -> (text, Right extra)
153	Nothing -> splitSlowly TE.decodeUtf16BE bytes
154
155	splitQuickly bytes = maybeDecode (loop 0) where
156	maxN = B.length bytes
157
158	loop n \| n == maxN = decodeAll
159	\| (n + 1) == maxN = decodeTo n
160	loop n = let
161	req = utf16Required
162	(B.index bytes (n + 1))
163	(B.index bytes n)
164	decodeMore = loop $! n + req
165	in if n + req > maxN
166	then decodeTo n
167	else decodeMore
168
169	decodeTo n = first TE.decodeUtf16BE (B.splitAt n bytes)
170	decodeAll = (TE.decodeUtf16BE bytes, B.empty)
171
172	utf16Required :: Word8 -> Word8 -> Int
173	utf16Required x0 x1 = if x >= 0xD800 && x <= 0xDBFF then 4 else 2 where
174	x :: Word16
175	x = (fromIntegral x1 `shiftL` 8) .\|. fromIntegral x0
176
177
178	utf32_le :: Codec
179	utf32_le = Codec name enc (toDecoding dec) where
180	name = T.pack "UTF-32-LE"
181	enc text = (TE.encodeUtf32LE text, Nothing)
182	dec bs = case utf32SplitBytes TE.decodeUtf32LE bs of
183	Just (text, extra) -> (text, Right extra)
184	Nothing -> splitSlowly TE.decodeUtf32LE bs
185
186
187	utf32_be :: Codec
188	utf32_be = Codec name enc (toDecoding dec) where
189	name = T.pack "UTF-32-BE"
190	enc text = (TE.encodeUtf32BE text, Nothing)
191	dec bs = case utf32SplitBytes TE.decodeUtf32BE bs of
192	Just (text, extra) -> (text, Right extra)
193	Nothing -> splitSlowly TE.decodeUtf32BE bs
194
195	utf32SplitBytes :: (ByteString -> Text)
196	-> ByteString
197	-> Maybe (Text, ByteString)
198	utf32SplitBytes dec bytes = split where
199	split = maybeDecode (dec toDecode, extra)
200	len = B.length bytes
201	lenExtra = mod len 4
202
203	lenToDecode = len - lenExtra
204	(toDecode, extra) = if lenExtra == 0
205	then (bytes, B.empty)
206	else B.splitAt lenToDecode bytes
207
208
209	tryEvaluate :: a -> Either Exc.SomeException a
210	tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate
211
212	maybeDecode :: (a, b) -> Maybe (a, b)
213	maybeDecode (a, b) = case tryEvaluate a of
214	Left _ -> Nothing
215	Right _ -> Just (a, b)