finished draft of codec lens, ascii and iso8859 are not codecs, so they are given special functions

author: michaelt <what_is_it_to_do_anything@yahoo.com> 2014-02-04 18:29:15 -0500
committer: michaelt <what_is_it_to_do_anything@yahoo.com> 2014-02-04 18:29:15 -0500
commit: 9018941435a48aa5437981dfdb1377aa14b13159 (patch)
tree: de9f1a119a8a0ced8cc0ede97012f2ccc5bb3d30 /Pipes/Text
parent: 4cbc92cc93073d1a5b99a03ad802f710d0205994 (diff)
download: text-pipes-9018941435a48aa5437981dfdb1377aa14b13159.tar.gz
text-pipes-9018941435a48aa5437981dfdb1377aa14b13159.tar.zst
text-pipes-9018941435a48aa5437981dfdb1377aa14b13159.zip
1 files changed, 13 insertions, 38 deletions
diff --git a/Pipes/Text/Codec.hs b/Pipes/Text/Codec.hs
index e4357b9..070b0d9 100644
--- a/Pipes/Text/Codec.hs
+++ b/Pipes/Text/Codec.hs
@@ -1,11 +1,9 @@
-{-# LANGUAGE DeriveDataTypeable, RankNTypes #-}
+{-# LANGUAGE DeriveDataTypeable, RankNTypes, BangPatterns #-}
 -- |
 -- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin
 -- License: MIT
 --
-- Handle streams of text.
--
 -- Parts of this code were taken from enumerator and conduits, and adapted for pipes.
 module Pipes.Text.Codec
@@ -15,6 +13,10 @@ module Pipes.Text.Codec
    , Codec(..)
    , TextException(..)
    , utf8
+    , utf16_le
+    , utf16_be
+    , utf32_le
+    , utf32_be
    ) where
 import Data.Bits ((.&.))
@@ -37,7 +39,7 @@ import Data.Typeable
 import Control.Arrow (first)
 import Data.Maybe (catMaybes)
 import Pipes.Text.Internal
+import Pipes
 -- | A specific character encoding.
 --
 -- Since 0.3.0
@@ -62,10 +64,12 @@ instance Exc.Exception TextException
 toDecoding :: (ByteString -> (Text, Either (TextException, ByteString) ByteString))
           -> (ByteString -> Decoding)
 toDecoding op = loop B.empty where
-  loop extra bs0 = case op (B.append extra bs0) of
+  loop !extra bs0 = case op (B.append extra bs0) of
-                    (txt, Right bs) -> Some txt bs (loop bs)
+                      (txt, Right bs) -> Some txt bs (loop bs)
-                    (txt, Left (_,bs)) -> Other txt bs
+                      (txt, Left (_,bs)) -> Other txt bs
+-- To do: toDecoding should be inlined in each of the 'Codec' definitions
+-- or else Codec changed to the conduit/enumerator definition.  We have
+-- altered it to use 'streamDecodeUtf8'
 splitSlowly :: (ByteString -> Text)
            -> ByteString 
@@ -87,8 +91,7 @@ utf8 :: Codec
 utf8 = Codec name enc (toDecoding dec) where
    name = T.pack "UTF-8"
    enc text = (TE.encodeUtf8 text, Nothing)
-    dec bytes = case decodeSomeUtf8 bytes of 
+    dec bytes = case decodeSomeUtf8 bytes of (t,b) -> (t, Right b)
-      (t,b) -> (t, Right b)
 --     -- Whether the given byte is a continuation byte.
 --     isContinuation byte = byte .&. 0xC0 == 0x80
@@ -202,34 +205,6 @@ utf32SplitBytes dec bytes = split where
        then (bytes, B.empty)
        else B.splitAt lenToDecode bytes
-ascii :: Codec
-ascii = Codec name enc (toDecoding dec) where
-    name = T.pack "ASCII"
-    enc text = (bytes, extra) where
-        (safe, unsafe) = T.span (\c -> ord c <= 0x7F) text
-        bytes = B8.pack (T.unpack safe)
-        extra = if T.null unsafe
-            then Nothing
-            else Just (EncodeException ascii (T.head unsafe), unsafe)
-    dec bytes = (text, extra) where
-        (safe, unsafe) = B.span (<= 0x7F) bytes
-        text = T.pack (B8.unpack safe)
-        extra = if B.null unsafe
-            then Right B.empty
-            else Left (DecodeException ascii (B.head unsafe), unsafe)
-iso8859_1 :: Codec
-iso8859_1 = Codec name enc (toDecoding dec) where
-    name = T.pack "ISO-8859-1"
-    enc text = (bytes, extra) where
-        (safe, unsafe) = T.span (\c -> ord c <= 0xFF) text
-        bytes = B8.pack (T.unpack safe)
-        extra = if T.null unsafe
-            then Nothing
-            else Just (EncodeException iso8859_1 (T.head unsafe), unsafe)
-    dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
 tryEvaluate :: a -> Either Exc.SomeException a
 tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate
author	michaelt <what_is_it_to_do_anything@yahoo.com>	2014-02-04 18:29:15 -0500
committer	michaelt <what_is_it_to_do_anything@yahoo.com>	2014-02-04 18:29:15 -0500
commit	9018941435a48aa5437981dfdb1377aa14b13159 (patch)
tree	de9f1a119a8a0ced8cc0ede97012f2ccc5bb3d30 /Pipes/Text
parent	4cbc92cc93073d1a5b99a03ad802f710d0205994 (diff)
download	text-pipes-9018941435a48aa5437981dfdb1377aa14b13159.tar.gz text-pipes-9018941435a48aa5437981dfdb1377aa14b13159.tar.zst text-pipes-9018941435a48aa5437981dfdb1377aa14b13159.zip

diff --git a/Pipes/Text/Codec.hs b/Pipes/Text/Codec.hs index e4357b9..070b0d9 100644 --- a/Pipes/Text/Codec.hs +++ b/Pipes/Text/Codec.hs
@@ -1,11 +1,9 @@
1		1
2	{-# LANGUAGE DeriveDataTypeable, RankNTypes #-}	2	{-# LANGUAGE DeriveDataTypeable, RankNTypes, BangPatterns #-}
3	-- \|	3	-- \|
4	-- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin	4	-- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin
5	-- License: MIT	5	-- License: MIT
6	--	6	--
7	-- Handle streams of text.
8	--
9	-- Parts of this code were taken from enumerator and conduits, and adapted for pipes.	7	-- Parts of this code were taken from enumerator and conduits, and adapted for pipes.
10		8
11	module Pipes.Text.Codec	9	module Pipes.Text.Codec
@@ -15,6 +13,10 @@ module Pipes.Text.Codec
15	, Codec(..)	13	, Codec(..)
16	, TextException(..)	14	, TextException(..)
17	, utf8	15	, utf8
		16	, utf16_le
		17	, utf16_be
		18	, utf32_le
		19	, utf32_be
18	) where	20	) where
19		21
20	import Data.Bits ((.&.))	22	import Data.Bits ((.&.))
@@ -37,7 +39,7 @@ import Data.Typeable
37	import Control.Arrow (first)	39	import Control.Arrow (first)
38	import Data.Maybe (catMaybes)	40	import Data.Maybe (catMaybes)
39	import Pipes.Text.Internal	41	import Pipes.Text.Internal
40		42	import Pipes
41	-- \| A specific character encoding.	43	-- \| A specific character encoding.
42	--	44	--
43	-- Since 0.3.0	45	-- Since 0.3.0
@@ -62,10 +64,12 @@ instance Exc.Exception TextException
62	toDecoding :: (ByteString -> (Text, Either (TextException, ByteString) ByteString))	64	toDecoding :: (ByteString -> (Text, Either (TextException, ByteString) ByteString))
63	-> (ByteString -> Decoding)	65	-> (ByteString -> Decoding)
64	toDecoding op = loop B.empty where	66	toDecoding op = loop B.empty where
65	loop extra bs0 = case op (B.append extra bs0) of	67	loop !extra bs0 = case op (B.append extra bs0) of
66	(txt, Right bs) -> Some txt bs (loop bs)	68	(txt, Right bs) -> Some txt bs (loop bs)
67	(txt, Left (_,bs)) -> Other txt bs	69	(txt, Left (_,bs)) -> Other txt bs
68		70	-- To do: toDecoding should be inlined in each of the 'Codec' definitions
		71	-- or else Codec changed to the conduit/enumerator definition. We have
		72	-- altered it to use 'streamDecodeUtf8'
69		73
70	splitSlowly :: (ByteString -> Text)	74	splitSlowly :: (ByteString -> Text)
71	-> ByteString	75	-> ByteString
@@ -87,8 +91,7 @@ utf8 :: Codec
87	utf8 = Codec name enc (toDecoding dec) where	91	utf8 = Codec name enc (toDecoding dec) where
88	name = T.pack "UTF-8"	92	name = T.pack "UTF-8"
89	enc text = (TE.encodeUtf8 text, Nothing)	93	enc text = (TE.encodeUtf8 text, Nothing)
90	dec bytes = case decodeSomeUtf8 bytes of	94	dec bytes = case decodeSomeUtf8 bytes of (t,b) -> (t, Right b)
91	(t,b) -> (t, Right b)
92		95
93	-- -- Whether the given byte is a continuation byte.	96	-- -- Whether the given byte is a continuation byte.
94	-- isContinuation byte = byte .&. 0xC0 == 0x80	97	-- isContinuation byte = byte .&. 0xC0 == 0x80
@@ -202,34 +205,6 @@ utf32SplitBytes dec bytes = split where
202	then (bytes, B.empty)	205	then (bytes, B.empty)
203	else B.splitAt lenToDecode bytes	206	else B.splitAt lenToDecode bytes
204		207
205	ascii :: Codec
206	ascii = Codec name enc (toDecoding dec) where
207	name = T.pack "ASCII"
208	enc text = (bytes, extra) where
209	(safe, unsafe) = T.span (\c -> ord c <= 0x7F) text
210	bytes = B8.pack (T.unpack safe)
211	extra = if T.null unsafe
212	then Nothing
213	else Just (EncodeException ascii (T.head unsafe), unsafe)
214
215	dec bytes = (text, extra) where
216	(safe, unsafe) = B.span (<= 0x7F) bytes
217	text = T.pack (B8.unpack safe)
218	extra = if B.null unsafe
219	then Right B.empty
220	else Left (DecodeException ascii (B.head unsafe), unsafe)
221
222	iso8859_1 :: Codec
223	iso8859_1 = Codec name enc (toDecoding dec) where
224	name = T.pack "ISO-8859-1"
225	enc text = (bytes, extra) where
226	(safe, unsafe) = T.span (\c -> ord c <= 0xFF) text
227	bytes = B8.pack (T.unpack safe)
228	extra = if T.null unsafe
229	then Nothing
230	else Just (EncodeException iso8859_1 (T.head unsafe), unsafe)
231
232	dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
233		208
234	tryEvaluate :: a -> Either Exc.SomeException a	209	tryEvaluate :: a -> Either Exc.SomeException a
235	tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate	210	tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate