aboutsummaryrefslogtreecommitdiffhomepage
path: root/Pipes/Text
diff options
context:
space:
mode:
authormichaelt <what_is_it_to_do_anything@yahoo.com>2014-02-04 18:29:15 -0500
committermichaelt <what_is_it_to_do_anything@yahoo.com>2014-02-04 18:29:15 -0500
commit9018941435a48aa5437981dfdb1377aa14b13159 (patch)
treede9f1a119a8a0ced8cc0ede97012f2ccc5bb3d30 /Pipes/Text
parent4cbc92cc93073d1a5b99a03ad802f710d0205994 (diff)
downloadtext-pipes-9018941435a48aa5437981dfdb1377aa14b13159.tar.gz
text-pipes-9018941435a48aa5437981dfdb1377aa14b13159.tar.zst
text-pipes-9018941435a48aa5437981dfdb1377aa14b13159.zip
finished draft of codec lens, ascii and iso8859 are not codecs, so they are given special functions
Diffstat (limited to 'Pipes/Text')
-rw-r--r--Pipes/Text/Codec.hs51
1 files changed, 13 insertions, 38 deletions
diff --git a/Pipes/Text/Codec.hs b/Pipes/Text/Codec.hs
index e4357b9..070b0d9 100644
--- a/Pipes/Text/Codec.hs
+++ b/Pipes/Text/Codec.hs
@@ -1,11 +1,9 @@
1 1
2{-# LANGUAGE DeriveDataTypeable, RankNTypes #-} 2{-# LANGUAGE DeriveDataTypeable, RankNTypes, BangPatterns #-}
3-- | 3-- |
4-- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin 4-- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin
5-- License: MIT 5-- License: MIT
6-- 6--
7-- Handle streams of text.
8--
9-- Parts of this code were taken from enumerator and conduits, and adapted for pipes. 7-- Parts of this code were taken from enumerator and conduits, and adapted for pipes.
10 8
11module Pipes.Text.Codec 9module Pipes.Text.Codec
@@ -15,6 +13,10 @@ module Pipes.Text.Codec
15 , Codec(..) 13 , Codec(..)
16 , TextException(..) 14 , TextException(..)
17 , utf8 15 , utf8
16 , utf16_le
17 , utf16_be
18 , utf32_le
19 , utf32_be
18 ) where 20 ) where
19 21
20import Data.Bits ((.&.)) 22import Data.Bits ((.&.))
@@ -37,7 +39,7 @@ import Data.Typeable
37import Control.Arrow (first) 39import Control.Arrow (first)
38import Data.Maybe (catMaybes) 40import Data.Maybe (catMaybes)
39import Pipes.Text.Internal 41import Pipes.Text.Internal
40 42import Pipes
41-- | A specific character encoding. 43-- | A specific character encoding.
42-- 44--
43-- Since 0.3.0 45-- Since 0.3.0
@@ -62,10 +64,12 @@ instance Exc.Exception TextException
62toDecoding :: (ByteString -> (Text, Either (TextException, ByteString) ByteString)) 64toDecoding :: (ByteString -> (Text, Either (TextException, ByteString) ByteString))
63 -> (ByteString -> Decoding) 65 -> (ByteString -> Decoding)
64toDecoding op = loop B.empty where 66toDecoding op = loop B.empty where
65 loop extra bs0 = case op (B.append extra bs0) of 67 loop !extra bs0 = case op (B.append extra bs0) of
66 (txt, Right bs) -> Some txt bs (loop bs) 68 (txt, Right bs) -> Some txt bs (loop bs)
67 (txt, Left (_,bs)) -> Other txt bs 69 (txt, Left (_,bs)) -> Other txt bs
68 70-- To do: toDecoding should be inlined in each of the 'Codec' definitions
71-- or else Codec changed to the conduit/enumerator definition. We have
72-- altered it to use 'streamDecodeUtf8'
69 73
70splitSlowly :: (ByteString -> Text) 74splitSlowly :: (ByteString -> Text)
71 -> ByteString 75 -> ByteString
@@ -87,8 +91,7 @@ utf8 :: Codec
87utf8 = Codec name enc (toDecoding dec) where 91utf8 = Codec name enc (toDecoding dec) where
88 name = T.pack "UTF-8" 92 name = T.pack "UTF-8"
89 enc text = (TE.encodeUtf8 text, Nothing) 93 enc text = (TE.encodeUtf8 text, Nothing)
90 dec bytes = case decodeSomeUtf8 bytes of 94 dec bytes = case decodeSomeUtf8 bytes of (t,b) -> (t, Right b)
91 (t,b) -> (t, Right b)
92 95
93-- -- Whether the given byte is a continuation byte. 96-- -- Whether the given byte is a continuation byte.
94-- isContinuation byte = byte .&. 0xC0 == 0x80 97-- isContinuation byte = byte .&. 0xC0 == 0x80
@@ -202,34 +205,6 @@ utf32SplitBytes dec bytes = split where
202 then (bytes, B.empty) 205 then (bytes, B.empty)
203 else B.splitAt lenToDecode bytes 206 else B.splitAt lenToDecode bytes
204 207
205ascii :: Codec
206ascii = Codec name enc (toDecoding dec) where
207 name = T.pack "ASCII"
208 enc text = (bytes, extra) where
209 (safe, unsafe) = T.span (\c -> ord c <= 0x7F) text
210 bytes = B8.pack (T.unpack safe)
211 extra = if T.null unsafe
212 then Nothing
213 else Just (EncodeException ascii (T.head unsafe), unsafe)
214
215 dec bytes = (text, extra) where
216 (safe, unsafe) = B.span (<= 0x7F) bytes
217 text = T.pack (B8.unpack safe)
218 extra = if B.null unsafe
219 then Right B.empty
220 else Left (DecodeException ascii (B.head unsafe), unsafe)
221
222iso8859_1 :: Codec
223iso8859_1 = Codec name enc (toDecoding dec) where
224 name = T.pack "ISO-8859-1"
225 enc text = (bytes, extra) where
226 (safe, unsafe) = T.span (\c -> ord c <= 0xFF) text
227 bytes = B8.pack (T.unpack safe)
228 extra = if T.null unsafe
229 then Nothing
230 else Just (EncodeException iso8859_1 (T.head unsafe), unsafe)
231
232 dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
233 208
234tryEvaluate :: a -> Either Exc.SomeException a 209tryEvaluate :: a -> Either Exc.SomeException a
235tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate 210tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate