aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authormichaelt <what_is_it_to_do_anything@yahoo.com>2014-02-04 18:29:15 -0500
committermichaelt <what_is_it_to_do_anything@yahoo.com>2014-02-04 18:29:15 -0500
commit9018941435a48aa5437981dfdb1377aa14b13159 (patch)
treede9f1a119a8a0ced8cc0ede97012f2ccc5bb3d30
parent4cbc92cc93073d1a5b99a03ad802f710d0205994 (diff)
downloadtext-pipes-9018941435a48aa5437981dfdb1377aa14b13159.tar.gz
text-pipes-9018941435a48aa5437981dfdb1377aa14b13159.tar.zst
text-pipes-9018941435a48aa5437981dfdb1377aa14b13159.zip
finished draft of codec lens, ascii and iso8859 are not codecs, so they are given special functions
-rw-r--r--Pipes/Text.hs171
-rw-r--r--Pipes/Text/Codec.hs51
2 files changed, 142 insertions, 80 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs
index 9ed0d78..18ec8ec 100644
--- a/Pipes/Text.hs
+++ b/Pipes/Text.hs
@@ -123,8 +123,23 @@ module Pipes.Text (
123 , group 123 , group
124 , word 124 , word
125 , line 125 , line
126
127 -- * Decoding Lenses
126 , decodeUtf8 128 , decodeUtf8
127 , decode 129 , codec
130
131 -- * Codecs
132 , utf8
133 , utf16_le
134 , utf16_be
135 , utf32_le
136 , utf32_be
137
138 -- * Other Decoding/Encoding Functions
139 , decodeIso8859_1
140 , decodeAscii
141 , encodeIso8859_1
142 , encodeAscii
128 143
129 -- * FreeT Splitters 144 -- * FreeT Splitters
130 , chunksOf 145 , chunksOf
@@ -170,6 +185,7 @@ import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
170import Data.ByteString.Unsafe (unsafeTake, unsafeDrop) 185import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
171import Data.ByteString (ByteString) 186import Data.ByteString (ByteString)
172import qualified Data.ByteString as B 187import qualified Data.ByteString as B
188import qualified Data.ByteString.Char8 as B8
173import Data.Char (ord, isSpace) 189import Data.Char (ord, isSpace)
174import Data.Functor.Constant (Constant(Constant, getConstant)) 190import Data.Functor.Constant (Constant(Constant, getConstant))
175import Data.Functor.Identity (Identity) 191import Data.Functor.Identity (Identity)
@@ -181,7 +197,7 @@ import qualified GHC.IO.Exception as G
181import Pipes 197import Pipes
182import qualified Pipes.ByteString as PB 198import qualified Pipes.ByteString as PB
183import qualified Pipes.Text.Internal as PE 199import qualified Pipes.Text.Internal as PE
184import Pipes.Text.Codec (Codec(..)) 200import Pipes.Text.Codec
185import Pipes.Core (respond, Server') 201import Pipes.Core (respond, Server')
186import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..)) 202import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
187import qualified Pipes.Group as PG 203import qualified Pipes.Group as PG
@@ -1073,43 +1089,114 @@ unwords = intercalate (yield $ T.singleton ' ')
1073 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym. 1089 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
1074-} 1090-}
1075 1091
1076 1092codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
1077 1093codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
1078decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r) 1094 (k (decoder (dec B.empty) p0) ) where
1079-- decode codec = go B.empty where 1095 decoder :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1080-- go extra p0 = 1096 decoder !d p0 = case d of
1081-- do x <- lift (next p0) 1097 PE.Other txt bad -> do yield txt
1082-- case x of Right (chunk, p) -> 1098 return (do yield bad
1083-- do let (text, stuff) = codecDecode codec (B.append extra chunk) 1099 p0)
1084-- yield text 1100 PE.Some txt extra dec -> do yield txt
1085-- case stuff of Right extra' -> go extra' p 1101 x <- lift (next p0)
1086-- Left (exc,bs) -> do yield text 1102 case x of Left r -> return (do yield extra
1087-- return (do yield bs 1103 return r)
1088-- p) 1104 Right (chunk,p1) -> decoder (dec chunk) p1
1089-- Left r -> return (do yield extra 1105
1090-- return r) 1106-- decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
1091 1107-- (k (go B.empty PE.streamDecodeUtf8 p0)) where
1092decode d p0 = case d of 1108
1093 PE.Other txt bad -> do yield txt 1109encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1094 return (do yield bad 1110encodeAscii = go where
1095 p0) 1111 go p = do echunk <- lift (next p)
1096 PE.Some txt extra dec -> do yield txt 1112 case echunk of
1097 x <- lift (next p0) 1113 Left r -> return (return r)
1098 case x of Left r -> return (do yield extra 1114 Right (chunk, p') ->
1099 return r) 1115 if T.null chunk
1100 Right (chunk,p1) -> decode (dec chunk) p1 1116 then go p'
1101 1117 else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
1102-- go !carry dec0 p = do 1118 in do yield (B8.pack (T.unpack safe))
1103-- x <- lift (next p) 1119 if T.null unsafe
1104-- case x of Left r -> if B.null carry 1120 then go p'
1105-- then return (return r) -- all bytestrinput was consumed 1121 else return $ do yield unsafe
1106-- else return (do yield carry -- a potentially valid fragment remains 1122 p'
1107-- return r) 1123
1108-- 1124encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1109-- Right (chunk, p') -> case dec0 chunk of 1125encodeIso8859_1 = go where
1110-- PE.Some text carry2 dec -> do yield text 1126 go p = do etxt <- lift (next p)
1111-- go carry2 dec p' 1127 case etxt of
1112-- PE.Other text bs -> do yield text 1128 Left r -> return (return r)
1113-- return (do yield bs -- an invalid blob remains 1129 Right (txt, p') ->
1114-- p') 1130 if T.null txt
1115-- {-# INLINABLE decodeUtf8 #-} \ No newline at end of file 1131 then go p'
1132 else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
1133 in do yield (B8.pack (T.unpack safe))
1134 if T.null unsafe
1135 then go p'
1136 else return $ do yield unsafe
1137 p'
1138
1139decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1140decodeAscii = go where
1141 go p = do echunk <- lift (next p)
1142 case echunk of
1143 Left r -> return (return r)
1144 Right (chunk, p') ->
1145 if B.null chunk
1146 then go p'
1147 else let (safe, unsafe) = B.span (<= 0x7F) chunk
1148 in do yield (T.pack (B8.unpack safe))
1149 if B.null unsafe
1150 then go p'
1151 else return $ do yield unsafe
1152 p'
1153
1154
1155decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1156decodeIso8859_1 = go where
1157 go p = do echunk <- lift (next p)
1158 case echunk of
1159 Left r -> return (return r)
1160 Right (chunk, p') ->
1161 if B.null chunk
1162 then go p'
1163 else let (safe, unsafe) = B.span (<= 0xFF) chunk
1164 in do yield (T.pack (B8.unpack safe))
1165 if B.null unsafe
1166 then go p'
1167 else return $ do yield unsafe
1168 p'
1169
1170
1171
1172{-
1173 ascii :: Codec
1174 ascii = Codec name enc (toDecoding dec) where
1175 name = T.pack "ASCII"
1176 enc text = (bytes, extra) where
1177 (safe, unsafe) = T.span (\c -> ord c <= 0x7F) text
1178 bytes = B8.pack (T.unpack safe)
1179 extra = if T.null unsafe
1180 then Nothing
1181 else Just (EncodeException ascii (T.head unsafe), unsafe)
1182
1183 dec bytes = (text, extra) where
1184 (safe, unsafe) = B.span (<= 0x7F) bytes
1185 text = T.pack (B8.unpack safe)
1186 extra = if B.null unsafe
1187 then Right B.empty
1188 else Left (DecodeException ascii (B.head unsafe), unsafe)
1189
1190 iso8859_1 :: Codec
1191 iso8859_1 = Codec name enc (toDecoding dec) where
1192 name = T.pack "ISO-8859-1"
1193 enc text = (bytes, extra) where
1194 (safe, unsafe) = T.span (\c -> ord c <= 0xFF) text
1195 bytes = B8.pack (T.unpack safe)
1196 extra = if T.null unsafe
1197 then Nothing
1198 else Just (EncodeException iso8859_1 (T.head unsafe), unsafe)
1199
1200 dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
1201-}
1202 \ No newline at end of file
diff --git a/Pipes/Text/Codec.hs b/Pipes/Text/Codec.hs
index e4357b9..070b0d9 100644
--- a/Pipes/Text/Codec.hs
+++ b/Pipes/Text/Codec.hs
@@ -1,11 +1,9 @@
1 1
2{-# LANGUAGE DeriveDataTypeable, RankNTypes #-} 2{-# LANGUAGE DeriveDataTypeable, RankNTypes, BangPatterns #-}
3-- | 3-- |
4-- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin 4-- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin
5-- License: MIT 5-- License: MIT
6-- 6--
7-- Handle streams of text.
8--
9-- Parts of this code were taken from enumerator and conduits, and adapted for pipes. 7-- Parts of this code were taken from enumerator and conduits, and adapted for pipes.
10 8
11module Pipes.Text.Codec 9module Pipes.Text.Codec
@@ -15,6 +13,10 @@ module Pipes.Text.Codec
15 , Codec(..) 13 , Codec(..)
16 , TextException(..) 14 , TextException(..)
17 , utf8 15 , utf8
16 , utf16_le
17 , utf16_be
18 , utf32_le
19 , utf32_be
18 ) where 20 ) where
19 21
20import Data.Bits ((.&.)) 22import Data.Bits ((.&.))
@@ -37,7 +39,7 @@ import Data.Typeable
37import Control.Arrow (first) 39import Control.Arrow (first)
38import Data.Maybe (catMaybes) 40import Data.Maybe (catMaybes)
39import Pipes.Text.Internal 41import Pipes.Text.Internal
40 42import Pipes
41-- | A specific character encoding. 43-- | A specific character encoding.
42-- 44--
43-- Since 0.3.0 45-- Since 0.3.0
@@ -62,10 +64,12 @@ instance Exc.Exception TextException
62toDecoding :: (ByteString -> (Text, Either (TextException, ByteString) ByteString)) 64toDecoding :: (ByteString -> (Text, Either (TextException, ByteString) ByteString))
63 -> (ByteString -> Decoding) 65 -> (ByteString -> Decoding)
64toDecoding op = loop B.empty where 66toDecoding op = loop B.empty where
65 loop extra bs0 = case op (B.append extra bs0) of 67 loop !extra bs0 = case op (B.append extra bs0) of
66 (txt, Right bs) -> Some txt bs (loop bs) 68 (txt, Right bs) -> Some txt bs (loop bs)
67 (txt, Left (_,bs)) -> Other txt bs 69 (txt, Left (_,bs)) -> Other txt bs
68 70-- To do: toDecoding should be inlined in each of the 'Codec' definitions
71-- or else Codec changed to the conduit/enumerator definition. We have
72-- altered it to use 'streamDecodeUtf8'
69 73
70splitSlowly :: (ByteString -> Text) 74splitSlowly :: (ByteString -> Text)
71 -> ByteString 75 -> ByteString
@@ -87,8 +91,7 @@ utf8 :: Codec
87utf8 = Codec name enc (toDecoding dec) where 91utf8 = Codec name enc (toDecoding dec) where
88 name = T.pack "UTF-8" 92 name = T.pack "UTF-8"
89 enc text = (TE.encodeUtf8 text, Nothing) 93 enc text = (TE.encodeUtf8 text, Nothing)
90 dec bytes = case decodeSomeUtf8 bytes of 94 dec bytes = case decodeSomeUtf8 bytes of (t,b) -> (t, Right b)
91 (t,b) -> (t, Right b)
92 95
93-- -- Whether the given byte is a continuation byte. 96-- -- Whether the given byte is a continuation byte.
94-- isContinuation byte = byte .&. 0xC0 == 0x80 97-- isContinuation byte = byte .&. 0xC0 == 0x80
@@ -202,34 +205,6 @@ utf32SplitBytes dec bytes = split where
202 then (bytes, B.empty) 205 then (bytes, B.empty)
203 else B.splitAt lenToDecode bytes 206 else B.splitAt lenToDecode bytes
204 207
205ascii :: Codec
206ascii = Codec name enc (toDecoding dec) where
207 name = T.pack "ASCII"
208 enc text = (bytes, extra) where
209 (safe, unsafe) = T.span (\c -> ord c <= 0x7F) text
210 bytes = B8.pack (T.unpack safe)
211 extra = if T.null unsafe
212 then Nothing
213 else Just (EncodeException ascii (T.head unsafe), unsafe)
214
215 dec bytes = (text, extra) where
216 (safe, unsafe) = B.span (<= 0x7F) bytes
217 text = T.pack (B8.unpack safe)
218 extra = if B.null unsafe
219 then Right B.empty
220 else Left (DecodeException ascii (B.head unsafe), unsafe)
221
222iso8859_1 :: Codec
223iso8859_1 = Codec name enc (toDecoding dec) where
224 name = T.pack "ISO-8859-1"
225 enc text = (bytes, extra) where
226 (safe, unsafe) = T.span (\c -> ord c <= 0xFF) text
227 bytes = B8.pack (T.unpack safe)
228 extra = if T.null unsafe
229 then Nothing
230 else Just (EncodeException iso8859_1 (T.head unsafe), unsafe)
231
232 dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
233 208
234tryEvaluate :: a -> Either Exc.SomeException a 209tryEvaluate :: a -> Either Exc.SomeException a
235tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate 210tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate