, group
, word
, line
+
+ -- * Decoding Lenses
, decodeUtf8
- , decode
+ , codec
+
+ -- * Codecs
+ , utf8
+ , utf16_le
+ , utf16_be
+ , utf32_le
+ , utf32_be
+
+ -- * Other Decoding/Encoding Functions
+ , decodeIso8859_1
+ , decodeAscii
+ , encodeIso8859_1
+ , encodeAscii
-- * FreeT Splitters
, chunksOf
import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as B8
import Data.Char (ord, isSpace)
import Data.Functor.Constant (Constant(Constant, getConstant))
import Data.Functor.Identity (Identity)
import Pipes
import qualified Pipes.ByteString as PB
import qualified Pipes.Text.Internal as PE
-import Pipes.Text.Codec (Codec(..))
+import Pipes.Text.Codec
import Pipes.Core (respond, Server')
import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
import qualified Pipes.Group as PG
@Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
-}
-
-
-decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
--- decode codec = go B.empty where
--- go extra p0 =
--- do x <- lift (next p0)
--- case x of Right (chunk, p) ->
--- do let (text, stuff) = codecDecode codec (B.append extra chunk)
--- yield text
--- case stuff of Right extra' -> go extra' p
--- Left (exc,bs) -> do yield text
--- return (do yield bs
--- p)
--- Left r -> return (do yield extra
--- return r)
-
-decode d p0 = case d of
- PE.Other txt bad -> do yield txt
- return (do yield bad
- p0)
- PE.Some txt extra dec -> do yield txt
- x <- lift (next p0)
- case x of Left r -> return (do yield extra
- return r)
- Right (chunk,p1) -> decode (dec chunk) p1
-
--- go !carry dec0 p = do
--- x <- lift (next p)
--- case x of Left r -> if B.null carry
--- then return (return r) -- all bytestrinput was consumed
--- else return (do yield carry -- a potentially valid fragment remains
--- return r)
---
--- Right (chunk, p') -> case dec0 chunk of
--- PE.Some text carry2 dec -> do yield text
--- go carry2 dec p'
--- PE.Other text bs -> do yield text
--- return (do yield bs -- an invalid blob remains
--- p')
--- {-# INLINABLE decodeUtf8 #-}
\ No newline at end of file
+codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
+codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
+ (k (decoder (dec B.empty) p0) ) where
+ decoder :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+ decoder !d p0 = case d of
+ PE.Other txt bad -> do yield txt
+ return (do yield bad
+ p0)
+ PE.Some txt extra dec -> do yield txt
+ x <- lift (next p0)
+ case x of Left r -> return (do yield extra
+ return r)
+ Right (chunk,p1) -> decoder (dec chunk) p1
+
+-- decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
+-- (k (go B.empty PE.streamDecodeUtf8 p0)) where
+
+encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
+encodeAscii = go where
+ go p = do echunk <- lift (next p)
+ case echunk of
+ Left r -> return (return r)
+ Right (chunk, p') ->
+ if T.null chunk
+ then go p'
+ else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
+ in do yield (B8.pack (T.unpack safe))
+ if T.null unsafe
+ then go p'
+ else return $ do yield unsafe
+ p'
+
+encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
+encodeIso8859_1 = go where
+ go p = do etxt <- lift (next p)
+ case etxt of
+ Left r -> return (return r)
+ Right (txt, p') ->
+ if T.null txt
+ then go p'
+ else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
+ in do yield (B8.pack (T.unpack safe))
+ if T.null unsafe
+ then go p'
+ else return $ do yield unsafe
+ p'
+
+decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+decodeAscii = go where
+ go p = do echunk <- lift (next p)
+ case echunk of
+ Left r -> return (return r)
+ Right (chunk, p') ->
+ if B.null chunk
+ then go p'
+ else let (safe, unsafe) = B.span (<= 0x7F) chunk
+ in do yield (T.pack (B8.unpack safe))
+ if B.null unsafe
+ then go p'
+ else return $ do yield unsafe
+ p'
+
+
+decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+decodeIso8859_1 = go where
+ go p = do echunk <- lift (next p)
+ case echunk of
+ Left r -> return (return r)
+ Right (chunk, p') ->
+ if B.null chunk
+ then go p'
+ else let (safe, unsafe) = B.span (<= 0xFF) chunk
+ in do yield (T.pack (B8.unpack safe))
+ if B.null unsafe
+ then go p'
+ else return $ do yield unsafe
+ p'
+
+
+
+{-
+ ascii :: Codec
+ ascii = Codec name enc (toDecoding dec) where
+ name = T.pack "ASCII"
+ enc text = (bytes, extra) where
+ (safe, unsafe) = T.span (\c -> ord c <= 0x7F) text
+ bytes = B8.pack (T.unpack safe)
+ extra = if T.null unsafe
+ then Nothing
+ else Just (EncodeException ascii (T.head unsafe), unsafe)
+
+ dec bytes = (text, extra) where
+ (safe, unsafe) = B.span (<= 0x7F) bytes
+ text = T.pack (B8.unpack safe)
+ extra = if B.null unsafe
+ then Right B.empty
+ else Left (DecodeException ascii (B.head unsafe), unsafe)
+
+ iso8859_1 :: Codec
+ iso8859_1 = Codec name enc (toDecoding dec) where
+ name = T.pack "ISO-8859-1"
+ enc text = (bytes, extra) where
+ (safe, unsafe) = T.span (\c -> ord c <= 0xFF) text
+ bytes = B8.pack (T.unpack safe)
+ extra = if T.null unsafe
+ then Nothing
+ else Just (EncodeException iso8859_1 (T.head unsafe), unsafe)
+
+ dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
+-}
+
\ No newline at end of file
-{-# LANGUAGE DeriveDataTypeable, RankNTypes #-}
+{-# LANGUAGE DeriveDataTypeable, RankNTypes, BangPatterns #-}
-- |
-- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin
-- License: MIT
--
--- Handle streams of text.
---
-- Parts of this code were taken from enumerator and conduits, and adapted for pipes.
module Pipes.Text.Codec
, Codec(..)
, TextException(..)
, utf8
+ , utf16_le
+ , utf16_be
+ , utf32_le
+ , utf32_be
) where
import Data.Bits ((.&.))
import Control.Arrow (first)
import Data.Maybe (catMaybes)
import Pipes.Text.Internal
-
+import Pipes
-- | A specific character encoding.
--
-- Since 0.3.0
toDecoding :: (ByteString -> (Text, Either (TextException, ByteString) ByteString))
-> (ByteString -> Decoding)
toDecoding op = loop B.empty where
- loop extra bs0 = case op (B.append extra bs0) of
- (txt, Right bs) -> Some txt bs (loop bs)
- (txt, Left (_,bs)) -> Other txt bs
-
+ loop !extra bs0 = case op (B.append extra bs0) of
+ (txt, Right bs) -> Some txt bs (loop bs)
+ (txt, Left (_,bs)) -> Other txt bs
+-- To do: toDecoding should be inlined in each of the 'Codec' definitions
+-- or else Codec changed to the conduit/enumerator definition. We have
+-- altered it to use 'streamDecodeUtf8'
splitSlowly :: (ByteString -> Text)
-> ByteString
utf8 = Codec name enc (toDecoding dec) where
name = T.pack "UTF-8"
enc text = (TE.encodeUtf8 text, Nothing)
- dec bytes = case decodeSomeUtf8 bytes of
- (t,b) -> (t, Right b)
+ dec bytes = case decodeSomeUtf8 bytes of (t,b) -> (t, Right b)
-- -- Whether the given byte is a continuation byte.
-- isContinuation byte = byte .&. 0xC0 == 0x80
then (bytes, B.empty)
else B.splitAt lenToDecode bytes
-ascii :: Codec
-ascii = Codec name enc (toDecoding dec) where
- name = T.pack "ASCII"
- enc text = (bytes, extra) where
- (safe, unsafe) = T.span (\c -> ord c <= 0x7F) text
- bytes = B8.pack (T.unpack safe)
- extra = if T.null unsafe
- then Nothing
- else Just (EncodeException ascii (T.head unsafe), unsafe)
-
- dec bytes = (text, extra) where
- (safe, unsafe) = B.span (<= 0x7F) bytes
- text = T.pack (B8.unpack safe)
- extra = if B.null unsafe
- then Right B.empty
- else Left (DecodeException ascii (B.head unsafe), unsafe)
-
-iso8859_1 :: Codec
-iso8859_1 = Codec name enc (toDecoding dec) where
- name = T.pack "ISO-8859-1"
- enc text = (bytes, extra) where
- (safe, unsafe) = T.span (\c -> ord c <= 0xFF) text
- bytes = B8.pack (T.unpack safe)
- extra = if T.null unsafe
- then Nothing
- else Just (EncodeException iso8859_1 (T.head unsafe), unsafe)
-
- dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
tryEvaluate :: a -> Either Exc.SomeException a
tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate