diff options
-rw-r--r-- | Pipes/Text/Internal/Codec.hs | 13 | ||||
-rw-r--r-- | Pipes/Text/Internal/Decoding.hs | 16 |
2 files changed, 18 insertions, 11 deletions
diff --git a/Pipes/Text/Internal/Codec.hs b/Pipes/Text/Internal/Codec.hs index 4b9367f..63cbd74 100644 --- a/Pipes/Text/Internal/Codec.hs +++ b/Pipes/Text/Internal/Codec.hs | |||
@@ -3,8 +3,12 @@ | |||
3 | -- | | 3 | -- | |
4 | -- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin | 4 | -- Copyright: 2014 Michael Thompson, 2011 Michael Snoyman, 2010-2011 John Millikin |
5 | -- License: MIT | 5 | -- License: MIT |
6 | -- | 6 | -- Parts of this code were taken from enumerator and conduits, and adapted for pipes. |
7 | -- Parts of this code were taken from enumerator and conduits, and adapted for pipes. | 7 | {- | This module follows the model of the enumerator and conduits libraries, and defines |
8 | 'Codec's for various encodings. Note that we do not export a 'Codec' for ascii and | ||
9 | iso8859_1. A 'Lens' in the sense of the pipes library cannot be defined for these, so | ||
10 | special functions appear in @Pipes.Text@. | ||
11 | -} | ||
8 | 12 | ||
9 | module Pipes.Text.Internal.Codec | 13 | module Pipes.Text.Internal.Codec |
10 | ( Decoding(..) | 14 | ( Decoding(..) |
@@ -41,12 +45,11 @@ import Data.Maybe (catMaybes) | |||
41 | import Pipes.Text.Internal.Decoding | 45 | import Pipes.Text.Internal.Decoding |
42 | import Pipes | 46 | import Pipes |
43 | -- | A specific character encoding. | 47 | -- | A specific character encoding. |
44 | -- | 48 | |
45 | -- Since 0.3.0 | ||
46 | data Codec = Codec | 49 | data Codec = Codec |
47 | { codecName :: Text | 50 | { codecName :: Text |
48 | , codecEncode :: Text -> (ByteString, Maybe (TextException, Text)) | 51 | , codecEncode :: Text -> (ByteString, Maybe (TextException, Text)) |
49 | , codecDecode :: ByteString -> Decoding -- (Text, Either (TextException, ByteString) ByteString) | 52 | , codecDecode :: ByteString -> Decoding |
50 | } | 53 | } |
51 | 54 | ||
52 | instance Show Codec where | 55 | instance Show Codec where |
diff --git a/Pipes/Text/Internal/Decoding.hs b/Pipes/Text/Internal/Decoding.hs index 531104a..4b4bbe6 100644 --- a/Pipes/Text/Internal/Decoding.hs +++ b/Pipes/Text/Internal/Decoding.hs | |||
@@ -2,9 +2,11 @@ | |||
2 | {-# LANGUAGE GeneralizedNewtypeDeriving, MagicHash, UnliftedFFITypes #-} | 2 | {-# LANGUAGE GeneralizedNewtypeDeriving, MagicHash, UnliftedFFITypes #-} |
3 | {-# LANGUAGE DeriveDataTypeable, RankNTypes #-} | 3 | {-# LANGUAGE DeriveDataTypeable, RankNTypes #-} |
4 | 4 | ||
5 | -- This module lifts assorted materials from Brian O'Sullivan's text package | 5 | {- | |
6 | -- especially Data.Text.Encoding in order to define a pipes-appropriate | 6 | This module lifts assorted materials from Bryan O'Sullivan's text package, |
7 | -- streamDecodeUtf8 | 7 | especially @Data.Text.Encoding@ in order to define a pipes-appropriate |
8 | 'streamDecodeUtf8' | ||
9 | -} | ||
8 | module Pipes.Text.Internal.Decoding | 10 | module Pipes.Text.Internal.Decoding |
9 | ( Decoding(..) | 11 | ( Decoding(..) |
10 | , streamDecodeUtf8 | 12 | , streamDecodeUtf8 |
@@ -41,9 +43,9 @@ import Data.Maybe (catMaybes) | |||
41 | 43 | ||
42 | 44 | ||
43 | 45 | ||
44 | -- | A stream oriented decoding result. | 46 | -- | A stream-oriented decoding result. Distinct from the similar type in @Data.Text.Encoding@. |
45 | data Decoding = Some Text ByteString (ByteString -> Decoding) | 47 | data Decoding = Some Text ByteString (ByteString -> Decoding) -- ^ Text, any undecoded fragment, and a continuation. |
46 | | Other Text ByteString | 48 | | Other Text ByteString -- ^ Text followed by an undecodable 'ByteString'. |
47 | instance Show Decoding where | 49 | instance Show Decoding where |
48 | showsPrec d (Some t bs _) = showParen (d > prec) $ | 50 | showsPrec d (Some t bs _) = showParen (d > prec) $ |
49 | showString "Some " . showsPrec prec' t . | 51 | showString "Some " . showsPrec prec' t . |
@@ -59,6 +61,7 @@ instance Show Decoding where | |||
59 | newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable) | 61 | newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable) |
60 | newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable) | 62 | newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable) |
61 | 63 | ||
64 | -- | Resolve a 'ByteString' into 'Text' and a continuation that can handle further 'ByteString's. | ||
62 | streamDecodeUtf8 :: ByteString -> Decoding | 65 | streamDecodeUtf8 :: ByteString -> Decoding |
63 | streamDecodeUtf8 = decodeChunkUtf8 B.empty 0 0 | 66 | streamDecodeUtf8 = decodeChunkUtf8 B.empty 0 0 |
64 | where | 67 | where |
@@ -92,6 +95,7 @@ streamDecodeUtf8 = decodeChunkUtf8 B.empty 0 0 | |||
92 | {-# INLINE decodeChunkUtf8 #-} | 95 | {-# INLINE decodeChunkUtf8 #-} |
93 | {-# INLINE streamDecodeUtf8 #-} | 96 | {-# INLINE streamDecodeUtf8 #-} |
94 | 97 | ||
98 | -- | Resolve a 'ByteString' into an initial segment of intelligible 'Text' and whatever is unintelligible. | ||
95 | decodeSomeUtf8 :: ByteString -> (Text, ByteString) | 99 | decodeSomeUtf8 :: ByteString -> (Text, ByteString) |
96 | decodeSomeUtf8 bs@(PS fp off len) = runST $ do | 100 | decodeSomeUtf8 bs@(PS fp off len) = runST $ do |
97 | dest <- A.new (len+1) | 101 | dest <- A.new (len+1) |