[github/fretlink/text-pipes.git] / Pipes / Text / Encoding.hs

{-# LANGUAGE RankNTypes, BangPatterns #-}

-- | This module uses the stream decoding functions from Michael Snoyman's new
--  <http://hackage.haskell.org/package/text-stream-decode text-stream-decode> 
--  package to define decoding functions and lenses.  The exported names
--  conflict with names in @Data.Text.Encoding@ but the module can otherwise be 
--  imported unqualified. 

module Pipes.Text.Encoding
    ( 
    -- * The Lens or Codec type
    -- $lenses
    Codec
    , decode
    -- * \'Viewing\' the Text in a byte stream
    -- $codecs
    , utf8
    , utf8Pure
    , utf16LE
    , utf16BE
    , utf32LE
    , utf32BE
    -- * Non-lens decoding functions 
    -- $decoders
    , decodeUtf8
    , decodeUtf8Pure
    , decodeUtf16LE
    , decodeUtf16BE
    , decodeUtf32LE
    , decodeUtf32BE
    -- * Re-encoding functions
    -- $encoders
    , encodeUtf8
    , encodeUtf16LE
    , encodeUtf16BE
    , encodeUtf32LE
    , encodeUtf32BE
    -- * Functions for latin and ascii text
    -- $ascii
    , encodeAscii
    , decodeAscii
    , encodeIso8859_1
    , decodeIso8859_1
    ) 
    where

import Data.Functor.Constant (Constant(..))
import Data.Char (ord)
import Data.ByteString as B 
import Data.ByteString (ByteString)
import Data.ByteString.Char8 as B8
import Data.Text (Text)
import qualified Data.Text as T 
import qualified Data.Text.Encoding as TE 
import qualified Data.Streaming.Text as Stream
import Data.Streaming.Text (DecodeResult(..))
import Control.Monad (join)
import Data.Word (Word8)
import Pipes

-- | Local copy of the van Laarhoven lens alias (a lens from a whole @s@ onto
--   a part @a@), so that no lens library has to be imported.
type Lens' s a = forall f. Functor f => (a -> f a) -> s -> f s

{- $lenses
    The 'Codec' type is a simple specialization of 
    the @Lens'@ type synonym used by the standard lens libraries, 
    <http://hackage.haskell.org/package/lens lens> and 
    <http://hackage.haskell.org/package/lens-family lens-family>. That type, 
    
>   type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)

    is just an alias for a Prelude type. Thus you use any particular codec with
    the @view@ / @(^.)@ , @zoom@ and @over@ functions from either of those libraries;
    we presuppose neither since we already have access to the types they require.

    -}

-- | A lens from a stream of bytes onto the stream of 'Text' at its head.
--   The text stream returns a 'Producer' of 'ByteString' (the part of the
--   byte stream that is not viewed as text) ending in the original result.
type Codec =
  forall m r. Monad m =>
    Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))

{- | Read through a codec: 'decode' is nothing but the @view@ (or @(^.)@) of
     the lens libraries, re-exported under a name that suits this module.
     It runs the lens with the 'Constant' functor and unwraps the result, so
     the following all denote the same producer:

>    decode utf8 p = decodeUtf8 p = view utf8 p = p ^. utf8

-}
decode :: ((b -> Constant b b) -> (a -> Constant b a)) -> a -> b
decode codec = getConstant . codec Constant


{- $codecs
    
    Each Codec-lens looks into a byte stream that is supposed to contain text.
    The particular \'Codec\' lenses are named in accordance with the expected 
    encoding, 'utf8', 'utf16LE' etc. To turn a Codec into an ordinary function, 
    use @view@ / @(^.)@ -- here also called 'decode':

>   view utf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r)
>   decode utf8 Bytes.stdin :: Producer Text IO (Producer ByteString IO r)
>   Bytes.stdin ^. utf8 ::  Producer Text IO (Producer ByteString IO r)

    Uses of a codec with @view@ or @(^.)@ or 'decode' can always be replaced by the specialized 
    decoding functions exported here, e.g. 

>   decodeUtf8 ::  Producer ByteString m r -> Producer Text m (Producer ByteString m r)
>   decodeUtf8 Bytes.stdin :: Producer Text IO (Producer ByteString IO r)

    The stream of text that a @Codec@ \'sees\' in the stream of bytes begins at its head. 
    At any point of decoding failure, the stream of text ends and returns 
    the remainder of the byte stream, starting with the bytes that could not be 
    decoded. Thus if the first bytes are already
    un-decodable, the whole ByteString producer will be returned, i.e.

>   view utf8 bytestream 

    will just come to the same as 

>   return bytestream

    Where there is no decoding failure, the return value of the text stream will be
    an empty byte stream followed by its own return value.  In all cases you must
    deal with the fact that it is a /ByteString producer/ that is returned, even if
    it can be thrown away with @Control.Monad.void@

>   void (Bytes.stdin ^. utf8) :: Producer Text IO ()
    
    @zoom@ converts a Text parser into a ByteString parser:

>   zoom utf8 drawChar :: Monad m => StateT (Producer ByteString m r) m (Maybe Char)

    or, using the type synonym from @Pipes.Parse@:
    
>   zoom utf8 drawChar :: Monad m => Parser ByteString m (Maybe Char)

    Thus we can define a ByteString parser like this:
    
>   withNextByte :: Monad m => Parser ByteString m (Maybe Char, Maybe Word8)
>   withNextByte = do char_ <- zoom utf8 Text.drawChar
>                     byte_ <- Bytes.peekByte
>                     return (char_, byte_)

     Although @withNextByte@ is partly defined with a Text parser, 'drawChar', 
     it is itself a ByteString parser: it will return the first valid utf8-encoded 
     Char in a ByteString, whatever its length, 
     and the first byte of the next character, if they exist. Because 
     we \'draw\' one and \'peek\' at the other, the parser as a whole only 
     advances one Char's length along the bytestring, whatever that length may be.
     See the slightly more complex example \'decode.hs\' in the 
     <http://www.haskellforall.com/2014/02/pipes-parse-30-lens-based-parsing.html#batteries-included haskellforall> 
     discussion of this type of byte stream parsing.
    -}

-- | Codec built from 'decodeUtf8'; text is re-encoded with 'TE.encodeUtf8'.
utf8 :: Codec
utf8 = mkCodec decodeUtf8 TE.encodeUtf8

-- | Codec built from 'decodeUtf8Pure'; text is re-encoded with 'TE.encodeUtf8'.
utf8Pure :: Codec
utf8Pure = mkCodec decodeUtf8Pure TE.encodeUtf8

-- | Codec built from 'decodeUtf16LE'; text is re-encoded with 'TE.encodeUtf16LE'.
utf16LE :: Codec
utf16LE = mkCodec decodeUtf16LE TE.encodeUtf16LE

-- | Codec built from 'decodeUtf16BE'; text is re-encoded with 'TE.encodeUtf16BE'.
utf16BE :: Codec
utf16BE = mkCodec decodeUtf16BE TE.encodeUtf16BE

-- | Codec built from 'decodeUtf32LE'; text is re-encoded with 'TE.encodeUtf32LE'.
utf32LE :: Codec
utf32LE = mkCodec decodeUtf32LE TE.encodeUtf32LE

-- | Codec built from 'decodeUtf32BE'; text is re-encoded with 'TE.encodeUtf32BE'.
utf32BE :: Codec
utf32BE = mkCodec decodeUtf32BE TE.encodeUtf32BE

-- | Drive a resumable stream decoder over a 'Producer' of bytes.
--
--   Every non-empty chunk is handed to the current decoder state. On
--   'DecodeResultSuccess' the decoded text is emitted and decoding continues
--   with the returned continuation; on 'DecodeResultFailure' the text decoded
--   so far is emitted and the text stream ends, returning the undecoded bytes
--   followed by the rest of the input.
--
--   When the input is exhausted the decoder is flushed by feeding it an
--   empty chunk, so that the bytes of a truncated trailing character are
--   returned as leftovers rather than silently dropped.
decodeStream :: Monad m
       => (B.ByteString -> DecodeResult)
       -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeStream = loop where
  -- The decoder regularly produces no text at all (e.g. when a chunk only
  -- holds part of a multi-byte character); do not emit empty 'Text' chunks.
  yieldText t = if T.null t then return () else yield t

  loop dec0 p = do
    x <- lift (next p)
    case x of
      -- End of input: an empty chunk asks the decoder for any pending bytes.
      Left r -> case dec0 B.empty of
        DecodeResultSuccess text _ -> do
          yieldText text
          return (return r)
        DecodeResultFailure text rest -> do
          yieldText text
          return (if B.null rest then return r else yield rest >> return r)
      Right (chunk, p')
        -- An empty chunk in mid-stream must not reach the decoder, which
        -- would take it for the end of input while a character is pending.
        | B.null chunk -> loop dec0 p'
        | otherwise -> case dec0 chunk of
            DecodeResultSuccess text dec -> do
              yieldText text
              loop dec p'
            DecodeResultFailure text rest -> do
              yieldText text
              return (yield rest >> p')
{-# INLINABLE decodeStream #-}

{- $decoders
   These are functions with the simple type:
   
>   decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)

   Thus in general 

>     decodeUtf8 = view utf8
>     decodeUtf16LE = view utf16LE

   and so forth, but these forms
   may be more convenient (and give better type errors!) where lenses are
   not desired.
-}


-- | Decode a byte stream with 'decodeStream' driven by 'Stream.decodeUtf8';
--   the returned producer holds whatever bytes were not decoded.
decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf8 = decodeStream Stream.decodeUtf8
{-# INLINE decodeUtf8 #-}

-- | Like 'decodeUtf8', but driven by 'Stream.decodeUtf8Pure'.
decodeUtf8Pure :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf8Pure = decodeStream Stream.decodeUtf8Pure
{-# INLINE decodeUtf8Pure #-}

-- | Decode a byte stream with 'decodeStream' driven by 'Stream.decodeUtf16LE'.
decodeUtf16LE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf16LE = decodeStream Stream.decodeUtf16LE
{-# INLINE decodeUtf16LE #-}

-- | Decode a byte stream with 'decodeStream' driven by 'Stream.decodeUtf16BE'.
decodeUtf16BE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf16BE = decodeStream Stream.decodeUtf16BE
{-# INLINE decodeUtf16BE #-}

-- | Decode a byte stream with 'decodeStream' driven by 'Stream.decodeUtf32LE'.
decodeUtf32LE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf32LE = decodeStream Stream.decodeUtf32LE
{-# INLINE decodeUtf32LE #-}

-- | Decode a byte stream with 'decodeStream' driven by 'Stream.decodeUtf32BE'.
decodeUtf32BE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf32BE = decodeStream Stream.decodeUtf32BE
{-# INLINE decodeUtf32BE #-}


{- $encoders
   These are simply defined 
   
>      encodeUtf8 = yield . TE.encodeUtf8
   
   They are intended for use with 'for'
   
>      for Text.stdin encodeUtf8 :: Producer ByteString IO ()

   which would have the effect of 
   
>      Text.stdin >-> Pipes.Prelude.map (TE.encodeUtf8)

   using the encoding functions from Data.Text.Encoding 
-}

-- NOTE: these are written with an explicit argument rather than as
-- @yield . TE.encodeX@. 'Producer'' is a universally quantified synonym in
-- pipes, so the result type is higher-rank, and GHC >= 9.0 (simplified
-- subsumption) rejects the point-free form. Behaviour is unchanged.

-- | Yield the UTF-8 encoding of a chunk of 'Text' (see 'TE.encodeUtf8').
encodeUtf8 :: Monad m => Text -> Producer' ByteString m ()
encodeUtf8 txt = yield (TE.encodeUtf8 txt)

-- | Yield the little-endian UTF-16 encoding of a chunk of 'Text'.
encodeUtf16LE :: Monad m => Text -> Producer' ByteString m ()
encodeUtf16LE txt = yield (TE.encodeUtf16LE txt)

-- | Yield the big-endian UTF-16 encoding of a chunk of 'Text'.
encodeUtf16BE :: Monad m => Text -> Producer' ByteString m ()
encodeUtf16BE txt = yield (TE.encodeUtf16BE txt)

-- | Yield the little-endian UTF-32 encoding of a chunk of 'Text'.
encodeUtf32LE :: Monad m => Text -> Producer' ByteString m ()
encodeUtf32LE txt = yield (TE.encodeUtf32LE txt)

-- | Yield the big-endian UTF-32 encoding of a chunk of 'Text'.
encodeUtf32BE :: Monad m => Text -> Producer' ByteString m ()
encodeUtf32BE txt = yield (TE.encodeUtf32BE txt)

-- | Assemble a 'Codec' from a streaming decoder and a chunk-wise encoder.
--   Viewing through the lens applies the decoder; putting a text stream back
--   re-encodes each 'Text' chunk with the encoder and then continues with the
--   byte producer that the text stream returns.
mkCodec :: (forall r m . Monad m =>
           Producer ByteString m r -> Producer Text m (Producer ByteString m r ))
        -> (Text -> ByteString)
        -> Codec
mkCodec dec enc = \inject bytes -> fmap reencode (inject (dec bytes))
  where
    -- Encode every text chunk, then run the returned byte producer.
    reencode texts = join (for texts (yield . enc))


{- $ascii
   The ascii and latin encodings cover only a small subset of the characters
     that 'Text' can represent; thus we cannot use the pipes @Lens@ style to
     work with them. Rather, we simply define a function for each direction. 
-}


-- | 'encodeAscii' turns as much of a stream of 'Text' as is ascii into a
--   byte stream. At the first non-ascii 'Char' the byte stream ends and
--   returns the rest of the 'Text' stream, beginning with that 'Char'.
--   If the whole stream is ascii, the returned producer only carries the
--   original return value. No empty 'ByteString' chunk is ever yielded.
encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
encodeAscii = go where
  go p = do
    e <- lift (next p)
    case e of
      Left r -> return (return r)
      Right (chunk, p') -> do
        let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
        -- For ascii characters the UTF-8 encoding is the single byte itself,
        -- so 'TE.encodeUtf8' gives the same bytes without building a String.
        -- Nothing is emitted when the chunk is empty or starts with a
        -- non-ascii character.
        if T.null safe then return () else yield (TE.encodeUtf8 safe)
        if T.null unsafe
          then go p'
          else return (yield unsafe >> p')
                                                 
{- | Turn as much of a stream of 'Text' as is iso8859-1 (latin1) into a byte
     stream. At the first 'Char' above @0xFF@ the byte stream ends and returns
     the rest of the 'Text' stream, beginning with that 'Char'. If every
     character fits, the returned producer only carries the original return
     value. No empty 'ByteString' chunk is ever yielded.
   -}
encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
encodeIso8859_1 = go where
  go p = do
    e <- lift (next p)
    case e of
      Left r -> return (return r)
      Right (txt, p') -> do
        let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
        -- Every Char in @safe@ is at most 0xFF, so packing it into one byte
        -- loses nothing. Nothing is emitted when the chunk is empty or
        -- starts with a character outside latin1.
        if T.null safe then return () else yield (B8.pack (T.unpack safe))
        if T.null unsafe
          then go p'
          else return (yield unsafe >> p')

{- | Turn a byte stream into the corresponding stream of ascii characters.
     At the first byte above @0x7F@ the text stream ends and returns the
     unused part of the byte stream, beginning with that byte. If every byte
     is ascii, the returned producer only carries the original return value.
     No empty 'Text' chunk is ever yielded.
   -}
decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeAscii = go where
  go p = do
    e <- lift (next p)
    case e of
      Left r -> return (return r)
      Right (chunk, p') -> do
        let (safe, unsafe) = B.span (<= 0x7F) chunk
        -- Nothing is emitted when the chunk is empty or starts with a
        -- non-ascii byte.
        if B.null safe then return () else yield (T.pack (B8.unpack safe))
        if B.null unsafe
          then go p'
          else return (yield unsafe >> p')

{- | Turn a byte stream into the corresponding stream of iso8859-1 (latin1)
     characters, one 'Char' per byte. Every byte value is a valid latin1
     character, so decoding cannot fail: the returned 'ByteString' producer
     never yields anything and only carries the original return value. It is
     kept in the type for uniformity with the other decoding functions.
     Empty input chunks are skipped.
     -}
decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeIso8859_1 = go where
  go p = do
    e <- lift (next p)
    case e of
      Left r -> return (return r)
      Right (chunk, p') -> do
        -- The former @B.span (<= 0xFF)@ test was true for every byte, so the
        -- whole chunk is always decodable and there is no failure branch.
        if B.null chunk then return () else yield (T.pack (B8.unpack chunk))
        go p'
Commit	Line	Data
bbdfd305	1	{-# LANGUAGE RankNTypes, BangPatterns #-}
89d80557	2
0ac0c414	3	-- \| This module uses the stream decoding functions from Michael Snoyman's new
0ac0c414	4	-- <http://hackage.haskell.org/package/text-stream-decode text-stream-decode>
4ea59a8b	5	-- package to define decoding functions and lenses. The exported names
	6	-- conflict with names in @Data.Text.Encoding@ but the module can otherwise be
	7	-- imported unqualified.
bbdfd305	8
bbdfd305	9	module Pipes.Text.Encoding
fafcbeb5	10	(
0ac0c414	11	-- * The Lens or Codec type
fafcbeb5	12	-- $lenses
fafcbeb5	13	Codec
4ea59a8b	14	, decode
a4913c42	15	-- * \'Viewing\' the Text in a byte stream
fafcbeb5	16	-- $codecs
bbdfd305	17	, utf8
	18	, utf8Pure
	19	, utf16LE
	20	, utf16BE
	21	, utf32LE
	22	, utf32BE
fafcbeb5	23	-- * Non-lens decoding functions
0ac0c414	24	-- $decoders
89d80557	25	, decodeUtf8
	26	, decodeUtf8Pure
	27	, decodeUtf16LE
	28	, decodeUtf16BE
	29	, decodeUtf32LE
	30	, decodeUtf32BE
0ac0c414	31	-- * Re-encoding functions
	32	-- $encoders
	33	, encodeUtf8
	34	, encodeUtf16LE
	35	, encodeUtf16BE
	36	, encodeUtf32LE
	37	, encodeUtf32BE
fafcbeb5	38	-- * Functions for latin and ascii text
fafcbeb5	39	-- $ascii
bbdfd305	40	, encodeAscii
	41	, decodeAscii
	42	, encodeIso8859_1
	43	, decodeIso8859_1
	44	)
	45	where
	46
0ac0c414	47	import Data.Functor.Constant (Constant(..))
bbdfd305	48	import Data.Char (ord)
	49	import Data.ByteString as B
	50	import Data.ByteString (ByteString)
bbdfd305	51	import Data.ByteString.Char8 as B8
	52	import Data.Text (Text)
	53	import qualified Data.Text as T
	54	import qualified Data.Text.Encoding as TE
eae50557	55	import qualified Data.Streaming.Text as Stream
eae50557	56	import Data.Streaming.Text (DecodeResult(..))
70125641	57	import Control.Monad (join)
89d80557	58	import Data.Word (Word8)
bbdfd305	59	import Pipes
bbdfd305	60
2f4a83f8	61	type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
bbdfd305	62
fafcbeb5	63	{- $lenses
0ac0c414	64	The 'Codec' type is a simple specializion of
2f4a83f8	65	the @Lens'@ type synonymn used by the standard lens libraries,
0ac0c414	66	<http://hackage.haskell.org/package/lens lens> and
0ac0c414	67	<http://hackage.haskell.org/package/lens-family lens-family>. That type,
fafcbeb5	68
2f4a83f8	69	> type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
0ac0c414	70
4ea59a8b	71	is just an alias for a Prelude type. Thus you use any particular codec with
	72	the @view@ / @(^.)@ , @zoom@ and @over@ functions from either of those libraries;
	73	we presuppose neither since we already have access to the types they require.
0ac0c414	74
fafcbeb5	75	-}
fafcbeb5	76
21eb409c	77	type Codec
d199072b	78	= forall m r
21eb409c	79	. Monad m
2f4a83f8	80	=> Lens' (Producer ByteString m r)
d199072b	81	(Producer Text m (Producer ByteString m r))
d199072b	82
0ac0c414	83	{- \| 'decode' is just the ordinary @view@ or @(^.)@ of the lens libraries;
4ea59a8b	84	exported here under a name appropriate to the material. All of these are
4ea59a8b	85	the same:
0ac0c414	86
4ea59a8b	87	> decode utf8 p = decodeUtf8 p = view utf8 p = p ^. utf8
0ac0c414	88
	89	-}
	90
	91	decode :: ((b -> Constant b b) -> (a -> Constant b a)) -> a -> b
	92	decode codec a = getConstant (codec Constant a)
	93
bbdfd305	94
4ea59a8b	95	{- $codecs
	96
	97	Each Codec-lens looks into a byte stream that is supposed to contain text.
	98	The particular \'Codec\' lenses are named in accordance with the expected
	99	encoding, 'utf8', 'utf16LE' etc. To turn a Codec into an ordinary function,
	100	use @view@ / @(^.)@ -- here also called 'decode':
	101
	102	> view utf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r)
	103	> decode utf8 Byte.stdin :: Producer Text IO (Producer ByteString IO r)
	104	> Bytes.stdin ^. utf8 :: Producer Text IO (Producer ByteString IO r)
	105
	106	Uses of a codec with @view@ or @(^.)@ or 'decode' can always be replaced by the specialized
	107	decoding functions exported here, e.g.
	108
	109	> decodeUtf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r)
	110	> decodeUtf8 Byte.stdin :: Producer Text IO (Producer ByteString IO r)
	111
	112	The stream of text that a @Codec@ \'sees\' in the stream of bytes begins at its head.
	113	At any point of decoding failure, the stream of text ends and reverts to (returns)
	114	the original byte stream. Thus if the first bytes are already
	115	un-decodable, the whole ByteString producer will be returned, i.e.
	116
	117	> view utf8 bytestream
	118
	119	will just come to the same as
	120
	121	> return bytestream
	122
	123	Where there is no decoding failure, the return value of the text stream will be
	124	an empty byte stream followed by its own return value. In all cases you must
	125	deal with the fact that it is a /ByteString producer/ that is returned, even if
	126	it can be thrown away with @Control.Monad.void@
	127
	128	> void (Bytes.stdin ^. utf8) :: Producer Text IO ()
	129
	130	@zoom@ converts a Text parser into a ByteString parser:
	131
	132	> zoom utf8 drawChar :: Monad m => StateT (Producer ByteString m r) m (Maybe Char)
	133
a4913c42	134	or, using the type synonymn from @Pipes.Parse@:
4ea59a8b	135
	136	> zoom utf8 drawChar :: Monad m => Parser ByteString m (Maybe Char)
	137
a4913c42	138	Thus we can define a ByteString parser like this:
4ea59a8b	139
	140	> withNextByte :: Parser ByteString m (Maybe Char, Maybe Word8)))
	141	> withNextByte = do char_ <- zoom utf8 Text.drawChar
	142	> byte_ <- Bytes.peekByte
	143	> return (char_, byte_)
	144
	145	Though @withNextByte@ is partly defined with a Text parser 'drawChar';
	146	but it is a ByteString parser; it will return the first valid utf8-encoded
	147	Char in a ByteString, whatever its length,
	148	and the first byte of the next character, if they exist. Because
	149	we \'draw\' one and \'peek\' at the other, the parser as a whole only
	150	advances one Char's length along the bytestring, whatever that length may be.
	151	See the slightly more complex example \'decode.hs\' in the
	152	<http://www.haskellforall.com/2014/02/pipes-parse-30-lens-based-parsing.html#batteries-included haskellforall>
	153	discussion of this type of byte stream parsing.
	154	-}
	155
	156	utf8 :: Codec
	157	utf8 = mkCodec decodeUtf8 TE.encodeUtf8
	158
	159	utf8Pure :: Codec
	160	utf8Pure = mkCodec decodeUtf8Pure TE.encodeUtf8
	161
	162	utf16LE :: Codec
	163	utf16LE = mkCodec decodeUtf16LE TE.encodeUtf16LE
	164
	165	utf16BE :: Codec
	166	utf16BE = mkCodec decodeUtf16BE TE.encodeUtf16BE
	167
	168	utf32LE :: Codec
	169	utf32LE = mkCodec decodeUtf32LE TE.encodeUtf32LE
	170
	171	utf32BE :: Codec
	172	utf32BE = mkCodec decodeUtf32BE TE.encodeUtf32BE
	173
bbdfd305	174	decodeStream :: Monad m
	175	=> (B.ByteString -> DecodeResult)
	176	-> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
	177	decodeStream = loop where
	178	loop dec0 p =
	179	do x <- lift (next p)
	180	case x of Left r -> return (return r)
	181	Right (chunk, p') -> case dec0 chunk of
	182	DecodeResultSuccess text dec -> do yield text
	183	loop dec p'
	184	DecodeResultFailure text bs -> do yield text
	185	return (do yield bs
	186	p')
	187	{-# INLINABLE decodeStream#-}
	188
0ac0c414	189	{- $decoders
	190	These are functions with the simple type:
	191
	192	> decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
	193
	194	Thus in general
	195
	196	> decodeUtf8 = view utf8
	197	> decodeUtf16LE = view utf16LE
fafcbeb5	198
0ac0c414	199	and so forth, but these forms
	200	may be more convenient (and give better type errors!) where lenses are
	201	not desired.
	202	-}
fafcbeb5	203
fafcbeb5	204
bbdfd305	205	decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
eae50557	206	decodeUtf8 = decodeStream Stream.decodeUtf8
bbdfd305	207	{-# INLINE decodeUtf8 #-}
	208
	209	decodeUtf8Pure :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
eae50557	210	decodeUtf8Pure = decodeStream Stream.decodeUtf8Pure
bbdfd305	211	{-# INLINE decodeUtf8Pure #-}
	212
	213	decodeUtf16LE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
eae50557	214	decodeUtf16LE = decodeStream Stream.decodeUtf16LE
bbdfd305	215	{-# INLINE decodeUtf16LE #-}
	216
	217	decodeUtf16BE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
eae50557	218	decodeUtf16BE = decodeStream Stream.decodeUtf16BE
bbdfd305	219	{-# INLINE decodeUtf16BE #-}
	220
	221	decodeUtf32LE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
eae50557	222	decodeUtf32LE = decodeStream Stream.decodeUtf32LE
bbdfd305	223	{-# INLINE decodeUtf32LE #-}
	224
	225	decodeUtf32BE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
eae50557	226	decodeUtf32BE = decodeStream Stream.decodeUtf32BE
bbdfd305	227	{-# INLINE decodeUtf32BE #-}
bbdfd305	228
0ac0c414	229
	230	{- $encoders
	231	These are simply defined
	232
	233	> encodeUtf8 = yield . TE.encodeUtf8
	234
	235	They are intended for use with 'for'
	236
	237	> for Text.stdin encodeUtf8 :: Producer ByteString IO ()
	238
	239	which would have the effect of
	240
	241	> Text.stdin >-> Pipes.Prelude.map (TE.encodeUtf8)
	242
	243	using the encoding functions from Data.Text.Encoding
	244	-}
	245
10cfd90e	246	encodeUtf8 :: Monad m => Text -> Producer' ByteString m ()
0ac0c414	247	encodeUtf8 = yield . TE.encodeUtf8
10cfd90e	248	encodeUtf16LE :: Monad m => Text -> Producer' ByteString m ()
0ac0c414	249	encodeUtf16LE = yield . TE.encodeUtf16LE
10cfd90e	250	encodeUtf16BE :: Monad m => Text -> Producer' ByteString m ()
0ac0c414	251	encodeUtf16BE = yield . TE.encodeUtf16BE
10cfd90e	252	encodeUtf32LE :: Monad m => Text -> Producer' ByteString m ()
0ac0c414	253	encodeUtf32LE = yield . TE.encodeUtf32LE
10cfd90e	254	encodeUtf32BE :: Monad m => Text -> Producer' ByteString m ()
0ac0c414	255	encodeUtf32BE = yield . TE.encodeUtf32BE
0ac0c414	256
bbdfd305	257	mkCodec :: (forall r m . Monad m =>
	258	Producer ByteString m r -> Producer Text m (Producer ByteString m r ))
	259	-> (Text -> ByteString)
	260	-> Codec
	261	mkCodec dec enc = \k p0 -> fmap (\p -> join (for p (yield . enc))) (k (dec p0))
	262
	263
bbdfd305	264
fafcbeb5	265	{- $ascii
	266	ascii and latin encodings only use a small number of the characters 'Text'
	267	recognizes; thus we cannot use the pipes @Lens@ style to work with them.
bbdfd305	268	Rather we simply define functions each way.
bbdfd305	269	-}
bbdfd305	270
fafcbeb5	271
0ac0c414	272	-- \| 'encodeAscii' reduces as much of your stream of 'Text' actually is ascii to a byte stream,
fafcbeb5	273	-- returning the rest of the 'Text' at the first non-ascii 'Char'
fafcbeb5	274
bbdfd305	275	encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
	276	encodeAscii = go where
	277	go p = do e <- lift (next p)
	278	case e of
	279	Left r -> return (return r)
	280	Right (chunk, p') ->
	281	if T.null chunk
	282	then go p'
	283	else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
	284	in do yield (B8.pack (T.unpack safe))
	285	if T.null unsafe
	286	then go p'
	287	else return $ do yield unsafe
	288	p'
	289
	290	{- \| Reduce as much of your stream of 'Text' actually is iso8859 or latin1 to a byte stream,
	291	returning the rest of the 'Text' upon hitting any non-latin 'Char'
	292	-}
	293	encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
	294	encodeIso8859_1 = go where
	295	go p = do e <- lift (next p)
	296	case e of
	297	Left r -> return (return r)
	298	Right (txt, p') ->
	299	if T.null txt
	300	then go p'
	301	else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
	302	in do yield (B8.pack (T.unpack safe))
	303	if T.null unsafe
	304	then go p'
	305	else return $ do yield unsafe
	306	p'
	307
	308	{- \| Reduce a byte stream to a corresponding stream of ascii chars, returning the
	309	unused 'ByteString' upon hitting an un-ascii byte.
	310	-}
	311	decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
	312	decodeAscii = go where
	313	go p = do e <- lift (next p)
	314	case e of
	315	Left r -> return (return r)
	316	Right (chunk, p') ->
	317	if B.null chunk
	318	then go p'
	319	else let (safe, unsafe) = B.span (<= 0x7F) chunk
	320	in do yield (T.pack (B8.unpack safe))
	321	if B.null unsafe
	322	then go p'
	323	else return (do yield unsafe
	324	p')
	325
	326	{- \| Reduce a byte stream to a corresponding stream of ascii chars, returning the
	327	unused 'ByteString' upon hitting the rare un-latinizable byte.
	328	-}
	329	decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
	330	decodeIso8859_1 = go where
	331	go p = do e <- lift (next p)
	332	case e of
	333	Left r -> return (return r)
	334	Right (chunk, p') ->
	335	if B.null chunk
	336	then go p'
	337	else do let (safe, unsafe) = B.span (<= 0xFF) chunk
	338	yield (T.pack (B8.unpack safe))
339	if B.null unsafe
340	then go p'
341	else return (do yield unsafe
342	p')
343
344
345