{- | Turn a 'Codec' into a pipes-style 'Lens'' on a byte stream. The available
     'Codec's are 'utf8', 'utf16_le', 'utf16_be', 'utf32_le' and 'utf32_be';
     the 'Codec' concept and the individual definitions follow the enumerator
     and conduit libraries.

     Viewing through the lens feeds the byte chunks to the codec's incremental
     decoder (seeded with an empty 'ByteString') and yields the decoded 'Text'.
     The result of the 'Text' producer is a byte producer: when the decoder
     reports undecodable input it is the offending bytes followed by the
     untouched remainder of the source; when the source simply ends it is
     whatever undecoded bytes the decoder was still holding, followed by the
     source's own return value.

     Going back the other way, every 'Text' chunk is re-encoded with the
     codec's encoder and the leftover byte producer is run afterwards.

     Utf8 is handled differently in this library -- without 'unsafePerformIO'
     and friends to catch 'Text' exceptions -- but the same
     @mypipe ^. codec utf8@ interface can still be used. @mypipe ^. decodeUtf8@
     should behave the same, with a somewhat more direct implementation.
-}
codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
codec (Codec _ enc dec) k p0 =
    fmap reencode (k (decoder (dec B.empty) p0))
  where
    -- Encode each 'Text' chunk, keeping only the encoded bytes, then continue
    -- with the byte producer that the text producer returned.
    reencode textProducer = join (for textProducer (yield . fst . enc))

    decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
    decoder !state source = case state of
        -- Decoding stopped: emit what was decoded and hand back the bad
        -- bytes in front of the rest of the input.
        PI.Other txt bad -> do
            yield txt
            return (yield bad >> source)
        -- Decoding can continue: emit what was decoded and pull more input.
        PI.Some txt extra continue -> do
            yield txt
            step <- lift (next source)
            case step of
                -- Input exhausted: the pending undecoded bytes are returned.
                Left r -> return (yield extra >> return r)
                Right (chunk, rest) -> decoder (continue chunk) rest
-
{- | The ascii and latin encodings cover only a small fragment of 'Text', so
     they cannot be exposed in the pipes 'Lens' style; instead there is one
     plain function for each direction.

     'encodeAscii' converts the leading ascii portion of a 'Text' stream into
     bytes. It stops at the first 'Char' above @0x7F@ and returns a producer
     of the remaining 'Text', beginning with that character. If the whole
     stream is ascii, the returned producer just delivers the original result.
     Empty 'Text' chunks are skipped.
-}
encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
encodeAscii = loop
  where
    loop producer = lift (next producer) >>= either (return . return) step

    step (txt, rest)
      | T.null txt = loop rest
      | otherwise = do
          let (ascii, leftover) = T.span ((<= 0x7F) . ord) txt
          yield (B8.pack (T.unpack ascii))
          if T.null leftover
            then loop rest
            -- Put the unencodable tail back in front of the remaining input.
            else return (yield leftover >> rest)
{- | Convert the leading iso8859-1 (latin1) portion of a 'Text' stream into
     bytes. Encoding stops at the first 'Char' above @0xFF@; the returned
     producer delivers the remaining 'Text', beginning with that character.
     If every character fits, the returned producer just delivers the original
     result. Empty 'Text' chunks are skipped.
-}
encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
encodeIso8859_1 = loop
  where
    loop producer = lift (next producer) >>= either (return . return) step

    step (txt, rest)
      | T.null txt = loop rest
      | otherwise = do
          let (latin, leftover) = T.span ((<= 0xFF) . ord) txt
          yield (B8.pack (T.unpack latin))
          if T.null leftover
            then loop rest
            -- Put the unencodable tail back in front of the remaining input.
            else return (yield leftover >> rest)
-
{- | Decode the leading ascii portion of a byte stream into 'Text'. Decoding
     stops at the first byte above @0x7F@; the returned producer delivers the
     unused bytes, beginning with that byte. If every byte is ascii, the
     returned producer just delivers the original result. Empty 'ByteString'
     chunks are skipped.
-}
decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeAscii = loop
  where
    loop producer = lift (next producer) >>= either (return . return) step

    step (bytes, rest)
      | B.null bytes = loop rest
      | otherwise = do
          let (ascii, leftover) = B.span (<= 0x7F) bytes
          yield (T.pack (B8.unpack ascii))
          if B.null leftover
            then loop rest
            -- Put the undecodable tail back in front of the remaining input.
            else return (yield leftover >> rest)
-
{- | Decode a byte stream as iso8859-1 (latin1) into 'Text', mapping each byte
     to the 'Char' with the same code point.

     Every byte value (@0x00@ to @0xFF@) is a valid latin1 character, so
     decoding never stops early: the whole input is consumed and the returned
     producer only delivers the original result. The nested-producer return
     type is kept so the function has the same shape as 'decodeAscii'.
     Empty 'ByteString' chunks are skipped.
-}
decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeIso8859_1 = go
  where
    go p = do
      echunk <- lift (next p)
      case echunk of
        Left r -> return (return r)
        Right (chunk, p')
          | B.null chunk -> go p'
          -- No span/leftover handling here: a range test on a byte
          -- (<= 0xFF) can never fail, so the whole chunk is decodable.
          | otherwise -> do
              yield (T.pack (B8.unpack chunk))
              go p'
-
-
-
-
-