encoding documentation beginning to improve

author: michaelt <what_is_it_to_do_anything@yahoo.com> 2014-02-16 12:19:41 -0500
committer: michaelt <what_is_it_to_do_anything@yahoo.com> 2014-02-16 12:19:41 -0500
commit: fafcbeb516fda29cae18b61f84cc79b3e688f79c (patch)
tree: ad916a67eced3e0e2c3f37aa882b200516f1cb7e
parent: 3f76b550da195af30d32f611d55b33e0651cd0e8 (diff)
download: text-pipes-fafcbeb516fda29cae18b61f84cc79b3e688f79c.tar.gz
text-pipes-fafcbeb516fda29cae18b61f84cc79b3e688f79c.tar.zst
text-pipes-fafcbeb516fda29cae18b61f84cc79b3e688f79c.zip
1 files changed, 56 insertions, 17 deletions
diff --git a/Pipes/Text/Encoding.hs b/Pipes/Text/Encoding.hs
index 21269cf..e07c47e 100644
--- a/Pipes/Text/Encoding.hs
+++ b/Pipes/Text/Encoding.hs
@@ -1,24 +1,31 @@
 {-# LANGUAGE RankNTypes, BangPatterns #-}
 -- |
 -- This module uses the stream decoding functions from the text-stream-decoding package
-- to define pipes decoding functions and lenses.
+-- to define decoding functions and lenses.
 module Pipes.Text.Encoding
-    ( Codec
+    ( 
+    -- * Lens type
+    -- $lenses
+    Codec
+    -- * Standard lenses for viewing Text in ByteString
+    -- $codecs
    , utf8
    , utf8Pure
    , utf16LE
    , utf16BE
    , utf32LE
    , utf32BE
+    -- * Non-lens decoding functions 
    , decodeUtf8
    , decodeUtf8Pure
    , decodeUtf16LE
    , decodeUtf16BE
    , decodeUtf32LE
    , decodeUtf32BE
+    -- * Functions for latin and ascii text
+    -- $ascii
    , encodeAscii
    , decodeAscii
    , encodeIso8859_1
@@ -38,13 +45,22 @@ import Control.Monad (join)
 import Data.Word (Word8)
 import Pipes
 type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
-{- | A 'Codec' is just an improper lens into a byte stream that is expected to contain text.
+{- $lenses
-    They are named in accordance with the expected encoding, 'utf8', 'utf16LE' etc.
+    The 'Codec' type is just an aliased standard Prelude type. It just specializes 
-    The stream of text they 'see' in a bytestream ends by returning the original byte stream 
+    the @Lens'@ type synonym used by the standard lens libraries, @lens@ and 
-    beginning at the point of failure, or the empty bytestream with its return value.
+    @lens-families@ . You use them with
-   -}
+    the @view@ or @(^.)@ and @zoom@ functions from those libraries.
+    
+    Each codec lens looks into a byte stream that is understood to contain text.
+    The stream of text it 'sees' in the stream of bytes begins at its head; it ends 
+    by reverting to (returning) the original byte stream 
+    beginning at the point of decoding failure. Where there is no decoding failure, 
+    it returns an empty byte stream with its return value. 
+    -}
 type Codec
    =  forall m r
    .  Monad m
@@ -66,6 +82,9 @@ decodeStream = loop where
                                                                 p')
 {-# INLINABLE decodeStream#-}
 decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
 decodeUtf8 = decodeStream streamUtf8
 {-# INLINE decodeUtf8 #-}
@@ -97,10 +116,28 @@ mkCodec :: (forall r m . Monad m =>
 mkCodec dec enc = \k p0 -> fmap (\p -> join (for p (yield . enc)))  (k (dec p0))
-{- | An improper lens into a byte stream expected to be UTF-8 encoded; the associated
+{- $codecs
-   text stream ends by returning the original bytestream beginning at the point of failure,
+    
-   or the empty bytestring for a well-encoded text. 
+    The particular \'Codec\' lenses are named in accordance with the expected encoding, 'utf8', 'utf16LE' etc.
-   -}
+>   view utf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+>   Bytes.stdin ^. utf8 ::  Producer Text IO (Producer ByteString IO r)
+    @zoom@ converts a Text parser into a ByteString parser:
+>   zoom utf8 drawChar :: Monad m => StateT (Producer ByteString m r) m (Maybe Char)
+> 
+>   withNextByte :: Parser ByteString m (Maybe Char, Maybe Word8)
+>   withNextByte = do char_ <- zoom utf8 Text.drawChar
+>                     byte_ <- Bytes.peekByte
+>                     return (char_, byte_)
+     @withNextByte@ will return the first valid Char in a ByteString, 
+     and the first byte of the next character, if they exist. Because 
+     we \'draw\' one and \'peek\' at the other, the parser as a whole only 
+     advances one Char's length along the bytestring.
+    -}
 utf8 :: Codec
 utf8 = mkCodec decodeUtf8 TE.encodeUtf8
@@ -121,14 +158,16 @@ utf32BE :: Codec
 utf32BE = mkCodec decodeUtf32BE TE.encodeUtf32BE
-{- | ascii and latin encodings only use a small number of the characters 'Text'
+{- $ascii
-     recognizes; thus we cannot use the pipes 'Lens' style to work with them. 
+   ascii and latin encodings only use a small number of the characters 'Text'
+     recognizes; thus we cannot use the pipes @Lens@ style to work with them. 
     Rather we simply define functions each way. 
-     'encodeAscii' : Reduce as much of your stream of 'Text' actually is ascii to a byte stream,
-     returning the rest of the 'Text' at the first non-ascii 'Char'
 -}
+--  'encodeAscii' reduces as much of your stream of 'Text' as actually is ascii to a byte stream,
+--   returning the rest of the 'Text' at the first non-ascii 'Char'
 encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
 encodeAscii = go where
  go p = do e <- lift (next p)
author	michaelt <what_is_it_to_do_anything@yahoo.com>	2014-02-16 12:19:41 -0500
committer	michaelt <what_is_it_to_do_anything@yahoo.com>	2014-02-16 12:19:41 -0500
commit	fafcbeb516fda29cae18b61f84cc79b3e688f79c (patch)
tree	ad916a67eced3e0e2c3f37aa882b200516f1cb7e
parent	3f76b550da195af30d32f611d55b33e0651cd0e8 (diff)
download	text-pipes-fafcbeb516fda29cae18b61f84cc79b3e688f79c.tar.gz text-pipes-fafcbeb516fda29cae18b61f84cc79b3e688f79c.tar.zst text-pipes-fafcbeb516fda29cae18b61f84cc79b3e688f79c.zip

diff --git a/Pipes/Text/Encoding.hs b/Pipes/Text/Encoding.hs index 21269cf..e07c47e 100644 --- a/Pipes/Text/Encoding.hs +++ b/Pipes/Text/Encoding.hs
@@ -1,24 +1,31 @@
1
2	{-# LANGUAGE RankNTypes, BangPatterns #-}	1	{-# LANGUAGE RankNTypes, BangPatterns #-}
3	-- \|	2	-- \|
4		3
5	-- This module uses the stream decoding functions from the text-stream-decoding package	4	-- This module uses the stream decoding functions from the text-stream-decoding package
6	-- to define pipes decoding functions and lenses.	5	-- to define decoding functions and lenses.
7		6
8	module Pipes.Text.Encoding	7	module Pipes.Text.Encoding
9	( Codec	8	(
		9	-- * Lens type
		10	-- $lenses
		11	Codec
		12	-- * Standard lenses for viewing Text in ByteString
		13	-- $codecs
10	, utf8	14	, utf8
11	, utf8Pure	15	, utf8Pure
12	, utf16LE	16	, utf16LE
13	, utf16BE	17	, utf16BE
14	, utf32LE	18	, utf32LE
15	, utf32BE	19	, utf32BE
		20	-- * Non-lens decoding functions
16	, decodeUtf8	21	, decodeUtf8
17	, decodeUtf8Pure	22	, decodeUtf8Pure
18	, decodeUtf16LE	23	, decodeUtf16LE
19	, decodeUtf16BE	24	, decodeUtf16BE
20	, decodeUtf32LE	25	, decodeUtf32LE
21	, decodeUtf32BE	26	, decodeUtf32BE
		27	-- * Functions for latin and ascii text
		28	-- $ascii
22	, encodeAscii	29	, encodeAscii
23	, decodeAscii	30	, decodeAscii
24	, encodeIso8859_1	31	, encodeIso8859_1
@@ -38,13 +45,22 @@ import Control.Monad (join)
38	import Data.Word (Word8)	45	import Data.Word (Word8)
39	import Pipes	46	import Pipes
40		47
		48
41	type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)	49	type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
42		50
43	{- \| A 'Codec' is just an improper lens into a byte stream that is expected to contain text.	51	{- $lenses
44	They are named in accordance with the expected encoding, 'utf8', 'utf16LE' etc.	52	The 'Codec' type is just an aliased standard Prelude type. It just specializes
45	The stream of text they 'see' in a bytestream ends by returning the original byte stream	53	the @Lens'@ type synonym used by the standard lens libraries, @lens@ and
46	beginning at the point of failure, or the empty bytestream with its return value.	54	@lens-families@ . You use them with
47	-}	55	the @view@ or @(^.)@ and @zoom@ functions from those libraries.
		56
		57	Each codec lens looks into a byte stream that is understood to contain text.
		58	The stream of text it 'sees' in the stream of bytes begins at its head; it ends
		59	by reverting to (returning) the original byte stream
		60	beginning at the point of decoding failure. Where there is no decoding failure,
		61	it returns an empty byte stream with its return value.
		62	-}
		63
48	type Codec	64	type Codec
49	= forall m r	65	= forall m r
50	. Monad m	66	. Monad m
@@ -66,6 +82,9 @@ decodeStream = loop where
66	p')	82	p')
67	{-# INLINABLE decodeStream#-}	83	{-# INLINABLE decodeStream#-}
68		84
		85
		86
		87
69	decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)	88	decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
70	decodeUtf8 = decodeStream streamUtf8	89	decodeUtf8 = decodeStream streamUtf8
71	{-# INLINE decodeUtf8 #-}	90	{-# INLINE decodeUtf8 #-}
@@ -97,10 +116,28 @@ mkCodec :: (forall r m . Monad m =>
97	mkCodec dec enc = \k p0 -> fmap (\p -> join (for p (yield . enc))) (k (dec p0))	116	mkCodec dec enc = \k p0 -> fmap (\p -> join (for p (yield . enc))) (k (dec p0))
98		117
99		118
100	{- \| An improper lens into a byte stream expected to be UTF-8 encoded; the associated	119	{- $codecs
101	text stream ends by returning the original bytestream beginning at the point of failure,	120
102	or the empty bytestring for a well-encoded text.	121	The particular \'Codec\' lenses are named in accordance with the expected encoding, 'utf8', 'utf16LE' etc.
103	-}	122
		123	> view utf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r)
		124	> Bytes.stdin ^. utf8 :: Producer Text IO (Producer ByteString IO r)
		125
		126	@zoom@ converts a Text parser into a ByteString parser:
		127
		128	> zoom utf8 drawChar :: Monad m => StateT (Producer ByteString m r) m (Maybe Char)
		129	>
		130	> withNextByte :: Parser ByteString m (Maybe Char, Maybe Word8)
		131	> withNextByte = do char_ <- zoom utf8 Text.drawChar
		132	> byte_ <- Bytes.peekByte
		133	> return (char_, byte_)
		134
		135	@withNextByte@ will return the first valid Char in a ByteString,
		136	and the first byte of the next character, if they exist. Because
		137	we \'draw\' one and \'peek\' at the other, the parser as a whole only
		138	advances one Char's length along the bytestring.
		139
		140	-}
104		141
105	utf8 :: Codec	142	utf8 :: Codec
106	utf8 = mkCodec decodeUtf8 TE.encodeUtf8	143	utf8 = mkCodec decodeUtf8 TE.encodeUtf8
@@ -121,14 +158,16 @@ utf32BE :: Codec
121	utf32BE = mkCodec decodeUtf32BE TE.encodeUtf32BE	158	utf32BE = mkCodec decodeUtf32BE TE.encodeUtf32BE
122		159
123		160
124	{- \| ascii and latin encodings only use a small number of the characters 'Text'	161	{- $ascii
125	recognizes; thus we cannot use the pipes 'Lens' style to work with them.	162	ascii and latin encodings only use a small number of the characters 'Text'
		163	recognizes; thus we cannot use the pipes @Lens@ style to work with them.
126	Rather we simply define functions each way.	164	Rather we simply define functions each way.
127
128	'encodeAscii' : Reduce as much of your stream of 'Text' actually is ascii to a byte stream,
129	returning the rest of the 'Text' at the first non-ascii 'Char'
130	-}	165	-}
131		166
		167
		168	-- 'encodeAscii' reduces as much of your stream of 'Text' as actually is ascii to a byte stream,
		169	-- returning the rest of the 'Text' at the first non-ascii 'Char'
		170
132	encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)	171	encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
133	encodeAscii = go where	172	encodeAscii = go where
134	go p = do e <- lift (next p)	173	go p = do e <- lift (next p)