tutorial etc

author: michaelt <what_is_it_to_do_anything@yahoo.com> 2014-11-08 22:30:32 -0500
committer: michaelt <what_is_it_to_do_anything@yahoo.com> 2014-11-08 22:30:32 -0500
commit: 955edd331812d8638f57a23ddcc90194ad7002b7 (patch)
tree: 7f832da066a52ec329f26dc793275a029dd00963 /examples/zoom.hs
parent: 2f4a83f82d206217456a035b7e8a1e56c585bfd0 (diff)
download: text-pipes-955edd331812d8638f57a23ddcc90194ad7002b7.tar.gz
text-pipes-955edd331812d8638f57a23ddcc90194ad7002b7.tar.zst
text-pipes-955edd331812d8638f57a23ddcc90194ad7002b7.zip
1 files changed, 152 insertions, 0 deletions
diff --git a/examples/zoom.hs b/examples/zoom.hs
new file mode 100644
index 0000000..3442dc8
--- /dev/null
+++ b/examples/zoom.hs
@@ -0,0 +1,152 @@
+-- this file illustrates several uses of `zoom` 
+-- one of them is quadratic in the length of the file
+-- since it has to decode and encode repeatedly,
+-- and is thus no good on long files. 
+{-# LANGUAGE OverloadedStrings  #-}
+{-# LANGUAGE BangPatterns#-}
+{-# LANGUAGE RankNTypes #-}
+import           Blaze.ByteString.Builder  (Builder, fromByteString, toByteString)
+import           Control.Exception         (Exception)
+import           Control.Monad.Trans.Class (lift)
+import           Data.ByteString           (ByteString)
+import qualified Data.ByteString           as S
+import qualified Data.ByteString.Lazy      as L
+import           Data.Monoid
+import           Data.Text                 (Text)
+import qualified Data.Text                 as T
+import qualified Data.Text.Encoding        as TEE
+import qualified Data.Text.Lazy            as TL
+import qualified Data.Text.Lazy.Encoding   as TLE
+import Pipes
+import Pipes.Parse
+import qualified Pipes.Prelude as PP
+import qualified Pipes.ByteString as Bytes
+import qualified Pipes.Text as Txt
+import Pipes.Text.Encoding (utf8)
+import Control.Lens
+import Control.Lens.Internal.Zoom
+import Control.Monad
+import qualified System.IO as IO
+import Control.Monad.Trans.Maybe
+import Control.Monad.State.Class
+main :: IO ()
+main = do -- S.writeFile fp $ contents 10000 -- 10000 cannot be handled fileParser0 and 1
+          -- parse_file fileParser0  -- pathological
+          -- parse_file fileParser1  -- programs
+          parse_file fileParser2  -- good program 
+          
+   where 
+   parse_file parser = IO.withBinaryFile fp IO.ReadMode $ \h ->
+                         do p' <- runEffect $ parseWith parser ( Bytes.fromHandle h ) >-> PP.print
+                            runEffect $ p' >-> PP.print
+   parseWith parser = loop where
+      loop p = do (m,p') <- lift (runStateT (runMaybeT parser) p)
+                  case m of Nothing -> return p'
+                            Just file -> do yield file
+                                            loop p'
+   fp = "encoded.fileformat"
+   contents n =  (toByteString . mconcat . replicate n . encodeFiles) input
+                 <> S.pack (replicate 10 250)
+fileParser0, fileParser1, fileParser2 :: Monad m => MaybeT (StateT (Producer ByteString m x) m) File
+fileParser0  = do (name, len) <- zoom utf8 parseText
+                  contents    <- zoom (Bytes.splitAt len) (lift drawAll)
+                  return (File name (S.concat contents))
+    where
+    -- this parser aggregates all Text parsing into one preliminary parser
+    -- which is then applied with `zoom utf8`
+    -- we cannot tell in advance how long, e.g. the file name will be
+    parseText :: Monad m => MaybeT (StateT (Producer Text m x) m) (Text, Int)
+    parseText = do nameLength    <- parseNumber
+                   names         <- zoom (Txt.splitAt nameLength) $ (lift drawAll)
+                   contentLength <- parseNumber
+                   return $! (T.concat names, contentLength)
+-- here we disaggregate the little Text parsers but still apply them with `zoom utf8`
+-- this makes no difference
+fileParser1  = do nameLength    <- zoom utf8 parseNumber
+                  names         <- zoom (utf8 . Txt.splitAt nameLength)  (lift drawAll)
+                  contentLength <- zoom utf8 parseNumber
+                  contents      <- zoom (Bytes.splitAt contentLength) (lift drawAll)
+                  return (File (T.concat names) (S.concat contents))
+-- this is the good program; be reflecting on the fact that file names
+-- should not be a 1000 bytes long, and binary files longer than e.g. 10 ^ 10
+-- we can restrict the length of the byte stream to which we apply `zoom utf8`
+fileParser2  = do nameLength  <- Bytes.splitAt 3 ~~> utf8 ~~> parseNumber
+                  names       <- Bytes.splitAt nameLength ~~> utf8 ~~> lift drawAll
+                  len         <- Bytes.splitAt 10 ~~>  utf8 ~~> parseNumber
+                  contents    <- Bytes.splitAt len ~~> lift drawAll
+                  return (File (T.concat names) (S.concat contents))
+-- infix lens nonsense
+infixr 1 ~~>
+(~~>) :: Zoom m n s t 
+      => ((s -> Zoomed n c s) -> t -> Zoomed n c t)
+      -> m c -> n c
+(~~>) = zoom
+{-# INLINE (~~>) #-}
+parseNumber :: Monad m =>  MaybeT (StateT (Producer Text m x) m) Int
+parseNumber  = loop  0 where
+   loop !n = do c <- MaybeT  Txt.drawChar
+                case c of ':' -> return n
+                          _   -> do guard ('0' <= c && c <= '9')
+                                    loop  $! n * 10 + (fromEnum c - fromEnum '0')
+-- --- Michael S's `File` type and its binary encoding, etc.
+data File = File
+    { fileName     :: !Text
+    , fileContents :: !ByteString
+    }
+    deriving Show
+encodeFile :: File -> Builder
+encodeFile (File name contents) =
+    tellLength (S.length bytesname) <>
+    fromByteString bytesname        <>
+    tellLength (S.length contents)  <>
+    fromByteString contents
+  where
+    tellLength i = fromByteString $ TEE.encodeUtf8 (T.pack (shows i ":"))
+    bytesname = TEE.encodeUtf8 name
+encodeFiles :: [File] -> Builder
+encodeFiles = mconcat . map encodeFile
+input :: [File]
+input =
+    [ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8"
+    , File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16"
+    , File "binary.dat" "we'll pretend to be binary"
+    ]
+---
+-- This desperate scheme actually has some efficacy, if used before `utf8` in a zoom
+-- but not much 
+chunk :: Monad m => Int -> Lens' (Producer ByteString m r) (Producer ByteString m r)
+chunk n = lens (chunkyN n) (\_ b -> b) where
+    chunkyN  :: Monad m => Int -> Producer ByteString m r -> Producer ByteString m r
+    chunkyN n  = prod_loop where
+      prod_loop p = do mbs <- lift $ next p
+                       case mbs of Left r -> return r
+                                   Right (bs, p') -> do bs_loop bs
+                                                        prod_loop p'
+      bs_loop bs = unless (S.null bs) $ do yield fore
+                                           unless (S.null aft)  (bs_loop aft)
+            where (fore, aft) = S.splitAt n bs
author	michaelt <what_is_it_to_do_anything@yahoo.com>	2014-11-08 22:30:32 -0500
committer	michaelt <what_is_it_to_do_anything@yahoo.com>	2014-11-08 22:30:32 -0500
commit	955edd331812d8638f57a23ddcc90194ad7002b7 (patch)
tree	7f832da066a52ec329f26dc793275a029dd00963 /examples/zoom.hs
parent	2f4a83f82d206217456a035b7e8a1e56c585bfd0 (diff)
download	text-pipes-955edd331812d8638f57a23ddcc90194ad7002b7.tar.gz text-pipes-955edd331812d8638f57a23ddcc90194ad7002b7.tar.zst text-pipes-955edd331812d8638f57a23ddcc90194ad7002b7.zip

diff --git a/examples/zoom.hs b/examples/zoom.hs new file mode 100644 index 0000000..3442dc8 --- /dev/null +++ b/examples/zoom.hs
@@ -0,0 +1,152 @@
	1	-- this file illustrates several uses of `zoom`
	2	-- one of them is quadratic in the length of the file
	3	-- since it has to decode and encode repeatedly,
	4	-- and is thus no good on long files.
	5
	6	{-# LANGUAGE OverloadedStrings #-}
	7	{-# LANGUAGE BangPatterns#-}
	8	{-# LANGUAGE RankNTypes #-}
	9	import Blaze.ByteString.Builder (Builder, fromByteString, toByteString)
	10	import Control.Exception (Exception)
	11	import Control.Monad.Trans.Class (lift)
	12	import Data.ByteString (ByteString)
	13	import qualified Data.ByteString as S
	14	import qualified Data.ByteString.Lazy as L
	15	import Data.Monoid
	16	import Data.Text (Text)
	17	import qualified Data.Text as T
	18	import qualified Data.Text.Encoding as TEE
	19	import qualified Data.Text.Lazy as TL
	20	import qualified Data.Text.Lazy.Encoding as TLE
	21
	22	import Pipes
	23	import Pipes.Parse
	24	import qualified Pipes.Prelude as PP
	25	import qualified Pipes.ByteString as Bytes
	26	import qualified Pipes.Text as Txt
	27	import Pipes.Text.Encoding (utf8)
	28
	29	import Control.Lens
	30	import Control.Lens.Internal.Zoom
	31	import Control.Monad
	32	import qualified System.IO as IO
	33	import Control.Monad.Trans.Maybe
	34	import Control.Monad.State.Class
	35
	36	main :: IO ()
	37	main = do -- S.writeFile fp $ contents 10000 -- 10000 cannot be handled fileParser0 and 1
	38	-- parse_file fileParser0 -- pathological
	39	-- parse_file fileParser1 -- programs
	40	parse_file fileParser2 -- good program
	41
	42	where
	43	parse_file parser = IO.withBinaryFile fp IO.ReadMode $ \h ->
	44	do p' <- runEffect $ parseWith parser ( Bytes.fromHandle h ) >-> PP.print
	45	runEffect $ p' >-> PP.print
	46	parseWith parser = loop where
	47	loop p = do (m,p') <- lift (runStateT (runMaybeT parser) p)
	48	case m of Nothing -> return p'
	49	Just file -> do yield file
	50	loop p'
	51	fp = "encoded.fileformat"
	52	contents n = (toByteString . mconcat . replicate n . encodeFiles) input
	53	<> S.pack (replicate 10 250)
	54
	55
	56
	57	fileParser0, fileParser1, fileParser2 :: Monad m => MaybeT (StateT (Producer ByteString m x) m) File
	58	fileParser0 = do (name, len) <- zoom utf8 parseText
	59	contents <- zoom (Bytes.splitAt len) (lift drawAll)
	60	return (File name (S.concat contents))
	61	where
	62	-- this parser aggregates all Text parsing into one preliminary parser
	63	-- which is then applied with `zoom utf8`
	64	-- we cannot tell in advance how long, e.g. the file name will be
	65	parseText :: Monad m => MaybeT (StateT (Producer Text m x) m) (Text, Int)
	66	parseText = do nameLength <- parseNumber
	67	names <- zoom (Txt.splitAt nameLength) $ (lift drawAll)
	68	contentLength <- parseNumber
	69	return $! (T.concat names, contentLength)
	70
	71	-- here we disaggregate the little Text parsers but still apply them with `zoom utf8`
	72	-- this makes no difference
	73	fileParser1 = do nameLength <- zoom utf8 parseNumber
	74	names <- zoom (utf8 . Txt.splitAt nameLength) (lift drawAll)
	75	contentLength <- zoom utf8 parseNumber
	76	contents <- zoom (Bytes.splitAt contentLength) (lift drawAll)
	77	return (File (T.concat names) (S.concat contents))
	78
	79	-- this is the good program; be reflecting on the fact that file names
	80	-- should not be a 1000 bytes long, and binary files longer than e.g. 10 ^ 10
	81	-- we can restrict the length of the byte stream to which we apply `zoom utf8`
	82	fileParser2 = do nameLength <- Bytes.splitAt 3 ~~> utf8 ~~> parseNumber
	83	names <- Bytes.splitAt nameLength ~~> utf8 ~~> lift drawAll
	84	len <- Bytes.splitAt 10 ~~> utf8 ~~> parseNumber
	85	contents <- Bytes.splitAt len ~~> lift drawAll
	86	return (File (T.concat names) (S.concat contents))
	87
	88	-- infix lens nonsense
	89	infixr 1 ~~>
	90	(~~>) :: Zoom m n s t
	91	=> ((s -> Zoomed n c s) -> t -> Zoomed n c t)
	92	-> m c -> n c
	93	(~~>) = zoom
	94	{-# INLINE (~~>) #-}
	95
	96	parseNumber :: Monad m => MaybeT (StateT (Producer Text m x) m) Int
	97	parseNumber = loop 0 where
	98	loop !n = do c <- MaybeT Txt.drawChar
	99	case c of ':' -> return n
	100	_ -> do guard ('0' <= c && c <= '9')
	101	loop $! n * 10 + (fromEnum c - fromEnum '0')
	102
	103
	104
	105	-- --- Michael S's `File` type and its binary encoding, etc.
	106
	107
	108	data File = File
	109	{ fileName :: !Text
	110	, fileContents :: !ByteString
	111	}
	112	deriving Show
	113
	114	encodeFile :: File -> Builder
	115	encodeFile (File name contents) =
	116	tellLength (S.length bytesname) <>
	117	fromByteString bytesname <>
	118	tellLength (S.length contents) <>
	119	fromByteString contents
	120	where
	121	tellLength i = fromByteString $ TEE.encodeUtf8 (T.pack (shows i ":"))
	122	bytesname = TEE.encodeUtf8 name
	123
	124	encodeFiles :: [File] -> Builder
	125	encodeFiles = mconcat . map encodeFile
	126
	127	input :: [File]
	128	input =
	129	[ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8"
	130	, File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16"
	131	, File "binary.dat" "we'll pretend to be binary"
	132	]
	133
	134
	135	---
	136
	137	-- This desperate scheme actually has some efficacy, if used before `utf8` in a zoom
	138	-- but not much
	139
	140	chunk :: Monad m => Int -> Lens' (Producer ByteString m r) (Producer ByteString m r)
	141	chunk n = lens (chunkyN n) (\_ b -> b) where
	142
	143	chunkyN :: Monad m => Int -> Producer ByteString m r -> Producer ByteString m r
	144	chunkyN n = prod_loop where
	145
	146	prod_loop p = do mbs <- lift $ next p
	147	case mbs of Left r -> return r
	148	Right (bs, p') -> do bs_loop bs
	149	prod_loop p'
	150	bs_loop bs = unless (S.null bs) $ do yield fore
	151	unless (S.null aft) (bs_loop aft)
	152	where (fore, aft) = S.splitAt n bs