aboutsummaryrefslogtreecommitdiffhomepage
path: root/examples/zoom.hs
diff options
context:
space:
mode:
authormichaelt <what_is_it_to_do_anything@yahoo.com>2014-11-08 22:30:32 -0500
committermichaelt <what_is_it_to_do_anything@yahoo.com>2014-11-08 22:30:32 -0500
commit955edd331812d8638f57a23ddcc90194ad7002b7 (patch)
tree7f832da066a52ec329f26dc793275a029dd00963 /examples/zoom.hs
parent2f4a83f82d206217456a035b7e8a1e56c585bfd0 (diff)
downloadtext-pipes-955edd331812d8638f57a23ddcc90194ad7002b7.tar.gz
text-pipes-955edd331812d8638f57a23ddcc90194ad7002b7.tar.zst
text-pipes-955edd331812d8638f57a23ddcc90194ad7002b7.zip
tutorial etc
Diffstat (limited to 'examples/zoom.hs')
-rw-r--r--examples/zoom.hs152
1 files changed, 152 insertions, 0 deletions
diff --git a/examples/zoom.hs b/examples/zoom.hs
new file mode 100644
index 0000000..3442dc8
--- /dev/null
+++ b/examples/zoom.hs
@@ -0,0 +1,152 @@
1-- this file illustrates several uses of `zoom`
2-- one of them is quadratic in the length of the file
3-- since it has to decode and encode repeatedly,
4-- and is thus no good on long files.
5
6{-# LANGUAGE OverloadedStrings #-}
7{-# LANGUAGE BangPatterns#-}
8{-# LANGUAGE RankNTypes #-}
9import Blaze.ByteString.Builder (Builder, fromByteString, toByteString)
10import Control.Exception (Exception)
11import Control.Monad.Trans.Class (lift)
12import Data.ByteString (ByteString)
13import qualified Data.ByteString as S
14import qualified Data.ByteString.Lazy as L
15import Data.Monoid
16import Data.Text (Text)
17import qualified Data.Text as T
18import qualified Data.Text.Encoding as TEE
19import qualified Data.Text.Lazy as TL
20import qualified Data.Text.Lazy.Encoding as TLE
21
22import Pipes
23import Pipes.Parse
24import qualified Pipes.Prelude as PP
25import qualified Pipes.ByteString as Bytes
26import qualified Pipes.Text as Txt
27import Pipes.Text.Encoding (utf8)
28
29import Control.Lens
30import Control.Lens.Internal.Zoom
31import Control.Monad
32import qualified System.IO as IO
33import Control.Monad.Trans.Maybe
34import Control.Monad.State.Class
35
36main :: IO ()
37main = do -- S.writeFile fp $ contents 10000 -- 10000 cannot be handled fileParser0 and 1
38 -- parse_file fileParser0 -- pathological
39 -- parse_file fileParser1 -- programs
40 parse_file fileParser2 -- good program
41
42 where
43 parse_file parser = IO.withBinaryFile fp IO.ReadMode $ \h ->
44 do p' <- runEffect $ parseWith parser ( Bytes.fromHandle h ) >-> PP.print
45 runEffect $ p' >-> PP.print
46 parseWith parser = loop where
47 loop p = do (m,p') <- lift (runStateT (runMaybeT parser) p)
48 case m of Nothing -> return p'
49 Just file -> do yield file
50 loop p'
51 fp = "encoded.fileformat"
52 contents n = (toByteString . mconcat . replicate n . encodeFiles) input
53 <> S.pack (replicate 10 250)
54
55
56
57fileParser0, fileParser1, fileParser2 :: Monad m => MaybeT (StateT (Producer ByteString m x) m) File
58fileParser0 = do (name, len) <- zoom utf8 parseText
59 contents <- zoom (Bytes.splitAt len) (lift drawAll)
60 return (File name (S.concat contents))
61 where
62 -- this parser aggregates all Text parsing into one preliminary parser
63 -- which is then applied with `zoom utf8`
64 -- we cannot tell in advance how long, e.g. the file name will be
65 parseText :: Monad m => MaybeT (StateT (Producer Text m x) m) (Text, Int)
66 parseText = do nameLength <- parseNumber
67 names <- zoom (Txt.splitAt nameLength) $ (lift drawAll)
68 contentLength <- parseNumber
69 return $! (T.concat names, contentLength)
70
71-- here we disaggregate the little Text parsers but still apply them with `zoom utf8`
72-- this makes no difference
73fileParser1 = do nameLength <- zoom utf8 parseNumber
74 names <- zoom (utf8 . Txt.splitAt nameLength) (lift drawAll)
75 contentLength <- zoom utf8 parseNumber
76 contents <- zoom (Bytes.splitAt contentLength) (lift drawAll)
77 return (File (T.concat names) (S.concat contents))
78
79-- this is the good program; be reflecting on the fact that file names
80-- should not be a 1000 bytes long, and binary files longer than e.g. 10 ^ 10
81-- we can restrict the length of the byte stream to which we apply `zoom utf8`
82fileParser2 = do nameLength <- Bytes.splitAt 3 ~~> utf8 ~~> parseNumber
83 names <- Bytes.splitAt nameLength ~~> utf8 ~~> lift drawAll
84 len <- Bytes.splitAt 10 ~~> utf8 ~~> parseNumber
85 contents <- Bytes.splitAt len ~~> lift drawAll
86 return (File (T.concat names) (S.concat contents))
87
88-- infix lens nonsense
89infixr 1 ~~>
90(~~>) :: Zoom m n s t
91 => ((s -> Zoomed n c s) -> t -> Zoomed n c t)
92 -> m c -> n c
93(~~>) = zoom
94{-# INLINE (~~>) #-}
95
96parseNumber :: Monad m => MaybeT (StateT (Producer Text m x) m) Int
97parseNumber = loop 0 where
98 loop !n = do c <- MaybeT Txt.drawChar
99 case c of ':' -> return n
100 _ -> do guard ('0' <= c && c <= '9')
101 loop $! n * 10 + (fromEnum c - fromEnum '0')
102
103
104
105-- --- Michael S's `File` type and its binary encoding, etc.
106
107
108data File = File
109 { fileName :: !Text
110 , fileContents :: !ByteString
111 }
112 deriving Show
113
114encodeFile :: File -> Builder
115encodeFile (File name contents) =
116 tellLength (S.length bytesname) <>
117 fromByteString bytesname <>
118 tellLength (S.length contents) <>
119 fromByteString contents
120 where
121 tellLength i = fromByteString $ TEE.encodeUtf8 (T.pack (shows i ":"))
122 bytesname = TEE.encodeUtf8 name
123
124encodeFiles :: [File] -> Builder
125encodeFiles = mconcat . map encodeFile
126
127input :: [File]
128input =
129 [ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8"
130 , File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16"
131 , File "binary.dat" "we'll pretend to be binary"
132 ]
133
134
135---
136
137-- This desperate scheme actually has some efficacy, if used before `utf8` in a zoom
138-- but not much
139
140chunk :: Monad m => Int -> Lens' (Producer ByteString m r) (Producer ByteString m r)
141chunk n = lens (chunkyN n) (\_ b -> b) where
142
143 chunkyN :: Monad m => Int -> Producer ByteString m r -> Producer ByteString m r
144 chunkyN n = prod_loop where
145
146 prod_loop p = do mbs <- lift $ next p
147 case mbs of Left r -> return r
148 Right (bs, p') -> do bs_loop bs
149 prod_loop p'
150 bs_loop bs = unless (S.null bs) $ do yield fore
151 unless (S.null aft) (bs_loop aft)
152 where (fore, aft) = S.splitAt n bs