diff options
Diffstat (limited to 'examples/zoom.hs')
-rw-r--r-- | examples/zoom.hs | 152 |
1 files changed, 152 insertions, 0 deletions
diff --git a/examples/zoom.hs b/examples/zoom.hs new file mode 100644 index 0000000..3442dc8 --- /dev/null +++ b/examples/zoom.hs | |||
@@ -0,0 +1,152 @@ | |||
1 | -- this file illustrates several uses of `zoom` | ||
2 | -- one of them is quadratic in the length of the file | ||
3 | -- since it has to decode and encode repeatedly, | ||
4 | -- and is thus no good on long files. | ||
5 | |||
6 | {-# LANGUAGE OverloadedStrings #-} | ||
7 | {-# LANGUAGE BangPatterns#-} | ||
8 | {-# LANGUAGE RankNTypes #-} | ||
9 | import Blaze.ByteString.Builder (Builder, fromByteString, toByteString) | ||
10 | import Control.Exception (Exception) | ||
11 | import Control.Monad.Trans.Class (lift) | ||
12 | import Data.ByteString (ByteString) | ||
13 | import qualified Data.ByteString as S | ||
14 | import qualified Data.ByteString.Lazy as L | ||
15 | import Data.Monoid | ||
16 | import Data.Text (Text) | ||
17 | import qualified Data.Text as T | ||
18 | import qualified Data.Text.Encoding as TEE | ||
19 | import qualified Data.Text.Lazy as TL | ||
20 | import qualified Data.Text.Lazy.Encoding as TLE | ||
21 | |||
22 | import Pipes | ||
23 | import Pipes.Parse | ||
24 | import qualified Pipes.Prelude as PP | ||
25 | import qualified Pipes.ByteString as Bytes | ||
26 | import qualified Pipes.Text as Txt | ||
27 | import Pipes.Text.Encoding (utf8) | ||
28 | |||
29 | import Control.Lens | ||
30 | import Control.Lens.Internal.Zoom | ||
31 | import Control.Monad | ||
32 | import qualified System.IO as IO | ||
33 | import Control.Monad.Trans.Maybe | ||
34 | import Control.Monad.State.Class | ||
35 | |||
36 | main :: IO () | ||
37 | main = do -- S.writeFile fp $ contents 10000 -- 10000 cannot be handled fileParser0 and 1 | ||
38 | -- parse_file fileParser0 -- pathological | ||
39 | -- parse_file fileParser1 -- programs | ||
40 | parse_file fileParser2 -- good program | ||
41 | |||
42 | where | ||
43 | parse_file parser = IO.withBinaryFile fp IO.ReadMode $ \h -> | ||
44 | do p' <- runEffect $ parseWith parser ( Bytes.fromHandle h ) >-> PP.print | ||
45 | runEffect $ p' >-> PP.print | ||
46 | parseWith parser = loop where | ||
47 | loop p = do (m,p') <- lift (runStateT (runMaybeT parser) p) | ||
48 | case m of Nothing -> return p' | ||
49 | Just file -> do yield file | ||
50 | loop p' | ||
51 | fp = "encoded.fileformat" | ||
52 | contents n = (toByteString . mconcat . replicate n . encodeFiles) input | ||
53 | <> S.pack (replicate 10 250) | ||
54 | |||
55 | |||
56 | |||
57 | fileParser0, fileParser1, fileParser2 :: Monad m => MaybeT (StateT (Producer ByteString m x) m) File | ||
58 | fileParser0 = do (name, len) <- zoom utf8 parseText | ||
59 | contents <- zoom (Bytes.splitAt len) (lift drawAll) | ||
60 | return (File name (S.concat contents)) | ||
61 | where | ||
62 | -- this parser aggregates all Text parsing into one preliminary parser | ||
63 | -- which is then applied with `zoom utf8` | ||
64 | -- we cannot tell in advance how long, e.g. the file name will be | ||
65 | parseText :: Monad m => MaybeT (StateT (Producer Text m x) m) (Text, Int) | ||
66 | parseText = do nameLength <- parseNumber | ||
67 | names <- zoom (Txt.splitAt nameLength) $ (lift drawAll) | ||
68 | contentLength <- parseNumber | ||
69 | return $! (T.concat names, contentLength) | ||
70 | |||
71 | -- here we disaggregate the little Text parsers but still apply them with `zoom utf8` | ||
72 | -- this makes no difference | ||
73 | fileParser1 = do nameLength <- zoom utf8 parseNumber | ||
74 | names <- zoom (utf8 . Txt.splitAt nameLength) (lift drawAll) | ||
75 | contentLength <- zoom utf8 parseNumber | ||
76 | contents <- zoom (Bytes.splitAt contentLength) (lift drawAll) | ||
77 | return (File (T.concat names) (S.concat contents)) | ||
78 | |||
79 | -- this is the good program; be reflecting on the fact that file names | ||
80 | -- should not be a 1000 bytes long, and binary files longer than e.g. 10 ^ 10 | ||
81 | -- we can restrict the length of the byte stream to which we apply `zoom utf8` | ||
82 | fileParser2 = do nameLength <- Bytes.splitAt 3 ~~> utf8 ~~> parseNumber | ||
83 | names <- Bytes.splitAt nameLength ~~> utf8 ~~> lift drawAll | ||
84 | len <- Bytes.splitAt 10 ~~> utf8 ~~> parseNumber | ||
85 | contents <- Bytes.splitAt len ~~> lift drawAll | ||
86 | return (File (T.concat names) (S.concat contents)) | ||
87 | |||
88 | -- infix lens nonsense | ||
89 | infixr 1 ~~> | ||
90 | (~~>) :: Zoom m n s t | ||
91 | => ((s -> Zoomed n c s) -> t -> Zoomed n c t) | ||
92 | -> m c -> n c | ||
93 | (~~>) = zoom | ||
94 | {-# INLINE (~~>) #-} | ||
95 | |||
96 | parseNumber :: Monad m => MaybeT (StateT (Producer Text m x) m) Int | ||
97 | parseNumber = loop 0 where | ||
98 | loop !n = do c <- MaybeT Txt.drawChar | ||
99 | case c of ':' -> return n | ||
100 | _ -> do guard ('0' <= c && c <= '9') | ||
101 | loop $! n * 10 + (fromEnum c - fromEnum '0') | ||
102 | |||
103 | |||
104 | |||
105 | -- --- Michael S's `File` type and its binary encoding, etc. | ||
106 | |||
107 | |||
108 | data File = File | ||
109 | { fileName :: !Text | ||
110 | , fileContents :: !ByteString | ||
111 | } | ||
112 | deriving Show | ||
113 | |||
114 | encodeFile :: File -> Builder | ||
115 | encodeFile (File name contents) = | ||
116 | tellLength (S.length bytesname) <> | ||
117 | fromByteString bytesname <> | ||
118 | tellLength (S.length contents) <> | ||
119 | fromByteString contents | ||
120 | where | ||
121 | tellLength i = fromByteString $ TEE.encodeUtf8 (T.pack (shows i ":")) | ||
122 | bytesname = TEE.encodeUtf8 name | ||
123 | |||
124 | encodeFiles :: [File] -> Builder | ||
125 | encodeFiles = mconcat . map encodeFile | ||
126 | |||
127 | input :: [File] | ||
128 | input = | ||
129 | [ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8" | ||
130 | , File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16" | ||
131 | , File "binary.dat" "we'll pretend to be binary" | ||
132 | ] | ||
133 | |||
134 | |||
135 | --- | ||
136 | |||
137 | -- This desperate scheme actually has some efficacy, if used before `utf8` in a zoom | ||
138 | -- but not much | ||
139 | |||
140 | chunk :: Monad m => Int -> Lens' (Producer ByteString m r) (Producer ByteString m r) | ||
141 | chunk n = lens (chunkyN n) (\_ b -> b) where | ||
142 | |||
143 | chunkyN :: Monad m => Int -> Producer ByteString m r -> Producer ByteString m r | ||
144 | chunkyN n = prod_loop where | ||
145 | |||
146 | prod_loop p = do mbs <- lift $ next p | ||
147 | case mbs of Left r -> return r | ||
148 | Right (bs, p') -> do bs_loop bs | ||
149 | prod_loop p' | ||
150 | bs_loop bs = unless (S.null bs) $ do yield fore | ||
151 | unless (S.null aft) (bs_loop aft) | ||
152 | where (fore, aft) = S.splitAt n bs | ||