]>
Commit | Line | Data |
---|---|---|
955edd33 | 1 | -- this file illustrates several uses of `zoom` |
2 | -- one of them is quadratic in the length of the file | |
3 | -- since it has to decode and encode repeatedly, | |
4 | -- and is thus no good on long files. | |
5 | ||
6 | {-# LANGUAGE OverloadedStrings #-} | |
7 | {-# LANGUAGE BangPatterns#-} | |
8 | {-# LANGUAGE RankNTypes #-} | |
9 | import Blaze.ByteString.Builder (Builder, fromByteString, toByteString) | |
10 | import Control.Exception (Exception) | |
11 | import Control.Monad.Trans.Class (lift) | |
12 | import Data.ByteString (ByteString) | |
13 | import qualified Data.ByteString as S | |
14 | import qualified Data.ByteString.Lazy as L | |
15 | import Data.Monoid | |
16 | import Data.Text (Text) | |
17 | import qualified Data.Text as T | |
18 | import qualified Data.Text.Encoding as TEE | |
19 | import qualified Data.Text.Lazy as TL | |
20 | import qualified Data.Text.Lazy.Encoding as TLE | |
21 | ||
22 | import Pipes | |
23 | import Pipes.Parse | |
24 | import qualified Pipes.Prelude as PP | |
25 | import qualified Pipes.ByteString as Bytes | |
26 | import qualified Pipes.Text as Txt | |
27 | import Pipes.Text.Encoding (utf8) | |
28 | ||
8197d6e0 | 29 | import Control.Lens -- we use 'zoom' with MonadState, not just StateT |
955edd33 | 30 | import Control.Monad |
31 | import qualified System.IO as IO | |
32 | import Control.Monad.Trans.Maybe | |
33 | import Control.Monad.State.Class | |
34 | ||
35 | main :: IO () | |
8197d6e0 | 36 | main = do S.writeFile fp $ contents 10000 -- 10000 cannot be handled fileParser0 and 1 |
955edd33 | 37 | -- parse_file fileParser0 -- pathological |
38 | -- parse_file fileParser1 -- programs | |
39 | parse_file fileParser2 -- good program | |
40 | ||
41 | where | |
42 | parse_file parser = IO.withBinaryFile fp IO.ReadMode $ \h -> | |
43 | do p' <- runEffect $ parseWith parser ( Bytes.fromHandle h ) >-> PP.print | |
44 | runEffect $ p' >-> PP.print | |
45 | parseWith parser = loop where | |
46 | loop p = do (m,p') <- lift (runStateT (runMaybeT parser) p) | |
47 | case m of Nothing -> return p' | |
48 | Just file -> do yield file | |
49 | loop p' | |
50 | fp = "encoded.fileformat" | |
51 | contents n = (toByteString . mconcat . replicate n . encodeFiles) input | |
52 | <> S.pack (replicate 10 250) | |
53 | ||
54 | ||
55 | ||
56 | fileParser0, fileParser1, fileParser2 :: Monad m => MaybeT (StateT (Producer ByteString m x) m) File | |
57 | fileParser0 = do (name, len) <- zoom utf8 parseText | |
58 | contents <- zoom (Bytes.splitAt len) (lift drawAll) | |
59 | return (File name (S.concat contents)) | |
60 | where | |
61 | -- this parser aggregates all Text parsing into one preliminary parser | |
62 | -- which is then applied with `zoom utf8` | |
63 | -- we cannot tell in advance how long, e.g. the file name will be | |
64 | parseText :: Monad m => MaybeT (StateT (Producer Text m x) m) (Text, Int) | |
65 | parseText = do nameLength <- parseNumber | |
66 | names <- zoom (Txt.splitAt nameLength) $ (lift drawAll) | |
67 | contentLength <- parseNumber | |
68 | return $! (T.concat names, contentLength) | |
69 | ||
70 | -- here we disaggregate the little Text parsers but still apply them with `zoom utf8` | |
71 | -- this makes no difference | |
72 | fileParser1 = do nameLength <- zoom utf8 parseNumber | |
73 | names <- zoom (utf8 . Txt.splitAt nameLength) (lift drawAll) | |
74 | contentLength <- zoom utf8 parseNumber | |
75 | contents <- zoom (Bytes.splitAt contentLength) (lift drawAll) | |
76 | return (File (T.concat names) (S.concat contents)) | |
77 | ||
8197d6e0 | 78 | -- This is the good program; by reflecting on the fact that file names |
955edd33 | 79 | -- should not be a 1000 bytes long, and binary files longer than e.g. 10 ^ 10 |
80 | -- we can restrict the length of the byte stream to which we apply `zoom utf8` | |
8197d6e0 | 81 | fileParser2 = do nameLength <- zoom (Bytes.splitAt 3 . utf8) parseNumber |
82 | names <- zoom (Bytes.splitAt nameLength . utf8) (lift drawAll) | |
83 | len <- zoom (Bytes.splitAt 10 . utf8) parseNumber | |
84 | contents <- zoom (Bytes.splitAt len) (lift drawAll) | |
955edd33 | 85 | return (File (T.concat names) (S.concat contents)) |
86 | ||
8197d6e0 | 87 | |
955edd33 | 88 | |
89 | parseNumber :: Monad m => MaybeT (StateT (Producer Text m x) m) Int | |
90 | parseNumber = loop 0 where | |
91 | loop !n = do c <- MaybeT Txt.drawChar | |
92 | case c of ':' -> return n | |
93 | _ -> do guard ('0' <= c && c <= '9') | |
94 | loop $! n * 10 + (fromEnum c - fromEnum '0') | |
95 | ||
96 | ||
97 | ||
98 | -- --- Michael S's `File` type and its binary encoding, etc. | |
99 | ||
100 | ||
101 | data File = File | |
102 | { fileName :: !Text | |
103 | , fileContents :: !ByteString | |
104 | } | |
105 | deriving Show | |
106 | ||
107 | encodeFile :: File -> Builder | |
108 | encodeFile (File name contents) = | |
109 | tellLength (S.length bytesname) <> | |
110 | fromByteString bytesname <> | |
111 | tellLength (S.length contents) <> | |
112 | fromByteString contents | |
113 | where | |
114 | tellLength i = fromByteString $ TEE.encodeUtf8 (T.pack (shows i ":")) | |
115 | bytesname = TEE.encodeUtf8 name | |
116 | ||
117 | encodeFiles :: [File] -> Builder | |
118 | encodeFiles = mconcat . map encodeFile | |
119 | ||
120 | input :: [File] | |
121 | input = | |
122 | [ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8" | |
123 | , File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16" | |
124 | , File "binary.dat" "we'll pretend to be binary" | |
125 | ] | |
126 | ||
127 |