]> git.immae.eu Git - github/fretlink/text-pipes.git/blob - examples/zoom.hs
detritus
[github/fretlink/text-pipes.git] / examples / zoom.hs
1 -- this file illustrates several uses of `zoom`
2 -- one of them is quadratic in the length of the file
3 -- since it has to decode and encode repeatedly,
4 -- and is thus no good on long files.
5
6 {-# LANGUAGE OverloadedStrings #-}
7 {-# LANGUAGE BangPatterns#-}
8 {-# LANGUAGE RankNTypes #-}
9 import Blaze.ByteString.Builder (Builder, fromByteString, toByteString)
10 import Control.Exception (Exception)
11 import Control.Monad.Trans.Class (lift)
12 import Data.ByteString (ByteString)
13 import qualified Data.ByteString as S
14 import qualified Data.ByteString.Lazy as L
15 import Data.Monoid
16 import Data.Text (Text)
17 import qualified Data.Text as T
18 import qualified Data.Text.Encoding as TEE
19 import qualified Data.Text.Lazy as TL
20 import qualified Data.Text.Lazy.Encoding as TLE
21
22 import Pipes
23 import Pipes.Parse
24 import qualified Pipes.Prelude as PP
25 import qualified Pipes.ByteString as Bytes
26 import qualified Pipes.Text as Txt
27 import Pipes.Text.Encoding (utf8)
28
29 import Control.Lens -- we use 'zoom' with MonadState, not just StateT
30 import Control.Monad
31 import qualified System.IO as IO
32 import Control.Monad.Trans.Maybe
33 import Control.Monad.State.Class
34
35 main :: IO ()
36 main = do S.writeFile fp $ contents 10000 -- 10000 cannot be handled fileParser0 and 1
37 -- parse_file fileParser0 -- pathological
38 -- parse_file fileParser1 -- programs
39 parse_file fileParser2 -- good program
40
41 where
42 parse_file parser = IO.withBinaryFile fp IO.ReadMode $ \h ->
43 do p' <- runEffect $ parseWith parser ( Bytes.fromHandle h ) >-> PP.print
44 runEffect $ p' >-> PP.print
45 parseWith parser = loop where
46 loop p = do (m,p') <- lift (runStateT (runMaybeT parser) p)
47 case m of Nothing -> return p'
48 Just file -> do yield file
49 loop p'
50 fp = "encoded.fileformat"
51 contents n = (toByteString . mconcat . replicate n . encodeFiles) input
52 <> S.pack (replicate 10 250)
53
54
55
56 fileParser0, fileParser1, fileParser2 :: Monad m => MaybeT (StateT (Producer ByteString m x) m) File
57 fileParser0 = do (name, len) <- zoom utf8 parseText
58 contents <- zoom (Bytes.splitAt len) (lift drawAll)
59 return (File name (S.concat contents))
60 where
61 -- this parser aggregates all Text parsing into one preliminary parser
62 -- which is then applied with `zoom utf8`
63 -- we cannot tell in advance how long, e.g. the file name will be
64 parseText :: Monad m => MaybeT (StateT (Producer Text m x) m) (Text, Int)
65 parseText = do nameLength <- parseNumber
66 names <- zoom (Txt.splitAt nameLength) $ (lift drawAll)
67 contentLength <- parseNumber
68 return $! (T.concat names, contentLength)
69
70 -- here we disaggregate the little Text parsers but still apply them with `zoom utf8`
71 -- this makes no difference
72 fileParser1 = do nameLength <- zoom utf8 parseNumber
73 names <- zoom (utf8 . Txt.splitAt nameLength) (lift drawAll)
74 contentLength <- zoom utf8 parseNumber
75 contents <- zoom (Bytes.splitAt contentLength) (lift drawAll)
76 return (File (T.concat names) (S.concat contents))
77
78 -- This is the good program; by reflecting on the fact that file names
79 -- should not be a 1000 bytes long, and binary files longer than e.g. 10 ^ 10
80 -- we can restrict the length of the byte stream to which we apply `zoom utf8`
81 fileParser2 = do nameLength <- zoom (Bytes.splitAt 3 . utf8) parseNumber
82 names <- zoom (Bytes.splitAt nameLength . utf8) (lift drawAll)
83 len <- zoom (Bytes.splitAt 10 . utf8) parseNumber
84 contents <- zoom (Bytes.splitAt len) (lift drawAll)
85 return (File (T.concat names) (S.concat contents))
86
87
88
89 parseNumber :: Monad m => MaybeT (StateT (Producer Text m x) m) Int
90 parseNumber = loop 0 where
91 loop !n = do c <- MaybeT Txt.drawChar
92 case c of ':' -> return n
93 _ -> do guard ('0' <= c && c <= '9')
94 loop $! n * 10 + (fromEnum c - fromEnum '0')
95
96
97
98 -- --- Michael S's `File` type and its binary encoding, etc.
99
100
101 data File = File
102 { fileName :: !Text
103 , fileContents :: !ByteString
104 }
105 deriving Show
106
107 encodeFile :: File -> Builder
108 encodeFile (File name contents) =
109 tellLength (S.length bytesname) <>
110 fromByteString bytesname <>
111 tellLength (S.length contents) <>
112 fromByteString contents
113 where
114 tellLength i = fromByteString $ TEE.encodeUtf8 (T.pack (shows i ":"))
115 bytesname = TEE.encodeUtf8 name
116
117 encodeFiles :: [File] -> Builder
118 encodeFiles = mconcat . map encodeFile
119
120 input :: [File]
121 input =
122 [ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8"
123 , File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16"
124 , File "binary.dat" "we'll pretend to be binary"
125 ]
126
127