]> git.immae.eu Git - github/fretlink/text-pipes.git/blame - examples/zoom.hs
detritus
[github/fretlink/text-pipes.git] / examples / zoom.hs
CommitLineData
955edd33 1-- this file illustrates several uses of `zoom`
2-- one of them is quadratic in the length of the file
3-- since it has to decode and encode repeatedly,
4-- and is thus no good on long files.
5
6{-# LANGUAGE OverloadedStrings #-}
7{-# LANGUAGE BangPatterns#-}
8{-# LANGUAGE RankNTypes #-}
9import Blaze.ByteString.Builder (Builder, fromByteString, toByteString)
10import Control.Exception (Exception)
11import Control.Monad.Trans.Class (lift)
12import Data.ByteString (ByteString)
13import qualified Data.ByteString as S
14import qualified Data.ByteString.Lazy as L
15import Data.Monoid
16import Data.Text (Text)
17import qualified Data.Text as T
18import qualified Data.Text.Encoding as TEE
19import qualified Data.Text.Lazy as TL
20import qualified Data.Text.Lazy.Encoding as TLE
21
22import Pipes
23import Pipes.Parse
24import qualified Pipes.Prelude as PP
25import qualified Pipes.ByteString as Bytes
26import qualified Pipes.Text as Txt
27import Pipes.Text.Encoding (utf8)
28
8197d6e0 29import Control.Lens -- we use 'zoom' with MonadState, not just StateT
955edd33 30import Control.Monad
31import qualified System.IO as IO
32import Control.Monad.Trans.Maybe
33import Control.Monad.State.Class
34
35main :: IO ()
8197d6e0 36main = do S.writeFile fp $ contents 10000 -- 10000 cannot be handled fileParser0 and 1
955edd33 37 -- parse_file fileParser0 -- pathological
38 -- parse_file fileParser1 -- programs
39 parse_file fileParser2 -- good program
40
41 where
42 parse_file parser = IO.withBinaryFile fp IO.ReadMode $ \h ->
43 do p' <- runEffect $ parseWith parser ( Bytes.fromHandle h ) >-> PP.print
44 runEffect $ p' >-> PP.print
45 parseWith parser = loop where
46 loop p = do (m,p') <- lift (runStateT (runMaybeT parser) p)
47 case m of Nothing -> return p'
48 Just file -> do yield file
49 loop p'
50 fp = "encoded.fileformat"
51 contents n = (toByteString . mconcat . replicate n . encodeFiles) input
52 <> S.pack (replicate 10 250)
53
54
55
56fileParser0, fileParser1, fileParser2 :: Monad m => MaybeT (StateT (Producer ByteString m x) m) File
57fileParser0 = do (name, len) <- zoom utf8 parseText
58 contents <- zoom (Bytes.splitAt len) (lift drawAll)
59 return (File name (S.concat contents))
60 where
61 -- this parser aggregates all Text parsing into one preliminary parser
62 -- which is then applied with `zoom utf8`
63 -- we cannot tell in advance how long, e.g. the file name will be
64 parseText :: Monad m => MaybeT (StateT (Producer Text m x) m) (Text, Int)
65 parseText = do nameLength <- parseNumber
66 names <- zoom (Txt.splitAt nameLength) $ (lift drawAll)
67 contentLength <- parseNumber
68 return $! (T.concat names, contentLength)
69
70-- here we disaggregate the little Text parsers but still apply them with `zoom utf8`
71-- this makes no difference
72fileParser1 = do nameLength <- zoom utf8 parseNumber
73 names <- zoom (utf8 . Txt.splitAt nameLength) (lift drawAll)
74 contentLength <- zoom utf8 parseNumber
75 contents <- zoom (Bytes.splitAt contentLength) (lift drawAll)
76 return (File (T.concat names) (S.concat contents))
77
8197d6e0 78-- This is the good program; by reflecting on the fact that file names
955edd33 79-- should not be a 1000 bytes long, and binary files longer than e.g. 10 ^ 10
80-- we can restrict the length of the byte stream to which we apply `zoom utf8`
8197d6e0 81fileParser2 = do nameLength <- zoom (Bytes.splitAt 3 . utf8) parseNumber
82 names <- zoom (Bytes.splitAt nameLength . utf8) (lift drawAll)
83 len <- zoom (Bytes.splitAt 10 . utf8) parseNumber
84 contents <- zoom (Bytes.splitAt len) (lift drawAll)
955edd33 85 return (File (T.concat names) (S.concat contents))
86
8197d6e0 87
955edd33 88
89parseNumber :: Monad m => MaybeT (StateT (Producer Text m x) m) Int
90parseNumber = loop 0 where
91 loop !n = do c <- MaybeT Txt.drawChar
92 case c of ':' -> return n
93 _ -> do guard ('0' <= c && c <= '9')
94 loop $! n * 10 + (fromEnum c - fromEnum '0')
95
96
97
98-- --- Michael S's `File` type and its binary encoding, etc.
99
100
101data File = File
102 { fileName :: !Text
103 , fileContents :: !ByteString
104 }
105 deriving Show
106
107encodeFile :: File -> Builder
108encodeFile (File name contents) =
109 tellLength (S.length bytesname) <>
110 fromByteString bytesname <>
111 tellLength (S.length contents) <>
112 fromByteString contents
113 where
114 tellLength i = fromByteString $ TEE.encodeUtf8 (T.pack (shows i ":"))
115 bytesname = TEE.encodeUtf8 name
116
117encodeFiles :: [File] -> Builder
118encodeFiles = mconcat . map encodeFile
119
120input :: [File]
121input =
122 [ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8"
123 , File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16"
124 , File "binary.dat" "we'll pretend to be binary"
125 ]
126
127