examples/zoom.hs

   1 -- this file illustrates several uses of `zoom`
   2 -- one of them is quadratic in the length of the file
   3 -- since it has to decode and encode repeatedly,
   4 -- and is thus no good on long files.
   5
   6 {-# LANGUAGE OverloadedStrings  #-}
   7 {-# LANGUAGE BangPatterns#-}
   8 {-# LANGUAGE RankNTypes #-}
   9 import           Blaze.ByteString.Builder  (Builder, fromByteString, toByteString)
  10 import           Control.Exception         (Exception)
  11 import           Control.Monad.Trans.Class (lift)
  12 import           Data.ByteString           (ByteString)
  13 import qualified Data.ByteString           as S
  14 import qualified Data.ByteString.Lazy      as L
  15 import           Data.Monoid
  16 import           Data.Text                 (Text)
  17 import qualified Data.Text                 as T
  18 import qualified Data.Text.Encoding        as TEE
  19 import qualified Data.Text.Lazy            as TL
  20 import qualified Data.Text.Lazy.Encoding   as TLE
  21
  22 import Pipes
  23 import Pipes.Parse
  24 import qualified Pipes.Prelude as PP
  25 import qualified Pipes.ByteString as Bytes
  26 import qualified Pipes.Text as Txt
  27 import Pipes.Text.Encoding (utf8)
  28
  29 import Control.Lens -- we use 'zoom' with MonadState, not just StateT
  30 import Control.Monad
  31 import qualified System.IO as IO
  32 import Control.Monad.Trans.Maybe
  33 import Control.Monad.State.Class
  34
  35 main :: IO ()
  36 main = do S.writeFile fp $ contents 10000 -- 10000 cannot be handled fileParser0 and 1
  37           -- parse_file fileParser0  -- pathological
  38           -- parse_file fileParser1  -- programs
  39           parse_file fileParser2  -- good program
  40
  41    where
  42    parse_file parser = IO.withBinaryFile fp IO.ReadMode $ \h ->
  43                          do p' <- runEffect $ parseWith parser ( Bytes.fromHandle h ) >-> PP.print
  44                             runEffect $ p' >-> PP.print
  45    parseWith parser = loop where
  46       loop p = do (m,p') <- lift (runStateT (runMaybeT parser) p)
  47                   case m of Nothing -> return p'
  48                             Just file -> do yield file
  49                                             loop p'
  50    fp = "encoded.fileformat"
  51    contents n =  (toByteString . mconcat . replicate n . encodeFiles) input
  52                  <> S.pack (replicate 10 250)
  53
  54
  55
  56 fileParser0, fileParser1, fileParser2 :: Monad m => MaybeT (StateT (Producer ByteString m x) m) File
  57 fileParser0  = do (name, len) <- zoom utf8 parseText
  58                   contents    <- zoom (Bytes.splitAt len) (lift drawAll)
  59                   return (File name (S.concat contents))
  60     where
  61     -- this parser aggregates all Text parsing into one preliminary parser
  62     -- which is then applied with `zoom utf8`
  63     -- we cannot tell in advance how long, e.g. the file name will be
  64     parseText :: Monad m => MaybeT (StateT (Producer Text m x) m) (Text, Int)
  65     parseText = do nameLength    <- parseNumber
  66                    names         <- zoom (Txt.splitAt nameLength) $ (lift drawAll)
  67                    contentLength <- parseNumber
  68                    return $! (T.concat names, contentLength)
  69
  70 -- here we disaggregate the little Text parsers but still apply them with `zoom utf8`
  71 -- this makes no difference
  72 fileParser1  = do nameLength    <- zoom utf8 parseNumber
  73                   names         <- zoom (utf8 . Txt.splitAt nameLength)  (lift drawAll)
  74                   contentLength <- zoom utf8 parseNumber
  75                   contents      <- zoom (Bytes.splitAt contentLength) (lift drawAll)
  76                   return (File (T.concat names) (S.concat contents))
  77
  78 -- This is the good program; by reflecting on the fact that file names
  79 -- should not be a 1000 bytes long, and binary files longer than e.g. 10 ^ 10
  80 -- we can restrict the length of the byte stream to which we apply `zoom utf8`
  81 fileParser2  = do nameLength  <- zoom (Bytes.splitAt 3 . utf8) parseNumber
  82                   names       <- zoom (Bytes.splitAt nameLength . utf8) (lift drawAll)
  83                   len         <- zoom (Bytes.splitAt 10 . utf8) parseNumber
  84                   contents    <- zoom (Bytes.splitAt len) (lift drawAll)
  85                   return (File (T.concat names) (S.concat contents))
  86
  87
  88
  89 parseNumber :: Monad m =>  MaybeT (StateT (Producer Text m x) m) Int
  90 parseNumber  = loop  0 where
  91    loop !n = do c <- MaybeT  Txt.drawChar
  92                 case c of ':' -> return n
  93                           _   -> do guard ('0' <= c && c <= '9')
  94                                     loop  $! n * 10 + (fromEnum c - fromEnum '0')
  95
  96
  97
  98 -- --- Michael S's `File` type and its binary encoding, etc.
  99
 100
 101 data File = File
 102     { fileName     :: !Text
 103     , fileContents :: !ByteString
 104     }
 105     deriving Show
 106
 107 encodeFile :: File -> Builder
 108 encodeFile (File name contents) =
 109     tellLength (S.length bytesname) <>
 110     fromByteString bytesname        <>
 111     tellLength (S.length contents)  <>
 112     fromByteString contents
 113   where
 114     tellLength i = fromByteString $ TEE.encodeUtf8 (T.pack (shows i ":"))
 115     bytesname = TEE.encodeUtf8 name
 116
 117 encodeFiles :: [File] -> Builder
 118 encodeFiles = mconcat . map encodeFile
 119
 120 input :: [File]
 121 input =
 122     [ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8"
 123     , File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16"
 124     , File "binary.dat" "we'll pretend to be binary"
 125     ]
 126
 127