From 955edd331812d8638f57a23ddcc90194ad7002b7 Mon Sep 17 00:00:00 2001 From: michaelt Date: Sat, 8 Nov 2014 22:30:32 -0500 Subject: tutorial etc --- examples/attoparser.hs | 21 +++++++ examples/decode.hs | 30 ++++++++++ examples/lines_url.hs | 37 ++++++++++++ examples/zoom.hs | 152 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 240 insertions(+) create mode 100644 examples/attoparser.hs create mode 100644 examples/decode.hs create mode 100644 examples/lines_url.hs create mode 100644 examples/zoom.hs (limited to 'examples') diff --git a/examples/attoparser.hs b/examples/attoparser.hs new file mode 100644 index 0000000..ddf770d --- /dev/null +++ b/examples/attoparser.hs @@ -0,0 +1,21 @@ +import Pipes +import Pipes.Text.IO (fromHandle) +import Pipes.Attoparsec (parsed) +import qualified System.IO as IO + +data Test = Test { + a :: Int, + b :: Int + } deriving (Show) + +testParser :: Parser Test +testParser = do + a <- decimal + space + b <- decimal + endOfLine + return $ Test a b + +main = IO.withFile "./testfile" IO.ReadMode $ \handle -> runEffect $ + for test_parser (lift . print) + where (parsed (testParser <* endOfLine) (fromHandle handle)) \ No newline at end of file diff --git a/examples/decode.hs b/examples/decode.hs new file mode 100644 index 0000000..8cb44f8 --- /dev/null +++ b/examples/decode.hs @@ -0,0 +1,30 @@ +-- http://www.haskellforall.com/2014/02/pipes-parse-30-lens-based-parsing.html + +import Data.ByteString (ByteString) +import Data.Text (Text) +import Lens.Family.State.Strict (zoom) +import Pipes +import Pipes.Parse +import qualified Pipes.ByteString as ByteString +import qualified Pipes.Text as Text + +-- Retrieve all `Text` chunks up to 10 characters +parser :: Monad m => Parser ByteString m [Text] +parser = zoom (Text.decodeUtf8 . Text.splitAt 10) drawAll + +main = do + (textChunks, leftovers) <- runStateT parser ByteString.stdin + print textChunks + + -- Now print the remaining `ByteString` chunks + byteChunks <- evalStateT drawAll leftovers + print byteChunks +{- +$ ./decode +Hello, 世界!!! +["Hello, \19990\30028!"] +abcdefg + +["!!\n","abcdefg\n"] + +-} \ No newline at end of file diff --git a/examples/lines_url.hs b/examples/lines_url.hs new file mode 100644 index 0000000..b676656 --- /dev/null +++ b/examples/lines_url.hs @@ -0,0 +1,37 @@ +{-# LANGUAGE OverloadedStrings #-} +-- https://gist.github.com/michaelt/88e1fac12876857deefe +-- following +-- https://gist.github.com/gelisam/c769d186493221d7ebbe and associated controversy. + +module Main where + +import Prelude hiding (lines) +import Lens.Family +import Pipes +import Pipes.Group +import Pipes.HTTP +import Pipes.Text +import Pipes.Text.Encoding +import Pipes.Text.IO (toHandle,stdout) +import qualified System.IO as IO +import Data.Functor (void) +import qualified Data.Text as T + +main = do + req <- parseUrl "https://gist.github.com/gelisam/c769d186493221d7ebbe" + -- "http://www.example.com" + -- "http://www.gutenberg.org/files/10/10-h/10-h.htm" + withManager tlsManagerSettings $ \m -> + withHTTP req m $ \resp -> void $ runEffect $ + number_lines_of (responseBody resp ^. utf8 . lines) >-> toHandle IO.stdout + +number_lines_of :: Monad m => FreeT (Producer Text m) m bad -> Producer Text m bad +number_lines_of = number_loop (1 :: Int) where + number_loop n freeProducers = do + freeProducer <- lift $ runFreeT freeProducers + case freeProducer of + Pure badbytes -> do yield $ T.pack "\n" + return badbytes -- these could be inspected ... + Free p -> do yield $ T.pack ("\n" ++ show n ++ " ") + nextFreeProducers <- p + number_loop (n+1) nextFreeProducers diff --git a/examples/zoom.hs b/examples/zoom.hs new file mode 100644 index 0000000..3442dc8 --- /dev/null +++ b/examples/zoom.hs @@ -0,0 +1,152 @@ +-- this file illustrates several uses of `zoom` +-- one of them is quadratic in the length of the file +-- since it has to decode and encode repeatedly, +-- and is thus no good on long files. + +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE BangPatterns#-} +{-# LANGUAGE RankNTypes #-} +import Blaze.ByteString.Builder (Builder, fromByteString, toByteString) +import Control.Exception (Exception) +import Control.Monad.Trans.Class (lift) +import Data.ByteString (ByteString) +import qualified Data.ByteString as S +import qualified Data.ByteString.Lazy as L +import Data.Monoid +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.Encoding as TEE +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Lazy.Encoding as TLE + +import Pipes +import Pipes.Parse +import qualified Pipes.Prelude as PP +import qualified Pipes.ByteString as Bytes +import qualified Pipes.Text as Txt +import Pipes.Text.Encoding (utf8) + +import Control.Lens +import Control.Lens.Internal.Zoom +import Control.Monad +import qualified System.IO as IO +import Control.Monad.Trans.Maybe +import Control.Monad.State.Class + +main :: IO () +main = do -- S.writeFile fp $ contents 10000 -- 10000 cannot be handled fileParser0 and 1 + -- parse_file fileParser0 -- pathological + -- parse_file fileParser1 -- programs + parse_file fileParser2 -- good program + + where + parse_file parser = IO.withBinaryFile fp IO.ReadMode $ \h -> + do p' <- runEffect $ parseWith parser ( Bytes.fromHandle h ) >-> PP.print + runEffect $ p' >-> PP.print + parseWith parser = loop where + loop p = do (m,p') <- lift (runStateT (runMaybeT parser) p) + case m of Nothing -> return p' + Just file -> do yield file + loop p' + fp = "encoded.fileformat" + contents n = (toByteString . mconcat . replicate n . encodeFiles) input + <> S.pack (replicate 10 250) + + + +fileParser0, fileParser1, fileParser2 :: Monad m => MaybeT (StateT (Producer ByteString m x) m) File +fileParser0 = do (name, len) <- zoom utf8 parseText + contents <- zoom (Bytes.splitAt len) (lift drawAll) + return (File name (S.concat contents)) + where + -- this parser aggregates all Text parsing into one preliminary parser + -- which is then applied with `zoom utf8` + -- we cannot tell in advance how long, e.g. the file name will be + parseText :: Monad m => MaybeT (StateT (Producer Text m x) m) (Text, Int) + parseText = do nameLength <- parseNumber + names <- zoom (Txt.splitAt nameLength) $ (lift drawAll) + contentLength <- parseNumber + return $! (T.concat names, contentLength) + +-- here we disaggregate the little Text parsers but still apply them with `zoom utf8` +-- this makes no difference +fileParser1 = do nameLength <- zoom utf8 parseNumber + names <- zoom (utf8 . Txt.splitAt nameLength) (lift drawAll) + contentLength <- zoom utf8 parseNumber + contents <- zoom (Bytes.splitAt contentLength) (lift drawAll) + return (File (T.concat names) (S.concat contents)) + +-- this is the good program; be reflecting on the fact that file names +-- should not be a 1000 bytes long, and binary files longer than e.g. 10 ^ 10 +-- we can restrict the length of the byte stream to which we apply `zoom utf8` +fileParser2 = do nameLength <- Bytes.splitAt 3 ~~> utf8 ~~> parseNumber + names <- Bytes.splitAt nameLength ~~> utf8 ~~> lift drawAll + len <- Bytes.splitAt 10 ~~> utf8 ~~> parseNumber + contents <- Bytes.splitAt len ~~> lift drawAll + return (File (T.concat names) (S.concat contents)) + +-- infix lens nonsense +infixr 1 ~~> +(~~>) :: Zoom m n s t + => ((s -> Zoomed n c s) -> t -> Zoomed n c t) + -> m c -> n c +(~~>) = zoom +{-# INLINE (~~>) #-} + +parseNumber :: Monad m => MaybeT (StateT (Producer Text m x) m) Int +parseNumber = loop 0 where + loop !n = do c <- MaybeT Txt.drawChar + case c of ':' -> return n + _ -> do guard ('0' <= c && c <= '9') + loop $! n * 10 + (fromEnum c - fromEnum '0') + + + +-- --- Michael S's `File` type and its binary encoding, etc. + + +data File = File + { fileName :: !Text + , fileContents :: !ByteString + } + deriving Show + +encodeFile :: File -> Builder +encodeFile (File name contents) = + tellLength (S.length bytesname) <> + fromByteString bytesname <> + tellLength (S.length contents) <> + fromByteString contents + where + tellLength i = fromByteString $ TEE.encodeUtf8 (T.pack (shows i ":")) + bytesname = TEE.encodeUtf8 name + +encodeFiles :: [File] -> Builder +encodeFiles = mconcat . map encodeFile + +input :: [File] +input = + [ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8" + , File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16" + , File "binary.dat" "we'll pretend to be binary" + ] + + +--- + +-- This desperate scheme actually has some efficacy, if used before `utf8` in a zoom +-- but not much + +chunk :: Monad m => Int -> Lens' (Producer ByteString m r) (Producer ByteString m r) +chunk n = lens (chunkyN n) (\_ b -> b) where + + chunkyN :: Monad m => Int -> Producer ByteString m r -> Producer ByteString m r + chunkyN n = prod_loop where + + prod_loop p = do mbs <- lift $ next p + case mbs of Left r -> return r + Right (bs, p') -> do bs_loop bs + prod_loop p' + bs_loop bs = unless (S.null bs) $ do yield fore + unless (S.null aft) (bs_loop aft) + where (fore, aft) = S.splitAt n bs -- cgit v1.2.3