aboutsummaryrefslogtreecommitdiffhomepage
path: root/examples
diff options
context:
space:
mode:
authormichaelt <what_is_it_to_do_anything@yahoo.com>2014-11-08 22:30:32 -0500
committermichaelt <what_is_it_to_do_anything@yahoo.com>2014-11-08 22:30:32 -0500
commit955edd331812d8638f57a23ddcc90194ad7002b7 (patch)
tree7f832da066a52ec329f26dc793275a029dd00963 /examples
parent2f4a83f82d206217456a035b7e8a1e56c585bfd0 (diff)
downloadtext-pipes-955edd331812d8638f57a23ddcc90194ad7002b7.tar.gz
text-pipes-955edd331812d8638f57a23ddcc90194ad7002b7.tar.zst
text-pipes-955edd331812d8638f57a23ddcc90194ad7002b7.zip
tutorial etc
Diffstat (limited to 'examples')
-rw-r--r--examples/attoparser.hs21
-rw-r--r--examples/decode.hs30
-rw-r--r--examples/lines_url.hs37
-rw-r--r--examples/zoom.hs152
4 files changed, 240 insertions, 0 deletions
diff --git a/examples/attoparser.hs b/examples/attoparser.hs
new file mode 100644
index 0000000..ddf770d
--- /dev/null
+++ b/examples/attoparser.hs
@@ -0,0 +1,21 @@
1import Pipes
2import Pipes.Text.IO (fromHandle)
3import Pipes.Attoparsec (parsed)
4import qualified System.IO as IO
5
6data Test = Test {
7 a :: Int,
8 b :: Int
9 } deriving (Show)
10
11testParser :: Parser Test
12testParser = do
13 a <- decimal
14 space
15 b <- decimal
16 endOfLine
17 return $ Test a b
18
19main = IO.withFile "./testfile" IO.ReadMode $ \handle -> runEffect $
20 for test_parser (lift . print)
21 where (parsed (testParser <* endOfLine) (fromHandle handle)) \ No newline at end of file
diff --git a/examples/decode.hs b/examples/decode.hs
new file mode 100644
index 0000000..8cb44f8
--- /dev/null
+++ b/examples/decode.hs
@@ -0,0 +1,30 @@
1-- http://www.haskellforall.com/2014/02/pipes-parse-30-lens-based-parsing.html
2
3import Data.ByteString (ByteString)
4import Data.Text (Text)
5import Lens.Family.State.Strict (zoom)
6import Pipes
7import Pipes.Parse
8import qualified Pipes.ByteString as ByteString
9import qualified Pipes.Text as Text
10
11-- Retrieve all `Text` chunks up to 10 characters
12parser :: Monad m => Parser ByteString m [Text]
13parser = zoom (Text.decodeUtf8 . Text.splitAt 10) drawAll
14
15main = do
16 (textChunks, leftovers) <- runStateT parser ByteString.stdin
17 print textChunks
18
19 -- Now print the remaining `ByteString` chunks
20 byteChunks <- evalStateT drawAll leftovers
21 print byteChunks
22{-
23$ ./decode
24Hello, 世界!!!<Enter>
25["Hello, \19990\30028!"]
26abcdefg<Enter>
27<Ctrl-D>
28["!!\n","abcdefg\n"]
29
30-} \ No newline at end of file
diff --git a/examples/lines_url.hs b/examples/lines_url.hs
new file mode 100644
index 0000000..b676656
--- /dev/null
+++ b/examples/lines_url.hs
@@ -0,0 +1,37 @@
1{-# LANGUAGE OverloadedStrings #-}
2-- https://gist.github.com/michaelt/88e1fac12876857deefe
3-- following
4-- https://gist.github.com/gelisam/c769d186493221d7ebbe and associated controversy.
5
6module Main where
7
8import Prelude hiding (lines)
9import Lens.Family
10import Pipes
11import Pipes.Group
12import Pipes.HTTP
13import Pipes.Text
14import Pipes.Text.Encoding
15import Pipes.Text.IO (toHandle,stdout)
16import qualified System.IO as IO
17import Data.Functor (void)
18import qualified Data.Text as T
19
20main = do
21 req <- parseUrl "https://gist.github.com/gelisam/c769d186493221d7ebbe"
22 -- "http://www.example.com"
23 -- "http://www.gutenberg.org/files/10/10-h/10-h.htm"
24 withManager tlsManagerSettings $ \m ->
25 withHTTP req m $ \resp -> void $ runEffect $
26 number_lines_of (responseBody resp ^. utf8 . lines) >-> toHandle IO.stdout
27
28number_lines_of :: Monad m => FreeT (Producer Text m) m bad -> Producer Text m bad
29number_lines_of = number_loop (1 :: Int) where
30 number_loop n freeProducers = do
31 freeProducer <- lift $ runFreeT freeProducers
32 case freeProducer of
33 Pure badbytes -> do yield $ T.pack "\n"
34 return badbytes -- these could be inspected ...
35 Free p -> do yield $ T.pack ("\n" ++ show n ++ " ")
36 nextFreeProducers <- p
37 number_loop (n+1) nextFreeProducers
diff --git a/examples/zoom.hs b/examples/zoom.hs
new file mode 100644
index 0000000..3442dc8
--- /dev/null
+++ b/examples/zoom.hs
@@ -0,0 +1,152 @@
1-- this file illustrates several uses of `zoom`
2-- one of them is quadratic in the length of the file
3-- since it has to decode and encode repeatedly,
4-- and is thus no good on long files.
5
6{-# LANGUAGE OverloadedStrings #-}
7{-# LANGUAGE BangPatterns#-}
8{-# LANGUAGE RankNTypes #-}
9import Blaze.ByteString.Builder (Builder, fromByteString, toByteString)
10import Control.Exception (Exception)
11import Control.Monad.Trans.Class (lift)
12import Data.ByteString (ByteString)
13import qualified Data.ByteString as S
14import qualified Data.ByteString.Lazy as L
15import Data.Monoid
16import Data.Text (Text)
17import qualified Data.Text as T
18import qualified Data.Text.Encoding as TEE
19import qualified Data.Text.Lazy as TL
20import qualified Data.Text.Lazy.Encoding as TLE
21
22import Pipes
23import Pipes.Parse
24import qualified Pipes.Prelude as PP
25import qualified Pipes.ByteString as Bytes
26import qualified Pipes.Text as Txt
27import Pipes.Text.Encoding (utf8)
28
29import Control.Lens
30import Control.Lens.Internal.Zoom
31import Control.Monad
32import qualified System.IO as IO
33import Control.Monad.Trans.Maybe
34import Control.Monad.State.Class
35
36main :: IO ()
37main = do -- S.writeFile fp $ contents 10000 -- 10000 cannot be handled fileParser0 and 1
38 -- parse_file fileParser0 -- pathological
39 -- parse_file fileParser1 -- programs
40 parse_file fileParser2 -- good program
41
42 where
43 parse_file parser = IO.withBinaryFile fp IO.ReadMode $ \h ->
44 do p' <- runEffect $ parseWith parser ( Bytes.fromHandle h ) >-> PP.print
45 runEffect $ p' >-> PP.print
46 parseWith parser = loop where
47 loop p = do (m,p') <- lift (runStateT (runMaybeT parser) p)
48 case m of Nothing -> return p'
49 Just file -> do yield file
50 loop p'
51 fp = "encoded.fileformat"
52 contents n = (toByteString . mconcat . replicate n . encodeFiles) input
53 <> S.pack (replicate 10 250)
54
55
56
57fileParser0, fileParser1, fileParser2 :: Monad m => MaybeT (StateT (Producer ByteString m x) m) File
58fileParser0 = do (name, len) <- zoom utf8 parseText
59 contents <- zoom (Bytes.splitAt len) (lift drawAll)
60 return (File name (S.concat contents))
61 where
62 -- this parser aggregates all Text parsing into one preliminary parser
63 -- which is then applied with `zoom utf8`
64 -- we cannot tell in advance how long, e.g. the file name will be
65 parseText :: Monad m => MaybeT (StateT (Producer Text m x) m) (Text, Int)
66 parseText = do nameLength <- parseNumber
67 names <- zoom (Txt.splitAt nameLength) $ (lift drawAll)
68 contentLength <- parseNumber
69 return $! (T.concat names, contentLength)
70
71-- here we disaggregate the little Text parsers but still apply them with `zoom utf8`
72-- this makes no difference
73fileParser1 = do nameLength <- zoom utf8 parseNumber
74 names <- zoom (utf8 . Txt.splitAt nameLength) (lift drawAll)
75 contentLength <- zoom utf8 parseNumber
76 contents <- zoom (Bytes.splitAt contentLength) (lift drawAll)
77 return (File (T.concat names) (S.concat contents))
78
79-- this is the good program; be reflecting on the fact that file names
80-- should not be a 1000 bytes long, and binary files longer than e.g. 10 ^ 10
81-- we can restrict the length of the byte stream to which we apply `zoom utf8`
82fileParser2 = do nameLength <- Bytes.splitAt 3 ~~> utf8 ~~> parseNumber
83 names <- Bytes.splitAt nameLength ~~> utf8 ~~> lift drawAll
84 len <- Bytes.splitAt 10 ~~> utf8 ~~> parseNumber
85 contents <- Bytes.splitAt len ~~> lift drawAll
86 return (File (T.concat names) (S.concat contents))
87
88-- infix lens nonsense
89infixr 1 ~~>
90(~~>) :: Zoom m n s t
91 => ((s -> Zoomed n c s) -> t -> Zoomed n c t)
92 -> m c -> n c
93(~~>) = zoom
94{-# INLINE (~~>) #-}
95
96parseNumber :: Monad m => MaybeT (StateT (Producer Text m x) m) Int
97parseNumber = loop 0 where
98 loop !n = do c <- MaybeT Txt.drawChar
99 case c of ':' -> return n
100 _ -> do guard ('0' <= c && c <= '9')
101 loop $! n * 10 + (fromEnum c - fromEnum '0')
102
103
104
105-- --- Michael S's `File` type and its binary encoding, etc.
106
107
108data File = File
109 { fileName :: !Text
110 , fileContents :: !ByteString
111 }
112 deriving Show
113
114encodeFile :: File -> Builder
115encodeFile (File name contents) =
116 tellLength (S.length bytesname) <>
117 fromByteString bytesname <>
118 tellLength (S.length contents) <>
119 fromByteString contents
120 where
121 tellLength i = fromByteString $ TEE.encodeUtf8 (T.pack (shows i ":"))
122 bytesname = TEE.encodeUtf8 name
123
124encodeFiles :: [File] -> Builder
125encodeFiles = mconcat . map encodeFile
126
127input :: [File]
128input =
129 [ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8"
130 , File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16"
131 , File "binary.dat" "we'll pretend to be binary"
132 ]
133
134
135---
136
137-- This desperate scheme actually has some efficacy, if used before `utf8` in a zoom
138-- but not much
139
140chunk :: Monad m => Int -> Lens' (Producer ByteString m r) (Producer ByteString m r)
141chunk n = lens (chunkyN n) (\_ b -> b) where
142
143 chunkyN :: Monad m => Int -> Producer ByteString m r -> Producer ByteString m r
144 chunkyN n = prod_loop where
145
146 prod_loop p = do mbs <- lift $ next p
147 case mbs of Left r -> return r
148 Right (bs, p') -> do bs_loop bs
149 prod_loop p'
150 bs_loop bs = unless (S.null bs) $ do yield fore
151 unless (S.null aft) (bs_loop aft)
152 where (fore, aft) = S.splitAt n bs