]> git.immae.eu Git - github/fretlink/text-pipes.git/blob - Pipes/Prelude/Text.hs
commentary
[github/fretlink/text-pipes.git] / Pipes / Prelude / Text.hs
1 {-#LANGUAGE RankNTypes#-}
2
3
4 module Pipes.Prelude.Text
5 (
6 -- * Simple line-based Text IO
7 -- $lineio
8
9 fromHandleLn
10 , toHandleLn
11 , stdinLn
12 , stdoutLn
13 , stdoutLn'
14 , readFileLn
15 , writeFileLn
16 ) where
17
18 import qualified System.IO as IO
19 import Control.Exception (throwIO, try)
20 import Foreign.C.Error (Errno(Errno), ePIPE)
21 import qualified GHC.IO.Exception as G
22 import Data.Text (Text)
23 import qualified Data.Text as T
24 import qualified Data.Text.IO as T
25 import Pipes
26 import qualified Pipes.Safe.Prelude as Safe
27 import Pipes.Safe (MonadSafe(..), runSafeT, runSafeP)
28 import Prelude hiding (readFile, writeFile)
29
30 {- $lineio
31 Line-based operations are marked with a final \-@Ln@, like 'stdinLn', 'readFileLn', etc.
32 They are drop-in 'Text' replacements for the corresponding 'String' operations in
33 @Pipes.Prelude@ and @Pipes.Safe.Prelude@ - a final \-@Ln@ being added where necessary.
34 This module can thus be imported unqualified if @Pipes.Prelude@ is imported qualified, as
35 it must be.
36
37 In using the line-based operations, one is producing and consuming semantically significant individual texts,
38 understood as lines, just as one would produce or pipe 'Int's or 'Char's or anything else.
39 The standard materials from @Pipes@ and @Pipes.Prelude@ and
40 @Data.Text@ are all you need to work with them, and
41 you can use these operations without using any of the other modules in this package.
42
43 Thus, to take a trivial case, here we upper-case three lines from standard input and write
44 them to a file. (@runSafeT@ from @Pipes.Safe@ just makes sure to close any handles opened in its scope;
45 it is only needed for @readFileLn@ and @writeFileLn@.)
46
47 >>> import Pipes
48 >>> import qualified Pipes.Prelude as P
49 >>> import qualified Pipes.Prelude.Text as Text
50 >>> import qualified Data.Text as T
51 >>> Text.runSafeT $ runEffect $ Text.stdinLn >-> P.take 3 >-> P.map T.toUpper >-> Text.writeFileLn "threelines.txt"
52 one<Enter>
53 two<Enter>
54 three<Enter>
55 >>> :! cat "threelines.txt"
56 ONE
57 TWO
58 THREE
59
60 The point of view is very much that of @Pipes.Prelude@, substituting @Text@ for @String@.
61 It would still be the same even if
62 we did something a bit more sophisticated, like run an ordinary attoparsec 'Text' parser on
63 each line, as is frequently desirable. Here we use
64 a minimal attoparsec number parser, @scientific@, on separate lines of standard input,
65 dropping bad parses with @P.concat@:
66
67 >>> import Data.Attoparsec.Text (parseOnly, scientific)
68 >>> P.toListM $ Text.stdinLn >-> P.takeWhile (/= "quit") >-> P.map (parseOnly scientific) >-> P.concat
69 1<Enter>
70 2<Enter>
71 bad<Enter>
72 3<Enter>
73 quit<Enter>
74 [1.0,2.0,3.0]
75
76 The line-based operations are, however, subject to a number of caveats.
77
78 * Where these line-based operations read from a handle, they will
79 accumulate indefinitely long lines. This makes sense for input
80 typed in by a user, and for locally produced files of known characteristics, but
81 otherwise not. See the post on
82 <http://www.haskellforall.com/2013/09/perfect-streaming-using-pipes-bytestring.html perfect streaming>
83 to see why @pipes-bytestring@ and this package, outside this module, take a different approach, in which
84 lines themselves are permitted to stream without accumulation.
85
86 * The line-based operations,
87 like those in @Data.Text.IO@, use the system encoding (and @T.hGetLine@, @T.hPutLine@ etc.)
88 and thus are slower than the \'official\' route, which would use the very fast
89 bytestring IO operations from @Pipes.ByteString@ and the
90 encoding and decoding functions in @Pipes.Text.Encoding@, which are also quite fast
91 thanks to the @streaming-commons@ package.
92
93 * The line-based operations (again like those in @Data.Text.IO@) will
94 generate text exceptions after the fashion of
95 @Data.Text.Encoding@, rather than returning the undigested bytes in the
96 style of @Pipes.Text.Encoding@. This is the standard practice in the pipes libraries.
97
98 -}
99
100
101 {-| Read separate lines of 'Text' from 'IO.stdin' using 'T.getLine', terminating on end of input.
102
103 This function will accumulate indefinitely long strict 'Text's. See the caveats above.
104 -}
105 stdinLn :: MonadIO m => Producer' T.Text m ()
106 stdinLn = fromHandleLn IO.stdin
107 {-# INLINABLE stdinLn #-}
108
109
110 {-| Write 'Text' lines to 'IO.stdout' using 'putStrLn', terminating without error on a broken output pipe
111 -}
112 stdoutLn :: MonadIO m => Consumer' T.Text m ()
113 stdoutLn = go
114 where
115 go = do
116 str <- await
117 x <- liftIO $ try (T.putStrLn str)
118 case x of
119 Left (G.IOError { G.ioe_type = G.ResourceVanished
120 , G.ioe_errno = Just ioe })
121 | Errno ioe == ePIPE
122 -> return ()
123 Left e -> liftIO (throwIO e)
124 Right () -> go
125 {-# INLINABLE stdoutLn #-}
126
127 {-| Write lines of 'Text' to 'IO.stdout'. This does not handle a broken output pipe,
128 but has a polymorphic return value.
129 -}
130 stdoutLn' :: MonadIO m => Consumer' T.Text m r
131 stdoutLn' = for cat (\str -> liftIO (T.putStrLn str))
132 {-# INLINABLE stdoutLn' #-}
133
134 {-# RULES
135 "p >-> stdoutLn'" forall p .
136 p >-> stdoutLn' = for p (\str -> liftIO (T.putStrLn str))
137 #-}
138
139 {-| Read separate lines of 'Text' from a 'IO.Handle' using 'T.hGetLine',
140 terminating at the end of input
141
142 This operation will accumulate indefinitely large strict texts. See the caveats above.
143 -}
144 fromHandleLn :: MonadIO m => IO.Handle -> Producer' Text m ()
145 fromHandleLn h = go where
146 getLine :: IO (Either G.IOException Text)
147 getLine = try (T.hGetLine h)
148
149 go = do txt <- liftIO getLine
150 case txt of
151 Left e -> return ()
152 Right y -> do yield y
153 go
154 {-# INLINABLE fromHandleLn #-}
155
156 -- to do: investigate differences from the above:
157 -- fromHandleLn :: MonadIO m => IO.Handle -> Producer' T.Text m ()
158 -- fromHandleLn h = go
159 -- where
160 -- go = do
161 -- eof <- liftIO $ IO.hIsEOF h
162 -- unless eof $ do
163 -- str <- liftIO $ T.hGetLine h
164 -- yield str
165 -- go
166 -- {-# INLINABLE fromHandleLn #-}
167
168
169 -- | Write separate lines of 'Text' to a 'IO.Handle' using 'T.hPutStrLn'
170 toHandleLn :: MonadIO m => IO.Handle -> Consumer' T.Text m r
171 toHandleLn handle = for cat (\str -> liftIO (T.hPutStrLn handle str))
172 {-# INLINABLE toHandleLn #-}
173
174 {-# RULES
175 "p >-> toHandleLn handle" forall p handle .
176 p >-> toHandleLn handle = for p (\str -> liftIO (T.hPutStrLn handle str))
177 #-}
178
179
180 {-| Stream separate lines of text from a file. Apply @runSafeT@ after running the
181 pipeline to manage the opening and closing of the handle.
182
183 This operation will accumulate indefinitely long strict text chunks.
184 See the caveats above.
185 -}
186 readFileLn :: MonadSafe m => FilePath -> Producer Text m ()
187 readFileLn file = Safe.withFile file IO.ReadMode fromHandleLn
188 {-# INLINE readFileLn #-}
189
190
191
192 {-| Write lines to a file. Apply @runSafeT@ after running the
193 pipeline to manage the opening and closing of the handle.
194 -}
195 writeFileLn :: (MonadSafe m) => FilePath -> Consumer' Text m r
196 writeFileLn file = Safe.withFile file IO.WriteMode toHandleLn
197 {-# INLINABLE writeFileLn #-}
198