From b6f8504f1bbd2c388af30752ff32c52a48644706 Mon Sep 17 00:00:00 2001 From: michaelt Date: Sat, 6 Feb 2016 16:03:09 -0500 Subject: further line-based functions --- Pipes/Text/IO.hs | 220 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 177 insertions(+), 43 deletions(-) (limited to 'Pipes/Text') diff --git a/Pipes/Text/IO.hs b/Pipes/Text/IO.hs index 51c6926..d30f13c 100644 --- a/Pipes/Text/IO.hs +++ b/Pipes/Text/IO.hs @@ -3,22 +3,39 @@ module Pipes.Text.IO ( - -- * Text IO + -- * Simple line-based Text IO + -- $lineio + + fromHandleLn + , toHandleLn + , stdinLn + , stdoutLn + , stdoutLn' + , readFileLn + , writeFileLn + + + -- * Simple streaming text IO -- $textio -- * Caveats -- $caveats -- * Producers - fromHandle - , fromHandleLn + , fromHandle , stdin , readFile - , readFileLn + -- * Consumers , toHandle , stdout , writeFile + + -- * Re-exports + , MonadSafe(..) + , runSafeT + , runSafeP + , Safe.withFile ) where import qualified System.IO as IO @@ -30,14 +47,164 @@ import qualified Data.Text as T import qualified Data.Text.IO as T import Pipes import qualified Pipes.Safe.Prelude as Safe -import Pipes.Safe (MonadSafe(..)) +import Pipes.Safe (MonadSafe(..), runSafeT, runSafeP) import Prelude hiding (readFile, writeFile) +{- $lineio + Line-based operations are marked with a final \-@Ln@, like 'stdinLn', 'readFileLn'. They are + drop-in replacements for the line-based operations in @Pipes.Prelude@ and + @Pipes.Safe.Prelude@ - the final \-@Ln@ being added where necessary. + With them, one is producing, piping and consuming semantically significant individual texts, + understood as lines, just as one would pipe 'Int's. The standard materials from @Pipes@ and @Pipes.Prelude@ and + @Data.Text@ are all you need to interact with these lines as you read or write them. + You can use these operations without using any of the other material in this package. + + Thus, to take a trivial case, here we upper-case three lines from standard input and write + them to a file. + +>>> import Pipes +>>> import qualified Pipes.Prelude as P +>>> import qualified Pipes.Text.IO as Text +>>> import qualified Data.Text as T +>>> Text.runSafeT $ runEffect $ Text.stdinLn >-> P.take 3 >-> P.map T.toUpper >-> Text.writeFileLn "threelines.txt" +one +two +three +>>> :! cat "threelines.txt" +ONE +TWO +THREE + + The point of view is very much that of @Pipes.Prelude@ and the user who needs no more + can use them ignoring the rest of this package. + + The line-based operations are, however, subject to a number of caveats. + First, where they read from a handle, they will of course happily + accumulate indefinitely long lines. This is likely to be legitimate for input + typed in by a user, and for locally produced log files and other known material, but + otherwise not. See the post on + + to see why @pipes-bytestring@ and this package take a different approach. Furthermore, + like those in @Data.Text.IO@, the operations use the system encoding and @T.hGetLine@ + and thus are slower than the \'official\' route, which would use bytestring IO and + the encoding and decoding functions in @Pipes.Text.Encoding@. Finally, they will generate + text exceptions after the fashion of @Data.Text.Encoding@ rather than returning the + undigested bytes in the style of @Pipes.Text.Encoding@ + +-} + + +{-| Read separate lines of 'Text' from 'IO.stdin' using 'T.getLine' + This function will accumulate indefinitely long strict 'Text's. See the caveats above. + + Terminates on end of input +-} +stdinLn :: MonadIO m => Producer' T.Text m () +stdinLn = fromHandleLn IO.stdin +{-# INLINABLE stdinLn #-} + + +{-| Write 'String's to 'IO.stdout' using 'putStrLn' + + Unlike 'toHandle', 'stdoutLn' gracefully terminates on a broken output pipe +-} +stdoutLn :: MonadIO m => Consumer' T.Text m () +stdoutLn = go + where + go = do + str <- await + x <- liftIO $ try (T.putStrLn str) + case x of + Left (G.IOError { G.ioe_type = G.ResourceVanished + , G.ioe_errno = Just ioe }) + | Errno ioe == ePIPE + -> return () + Left e -> liftIO (throwIO e) + Right () -> go +{-# INLINABLE stdoutLn #-} + +{-| Write lines of 'Text's to 'IO.stdout'. + + This does not handle a broken output pipe, but has a polymorphic return + value. +-} +stdoutLn' :: MonadIO m => Consumer' T.Text m r +stdoutLn' = for cat (\str -> liftIO (T.putStrLn str)) +{-# INLINABLE stdoutLn' #-} + +{-# RULES + "p >-> stdoutLn'" forall p . + p >-> stdoutLn' = for p (\str -> liftIO (T.putStrLn str)) + #-} + +{-| Read separate lines of 'Text' from a 'IO.Handle' using 'T.hGetLine'. + This operation will accumulate indefinitely large strict texts. See the caveats above. + + Terminates on end of input +-} +fromHandleLn :: MonadIO m => IO.Handle -> Producer' Text m () +fromHandleLn h = go where + getLine :: IO (Either G.IOException Text) + getLine = try (T.hGetLine h) + + go = do txt <- liftIO getLine + case txt of + Left e -> return () + Right y -> do yield y + go +{-# INLINABLE fromHandleLn #-} + +-- to do: investigate differences from the above: +-- fromHandleLn :: MonadIO m => IO.Handle -> Producer' T.Text m () +-- fromHandleLn h = go +-- where +-- go = do +-- eof <- liftIO $ IO.hIsEOF h +-- unless eof $ do +-- str <- liftIO $ T.hGetLine h +-- yield str +-- go +-- {-# INLINABLE fromHandleLn #-} + + +-- | Write separate lines of 'Text' to a 'IO.Handle' using 'T.hPutStrLn' +toHandleLn :: MonadIO m => IO.Handle -> Consumer' T.Text m r +toHandleLn handle = for cat (\str -> liftIO (T.hPutStrLn handle str)) +{-# INLINABLE toHandleLn #-} + +{-# RULES + "p >-> toHandleLn handle" forall p handle . + p >-> toHandleLn handle = for p (\str -> liftIO (T.hPutStrLn handle str)) + #-} + + +{-| Stream separate lines of text from a file. This operation will accumulate + indefinitely long strict text chunks. See the caveats above. +-} +readFileLn :: MonadSafe m => FilePath -> Producer Text m () +readFileLn file = Safe.withFile file IO.ReadMode fromHandleLn +{-# INLINE readFileLn #-} + + + +{-| Write lines to a file, automatically opening and closing the file as + necessary +-} +writeFileLn :: (MonadSafe m) => FilePath -> Consumer' Text m r +writeFileLn file = Safe.withFile file IO.WriteMode toHandleLn +{-# INLINABLE writeFileLn #-} + + + {- $textio Where pipes @IO@ replaces lazy @IO@, @Producer Text IO r@ replaces lazy 'Text'. - This module exports some convenient functions for producing and consuming - pipes 'Text' in @IO@, namely, 'readFile', 'writeFile', 'fromHandle', 'toHandle', - 'stdin' and 'stdout'. Some caveats described below. + The official IO of this package and the pipes ecosystem generally would use the + IO functions in @Pipes.ByteString@ and the encoding and decoding material in + @Pipes.Text.Encoding@. + + The streaming functions exported here, namely, 'readFile', 'writeFile', 'fromHandle', 'toHandle', + 'stdin' and 'stdout' simplify this and use the system encoding on the model of @Data.Text.IO@ + and @Data.Text.Lazy.IO@ Some caveats described below. The main points are as in : @@ -69,6 +236,8 @@ To stream from files, the following is perhaps more Prelude-like (note that it u > main = runEffect $ Text.stdin >-> Text.stdout + These programs, unlike the corresponding programs written with the line-based functions, + will pass along a 1 terabyte line without affecting memory use. -} @@ -87,23 +256,6 @@ To stream from files, the following is perhaps more Prelude-like (note that it u * Like the functions in @Data.Text.IO@ , they use Text exceptions, not the standard Pipes protocols. - Something like - -> view utf8 . Bytes.fromHandle :: Handle -> Producer Text IO (Producer ByteString m ()) - - yields a stream of Text, and follows - standard pipes protocols by reverting to (i.e. returning) the underlying byte stream - upon reaching any decoding error. (See especially the pipes-binary package.) - - By contrast, something like - -> Text.fromHandle :: Handle -> Producer Text IO () - - supplies a stream of text returning '()', which is convenient for many tasks, - but violates the pipes @pipes-binary@ approach to decoding errors and - throws an exception of the kind characteristic of the @text@ library instead. - - -} {-| Convert a 'IO.Handle' into a text stream using a text size @@ -122,18 +274,6 @@ fromHandle h = go where {-# INLINABLE fromHandle#-} -fromHandleLn :: MonadIO m => IO.Handle -> Producer Text m () -fromHandleLn h = go where - getLine :: IO (Either G.IOException Text) - getLine = try (T.hGetLine h) - - go = do txt <- liftIO getLine - case txt of - Left e -> return () - Right y -> do yield y - go -{-# INLINABLE fromHandleLn #-} - -- | Stream text from 'stdin' stdin :: MonadIO m => Producer Text m () stdin = fromHandle IO.stdin @@ -151,12 +291,6 @@ readFile file = Safe.withFile file IO.ReadMode fromHandle {-# INLINE readFile #-} -{-| Stream lines of text from a file --} -readFileLn :: MonadSafe m => FilePath -> Producer Text m () -readFileLn file = Safe.withFile file IO.ReadMode fromHandleLn -{-# INLINE readFileLn #-} - {-| Stream text to 'stdout' -- cgit v1.2.3