From a31032bb469a2d7eef089d54fb5069405f0710c9 Mon Sep 17 00:00:00 2001 From: michaelt Date: Mon, 30 May 2016 09:28:49 -0400 Subject: commentary --- Pipes/Prelude/Text.hs | 77 +++++++++++++++++++++++++++----------------------- Pipes/Text/Encoding.hs | 15 ++++++---- 2 files changed, 52 insertions(+), 40 deletions(-) (limited to 'Pipes') diff --git a/Pipes/Prelude/Text.hs b/Pipes/Prelude/Text.hs index 701d5f6..46424a7 100644 --- a/Pipes/Prelude/Text.hs +++ b/Pipes/Prelude/Text.hs @@ -31,14 +31,18 @@ import Prelude hiding (readFile, writeFile) Line-based operations are marked with a final \-@Ln@, like 'stdinLn', 'readFileLn', etc. They are drop-in 'Text' replacements for the corresponding 'String' operations in @Pipes.Prelude@ and @Pipes.Safe.Prelude@ - a final \-@Ln@ being added where necessary. - In using them, one is producing and consuming semantically significant individual texts, + This module can thus be imported unqualified if @Pipes.Prelude@ is imported qualified, as + it must be. + + In using the line-based operations, one is producing and consuming semantically significant individual texts, understood as lines, just as one would produce or pipe 'Int's or 'Char's or anything else. The standard materials from @Pipes@ and @Pipes.Prelude@ and @Data.Text@ are all you need to work with them, and you can use these operations without using any of the other modules in this package. Thus, to take a trivial case, here we upper-case three lines from standard input and write - them to a file. + them to a file. (@runSafeT@ from @Pipes.Safe@ just makes sure to close any handles opened in its scope; + it is only needed for @readFileLn@ and @writeFileLn@.) >>> import Pipes >>> import qualified Pipes.Prelude as P @@ -53,8 +57,7 @@ ONE TWO THREE - Here @runSafeT@ from @Pipes.Safe@ just makes sure to close any handles opened in its scope. - Otherwise the point of view is very much that of @Pipes.Prelude@, substituting @Text@ for @String@. + The point of view is very much that of @Pipes.Prelude@, substituting @Text@ for @String@. It would still be the same even if we did something a bit more sophisticated, like run an ordinary attoparsec 'Text' parser on each line, as is frequently desirable. Here we use @@ -71,37 +74,40 @@ quit [1.0,2.0,3.0] The line-based operations are, however, subject to a number of caveats. - First, where they read from a handle, they will of course happily - accumulate indefinitely long lines. This is likely to be legitimate for input - typed in by a user, and for locally produced files of known characteristics, but - otherwise not. See the post on - - to see why @pipes-bytestring@ and this package, outside this module, take a different approach. - Furthermore, the line-based operations, - like those in @Data.Text.IO@, use the system encoding (and @T.hGetLine@, @T.hPutLine@ etc.) - and thus are slower than the \'official\' route, which would use the very fast - bytestring IO operations from @Pipes.ByteString@ and - encoding and decoding functions in @Pipes.Text.Encoding@. Finally, the line-based - operations will generate text exceptions after the fashion of - @Data.Text.Encoding@, rather than returning the undigested bytes in the - style of @Pipes.Text.Encoding@. + + * Where these line-based operations read from a handle, they will + accumulate indefinitely long lines. This makes sense for input + typed in by a user, and for locally produced files of known characteristics, but + otherwise not. See the post on + + to see why @pipes-bytestring@ and this package, outside this module, take a different approach, in which + lines themselves are permitted to stream without accumulation. + + * The line-based operations, + like those in @Data.Text.IO@, use the system encoding (and @T.hGetLine@, @T.hPutLine@ etc.) + and thus are slower than the \'official\' route, which would use the very fast + bytestring IO operations from @Pipes.ByteString@ and the + encoding and decoding functions in @Pipes.Text.Encoding@, which are also quite fast + thanks to the @streaming-commons@ package. + + * The line-based operations (again like those in @Data.Text.IO@) will + generate text exceptions after the fashion of + @Data.Text.Encoding@, rather than returning the undigested bytes in the + style of @Pipes.Text.Encoding@. This is the standard practice in the pipes libraries. -} -{-| Read separate lines of 'Text' from 'IO.stdin' using 'T.getLine' - This function will accumulate indefinitely long strict 'Text's. See the caveats above. +{-| Read separate lines of 'Text' from 'IO.stdin' using 'T.getLine', terminating on end of input. - Terminates on end of input + This function will accumulate indefinitely long strict 'Text's. See the caveats above. -} stdinLn :: MonadIO m => Producer' T.Text m () stdinLn = fromHandleLn IO.stdin {-# INLINABLE stdinLn #-} -{-| Write 'Text' lines to 'IO.stdout' using 'putStrLn' - - Unlike 'toHandle', 'stdoutLn' gracefully terminates on a broken output pipe +{-| Write 'Text' lines to 'IO.stdout' using 'putStrLn', terminating without error on a broken output pipe -} stdoutLn :: MonadIO m => Consumer' T.Text m () stdoutLn = go @@ -118,10 +124,8 @@ stdoutLn = go Right () -> go {-# INLINABLE stdoutLn #-} -{-| Write lines of 'Text' to 'IO.stdout'. - - This does not handle a broken output pipe, but has a polymorphic return - value. +{-| Write lines of 'Text' to 'IO.stdout'. This does not handle a broken output pipe, + but has a polymorphic return value. -} stdoutLn' :: MonadIO m => Consumer' T.Text m r stdoutLn' = for cat (\str -> liftIO (T.putStrLn str)) @@ -132,10 +136,10 @@ stdoutLn' = for cat (\str -> liftIO (T.putStrLn str)) p >-> stdoutLn' = for p (\str -> liftIO (T.putStrLn str)) #-} -{-| Read separate lines of 'Text' from a 'IO.Handle' using 'T.hGetLine'. - This operation will accumulate indefinitely large strict texts. See the caveats above. +{-| Read separate lines of 'Text' from a 'IO.Handle' using 'T.hGetLine', + terminating at the end of input - Terminates on end of input + This operation will accumulate indefinitely large strict texts. See the caveats above. -} fromHandleLn :: MonadIO m => IO.Handle -> Producer' Text m () fromHandleLn h = go where @@ -173,8 +177,11 @@ toHandleLn handle = for cat (\str -> liftIO (T.hPutStrLn handle str)) #-} -{-| Stream separate lines of text from a file. This operation will accumulate - indefinitely long strict text chunks. See the caveats above. +{-| Stream separate lines of text from a file. Apply @runSafeT@ after running the + pipeline to manage the opening and closing of the handle. + + This operation will accumulate indefinitely long strict text chunks. + See the caveats above. -} readFileLn :: MonadSafe m => FilePath -> Producer Text m () readFileLn file = Safe.withFile file IO.ReadMode fromHandleLn @@ -182,8 +189,8 @@ readFileLn file = Safe.withFile file IO.ReadMode fromHandleLn -{-| Write lines to a file, automatically opening and closing the file as - necessary +{-| Write lines to a file. Apply @runSafeT@ after running the + pipeline to manage the opening and closing of the handle. -} writeFileLn :: (MonadSafe m) => FilePath -> Consumer' Text m r writeFileLn file = Safe.withFile file IO.WriteMode toHandleLn diff --git a/Pipes/Text/Encoding.hs b/Pipes/Text/Encoding.hs index e242411..a576a42 100644 --- a/Pipes/Text/Encoding.hs +++ b/Pipes/Text/Encoding.hs @@ -220,14 +220,17 @@ type Codec {- | @decode@ is just the ordinary @view@ or @(^.)@ of the lens libraries; - exported here under a name appropriate to the material. Thus + exported here under a name appropriate to the material. + Thus given a bytestring producer called @bytes@ we have > decode utf8 bytes :: Producer Text IO (Producer ByteString IO ()) All of these are thus the same: -> decode utf8 bytes = view utf8 bytes = bytes ^. utf8 = decodeUtf8 bytes - +> decode utf8 bytes +> view utf8 bytes +> bytes ^. utf8 +> decodeUtf8 bytes -} @@ -244,9 +247,11 @@ decode codec a = getConstant (codec Constant a) returned as a Left value; in the happy case, a Right value is returned with the anticipated return value for the original bytestring producer. - Again, all of these are the same + Given a bytestring producer called @bytes@ all of these will be the same: -> decode (utf8 . eof) bytes = view (utf8 . eof) p = p^.utf8.eof +> decode (utf8 . eof) bytes +> view (utf8 . eof) bytes +> bytes^.utf8.eof -} -- cgit v1.2.3