-{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
-#if __GLASGOW_HASKELL__ >= 702
-{-# LANGUAGE Trustworthy #-}
-#endif
+{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
+
{-| This module provides @pipes@ utilities for \"text streams\", which are
streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
a 'Producer' can be converted to and from lazy 'Text's, though this is generally
example, the following program copies a document from one file to another:
> import Pipes
-> import qualified Data.Text.Pipes as Text
+> import qualified Pipes.Text as Text
+> import qualified Pipes.Text.IO as Text
> import System.IO
>
> main =
To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
> import Pipes
-> import qualified Data.Text.Pipes as Text
+> import qualified Pipes.Text as Text
+> import qualified Pipes.Text.IO as Text
> import Pipes.Safe
>
> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
- and 'stdout' proxies, as with the following \"echo\" program:
+ and 'stdout' pipes, as with the following \"echo\" program:
> main = runEffect $ Text.stdin >-> Text.stdout
- You can also translate pure lazy 'TL.Text's to and from proxies:
+ You can also translate pure lazy 'TL.Text's to and from pipes:
> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
module Pipes.Text (
-- * Producers
fromLazy
- , stdin
- , fromHandle
- , readFile
- , stdinLn
+ -- , stdin
+ -- , fromHandle
+ -- , readFile
-- * Consumers
- , stdout
- , stdoutLn
- , toHandle
- , writeFile
+ -- , stdout
+ -- , toHandle
+ -- , writeFile
-- * Pipes
, map
, dropWhile
, filter
, scan
- , encodeUtf8
+ -- , encodeUtf8
, pack
, unpack
, toCaseFold
, group
, word
, line
- , decodeUtf8
- , decode
+
+ -- -- * Decoding Lenses
+ -- , decodeUtf8
+ -- , codec
+ --
+ -- -- * Codecs
+ -- , utf8
+ -- , utf16_le
+ -- , utf16_be
+ -- , utf32_le
+ -- , utf32_be
+ --
+ -- -- * Other Decoding/Encoding Functions
+ -- , decodeIso8859_1
+ -- , decodeAscii
+ -- , encodeIso8859_1
+ -- , encodeAscii
-- * FreeT Splitters
, chunksOf
-- * Re-exports
-- $reexports
+ -- , DecodeResult(..)
+ -- , Codec
+ -- , TextException(..)
, module Data.ByteString
, module Data.Text
, module Data.Profunctor
- , module Data.Word
, module Pipes.Parse
, module Pipes.Group
) where
-import Control.Exception (throwIO, try)
import Control.Applicative ((<*))
-import Control.Monad (liftM, unless, join)
+import Control.Monad (liftM, join)
import Control.Monad.Trans.State.Strict (StateT(..), modify)
-import Data.Monoid ((<>))
import qualified Data.Text as T
-import qualified Data.Text.IO as T
-import qualified Data.Text.Encoding as TE
-import qualified Data.Text.Encoding.Error as TE
import Data.Text (Text)
import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.IO as TL
import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
-import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
import Data.ByteString (ByteString)
-import qualified Data.ByteString as B
-import Data.Char (ord, isSpace)
import Data.Functor.Constant (Constant(Constant, getConstant))
import Data.Functor.Identity (Identity)
import Data.Profunctor (Profunctor)
import qualified Data.Profunctor
-import qualified Data.List as List
-import Foreign.C.Error (Errno(Errno), ePIPE)
-import qualified GHC.IO.Exception as G
import Pipes
-import qualified Pipes.ByteString as PB
-import qualified Pipes.Text.Internal as PE
-import Pipes.Text.Codec (Codec(..))
-import Pipes.Core (respond, Server')
import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
import qualified Pipes.Group as PG
import qualified Pipes.Parse as PP
import Pipes.Parse (Parser)
-import qualified Pipes.Safe.Prelude as Safe
-import qualified Pipes.Safe as Safe
-import Pipes.Safe (MonadSafe(..), Base(..))
import qualified Pipes.Prelude as P
-import qualified System.IO as IO
import Data.Char (isSpace)
-import Data.Word (Word8)
import Prelude hiding (
all,
fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
{-# INLINE fromLazy #-}
--- | Stream text from 'stdin'
-stdin :: MonadIO m => Producer Text m ()
-stdin = fromHandle IO.stdin
-{-# INLINE stdin #-}
-
-{-| Convert a 'IO.Handle' into a text stream using a text size
- determined by the good sense of the text library; note that this
- is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
- but uses the system encoding and has other `Data.Text.IO` features
--}
-
-fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
-fromHandle h = go where
- go = do txt <- liftIO (T.hGetChunk h)
- unless (T.null txt) ( do yield txt
- go )
-{-# INLINABLE fromHandle#-}
-
-
-{-| Stream text from a file in the simple fashion of @Data.Text.IO@
-
->>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
-MAIN = PUTSTRLN "HELLO WORLD"
--}
-
-readFile :: MonadSafe m => FilePath -> Producer Text m ()
-readFile file = Safe.withFile file IO.ReadMode fromHandle
-{-# INLINE readFile #-}
-
-{-| Crudely stream lines of input from stdin in the style of Pipes.Prelude.
- This is for testing in ghci etc.; obviously it will be unsound if used to recieve
- the contents of immense files with few newlines.
-
->>> let safely = runSafeT . runEffect
->>> safely $ for Text.stdinLn (lift . lift . print . T.length)
-hello
-5
-world
-5
-
--}
-stdinLn :: MonadIO m => Producer' Text m ()
-stdinLn = go where
- go = do
- eof <- liftIO (IO.hIsEOF IO.stdin)
- unless eof $ do
- txt <- liftIO (T.hGetLine IO.stdin)
- yield txt
- go
-{-# INLINABLE stdinLn #-}
-
-{-| Stream text to 'stdout'
-
- Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
-
- Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@
- instead of @(source >-> stdout)@ .
--}
-stdout :: MonadIO m => Consumer' Text m ()
-stdout = go
- where
- go = do
- txt <- await
- x <- liftIO $ try (T.putStr txt)
- case x of
- Left (G.IOError { G.ioe_type = G.ResourceVanished
- , G.ioe_errno = Just ioe })
- | Errno ioe == ePIPE
- -> return ()
- Left e -> liftIO (throwIO e)
- Right () -> go
-{-# INLINABLE stdout #-}
-
-stdoutLn :: (MonadIO m) => Consumer' Text m ()
-stdoutLn = go
- where
- go = do
- str <- await
- x <- liftIO $ try (T.putStrLn str)
- case x of
- Left (G.IOError { G.ioe_type = G.ResourceVanished
- , G.ioe_errno = Just ioe })
- | Errno ioe == ePIPE
- -> return ()
- Left e -> liftIO (throwIO e)
- Right () -> go
-{-# INLINABLE stdoutLn #-}
-
-{-| Convert a text stream into a 'Handle'
-
- Note: again, for best performance, where possible use
- @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
--}
-toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
-toHandle h = for cat (liftIO . T.hPutStr h)
-{-# INLINABLE toHandle #-}
-
-{-# RULES "p >-> toHandle h" forall p h .
- p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
- #-}
-
-
--- | Stream text into a file. Uses @pipes-safe@.
-writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
-writeFile file = Safe.withFile file IO.WriteMode toHandle
-{-# INLINE writeFile #-}
-
type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
-encodeUtf8 :: Monad m => Pipe Text ByteString m r
-encodeUtf8 = P.map TE.encodeUtf8
-{-# INLINEABLE encodeUtf8 #-}
-
-{-# RULES "p >-> encodeUtf8" forall p .
- p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
- #-}
+-- encodeUtf8 :: Monad m => Pipe Text ByteString m r
+-- encodeUtf8 = P.map TE.encodeUtf8
+-- {-# INLINEABLE encodeUtf8 #-}
+--
+-- {-# RULES "p >-> encodeUtf8" forall p .
+-- p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
+-- #-}
-- | Transform a Pipe of 'String's into one of 'Text' chunks
pack :: Monad m => Pipe String Text m r
scan
:: (Monad m)
=> (Char -> Char -> Char) -> Char -> Pipe Text Text m r
-scan step begin = go begin
+scan step begin = do
+ yield (T.singleton begin)
+ go begin
where
go c = do
txt <- await
let txt' = T.scanl step c txt
c' = T.last txt'
- yield txt'
+ yield (T.tail txt')
go c'
{-# INLINABLE scan #-}
{-# INLINABLE isEndOfChars #-}
--- | An improper lens into a stream of 'ByteString' expected to be UTF-8 encoded; the associated
--- stream of Text ends by returning a stream of ByteStrings beginning at the point of failure.
-
-decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
- (Producer Text m (Producer ByteString m r))
-decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
- (k (go B.empty PE.streamDecodeUtf8 p0)) where
- go !carry dec0 p = do
- x <- lift (next p)
- case x of Left r -> return (if B.null carry
- then return r -- all bytestring input was consumed
- else (do yield carry -- a potentially valid fragment remains
- return r))
-
- Right (chunk, p') -> case dec0 chunk of
- PE.Some text carry2 dec -> do yield text
- go carry2 dec p'
- PE.Other text bs -> do yield text
- return (do yield bs -- an invalid blob remains
- p')
-{-# INLINABLE decodeUtf8 #-}
-- | Splits a 'Producer' after the given number of characters
{-# INLINABLE lines #-}
-
-- | Split a text stream into 'FreeT'-delimited words
words
:: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
-}
-
-decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
--- decode codec = go B.empty where
--- go extra p0 =
--- do x <- lift (next p0)
--- case x of Right (chunk, p) ->
--- do let (text, stuff) = codecDecode codec (B.append extra chunk)
--- yield text
--- case stuff of Right extra' -> go extra' p
--- Left (exc,bs) -> do yield text
--- return (do yield bs
--- p)
--- Left r -> return (do yield extra
--- return r)
-
-decode d p0 = case d of
- PE.Other txt bad -> do yield txt
- return (do yield bad
- p0)
- PE.Some txt extra dec -> do yield txt
- x <- lift (next p0)
- case x of Left r -> return (do yield extra
- return r)
- Right (chunk,p1) -> decode (dec chunk) p1
-
--- go !carry dec0 p = do
--- x <- lift (next p)
--- case x of Left r -> if B.null carry
--- then return (return r) -- all bytestrinput was consumed
--- else return (do yield carry -- a potentially valid fragment remains
--- return r)
---
--- Right (chunk, p') -> case dec0 chunk of
--- PE.Some text carry2 dec -> do yield text
--- go carry2 dec p'
--- PE.Other text bs -> do yield text
--- return (do yield bs -- an invalid blob remains
--- p')
--- {-# INLINABLE decodeUtf8 #-}
\ No newline at end of file