-{-# LANGUAGE RankNTypes, TypeFamilies, NoMonomorphismRestriction #-}
-
+{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
{-| This module provides @pipes@ utilities for \"text streams\", which are
- streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
- a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can
- be associated with a 'Producer' or 'Consumer' according as it is read or written to.
+ streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
+ a 'Producer' can be converted to and from lazy 'Text's, though this is generally
+ unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'.
+ An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to.
To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For
example, the following program copies a document from one file to another:
Note that functions in this library are designed to operate on streams that
are insensitive to text boundaries. This means that they may freely split
- text into smaller texts and /discard empty texts/. However, they will
- /never concatenate texts/ in order to provide strict upper bounds on memory
- usage.
+ text into smaller texts, /discard empty texts/. However, apart from the
+ special case of 'concatMap', they will /never concatenate texts/ in order
+ to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'.
-}
module Pipes.Text (
-- * Folds
toLazy,
toLazyM,
- fold,
+ foldChars,
head,
last,
null,
lines,
words,
decodeUtf8,
+ decode,
-- * Transformations
intersperse,
) where
import Control.Exception (throwIO, try)
-import Control.Monad (liftM, unless)
+import Control.Monad (liftM, unless, join)
import Control.Monad.Trans.State.Strict (StateT(..))
+import Data.Monoid ((<>))
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.Encoding as TE
import qualified GHC.IO.Exception as G
import Pipes
import qualified Pipes.ByteString as PB
-import qualified Pipes.ByteString.Parse as PBP
+import qualified Pipes.ByteString as PBP
import qualified Pipes.Text.Internal as PE
+import Pipes.Text.Internal (Codec(..))
import Pipes.Text.Parse (
nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
import Pipes.Core (respond, Server')
import qualified Pipes.Parse as PP
-import Pipes.Parse (input, concat, FreeT)
+import Pipes.Parse ( FreeT)
import qualified Pipes.Safe.Prelude as Safe
import qualified Pipes.Safe as Safe
import Pipes.Safe (MonadSafe(..), Base(..))
-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
-{-# INLINABLE fromLazy #-}
+{-# INLINE fromLazy #-}
-- | Stream text from 'stdin'
-stdin :: MonadIO m => Producer Text m (Producer ByteString m ())
+stdin :: MonadIO m => Producer Text m ()
stdin = fromHandle IO.stdin
-{-# INLINABLE stdin #-}
+{-# INLINE stdin #-}
{-| Convert a 'IO.Handle' into a text stream using a text size
- determined by the good sense of the text library.
+ determined by the good sense of the text library; note that this
+ is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
+ but uses the system encoding and has other `Data.Text.IO` features
-}
-fromHandle :: MonadIO m => IO.Handle -> Producer Text m (Producer ByteString m ())
-fromHandle h = decodeUtf8 (PB.fromHandle h)
-{-# INLINE fromHandle#-}
+fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
+fromHandle h = go where
+ go = do txt <- liftIO (T.hGetChunk h)
+ unless (T.null txt) $ do yield txt
+ go
+{-# INLINABLE fromHandle#-}
+
-{-| Stream text from a file using Pipes.Safe
+{-| Stream text from a file in the simple fashion of @Data.Text.IO@
>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
MAIN = PUTSTRLN "HELLO WORLD"
-}
-readFile :: (MonadSafe m) => FilePath -> Producer Text m (Producer ByteString m ())
+readFile :: MonadSafe m => FilePath -> Producer Text m ()
readFile file = Safe.withFile file IO.ReadMode fromHandle
-{-# INLINABLE readFile #-}
+{-# INLINE readFile #-}
{-| Stream lines of text from stdin (for testing in ghci etc.)
txt <- liftIO (T.hGetLine IO.stdin)
yield txt
go
-
+{-# INLINABLE stdinLn #-}
{-| Stream text to 'stdout'
-- | Stream text into a file. Uses @pipes-safe@.
writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
writeFile file = Safe.withFile file IO.WriteMode toHandle
+{-# INLINE writeFile #-}
-- | Apply a transformation to each 'Char' in the stream
map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
{-# INLINABLE toLazyM #-}
-- | Reduce the text stream using a strict left fold over characters
-fold
+foldChars
:: Monad m
=> (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r
-fold step begin done = P.fold (T.foldl' step) begin done
+foldChars step begin done = P.fold (T.foldl' step) begin done
{-# INLINABLE fold #-}
-- | Retrieve the first 'Char'
count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
{-# INLINABLE count #-}
--- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
--- into a Pipe of Text
+-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text
+-- returning a Pipe of ByteStrings that begins at the point of failure.
decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
-decodeUtf8 = go PE.streamDecodeUtf8 where
- go dec0 p = do
- x <- lift (next p)
- case x of Left r -> return (return r)
- Right (chunk, p') ->
- case dec0 chunk of PE.Some text _ dec -> do yield text
- go dec p'
- PE.Other text bs -> do yield text
- return (do yield bs
- p')
+decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
+ go !carry dec0 p = do
+ x <- lift (next p)
+ case x of Left r -> if B.null carry
+ then return (return r) -- all bytestrinput was consumed
+ else return (do yield carry -- a potentially valid fragment remains
+ return r)
+
+ Right (chunk, p') -> case dec0 chunk of
+ PE.Some text carry2 dec -> do yield text
+ go carry2 dec p'
+ PE.Other text bs -> do yield text
+ return (do yield bs -- an invalid blob remains
+ p')
+{-# INLINABLE decodeUtf8 #-}
+
-- | Splits a 'Producer' after the given number of characters
splitAt
@Data.Text@ re-exports the 'Text' type.
@Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type).
--}
\ No newline at end of file
+-}
+
+
+
+decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
+-- decode codec = go B.empty where
+-- go extra p0 =
+-- do x <- lift (next p0)
+-- case x of Right (chunk, p) ->
+-- do let (text, stuff) = codecDecode codec (B.append extra chunk)
+-- yield text
+-- case stuff of Right extra' -> go extra' p
+-- Left (exc,bs) -> do yield text
+-- return (do yield bs
+-- p)
+-- Left r -> return (do yield extra
+-- return r)
+
+decode d p0 = case d of
+ PE.Other txt bad -> do yield txt
+ return (do yield bad
+ p0)
+ PE.Some txt extra dec -> do yield txt
+ x <- lift (next p0)
+ case x of Left r -> return (do yield extra
+ return r)
+ Right (chunk,p1) -> decode (dec chunk) p1
+
+-- go !carry dec0 p = do
+-- x <- lift (next p)
+-- case x of Left r -> if B.null carry
+-- then return (return r) -- all bytestrinput was consumed
+-- else return (do yield carry -- a potentially valid fragment remains
+-- return r)
+--
+-- Right (chunk, p') -> case dec0 chunk of
+-- PE.Some text carry2 dec -> do yield text
+-- go carry2 dec p'
+-- PE.Other text bs -> do yield text
+-- return (do yield bs -- an invalid blob remains
+-- p')
+-- {-# INLINABLE decodeUtf8 #-}