-{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, CPP #-}
-#if __GLASGOW_HASKELL__ >= 702
-{-# LANGUAGE Trustworthy #-}
-#endif
+{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-}
+
{-| This module provides @pipes@ utilities for \"text streams\", which are
streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
a 'Producer' can be converted to and from lazy 'Text's, though this is generally
example, the following program copies a document from one file to another:
> import Pipes
-> import qualified Data.Text.Pipes as Text
+> import qualified Pipes.Text as Text
+> import qualified Pipes.Text.IO as Text
> import System.IO
>
> main =
To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
> import Pipes
-> import qualified Data.Text.Pipes as Text
+> import qualified Pipes.Text as Text
+> import qualified Pipes.Text.IO as Text
> import Pipes.Safe
>
> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
You can stream to and from 'stdin' and 'stdout' using the predefined 'stdin'
- and 'stdout' proxies, as with the following \"echo\" program:
+ and 'stdout' pipes from "Pipes.Text.IO", as with the following \"echo\" program:
> main = runEffect $ Text.stdin >-> Text.stdout
- You can also translate pure lazy 'TL.Text's to and from proxies:
+ You can also translate pure lazy 'TL.Text's to and from pipes:
> main = runEffect $ Text.fromLazy (TL.pack "Hello, world!\n") >-> Text.stdout
module Pipes.Text (
-- * Producers
fromLazy
- , stdin
- , fromHandle
- , readFile
- , stdinLn
+ -- , stdin
+ -- , fromHandle
+ -- , readFile
-- * Consumers
- , stdout
- , stdoutLn
- , toHandle
- , writeFile
+ -- , stdout
+ -- , toHandle
+ -- , writeFile
-- * Pipes
, map
, dropWhile
, filter
, scan
- , encodeUtf8
+-- , encodeUtf8
, pack
, unpack
, toCaseFold
, group
, word
, line
- , decodeUtf8
- , decode
+
+ -- -- * Decoding Lenses
+ -- , decodeUtf8
+ -- , codec
+ --
+ -- -- * Codecs
+ -- , utf8
+ -- , utf16_le
+ -- , utf16_be
+ -- , utf32_le
+ -- , utf32_be
+ --
+ -- -- * Other Decoding/Encoding Functions
+ -- , decodeIso8859_1
+ -- , decodeAscii
+ -- , encodeIso8859_1
+ -- , encodeAscii
-- * FreeT Splitters
, chunksOf
, lines
, words
-
-- * Transformations
, intersperse
, packChars
-- * Re-exports
-- $reexports
+ -- , DecodeResult(..)
+ -- , Codec
+ -- , TextException(..)
, module Data.ByteString
, module Data.Text
, module Data.Profunctor
- , module Data.Word
, module Pipes.Parse
+ , module Pipes.Group
) where
-import Control.Exception (throwIO, try)
import Control.Applicative ((<*))
-import Control.Monad (liftM, unless, join)
+import Control.Monad (liftM, join)
import Control.Monad.Trans.State.Strict (StateT(..), modify)
-import Data.Monoid ((<>))
import qualified Data.Text as T
-import qualified Data.Text.IO as T
-import qualified Data.Text.Encoding as TE
-import qualified Data.Text.Encoding.Error as TE
import Data.Text (Text)
import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.IO as TL
import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize)
-import Data.ByteString.Unsafe (unsafeTake, unsafeDrop)
import Data.ByteString (ByteString)
-import qualified Data.ByteString as B
-import Data.Char (ord, isSpace)
import Data.Functor.Constant (Constant(Constant, getConstant))
import Data.Functor.Identity (Identity)
import Data.Profunctor (Profunctor)
import qualified Data.Profunctor
-import qualified Data.List as List
-import Foreign.C.Error (Errno(Errno), ePIPE)
-import qualified GHC.IO.Exception as G
import Pipes
-import qualified Pipes.ByteString as PB
-import qualified Pipes.Text.Internal as PE
-import Pipes.Text.Internal (Codec(..))
-import Pipes.Core (respond, Server')
+import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
+import qualified Pipes.Group as PG
import qualified Pipes.Parse as PP
-import Pipes.Parse (Parser, concats, intercalates, FreeT(..))
-import qualified Pipes.Safe.Prelude as Safe
-import qualified Pipes.Safe as Safe
-import Pipes.Safe (MonadSafe(..), Base(..))
+import Pipes.Parse (Parser)
import qualified Pipes.Prelude as P
-import qualified System.IO as IO
import Data.Char (isSpace)
-import Data.Word (Word8)
import Prelude hiding (
all,
fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
{-# INLINE fromLazy #-}
--- | Stream text from 'stdin'
-stdin :: MonadIO m => Producer Text m ()
-stdin = fromHandle IO.stdin
-{-# INLINE stdin #-}
-
-{-| Convert a 'IO.Handle' into a text stream using a text size
- determined by the good sense of the text library; note that this
- is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
- but uses the system encoding and has other `Data.Text.IO` features
--}
-
-fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
-fromHandle h = go where
- go = do txt <- liftIO (T.hGetChunk h)
- unless (T.null txt) $ do yield txt
- go
-{-# INLINABLE fromHandle#-}
-
-
-{-| Stream text from a file in the simple fashion of @Data.Text.IO@
-
->>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
-MAIN = PUTSTRLN "HELLO WORLD"
--}
-
-readFile :: MonadSafe m => FilePath -> Producer Text m ()
-readFile file = Safe.withFile file IO.ReadMode fromHandle
-{-# INLINE readFile #-}
-
-{-| Stream lines of text from stdin (for testing in ghci etc.)
-
->>> let safely = runSafeT . runEffect
->>> safely $ for Text.stdinLn (lift . lift . print . T.length)
-hello
-5
-world
-5
-
--}
-stdinLn :: MonadIO m => Producer' Text m ()
-stdinLn = go where
- go = do
- eof <- liftIO (IO.hIsEOF IO.stdin)
- unless eof $ do
- txt <- liftIO (T.hGetLine IO.stdin)
- yield txt
- go
-{-# INLINABLE stdinLn #-}
-
-{-| Stream text to 'stdout'
-
- Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
-
- Note: For best performance, use @(for source (liftIO . putStr))@ instead of
- @(source >-> stdout)@ in suitable cases.
--}
-stdout :: MonadIO m => Consumer' Text m ()
-stdout = go
- where
- go = do
- txt <- await
- x <- liftIO $ try (T.putStr txt)
- case x of
- Left (G.IOError { G.ioe_type = G.ResourceVanished
- , G.ioe_errno = Just ioe })
- | Errno ioe == ePIPE
- -> return ()
- Left e -> liftIO (throwIO e)
- Right () -> go
-{-# INLINABLE stdout #-}
-
-stdoutLn :: (MonadIO m) => Consumer' Text m ()
-stdoutLn = go
- where
- go = do
- str <- await
- x <- liftIO $ try (T.putStrLn str)
- case x of
- Left (G.IOError { G.ioe_type = G.ResourceVanished
- , G.ioe_errno = Just ioe })
- | Errno ioe == ePIPE
- -> return ()
- Left e -> liftIO (throwIO e)
- Right () -> go
-{-# INLINABLE stdoutLn #-}
-
-{-| Convert a text stream into a 'Handle'
-
- Note: again, for best performance, where possible use
- @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
--}
-toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
-toHandle h = for cat (liftIO . T.hPutStr h)
-{-# INLINABLE toHandle #-}
-
-{-# RULES "p >-> toHandle h" forall p h .
- p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
- #-}
-
-
--- | Stream text into a file. Uses @pipes-safe@.
-writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
-writeFile file = Safe.withFile file IO.WriteMode toHandle
-{-# INLINE writeFile #-}
-
type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
-- | Transform a Pipe of 'Text' into a Pipe of 'ByteString's using UTF-8
-- encoding; @encodeUtf8 = Pipes.Prelude.map TE.encodeUtf8@ so more complex
-- encoding pipes can easily be constructed with the functions in @Data.Text.Encoding@
-encodeUtf8 :: Monad m => Pipe Text ByteString m r
-encodeUtf8 = P.map TE.encodeUtf8
-{-# INLINEABLE encodeUtf8 #-}
-
-{-# RULES "p >-> encodeUtf8" forall p .
- p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
- #-}
+-- encodeUtf8 :: Monad m => Pipe Text ByteString m r
+-- encodeUtf8 = P.map TE.encodeUtf8
+-- {-# INLINEABLE encodeUtf8 #-}
+--
+-- {-# RULES "p >-> encodeUtf8" forall p .
+-- p >-> encodeUtf8 = for p (\txt -> yield (TE.encodeUtf8 txt))
+-- #-}
-- | Transform a Pipe of 'String's into one of 'Text' chunks
pack :: Monad m => Pipe String Text m r
scan
:: (Monad m)
=> (Char -> Char -> Char) -> Char -> Pipe Text Text m r
-scan step begin = go begin
+scan step begin = do
+ yield (T.singleton begin)
+ go begin
where
go c = do
txt <- await
let txt' = T.scanl step c txt
c' = T.last txt'
- yield txt'
+ yield (T.tail txt')
go c'
{-# INLINABLE scan #-}
-
--- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text
--- returning a Pipe of ByteStrings that begins at the point of failure.
-
-decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
- (Producer Text m (Producer ByteString m r))
-decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
- (k (go B.empty PE.streamDecodeUtf8 p0)) where
- go !carry dec0 p = do
- x <- lift (next p)
- case x of Left r -> if B.null carry
- then return (return r) -- all bytestrinput was consumed
- else return (do yield carry -- a potentially valid fragment remains
- return r)
-
- Right (chunk, p') -> case dec0 chunk of
- PE.Some text carry2 dec -> do yield text
- go carry2 dec p'
- PE.Other text bs -> do yield text
- return (do yield bs -- an invalid blob remains
- p')
-{-# INLINABLE decodeUtf8 #-}
-
-
-- | Splits a 'Producer' after the given number of characters
splitAt
:: (Monad m, Integral n)
packChars = Data.Profunctor.dimap to (fmap from)
where
-- to :: Monad m => Producer Char m x -> Producer Text m x
- to p = PP.folds step id done (p^.PP.chunksOf defaultChunkSize)
+ to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize)
step diffAs c = diffAs . (c:)
go p = do
x <- next p
return $ case x of
- Left r -> PP.Pure r
- Right (txt, p') -> PP.Free $ do
+ Left r -> Pure r
+ Right (txt, p') -> Free $ do
p'' <- (yield txt >> p') ^. splitAt n
- return $ PP.FreeT (go p'')
+ return $ FreeT (go p'')
{-# INLINABLE chunksOf #-}
:: (Monad m)
=> (Char -> Bool)
-> Producer Text m r
- -> PP.FreeT (Producer Text m) m r
-splitsWith predicate p0 = PP.FreeT (go0 p0)
+ -> FreeT (Producer Text m) m r
+splitsWith predicate p0 = FreeT (go0 p0)
where
go0 p = do
x <- next p
case x of
- Left r -> return (PP.Pure r)
+ Left r -> return (Pure r)
Right (txt, p') ->
if (T.null txt)
then go0 p'
- else return $ PP.Free $ do
+ else return $ Free $ do
p'' <- (yield txt >> p') ^. span (not . predicate)
- return $ PP.FreeT (go1 p'')
+ return $ FreeT (go1 p'')
go1 p = do
x <- nextChar p
return $ case x of
- Left r -> PP.Pure r
- Right (_, p') -> PP.Free $ do
+ Left r -> Pure r
+ Right (_, p') -> Free $ do
p'' <- p' ^. span (not . predicate)
- return $ PP.FreeT (go1 p'')
+ return $ FreeT (go1 p'')
{-# INLINABLE splitsWith #-}
-- | Split a text stream using the given 'Char' as the delimiter
-> Lens' (Producer Text m r)
(FreeT (Producer Text m) m r)
splits c k p =
- fmap (PP.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
+ fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p))
{-# INLINABLE splits #-}
{-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's, using the
:: Monad m
=> (Char -> Char -> Bool)
-> Lens' (Producer Text m x) (FreeT (Producer Text m) m x)
-groupsBy equals k p0 = fmap concats (k (PP.FreeT (go p0))) where
+groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where
go p = do x <- next p
- case x of Left r -> return (PP.Pure r)
+ case x of Left r -> return (Pure r)
Right (bs, p') -> case T.uncons bs of
Nothing -> go p'
- Just (c, _) -> do return $ PP.Free $ do
+ Just (c, _) -> do return $ Free $ do
p'' <- (yield bs >> p')^.span (equals c)
- return $ PP.FreeT (go p'')
+ return $ FreeT (go p'')
{-# INLINABLE groupsBy #-}
:: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
lines = Data.Profunctor.dimap _lines (fmap _unlines)
where
- _lines p0 = PP.FreeT (go0 p0)
+ _lines p0 = FreeT (go0 p0)
where
go0 p = do
x <- next p
case x of
- Left r -> return (PP.Pure r)
+ Left r -> return (Pure r)
Right (txt, p') ->
if (T.null txt)
then go0 p'
- else return $ PP.Free $ go1 (yield txt >> p')
+ else return $ Free $ go1 (yield txt >> p')
go1 p = do
p' <- p ^. break ('\n' ==)
- return $ PP.FreeT $ do
+ return $ FreeT $ do
x <- nextChar p'
case x of
- Left r -> return $ PP.Pure r
+ Left r -> return $ Pure r
Right (_, p'') -> go0 p''
-- _unlines
-- :: Monad m
-- => FreeT (Producer Text m) m x -> Producer Text m x
- _unlines = PP.concats . PP.transFreeT addNewline
+ _unlines = concats . PG.maps (<* yield (T.singleton '\n'))
+
- -- addNewline
- -- :: Monad m => Producer Text m r -> Producer Text m r
- addNewline p = p <* yield (T.singleton '\n')
{-# INLINABLE lines #-}
-
-- | Split a text stream into 'FreeT'-delimited words
words
:: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r)
words = Data.Profunctor.dimap go (fmap _unwords)
where
- go p = PP.FreeT $ do
+ go p = FreeT $ do
x <- next (p >-> dropWhile isSpace)
return $ case x of
- Left r -> PP.Pure r
- Right (bs, p') -> PP.Free $ do
+ Left r -> Pure r
+ Right (bs, p') -> Free $ do
p'' <- (yield bs >> p') ^. break isSpace
return (go p'')
- _unwords = PP.intercalates (yield $ T.singleton ' ')
+ _unwords = PG.intercalates (yield $ T.singleton ' ')
{-# INLINABLE words #-}
intercalate p0 = go0
where
go0 f = do
- x <- lift (PP.runFreeT f)
+ x <- lift (runFreeT f)
case x of
- PP.Pure r -> return r
- PP.Free p -> do
+ Pure r -> return r
+ Free p -> do
f' <- p
go1 f'
go1 f = do
- x <- lift (PP.runFreeT f)
+ x <- lift (runFreeT f)
case x of
- PP.Pure r -> return r
- PP.Free p -> do
+ Pure r -> return r
+ Free p -> do
p0
f' <- p
go1 f'
unlines = go
where
go f = do
- x <- lift (PP.runFreeT f)
+ x <- lift (runFreeT f)
case x of
- PP.Pure r -> return r
- PP.Free p -> do
+ Pure r -> return r
+ Free p -> do
f' <- p
yield $ T.singleton '\n'
go f'
-}
unwords
:: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r
-unwords = intercalate (yield $ T.pack " ")
+unwords = intercalate (yield $ T.singleton ' ')
{-# INLINABLE unwords #-}
{- $parse
-}
-
-decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
--- decode codec = go B.empty where
--- go extra p0 =
--- do x <- lift (next p0)
--- case x of Right (chunk, p) ->
--- do let (text, stuff) = codecDecode codec (B.append extra chunk)
--- yield text
--- case stuff of Right extra' -> go extra' p
--- Left (exc,bs) -> do yield text
--- return (do yield bs
--- p)
--- Left r -> return (do yield extra
--- return r)
-
-decode d p0 = case d of
- PE.Other txt bad -> do yield txt
- return (do yield bad
- p0)
- PE.Some txt extra dec -> do yield txt
- x <- lift (next p0)
- case x of Left r -> return (do yield extra
- return r)
- Right (chunk,p1) -> decode (dec chunk) p1
-
--- go !carry dec0 p = do
--- x <- lift (next p)
--- case x of Left r -> if B.null carry
--- then return (return r) -- all bytestrinput was consumed
--- else return (do yield carry -- a potentially valid fragment remains
--- return r)
---
--- Right (chunk, p') -> case dec0 chunk of
--- PE.Some text carry2 dec -> do yield text
--- go carry2 dec p'
--- PE.Other text bs -> do yield text
--- return (do yield bs -- an invalid blob remains
--- p')
--- {-# INLINABLE decodeUtf8 #-}