aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authormichaelt <what_is_it_to_do_anything@yahoo.com>2014-01-14 22:11:25 -0500
committermichaelt <what_is_it_to_do_anything@yahoo.com>2014-01-14 22:11:25 -0500
commitca6f90a05bee6471d6837d629ddaee9b0a75bd50 (patch)
tree228d107d94ce2810667a4641ecf566227410147d
parent8853a440e37523bae8cb46827d0d2d356bad5c46 (diff)
downloadtext-pipes-ca6f90a05bee6471d6837d629ddaee9b0a75bd50.tar.gz
text-pipes-ca6f90a05bee6471d6837d629ddaee9b0a75bd50.tar.zst
text-pipes-ca6f90a05bee6471d6837d629ddaee9b0a75bd50.zip
nugatory
-rw-r--r--Pipes/Text.hs148
1 files changed, 42 insertions, 106 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs
index a5859a3..cf493e9 100644
--- a/Pipes/Text.hs
+++ b/Pipes/Text.hs
@@ -1,4 +1,4 @@
1{-# LANGUAGE RankNTypes, TypeFamilies, CPP #-} 1{-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-}
2 2
3{-| This module provides @pipes@ utilities for \"text streams\", which are 3{-| This module provides @pipes@ utilities for \"text streams\", which are
4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but 4 streams of 'Text' chunks. The individual chunks are uniformly @strict@, but
@@ -81,10 +81,6 @@ module Pipes.Text (
81 filter, 81 filter,
82 scan, 82 scan,
83 encodeUtf8, 83 encodeUtf8,
84#if MIN_VERSION_text(0,11,4)
85 pipeDecodeUtf8,
86 pipeDecodeUtf8With,
87#endif
88 pack, 84 pack,
89 unpack, 85 unpack,
90 toCaseFold, 86 toCaseFold,
@@ -119,10 +115,7 @@ module Pipes.Text (
119 group, 115 group,
120 lines, 116 lines,
121 words, 117 words,
122#if MIN_VERSION_text(0,11,4)
123 decodeUtf8, 118 decodeUtf8,
124 decodeUtf8With,
125#endif
126 -- * Transformations 119 -- * Transformations
127 intersperse, 120 intersperse,
128 121
@@ -148,6 +141,7 @@ module Pipes.Text (
148import Control.Exception (throwIO, try) 141import Control.Exception (throwIO, try)
149import Control.Monad (liftM, unless) 142import Control.Monad (liftM, unless)
150import Control.Monad.Trans.State.Strict (StateT(..)) 143import Control.Monad.Trans.State.Strict (StateT(..))
144import Data.Monoid ((<>))
151import qualified Data.Text as T 145import qualified Data.Text as T
152import qualified Data.Text.IO as T 146import qualified Data.Text.IO as T
153import qualified Data.Text.Encoding as TE 147import qualified Data.Text.Encoding as TE
@@ -167,6 +161,7 @@ import qualified GHC.IO.Exception as G
167import Pipes 161import Pipes
168import qualified Pipes.ByteString as PB 162import qualified Pipes.ByteString as PB
169import qualified Pipes.ByteString.Parse as PBP 163import qualified Pipes.ByteString.Parse as PBP
164import qualified Pipes.Text.Internal as PE
170import Pipes.Text.Parse ( 165import Pipes.Text.Parse (
171 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) 166 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
172import Pipes.Core (respond, Server') 167import Pipes.Core (respond, Server')
@@ -211,48 +206,36 @@ import Prelude hiding (
211-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's 206-- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's
212fromLazy :: (Monad m) => TL.Text -> Producer' Text m () 207fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
213fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) 208fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
214{-# INLINABLE fromLazy #-} 209{-# INLINE fromLazy #-}
215 210
216-- | Stream text from 'stdin' 211-- | Stream text from 'stdin'
217stdin :: MonadIO m => Producer' Text m () 212stdin :: MonadIO m => Producer Text m ()
218stdin = fromHandle IO.stdin 213stdin = fromHandle IO.stdin
219{-# INLINABLE stdin #-} 214{-# INLINE stdin #-}
220 215
221{-| Convert a 'IO.Handle' into a text stream using a text size 216{-| Convert a 'IO.Handle' into a text stream using a text size
222 determined by the good sense of the text library. 217 determined by the good sense of the text library; note that this
223 218 is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
219 but uses the system encoding and has other `Data.Text.IO` features
224-} 220-}
225 221
226fromHandle :: MonadIO m => IO.Handle -> Producer' Text m () 222fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
227#if MIN_VERSION_text(0,11,4) 223fromHandle h = go where
228fromHandle h = go TE.streamDecodeUtf8 where 224 go = do txt <- liftIO (T.hGetChunk h)
229 act = B.hGetSome h defaultChunkSize 225 unless (T.null txt) $ do yield txt
230 go dec = do chunk <- liftIO act 226 go
231 case dec chunk of
232 TE.Some text _ dec' -> do yield text
233 unless (B.null chunk) (go dec')
234{-# INLINE fromHandle#-}
235-- bytestring fromHandle + streamDecodeUtf8 is 3 times as fast as
236-- the dedicated Text IO function 'hGetChunk' ;
237-- this way "runEffect $ PT.fromHandle hIn >-> PT.toHandle hOut"
238-- runs the same as the conduit equivalent, only slightly slower
239-- than "runEffect $ PB.fromHandle hIn >-> PB.toHandle hOut"
240#else
241fromHandle h = go where
242 go = do txt <- liftIO (T.hGetChunk h)
243 unless (T.null txt) $ do yield txt
244 go
245{-# INLINABLE fromHandle#-} 227{-# INLINABLE fromHandle#-}
246#endif 228
247{-| Stream text from a file using Pipes.Safe 229
230{-| Stream text from a file in the simple fashion of @Data.Text.IO@
248 231
249>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout 232>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
250MAIN = PUTSTRLN "HELLO WORLD" 233MAIN = PUTSTRLN "HELLO WORLD"
251-} 234-}
252 235
253readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m () 236readFile :: MonadSafe m => FilePath -> Producer Text m ()
254readFile file = Safe.withFile file IO.ReadMode fromHandle 237readFile file = Safe.withFile file IO.ReadMode fromHandle
255{-# INLINABLE readFile #-} 238{-# INLINE readFile #-}
256 239
257{-| Stream lines of text from stdin (for testing in ghci etc.) 240{-| Stream lines of text from stdin (for testing in ghci etc.)
258 241
@@ -272,7 +255,7 @@ stdinLn = go where
272 txt <- liftIO (T.hGetLine IO.stdin) 255 txt <- liftIO (T.hGetLine IO.stdin)
273 yield txt 256 yield txt
274 go 257 go
275 258{-# INLINABLE stdinLn #-}
276 259
277{-| Stream text to 'stdout' 260{-| Stream text to 'stdout'
278 261
@@ -326,8 +309,9 @@ toHandle h = for cat (liftIO . T.hPutStr h)
326 309
327 310
328-- | Stream text into a file. Uses @pipes-safe@. 311-- | Stream text into a file. Uses @pipes-safe@.
329writeFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Consumer' Text m () 312writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
330writeFile file = Safe.withFile file IO.WriteMode toHandle 313writeFile file = Safe.withFile file IO.WriteMode toHandle
314{-# INLINE writeFile #-}
331 315
332-- | Apply a transformation to each 'Char' in the stream 316-- | Apply a transformation to each 'Char' in the stream
333map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r 317map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r
@@ -610,74 +594,26 @@ count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
610count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) 594count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
611{-# INLINABLE count #-} 595{-# INLINABLE count #-}
612 596
613#if MIN_VERSION_text(0,11,4) 597-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded into a Pipe of Text
614-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded 598-- returning a Pipe of ByteStrings that begins at the point of failure.
615-- into a Pipe of Text 599
616decodeUtf8 600decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
617 :: Monad m 601decodeUtf8 = go B.empty PE.streamDecodeUtf8 where
618 => Producer ByteString m r -> Producer Text m (Producer ByteString m r) 602 go !carry dec0 p = do
619decodeUtf8 = go TE.streamDecodeUtf8 603 x <- lift (next p)
620 where go dec p = do 604 case x of Left r -> if B.null carry
621 x <- lift (next p) 605 then return (return r) -- all bytestrinput was consumed
622 case x of 606 else return (do yield carry -- a potentially valid fragment remains
623 Left r -> return (return r) 607 return r)
624 Right (chunk, p') -> do 608
625 let TE.Some text l dec' = dec chunk 609 Right (chunk, p') -> case dec0 chunk of
626 if B.null l 610 PE.Some text carry2 dec -> do yield text
627 then do 611 go carry2 dec p'
628 yield text 612 PE.Other text bs -> do yield text
629 go dec' p' 613 return (do yield bs -- an invalid blob remains
630 else return $ do 614 p')
631 yield l 615{-# INLINABLE decodeUtf8 #-}
632 p' 616
633{-# INLINEABLE decodeUtf8 #-}
634
635-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
636-- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@
637-- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\"
638decodeUtf8With
639 :: Monad m
640 => TE.OnDecodeError
641 -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
642decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
643 where go dec p = do
644 x <- lift (next p)
645 case x of
646 Left r -> return (return r)
647 Right (chunk, p') -> do
648 let TE.Some text l dec' = dec chunk
649 if B.null l
650 then do
651 yield text
652 go dec' p'
653 else return $ do
654 yield l
655 p'
656{-# INLINEABLE decodeUtf8With #-}
657
658-- | A simple pipe from 'ByteString' to 'Text'; a decoding error will arise
659-- with any chunk that contains a sequence of bytes that is unreadable. Otherwise
660-- only few bytes will only be moved from one chunk to the next before decoding.
661pipeDecodeUtf8 :: Monad m => Pipe ByteString Text m r
662pipeDecodeUtf8 = go TE.streamDecodeUtf8
663 where go dec = do chunk <- await
664 case dec chunk of
665 TE.Some text l dec' -> do yield text
666 go dec'
667{-# INLINEABLE pipeDecodeUtf8 #-}
668
669-- | A simple pipe from 'ByteString' to 'Text' using a replacement function.
670pipeDecodeUtf8With
671 :: Monad m
672 => TE.OnDecodeError
673 -> Pipe ByteString Text m r
674pipeDecodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
675 where go dec = do chunk <- await
676 case dec chunk of
677 TE.Some text l dec' -> do yield text
678 go dec'
679{-# INLINEABLE pipeDecodeUtf8With #-}
680#endif
681 617
682-- | Splits a 'Producer' after the given number of characters 618-- | Splits a 'Producer' after the given number of characters
683splitAt 619splitAt