aboutsummaryrefslogtreecommitdiffhomepage
path: root/Pipes/Text.hs
diff options
context:
space:
mode:
authormichaelt <what_is_it_to_do_anything@yahoo.com>2013-12-23 13:02:49 -0500
committermichaelt <what_is_it_to_do_anything@yahoo.com>2013-12-23 13:02:49 -0500
commit8c48280926efffc0ca52a5d9ca796d639d053379 (patch)
tree972ca8955b5581d634663424e973e56fa4487fe5 /Pipes/Text.hs
parent8853a440e37523bae8cb46827d0d2d356bad5c46 (diff)
downloadtext-pipes-8c48280926efffc0ca52a5d9ca796d639d053379.tar.gz
text-pipes-8c48280926efffc0ca52a5d9ca796d639d053379.tar.zst
text-pipes-8c48280926efffc0ca52a5d9ca796d639d053379.zip
variant using text internals in place of text streamDecodeUtf8
Diffstat (limited to 'Pipes/Text.hs')
-rw-r--r--Pipes/Text.hs136
1 files changed, 59 insertions, 77 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs
index a5859a3..6845dd3 100644
--- a/Pipes/Text.hs
+++ b/Pipes/Text.hs
@@ -81,10 +81,6 @@ module Pipes.Text (
81 filter, 81 filter,
82 scan, 82 scan,
83 encodeUtf8, 83 encodeUtf8,
84#if MIN_VERSION_text(0,11,4)
85 pipeDecodeUtf8,
86 pipeDecodeUtf8With,
87#endif
88 pack, 84 pack,
89 unpack, 85 unpack,
90 toCaseFold, 86 toCaseFold,
@@ -119,10 +115,8 @@ module Pipes.Text (
119 group, 115 group,
120 lines, 116 lines,
121 words, 117 words,
122#if MIN_VERSION_text(0,11,4)
123 decodeUtf8, 118 decodeUtf8,
124 decodeUtf8With, 119 decodeUtf8With,
125#endif
126 -- * Transformations 120 -- * Transformations
127 intersperse, 121 intersperse,
128 122
@@ -167,6 +161,7 @@ import qualified GHC.IO.Exception as G
167import Pipes 161import Pipes
168import qualified Pipes.ByteString as PB 162import qualified Pipes.ByteString as PB
169import qualified Pipes.ByteString.Parse as PBP 163import qualified Pipes.ByteString.Parse as PBP
164import qualified Pipes.Text.Internal as PE
170import Pipes.Text.Parse ( 165import Pipes.Text.Parse (
171 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) 166 nextChar, drawChar, unDrawChar, peekChar, isEndOfChars )
172import Pipes.Core (respond, Server') 167import Pipes.Core (respond, Server')
@@ -214,43 +209,60 @@ fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
214{-# INLINABLE fromLazy #-} 209{-# INLINABLE fromLazy #-}
215 210
216-- | Stream text from 'stdin' 211-- | Stream text from 'stdin'
217stdin :: MonadIO m => Producer' Text m () 212stdin :: MonadIO m => Producer' Text m (Producer ByteString m ())
218stdin = fromHandle IO.stdin 213stdin = fromHandle IO.stdin
219{-# INLINABLE stdin #-} 214{-# INLINABLE stdin #-}
220 215
221{-| Convert a 'IO.Handle' into a text stream using a text size 216{-| Convert a 'IO.Handle' into a text stream using a text size
222 determined by the good sense of the text library. 217 determined by the good sense of the text library.
223
224-} 218-}
225 219
226fromHandle :: MonadIO m => IO.Handle -> Producer' Text m () 220fromHandle :: MonadIO m => IO.Handle -> Producer' Text m (Producer ByteString m ())
227#if MIN_VERSION_text(0,11,4) 221-- TODO: this should perhaps just be `decodeUtf8 (PB.fromHandle h)`
228fromHandle h = go TE.streamDecodeUtf8 where 222-- if only so that mistakes can be concentrated in one place.
223-- This modifies something that was faster on an earlier iteration.
224-- Note also that the `text` replacement system is being ignored;
225-- with a replacement scheme one could have `Producer Text m ()`
226-- the relation to the replacement business needs to be thought out.
227-- The complicated type seems overmuch for the toy stdin above
228fromHandle h = go PE.streamDecodeUtf8 B.empty where
229 act = B.hGetSome h defaultChunkSize 229 act = B.hGetSome h defaultChunkSize
230 go dec = do chunk <- liftIO act 230 go dec old = do chunk <- liftIO act
231 case dec chunk of 231 if B.null chunk
232 TE.Some text _ dec' -> do yield text 232 then if B.null old then return (return ())
233 unless (B.null chunk) (go dec') 233 else return (yield old >> return ())
234 else case dec chunk of
235 PE.Some text bs dec' ->
236 if T.null text then go dec' (B.append old bs)
237 else do yield text
238 go dec' B.empty
239 PE.Other text bs ->
240 if T.null text then return (do yield old
241 yield bs
242 PB.fromHandle h)
243 else do yield text
244 return (do yield bs
245 PB.fromHandle h)
234{-# INLINE fromHandle#-} 246{-# INLINE fromHandle#-}
235-- bytestring fromHandle + streamDecodeUtf8 is 3 times as fast as 247-- bytestring fromHandle + streamDecodeUtf8 is 3 times as fast as
236-- the dedicated Text IO function 'hGetChunk' ; 248-- the dedicated Text IO function 'hGetChunk' ;
237-- this way "runEffect $ PT.fromHandle hIn >-> PT.toHandle hOut" 249-- this way "runEffect $ PT.fromHandle hIn >-> PT.toHandle hOut"
238-- runs the same as the conduit equivalent, only slightly slower 250-- runs the same as the conduit equivalent, only slightly slower
239-- than "runEffect $ PB.fromHandle hIn >-> PB.toHandle hOut" 251-- than "runEffect $ PB.fromHandle hIn >-> PB.toHandle hOut"
240#else 252-- #else
241fromHandle h = go where 253-- fromHandle h = go where
242 go = do txt <- liftIO (T.hGetChunk h) 254-- go = do txt <- liftIO (T.hGetChunk h)
243 unless (T.null txt) $ do yield txt 255-- unless (T.null txt) $ do yield txt
244 go 256-- go
245{-# INLINABLE fromHandle#-} 257-- {-# INLINABLE fromHandle#-}
246#endif 258-- #endif
247{-| Stream text from a file using Pipes.Safe 259{-| Stream text from a file using Pipes.Safe
248 260
249>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout 261>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
250MAIN = PUTSTRLN "HELLO WORLD" 262MAIN = PUTSTRLN "HELLO WORLD"
251-} 263-}
252 264
253readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m () 265readFile :: (MonadSafe m, Base m ~ IO) => FilePath -> Producer' Text m (Producer ByteString m ())
254readFile file = Safe.withFile file IO.ReadMode fromHandle 266readFile file = Safe.withFile file IO.ReadMode fromHandle
255{-# INLINABLE readFile #-} 267{-# INLINABLE readFile #-}
256 268
@@ -610,74 +622,44 @@ count :: (Monad m, Num n) => Text -> Producer Text m () -> m n
610count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) 622count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c))
611{-# INLINABLE count #-} 623{-# INLINABLE count #-}
612 624
613#if MIN_VERSION_text(0,11,4)
614-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded 625-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
615-- into a Pipe of Text 626-- into a Pipe of Text
616decodeUtf8 627decodeUtf8
617 :: Monad m 628 :: Monad m
618 => Producer ByteString m r -> Producer Text m (Producer ByteString m r) 629 => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
619decodeUtf8 = go TE.streamDecodeUtf8 630decodeUtf8 = decodeUtf8With Nothing
620 where go dec p = do
621 x <- lift (next p)
622 case x of
623 Left r -> return (return r)
624 Right (chunk, p') -> do
625 let TE.Some text l dec' = dec chunk
626 if B.null l
627 then do
628 yield text
629 go dec' p'
630 else return $ do
631 yield l
632 p'
633{-# INLINEABLE decodeUtf8 #-} 631{-# INLINEABLE decodeUtf8 #-}
634 632
635-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded 633-- | Transform a Pipe of 'ByteString's expected to be UTF-8 encoded
636-- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@ 634-- into a Pipe of Text with a replacement function of type @String -> Maybe Word8 -> Maybe Char@
637-- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\" 635-- E.g. 'Data.Text.Encoding.Error.lenientDecode', which simply replaces bad bytes with \"�\"
638decodeUtf8With 636decodeUtf8With
639 :: Monad m 637 :: Monad m
640 => TE.OnDecodeError 638 => Maybe TE.OnDecodeError
641 -> Producer ByteString m r -> Producer Text m (Producer ByteString m r) 639 -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
642decodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr) 640decodeUtf8With onErr = go (PE.streamDecodeUtf8With onErr) B.empty where
643 where go dec p = do 641 go dec old p = do
644 x <- lift (next p) 642 x <- lift (next p)
645 case x of 643 case x of
646 Left r -> return (return r) 644 Left r -> if B.null old then return (return r)
647 Right (chunk, p') -> do 645 else return (do yield old
648 let TE.Some text l dec' = dec chunk 646 return r)
649 if B.null l 647 Right (chunk, p') ->
650 then do 648 case dec chunk of
651 yield text 649 PE.Some text l dec' ->
652 go dec' p' 650 if T.null text then go dec' (B.append old l) p'
653 else return $ do 651 else do yield text
654 yield l 652 go dec' B.empty p'
655 p' 653 PE.Other text bs ->
654 if T.null text then return (do yield old
655 yield bs
656 p')
657 else do yield text
658 return (do yield bs
659 p')
656{-# INLINEABLE decodeUtf8With #-} 660{-# INLINEABLE decodeUtf8With #-}
657 661
658-- | A simple pipe from 'ByteString' to 'Text'; a decoding error will arise 662
659-- with any chunk that contains a sequence of bytes that is unreadable. Otherwise
660-- only few bytes will only be moved from one chunk to the next before decoding.
661pipeDecodeUtf8 :: Monad m => Pipe ByteString Text m r
662pipeDecodeUtf8 = go TE.streamDecodeUtf8
663 where go dec = do chunk <- await
664 case dec chunk of
665 TE.Some text l dec' -> do yield text
666 go dec'
667{-# INLINEABLE pipeDecodeUtf8 #-}
668
669-- | A simple pipe from 'ByteString' to 'Text' using a replacement function.
670pipeDecodeUtf8With
671 :: Monad m
672 => TE.OnDecodeError
673 -> Pipe ByteString Text m r
674pipeDecodeUtf8With onErr = go (TE.streamDecodeUtf8With onErr)
675 where go dec = do chunk <- await
676 case dec chunk of
677 TE.Some text l dec' -> do yield text
678 go dec'
679{-# INLINEABLE pipeDecodeUtf8With #-}
680#endif
681 663
682-- | Splits a 'Producer' after the given number of characters 664-- | Splits a 'Producer' after the given number of characters
683splitAt 665splitAt