aboutsummaryrefslogtreecommitdiffhomepage
path: root/Pipes/Text.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Pipes/Text.hs')
-rw-r--r--Pipes/Text.hs269
1 files changed, 34 insertions, 235 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs
index 4b2d2b0..8221c01 100644
--- a/Pipes/Text.hs
+++ b/Pipes/Text.hs
@@ -10,7 +10,8 @@
10 example, the following program copies a document from one file to another: 10 example, the following program copies a document from one file to another:
11 11
12> import Pipes 12> import Pipes
13> import qualified Data.Text.Pipes as Text 13> import qualified Pipes.Text as Text
14> import qualified Pipes.Text.IO as Text
14> import System.IO 15> import System.IO
15> 16>
16> main = 17> main =
@@ -21,7 +22,8 @@
21To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe): 22To stream from files, the following is perhaps more Prelude-like (note that it uses Pipes.Safe):
22 23
23> import Pipes 24> import Pipes
24> import qualified Data.Text.Pipes as Text 25> import qualified Pipes.Text as Text
26> import qualified Pipes.Text.IO as Text
25> import Pipes.Safe 27> import Pipes.Safe
26> 28>
27> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt" 29> main = runSafeT $ runEffect $ Text.readFile "inFile.txt" >-> Text.writeFile "outFile.txt"
@@ -61,14 +63,14 @@ To stream from files, the following is perhaps more Prelude-like (note that it u
61module Pipes.Text ( 63module Pipes.Text (
62 -- * Producers 64 -- * Producers
63 fromLazy 65 fromLazy
64 , stdin 66 -- , stdin
65 , fromHandle 67 -- , fromHandle
66 , readFile 68 -- , readFile
67 69
68 -- * Consumers 70 -- * Consumers
69 , stdout 71 -- , stdout
70 , toHandle 72 -- , toHandle
71 , writeFile 73 -- , writeFile
72 74
73 -- * Pipes 75 -- * Pipes
74 , map 76 , map
@@ -79,7 +81,7 @@ module Pipes.Text (
79 , dropWhile 81 , dropWhile
80 , filter 82 , filter
81 , scan 83 , scan
82 , encodeUtf8 84-- , encodeUtf8
83 , pack 85 , pack
84 , unpack 86 , unpack
85 , toCaseFold 87 , toCaseFold
@@ -120,22 +122,22 @@ module Pipes.Text (
120 , word 122 , word
121 , line 123 , line
122 124
123 -- * Decoding Lenses 125 -- -- * Decoding Lenses
124 , decodeUtf8 126 -- , decodeUtf8
125 , codec 127 -- , codec
126 128 --
127 -- * Codecs 129 -- -- * Codecs
128 , utf8 130 -- , utf8
129 , utf16_le 131 -- , utf16_le
130 , utf16_be 132 -- , utf16_be
131 , utf32_le 133 -- , utf32_le
132 , utf32_be 134 -- , utf32_be
133 135 --
134 -- * Other Decoding/Encoding Functions 136 -- -- * Other Decoding/Encoding Functions
135 , decodeIso8859_1 137 -- , decodeIso8859_1
136 , decodeAscii 138 -- , decodeAscii
137 , encodeIso8859_1 139 -- , encodeIso8859_1
138 , encodeAscii 140 -- , encodeAscii
139 141
140 -- * FreeT Splitters 142 -- * FreeT Splitters
141 , chunksOf 143 , chunksOf
@@ -157,11 +159,9 @@ module Pipes.Text (
157 159
158 -- * Re-exports 160 -- * Re-exports
159 -- $reexports 161 -- $reexports
160 , Decoding(..) 162 -- , DecodeResult(..)
161 , streamDecodeUtf8 163 -- , Codec
162 , decodeSomeUtf8 164 -- , TextException(..)
163 , Codec(..)
164 , TextException(..)
165 , module Data.ByteString 165 , module Data.ByteString
166 , module Data.Text 166 , module Data.Text
167 , module Data.Profunctor 167 , module Data.Profunctor
@@ -170,7 +170,6 @@ module Pipes.Text (
170 , module Pipes.Group 170 , module Pipes.Group
171 ) where 171 ) where
172 172
173import Control.Exception (throwIO, try)
174import Control.Applicative ((<*)) 173import Control.Applicative ((<*))
175import Control.Monad (liftM, unless, join) 174import Control.Monad (liftM, unless, join)
176import Control.Monad.Trans.State.Strict (StateT(..), modify) 175import Control.Monad.Trans.State.Strict (StateT(..), modify)
@@ -193,24 +192,20 @@ import Data.Functor.Identity (Identity)
193import Data.Profunctor (Profunctor) 192import Data.Profunctor (Profunctor)
194import qualified Data.Profunctor 193import qualified Data.Profunctor
195import qualified Data.List as List 194import qualified Data.List as List
196import Foreign.C.Error (Errno(Errno), ePIPE)
197import qualified GHC.IO.Exception as G
198import Pipes 195import Pipes
199import qualified Pipes.ByteString as PB 196import qualified Pipes.ByteString as PB
200import qualified Pipes.Text.Internal as PI 197-- import Pipes.Text.Decoding
201import Pipes.Text.Internal
202import Pipes.Core (respond, Server') 198import Pipes.Core (respond, Server')
203import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..)) 199import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..))
204import qualified Pipes.Group as PG 200import qualified Pipes.Group as PG
205import qualified Pipes.Parse as PP 201import qualified Pipes.Parse as PP
206import Pipes.Parse (Parser) 202import Pipes.Parse (Parser)
207import qualified Pipes.Safe.Prelude as Safe 203
208import qualified Pipes.Safe as Safe
209import Pipes.Safe (MonadSafe(..), Base(..))
210import qualified Pipes.Prelude as P 204import qualified Pipes.Prelude as P
211import qualified System.IO as IO 205import qualified System.IO as IO
212import Data.Char (isSpace) 206import Data.Char (isSpace)
213import Data.Word (Word8) 207import Data.Word (Word8)
208import Data.Text.StreamDecoding
214 209
215import Prelude hiding ( 210import Prelude hiding (
216 all, 211 all,
@@ -246,78 +241,6 @@ fromLazy :: (Monad m) => TL.Text -> Producer' Text m ()
246fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) 241fromLazy = foldrChunks (\e a -> yield e >> a) (return ())
247{-# INLINE fromLazy #-} 242{-# INLINE fromLazy #-}
248 243
249-- | Stream text from 'stdin'
250stdin :: MonadIO m => Producer Text m ()
251stdin = fromHandle IO.stdin
252{-# INLINE stdin #-}
253
254{-| Convert a 'IO.Handle' into a text stream using a text size
255 determined by the good sense of the text library; note that this
256 is distinctly slower than @decideUtf8 (Pipes.ByteString.fromHandle h)@
257 but uses the system encoding and has other `Data.Text.IO` features
258-}
259
260fromHandle :: MonadIO m => IO.Handle -> Producer Text m ()
261fromHandle h = go where
262 go = do txt <- liftIO (T.hGetChunk h)
263 unless (T.null txt) ( do yield txt
264 go )
265{-# INLINABLE fromHandle#-}
266
267
268{-| Stream text from a file in the simple fashion of @Data.Text.IO@
269
270>>> runSafeT $ runEffect $ Text.readFile "hello.hs" >-> Text.map toUpper >-> hoist lift Text.stdout
271MAIN = PUTSTRLN "HELLO WORLD"
272-}
273
274readFile :: MonadSafe m => FilePath -> Producer Text m ()
275readFile file = Safe.withFile file IO.ReadMode fromHandle
276{-# INLINE readFile #-}
277
278
279{-| Stream text to 'stdout'
280
281 Unlike 'toHandle', 'stdout' gracefully terminates on a broken output pipe.
282
283 Note: For best performance, it might be best just to use @(for source (liftIO . putStr))@
284 instead of @(source >-> stdout)@ .
285-}
286stdout :: MonadIO m => Consumer' Text m ()
287stdout = go
288 where
289 go = do
290 txt <- await
291 x <- liftIO $ try (T.putStr txt)
292 case x of
293 Left (G.IOError { G.ioe_type = G.ResourceVanished
294 , G.ioe_errno = Just ioe })
295 | Errno ioe == ePIPE
296 -> return ()
297 Left e -> liftIO (throwIO e)
298 Right () -> go
299{-# INLINABLE stdout #-}
300
301
302{-| Convert a text stream into a 'Handle'
303
304 Note: again, for best performance, where possible use
305 @(for source (liftIO . hPutStr handle))@ instead of @(source >-> toHandle handle)@.
306-}
307toHandle :: MonadIO m => IO.Handle -> Consumer' Text m r
308toHandle h = for cat (liftIO . T.hPutStr h)
309{-# INLINABLE toHandle #-}
310
311{-# RULES "p >-> toHandle h" forall p h .
312 p >-> toHandle h = for p (\txt -> liftIO (T.hPutStr h txt))
313 #-}
314
315
316-- | Stream text into a file. Uses @pipes-safe@.
317writeFile :: (MonadSafe m) => FilePath -> Consumer' Text m ()
318writeFile file = Safe.withFile file IO.WriteMode toHandle
319{-# INLINE writeFile #-}
320
321 244
322type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a) 245type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
323 246
@@ -690,28 +613,6 @@ isEndOfChars = do
690{-# INLINABLE isEndOfChars #-} 613{-# INLINABLE isEndOfChars #-}
691 614
692 615
693{- | An improper lens into a stream of 'ByteString' expected to be UTF-8 encoded; the associated
694 stream of Text ends by returning a stream of ByteStrings beginning at the point of failure.
695 -}
696
697decodeUtf8 :: Monad m => Lens' (Producer ByteString m r)
698 (Producer Text m (Producer ByteString m r))
699decodeUtf8 k p0 = fmap (\p -> join (for p (yield . TE.encodeUtf8)))
700 (k (go B.empty PI.streamDecodeUtf8 p0)) where
701 go !carry dec0 p = do
702 x <- lift (next p)
703 case x of Left r -> return (if B.null carry
704 then return r -- all bytestring input was consumed
705 else (do yield carry -- a potentially valid fragment remains
706 return r))
707
708 Right (chunk, p') -> case dec0 chunk of
709 PI.Some text carry2 dec -> do yield text
710 go carry2 dec p'
711 PI.Other text bs -> do yield text
712 return (do yield bs -- an invalid blob remains
713 p')
714{-# INLINABLE decodeUtf8 #-}
715 616
716 617
717-- | Splits a 'Producer' after the given number of characters 618-- | Splits a 'Producer' after the given number of characters
@@ -1057,106 +958,4 @@ unwords = intercalate (yield $ T.singleton ' ')
1057 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym. 958 @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym.
1058-} 959-}
1059 960
1060{- | Use a 'Codec' as a pipes-style 'Lens' into a byte stream; the available 'Codec' s are 961
1061 'utf8', 'utf16_le', 'utf16_be', 'utf32_le', 'utf32_be' . The 'Codec' concept and the
1062 individual 'Codec' definitions follow the enumerator and conduit libraries.
1063
1064 Utf8 is handled differently in this library -- without the use of 'unsafePerformIO' &co
1065 to catch 'Text' exceptions; but the same 'mypipe ^. codec utf8' interface can be used.
1066 'mypipe ^. decodeUtf8' should be the same, but has a somewhat more direct and thus perhaps
1067 better implementation.
1068
1069 -}
1070codec :: Monad m => Codec -> Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
1071codec (Codec _ enc dec) k p0 = fmap (\p -> join (for p (yield . fst . enc)))
1072 (k (decoder (dec B.empty) p0) ) where
1073 decoder :: Monad m => PI.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1074 decoder !d p0 = case d of
1075 PI.Other txt bad -> do yield txt
1076 return (do yield bad
1077 p0)
1078 PI.Some txt extra dec -> do yield txt
1079 x <- lift (next p0)
1080 case x of Left r -> return (do yield extra
1081 return r)
1082 Right (chunk,p1) -> decoder (dec chunk) p1
1083
1084{- | ascii and latin encodings only represent a small fragment of 'Text'; thus we cannot
1085 use the pipes 'Lens' style to work with them. Rather we simply define functions
1086 each way.
1087
1088 'encodeAscii' : Reduce as much of your stream of 'Text' actually is ascii to a byte stream,
1089 returning the rest of the 'Text' at the first non-ascii 'Char'
1090-}
1091encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1092encodeAscii = go where
1093 go p = do echunk <- lift (next p)
1094 case echunk of
1095 Left r -> return (return r)
1096 Right (chunk, p') ->
1097 if T.null chunk
1098 then go p'
1099 else let (safe, unsafe) = T.span (\c -> ord c <= 0x7F) chunk
1100 in do yield (B8.pack (T.unpack safe))
1101 if T.null unsafe
1102 then go p'
1103 else return $ do yield unsafe
1104 p'
1105{- | Reduce as much of your stream of 'Text' actually is iso8859 or latin1 to a byte stream,
1106 returning the rest of the 'Text' upon hitting any non-latin 'Char'
1107 -}
1108encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
1109encodeIso8859_1 = go where
1110 go p = do etxt <- lift (next p)
1111 case etxt of
1112 Left r -> return (return r)
1113 Right (txt, p') ->
1114 if T.null txt
1115 then go p'
1116 else let (safe, unsafe) = T.span (\c -> ord c <= 0xFF) txt
1117 in do yield (B8.pack (T.unpack safe))
1118 if T.null unsafe
1119 then go p'
1120 else return $ do yield unsafe
1121 p'
1122
1123{- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
1124 unused 'ByteString' upon hitting an un-ascii byte.
1125 -}
1126decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1127decodeAscii = go where
1128 go p = do echunk <- lift (next p)
1129 case echunk of
1130 Left r -> return (return r)
1131 Right (chunk, p') ->
1132 if B.null chunk
1133 then go p'
1134 else let (safe, unsafe) = B.span (<= 0x7F) chunk
1135 in do yield (T.pack (B8.unpack safe))
1136 if B.null unsafe
1137 then go p'
1138 else return $ do yield unsafe
1139 p'
1140
1141{- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
1142 unused 'ByteString' upon hitting the rare un-latinizable byte.
1143 -}
1144decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
1145decodeIso8859_1 = go where
1146 go p = do echunk <- lift (next p)
1147 case echunk of
1148 Left r -> return (return r)
1149 Right (chunk, p') ->
1150 if B.null chunk
1151 then go p'
1152 else let (safe, unsafe) = B.span (<= 0xFF) chunk
1153 in do yield (T.pack (B8.unpack safe))
1154 if B.null unsafe
1155 then go p'
1156 else return $ do yield unsafe
1157 p'
1158
1159
1160
1161
1162