diff options
Diffstat (limited to 'Pipes/Text.hs')
-rw-r--r-- | Pipes/Text.hs | 74 |
1 files changed, 60 insertions, 14 deletions
diff --git a/Pipes/Text.hs b/Pipes/Text.hs index cf493e9..99e4ed6 100644 --- a/Pipes/Text.hs +++ b/Pipes/Text.hs | |||
@@ -1,9 +1,12 @@ | |||
1 | {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-} | 1 | {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns #-} |
2 | 2 | #if __GLASGOW_HASKELL__ >= 702 | |
3 | {-# LANGUAGE Trustworthy #-} | ||
4 | #endif | ||
3 | {-| This module provides @pipes@ utilities for \"text streams\", which are | 5 | {-| This module provides @pipes@ utilities for \"text streams\", which are |
4 | streams of 'Text' chunks. The individual chunks are uniformly @strict@, but | 6 | streams of 'Text' chunks. The individual chunks are uniformly @strict@, but |
5 | a 'Producer' can be converted to and from lazy 'Text's; an 'IO.Handle' can | 7 | a 'Producer' can be converted to and from lazy 'Text's, though this is generally |
6 | be associated with a 'Producer' or 'Consumer' according as it is read or written to. | 8 | unwise. Where pipes IO replaces lazy IO, 'Producer Text m r' replaces lazy 'Text'. |
9 | An 'IO.Handle' can be associated with a 'Producer' or 'Consumer' according as it is read or written to. | ||
7 | 10 | ||
8 | To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For | 11 | To stream to or from 'IO.Handle's, one can use 'fromHandle' or 'toHandle'. For |
9 | example, the following program copies a document from one file to another: | 12 | example, the following program copies a document from one file to another: |
@@ -52,9 +55,9 @@ To stream from files, the following is perhaps more Prelude-like (note that it u | |||
52 | 55 | ||
53 | Note that functions in this library are designed to operate on streams that | 56 | Note that functions in this library are designed to operate on streams that |
54 | are insensitive to text boundaries. This means that they may freely split | 57 | are insensitive to text boundaries. This means that they may freely split |
55 | text into smaller texts and /discard empty texts/. However, they will | 58 | text into smaller texts, /discard empty texts/. However, apart from the |
56 | /never concatenate texts/ in order to provide strict upper bounds on memory | 59 | special case of 'concatMap', they will /never concatenate texts/ in order |
57 | usage. | 60 | to provide strict upper bounds on memory usage -- with the single exception of 'concatMap'. |
58 | -} | 61 | -} |
59 | 62 | ||
60 | module Pipes.Text ( | 63 | module Pipes.Text ( |
@@ -91,7 +94,7 @@ module Pipes.Text ( | |||
91 | -- * Folds | 94 | -- * Folds |
92 | toLazy, | 95 | toLazy, |
93 | toLazyM, | 96 | toLazyM, |
94 | fold, | 97 | foldChars, |
95 | head, | 98 | head, |
96 | last, | 99 | last, |
97 | null, | 100 | null, |
@@ -116,6 +119,7 @@ module Pipes.Text ( | |||
116 | lines, | 119 | lines, |
117 | words, | 120 | words, |
118 | decodeUtf8, | 121 | decodeUtf8, |
122 | decode, | ||
119 | -- * Transformations | 123 | -- * Transformations |
120 | intersperse, | 124 | intersperse, |
121 | 125 | ||
@@ -139,7 +143,7 @@ module Pipes.Text ( | |||
139 | ) where | 143 | ) where |
140 | 144 | ||
141 | import Control.Exception (throwIO, try) | 145 | import Control.Exception (throwIO, try) |
142 | import Control.Monad (liftM, unless) | 146 | import Control.Monad (liftM, unless, join) |
143 | import Control.Monad.Trans.State.Strict (StateT(..)) | 147 | import Control.Monad.Trans.State.Strict (StateT(..)) |
144 | import Data.Monoid ((<>)) | 148 | import Data.Monoid ((<>)) |
145 | import qualified Data.Text as T | 149 | import qualified Data.Text as T |
@@ -160,13 +164,14 @@ import Foreign.C.Error (Errno(Errno), ePIPE) | |||
160 | import qualified GHC.IO.Exception as G | 164 | import qualified GHC.IO.Exception as G |
161 | import Pipes | 165 | import Pipes |
162 | import qualified Pipes.ByteString as PB | 166 | import qualified Pipes.ByteString as PB |
163 | import qualified Pipes.ByteString.Parse as PBP | 167 | import qualified Pipes.ByteString as PBP |
164 | import qualified Pipes.Text.Internal as PE | 168 | import qualified Pipes.Text.Internal as PE |
169 | import Pipes.Text.Internal (Codec(..)) | ||
165 | import Pipes.Text.Parse ( | 170 | import Pipes.Text.Parse ( |
166 | nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) | 171 | nextChar, drawChar, unDrawChar, peekChar, isEndOfChars ) |
167 | import Pipes.Core (respond, Server') | 172 | import Pipes.Core (respond, Server') |
168 | import qualified Pipes.Parse as PP | 173 | import qualified Pipes.Parse as PP |
169 | import Pipes.Parse (input, concat, FreeT) | 174 | import Pipes.Parse ( FreeT) |
170 | import qualified Pipes.Safe.Prelude as Safe | 175 | import qualified Pipes.Safe.Prelude as Safe |
171 | import qualified Pipes.Safe as Safe | 176 | import qualified Pipes.Safe as Safe |
172 | import Pipes.Safe (MonadSafe(..), Base(..)) | 177 | import Pipes.Safe (MonadSafe(..), Base(..)) |
@@ -499,10 +504,10 @@ toLazyM = liftM TL.fromChunks . P.toListM | |||
499 | {-# INLINABLE toLazyM #-} | 504 | {-# INLINABLE toLazyM #-} |
500 | 505 | ||
501 | -- | Reduce the text stream using a strict left fold over characters | 506 | -- | Reduce the text stream using a strict left fold over characters |
502 | fold | 507 | foldChars |
503 | :: Monad m | 508 | :: Monad m |
504 | => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r | 509 | => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r |
505 | fold step begin done = P.fold (T.foldl' step) begin done | 510 | foldChars step begin done = P.fold (T.foldl' step) begin done |
506 | {-# INLINABLE fold #-} | 511 | {-# INLINABLE fold #-} |
507 | 512 | ||
508 | -- | Retrieve the first 'Char' | 513 | -- | Retrieve the first 'Char' |
@@ -879,4 +884,45 @@ unwords = intercalate (yield $ T.pack " ") | |||
879 | @Data.Text@ re-exports the 'Text' type. | 884 | @Data.Text@ re-exports the 'Text' type. |
880 | 885 | ||
881 | @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type). | 886 | @Pipes.Parse@ re-exports 'input', 'concat', and 'FreeT' (the type). |
882 | -} \ No newline at end of file | 887 | -} |
888 | |||
889 | |||
890 | |||
891 | decode :: Monad m => PE.Decoding -> Producer ByteString m r -> Producer Text m (Producer ByteString m r) | ||
892 | -- decode codec = go B.empty where | ||
893 | -- go extra p0 = | ||
894 | -- do x <- lift (next p0) | ||
895 | -- case x of Right (chunk, p) -> | ||
896 | -- do let (text, stuff) = codecDecode codec (B.append extra chunk) | ||
897 | -- yield text | ||
898 | -- case stuff of Right extra' -> go extra' p | ||
899 | -- Left (exc,bs) -> do yield text | ||
900 | -- return (do yield bs | ||
901 | -- p) | ||
902 | -- Left r -> return (do yield extra | ||
903 | -- return r) | ||
904 | |||
905 | decode d p0 = case d of | ||
906 | PE.Other txt bad -> do yield txt | ||
907 | return (do yield bad | ||
908 | p0) | ||
909 | PE.Some txt extra dec -> do yield txt | ||
910 | x <- lift (next p0) | ||
911 | case x of Left r -> return (do yield extra | ||
912 | return r) | ||
913 | Right (chunk,p1) -> decode (dec chunk) p1 | ||
914 | |||
915 | -- go !carry dec0 p = do | ||
916 | -- x <- lift (next p) | ||
917 | -- case x of Left r -> if B.null carry | ||
918 | -- then return (return r) -- all bytestrinput was consumed | ||
919 | -- else return (do yield carry -- a potentially valid fragment remains | ||
920 | -- return r) | ||
921 | -- | ||
922 | -- Right (chunk, p') -> case dec0 chunk of | ||
923 | -- PE.Some text carry2 dec -> do yield text | ||
924 | -- go carry2 dec p' | ||
925 | -- PE.Other text bs -> do yield text | ||
926 | -- return (do yield bs -- an invalid blob remains | ||
927 | -- p') | ||
928 | -- {-# INLINABLE decodeUtf8 #-} | ||