]>
Commit | Line | Data |
---|---|---|
9667f797 GG |
1 | {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-} |
2 | ||
955edd33 | 3 | {-| The module @Pipes.Text@ closely follows @Pipes.ByteString@ from |
4 | the @pipes-bytestring@ package. A draft tutorial can be found in | |
5 | @Pipes.Text.Tutorial@. | |
6 | -} | |
2f4a83f8 | 7 | |
955edd33 | 8 | module Pipes.Text ( |
91727d11 | 9 | -- * Producers |
1a83ae4e | 10 | fromLazy |
91727d11 | 11 | |
12 | -- * Pipes | |
1677dc12 | 13 | , map |
14 | , concatMap | |
15 | , take | |
1677dc12 | 16 | , takeWhile |
1677dc12 | 17 | , filter |
1677dc12 | 18 | , toCaseFold |
19 | , toLower | |
20 | , toUpper | |
21 | , stripStart | |
2f4a83f8 | 22 | , scan |
91727d11 | 23 | |
24 | -- * Folds | |
1677dc12 | 25 | , toLazy |
26 | , toLazyM | |
27 | , foldChars | |
28 | , head | |
29 | , last | |
30 | , null | |
31 | , length | |
32 | , any | |
33 | , all | |
34 | , maximum | |
35 | , minimum | |
36 | , find | |
37 | , index | |
1677dc12 | 38 | |
39 | -- * Primitive Character Parsers | |
1677dc12 | 40 | , nextChar |
41 | , drawChar | |
42 | , unDrawChar | |
43 | , peekChar | |
9e9bb0ce | 44 | , isEndOfChars |
1677dc12 | 45 | |
2f4a83f8 | 46 | -- * Parsing Lenses |
9e9bb0ce | 47 | , splitAt |
1677dc12 | 48 | , span |
49 | , break | |
50 | , groupBy | |
51 | , group | |
9e9bb0ce | 52 | , word |
53 | , line | |
1677dc12 | 54 | |
2f4a83f8 | 55 | -- * Transforming Text and Character Streams |
56 | , drop | |
57 | , dropWhile | |
58 | , pack | |
59 | , unpack | |
60 | , intersperse | |
61 | ||
62 | -- * FreeT Transformations | |
1677dc12 | 63 | , chunksOf |
64 | , splitsWith | |
0f8c6f1b | 65 | , splits |
1a83ae4e | 66 | , groupsBy |
67 | , groups | |
1677dc12 | 68 | , lines |
1677dc12 | 69 | , unlines |
2f4a83f8 | 70 | , words |
1677dc12 | 71 | , unwords |
2f4a83f8 | 72 | , intercalate |
9e9bb0ce | 73 | |
1a83ae4e | 74 | -- * Re-exports |
91727d11 | 75 | -- $reexports |
1677dc12 | 76 | , module Data.ByteString |
77 | , module Data.Text | |
1677dc12 | 78 | , module Pipes.Parse |
7ed76745 | 79 | , module Pipes.Group |
91727d11 | 80 | ) where |
81 | ||
2f4a83f8 | 82 | import Control.Applicative ((<*)) |
70125641 | 83 | import Control.Monad (liftM, join) |
9e9bb0ce | 84 | import Control.Monad.Trans.State.Strict (StateT(..), modify) |
91727d11 | 85 | import qualified Data.Text as T |
91727d11 | 86 | import Data.Text (Text) |
87 | import qualified Data.Text.Lazy as TL | |
31f41a5d | 88 | import Data.ByteString (ByteString) |
1677dc12 | 89 | import Data.Functor.Constant (Constant(Constant, getConstant)) |
91727d11 | 90 | import Data.Functor.Identity (Identity) |
2f4a83f8 | 91 | |
91727d11 | 92 | import Pipes |
2f4a83f8 | 93 | import Pipes.Group (folds, maps, concats, intercalates, FreeT(..), FreeF(..)) |
7ed76745 | 94 | import qualified Pipes.Group as PG |
91727d11 | 95 | import qualified Pipes.Parse as PP |
7ed76745 | 96 | import Pipes.Parse (Parser) |
91727d11 | 97 | import qualified Pipes.Prelude as P |
91727d11 | 98 | import Data.Char (isSpace) |
1a83ae4e | 99 | import Data.Word (Word8) |
79917d53 | 100 | import Foreign.Storable (sizeOf) |
101 | import Data.Bits (shiftL) | |
91727d11 | 102 | import Prelude hiding ( |
103 | all, | |
104 | any, | |
105 | break, | |
106 | concat, | |
107 | concatMap, | |
108 | drop, | |
109 | dropWhile, | |
110 | elem, | |
111 | filter, | |
112 | head, | |
113 | last, | |
114 | lines, | |
115 | length, | |
116 | map, | |
117 | maximum, | |
118 | minimum, | |
119 | notElem, | |
120 | null, | |
121 | readFile, | |
122 | span, | |
123 | splitAt, | |
124 | take, | |
125 | takeWhile, | |
126 | unlines, | |
127 | unwords, | |
128 | words, | |
129 | writeFile ) | |
130 | ||
e20590eb | 131 | -- $setup |
132 | -- >>> :set -XOverloadedStrings | |
133 | -- >>> import Data.Text (Text) | |
134 | -- >>> import qualified Data.Text as T | |
135 | -- >>> import qualified Data.Text.Lazy.IO as TL | |
136 | -- >>> import Data.Char | |
137 | ||
138 | -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's. Producers in | |
139 | -- IO can be found in 'Pipes.Text.IO' or in pipes-bytestring, employed with the | |
140 | -- decoding lenses in 'Pipes.Text.Encoding' | |
91727d11 | 141 | fromLazy :: (Monad m) => TL.Text -> Producer' Text m () |
2f4a83f8 | 142 | fromLazy = TL.foldrChunks (\e a -> yield e >> a) (return ()) |
ca6f90a0 | 143 | {-# INLINE fromLazy #-} |
91727d11 | 144 | |
d199072b | 145 | (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b |
146 | a ^. lens = getConstant (lens Constant a) | |
147 | ||
91727d11 | 148 | -- | Apply a transformation to each 'Char' in the stream |
e20590eb | 149 | |
150 | -- >>> let margaret = ["Margaret, are you grieving\nOver Golde","ngrove unleaving?":: Text] | |
b28660f6 | 151 | -- >>> TL.putStrLn . toLazy $ each margaret >-> map Data.Char.toUpper |
e20590eb | 152 | -- MARGARET, ARE YOU GRIEVING |
153 | -- OVER GOLDENGROVE UNLEAVING? | |
91727d11 | 154 | map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r |
155 | map f = P.map (T.map f) | |
156 | {-# INLINABLE map #-} | |
157 | ||
31f41a5d | 158 | -- | Map a function over the characters of a text stream and concatenate the results |
e20590eb | 159 | |
91727d11 | 160 | concatMap |
161 | :: (Monad m) => (Char -> Text) -> Pipe Text Text m r | |
162 | concatMap f = P.map (T.concatMap f) | |
163 | {-# INLINABLE concatMap #-} | |
164 | ||
2f4a83f8 | 165 | -- | @(take n)@ only allows @n@ individual characters to pass; |
31f41a5d | 166 | -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass. |
91727d11 | 167 | take :: (Monad m, Integral a) => a -> Pipe Text Text m () |
168 | take n0 = go n0 where | |
169 | go n | |
170 | | n <= 0 = return () | |
b28660f6 | 171 | | otherwise = do |
31f41a5d | 172 | txt <- await |
173 | let len = fromIntegral (T.length txt) | |
91727d11 | 174 | if (len > n) |
31f41a5d | 175 | then yield (T.take (fromIntegral n) txt) |
91727d11 | 176 | else do |
31f41a5d | 177 | yield txt |
91727d11 | 178 | go (n - len) |
179 | {-# INLINABLE take #-} | |
180 | ||
31f41a5d | 181 | -- | Take characters until they fail the predicate |
91727d11 | 182 | takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m () |
183 | takeWhile predicate = go | |
184 | where | |
185 | go = do | |
31f41a5d | 186 | txt <- await |
187 | let (prefix, suffix) = T.span predicate txt | |
91727d11 | 188 | if (T.null suffix) |
189 | then do | |
31f41a5d | 190 | yield txt |
91727d11 | 191 | go |
192 | else yield prefix | |
193 | {-# INLINABLE takeWhile #-} | |
194 | ||
91727d11 | 195 | -- | Only allows 'Char's to pass if they satisfy the predicate |
196 | filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r | |
197 | filter predicate = P.map (T.filter predicate) | |
198 | {-# INLINABLE filter #-} | |
199 | ||
31f41a5d | 200 | -- | Strict left scan over the characters |
e20590eb | 201 | -- >>> let margaret = ["Margaret, are you grieving\nOver Golde","ngrove unleaving?":: Text] |
202 | -- >>> let title_caser a x = case a of ' ' -> Data.Char.toUpper x; _ -> x | |
203 | -- >>> toLazy $ each margaret >-> scan title_caser ' ' | |
204 | -- " Margaret, Are You Grieving\nOver Goldengrove Unleaving?" | |
205 | ||
91727d11 | 206 | scan |
207 | :: (Monad m) | |
208 | => (Char -> Char -> Char) -> Char -> Pipe Text Text m r | |
11645cdc GG |
209 | scan step begin = do |
210 | yield (T.singleton begin) | |
211 | go begin | |
91727d11 | 212 | where |
31f41a5d | 213 | go c = do |
214 | txt <- await | |
215 | let txt' = T.scanl step c txt | |
216 | c' = T.last txt' | |
11645cdc | 217 | yield (T.tail txt') |
31f41a5d | 218 | go c' |
91727d11 | 219 | {-# INLINABLE scan #-} |
220 | ||
2f4a83f8 | 221 | -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities, |
222 | -- here acting as 'Text' pipes, rather as they would on a lazy text | |
223 | toCaseFold :: Monad m => Pipe Text Text m r | |
224 | toCaseFold = P.map T.toCaseFold | |
225 | {-# INLINEABLE toCaseFold #-} | |
226 | ||
227 | -- | lowercase incoming 'Text' | |
228 | toLower :: Monad m => Pipe Text Text m r | |
229 | toLower = P.map T.toLower | |
230 | {-# INLINEABLE toLower #-} | |
231 | ||
232 | -- | uppercase incoming 'Text' | |
233 | toUpper :: Monad m => Pipe Text Text m r | |
234 | toUpper = P.map T.toUpper | |
235 | {-# INLINEABLE toUpper #-} | |
236 | ||
237 | -- | Remove leading white space from an incoming succession of 'Text's | |
238 | stripStart :: Monad m => Pipe Text Text m r | |
239 | stripStart = do | |
240 | chunk <- await | |
241 | let text = T.stripStart chunk | |
242 | if T.null text | |
243 | then stripStart | |
244 | else do yield text | |
245 | cat | |
246 | {-# INLINEABLE stripStart #-} | |
247 | ||
91727d11 | 248 | {-| Fold a pure 'Producer' of strict 'Text's into a lazy |
249 | 'TL.Text' | |
250 | -} | |
251 | toLazy :: Producer Text Identity () -> TL.Text | |
252 | toLazy = TL.fromChunks . P.toList | |
253 | {-# INLINABLE toLazy #-} | |
254 | ||
255 | {-| Fold an effectful 'Producer' of strict 'Text's into a lazy | |
256 | 'TL.Text' | |
257 | ||
258 | Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for | |
259 | simple testing purposes. Idiomatic @pipes@ style consumes the chunks | |
260 | immediately as they are generated instead of loading them all into memory. | |
261 | -} | |
262 | toLazyM :: (Monad m) => Producer Text m () -> m TL.Text | |
263 | toLazyM = liftM TL.fromChunks . P.toListM | |
264 | {-# INLINABLE toLazyM #-} | |
265 | ||
31f41a5d | 266 | -- | Reduce the text stream using a strict left fold over characters |
64e03122 | 267 | foldChars |
91727d11 | 268 | :: Monad m |
269 | => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r | |
64e03122 | 270 | foldChars step begin done = P.fold (T.foldl' step) begin done |
1677dc12 | 271 | {-# INLINABLE foldChars #-} |
91727d11 | 272 | |
2f4a83f8 | 273 | |
91727d11 | 274 | -- | Retrieve the first 'Char' |
275 | head :: (Monad m) => Producer Text m () -> m (Maybe Char) | |
276 | head = go | |
277 | where | |
278 | go p = do | |
279 | x <- nextChar p | |
280 | case x of | |
281 | Left _ -> return Nothing | |
31f41a5d | 282 | Right (c, _) -> return (Just c) |
91727d11 | 283 | {-# INLINABLE head #-} |
284 | ||
285 | -- | Retrieve the last 'Char' | |
286 | last :: (Monad m) => Producer Text m () -> m (Maybe Char) | |
287 | last = go Nothing | |
288 | where | |
289 | go r p = do | |
290 | x <- next p | |
291 | case x of | |
292 | Left () -> return r | |
31f41a5d | 293 | Right (txt, p') -> |
294 | if (T.null txt) | |
91727d11 | 295 | then go r p' |
31f41a5d | 296 | else go (Just $ T.last txt) p' |
91727d11 | 297 | {-# INLINABLE last #-} |
298 | ||
299 | -- | Determine if the stream is empty | |
300 | null :: (Monad m) => Producer Text m () -> m Bool | |
301 | null = P.all T.null | |
302 | {-# INLINABLE null #-} | |
303 | ||
62e8521c | 304 | -- | Count the number of characters in the stream |
91727d11 | 305 | length :: (Monad m, Num n) => Producer Text m () -> m n |
31f41a5d | 306 | length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id |
91727d11 | 307 | {-# INLINABLE length #-} |
308 | ||
309 | -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate | |
310 | any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool | |
311 | any predicate = P.any (T.any predicate) | |
312 | {-# INLINABLE any #-} | |
313 | ||
314 | -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate | |
315 | all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool | |
316 | all predicate = P.all (T.all predicate) | |
317 | {-# INLINABLE all #-} | |
318 | ||
62e8521c | 319 | -- | Return the maximum 'Char' within a text stream |
91727d11 | 320 | maximum :: (Monad m) => Producer Text m () -> m (Maybe Char) |
321 | maximum = P.fold step Nothing id | |
322 | where | |
31f41a5d | 323 | step mc txt = |
324 | if (T.null txt) | |
325 | then mc | |
326 | else Just $ case mc of | |
327 | Nothing -> T.maximum txt | |
328 | Just c -> max c (T.maximum txt) | |
91727d11 | 329 | {-# INLINABLE maximum #-} |
330 | ||
62e8521c | 331 | -- | Return the minimum 'Char' within a text stream (surely very useful!) |
91727d11 | 332 | minimum :: (Monad m) => Producer Text m () -> m (Maybe Char) |
333 | minimum = P.fold step Nothing id | |
334 | where | |
31f41a5d | 335 | step mc txt = |
336 | if (T.null txt) | |
337 | then mc | |
338 | else case mc of | |
339 | Nothing -> Just (T.minimum txt) | |
340 | Just c -> Just (min c (T.minimum txt)) | |
91727d11 | 341 | {-# INLINABLE minimum #-} |
342 | ||
91727d11 | 343 | -- | Find the first element in the stream that matches the predicate |
344 | find | |
345 | :: (Monad m) | |
346 | => (Char -> Bool) -> Producer Text m () -> m (Maybe Char) | |
347 | find predicate p = head (p >-> filter predicate) | |
348 | {-# INLINABLE find #-} | |
349 | ||
62e8521c | 350 | -- | Index into a text stream |
91727d11 | 351 | index |
352 | :: (Monad m, Integral a) | |
353 | => a-> Producer Text m () -> m (Maybe Char) | |
2f4a83f8 | 354 | index n p = head (drop n p) |
91727d11 | 355 | {-# INLINABLE index #-} |
356 | ||
63ea9ffd | 357 | |
9e9bb0ce | 358 | |
1a83ae4e | 359 | -- | Consume the first character from a stream of 'Text' |
2f4a83f8 | 360 | -- |
1a83ae4e | 361 | -- 'next' either fails with a 'Left' if the 'Producer' has no more characters or |
362 | -- succeeds with a 'Right' providing the next character and the remainder of the | |
363 | -- 'Producer'. | |
9e9bb0ce | 364 | |
9e9bb0ce | 365 | nextChar |
366 | :: (Monad m) | |
367 | => Producer Text m r | |
368 | -> m (Either r (Char, Producer Text m r)) | |
369 | nextChar = go | |
370 | where | |
371 | go p = do | |
372 | x <- next p | |
373 | case x of | |
374 | Left r -> return (Left r) | |
375 | Right (txt, p') -> case (T.uncons txt) of | |
376 | Nothing -> go p' | |
377 | Just (c, txt') -> return (Right (c, yield txt' >> p')) | |
378 | {-# INLINABLE nextChar #-} | |
379 | ||
1a83ae4e | 380 | -- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty |
381 | ||
9e9bb0ce | 382 | drawChar :: (Monad m) => Parser Text m (Maybe Char) |
383 | drawChar = do | |
384 | x <- PP.draw | |
385 | case x of | |
386 | Nothing -> return Nothing | |
387 | Just txt -> case (T.uncons txt) of | |
388 | Nothing -> drawChar | |
389 | Just (c, txt') -> do | |
390 | PP.unDraw txt' | |
391 | return (Just c) | |
392 | {-# INLINABLE drawChar #-} | |
393 | ||
394 | -- | Push back a 'Char' onto the underlying 'Producer' | |
395 | unDrawChar :: (Monad m) => Char -> Parser Text m () | |
396 | unDrawChar c = modify (yield (T.singleton c) >>) | |
397 | {-# INLINABLE unDrawChar #-} | |
398 | ||
399 | {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to | |
400 | push the 'Char' back | |
401 | ||
402 | > peekChar = do | |
403 | > x <- drawChar | |
404 | > case x of | |
405 | > Left _ -> return () | |
406 | > Right c -> unDrawChar c | |
407 | > return x | |
1a83ae4e | 408 | |
9e9bb0ce | 409 | -} |
1a83ae4e | 410 | |
9e9bb0ce | 411 | peekChar :: (Monad m) => Parser Text m (Maybe Char) |
412 | peekChar = do | |
413 | x <- drawChar | |
414 | case x of | |
415 | Nothing -> return () | |
416 | Just c -> unDrawChar c | |
417 | return x | |
418 | {-# INLINABLE peekChar #-} | |
419 | ||
420 | {-| Check if the underlying 'Producer' has no more characters | |
421 | ||
422 | Note that this will skip over empty 'Text' chunks, unlike | |
423 | 'PP.isEndOfInput' from @pipes-parse@, which would consider | |
424 | an empty 'Text' a valid bit of input. | |
425 | ||
426 | > isEndOfChars = liftM isLeft peekChar | |
427 | -} | |
428 | isEndOfChars :: (Monad m) => Parser Text m Bool | |
429 | isEndOfChars = do | |
430 | x <- peekChar | |
431 | return (case x of | |
432 | Nothing -> True | |
433 | Just _-> False ) | |
434 | {-# INLINABLE isEndOfChars #-} | |
435 | ||
31f41a5d | 436 | -- | Splits a 'Producer' after the given number of characters |
91727d11 | 437 | splitAt |
438 | :: (Monad m, Integral n) | |
439 | => n | |
57454c33 | 440 | -> Lens' (Producer Text m r) |
d199072b | 441 | (Producer Text m (Producer Text m r)) |
9e9bb0ce | 442 | splitAt n0 k p0 = fmap join (k (go n0 p0)) |
91727d11 | 443 | where |
444 | go 0 p = return p | |
445 | go n p = do | |
446 | x <- lift (next p) | |
447 | case x of | |
448 | Left r -> return (return r) | |
31f41a5d | 449 | Right (txt, p') -> do |
450 | let len = fromIntegral (T.length txt) | |
91727d11 | 451 | if (len <= n) |
452 | then do | |
31f41a5d | 453 | yield txt |
91727d11 | 454 | go (n - len) p' |
455 | else do | |
31f41a5d | 456 | let (prefix, suffix) = T.splitAt (fromIntegral n) txt |
91727d11 | 457 | yield prefix |
458 | return (yield suffix >> p') | |
459 | {-# INLINABLE splitAt #-} | |
460 | ||
91727d11 | 461 | |
1a83ae4e | 462 | -- | Split a text stream in two, producing the longest |
463 | -- consecutive group of characters that satisfies the predicate | |
464 | -- and returning the rest | |
465 | ||
91727d11 | 466 | span |
467 | :: (Monad m) | |
468 | => (Char -> Bool) | |
57454c33 | 469 | -> Lens' (Producer Text m r) |
d199072b | 470 | (Producer Text m (Producer Text m r)) |
9e9bb0ce | 471 | span predicate k p0 = fmap join (k (go p0)) |
91727d11 | 472 | where |
473 | go p = do | |
474 | x <- lift (next p) | |
475 | case x of | |
476 | Left r -> return (return r) | |
31f41a5d | 477 | Right (txt, p') -> do |
478 | let (prefix, suffix) = T.span predicate txt | |
91727d11 | 479 | if (T.null suffix) |
480 | then do | |
31f41a5d | 481 | yield txt |
91727d11 | 482 | go p' |
483 | else do | |
484 | yield prefix | |
485 | return (yield suffix >> p') | |
486 | {-# INLINABLE span #-} | |
487 | ||
1a83ae4e | 488 | {-| Split a text stream in two, producing the longest |
62e8521c | 489 | consecutive group of characters that don't satisfy the predicate |
91727d11 | 490 | -} |
491 | break | |
492 | :: (Monad m) | |
493 | => (Char -> Bool) | |
57454c33 | 494 | -> Lens' (Producer Text m r) |
d199072b | 495 | (Producer Text m (Producer Text m r)) |
91727d11 | 496 | break predicate = span (not . predicate) |
497 | {-# INLINABLE break #-} | |
498 | ||
9e9bb0ce | 499 | {-| Improper lens that splits after the first group of equivalent Chars, as |
500 | defined by the given equivalence relation | |
501 | -} | |
502 | groupBy | |
503 | :: (Monad m) | |
504 | => (Char -> Char -> Bool) | |
57454c33 | 505 | -> Lens' (Producer Text m r) |
d199072b | 506 | (Producer Text m (Producer Text m r)) |
9e9bb0ce | 507 | groupBy equals k p0 = fmap join (k ((go p0))) where |
508 | go p = do | |
509 | x <- lift (next p) | |
510 | case x of | |
511 | Left r -> return (return r) | |
512 | Right (txt, p') -> case T.uncons txt of | |
513 | Nothing -> go p' | |
2f4a83f8 | 514 | Just (c, _) -> (yield txt >> p') ^. span (equals c) |
9e9bb0ce | 515 | {-# INLINABLE groupBy #-} |
516 | ||
517 | -- | Improper lens that splits after the first succession of identical 'Char' s | |
2f4a83f8 | 518 | group :: Monad m |
57454c33 | 519 | => Lens' (Producer Text m r) |
9e9bb0ce | 520 | (Producer Text m (Producer Text m r)) |
521 | group = groupBy (==) | |
522 | {-# INLINABLE group #-} | |
523 | ||
524 | {-| Improper lens that splits a 'Producer' after the first word | |
525 | ||
2f4a83f8 | 526 | Unlike 'words', this does not drop leading whitespace |
9e9bb0ce | 527 | -} |
2f4a83f8 | 528 | word :: (Monad m) |
57454c33 | 529 | => Lens' (Producer Text m r) |
d199072b | 530 | (Producer Text m (Producer Text m r)) |
9e9bb0ce | 531 | word k p0 = fmap join (k (to p0)) |
532 | where | |
533 | to p = do | |
534 | p' <- p^.span isSpace | |
535 | p'^.break isSpace | |
536 | {-# INLINABLE word #-} | |
537 | ||
2f4a83f8 | 538 | line :: (Monad m) |
57454c33 | 539 | => Lens' (Producer Text m r) |
d199072b | 540 | (Producer Text m (Producer Text m r)) |
9e9bb0ce | 541 | line = break (== '\n') |
9e9bb0ce | 542 | {-# INLINABLE line #-} |
543 | ||
2f4a83f8 | 544 | -- | @(drop n)@ drops the first @n@ characters |
545 | drop :: (Monad m, Integral n) | |
546 | => n -> Producer Text m r -> Producer Text m r | |
547 | drop n p = do | |
548 | p' <- lift $ runEffect (for (p ^. splitAt n) discard) | |
549 | p' | |
550 | {-# INLINABLE drop #-} | |
551 | ||
552 | -- | Drop characters until they fail the predicate | |
553 | dropWhile :: (Monad m) | |
554 | => (Char -> Bool) -> Producer Text m r -> Producer Text m r | |
555 | dropWhile predicate p = do | |
556 | p' <- lift $ runEffect (for (p ^. span predicate) discard) | |
557 | p' | |
558 | {-# INLINABLE dropWhile #-} | |
9e9bb0ce | 559 | |
560 | -- | Intersperse a 'Char' in between the characters of stream of 'Text' | |
561 | intersperse | |
562 | :: (Monad m) => Char -> Producer Text m r -> Producer Text m r | |
563 | intersperse c = go0 | |
564 | where | |
565 | go0 p = do | |
566 | x <- lift (next p) | |
567 | case x of | |
568 | Left r -> return r | |
569 | Right (txt, p') -> do | |
570 | yield (T.intersperse c txt) | |
571 | go1 p' | |
572 | go1 p = do | |
573 | x <- lift (next p) | |
574 | case x of | |
575 | Left r -> return r | |
576 | Right (txt, p') -> do | |
577 | yield (T.singleton c) | |
578 | yield (T.intersperse c txt) | |
579 | go1 p' | |
580 | {-# INLINABLE intersperse #-} | |
581 | ||
582 | ||
2f4a83f8 | 583 | -- | Improper lens from unpacked 'Word8's to packaged 'ByteString's |
584 | pack :: Monad m => Lens' (Producer Char m r) (Producer Text m r) | |
585 | pack k p = fmap _unpack (k (_pack p)) | |
586 | {-# INLINABLE pack #-} | |
587 | ||
588 | -- | Improper lens from packed 'ByteString's to unpacked 'Word8's | |
589 | unpack :: Monad m => Lens' (Producer Text m r) (Producer Char m r) | |
590 | unpack k p = fmap _pack (k (_unpack p)) | |
591 | {-# INLINABLE unpack #-} | |
9e9bb0ce | 592 | |
2f4a83f8 | 593 | _pack :: Monad m => Producer Char m r -> Producer Text m r |
594 | _pack p = folds step id done (p^.PG.chunksOf defaultChunkSize) | |
595 | where | |
596 | step diffAs w8 = diffAs . (w8:) | |
9e9bb0ce | 597 | |
598 | done diffAs = T.pack (diffAs []) | |
2f4a83f8 | 599 | {-# INLINABLE _pack #-} |
9e9bb0ce | 600 | |
2f4a83f8 | 601 | _unpack :: Monad m => Producer Text m r -> Producer Char m r |
602 | _unpack p = for p (each . T.unpack) | |
603 | {-# INLINABLE _unpack #-} | |
9e9bb0ce | 604 | |
79917d53 | 605 | defaultChunkSize :: Int |
606 | defaultChunkSize = 16384 - (sizeOf (undefined :: Int) `shiftL` 1) | |
0f8c6f1b | 607 | |
2f4a83f8 | 608 | |
0f8c6f1b | 609 | -- | Split a text stream into 'FreeT'-delimited text streams of fixed size |
610 | chunksOf | |
611 | :: (Monad m, Integral n) | |
2f4a83f8 | 612 | => n -> Lens' (Producer Text m r) |
d199072b | 613 | (FreeT (Producer Text m) m r) |
0f8c6f1b | 614 | chunksOf n k p0 = fmap concats (k (FreeT (go p0))) |
615 | where | |
616 | go p = do | |
617 | x <- next p | |
618 | return $ case x of | |
7ed76745 | 619 | Left r -> Pure r |
620 | Right (txt, p') -> Free $ do | |
2f4a83f8 | 621 | p'' <- (yield txt >> p') ^. splitAt n |
7ed76745 | 622 | return $ FreeT (go p'') |
0f8c6f1b | 623 | {-# INLINABLE chunksOf #-} |
624 | ||
625 | ||
62e8521c | 626 | {-| Split a text stream into sub-streams delimited by characters that satisfy the |
91727d11 | 627 | predicate |
628 | -} | |
1677dc12 | 629 | splitsWith |
91727d11 | 630 | :: (Monad m) |
631 | => (Char -> Bool) | |
2f4a83f8 | 632 | -> Producer Text m r -> FreeT (Producer Text m) m r |
7ed76745 | 633 | splitsWith predicate p0 = FreeT (go0 p0) |
91727d11 | 634 | where |
635 | go0 p = do | |
636 | x <- next p | |
637 | case x of | |
7ed76745 | 638 | Left r -> return (Pure r) |
31f41a5d | 639 | Right (txt, p') -> |
640 | if (T.null txt) | |
91727d11 | 641 | then go0 p' |
7ed76745 | 642 | else return $ Free $ do |
9e9bb0ce | 643 | p'' <- (yield txt >> p') ^. span (not . predicate) |
7ed76745 | 644 | return $ FreeT (go1 p'') |
91727d11 | 645 | go1 p = do |
646 | x <- nextChar p | |
647 | return $ case x of | |
7ed76745 | 648 | Left r -> Pure r |
649 | Right (_, p') -> Free $ do | |
2f4a83f8 | 650 | p'' <- p' ^. span (not . predicate) |
7ed76745 | 651 | return $ FreeT (go1 p'') |
1677dc12 | 652 | {-# INLINABLE splitsWith #-} |
91727d11 | 653 | |
31f41a5d | 654 | -- | Split a text stream using the given 'Char' as the delimiter |
0f8c6f1b | 655 | splits :: (Monad m) |
d199072b | 656 | => Char |
57454c33 | 657 | -> Lens' (Producer Text m r) |
d199072b | 658 | (FreeT (Producer Text m) m r) |
0f8c6f1b | 659 | splits c k p = |
2f4a83f8 | 660 | fmap (intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p)) |
0f8c6f1b | 661 | {-# INLINABLE splits #-} |
662 | ||
663 | {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the | |
664 | given equivalence relation | |
665 | -} | |
666 | groupsBy | |
667 | :: Monad m | |
668 | => (Char -> Char -> Bool) | |
57454c33 | 669 | -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x) |
2f4a83f8 | 670 | groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where |
0f8c6f1b | 671 | go p = do x <- next p |
7ed76745 | 672 | case x of Left r -> return (Pure r) |
0f8c6f1b | 673 | Right (bs, p') -> case T.uncons bs of |
674 | Nothing -> go p' | |
7ed76745 | 675 | Just (c, _) -> do return $ Free $ do |
0f8c6f1b | 676 | p'' <- (yield bs >> p')^.span (equals c) |
7ed76745 | 677 | return $ FreeT (go p'') |
0f8c6f1b | 678 | {-# INLINABLE groupsBy #-} |
679 | ||
680 | ||
681 | -- | Like 'groupsBy', where the equality predicate is ('==') | |
682 | groups | |
683 | :: Monad m | |
57454c33 | 684 | => Lens' (Producer Text m x) (FreeT (Producer Text m) m x) |
0f8c6f1b | 685 | groups = groupsBy (==) |
686 | {-# INLINABLE groups #-} | |
687 | ||
91727d11 | 688 | |
91727d11 | 689 | |
62e8521c | 690 | {-| Split a text stream into 'FreeT'-delimited lines |
91727d11 | 691 | -} |
692 | lines | |
2f4a83f8 | 693 | :: (Monad m) => Lens' (Producer Text m r) (FreeT (Producer Text m) m r) |
694 | lines k p = fmap _unlines (k (_lines p)) | |
695 | {-# INLINABLE lines #-} | |
696 | ||
697 | unlines | |
698 | :: Monad m | |
699 | => Lens' (FreeT (Producer Text m) m r) (Producer Text m r) | |
700 | unlines k p = fmap _lines (k (_unlines p)) | |
701 | {-# INLINABLE unlines #-} | |
702 | ||
703 | _lines :: Monad m | |
704 | => Producer Text m r -> FreeT (Producer Text m) m r | |
705 | _lines p0 = FreeT (go0 p0) | |
0f8c6f1b | 706 | where |
707 | go0 p = do | |
708 | x <- next p | |
709 | case x of | |
7ed76745 | 710 | Left r -> return (Pure r) |
0f8c6f1b | 711 | Right (txt, p') -> |
712 | if (T.null txt) | |
713 | then go0 p' | |
7ed76745 | 714 | else return $ Free $ go1 (yield txt >> p') |
0f8c6f1b | 715 | go1 p = do |
716 | p' <- p ^. break ('\n' ==) | |
7ed76745 | 717 | return $ FreeT $ do |
0f8c6f1b | 718 | x <- nextChar p' |
719 | case x of | |
7ed76745 | 720 | Left r -> return $ Pure r |
0f8c6f1b | 721 | Right (_, p'') -> go0 p'' |
2f4a83f8 | 722 | {-# INLINABLE _lines #-} |
0f8c6f1b | 723 | |
2f4a83f8 | 724 | _unlines :: Monad m |
725 | => FreeT (Producer Text m) m r -> Producer Text m r | |
726 | _unlines = concats . maps (<* yield (T.singleton '\n')) | |
727 | {-# INLINABLE _unlines #-} | |
91727d11 | 728 | |
2f4a83f8 | 729 | -- | Split a text stream into 'FreeT'-delimited words. Note that |
730 | -- roundtripping with e.g. @over words id@ eliminates extra space | |
731 | -- characters as with @Prelude.unwords . Prelude.words@ | |
91727d11 | 732 | words |
2f4a83f8 | 733 | :: (Monad m) => Lens' (Producer Text m r) (FreeT (Producer Text m) m r) |
734 | words k p = fmap _unwords (k (_words p)) | |
735 | {-# INLINABLE words #-} | |
736 | ||
737 | unwords | |
738 | :: Monad m | |
739 | => Lens' (FreeT (Producer Text m) m r) (Producer Text m r) | |
740 | unwords k p = fmap _words (k (_unwords p)) | |
741 | {-# INLINABLE unwords #-} | |
742 | ||
743 | _words :: (Monad m) => Producer Text m r -> FreeT (Producer Text m) m r | |
744 | _words p = FreeT $ do | |
745 | x <- next (dropWhile isSpace p) | |
cf10d6f1 | 746 | return $ case x of |
7ed76745 | 747 | Left r -> Pure r |
748 | Right (bs, p') -> Free $ do | |
9e9bb0ce | 749 | p'' <- (yield bs >> p') ^. break isSpace |
2f4a83f8 | 750 | return (_words p'') |
751 | {-# INLINABLE _words #-} | |
752 | ||
753 | _unwords :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r | |
754 | _unwords = intercalates (yield $ T.singleton ' ') | |
755 | {-# INLINABLE _unwords #-} | |
91727d11 | 756 | |
cf10d6f1 | 757 | |
31f41a5d | 758 | {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after |
759 | interspersing a text stream in between them | |
91727d11 | 760 | -} |
761 | intercalate | |
762 | :: (Monad m) | |
2f4a83f8 | 763 | => Producer Text m () -> FreeT (Producer Text m) m r -> Producer Text m r |
91727d11 | 764 | intercalate p0 = go0 |
765 | where | |
766 | go0 f = do | |
7ed76745 | 767 | x <- lift (runFreeT f) |
91727d11 | 768 | case x of |
7ed76745 | 769 | Pure r -> return r |
770 | Free p -> do | |
91727d11 | 771 | f' <- p |
772 | go1 f' | |
773 | go1 f = do | |
7ed76745 | 774 | x <- lift (runFreeT f) |
91727d11 | 775 | case x of |
7ed76745 | 776 | Pure r -> return r |
777 | Free p -> do | |
91727d11 | 778 | p0 |
779 | f' <- p | |
780 | go1 f' | |
781 | {-# INLINABLE intercalate #-} | |
782 | ||
91727d11 | 783 | |
91727d11 | 784 | |
91727d11 | 785 | {- $reexports |
2f4a83f8 | 786 | |
91727d11 | 787 | @Data.Text@ re-exports the 'Text' type. |
788 | ||
2f4a83f8 | 789 | @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym. |
64e03122 | 790 | -} |
791 | ||
bbdfd305 | 792 | |
57454c33 | 793 | type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a) |