]>
Commit | Line | Data |
---|---|---|
9667f797 GG |
1 | {-# LANGUAGE RankNTypes, TypeFamilies, BangPatterns, Trustworthy #-} |
2 | ||
91727d11 | 3 | |
7faef8bc | 4 | module Pipes.Text ( |
e4b6dc67 | 5 | -- * Introduction |
6 | -- $intro | |
7 | ||
91727d11 | 8 | -- * Producers |
1a83ae4e | 9 | fromLazy |
91727d11 | 10 | |
11 | -- * Pipes | |
1677dc12 | 12 | , map |
13 | , concatMap | |
14 | , take | |
15 | , drop | |
16 | , takeWhile | |
17 | , dropWhile | |
18 | , filter | |
19 | , scan | |
1677dc12 | 20 | , pack |
21 | , unpack | |
22 | , toCaseFold | |
23 | , toLower | |
24 | , toUpper | |
25 | , stripStart | |
91727d11 | 26 | |
27 | -- * Folds | |
1677dc12 | 28 | , toLazy |
29 | , toLazyM | |
30 | , foldChars | |
31 | , head | |
32 | , last | |
33 | , null | |
34 | , length | |
35 | , any | |
36 | , all | |
37 | , maximum | |
38 | , minimum | |
39 | , find | |
40 | , index | |
41 | , count | |
42 | ||
43 | -- * Primitive Character Parsers | |
1677dc12 | 44 | , nextChar |
45 | , drawChar | |
46 | , unDrawChar | |
47 | , peekChar | |
9e9bb0ce | 48 | , isEndOfChars |
1677dc12 | 49 | |
50 | -- * Parsing Lenses | |
9e9bb0ce | 51 | , splitAt |
1677dc12 | 52 | , span |
53 | , break | |
54 | , groupBy | |
55 | , group | |
9e9bb0ce | 56 | , word |
57 | , line | |
1677dc12 | 58 | |
59 | -- * FreeT Splitters | |
60 | , chunksOf | |
61 | , splitsWith | |
0f8c6f1b | 62 | , splits |
1a83ae4e | 63 | , groupsBy |
64 | , groups | |
1677dc12 | 65 | , lines |
66 | , words | |
67 | ||
91727d11 | 68 | -- * Transformations |
1677dc12 | 69 | , intersperse |
9e9bb0ce | 70 | , packChars |
31f41a5d | 71 | |
91727d11 | 72 | -- * Joiners |
1677dc12 | 73 | , intercalate |
74 | , unlines | |
75 | , unwords | |
9e9bb0ce | 76 | |
1a83ae4e | 77 | -- * Re-exports |
91727d11 | 78 | -- $reexports |
1677dc12 | 79 | , module Data.ByteString |
80 | , module Data.Text | |
81 | , module Data.Profunctor | |
1677dc12 | 82 | , module Pipes.Parse |
7ed76745 | 83 | , module Pipes.Group |
91727d11 | 84 | ) where |
85 | ||
0f8c6f1b | 86 | import Control.Applicative ((<*)) |
70125641 | 87 | import Control.Monad (liftM, join) |
9e9bb0ce | 88 | import Control.Monad.Trans.State.Strict (StateT(..), modify) |
91727d11 | 89 | import qualified Data.Text as T |
91727d11 | 90 | import Data.Text (Text) |
91 | import qualified Data.Text.Lazy as TL | |
91727d11 | 92 | import Data.Text.Lazy.Internal (foldrChunks, defaultChunkSize) |
31f41a5d | 93 | import Data.ByteString (ByteString) |
1677dc12 | 94 | import Data.Functor.Constant (Constant(Constant, getConstant)) |
91727d11 | 95 | import Data.Functor.Identity (Identity) |
1677dc12 | 96 | import Data.Profunctor (Profunctor) |
97 | import qualified Data.Profunctor | |
91727d11 | 98 | import Pipes |
7fc48f7c | 99 | import Pipes.Group (concats, intercalates, FreeT(..), FreeF(..)) |
7ed76745 | 100 | import qualified Pipes.Group as PG |
91727d11 | 101 | import qualified Pipes.Parse as PP |
7ed76745 | 102 | import Pipes.Parse (Parser) |
91727d11 | 103 | import qualified Pipes.Prelude as P |
91727d11 | 104 | import Data.Char (isSpace) |
1a83ae4e | 105 | import Data.Word (Word8) |
1677dc12 | 106 | |
91727d11 | 107 | import Prelude hiding ( |
108 | all, | |
109 | any, | |
110 | break, | |
111 | concat, | |
112 | concatMap, | |
113 | drop, | |
114 | dropWhile, | |
115 | elem, | |
116 | filter, | |
117 | head, | |
118 | last, | |
119 | lines, | |
120 | length, | |
121 | map, | |
122 | maximum, | |
123 | minimum, | |
124 | notElem, | |
125 | null, | |
126 | readFile, | |
127 | span, | |
128 | splitAt, | |
129 | take, | |
130 | takeWhile, | |
131 | unlines, | |
132 | unwords, | |
133 | words, | |
134 | writeFile ) | |
135 | ||
e4b6dc67 | 136 | {- $intro |
137 | ||
138 | * /Effectful Text/ | |
139 | ||
140 | This package provides @pipes@ utilities for /text streams/, understood as | |
141 | streams of 'Text' chunks. The individual chunks are uniformly /strict/, and thus you | |
142 | will generally want @Data.Text@ in scope. But the type @Producer Text m r@ as we | |
143 | are using it is a sort of pipes equivalent of the lazy @Text@ type. | |
144 | ||
145 | This particular module provides many functions equivalent in one way or another to | |
146 | the pure functions in | |
147 | <https://hackage.haskell.org/package/text-1.1.0.0/docs/Data-Text-Lazy.html Data.Text.Lazy>. | |
148 | They transform, divide, group and fold text streams. Though @Producer Text m r@ | |
149 | is the type of \'effectful Text\', the functions in this module are \'pure\' | |
150 | in the sense that they are uniformly monad-independent. | |
151 | Simple /IO/ operations are defined in @Pipes.Text.IO@ -- as lazy IO @Text@ | |
152 | operations are in @Data.Text.Lazy.IO@. Inter-operation with @ByteString@ | |
153 | is provided in @Pipes.Text.Encoding@, which parallels @Data.Text.Lazy.Encoding@. | |
154 | ||
155 | The Text type exported by @Data.Text.Lazy@ is basically that of a lazy list of | |
156 | strict Text: the implementation is arranged so that the individual strict 'Text' | |
157 | chunks are kept to a reasonable size; the user is not aware of the divisions | |
158 | between the connected 'Text' chunks. | |
159 | So also here: the functions in this module are designed to operate on streams that | |
160 | are insensitive to text boundaries. This means that they may freely split | |
161 | text into smaller texts and /discard empty texts/. The objective, though, is | |
162 | that they should /never concatenate texts/ in order to provide strict upper | |
163 | bounds on memory usage. | |
164 | ||
165 | For example, to stream only the first three lines of 'stdin' to 'stdout' you | |
166 | might write: | |
167 | ||
168 | > import Pipes | |
169 | > import qualified Pipes.Text as Text | |
170 | > import qualified Pipes.Text.IO as Text | |
171 | > import Pipes.Group (takes') | |
172 | > import Lens.Family | |
173 | > | |
174 | > main = runEffect $ takeLines 3 Text.stdin >-> Text.stdout | |
175 | > where | |
176 | > takeLines n = Text.unlines . takes' n . view Text.lines | |
177 | ||
178 | The above program will never bring more than one chunk of text (~ 32 KB) into | |
179 | memory, no matter how long the lines are. | |
180 | ||
181 | * /Lenses/ | |
182 | ||
183 | As this example shows, one superficial difference from @Data.Text.Lazy@ | |
184 | is that many of the operations, like 'lines', | |
185 | are \'lensified\'; this has a number of advantages (where it is possible), in particular | |
186 | it facilitates their use with 'Parser's of Text (in the general | |
187 | <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html pipes-parse> | |
188 | sense.) | |
189 | Each such lens, e.g. 'lines', 'chunksOf' or 'splitAt', reduces to the | |
190 | intuitively corresponding function when used with @view@ or @(^.)@. | |
191 | ||
192 | Note similarly that many equivalents of 'Text -> Text' functions are exported here as 'Pipe's. | |
193 | They reduce to the intuitively corresponding functions when used with '(>->)'. Thus something like | |
194 | ||
195 | > stripLines = Text.unlines . Group.maps (>-> Text.stripStart) . view Text.lines | |
196 | ||
197 | would drop the leading white space from each line. | |
198 | ||
199 | The lens combinators | |
200 | you will find indispensible are @view@ / @(^.)@), @zoom@ and probably @over@. These | |
201 | are supplied by both <http://hackage.haskell.org/package/lens lens> and | |
202 | <http://hackage.haskell.org/package/lens-family lens-family> The use of 'zoom' is explained | |
203 | in <http://hackage.haskell.org/package/pipes-parse-3.0.1/docs/Pipes-Parse-Tutorial.html Pipes.Parse.Tutorial> | |
204 | and to some extent in the @Pipes.Text.Encoding@ module here. The use of | |
205 | @over@ is simple, illustrated by the fact that we can rewrite @stripLines@ above as | |
206 | ||
207 | > stripLines = over Text.lines $ maps (>-> stripStart) | |
208 | ||
209 | ||
210 | * Special types: @Producer Text m (Producer Text m r)@ and @FreeT (Producer Text m) m r@ | |
211 | ||
212 | These simple 'lines' examples reveal a more important difference from @Data.Text.Lazy@ . | |
213 | This is in the types that are most closely associated with our central text type, | |
214 | @Producer Text m r@. In @Data.Text@ and @Data.Text.Lazy@ we find functions like | |
215 | ||
216 | > splitAt :: Int -> Text -> (Text, Text) | |
217 | > lines :: Text -> [Text] | |
218 | > chunksOf :: Int -> Text -> [Text] | |
219 | ||
220 | which relate a Text with a pair of Texts or a list of Texts. | |
221 | The corresponding functions here (taking account of \'lensification\') are | |
222 | ||
223 | > view . splitAt :: (Monad m, Integral n) => n -> Producer Text m r -> Producer Text m (Producer Text m r) | |
224 | > view lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r | |
225 | > view . chunksOf :: (Monad m, Integral n) => n -> Producer Text m r -> FreeT (Producer Text m) m r | |
226 | ||
227 | Some of the types may be more readable if you imagine that we have introduced | |
228 | our own type synonyms | |
229 | ||
230 | > type Text m r = Producer T.Text m r | |
231 | > type Texts m r = FreeT (Producer T.Text m) m r | |
232 | ||
233 | Then we would think of the types above as | |
234 | ||
235 | > view . splitAt :: (Monad m, Integral n) => n -> Text m r -> Text m (Text m r) | |
236 | > view lines :: (Monad m) => Text m r -> Texts m r | |
237 | > view . chunksOf :: (Monad m, Integral n) => n -> Text m r -> Texts m r | |
238 | ||
239 | which brings one closer to the types of the similar functions in @Data.Text.Lazy@ | |
240 | ||
241 | In the type @Producer Text m (Producer Text m r)@ the second | |
242 | element of the \'pair\' of effectful Texts cannot simply be retrieved | |
243 | with something like 'snd'. This is an \'effectful\' pair, and one must work | |
244 | through the effects of the first element to arrive at the second Text stream, even | |
245 | if you are proposing to throw the Text in the first element away. | |
246 | Note that we use Control.Monad.join to fuse the pair back together, since it specializes to | |
247 | ||
248 | > join :: Monad m => Producer Text m (Producer m r) -> Producer m r | |
249 | ||
250 | The return type of 'lines', 'words', 'chunksOf' and the other "splitter" functions, | |
251 | @FreeT (Producer m Text) m r@ -- our @Texts m r@ -- is the type of (effectful) | |
252 | lists of (effectful) texts. The type @([Text],r)@ might be seen to gather | |
253 | together things of the forms: | |
254 | ||
255 | > r | |
256 | > (Text,r) | |
257 | > (Text, (Text, r)) | |
258 | > (Text, (Text, (Text, r))) | |
259 | > (Text, (Text, (Text, (Text, r)))) | |
260 | > ... | |
261 | ||
262 | We might also have identified the sum of those types with @Free ((,) Text) r@ | |
263 | -- or, more absurdly, @FreeT ((,) Text) Identity r@. Similarly, @FreeT (Producer Text m) m r@ | |
264 | encompasses all the members of the sequence: | |
265 | ||
266 | > m r | |
267 | > Producer Text m r | |
268 | > Producer Text m (Producer Text m r) | |
269 | > Producer Text m (Producer Text m (Producer Text m r)) | |
270 | > ... | |
271 | ||
272 | One might think that | |
273 | ||
274 | > lines :: Monad m => Lens' (Producer Text m r) (FreeT (Producer Text m) m r) | |
275 | > view . lines :: Monad m => Producer Text m r -> FreeT (Producer Text m) m r | |
276 | ||
277 | should really have the type | |
278 | ||
279 | > lines :: Monad m => Pipe Text Text m r | |
280 | ||
281 | as e.g. 'toUpper' does. But this would spoil the control we are | |
282 | attempting to maintain over the size of chunks. It is in fact just | |
283 | as unreasonable to want such a pipe as to want | |
284 | ||
285 | > Data.Text.Lazy.lines :: Text -> Text | |
286 | ||
287 | to 'rechunk' the strict Text chunks inside the lazy Text to respect | |
288 | line boundaries. In fact we have | |
289 | ||
290 | > Data.Text.Lazy.lines :: Text -> [Text] | |
291 | > Prelude.lines :: String -> [String] | |
292 | ||
293 | where the elements of the list are themselves lazy Texts or Strings; the use | |
294 | of @FreeT (Producer Text m) m r@ is simply the 'effectful' version of this. | |
295 | ||
296 | The @Pipes.Group@ module, which can generally be imported without qualification, | |
297 | provides many functions for working with things of type @FreeT (Producer a m) m r@ | |
298 | ||
299 | ||
300 | -} | |
301 | ||
91727d11 | 302 | -- | Convert a lazy 'TL.Text' into a 'Producer' of strict 'Text's |
303 | fromLazy :: (Monad m) => TL.Text -> Producer' Text m () | |
304 | fromLazy = foldrChunks (\e a -> yield e >> a) (return ()) | |
ca6f90a0 | 305 | {-# INLINE fromLazy #-} |
91727d11 | 306 | |
1677dc12 | 307 | |
308 | type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a) | |
309 | ||
310 | type Iso' a b = forall f p . (Functor f, Profunctor p) => p b (f b) -> p a (f a) | |
311 | ||
312 | (^.) :: a -> ((b -> Constant b b) -> (a -> Constant b a)) -> b | |
313 | a ^. lens = getConstant (lens Constant a) | |
314 | ||
315 | ||
91727d11 | 316 | -- | Apply a transformation to each 'Char' in the stream |
317 | map :: (Monad m) => (Char -> Char) -> Pipe Text Text m r | |
318 | map f = P.map (T.map f) | |
319 | {-# INLINABLE map #-} | |
320 | ||
ff38b9f0 | 321 | {-# RULES "p >-> map f" forall p f . |
322 | p >-> map f = for p (\txt -> yield (T.map f txt)) | |
323 | #-} | |
324 | ||
31f41a5d | 325 | -- | Map a function over the characters of a text stream and concatenate the results |
91727d11 | 326 | concatMap |
327 | :: (Monad m) => (Char -> Text) -> Pipe Text Text m r | |
328 | concatMap f = P.map (T.concatMap f) | |
329 | {-# INLINABLE concatMap #-} | |
330 | ||
ff38b9f0 | 331 | {-# RULES "p >-> concatMap f" forall p f . |
332 | p >-> concatMap f = for p (\txt -> yield (T.concatMap f txt)) | |
333 | #-} | |
7faef8bc | 334 | |
ff38b9f0 | 335 | |
c0343bc9 | 336 | -- | Transform a Pipe of 'String's into one of 'Text' chunks |
7faef8bc | 337 | pack :: Monad m => Pipe String Text m r |
338 | pack = P.map T.pack | |
339 | {-# INLINEABLE pack #-} | |
340 | ||
ff38b9f0 | 341 | {-# RULES "p >-> pack" forall p . |
342 | p >-> pack = for p (\txt -> yield (T.pack txt)) | |
343 | #-} | |
344 | ||
345 | -- | Transform a Pipes of 'Text' chunks into one of 'String's | |
7faef8bc | 346 | unpack :: Monad m => Pipe Text String m r |
d4732515 | 347 | unpack = for cat (\t -> yield (T.unpack t)) |
7faef8bc | 348 | {-# INLINEABLE unpack #-} |
349 | ||
ff38b9f0 | 350 | {-# RULES "p >-> unpack" forall p . |
351 | p >-> unpack = for p (\txt -> yield (T.unpack txt)) | |
352 | #-} | |
d4732515 | 353 | |
b0d86a59 | 354 | -- | @toCaseFold@, @toLower@, @toUpper@ and @stripStart@ are standard 'Text' utilities, |
355 | -- here acting as 'Text' pipes, rather as they would on a lazy text | |
a4913c42 | 356 | toCaseFold :: Monad m => Pipe Text Text m r |
7faef8bc | 357 | toCaseFold = P.map T.toCaseFold |
358 | {-# INLINEABLE toCaseFold #-} | |
359 | ||
ff38b9f0 | 360 | {-# RULES "p >-> toCaseFold" forall p . |
361 | p >-> toCaseFold = for p (\txt -> yield (T.toCaseFold txt)) | |
362 | #-} | |
363 | ||
364 | ||
c0343bc9 | 365 | -- | lowercase incoming 'Text' |
a4913c42 | 366 | toLower :: Monad m => Pipe Text Text m r |
7faef8bc | 367 | toLower = P.map T.toLower |
368 | {-# INLINEABLE toLower #-} | |
369 | ||
ff38b9f0 | 370 | {-# RULES "p >-> toLower" forall p . |
371 | p >-> toLower = for p (\txt -> yield (T.toLower txt)) | |
372 | #-} | |
373 | ||
c0343bc9 | 374 | -- | uppercase incoming 'Text' |
c70edb9d | 375 | toUpper :: Monad m => Pipe Text Text m r |
7faef8bc | 376 | toUpper = P.map T.toUpper |
377 | {-# INLINEABLE toUpper #-} | |
378 | ||
ff38b9f0 | 379 | {-# RULES "p >-> toUpper" forall p . |
380 | p >-> toUpper = for p (\txt -> yield (T.toUpper txt)) | |
381 | #-} | |
382 | ||
c0343bc9 | 383 | -- | Remove leading white space from an incoming succession of 'Text's |
7faef8bc | 384 | stripStart :: Monad m => Pipe Text Text m r |
385 | stripStart = do | |
386 | chunk <- await | |
387 | let text = T.stripStart chunk | |
388 | if T.null text | |
389 | then stripStart | |
b0d86a59 | 390 | else do yield text |
391 | cat | |
7faef8bc | 392 | {-# INLINEABLE stripStart #-} |
393 | ||
31f41a5d | 394 | -- | @(take n)@ only allows @n@ individual characters to pass; |
395 | -- contrast @Pipes.Prelude.take@ which would let @n@ chunks pass. | |
91727d11 | 396 | take :: (Monad m, Integral a) => a -> Pipe Text Text m () |
397 | take n0 = go n0 where | |
398 | go n | |
399 | | n <= 0 = return () | |
400 | | otherwise = do | |
31f41a5d | 401 | txt <- await |
402 | let len = fromIntegral (T.length txt) | |
91727d11 | 403 | if (len > n) |
31f41a5d | 404 | then yield (T.take (fromIntegral n) txt) |
91727d11 | 405 | else do |
31f41a5d | 406 | yield txt |
91727d11 | 407 | go (n - len) |
408 | {-# INLINABLE take #-} | |
409 | ||
31f41a5d | 410 | -- | @(drop n)@ drops the first @n@ characters |
91727d11 | 411 | drop :: (Monad m, Integral a) => a -> Pipe Text Text m r |
412 | drop n0 = go n0 where | |
413 | go n | |
414 | | n <= 0 = cat | |
415 | | otherwise = do | |
31f41a5d | 416 | txt <- await |
417 | let len = fromIntegral (T.length txt) | |
91727d11 | 418 | if (len >= n) |
419 | then do | |
31f41a5d | 420 | yield (T.drop (fromIntegral n) txt) |
91727d11 | 421 | cat |
422 | else go (n - len) | |
423 | {-# INLINABLE drop #-} | |
424 | ||
31f41a5d | 425 | -- | Take characters until they fail the predicate |
91727d11 | 426 | takeWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m () |
427 | takeWhile predicate = go | |
428 | where | |
429 | go = do | |
31f41a5d | 430 | txt <- await |
431 | let (prefix, suffix) = T.span predicate txt | |
91727d11 | 432 | if (T.null suffix) |
433 | then do | |
31f41a5d | 434 | yield txt |
91727d11 | 435 | go |
436 | else yield prefix | |
437 | {-# INLINABLE takeWhile #-} | |
438 | ||
31f41a5d | 439 | -- | Drop characters until they fail the predicate |
91727d11 | 440 | dropWhile :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r |
441 | dropWhile predicate = go where | |
442 | go = do | |
31f41a5d | 443 | txt <- await |
444 | case T.findIndex (not . predicate) txt of | |
91727d11 | 445 | Nothing -> go |
446 | Just i -> do | |
31f41a5d | 447 | yield (T.drop i txt) |
91727d11 | 448 | cat |
449 | {-# INLINABLE dropWhile #-} | |
450 | ||
451 | -- | Only allows 'Char's to pass if they satisfy the predicate | |
452 | filter :: (Monad m) => (Char -> Bool) -> Pipe Text Text m r | |
453 | filter predicate = P.map (T.filter predicate) | |
454 | {-# INLINABLE filter #-} | |
455 | ||
ff38b9f0 | 456 | {-# RULES "p >-> filter q" forall p q . |
457 | p >-> filter q = for p (\txt -> yield (T.filter q txt)) | |
458 | #-} | |
459 | ||
31f41a5d | 460 | -- | Strict left scan over the characters |
91727d11 | 461 | scan |
462 | :: (Monad m) | |
463 | => (Char -> Char -> Char) -> Char -> Pipe Text Text m r | |
11645cdc GG |
464 | scan step begin = do |
465 | yield (T.singleton begin) | |
466 | go begin | |
91727d11 | 467 | where |
31f41a5d | 468 | go c = do |
469 | txt <- await | |
470 | let txt' = T.scanl step c txt | |
471 | c' = T.last txt' | |
11645cdc | 472 | yield (T.tail txt') |
31f41a5d | 473 | go c' |
91727d11 | 474 | {-# INLINABLE scan #-} |
475 | ||
476 | {-| Fold a pure 'Producer' of strict 'Text's into a lazy | |
477 | 'TL.Text' | |
478 | -} | |
479 | toLazy :: Producer Text Identity () -> TL.Text | |
480 | toLazy = TL.fromChunks . P.toList | |
481 | {-# INLINABLE toLazy #-} | |
482 | ||
483 | {-| Fold an effectful 'Producer' of strict 'Text's into a lazy | |
484 | 'TL.Text' | |
485 | ||
486 | Note: 'toLazyM' is not an idiomatic use of @pipes@, but I provide it for | |
487 | simple testing purposes. Idiomatic @pipes@ style consumes the chunks | |
488 | immediately as they are generated instead of loading them all into memory. | |
489 | -} | |
490 | toLazyM :: (Monad m) => Producer Text m () -> m TL.Text | |
491 | toLazyM = liftM TL.fromChunks . P.toListM | |
492 | {-# INLINABLE toLazyM #-} | |
493 | ||
31f41a5d | 494 | -- | Reduce the text stream using a strict left fold over characters |
64e03122 | 495 | foldChars |
91727d11 | 496 | :: Monad m |
497 | => (x -> Char -> x) -> x -> (x -> r) -> Producer Text m () -> m r | |
64e03122 | 498 | foldChars step begin done = P.fold (T.foldl' step) begin done |
1677dc12 | 499 | {-# INLINABLE foldChars #-} |
91727d11 | 500 | |
501 | -- | Retrieve the first 'Char' | |
502 | head :: (Monad m) => Producer Text m () -> m (Maybe Char) | |
503 | head = go | |
504 | where | |
505 | go p = do | |
506 | x <- nextChar p | |
507 | case x of | |
508 | Left _ -> return Nothing | |
31f41a5d | 509 | Right (c, _) -> return (Just c) |
91727d11 | 510 | {-# INLINABLE head #-} |
511 | ||
512 | -- | Retrieve the last 'Char' | |
513 | last :: (Monad m) => Producer Text m () -> m (Maybe Char) | |
514 | last = go Nothing | |
515 | where | |
516 | go r p = do | |
517 | x <- next p | |
518 | case x of | |
519 | Left () -> return r | |
31f41a5d | 520 | Right (txt, p') -> |
521 | if (T.null txt) | |
91727d11 | 522 | then go r p' |
31f41a5d | 523 | else go (Just $ T.last txt) p' |
91727d11 | 524 | {-# INLINABLE last #-} |
525 | ||
526 | -- | Determine if the stream is empty | |
527 | null :: (Monad m) => Producer Text m () -> m Bool | |
528 | null = P.all T.null | |
529 | {-# INLINABLE null #-} | |
530 | ||
62e8521c | 531 | -- | Count the number of characters in the stream |
91727d11 | 532 | length :: (Monad m, Num n) => Producer Text m () -> m n |
31f41a5d | 533 | length = P.fold (\n txt -> n + fromIntegral (T.length txt)) 0 id |
91727d11 | 534 | {-# INLINABLE length #-} |
535 | ||
536 | -- | Fold that returns whether 'M.Any' received 'Char's satisfy the predicate | |
537 | any :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool | |
538 | any predicate = P.any (T.any predicate) | |
539 | {-# INLINABLE any #-} | |
540 | ||
541 | -- | Fold that returns whether 'M.All' received 'Char's satisfy the predicate | |
542 | all :: (Monad m) => (Char -> Bool) -> Producer Text m () -> m Bool | |
543 | all predicate = P.all (T.all predicate) | |
544 | {-# INLINABLE all #-} | |
545 | ||
62e8521c | 546 | -- | Return the maximum 'Char' within a text stream |
91727d11 | 547 | maximum :: (Monad m) => Producer Text m () -> m (Maybe Char) |
548 | maximum = P.fold step Nothing id | |
549 | where | |
31f41a5d | 550 | step mc txt = |
551 | if (T.null txt) | |
552 | then mc | |
553 | else Just $ case mc of | |
554 | Nothing -> T.maximum txt | |
555 | Just c -> max c (T.maximum txt) | |
91727d11 | 556 | {-# INLINABLE maximum #-} |
557 | ||
62e8521c | 558 | -- | Return the minimum 'Char' within a text stream (surely very useful!) |
91727d11 | 559 | minimum :: (Monad m) => Producer Text m () -> m (Maybe Char) |
560 | minimum = P.fold step Nothing id | |
561 | where | |
31f41a5d | 562 | step mc txt = |
563 | if (T.null txt) | |
564 | then mc | |
565 | else case mc of | |
566 | Nothing -> Just (T.minimum txt) | |
567 | Just c -> Just (min c (T.minimum txt)) | |
91727d11 | 568 | {-# INLINABLE minimum #-} |
569 | ||
91727d11 | 570 | -- | Find the first element in the stream that matches the predicate |
571 | find | |
572 | :: (Monad m) | |
573 | => (Char -> Bool) -> Producer Text m () -> m (Maybe Char) | |
574 | find predicate p = head (p >-> filter predicate) | |
575 | {-# INLINABLE find #-} | |
576 | ||
62e8521c | 577 | -- | Index into a text stream |
91727d11 | 578 | index |
579 | :: (Monad m, Integral a) | |
580 | => a-> Producer Text m () -> m (Maybe Char) | |
581 | index n p = head (p >-> drop n) | |
582 | {-# INLINABLE index #-} | |
583 | ||
63ea9ffd | 584 | |
31f41a5d | 585 | -- | Store a tally of how many segments match the given 'Text' |
586 | count :: (Monad m, Num n) => Text -> Producer Text m () -> m n | |
587 | count c p = P.fold (+) 0 id (p >-> P.map (fromIntegral . T.count c)) | |
588 | {-# INLINABLE count #-} | |
589 | ||
9e9bb0ce | 590 | |
1a83ae4e | 591 | -- | Consume the first character from a stream of 'Text' |
592 | -- | |
593 | -- 'next' either fails with a 'Left' if the 'Producer' has no more characters or | |
594 | -- succeeds with a 'Right' providing the next character and the remainder of the | |
595 | -- 'Producer'. | |
9e9bb0ce | 596 | |
9e9bb0ce | 597 | nextChar |
598 | :: (Monad m) | |
599 | => Producer Text m r | |
600 | -> m (Either r (Char, Producer Text m r)) | |
601 | nextChar = go | |
602 | where | |
603 | go p = do | |
604 | x <- next p | |
605 | case x of | |
606 | Left r -> return (Left r) | |
607 | Right (txt, p') -> case (T.uncons txt) of | |
608 | Nothing -> go p' | |
609 | Just (c, txt') -> return (Right (c, yield txt' >> p')) | |
610 | {-# INLINABLE nextChar #-} | |
611 | ||
1a83ae4e | 612 | -- | Draw one 'Char' from a stream of 'Text', returning 'Left' if the 'Producer' is empty |
613 | ||
9e9bb0ce | 614 | drawChar :: (Monad m) => Parser Text m (Maybe Char) |
615 | drawChar = do | |
616 | x <- PP.draw | |
617 | case x of | |
618 | Nothing -> return Nothing | |
619 | Just txt -> case (T.uncons txt) of | |
620 | Nothing -> drawChar | |
621 | Just (c, txt') -> do | |
622 | PP.unDraw txt' | |
623 | return (Just c) | |
624 | {-# INLINABLE drawChar #-} | |
625 | ||
626 | -- | Push back a 'Char' onto the underlying 'Producer' | |
627 | unDrawChar :: (Monad m) => Char -> Parser Text m () | |
628 | unDrawChar c = modify (yield (T.singleton c) >>) | |
629 | {-# INLINABLE unDrawChar #-} | |
630 | ||
631 | {-| 'peekChar' checks the first 'Char' in the stream, but uses 'unDrawChar' to | |
632 | push the 'Char' back | |
633 | ||
634 | > peekChar = do | |
635 | > x <- drawChar | |
636 | > case x of | |
637 | > Left _ -> return () | |
638 | > Right c -> unDrawChar c | |
639 | > return x | |
1a83ae4e | 640 | |
9e9bb0ce | 641 | -} |
1a83ae4e | 642 | |
9e9bb0ce | 643 | peekChar :: (Monad m) => Parser Text m (Maybe Char) |
644 | peekChar = do | |
645 | x <- drawChar | |
646 | case x of | |
647 | Nothing -> return () | |
648 | Just c -> unDrawChar c | |
649 | return x | |
650 | {-# INLINABLE peekChar #-} | |
651 | ||
652 | {-| Check if the underlying 'Producer' has no more characters | |
653 | ||
654 | Note that this will skip over empty 'Text' chunks, unlike | |
655 | 'PP.isEndOfInput' from @pipes-parse@, which would consider | |
656 | an empty 'Text' a valid bit of input. | |
657 | ||
658 | > isEndOfChars = liftM isLeft peekChar | |
659 | -} | |
660 | isEndOfChars :: (Monad m) => Parser Text m Bool | |
661 | isEndOfChars = do | |
662 | x <- peekChar | |
663 | return (case x of | |
664 | Nothing -> True | |
665 | Just _-> False ) | |
666 | {-# INLINABLE isEndOfChars #-} | |
667 | ||
668 | ||
31f41a5d | 669 | -- | Splits a 'Producer' after the given number of characters |
91727d11 | 670 | splitAt |
671 | :: (Monad m, Integral n) | |
672 | => n | |
9e9bb0ce | 673 | -> Lens' (Producer Text m r) |
674 | (Producer Text m (Producer Text m r)) | |
675 | splitAt n0 k p0 = fmap join (k (go n0 p0)) | |
91727d11 | 676 | where |
677 | go 0 p = return p | |
678 | go n p = do | |
679 | x <- lift (next p) | |
680 | case x of | |
681 | Left r -> return (return r) | |
31f41a5d | 682 | Right (txt, p') -> do |
683 | let len = fromIntegral (T.length txt) | |
91727d11 | 684 | if (len <= n) |
685 | then do | |
31f41a5d | 686 | yield txt |
91727d11 | 687 | go (n - len) p' |
688 | else do | |
31f41a5d | 689 | let (prefix, suffix) = T.splitAt (fromIntegral n) txt |
91727d11 | 690 | yield prefix |
691 | return (yield suffix >> p') | |
692 | {-# INLINABLE splitAt #-} | |
693 | ||
91727d11 | 694 | |
1a83ae4e | 695 | -- | Split a text stream in two, producing the longest |
696 | -- consecutive group of characters that satisfies the predicate | |
697 | -- and returning the rest | |
698 | ||
91727d11 | 699 | span |
700 | :: (Monad m) | |
701 | => (Char -> Bool) | |
9e9bb0ce | 702 | -> Lens' (Producer Text m r) |
703 | (Producer Text m (Producer Text m r)) | |
704 | span predicate k p0 = fmap join (k (go p0)) | |
91727d11 | 705 | where |
706 | go p = do | |
707 | x <- lift (next p) | |
708 | case x of | |
709 | Left r -> return (return r) | |
31f41a5d | 710 | Right (txt, p') -> do |
711 | let (prefix, suffix) = T.span predicate txt | |
91727d11 | 712 | if (T.null suffix) |
713 | then do | |
31f41a5d | 714 | yield txt |
91727d11 | 715 | go p' |
716 | else do | |
717 | yield prefix | |
718 | return (yield suffix >> p') | |
719 | {-# INLINABLE span #-} | |
720 | ||
1a83ae4e | 721 | {-| Split a text stream in two, producing the longest |
62e8521c | 722 | consecutive group of characters that don't satisfy the predicate |
91727d11 | 723 | -} |
724 | break | |
725 | :: (Monad m) | |
726 | => (Char -> Bool) | |
9e9bb0ce | 727 | -> Lens' (Producer Text m r) |
728 | (Producer Text m (Producer Text m r)) | |
91727d11 | 729 | break predicate = span (not . predicate) |
730 | {-# INLINABLE break #-} | |
731 | ||
9e9bb0ce | 732 | {-| Improper lens that splits after the first group of equivalent Chars, as |
733 | defined by the given equivalence relation | |
734 | -} | |
735 | groupBy | |
736 | :: (Monad m) | |
737 | => (Char -> Char -> Bool) | |
738 | -> Lens' (Producer Text m r) | |
739 | (Producer Text m (Producer Text m r)) | |
740 | groupBy equals k p0 = fmap join (k ((go p0))) where | |
741 | go p = do | |
742 | x <- lift (next p) | |
743 | case x of | |
744 | Left r -> return (return r) | |
745 | Right (txt, p') -> case T.uncons txt of | |
746 | Nothing -> go p' | |
747 | Just (c, _) -> (yield txt >> p') ^. span (equals c) | |
748 | {-# INLINABLE groupBy #-} | |
749 | ||
750 | -- | Improper lens that splits after the first succession of identical 'Char' s | |
751 | group :: Monad m | |
752 | => Lens' (Producer Text m r) | |
753 | (Producer Text m (Producer Text m r)) | |
754 | group = groupBy (==) | |
755 | {-# INLINABLE group #-} | |
756 | ||
757 | {-| Improper lens that splits a 'Producer' after the first word | |
758 | ||
759 | Unlike 'words', this does not drop leading whitespace | |
760 | -} | |
761 | word :: (Monad m) | |
762 | => Lens' (Producer Text m r) | |
763 | (Producer Text m (Producer Text m r)) | |
764 | word k p0 = fmap join (k (to p0)) | |
765 | where | |
766 | to p = do | |
767 | p' <- p^.span isSpace | |
768 | p'^.break isSpace | |
769 | {-# INLINABLE word #-} | |
770 | ||
771 | ||
772 | line :: (Monad m) | |
773 | => Lens' (Producer Text m r) | |
774 | (Producer Text m (Producer Text m r)) | |
775 | line = break (== '\n') | |
776 | ||
777 | {-# INLINABLE line #-} | |
778 | ||
779 | ||
780 | -- | Intersperse a 'Char' in between the characters of stream of 'Text' | |
781 | intersperse | |
782 | :: (Monad m) => Char -> Producer Text m r -> Producer Text m r | |
783 | intersperse c = go0 | |
784 | where | |
785 | go0 p = do | |
786 | x <- lift (next p) | |
787 | case x of | |
788 | Left r -> return r | |
789 | Right (txt, p') -> do | |
790 | yield (T.intersperse c txt) | |
791 | go1 p' | |
792 | go1 p = do | |
793 | x <- lift (next p) | |
794 | case x of | |
795 | Left r -> return r | |
796 | Right (txt, p') -> do | |
797 | yield (T.singleton c) | |
798 | yield (T.intersperse c txt) | |
799 | go1 p' | |
800 | {-# INLINABLE intersperse #-} | |
801 | ||
802 | ||
803 | ||
804 | -- | Improper isomorphism between a 'Producer' of 'ByteString's and 'Word8's | |
805 | packChars :: Monad m => Iso' (Producer Char m x) (Producer Text m x) | |
806 | packChars = Data.Profunctor.dimap to (fmap from) | |
807 | where | |
808 | -- to :: Monad m => Producer Char m x -> Producer Text m x | |
7ed76745 | 809 | to p = PG.folds step id done (p^.PG.chunksOf defaultChunkSize) |
9e9bb0ce | 810 | |
811 | step diffAs c = diffAs . (c:) | |
812 | ||
813 | done diffAs = T.pack (diffAs []) | |
814 | ||
815 | -- from :: Monad m => Producer Text m x -> Producer Char m x | |
816 | from p = for p (each . T.unpack) | |
817 | {-# INLINABLE packChars #-} | |
818 | ||
0f8c6f1b | 819 | |
820 | -- | Split a text stream into 'FreeT'-delimited text streams of fixed size | |
821 | chunksOf | |
822 | :: (Monad m, Integral n) | |
823 | => n -> Lens' (Producer Text m r) | |
824 | (FreeT (Producer Text m) m r) | |
825 | chunksOf n k p0 = fmap concats (k (FreeT (go p0))) | |
826 | where | |
827 | go p = do | |
828 | x <- next p | |
829 | return $ case x of | |
7ed76745 | 830 | Left r -> Pure r |
831 | Right (txt, p') -> Free $ do | |
0f8c6f1b | 832 | p'' <- (yield txt >> p') ^. splitAt n |
7ed76745 | 833 | return $ FreeT (go p'') |
0f8c6f1b | 834 | {-# INLINABLE chunksOf #-} |
835 | ||
836 | ||
62e8521c | 837 | {-| Split a text stream into sub-streams delimited by characters that satisfy the |
91727d11 | 838 | predicate |
839 | -} | |
1677dc12 | 840 | splitsWith |
91727d11 | 841 | :: (Monad m) |
842 | => (Char -> Bool) | |
843 | -> Producer Text m r | |
7ed76745 | 844 | -> FreeT (Producer Text m) m r |
845 | splitsWith predicate p0 = FreeT (go0 p0) | |
91727d11 | 846 | where |
847 | go0 p = do | |
848 | x <- next p | |
849 | case x of | |
7ed76745 | 850 | Left r -> return (Pure r) |
31f41a5d | 851 | Right (txt, p') -> |
852 | if (T.null txt) | |
91727d11 | 853 | then go0 p' |
7ed76745 | 854 | else return $ Free $ do |
9e9bb0ce | 855 | p'' <- (yield txt >> p') ^. span (not . predicate) |
7ed76745 | 856 | return $ FreeT (go1 p'') |
91727d11 | 857 | go1 p = do |
858 | x <- nextChar p | |
859 | return $ case x of | |
7ed76745 | 860 | Left r -> Pure r |
861 | Right (_, p') -> Free $ do | |
9e9bb0ce | 862 | p'' <- p' ^. span (not . predicate) |
7ed76745 | 863 | return $ FreeT (go1 p'') |
1677dc12 | 864 | {-# INLINABLE splitsWith #-} |
91727d11 | 865 | |
31f41a5d | 866 | -- | Split a text stream using the given 'Char' as the delimiter |
0f8c6f1b | 867 | splits :: (Monad m) |
91727d11 | 868 | => Char |
0f8c6f1b | 869 | -> Lens' (Producer Text m r) |
870 | (FreeT (Producer Text m) m r) | |
871 | splits c k p = | |
7ed76745 | 872 | fmap (PG.intercalates (yield (T.singleton c))) (k (splitsWith (c ==) p)) |
0f8c6f1b | 873 | {-# INLINABLE splits #-} |
874 | ||
875 | {-| Isomorphism between a stream of 'Text' and groups of equivalent 'Char's , using the | |
876 | given equivalence relation | |
877 | -} | |
878 | groupsBy | |
879 | :: Monad m | |
880 | => (Char -> Char -> Bool) | |
881 | -> Lens' (Producer Text m x) (FreeT (Producer Text m) m x) | |
7ed76745 | 882 | groupsBy equals k p0 = fmap concats (k (FreeT (go p0))) where |
0f8c6f1b | 883 | go p = do x <- next p |
7ed76745 | 884 | case x of Left r -> return (Pure r) |
0f8c6f1b | 885 | Right (bs, p') -> case T.uncons bs of |
886 | Nothing -> go p' | |
7ed76745 | 887 | Just (c, _) -> do return $ Free $ do |
0f8c6f1b | 888 | p'' <- (yield bs >> p')^.span (equals c) |
7ed76745 | 889 | return $ FreeT (go p'') |
0f8c6f1b | 890 | {-# INLINABLE groupsBy #-} |
891 | ||
892 | ||
893 | -- | Like 'groupsBy', where the equality predicate is ('==') | |
894 | groups | |
895 | :: Monad m | |
896 | => Lens' (Producer Text m x) (FreeT (Producer Text m) m x) | |
897 | groups = groupsBy (==) | |
898 | {-# INLINABLE groups #-} | |
899 | ||
91727d11 | 900 | |
91727d11 | 901 | |
62e8521c | 902 | {-| Split a text stream into 'FreeT'-delimited lines |
91727d11 | 903 | -} |
904 | lines | |
0f8c6f1b | 905 | :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r) |
906 | lines = Data.Profunctor.dimap _lines (fmap _unlines) | |
91727d11 | 907 | where |
7ed76745 | 908 | _lines p0 = FreeT (go0 p0) |
0f8c6f1b | 909 | where |
910 | go0 p = do | |
911 | x <- next p | |
912 | case x of | |
7ed76745 | 913 | Left r -> return (Pure r) |
0f8c6f1b | 914 | Right (txt, p') -> |
915 | if (T.null txt) | |
916 | then go0 p' | |
7ed76745 | 917 | else return $ Free $ go1 (yield txt >> p') |
0f8c6f1b | 918 | go1 p = do |
919 | p' <- p ^. break ('\n' ==) | |
7ed76745 | 920 | return $ FreeT $ do |
0f8c6f1b | 921 | x <- nextChar p' |
922 | case x of | |
7ed76745 | 923 | Left r -> return $ Pure r |
0f8c6f1b | 924 | Right (_, p'') -> go0 p'' |
925 | -- _unlines | |
926 | -- :: Monad m | |
927 | -- => FreeT (Producer Text m) m x -> Producer Text m x | |
7fc48f7c | 928 | _unlines = concats . PG.maps (<* yield (T.singleton '\n')) |
929 | ||
0f8c6f1b | 930 | |
91727d11 | 931 | {-# INLINABLE lines #-} |
91727d11 | 932 | |
31f41a5d | 933 | |
31f41a5d | 934 | -- | Split a text stream into 'FreeT'-delimited words |
91727d11 | 935 | words |
0f8c6f1b | 936 | :: (Monad m) => Iso' (Producer Text m r) (FreeT (Producer Text m) m r) |
937 | words = Data.Profunctor.dimap go (fmap _unwords) | |
91727d11 | 938 | where |
7ed76745 | 939 | go p = FreeT $ do |
cf10d6f1 | 940 | x <- next (p >-> dropWhile isSpace) |
941 | return $ case x of | |
7ed76745 | 942 | Left r -> Pure r |
943 | Right (bs, p') -> Free $ do | |
9e9bb0ce | 944 | p'' <- (yield bs >> p') ^. break isSpace |
cf10d6f1 | 945 | return (go p'') |
7ed76745 | 946 | _unwords = PG.intercalates (yield $ T.singleton ' ') |
0f8c6f1b | 947 | |
91727d11 | 948 | {-# INLINABLE words #-} |
949 | ||
cf10d6f1 | 950 | |
31f41a5d | 951 | {-| 'intercalate' concatenates the 'FreeT'-delimited text streams after |
952 | interspersing a text stream in between them | |
91727d11 | 953 | -} |
954 | intercalate | |
955 | :: (Monad m) | |
956 | => Producer Text m () | |
957 | -> FreeT (Producer Text m) m r | |
958 | -> Producer Text m r | |
959 | intercalate p0 = go0 | |
960 | where | |
961 | go0 f = do | |
7ed76745 | 962 | x <- lift (runFreeT f) |
91727d11 | 963 | case x of |
7ed76745 | 964 | Pure r -> return r |
965 | Free p -> do | |
91727d11 | 966 | f' <- p |
967 | go1 f' | |
968 | go1 f = do | |
7ed76745 | 969 | x <- lift (runFreeT f) |
91727d11 | 970 | case x of |
7ed76745 | 971 | Pure r -> return r |
972 | Free p -> do | |
91727d11 | 973 | p0 |
974 | f' <- p | |
975 | go1 f' | |
976 | {-# INLINABLE intercalate #-} | |
977 | ||
62e8521c | 978 | {-| Join 'FreeT'-delimited lines into a text stream |
91727d11 | 979 | -} |
980 | unlines | |
981 | :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r | |
982 | unlines = go | |
983 | where | |
984 | go f = do | |
7ed76745 | 985 | x <- lift (runFreeT f) |
91727d11 | 986 | case x of |
7ed76745 | 987 | Pure r -> return r |
988 | Free p -> do | |
91727d11 | 989 | f' <- p |
990 | yield $ T.singleton '\n' | |
991 | go f' | |
992 | {-# INLINABLE unlines #-} | |
993 | ||
31f41a5d | 994 | {-| Join 'FreeT'-delimited words into a text stream |
91727d11 | 995 | -} |
996 | unwords | |
997 | :: (Monad m) => FreeT (Producer Text m) m r -> Producer Text m r | |
7fc48f7c | 998 | unwords = intercalate (yield $ T.singleton ' ') |
91727d11 | 999 | {-# INLINABLE unwords #-} |
1000 | ||
91727d11 | 1001 | |
91727d11 | 1002 | {- $reexports |
91727d11 | 1003 | |
1004 | @Data.Text@ re-exports the 'Text' type. | |
1005 | ||
0f8c6f1b | 1006 | @Pipes.Parse@ re-exports 'input', 'concat', 'FreeT' (the type) and the 'Parse' synonym. |
64e03122 | 1007 | -} |
1008 | ||
bbdfd305 | 1009 |