diff options
author | Frédéric Menou <frederic.menou@fretlink.com> | 2016-12-08 10:19:15 +0200 |
---|---|---|
committer | Ismaël Bouya <ismael.bouya@fretlink.com> | 2022-05-17 18:01:51 +0200 |
commit | a9d77a20008efe82862cc1adbfa7a6d4f09f8ff7 (patch) | |
tree | adf3186fdccaeef19151026cdfbd38a530cf9ecb /core/src/Text/Edifact/Parsing/Primitives.hs | |
download | edi-parser-master.tar.gz edi-parser-master.tar.zst edi-parser-master.zip |
Diffstat (limited to 'core/src/Text/Edifact/Parsing/Primitives.hs')
-rw-r--r-- | core/src/Text/Edifact/Parsing/Primitives.hs | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/core/src/Text/Edifact/Parsing/Primitives.hs b/core/src/Text/Edifact/Parsing/Primitives.hs new file mode 100644 index 0000000..61659c8 --- /dev/null +++ b/core/src/Text/Edifact/Parsing/Primitives.hs | |||
@@ -0,0 +1,127 @@ | |||
1 | {-| | ||
2 | Module : Text.Edifact.Parsing.Primitives | ||
3 | Description : Low level combinators | ||
4 | |||
5 | This module let you build parsers for primitive values, ie. values contained | ||
6 | in a simple element, either text or number. | ||
7 | |||
8 | = Examples | ||
9 | |||
10 | To parse a text of 3 characters (@an3@ in standard Edifact representation): | ||
11 | |||
12 | > an3 :: Parser Primitive | ||
13 | > an3 = alphaNumeric `exactly` 3 | ||
14 | |||
15 | To parse a text of up to 10 characters (@an..10@ in standard Edifact representation): | ||
16 | |||
17 | > an_10 :: Parser Primitive | ||
18 | > an_10 = alphaNumeric `upTo` 10 | ||
19 | |||
20 | = Known limitations | ||
21 | |||
22 | Numeric representation is not strictly compatible to the specification. | ||
23 | The specification tells that negative sign (@-@) and decimal sign (@.@) are not | ||
24 | to be counted in the length of the field. | ||
25 | |||
26 | Therefore the following parser will fail even it's legal according to the | ||
27 | specification: | ||
28 | |||
29 | > n_3 :: Parser Primitive | ||
30 | > n_3 = numeric `upTo` 3 | ||
31 | > | ||
32 | > parse n_3 "-12.3" | ||
33 | |||
34 | To be fixed, we have to change the way primitives combinators are built so that | ||
35 | the 'upTo' and 'exactly' combinators are aware of the inner parser. | ||
36 | -} | ||
37 | module Text.Edifact.Parsing.Primitives | ||
38 | ( | ||
39 | -- * Primitives | ||
40 | -- ** Simple elements definition | ||
41 | alphaNumeric | ||
42 | , alpha | ||
43 | , numeric | ||
44 | |||
45 | -- ** Cardinality | ||
46 | , exactly | ||
47 | , upTo | ||
48 | , many | ||
49 | |||
50 | ) where | ||
51 | |||
52 | import Text.Edifact.Parsing.Commons | ||
53 | import Text.Edifact.Types | ||
54 | |||
55 | import Data.String (fromString) | ||
56 | import qualified Data.Text as T (length) | ||
57 | import Text.Parsec (count, lookAhead, many1, noneOf, | ||
58 | oneOf) | ||
59 | import qualified Text.Parsec as P (many) | ||
60 | |||
61 | -- | Parser associated with the @an@ notation. | ||
62 | alphaNumeric :: Parser Char | ||
63 | alphaNumeric = do | ||
64 | separators <- sequence [ getSegmentSeparator | ||
65 | , getElementSeparator | ||
66 | , getCompositeSeparator | ||
67 | ] | ||
68 | tries [ parseEscape *> parseSegmentSeparator | ||
69 | , parseEscape *> parseElementSeparator | ||
70 | , parseEscape *> parseCompositeSeparator | ||
71 | , parseEscape *> parseEscape | ||
72 | , noneOf separators | ||
73 | ] | ||
74 | |||
75 | -- | Parser associated with the @a@ notation. | ||
76 | -- | ||
77 | -- So far it's simply an alias to 'alphaNumeric'. | ||
78 | alpha :: Parser Char | ||
79 | alpha = alphaNumeric | ||
80 | |||
81 | -- | Parser associated with the @n@ notation. | ||
82 | numeric :: Parser Char | ||
83 | numeric = do | ||
84 | punctuationSign <- getDecimalSign | ||
85 | oneOf (punctuationSign : "0123456789-") | ||
86 | |||
87 | -- | Combinator to build a parser of primitive which length is unspecified. | ||
88 | -- | ||
89 | -- Correspondance with the Edifact notation: | ||
90 | -- | ||
91 | -- > many alpha # same as a | ||
92 | -- > many numeric # same as n | ||
93 | -- > many alphaNumeric # same as an | ||
94 | many :: Parser Char -> Parser Primitive | ||
95 | many = fmap fromString . many1 | ||
96 | |||
97 | -- | Combinator to build a parser of primitive which length is capped. | ||
98 | -- | ||
99 | -- Correspondance with the Edifact notation: | ||
100 | -- | ||
101 | -- > alpha `upTo` 3 # same as a..3 | ||
102 | -- > numeric `upTo` 3 # same as n..3 | ||
103 | -- > alphaNumeric `upTo` 3 # same as an..3 | ||
104 | upTo :: Parser Char -> Int -> Parser Primitive | ||
105 | upTo p c = | ||
106 | let check t = | ||
107 | let c' = T.length t | ||
108 | in if c' > c | ||
109 | then failWithPosition ("expected up to " <> show c <> " characters, but encountered " <> show c') | ||
110 | else pure (String t) | ||
111 | maybeEmpty = (<$) mempty . lookAhead | ||
112 | in check =<< | ||
113 | tries [ maybeEmpty parseSegmentSeparator | ||
114 | , maybeEmpty parseElementSeparator | ||
115 | , maybeEmpty parseCompositeSeparator | ||
116 | , fromString <$> P.many p | ||
117 | ] | ||
118 | |||
119 | -- | Combinator to build a parser of primitive which length is fixed. | ||
120 | -- | ||
121 | -- Correspondance with the Edifact notation: | ||
122 | -- | ||
123 | -- > alpha `exactly` 3 # same as a3 | ||
124 | -- > numeric `exactly` 3 # same as n3 | ||
125 | -- > alphaNumeric `exactly` 3 # same as an3 | ||
126 | exactly :: Parser Char -> Int -> Parser Primitive | ||
127 | exactly p c = fromString <$> count c p | ||