diff options
author | Frédéric Menou <frederic.menou@fretlink.com> | 2016-12-08 10:19:15 +0200 |
---|---|---|
committer | Ismaël Bouya <ismael.bouya@fretlink.com> | 2022-05-17 18:01:51 +0200 |
commit | a9d77a20008efe82862cc1adbfa7a6d4f09f8ff7 (patch) | |
tree | adf3186fdccaeef19151026cdfbd38a530cf9ecb /core/src/Text/Edifact/Parsing | |
download | edi-parser-master.tar.gz edi-parser-master.tar.zst edi-parser-master.zip |
Diffstat (limited to 'core/src/Text/Edifact/Parsing')
-rw-r--r-- | core/src/Text/Edifact/Parsing/Combinators.hs | 230 | ||||
-rw-r--r-- | core/src/Text/Edifact/Parsing/Commons.hs | 173 | ||||
-rw-r--r-- | core/src/Text/Edifact/Parsing/Primitives.hs | 127 |
3 files changed, 530 insertions, 0 deletions
diff --git a/core/src/Text/Edifact/Parsing/Combinators.hs b/core/src/Text/Edifact/Parsing/Combinators.hs new file mode 100644 index 0000000..ce3f4be --- /dev/null +++ b/core/src/Text/Edifact/Parsing/Combinators.hs | |||
@@ -0,0 +1,230 @@ | |||
1 | {-# LANGUAGE TupleSections #-} | ||
2 | |||
3 | {-| | ||
4 | Module : Text.Edifact.Parsing.Combinators | ||
5 | Description : High level combinators | ||
6 | -} | ||
7 | module Text.Edifact.Parsing.Combinators | ||
8 | ( -- * Combinators | ||
9 | -- ** Values parsers | ||
10 | message | ||
11 | , segmentGroup | ||
12 | , segment | ||
13 | , composite | ||
14 | , simple | ||
15 | |||
16 | -- ** Position and strictness | ||
17 | , position | ||
18 | , (.@) | ||
19 | , (@.) | ||
20 | , mandatory | ||
21 | , optional | ||
22 | |||
23 | -- ** Repetition of segments and segment groups | ||
24 | , repeated | ||
25 | , repeatedAtLeastOnce | ||
26 | , once | ||
27 | , maybeOnce | ||
28 | ) where | ||
29 | |||
30 | import Text.Edifact.Parsing.Commons | ||
31 | import Text.Edifact.Types | ||
32 | |||
33 | import Text.Parsec (lookAhead, many1, optionMaybe, | ||
34 | string, try) | ||
35 | import qualified Text.Parsec as P (many) | ||
36 | |||
37 | -- | Parses a 'Message'. | ||
38 | -- | ||
39 | -- > messageABCDEF :: Parser Value | ||
40 | -- > messageABCDEF = | ||
41 | -- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35) | ||
42 | -- > c101 = composite "C101" [ position "010" (mandatory simple1234) | ||
43 | -- > , position "020" (optional simple1234) | ||
44 | -- > ] | ||
45 | -- > segmentABC = segment "ABC" [ position "010" (mandatory c101) | ||
46 | -- > ] | ||
47 | -- > in message "ABCDEF" [ position "0010" (mandatory segmentABC) | ||
48 | -- > ] | ||
49 | message :: MessageCode -> [Parser (Position, [Value])] -> Parser Value | ||
50 | message code ps = | ||
51 | let description = "message " <> show code | ||
52 | in Message code <$> sequence ps <??> description | ||
53 | |||
54 | -- | Parses a 'Group'. | ||
55 | -- | ||
56 | -- A Segment Group is the way Edifact format represents hierarchy. One can view | ||
57 | -- a segment group as a sub message. A segment group can be repeated like | ||
58 | -- segments. A segment group wraps segments and segment groups. | ||
59 | segmentGroup :: GroupCode -> [Parser (Position, [Value])] -> Parser Value | ||
60 | segmentGroup code ps = | ||
61 | let description = "segment-group " <> show code | ||
62 | in Group code <$> sequence ps <??> description | ||
63 | |||
64 | -- | Parses a 'Segment'. | ||
65 | -- | ||
66 | -- Following parser: | ||
67 | -- | ||
68 | -- > segmentABC :: Parser Value | ||
69 | -- > segmentABC = | ||
70 | -- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35) | ||
71 | -- > simple2001 = simple "2001" (alphaNumeric `exactly` 3) | ||
72 | -- > c101 = composite "C101" [ position "010" (mandatory simple1234) | ||
73 | -- > , position "020" (optional simple1234) | ||
74 | -- > , position "030" (optional simple1234) | ||
75 | -- > ] | ||
76 | -- > in segment "ABC" [ position "010" (mandatory simple2001) | ||
77 | -- > , position "020" (optional c101) | ||
78 | -- > ] | ||
79 | -- | ||
80 | -- would parse strings such as: | ||
81 | -- | ||
82 | -- >>> parse segmentABC "ABC+123'" | ||
83 | -- Segment "ABC" [ ("010", Just (Simple "2001" "123")) | ||
84 | -- ] | ||
85 | -- >>> parse segmentABC "ABC+123+abcdefgh'" | ||
86 | -- Segment "ABC" [ ("010", Just (Simple "2001" "123")) | ||
87 | -- , ("020", Just (Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh")) | ||
88 | -- ] | ||
89 | -- )) | ||
90 | -- ] | ||
91 | -- >>> parse segmentABC "ABC+123+abcdefgh:ijklmno'" | ||
92 | -- Segment "ABC" [ ("010", Just (Simple "2001" "123")) | ||
93 | -- , ("020", Just (Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh")) | ||
94 | -- , ("020", Just (Simple "1234" "ijklmno")) | ||
95 | -- ] | ||
96 | -- )) | ||
97 | -- ] | ||
98 | segment :: SegmentCode -> [Parser (Position, Maybe Value)] -> Parser Value | ||
99 | segment code parsers = | ||
100 | let go [] = [] <$ parseSegmentSeparator | ||
101 | go (p:ps) = | ||
102 | tries [ [] <$ parseSegmentSeparator | ||
103 | , (:) <$> (parseElementSeparator *> p) | ||
104 | <*> go ps | ||
105 | ] | ||
106 | description = "segment " <> show code | ||
107 | in Segment <$> parseSegmentCode code | ||
108 | <*> go parsers | ||
109 | <??> description | ||
110 | |||
111 | parseSegmentCode :: SegmentCode -> Parser SegmentCode | ||
112 | parseSegmentCode (SegmentCode code) = | ||
113 | let description = "segment code " <> show code | ||
114 | in SegmentCode <$> string code <??> description | ||
115 | |||
116 | -- | Parses a 'Composite' element. | ||
117 | -- | ||
118 | -- Following parser: | ||
119 | -- | ||
120 | -- > compositeC101 :: Parser Value | ||
121 | -- > compositeC101 = | ||
122 | -- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35) | ||
123 | -- > in composite "C101" [ position "010" (mandatory simple1234) | ||
124 | -- > , position "020" (optional simple1234) | ||
125 | -- > , position "030" (optional simple1234) | ||
126 | -- > ] | ||
127 | -- | ||
128 | -- would parse strings such as: | ||
129 | -- | ||
130 | -- >>> parse compositeC101 "abcdefgh" | ||
131 | -- Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh")) | ||
132 | -- ] | ||
133 | -- >>> parse compositeC101 "abcdefgh:ijklmno" | ||
134 | -- Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh")) | ||
135 | -- , ("020", Just (Simple "1234" "ijklmno")) | ||
136 | -- ] | ||
137 | -- >>> parse compositeC101 "abcdefgh::pqrstu" | ||
138 | -- Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh")) | ||
139 | -- , ("020", Just (Simple "1234" "")) | ||
140 | -- , ("030", Just (Simple "1234" "pqrstu")) | ||
141 | -- ] | ||
142 | composite :: CompositeCode -> [Parser (Position, Maybe Value)] -> Parser Value | ||
143 | composite code parsers = | ||
144 | let go [] = pure [] | ||
145 | go (p:ps) = do | ||
146 | let parseSeparator = tries [ parseCompositeSeparator | ||
147 | , lookAhead parseElementSeparator | ||
148 | , lookAhead parseSegmentSeparator | ||
149 | ] | ||
150 | (value, continuation) <- tries [ (, ps) <$> p <* parseSeparator | ||
151 | , (, []) <$> p | ||
152 | ] | ||
153 | (:) value <$> go continuation | ||
154 | description = "composite element " <> show code | ||
155 | in Composite code <$> go parsers <??> description | ||
156 | |||
157 | -- | Parses a 'Simple' element. | ||
158 | -- | ||
159 | -- Following parser would parse strings of size between 0 and 35 characters. | ||
160 | -- | ||
161 | -- > simple1234 :: Parser Value | ||
162 | -- > simple1234 = simple "1234" (alphaNumeric `upTo` 35) | ||
163 | simple :: SimpleCode -> Parser Primitive -> Parser Value | ||
164 | simple code p = | ||
165 | let description = "simple element " <> show code | ||
166 | in Simple code <$> p <??> description | ||
167 | |||
168 | -- | Makes the parsing of the element optional, which doesn't consume input if the given parser doesn't succeed. | ||
169 | optional :: Parser Value -> Parser (Maybe Value) | ||
170 | optional = optionMaybe | ||
171 | |||
172 | -- | Makes the parsing of the element mandatory. | ||
173 | mandatory :: Parser Value -> Parser (Maybe Value) | ||
174 | mandatory = fmap Just | ||
175 | |||
176 | -- | Sets the current 'Position'. This is relevant for segment in a message, for composite or simple element in a segment, and for simple element in a composite. | ||
177 | position :: Position -> Parser (f Value) -> Parser (Position, f Value) | ||
178 | position pos p = | ||
179 | let decorated = (pos,) <$> p | ||
180 | in setCurrentPosition pos *> decorated <* resetCurrentPosition | ||
181 | |||
182 | -- | Alias to 'position'. | ||
183 | -- | ||
184 | -- > compositeC101 :: Parser Value | ||
185 | -- > compositeC101 = | ||
186 | -- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35) | ||
187 | -- > in composite "C101" [ "010" .@ mandatory simple1234 | ||
188 | -- > , "020" .@ optional simple1234 | ||
189 | -- > , "030" .@ optional simple1234 | ||
190 | -- > ] | ||
191 | (.@) :: Position -> Parser (f Value) -> Parser (Position, f Value) | ||
192 | (.@) = position | ||
193 | |||
194 | -- | Flipped alias to 'position'. | ||
195 | -- | ||
196 | -- > compositeC101 :: Parser Value | ||
197 | -- > compositeC101 = | ||
198 | -- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35) | ||
199 | -- > in composite "C101" [ mandatory simple1234 @. "010" | ||
200 | -- > , optional simple1234 @. "020" | ||
201 | -- > , optional simple1234 @. "030" | ||
202 | -- > ] | ||
203 | (@.) :: Parser (f Value) -> Position -> Parser (Position, f Value) | ||
204 | (@.) = flip position | ||
205 | |||
206 | -- | For segments or segment groups, let you express how many occurrences. | ||
207 | repeated :: Int -> Parser a -> Parser [a] | ||
208 | repeated limit p = do | ||
209 | values <- P.many (try p) | ||
210 | let parsed = length values | ||
211 | if parsed > limit | ||
212 | then failWithPosition ("expected up to " <> show limit <> " items, but encountered " <> show parsed) | ||
213 | else pure values | ||
214 | |||
215 | -- | For segments or segment groups, let you express how many occurrences with at least one occurrence. | ||
216 | repeatedAtLeastOnce :: Int -> Parser a -> Parser [a] | ||
217 | repeatedAtLeastOnce limit p = do | ||
218 | values <- many1 (try p) | ||
219 | let parsed = length values | ||
220 | if parsed > limit | ||
221 | then failWithPosition ("expected up to " <> show limit <> " items, but encountered " <> show parsed) | ||
222 | else pure values | ||
223 | |||
224 | -- | For segments or segment groups, let you express you expect only one occurrence. | ||
225 | once :: Parser a -> Parser [a] | ||
226 | once = fmap pure | ||
227 | |||
228 | -- | For segments or segment groups, let you express you expect one or no occurrence. | ||
229 | maybeOnce :: Parser a -> Parser [a] | ||
230 | maybeOnce = fmap (maybe [] pure) . optionMaybe | ||
diff --git a/core/src/Text/Edifact/Parsing/Commons.hs b/core/src/Text/Edifact/Parsing/Commons.hs new file mode 100644 index 0000000..a1c6150 --- /dev/null +++ b/core/src/Text/Edifact/Parsing/Commons.hs | |||
@@ -0,0 +1,173 @@ | |||
1 | module Text.Edifact.Parsing.Commons | ||
2 | ( -- * Parsing context | ||
3 | Parser | ||
4 | , Context(..) | ||
5 | , CurrentPosition(..) | ||
6 | , defaultContext | ||
7 | |||
8 | -- * State combinators | ||
9 | , updateSyntax | ||
10 | , setCurrentPosition | ||
11 | , resetCurrentPosition | ||
12 | |||
13 | -- * Syntax helpers | ||
14 | -- ** Parsing combinators | ||
15 | , parseCompositeSeparator | ||
16 | , parseElementSeparator | ||
17 | , parseSegmentSeparator | ||
18 | , parseEscape | ||
19 | |||
20 | -- ** State accessors | ||
21 | -- | Shortcuts to the syntax in current state. Doesn't alter input stream. | ||
22 | , getCompositeSeparator | ||
23 | , getElementSeparator | ||
24 | , getSegmentSeparator | ||
25 | , getDecimalSign | ||
26 | |||
27 | -- * Context aware failure helpers | ||
28 | , failWithPosition | ||
29 | , (<??>) | ||
30 | |||
31 | -- * Parsec extras | ||
32 | , tries | ||
33 | |||
34 | -- * Technical combinators | ||
35 | , notYetImplemented | ||
36 | ) where | ||
37 | |||
38 | import Text.Edifact.Types (Position, Syntax (..), defaultSyntax) | ||
39 | |||
40 | import Data.Text (Text) | ||
41 | import Text.Parsec (Parsec, char, choice, endOfLine, getState, | ||
42 | modifyState, try, updateState, (<?>)) | ||
43 | |||
44 | -- | Defines our "Text.Parsec" context. | ||
45 | type Parser = Parsec Text Context | ||
46 | |||
47 | data Context = | ||
48 | Context | ||
49 | { parsingSyntax :: Syntax -- ^ State of the syntax. To be updated on the encounter of the @UNA@ segment. | ||
50 | , currentPosition :: CurrentPosition -- ^ Pointer for current position in the parser. Used for enriched parsing error messages. | ||
51 | } | ||
52 | |||
53 | defaultContext :: Context | ||
54 | defaultContext = Context defaultSyntax Undefined | ||
55 | |||
56 | -- | Current position in the parser. | ||
57 | -- | ||
58 | -- For now it only stores the current position in a message, a segment group, | ||
59 | -- a segment, or a composite. | ||
60 | -- | ||
61 | -- Future version could store the whole path to improve debugging. | ||
62 | data CurrentPosition = Undefined | ||
63 | | Defined Position | ||
64 | deriving Show | ||
65 | |||
66 | getSyntax :: Parser Syntax | ||
67 | getSyntax = parsingSyntax <$> getState | ||
68 | |||
69 | -- | Get current charactor for decimal sign. | ||
70 | -- It doesn't parse nor consume input. | ||
71 | getDecimalSign :: Parser Char | ||
72 | getDecimalSign = decimalSign <$> getSyntax | ||
73 | |||
74 | -- | Get current charactor for segment separator. | ||
75 | -- It doesn't parse nor consume input. | ||
76 | getSegmentSeparator :: Parser Char | ||
77 | getSegmentSeparator = segmentSeparator <$> getSyntax | ||
78 | |||
79 | -- | Get current charactor for element separator. | ||
80 | -- It doesn't parse nor consume input. | ||
81 | getElementSeparator :: Parser Char | ||
82 | getElementSeparator = elementSeparator <$> getSyntax | ||
83 | |||
84 | -- | Get current charactor for composite separator. | ||
85 | -- It doesn't parse nor consume input. | ||
86 | getCompositeSeparator :: Parser Char | ||
87 | getCompositeSeparator = compositeSeparator <$> getSyntax | ||
88 | |||
89 | -- | This let change the operators used in the parsing. This is designed for the @UNA@ segment. | ||
90 | updateSyntax :: Syntax -> Parser () | ||
91 | updateSyntax s = updateState (\ c -> c { parsingSyntax = s }) | ||
92 | |||
93 | -- | Read the parser state to extract current position. | ||
94 | -- It doesn't parse nor consume input. | ||
95 | getCurrentPosition :: Parser CurrentPosition | ||
96 | getCurrentPosition = currentPosition <$> getState | ||
97 | |||
98 | -- | Write the parser state to update current position. | ||
99 | -- It doesn't parse nor consume input. | ||
100 | setCurrentPosition :: Position -> Parser () | ||
101 | setCurrentPosition = updateCurrentPosition . Defined | ||
102 | |||
103 | -- | Write the parser state to reset current position. | ||
104 | -- It doesn't parse nor consume input. | ||
105 | resetCurrentPosition :: Parser () | ||
106 | resetCurrentPosition = updateCurrentPosition Undefined | ||
107 | |||
108 | updateCurrentPosition :: CurrentPosition -> Parser () | ||
109 | updateCurrentPosition pos = modifyState (\s -> s { currentPosition = pos }) | ||
110 | |||
111 | -- | Parse current charactor for element separator. | ||
112 | -- It does parse and consume input. | ||
113 | parseElementSeparator :: Parser Char | ||
114 | parseElementSeparator = parseSpecialChar "element separator" elementSeparator | ||
115 | |||
116 | -- | Parse current charactor for composite separator. | ||
117 | -- It does parse and consume input. | ||
118 | parseCompositeSeparator :: Parser Char | ||
119 | parseCompositeSeparator = parseSpecialChar "composite separator" compositeSeparator | ||
120 | |||
121 | -- | Parse current charactor for escape separator. | ||
122 | -- It does parse and consume input. | ||
123 | parseEscape :: Parser Char | ||
124 | parseEscape = parseSpecialChar "escape character" escape | ||
125 | |||
126 | -- | Parse current charactor for segment separator. | ||
127 | -- It does parse and consume input. | ||
128 | -- | ||
129 | -- It also tries consuming end of line after segment separator if any. | ||
130 | parseSegmentSeparator :: Parser Char | ||
131 | parseSegmentSeparator = tries [ parseSpecialChar "segment separator" segmentSeparator <* endOfLine | ||
132 | , parseSpecialChar "segment separator" segmentSeparator | ||
133 | ] | ||
134 | |||
135 | parseSpecialChar :: String -> (Syntax -> Char) -> Parser Char | ||
136 | parseSpecialChar description reader = do | ||
137 | c <- reader <$> getSyntax | ||
138 | let escape' '\"' = "\\\"" | ||
139 | escape' c' = [c'] | ||
140 | comment = description <> " (\"" <> escape' c <> "\")" | ||
141 | char c <?> comment | ||
142 | |||
143 | -- | Let you try various parsers, not consuming until success. | ||
144 | tries :: [Parser a] -> Parser a | ||
145 | tries = choice . map try | ||
146 | |||
147 | -- | Like 'fail', this operator let you annotate a parser if it were to fail. | ||
148 | -- The difference with the standard "Text.Parsec" operator is that it appends | ||
149 | -- the current position if any. | ||
150 | failWithPosition :: String -> Parser a | ||
151 | failWithPosition = withPosition fail | ||
152 | |||
153 | -- | Like '<?>', this operator let you annotate a parser if it were to fail. | ||
154 | -- The difference with the standard "Text.Parsec" operator is that it appends | ||
155 | -- the current position if any. | ||
156 | (<??>) :: Parser a -> String -> Parser a | ||
157 | (<??>) = withPosition . (<?>) | ||
158 | |||
159 | -- Same priority as <?> from Text.Parsec | ||
160 | infix 0 <??> | ||
161 | |||
162 | withPosition :: (String -> Parser a) -> String -> Parser a | ||
163 | withPosition continuation msg = | ||
164 | let mkMessage Undefined = msg | ||
165 | mkMessage (Defined d) = msg <> " at position " <> show d | ||
166 | getMessage = mkMessage <$> getCurrentPosition | ||
167 | in getMessage >>= continuation | ||
168 | |||
169 | -- | Alias to 'failWithPosition' to convey semantics of work-in-progress when | ||
170 | -- writing a parser. This might be useful if you want to partially support a | ||
171 | -- message. | ||
172 | notYetImplemented :: String -> Parser a | ||
173 | notYetImplemented = failWithPosition | ||
diff --git a/core/src/Text/Edifact/Parsing/Primitives.hs b/core/src/Text/Edifact/Parsing/Primitives.hs new file mode 100644 index 0000000..61659c8 --- /dev/null +++ b/core/src/Text/Edifact/Parsing/Primitives.hs | |||
@@ -0,0 +1,127 @@ | |||
1 | {-| | ||
2 | Module : Text.Edifact.Parsing.Primitives | ||
3 | Description : Low level combinators | ||
4 | |||
5 | This module let you build parsers for primitive values, ie. values contained | ||
6 | in a simple element, either text or number. | ||
7 | |||
8 | = Examples | ||
9 | |||
10 | To parse a text of 3 characters (@an3@ in standard Edifact representation): | ||
11 | |||
12 | > an3 :: Parser Primitive | ||
13 | > an3 = alphaNumeric `exactly` 3 | ||
14 | |||
15 | To parse a text of up to 10 characters (@an..10@ in standard Edifact representation): | ||
16 | |||
17 | > an_10 :: Parser Primitive | ||
18 | > an_10 = alphaNumeric `upTo` 10 | ||
19 | |||
20 | = Known limitations | ||
21 | |||
22 | Numeric representation is not strictly compatible to the specification. | ||
23 | The specification tells that negative sign (@-@) and decimal sign (@.@) are not | ||
24 | to be counted in the length of the field. | ||
25 | |||
26 | Therefore the following parser will fail even it's legal according to the | ||
27 | specification: | ||
28 | |||
29 | > n_3 :: Parser Primitive | ||
30 | > n_3 = numeric `upTo` 3 | ||
31 | > | ||
32 | > parse n_3 "-12.3" | ||
33 | |||
34 | To be fixed, we have to change the way primitives combinators are built so that | ||
35 | the 'upTo' and 'exactly' combinators are aware of the inner parser. | ||
36 | -} | ||
37 | module Text.Edifact.Parsing.Primitives | ||
38 | ( | ||
39 | -- * Primitives | ||
40 | -- ** Simple elements definition | ||
41 | alphaNumeric | ||
42 | , alpha | ||
43 | , numeric | ||
44 | |||
45 | -- ** Cardinality | ||
46 | , exactly | ||
47 | , upTo | ||
48 | , many | ||
49 | |||
50 | ) where | ||
51 | |||
52 | import Text.Edifact.Parsing.Commons | ||
53 | import Text.Edifact.Types | ||
54 | |||
55 | import Data.String (fromString) | ||
56 | import qualified Data.Text as T (length) | ||
57 | import Text.Parsec (count, lookAhead, many1, noneOf, | ||
58 | oneOf) | ||
59 | import qualified Text.Parsec as P (many) | ||
60 | |||
61 | -- | Parser associated with the @an@ notation. | ||
62 | alphaNumeric :: Parser Char | ||
63 | alphaNumeric = do | ||
64 | separators <- sequence [ getSegmentSeparator | ||
65 | , getElementSeparator | ||
66 | , getCompositeSeparator | ||
67 | ] | ||
68 | tries [ parseEscape *> parseSegmentSeparator | ||
69 | , parseEscape *> parseElementSeparator | ||
70 | , parseEscape *> parseCompositeSeparator | ||
71 | , parseEscape *> parseEscape | ||
72 | , noneOf separators | ||
73 | ] | ||
74 | |||
75 | -- | Parser associated with the @a@ notation. | ||
76 | -- | ||
77 | -- So far it's simply an alias to 'alphaNumeric'. | ||
78 | alpha :: Parser Char | ||
79 | alpha = alphaNumeric | ||
80 | |||
81 | -- | Parser associated with the @n@ notation. | ||
82 | numeric :: Parser Char | ||
83 | numeric = do | ||
84 | punctuationSign <- getDecimalSign | ||
85 | oneOf (punctuationSign : "0123456789-") | ||
86 | |||
87 | -- | Combinator to build a parser of primitive which length is unspecified. | ||
88 | -- | ||
89 | -- Correspondance with the Edifact notation: | ||
90 | -- | ||
91 | -- > many alpha # same as a | ||
92 | -- > many numeric # same as n | ||
93 | -- > many alphaNumeric # same as an | ||
94 | many :: Parser Char -> Parser Primitive | ||
95 | many = fmap fromString . many1 | ||
96 | |||
97 | -- | Combinator to build a parser of primitive which length is capped. | ||
98 | -- | ||
99 | -- Correspondance with the Edifact notation: | ||
100 | -- | ||
101 | -- > alpha `upTo` 3 # same as a..3 | ||
102 | -- > numeric `upTo` 3 # same as n..3 | ||
103 | -- > alphaNumeric `upTo` 3 # same as an..3 | ||
104 | upTo :: Parser Char -> Int -> Parser Primitive | ||
105 | upTo p c = | ||
106 | let check t = | ||
107 | let c' = T.length t | ||
108 | in if c' > c | ||
109 | then failWithPosition ("expected up to " <> show c <> " characters, but encountered " <> show c') | ||
110 | else pure (String t) | ||
111 | maybeEmpty = (<$) mempty . lookAhead | ||
112 | in check =<< | ||
113 | tries [ maybeEmpty parseSegmentSeparator | ||
114 | , maybeEmpty parseElementSeparator | ||
115 | , maybeEmpty parseCompositeSeparator | ||
116 | , fromString <$> P.many p | ||
117 | ] | ||
118 | |||
119 | -- | Combinator to build a parser of primitive which length is fixed. | ||
120 | -- | ||
121 | -- Correspondance with the Edifact notation: | ||
122 | -- | ||
123 | -- > alpha `exactly` 3 # same as a3 | ||
124 | -- > numeric `exactly` 3 # same as n3 | ||
125 | -- > alphaNumeric `exactly` 3 # same as an3 | ||
126 | exactly :: Parser Char -> Int -> Parser Primitive | ||
127 | exactly p c = fromString <$> count c p | ||