]>
Commit | Line | Data |
---|---|---|
a9d77a20 FM |
1 | module Text.Edifact.Parsing.Commons |
2 | ( -- * Parsing context | |
3 | Parser | |
4 | , Context(..) | |
5 | , CurrentPosition(..) | |
6 | , defaultContext | |
7 | ||
8 | -- * State combinators | |
9 | , updateSyntax | |
10 | , setCurrentPosition | |
11 | , resetCurrentPosition | |
12 | ||
13 | -- * Syntax helpers | |
14 | -- ** Parsing combinators | |
15 | , parseCompositeSeparator | |
16 | , parseElementSeparator | |
17 | , parseSegmentSeparator | |
18 | , parseEscape | |
19 | ||
20 | -- ** State accessors | |
21 | -- | Shortcuts to the syntax in current state. Doesn't alter input stream. | |
22 | , getCompositeSeparator | |
23 | , getElementSeparator | |
24 | , getSegmentSeparator | |
25 | , getDecimalSign | |
26 | ||
27 | -- * Context aware failure helpers | |
28 | , failWithPosition | |
29 | , (<??>) | |
30 | ||
31 | -- * Parsec extras | |
32 | , tries | |
33 | ||
34 | -- * Technical combinators | |
35 | , notYetImplemented | |
36 | ) where | |
37 | ||
38 | import Text.Edifact.Types (Position, Syntax (..), defaultSyntax) | |
39 | ||
40 | import Data.Text (Text) | |
41 | import Text.Parsec (Parsec, char, choice, endOfLine, getState, | |
42 | modifyState, try, updateState, (<?>)) | |
43 | ||
44 | -- | Defines our "Text.Parsec" context. | |
45 | type Parser = Parsec Text Context | |
46 | ||
47 | data Context = | |
48 | Context | |
49 | { parsingSyntax :: Syntax -- ^ State of the syntax. To be updated on the encounter of the @UNA@ segment. | |
50 | , currentPosition :: CurrentPosition -- ^ Pointer for current position in the parser. Used for enriched parsing error messages. | |
51 | } | |
52 | ||
53 | defaultContext :: Context | |
54 | defaultContext = Context defaultSyntax Undefined | |
55 | ||
56 | -- | Current position in the parser. | |
57 | -- | |
58 | -- For now it only stores the current position in a message, a segment group, | |
59 | -- a segment, or a composite. | |
60 | -- | |
61 | -- Future version could store the whole path to improve debugging. | |
62 | data CurrentPosition = Undefined | |
63 | | Defined Position | |
64 | deriving Show | |
65 | ||
66 | getSyntax :: Parser Syntax | |
67 | getSyntax = parsingSyntax <$> getState | |
68 | ||
69 | -- | Get current charactor for decimal sign. | |
70 | -- It doesn't parse nor consume input. | |
71 | getDecimalSign :: Parser Char | |
72 | getDecimalSign = decimalSign <$> getSyntax | |
73 | ||
74 | -- | Get current charactor for segment separator. | |
75 | -- It doesn't parse nor consume input. | |
76 | getSegmentSeparator :: Parser Char | |
77 | getSegmentSeparator = segmentSeparator <$> getSyntax | |
78 | ||
79 | -- | Get current charactor for element separator. | |
80 | -- It doesn't parse nor consume input. | |
81 | getElementSeparator :: Parser Char | |
82 | getElementSeparator = elementSeparator <$> getSyntax | |
83 | ||
84 | -- | Get current charactor for composite separator. | |
85 | -- It doesn't parse nor consume input. | |
86 | getCompositeSeparator :: Parser Char | |
87 | getCompositeSeparator = compositeSeparator <$> getSyntax | |
88 | ||
89 | -- | This let change the operators used in the parsing. This is designed for the @UNA@ segment. | |
90 | updateSyntax :: Syntax -> Parser () | |
91 | updateSyntax s = updateState (\ c -> c { parsingSyntax = s }) | |
92 | ||
93 | -- | Read the parser state to extract current position. | |
94 | -- It doesn't parse nor consume input. | |
95 | getCurrentPosition :: Parser CurrentPosition | |
96 | getCurrentPosition = currentPosition <$> getState | |
97 | ||
98 | -- | Write the parser state to update current position. | |
99 | -- It doesn't parse nor consume input. | |
100 | setCurrentPosition :: Position -> Parser () | |
101 | setCurrentPosition = updateCurrentPosition . Defined | |
102 | ||
103 | -- | Write the parser state to reset current position. | |
104 | -- It doesn't parse nor consume input. | |
105 | resetCurrentPosition :: Parser () | |
106 | resetCurrentPosition = updateCurrentPosition Undefined | |
107 | ||
108 | updateCurrentPosition :: CurrentPosition -> Parser () | |
109 | updateCurrentPosition pos = modifyState (\s -> s { currentPosition = pos }) | |
110 | ||
111 | -- | Parse current charactor for element separator. | |
112 | -- It does parse and consume input. | |
113 | parseElementSeparator :: Parser Char | |
114 | parseElementSeparator = parseSpecialChar "element separator" elementSeparator | |
115 | ||
116 | -- | Parse current charactor for composite separator. | |
117 | -- It does parse and consume input. | |
118 | parseCompositeSeparator :: Parser Char | |
119 | parseCompositeSeparator = parseSpecialChar "composite separator" compositeSeparator | |
120 | ||
121 | -- | Parse current charactor for escape separator. | |
122 | -- It does parse and consume input. | |
123 | parseEscape :: Parser Char | |
124 | parseEscape = parseSpecialChar "escape character" escape | |
125 | ||
126 | -- | Parse current charactor for segment separator. | |
127 | -- It does parse and consume input. | |
128 | -- | |
129 | -- It also tries consuming end of line after segment separator if any. | |
130 | parseSegmentSeparator :: Parser Char | |
131 | parseSegmentSeparator = tries [ parseSpecialChar "segment separator" segmentSeparator <* endOfLine | |
132 | , parseSpecialChar "segment separator" segmentSeparator | |
133 | ] | |
134 | ||
135 | parseSpecialChar :: String -> (Syntax -> Char) -> Parser Char | |
136 | parseSpecialChar description reader = do | |
137 | c <- reader <$> getSyntax | |
138 | let escape' '\"' = "\\\"" | |
139 | escape' c' = [c'] | |
140 | comment = description <> " (\"" <> escape' c <> "\")" | |
141 | char c <?> comment | |
142 | ||
143 | -- | Let you try various parsers, not consuming until success. | |
144 | tries :: [Parser a] -> Parser a | |
145 | tries = choice . map try | |
146 | ||
147 | -- | Like 'fail', this operator let you annotate a parser if it were to fail. | |
148 | -- The difference with the standard "Text.Parsec" operator is that it appends | |
149 | -- the current position if any. | |
150 | failWithPosition :: String -> Parser a | |
151 | failWithPosition = withPosition fail | |
152 | ||
153 | -- | Like '<?>', this operator let you annotate a parser if it were to fail. | |
154 | -- The difference with the standard "Text.Parsec" operator is that it appends | |
155 | -- the current position if any. | |
156 | (<??>) :: Parser a -> String -> Parser a | |
157 | (<??>) = withPosition . (<?>) | |
158 | ||
159 | -- Same priority as <?> from Text.Parsec | |
160 | infix 0 <??> | |
161 | ||
162 | withPosition :: (String -> Parser a) -> String -> Parser a | |
163 | withPosition continuation msg = | |
164 | let mkMessage Undefined = msg | |
165 | mkMessage (Defined d) = msg <> " at position " <> show d | |
166 | getMessage = mkMessage <$> getCurrentPosition | |
167 | in getMessage >>= continuation | |
168 | ||
169 | -- | Alias to 'failWithPosition' to convey semantics of work-in-progress when | |
170 | -- writing a parser. This might be useful if you want to partially support a | |
171 | -- message. | |
172 | notYetImplemented :: String -> Parser a | |
173 | notYetImplemented = failWithPosition |