diff options
Diffstat (limited to 'core/src/Text/Edifact/Parsing/Commons.hs')
-rw-r--r-- | core/src/Text/Edifact/Parsing/Commons.hs | 173 |
1 files changed, 173 insertions, 0 deletions
diff --git a/core/src/Text/Edifact/Parsing/Commons.hs b/core/src/Text/Edifact/Parsing/Commons.hs new file mode 100644 index 0000000..a1c6150 --- /dev/null +++ b/core/src/Text/Edifact/Parsing/Commons.hs | |||
@@ -0,0 +1,173 @@ | |||
1 | module Text.Edifact.Parsing.Commons | ||
2 | ( -- * Parsing context | ||
3 | Parser | ||
4 | , Context(..) | ||
5 | , CurrentPosition(..) | ||
6 | , defaultContext | ||
7 | |||
8 | -- * State combinators | ||
9 | , updateSyntax | ||
10 | , setCurrentPosition | ||
11 | , resetCurrentPosition | ||
12 | |||
13 | -- * Syntax helpers | ||
14 | -- ** Parsing combinators | ||
15 | , parseCompositeSeparator | ||
16 | , parseElementSeparator | ||
17 | , parseSegmentSeparator | ||
18 | , parseEscape | ||
19 | |||
20 | -- ** State accessors | ||
21 | -- | Shortcuts to the syntax in current state. Doesn't alter input stream. | ||
22 | , getCompositeSeparator | ||
23 | , getElementSeparator | ||
24 | , getSegmentSeparator | ||
25 | , getDecimalSign | ||
26 | |||
27 | -- * Context aware failure helpers | ||
28 | , failWithPosition | ||
29 | , (<??>) | ||
30 | |||
31 | -- * Parsec extras | ||
32 | , tries | ||
33 | |||
34 | -- * Technical combinators | ||
35 | , notYetImplemented | ||
36 | ) where | ||
37 | |||
38 | import Text.Edifact.Types (Position, Syntax (..), defaultSyntax) | ||
39 | |||
40 | import Data.Text (Text) | ||
41 | import Text.Parsec (Parsec, char, choice, endOfLine, getState, | ||
42 | modifyState, try, updateState, (<?>)) | ||
43 | |||
44 | -- | Defines our "Text.Parsec" context. | ||
45 | type Parser = Parsec Text Context | ||
46 | |||
47 | data Context = | ||
48 | Context | ||
49 | { parsingSyntax :: Syntax -- ^ State of the syntax. To be updated on the encounter of the @UNA@ segment. | ||
50 | , currentPosition :: CurrentPosition -- ^ Pointer for current position in the parser. Used for enriched parsing error messages. | ||
51 | } | ||
52 | |||
53 | defaultContext :: Context | ||
54 | defaultContext = Context defaultSyntax Undefined | ||
55 | |||
56 | -- | Current position in the parser. | ||
57 | -- | ||
58 | -- For now it only stores the current position in a message, a segment group, | ||
59 | -- a segment, or a composite. | ||
60 | -- | ||
61 | -- Future version could store the whole path to improve debugging. | ||
62 | data CurrentPosition = Undefined | ||
63 | | Defined Position | ||
64 | deriving Show | ||
65 | |||
66 | getSyntax :: Parser Syntax | ||
67 | getSyntax = parsingSyntax <$> getState | ||
68 | |||
69 | -- | Get current charactor for decimal sign. | ||
70 | -- It doesn't parse nor consume input. | ||
71 | getDecimalSign :: Parser Char | ||
72 | getDecimalSign = decimalSign <$> getSyntax | ||
73 | |||
74 | -- | Get current charactor for segment separator. | ||
75 | -- It doesn't parse nor consume input. | ||
76 | getSegmentSeparator :: Parser Char | ||
77 | getSegmentSeparator = segmentSeparator <$> getSyntax | ||
78 | |||
79 | -- | Get current charactor for element separator. | ||
80 | -- It doesn't parse nor consume input. | ||
81 | getElementSeparator :: Parser Char | ||
82 | getElementSeparator = elementSeparator <$> getSyntax | ||
83 | |||
84 | -- | Get current charactor for composite separator. | ||
85 | -- It doesn't parse nor consume input. | ||
86 | getCompositeSeparator :: Parser Char | ||
87 | getCompositeSeparator = compositeSeparator <$> getSyntax | ||
88 | |||
89 | -- | This let change the operators used in the parsing. This is designed for the @UNA@ segment. | ||
90 | updateSyntax :: Syntax -> Parser () | ||
91 | updateSyntax s = updateState (\ c -> c { parsingSyntax = s }) | ||
92 | |||
93 | -- | Read the parser state to extract current position. | ||
94 | -- It doesn't parse nor consume input. | ||
95 | getCurrentPosition :: Parser CurrentPosition | ||
96 | getCurrentPosition = currentPosition <$> getState | ||
97 | |||
98 | -- | Write the parser state to update current position. | ||
99 | -- It doesn't parse nor consume input. | ||
100 | setCurrentPosition :: Position -> Parser () | ||
101 | setCurrentPosition = updateCurrentPosition . Defined | ||
102 | |||
103 | -- | Write the parser state to reset current position. | ||
104 | -- It doesn't parse nor consume input. | ||
105 | resetCurrentPosition :: Parser () | ||
106 | resetCurrentPosition = updateCurrentPosition Undefined | ||
107 | |||
108 | updateCurrentPosition :: CurrentPosition -> Parser () | ||
109 | updateCurrentPosition pos = modifyState (\s -> s { currentPosition = pos }) | ||
110 | |||
111 | -- | Parse current charactor for element separator. | ||
112 | -- It does parse and consume input. | ||
113 | parseElementSeparator :: Parser Char | ||
114 | parseElementSeparator = parseSpecialChar "element separator" elementSeparator | ||
115 | |||
116 | -- | Parse current charactor for composite separator. | ||
117 | -- It does parse and consume input. | ||
118 | parseCompositeSeparator :: Parser Char | ||
119 | parseCompositeSeparator = parseSpecialChar "composite separator" compositeSeparator | ||
120 | |||
121 | -- | Parse current charactor for escape separator. | ||
122 | -- It does parse and consume input. | ||
123 | parseEscape :: Parser Char | ||
124 | parseEscape = parseSpecialChar "escape character" escape | ||
125 | |||
126 | -- | Parse current charactor for segment separator. | ||
127 | -- It does parse and consume input. | ||
128 | -- | ||
129 | -- It also tries consuming end of line after segment separator if any. | ||
130 | parseSegmentSeparator :: Parser Char | ||
131 | parseSegmentSeparator = tries [ parseSpecialChar "segment separator" segmentSeparator <* endOfLine | ||
132 | , parseSpecialChar "segment separator" segmentSeparator | ||
133 | ] | ||
134 | |||
135 | parseSpecialChar :: String -> (Syntax -> Char) -> Parser Char | ||
136 | parseSpecialChar description reader = do | ||
137 | c <- reader <$> getSyntax | ||
138 | let escape' '\"' = "\\\"" | ||
139 | escape' c' = [c'] | ||
140 | comment = description <> " (\"" <> escape' c <> "\")" | ||
141 | char c <?> comment | ||
142 | |||
143 | -- | Let you try various parsers, not consuming until success. | ||
144 | tries :: [Parser a] -> Parser a | ||
145 | tries = choice . map try | ||
146 | |||
147 | -- | Like 'fail', this operator let you annotate a parser if it were to fail. | ||
148 | -- The difference with the standard "Text.Parsec" operator is that it appends | ||
149 | -- the current position if any. | ||
150 | failWithPosition :: String -> Parser a | ||
151 | failWithPosition = withPosition fail | ||
152 | |||
153 | -- | Like '<?>', this operator let you annotate a parser if it were to fail. | ||
154 | -- The difference with the standard "Text.Parsec" operator is that it appends | ||
155 | -- the current position if any. | ||
156 | (<??>) :: Parser a -> String -> Parser a | ||
157 | (<??>) = withPosition . (<?>) | ||
158 | |||
159 | -- Same priority as <?> from Text.Parsec | ||
160 | infix 0 <??> | ||
161 | |||
162 | withPosition :: (String -> Parser a) -> String -> Parser a | ||
163 | withPosition continuation msg = | ||
164 | let mkMessage Undefined = msg | ||
165 | mkMessage (Defined d) = msg <> " at position " <> show d | ||
166 | getMessage = mkMessage <$> getCurrentPosition | ||
167 | in getMessage >>= continuation | ||
168 | |||
169 | -- | Alias to 'failWithPosition' to convey semantics of work-in-progress when | ||
170 | -- writing a parser. This might be useful if you want to partially support a | ||
171 | -- message. | ||
172 | notYetImplemented :: String -> Parser a | ||
173 | notYetImplemented = failWithPosition | ||