aboutsummaryrefslogtreecommitdiffhomepage
path: root/core/src/Text/Edifact/Parsing/Commons.hs
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/Text/Edifact/Parsing/Commons.hs')
-rw-r--r--core/src/Text/Edifact/Parsing/Commons.hs173
1 files changed, 173 insertions, 0 deletions
diff --git a/core/src/Text/Edifact/Parsing/Commons.hs b/core/src/Text/Edifact/Parsing/Commons.hs
new file mode 100644
index 0000000..a1c6150
--- /dev/null
+++ b/core/src/Text/Edifact/Parsing/Commons.hs
@@ -0,0 +1,173 @@
1module Text.Edifact.Parsing.Commons
2 ( -- * Parsing context
3 Parser
4 , Context(..)
5 , CurrentPosition(..)
6 , defaultContext
7
8 -- * State combinators
9 , updateSyntax
10 , setCurrentPosition
11 , resetCurrentPosition
12
13 -- * Syntax helpers
14 -- ** Parsing combinators
15 , parseCompositeSeparator
16 , parseElementSeparator
17 , parseSegmentSeparator
18 , parseEscape
19
20 -- ** State accessors
21 -- | Shortcuts to the syntax in current state. Doesn't alter input stream.
22 , getCompositeSeparator
23 , getElementSeparator
24 , getSegmentSeparator
25 , getDecimalSign
26
27 -- * Context aware failure helpers
28 , failWithPosition
29 , (<??>)
30
31 -- * Parsec extras
32 , tries
33
34 -- * Technical combinators
35 , notYetImplemented
36 ) where
37
38import Text.Edifact.Types (Position, Syntax (..), defaultSyntax)
39
40import Data.Text (Text)
41import Text.Parsec (Parsec, char, choice, endOfLine, getState,
42 modifyState, try, updateState, (<?>))
43
44-- | Defines our "Text.Parsec" context.
45type Parser = Parsec Text Context
46
47data Context =
48 Context
49 { parsingSyntax :: Syntax -- ^ State of the syntax. To be updated on the encounter of the @UNA@ segment.
50 , currentPosition :: CurrentPosition -- ^ Pointer for current position in the parser. Used for enriched parsing error messages.
51 }
52
53defaultContext :: Context
54defaultContext = Context defaultSyntax Undefined
55
56-- | Current position in the parser.
57--
58-- For now it only stores the current position in a message, a segment group,
59-- a segment, or a composite.
60--
61-- Future version could store the whole path to improve debugging.
62data CurrentPosition = Undefined
63 | Defined Position
64 deriving Show
65
66getSyntax :: Parser Syntax
67getSyntax = parsingSyntax <$> getState
68
69-- | Get current charactor for decimal sign.
70-- It doesn't parse nor consume input.
71getDecimalSign :: Parser Char
72getDecimalSign = decimalSign <$> getSyntax
73
74-- | Get current charactor for segment separator.
75-- It doesn't parse nor consume input.
76getSegmentSeparator :: Parser Char
77getSegmentSeparator = segmentSeparator <$> getSyntax
78
79-- | Get current charactor for element separator.
80-- It doesn't parse nor consume input.
81getElementSeparator :: Parser Char
82getElementSeparator = elementSeparator <$> getSyntax
83
84-- | Get current charactor for composite separator.
85-- It doesn't parse nor consume input.
86getCompositeSeparator :: Parser Char
87getCompositeSeparator = compositeSeparator <$> getSyntax
88
89-- | This let change the operators used in the parsing. This is designed for the @UNA@ segment.
90updateSyntax :: Syntax -> Parser ()
91updateSyntax s = updateState (\ c -> c { parsingSyntax = s })
92
93-- | Read the parser state to extract current position.
94-- It doesn't parse nor consume input.
95getCurrentPosition :: Parser CurrentPosition
96getCurrentPosition = currentPosition <$> getState
97
98-- | Write the parser state to update current position.
99-- It doesn't parse nor consume input.
100setCurrentPosition :: Position -> Parser ()
101setCurrentPosition = updateCurrentPosition . Defined
102
103-- | Write the parser state to reset current position.
104-- It doesn't parse nor consume input.
105resetCurrentPosition :: Parser ()
106resetCurrentPosition = updateCurrentPosition Undefined
107
108updateCurrentPosition :: CurrentPosition -> Parser ()
109updateCurrentPosition pos = modifyState (\s -> s { currentPosition = pos })
110
111-- | Parse current charactor for element separator.
112-- It does parse and consume input.
113parseElementSeparator :: Parser Char
114parseElementSeparator = parseSpecialChar "element separator" elementSeparator
115
116-- | Parse current charactor for composite separator.
117-- It does parse and consume input.
118parseCompositeSeparator :: Parser Char
119parseCompositeSeparator = parseSpecialChar "composite separator" compositeSeparator
120
121-- | Parse current charactor for escape separator.
122-- It does parse and consume input.
123parseEscape :: Parser Char
124parseEscape = parseSpecialChar "escape character" escape
125
126-- | Parse current charactor for segment separator.
127-- It does parse and consume input.
128--
129-- It also tries consuming end of line after segment separator if any.
130parseSegmentSeparator :: Parser Char
131parseSegmentSeparator = tries [ parseSpecialChar "segment separator" segmentSeparator <* endOfLine
132 , parseSpecialChar "segment separator" segmentSeparator
133 ]
134
135parseSpecialChar :: String -> (Syntax -> Char) -> Parser Char
136parseSpecialChar description reader = do
137 c <- reader <$> getSyntax
138 let escape' '\"' = "\\\""
139 escape' c' = [c']
140 comment = description <> " (\"" <> escape' c <> "\")"
141 char c <?> comment
142
143-- | Let you try various parsers, not consuming until success.
144tries :: [Parser a] -> Parser a
145tries = choice . map try
146
147-- | Like 'fail', this operator let you annotate a parser if it were to fail.
148-- The difference with the standard "Text.Parsec" operator is that it appends
149-- the current position if any.
150failWithPosition :: String -> Parser a
151failWithPosition = withPosition fail
152
153-- | Like '<?>', this operator let you annotate a parser if it were to fail.
154-- The difference with the standard "Text.Parsec" operator is that it appends
155-- the current position if any.
156(<??>) :: Parser a -> String -> Parser a
157(<??>) = withPosition . (<?>)
158
159-- Same priority as <?> from Text.Parsec
160infix 0 <??>
161
162withPosition :: (String -> Parser a) -> String -> Parser a
163withPosition continuation msg =
164 let mkMessage Undefined = msg
165 mkMessage (Defined d) = msg <> " at position " <> show d
166 getMessage = mkMessage <$> getCurrentPosition
167 in getMessage >>= continuation
168
169-- | Alias to 'failWithPosition' to convey semantics of work-in-progress when
170-- writing a parser. This might be useful if you want to partially support a
171-- message.
172notYetImplemented :: String -> Parser a
173notYetImplemented = failWithPosition