diff options
Diffstat (limited to 'vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl')
-rw-r--r-- | vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl | 376 |
1 files changed, 376 insertions, 0 deletions
diff --git a/vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl b/vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl new file mode 100644 index 0000000..83ef65b --- /dev/null +++ b/vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl | |||
@@ -0,0 +1,376 @@ | |||
1 | |||
2 | package hclsyntax | ||
3 | |||
4 | import ( | ||
5 | "bytes" | ||
6 | |||
7 | "github.com/hashicorp/hcl2/hcl" | ||
8 | ) | ||
9 | |||
10 | // This file is generated from scan_tokens.rl. DO NOT EDIT. | ||
11 | %%{ | ||
12 | # (except you are actually in scan_tokens.rl here, so edit away!) | ||
13 | |||
14 | machine hcltok; | ||
15 | write data; | ||
16 | }%% | ||
17 | |||
// scanTokens runs the Ragel "hcltok" scanner over data and returns the
// tokens it emits.
//
// filename and start initialise the tokenAccum through which every token is
// emitted. mode selects the scanner entry point: scanNormal starts in the
// "main" scanner, scanTemplate in "bareTemplate" and scanIdentOnly in
// "identOnly"; any other value panics.
//
// If the machine stops outside a final state, the bytes from the current
// token start to the end of data are emitted as one extra token:
// TokenStringLit when scanning a bare template with an empty call stack,
// TokenInvalid otherwise. A synthetic TokenEOF positioned at len(data) is
// always emitted last.
func scanTokens(data []byte, filename string, start hcl.Pos, mode scanMode) []Token {
    f := &tokenAccum{
        Filename: filename,
        Bytes:    data,
        Pos:      start,
    }

    %%{
        include UnicodeDerived "unicode_derived.rl";

        UTF8Cont = 0x80 .. 0xBF;
        AnyUTF8 = (
            0x00..0x7F |
            0xC0..0xDF . UTF8Cont |
            0xE0..0xEF . UTF8Cont . UTF8Cont |
            0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
        );
        BrokenUTF8 = any - AnyUTF8;

        NumberLitContinue = (digit|'.'|('e'|'E') ('+'|'-')? digit);
        NumberLit = digit ("" | (NumberLitContinue - '.') | (NumberLitContinue* (NumberLitContinue - '.')));
        Ident = (ID_Start | '_') (ID_Continue | '-')*;

        # Symbols that just represent themselves are handled as a single rule.
        SelfToken = "[" | "]" | "(" | ")" | "." | "," | "*" | "/" | "%" | "+" | "-" | "=" | "<" | ">" | "!" | "?" | ":" | "\n" | "&" | "|" | "~" | "^" | ";" | "`";

        EqualOp = "==";
        NotEqual = "!=";
        GreaterThanEqual = ">=";
        LessThanEqual = "<=";
        LogicalAnd = "&&";
        LogicalOr = "||";

        Ellipsis = "...";
        FatArrow = "=>";

        Newline = '\r' ? '\n';
        EndOfLine = Newline;

        BeginStringTmpl = '"';
        BeginHeredocTmpl = '<<' ('-')? Ident Newline;

        # In the block-comment alternative, ":>>" is Ragel's finish-guarded
        # concatenation: the "any*" on its left is terminated as soon as
        # the closing delimiter on its right completes. With a plain
        # concatenation the scanner's longest-match rule would keep going
        # until the _final_ closing delimiter in the input, merging
        # everything between two separate block comments into one token.
        Comment = (
            ("#" (any - EndOfLine)* EndOfLine) |
            ("//" (any - EndOfLine)* EndOfLine) |
            ("/*" any* :>> "*/")
        );

        # Note: hclwrite assumes that only ASCII spaces appear between tokens,
        # and uses this assumption to recreate the spaces between tokens by
        # looking at byte offset differences. This means it will produce
        # incorrect results in the presence of tabs, but that's acceptable
        # because the canonical style (which hclwrite itself can impose
        # automatically) is to never use tabs.
        Spaces = (' ' | 0x09)+;

        # Emits the opening quote and enters the stringTemplate scanner.
        action beginStringTemplate {
            token(TokenOQuote);
            fcall stringTemplate;
        }

        # Emits the closing quote and returns to the calling scanner.
        action endStringTemplate {
            token(TokenCQuote);
            fret;
        }

        action beginHeredocTemplate {
            token(TokenOHeredoc);
            // the token is currently the whole heredoc introducer, like
            // <<EOT or <<-EOT, followed by a newline. We want to extract
            // just the "EOT" portion that we'll use as the closing marker.

            // Skip the leading "<<" and the trailing "\n".
            marker := data[ts+2:te-1]
            if marker[0] == '-' {
                marker = marker[1:]
            }
            // The introducer may have ended with "\r\n" rather than "\n".
            if marker[len(marker)-1] == '\r' {
                marker = marker[:len(marker)-1]
            }

            heredocs = append(heredocs, heredocInProgress{
                Marker:      marker,
                StartOfLine: true,
            })

            fcall heredocTemplate;
        }

        action heredocLiteralEOL {
            // This action is called specifically when a heredoc literal
            // ends with a newline character.

            // This might actually be our end marker.
            topdoc := &heredocs[len(heredocs)-1]
            if topdoc.StartOfLine {
                maybeMarker := bytes.TrimSpace(data[ts:te])
                if bytes.Equal(maybeMarker, topdoc.Marker) {
                    // We actually emit two tokens here: the end-of-heredoc
                    // marker first, and then separately the newline that
                    // follows it. This then avoids issues with the closing
                    // marker consuming a newline that would normally be used
                    // to mark the end of an attribute definition.
                    // We might have either a \n sequence or an \r\n sequence
                    // here, so we must handle both.
                    nls := te-1
                    nle := te
                    te--
                    if data[te-1] == '\r' {
                        // back up one more byte
                        nls--
                        te--
                    }
                    token(TokenCHeredoc);
                    ts = nls
                    te = nle
                    token(TokenNewline);
                    heredocs = heredocs[:len(heredocs)-1]
                    fret;
                }
            }

            // Not the end marker: this is ordinary literal text, and the
            // next literal begins at the start of a line.
            topdoc.StartOfLine = true;
            token(TokenStringLit);
        }

        action heredocLiteralMidline {
            // This action is called when a heredoc literal _doesn't_ end
            // with a newline character, e.g. because we're about to enter
            // an interpolation sequence.
            heredocs[len(heredocs)-1].StartOfLine = false;
            token(TokenStringLit);
        }

        action bareTemplateLiteral {
            token(TokenStringLit);
        }

        # Opens an interpolation sequence: remembers the brace depth at which
        # the sequence must close (see closeBrace) and scans its contents
        # with the main scanner.
        action beginTemplateInterp {
            token(TokenTemplateInterp);
            braces++;
            retBraces = append(retBraces, braces);
            if len(heredocs) > 0 {
                heredocs[len(heredocs)-1].StartOfLine = false;
            }
            fcall main;
        }

        # Opens a control sequence; same bookkeeping as beginTemplateInterp.
        action beginTemplateControl {
            token(TokenTemplateControl);
            braces++;
            retBraces = append(retBraces, braces);
            if len(heredocs) > 0 {
                heredocs[len(heredocs)-1].StartOfLine = false;
            }
            fcall main;
        }

        action openBrace {
            token(TokenOBrace);
            braces++;
        }

        # A closing brace ends the innermost template sequence only when the
        # current depth equals the depth recorded when that sequence was
        # opened; otherwise it is an ordinary closing brace.
        action closeBrace {
            if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
                token(TokenTemplateSeqEnd);
                braces--;
                retBraces = retBraces[0:len(retBraces)-1]
                fret;
            } else {
                token(TokenCBrace);
                braces--;
            }
        }

        action closeTemplateSeqEatWhitespace {
            // Only consume from the retBraces stack and return if we are at
            // a suitable brace nesting level, otherwise things will get
            // confused. (Not entering this branch indicates a syntax error,
            // which we will catch in the parser.)
            if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
                token(TokenTemplateSeqEnd);
                braces--;
                retBraces = retBraces[0:len(retBraces)-1]
                fret;
            } else {
                // We intentionally generate a TokenTemplateSeqEnd here,
                // even though the user apparently wanted a brace, because
                // we want to allow the parser to catch the incorrect use
                // of a ~} to balance a generic opening brace, rather than
                // a template sequence.
                token(TokenTemplateSeqEnd);
                braces--;
            }
        }

        TemplateInterp = "${" ("~")?;
        TemplateControl = "%{" ("~")?;
        EndStringTmpl = '"';
        StringLiteralChars = (AnyUTF8 - ("\r"|"\n"));
        TemplateStringLiteral = (
            ('$' ^'{' %{ fhold; }) |
            ('%' ^'{' %{ fhold; }) |
            ('\\' StringLiteralChars) |
            (StringLiteralChars - ("$" | '%' | '"'))
        )+;
        HeredocStringLiteral = (
            ('$' ^'{' %{ fhold; }) |
            ('%' ^'{' %{ fhold; }) |
            (StringLiteralChars - ("$" | '%'))
        )*;
        BareStringLiteral = (
            ('$' ^'{') |
            ('%' ^'{') |
            (StringLiteralChars - ("$" | '%'))
        )* Newline?;

        stringTemplate := |*
            TemplateInterp        => beginTemplateInterp;
            TemplateControl       => beginTemplateControl;
            EndStringTmpl         => endStringTemplate;
            TemplateStringLiteral => { token(TokenQuotedLit); };
            AnyUTF8               => { token(TokenInvalid); };
            BrokenUTF8            => { token(TokenBadUTF8); };
        *|;

        heredocTemplate := |*
            TemplateInterp        => beginTemplateInterp;
            TemplateControl       => beginTemplateControl;
            HeredocStringLiteral EndOfLine => heredocLiteralEOL;
            HeredocStringLiteral  => heredocLiteralMidline;
            BrokenUTF8            => { token(TokenBadUTF8); };
        *|;

        bareTemplate := |*
            TemplateInterp        => beginTemplateInterp;
            TemplateControl       => beginTemplateControl;
            BareStringLiteral     => bareTemplateLiteral;
            BrokenUTF8            => { token(TokenBadUTF8); };
        *|;

        identOnly := |*
            Ident            => { token(TokenIdent) };
            BrokenUTF8       => { token(TokenBadUTF8) };
            AnyUTF8          => { token(TokenInvalid) };
        *|;

        main := |*
            Spaces           => {};
            NumberLit        => { token(TokenNumberLit) };
            Ident            => { token(TokenIdent) };

            Comment          => { token(TokenComment) };
            Newline          => { token(TokenNewline) };

            EqualOp          => { token(TokenEqualOp); };
            NotEqual         => { token(TokenNotEqual); };
            GreaterThanEqual => { token(TokenGreaterThanEq); };
            LessThanEqual    => { token(TokenLessThanEq); };
            LogicalAnd       => { token(TokenAnd); };
            LogicalOr        => { token(TokenOr); };
            Ellipsis         => { token(TokenEllipsis); };
            FatArrow         => { token(TokenFatArrow); };
            SelfToken        => { selfToken() };

            "{"              => openBrace;
            "}"              => closeBrace;

            "~}"             => closeTemplateSeqEatWhitespace;

            BeginStringTmpl  => beginStringTemplate;
            BeginHeredocTmpl => beginHeredocTemplate;

            BrokenUTF8       => { token(TokenBadUTF8) };
            AnyUTF8          => { token(TokenInvalid) };
        *|;

    }%%

    // Ragel state
    p := 0  // "Pointer" into data
    pe := len(data) // End-of-data "pointer"
    ts := 0
    te := 0
    act := 0
    eof := pe
    var stack []int
    var top int

    var cs int // current state
    switch mode {
    case scanNormal:
        cs = hcltok_en_main
    case scanTemplate:
        cs = hcltok_en_bareTemplate
    case scanIdentOnly:
        cs = hcltok_en_identOnly
    default:
        panic("invalid scanMode")
    }

    braces := 0
    var retBraces []int // stack of brace levels that cause us to use fret
    var heredocs []heredocInProgress // stack of heredocs we're currently processing

    %%{
        # Grow the call stack by one slot before each fcall and shrink it
        # again after each fret.
        prepush {
            stack = append(stack, 0);
        }
        postpop {
            stack = stack[:len(stack)-1];
        }
    }%%

    // Make Go compiler happy
    _ = ts
    _ = te
    _ = act
    _ = eof

    // token emits the span currently delimited by ts and te with the given type.
    token := func (ty TokenType) {
        f.emitToken(ty, ts, te)
    }
    // selfToken emits a single-byte token whose type is the byte itself.
    selfToken := func () {
        b := data[ts:te]
        if len(b) != 1 {
            // should never happen
            panic("selfToken only works for single-character tokens")
        }
        f.emitToken(TokenType(b[0]), ts, te)
    }

    %%{
        # cs was already chosen from mode above, so "nocs" stops the
        # generated initialisation from overwriting it.
        write init nocs;
        write exec;
    }%%

    // If we fall out here without being in a final state then we've
    // encountered something that the scanner can't match, which we'll
    // deal with as an invalid.
    if cs < hcltok_first_final {
        if mode == scanTemplate && len(stack) == 0 {
            // If we're scanning a bare template then any straggling
            // top-level stuff is actually literal string, rather than
            // invalid. This handles the case where the template ends
            // with a single "$" or "%", which trips us up because we
            // want to see another character to decide if it's a sequence
            // or an escape.
            f.emitToken(TokenStringLit, ts, len(data))
        } else {
            f.emitToken(TokenInvalid, ts, len(data))
        }
    }

    // We always emit a synthetic EOF token at the end, since it gives the
    // parser position information for an "unexpected EOF" diagnostic.
    f.emitToken(TokenEOF, len(data), len(data))

    return f.Tokens
}