aboutsummaryrefslogtreecommitdiffhomepage
path: root/vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl')
-rw-r--r--vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl376
1 files changed, 376 insertions, 0 deletions
diff --git a/vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl b/vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl
new file mode 100644
index 0000000..83ef65b
--- /dev/null
+++ b/vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl
@@ -0,0 +1,376 @@
1
2package hclsyntax
3
4import (
5 "bytes"
6
7 "github.com/hashicorp/hcl2/hcl"
8)
9
10// This file is generated from scan_tokens.rl. DO NOT EDIT.
11%%{
12 # (except you are actually in scan_tokens.rl here, so edit away!)
13
14 machine hcltok;
15 write data;
16}%%
17
// scanTokens runs the Ragel-generated "hcltok" scanner over data and returns
// the tokens it emitted.
//
// filename and start seed the tokenAccum that records each token. mode
// selects the scanner's entry state: scanNormal starts in "main",
// scanTemplate in "bareTemplate" and scanIdentOnly in "identOnly"; any other
// mode panics.
//
// Input the scanner cannot match is reported as a trailing TokenInvalid (or
// TokenStringLit for a top-level bare template), and the result always ends
// with a synthetic TokenEOF positioned at the end of data.
func scanTokens(data []byte, filename string, start hcl.Pos, mode scanMode) []Token {
    f := &tokenAccum{
        Filename: filename,
        Bytes:    data,
        Pos:      start,
    }

    %%{
        include UnicodeDerived "unicode_derived.rl";

        # Byte-level description of a single UTF-8 encoded character; anything
        # that does not fit one of these shapes is treated as broken UTF-8.
        UTF8Cont = 0x80 .. 0xBF;
        AnyUTF8 = (
            0x00..0x7F |
            0xC0..0xDF . UTF8Cont |
            0xE0..0xEF . UTF8Cont . UTF8Cont |
            0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
        );
        BrokenUTF8 = any - AnyUTF8;

        NumberLitContinue = (digit|'.'|('e'|'E') ('+'|'-')? digit);
        NumberLit = digit ("" | (NumberLitContinue - '.') | (NumberLitContinue* (NumberLitContinue - '.')));
        Ident = (ID_Start | '_') (ID_Continue | '-')*;

        # Symbols that just represent themselves are handled as a single rule.
        SelfToken = "[" | "]" | "(" | ")" | "." | "," | "*" | "/" | "%" | "+" | "-" | "=" | "<" | ">" | "!" | "?" | ":" | "\n" | "&" | "|" | "~" | "^" | ";" | "`";

        EqualOp = "==";
        NotEqual = "!=";
        GreaterThanEqual = ">=";
        LessThanEqual = "<=";
        LogicalAnd = "&&";
        LogicalOr = "||";

        Ellipsis = "...";
        FatArrow = "=>";

        Newline = '\r' ? '\n';
        EndOfLine = Newline;

        BeginStringTmpl = '"';
        BeginHeredocTmpl = '<<' ('-')? Ident Newline;

        # The multi-line form uses ":>>" (finish-guarded concatenation), which
        # terminates the "any*" on its left as soon as the "*/" on its right
        # completes. A plain concatenation would let "any*" swallow "*/" too,
        # so the scanner's longest-match rule would extend the first comment
        # in the input all the way to the _final_ "*/" in the input.
        Comment = (
            ("#" (any - EndOfLine)* EndOfLine) |
            ("//" (any - EndOfLine)* EndOfLine) |
            ("/*" any* :>> "*/")
        );

        # Note: hclwrite assumes that only ASCII spaces appear between tokens,
        # and uses this assumption to recreate the spaces between tokens by
        # looking at byte offset differences. This means it will produce
        # incorrect results in the presence of tabs, but that's acceptable
        # because the canonical style (which hclwrite itself can impose
        # automatically) is to never use tabs.
        Spaces = (' ' | 0x09)+;

        action beginStringTemplate {
            // Emit the opening quote, then switch to the quoted-template
            // scanner until its matching closing quote returns here.
            token(TokenOQuote);
            fcall stringTemplate;
        }

        action endStringTemplate {
            // Emit the closing quote and return to whichever scanner
            // entered the string template.
            token(TokenCQuote);
            fret;
        }

        action beginHeredocTemplate {
            token(TokenOHeredoc);
            // the token is currently the whole heredoc introducer, like
            // <<EOT or <<-EOT, followed by a newline. We want to extract
            // just the "EOT" portion that we'll use as the closing marker.

            // Skip the leading "<<" and the trailing "\n".
            marker := data[ts+2:te-1]
            if marker[0] == '-' {
                marker = marker[1:]
            }
            // The introducer may have ended in "\r\n" rather than "\n".
            if marker[len(marker)-1] == '\r' {
                marker = marker[:len(marker)-1]
            }

            heredocs = append(heredocs, heredocInProgress{
                Marker:      marker,
                StartOfLine: true,
            })

            fcall heredocTemplate;
        }

        action heredocLiteralEOL {
            // This action is called specifically when a heredoc literal
            // ends with a newline character.

            // This might actually be our end marker.
            topdoc := &heredocs[len(heredocs)-1]
            if topdoc.StartOfLine {
                maybeMarker := bytes.TrimSpace(data[ts:te])
                if bytes.Equal(maybeMarker, topdoc.Marker) {
                    // We actually emit two tokens here: the end-of-heredoc
                    // marker first, and then separately the newline that
                    // follows it. This then avoids issues with the closing
                    // marker consuming a newline that would normally be used
                    // to mark the end of an attribute definition.
                    // We might have either a \n sequence or an \r\n sequence
                    // here, so we must handle both.
                    nls := te-1
                    nle := te
                    te--
                    if data[te-1] == '\r' {
                        // back up one more byte
                        nls--
                        te--
                    }
                    token(TokenCHeredoc);
                    ts = nls
                    te = nle
                    token(TokenNewline);
                    heredocs = heredocs[:len(heredocs)-1]
                    fret;
                }
            }

            // Not the end marker: this is ordinary literal text, and the
            // next literal begins at the start of a line.
            topdoc.StartOfLine = true;
            token(TokenStringLit);
        }

        action heredocLiteralMidline {
            // This action is called when a heredoc literal _doesn't_ end
            // with a newline character, e.g. because we're about to enter
            // an interpolation sequence.
            heredocs[len(heredocs)-1].StartOfLine = false;
            token(TokenStringLit);
        }

        action bareTemplateLiteral {
            token(TokenStringLit);
        }

        action beginTemplateInterp {
            token(TokenTemplateInterp);
            // Remember the brace depth at which this sequence was opened so
            // that closeBrace can tell which "}" ends it.
            braces++;
            retBraces = append(retBraces, braces);
            if len(heredocs) > 0 {
                heredocs[len(heredocs)-1].StartOfLine = false;
            }
            fcall main;
        }

        action beginTemplateControl {
            token(TokenTemplateControl);
            // Same bookkeeping as beginTemplateInterp, for "%{" sequences.
            braces++;
            retBraces = append(retBraces, braces);
            if len(heredocs) > 0 {
                heredocs[len(heredocs)-1].StartOfLine = false;
            }
            fcall main;
        }

        action openBrace {
            token(TokenOBrace);
            braces++;
        }

        action closeBrace {
            // A "}" at the depth recorded on top of retBraces closes the
            // innermost template sequence and returns to the template
            // scanner; any other "}" is an ordinary closing brace.
            if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
                token(TokenTemplateSeqEnd);
                braces--;
                retBraces = retBraces[0:len(retBraces)-1]
                fret;
            } else {
                token(TokenCBrace);
                braces--;
            }
        }

        action closeTemplateSeqEatWhitespace {
            // Only consume from the retBraces stack and return if we are at
            // a suitable brace nesting level, otherwise things will get
            // confused. (Not entering this branch indicates a syntax error,
            // which we will catch in the parser.)
            if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
                token(TokenTemplateSeqEnd);
                braces--;
                retBraces = retBraces[0:len(retBraces)-1]
                fret;
            } else {
                // We intentionally generate a TokenTemplateSeqEnd here,
                // even though the user apparently wanted a brace, because
                // we want to allow the parser to catch the incorrect use
                // of a ~} to balance a generic opening brace, rather than
                // a template sequence.
                token(TokenTemplateSeqEnd);
                braces--;
            }
        }

        TemplateInterp = "${" ("~")?;
        TemplateControl = "%{" ("~")?;
        EndStringTmpl = '"';
        StringLiteralChars = (AnyUTF8 - ("\r"|"\n"));

        # In the two patterns below, a "$" or "%" that is not followed by "{"
        # is literal text; "fhold" hands the character after it back to the
        # scanner so that it is scanned again in its own right.
        TemplateStringLiteral = (
            ('$' ^'{' %{ fhold; }) |
            ('%' ^'{' %{ fhold; }) |
            ('\\' StringLiteralChars) |
            (StringLiteralChars - ("$" | '%' | '"'))
        )+;
        HeredocStringLiteral = (
            ('$' ^'{' %{ fhold; }) |
            ('%' ^'{' %{ fhold; }) |
            (StringLiteralChars - ("$" | '%'))
        )*;
        BareStringLiteral = (
            ('$' ^'{') |
            ('%' ^'{') |
            (StringLiteralChars - ("$" | '%'))
        )* Newline?;

        # Scanner for the contents of a quoted template, entered from
        # beginStringTemplate.
        stringTemplate := |*
            TemplateInterp        => beginTemplateInterp;
            TemplateControl       => beginTemplateControl;
            EndStringTmpl         => endStringTemplate;
            TemplateStringLiteral => { token(TokenQuotedLit); };
            AnyUTF8               => { token(TokenInvalid); };
            BrokenUTF8            => { token(TokenBadUTF8); };
        *|;

        # Scanner for the body of a heredoc, entered from
        # beginHeredocTemplate.
        heredocTemplate := |*
            TemplateInterp        => beginTemplateInterp;
            TemplateControl       => beginTemplateControl;
            HeredocStringLiteral EndOfLine => heredocLiteralEOL;
            HeredocStringLiteral  => heredocLiteralMidline;
            BrokenUTF8            => { token(TokenBadUTF8); };
        *|;

        # Entry scanner for scanTemplate mode.
        bareTemplate := |*
            TemplateInterp        => beginTemplateInterp;
            TemplateControl       => beginTemplateControl;
            BareStringLiteral     => bareTemplateLiteral;
            BrokenUTF8            => { token(TokenBadUTF8); };
        *|;

        # Entry scanner for scanIdentOnly mode.
        identOnly := |*
            Ident            => { token(TokenIdent) };
            BrokenUTF8       => { token(TokenBadUTF8) };
            AnyUTF8          => { token(TokenInvalid) };
        *|;

        # Entry scanner for scanNormal mode; also re-entered for the
        # expression inside each template sequence.
        main := |*
            Spaces           => {};
            NumberLit        => { token(TokenNumberLit) };
            Ident            => { token(TokenIdent) };

            Comment          => { token(TokenComment) };
            Newline          => { token(TokenNewline) };

            EqualOp          => { token(TokenEqualOp); };
            NotEqual         => { token(TokenNotEqual); };
            GreaterThanEqual => { token(TokenGreaterThanEq); };
            LessThanEqual    => { token(TokenLessThanEq); };
            LogicalAnd       => { token(TokenAnd); };
            LogicalOr        => { token(TokenOr); };
            Ellipsis         => { token(TokenEllipsis); };
            FatArrow         => { token(TokenFatArrow); };
            SelfToken        => { selfToken() };

            "{"              => openBrace;
            "}"              => closeBrace;

            "~}"             => closeTemplateSeqEatWhitespace;

            BeginStringTmpl  => beginStringTemplate;
            BeginHeredocTmpl => beginHeredocTemplate;

            BrokenUTF8       => { token(TokenBadUTF8) };
            AnyUTF8          => { token(TokenInvalid) };
        *|;

    }%%

    // Ragel state
    p := 0  // "Pointer" into data
    pe := len(data) // End-of-data "pointer"
    ts := 0
    te := 0
    act := 0
    eof := pe
    var stack []int
    var top int

    // The scan mode decides which of the scanners above we start in.
    var cs int // current state
    switch mode {
    case scanNormal:
        cs = hcltok_en_main
    case scanTemplate:
        cs = hcltok_en_bareTemplate
    case scanIdentOnly:
        cs = hcltok_en_identOnly
    default:
        panic("invalid scanMode")
    }

    braces := 0
    var retBraces []int // stack of brace levels that cause us to use fret
    var heredocs []heredocInProgress // stack of heredocs we're currently processing

    %%{
        # Grow the call stack by one slot before each fcall and shrink it
        # again after each fret, so that nesting depth is bounded only by
        # the input.
        prepush {
            stack = append(stack, 0);
        }
        postpop {
            stack = stack[:len(stack)-1];
        }
    }%%

    // Make Go compiler happy
    _ = ts
    _ = te
    _ = act
    _ = eof

    // token emits a token of the given type covering the scanner's current
    // match, data[ts:te].
    token := func (ty TokenType) {
        f.emitToken(ty, ts, te)
    }
    // selfToken emits a single-byte token whose type is the byte itself.
    selfToken := func () {
        b := data[ts:te]
        if len(b) != 1 {
            // should never happen
            panic("selfToken only works for single-character tokens")
        }
        f.emitToken(TokenType(b[0]), ts, te)
    }

    %%{
        # "nocs" because cs was already chosen from the scan mode above.
        write init nocs;
        write exec;
    }%%

    // If we fall out here without being in a final state then we've
    // encountered something that the scanner can't match, which we'll
    // deal with as an invalid.
    if cs < hcltok_first_final {
        if mode == scanTemplate && len(stack) == 0 {
            // If we're scanning a bare template then any straggling
            // top-level stuff is actually literal string, rather than
            // invalid. This handles the case where the template ends
            // with a single "$" or "%", which trips us up because we
            // want to see another character to decide if it's a sequence
            // or an escape.
            f.emitToken(TokenStringLit, ts, len(data))
        } else {
            f.emitToken(TokenInvalid, ts, len(data))
        }
    }

    // We always emit a synthetic EOF token at the end, since it gives the
    // parser position information for an "unexpected EOF" diagnostic.
    f.emitToken(TokenEOF, len(data), len(data))

    return f.Tokens
}