]> git.immae.eu Git - github/fretlink/terraform-provider-statuscake.git/blob - vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl
Upgrade to 0.12
[github/fretlink/terraform-provider-statuscake.git] / vendor / github.com / hashicorp / hcl2 / hcl / hclsyntax / scan_tokens.rl
1
2 package hclsyntax
3
4 import (
5 "bytes"
6
7 "github.com/hashicorp/hcl2/hcl"
8 )
9
// The Ragel section below names the state machine hcltok and asks Ragel to
// write that machine's static data at this point in the output.
//
10 // This file is generated from scan_tokens.rl. DO NOT EDIT.
11 %%{
12 # (except when you are actually in scan_tokens.rl here, so edit away!)
13
14 machine hcltok;
15 write data;
16 }%%
17
// scanTokens runs the Ragel-generated hcltok scanner over data and returns
// the tokens collected in a tokenAccum, always ending with a synthetic
// TokenEOF.
//
// data is first passed through stripUTF8BOM, and start.Byte is advanced by
// however many bytes that call removed. filename and start seed the
// tokenAccum that records each token. mode selects the scanner entry point:
// scanNormal starts in main, scanTemplate in bareTemplate and scanIdentOnly
// in identOnly. Any other mode panics.
18 func scanTokens(data []byte, filename string, start hcl.Pos, mode scanMode) []Token {
19 stripData := stripUTF8BOM(data)
// Advance the starting byte offset by the number of bytes removed above.
20 start.Byte += len(data) - len(stripData)
21 data = stripData
22
23 f := &tokenAccum{
24 Filename: filename,
25 Bytes: data,
26 Pos: start,
27 StartByte: start.Byte,
28 }
29
30 %%{
31 include UnicodeDerived "unicode_derived.rl";
32
33 UTF8Cont = 0x80 .. 0xBF;
34 AnyUTF8 = (
35 0x00..0x7F |
36 0xC0..0xDF . UTF8Cont |
37 0xE0..0xEF . UTF8Cont . UTF8Cont |
38 0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
39 );
40 BrokenUTF8 = any - AnyUTF8;
41
42 NumberLitContinue = (digit|'.'|('e'|'E') ('+'|'-')? digit);
43 NumberLit = digit ("" | (NumberLitContinue - '.') | (NumberLitContinue* (NumberLitContinue - '.')));
44 Ident = (ID_Start | '_') (ID_Continue | '-')*;
45
46 # Symbols that just represent themselves are handled as a single rule.
47 SelfToken = "[" | "]" | "(" | ")" | "." | "," | "*" | "/" | "%" | "+" | "-" | "=" | "<" | ">" | "!" | "?" | ":" | "\n" | "&" | "|" | "~" | "^" | ";" | "`" | "'";
48
49 EqualOp = "==";
50 NotEqual = "!=";
51 GreaterThanEqual = ">=";
52 LessThanEqual = "<=";
53 LogicalAnd = "&&";
54 LogicalOr = "||";
55
56 Ellipsis = "...";
57 FatArrow = "=>";
58
59 Newline = '\r' ? '\n';
60 EndOfLine = Newline;
61
62 BeginStringTmpl = '"';
63 BeginHeredocTmpl = '<<' ('-')? Ident Newline;
64
65 Comment = (
66 # The :>> operator in these is a "finish-guarded concatenation",
67 # which terminates the sequence on its left when it completes
68 # the sequence on its right.
69 # In the single-line comment cases this is allowing us to make
70 # the trailing EndOfLine optional while still having the overall
71 # pattern terminate. In the multi-line case it ensures that
72 # the first comment in the file ends at the first */, rather than
73 # gobbling up all of the "any*" until the _final_ */ in the file.
74 ("#" (any - EndOfLine)* :>> EndOfLine?) |
75 ("//" (any - EndOfLine)* :>> EndOfLine?) |
76 ("/*" any* :>> "*/")
77 );
78
79 # Note: hclwrite assumes that only ASCII spaces appear between tokens,
80 # and uses this assumption to recreate the spaces between tokens by
81 # looking at byte offset differences. This means it will produce
82 # incorrect results in the presence of tabs, but that's acceptable
83 # because the canonical style (which hclwrite itself can impose
84 # automatically is to never use tabs).
85 Spaces = (' ' | 0x09)+;
86
87 action beginStringTemplate {
88 token(TokenOQuote);
89 fcall stringTemplate;
90 }
91
92 action endStringTemplate {
93 token(TokenCQuote);
94 fret;
95 }
96
97 action beginHeredocTemplate {
98 token(TokenOHeredoc);
99 // the token is currently the whole heredoc introducer, like
100 // <<EOT or <<-EOT, followed by a newline. We want to extract
101 // just the "EOT" portion that we'll use as the closing marker.
102
// ts+2 skips the two-byte introducer and te-1 drops the final line feed.
// A leading dash and a trailing carriage return may still be attached,
// so both are trimmed below.
103 marker := data[ts+2:te-1]
104 if marker[0] == '-' {
105 marker = marker[1:]
106 }
107 if marker[len(marker)-1] == '\r' {
108 marker = marker[:len(marker)-1]
109 }
110
// Push the new heredoc so the heredoc actions can find its marker.
111 heredocs = append(heredocs, heredocInProgress{
112 Marker: marker,
113 StartOfLine: true,
114 })
115
116 fcall heredocTemplate;
117 }
118
119 action heredocLiteralEOL {
120 // This action is called specifically when a heredoc literal
121 // ends with a newline character.
122
123 // This might actually be our end marker.
124 topdoc := &heredocs[len(heredocs)-1]
125 if topdoc.StartOfLine {
126 maybeMarker := bytes.TrimSpace(data[ts:te])
127 if bytes.Equal(maybeMarker, topdoc.Marker) {
128 // We actually emit two tokens here: the end-of-heredoc
129 // marker first, and then separately the newline that
130 // follows it. This then avoids issues with the closing
131 // marker consuming a newline that would normally be used
132 // to mark the end of an attribute definition.
133 // We might have either a \n sequence or an \r\n sequence
134 // here, so we must handle both.
// nls and nle bracket the newline sequence that is emitted as its
// own token after the closing marker.
135 nls := te-1
136 nle := te
137 te--
138 if data[te-1] == '\r' {
139 // back up one more byte
140 nls--
141 te--
142 }
143 token(TokenCHeredoc);
144 ts = nls
145 te = nle
146 token(TokenNewline);
// This heredoc is finished: pop it and return from the fcall that
// entered heredocTemplate.
147 heredocs = heredocs[:len(heredocs)-1]
148 fret;
149 }
150 }
151
// Not the end marker: the literal ended with a newline, so whatever
// comes next starts a new line.
152 topdoc.StartOfLine = true;
153 token(TokenStringLit);
154 }
155
156 action heredocLiteralMidline {
157 // This action is called when a heredoc literal _doesn't_ end
158 // with a newline character, e.g. because we're about to enter
159 // an interpolation sequence.
160 heredocs[len(heredocs)-1].StartOfLine = false;
161 token(TokenStringLit);
162 }
163
164 action bareTemplateLiteral {
165 token(TokenStringLit);
166 }
167
168 action beginTemplateInterp {
169 token(TokenTemplateInterp);
// Record the brace depth at which this sequence opened so that the
// closing-brace actions can recognize the brace that ends it.
170 braces++;
171 retBraces = append(retBraces, braces);
// Mark the innermost heredoc, if any, as no longer at the start of a line.
172 if len(heredocs) > 0 {
173 heredocs[len(heredocs)-1].StartOfLine = false;
174 }
175 fcall main;
176 }
177
178 action beginTemplateControl {
179 token(TokenTemplateControl);
// Same bookkeeping as beginTemplateInterp, for a control sequence.
180 braces++;
181 retBraces = append(retBraces, braces);
182 if len(heredocs) > 0 {
183 heredocs[len(heredocs)-1].StartOfLine = false;
184 }
185 fcall main;
186 }
187
188 action openBrace {
189 token(TokenOBrace);
190 braces++;
191 }
192
193 action closeBrace {
// A closing brace at the depth recorded on top of retBraces ends a
// template sequence and returns to the calling scanner. Any other
// closing brace is an ordinary TokenCBrace.
194 if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
195 token(TokenTemplateSeqEnd);
196 braces--;
197 retBraces = retBraces[0:len(retBraces)-1]
198 fret;
199 } else {
200 token(TokenCBrace);
201 braces--;
202 }
203 }
204
205 action closeTemplateSeqEatWhitespace {
206 // Only consume from the retBraces stack and return if we are at
207 // a suitable brace nesting level, otherwise things will get
208 // confused. (Not entering this branch indicates a syntax error,
209 // which we will catch in the parser.)
210 if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
211 token(TokenTemplateSeqEnd);
212 braces--;
213 retBraces = retBraces[0:len(retBraces)-1]
214 fret;
215 } else {
216 // We intentionally generate a TokenTemplateSeqEnd here,
217 // even though the user apparently wanted a brace, because
218 // we want to allow the parser to catch the incorrect use
219 // of a ~} to balance a generic opening brace, rather than
220 // a template sequence.
221 token(TokenTemplateSeqEnd);
222 braces--;
223 }
224 }
225
226 TemplateInterp = "${" ("~")?;
227 TemplateControl = "%{" ("~")?;
228 EndStringTmpl = '"';
229 NewlineChars = ("\r"|"\n");
230 NewlineCharsSeq = NewlineChars+;
231 StringLiteralChars = (AnyUTF8 - NewlineChars);
232 TemplateIgnoredNonBrace = (^'{' %{ fhold; });
233 TemplateNotInterp = '$' (TemplateIgnoredNonBrace | TemplateInterp);
234 TemplateNotControl = '%' (TemplateIgnoredNonBrace | TemplateControl);
235 QuotedStringLiteralWithEsc = ('\\' StringLiteralChars) | (StringLiteralChars - ("$" | '%' | '"' | "\\"));
236 TemplateStringLiteral = (
237 (TemplateNotInterp) |
238 (TemplateNotControl) |
239 (QuotedStringLiteralWithEsc)+
240 );
241 HeredocStringLiteral = (
242 (TemplateNotInterp) |
243 (TemplateNotControl) |
244 (StringLiteralChars - ("$" | '%'))*
245 );
246 BareStringLiteral = (
247 (TemplateNotInterp) |
248 (TemplateNotControl) |
249 (StringLiteralChars - ("$" | '%'))*
250 ) Newline?;
251
252 stringTemplate := |*
253 TemplateInterp => beginTemplateInterp;
254 TemplateControl => beginTemplateControl;
255 EndStringTmpl => endStringTemplate;
256 TemplateStringLiteral => { token(TokenQuotedLit); };
257 NewlineCharsSeq => { token(TokenQuotedNewline); };
258 AnyUTF8 => { token(TokenInvalid); };
259 BrokenUTF8 => { token(TokenBadUTF8); };
260 *|;
261
262 heredocTemplate := |*
263 TemplateInterp => beginTemplateInterp;
264 TemplateControl => beginTemplateControl;
265 HeredocStringLiteral EndOfLine => heredocLiteralEOL;
266 HeredocStringLiteral => heredocLiteralMidline;
267 BrokenUTF8 => { token(TokenBadUTF8); };
268 *|;
269
270 bareTemplate := |*
271 TemplateInterp => beginTemplateInterp;
272 TemplateControl => beginTemplateControl;
273 BareStringLiteral => bareTemplateLiteral;
274 BrokenUTF8 => { token(TokenBadUTF8); };
275 *|;
276
277 identOnly := |*
278 Ident => { token(TokenIdent) };
279 BrokenUTF8 => { token(TokenBadUTF8) };
280 AnyUTF8 => { token(TokenInvalid) };
281 *|;
282
283 main := |*
284 Spaces => {};
285 NumberLit => { token(TokenNumberLit) };
286 Ident => { token(TokenIdent) };
287
288 Comment => { token(TokenComment) };
289 Newline => { token(TokenNewline) };
290
291 EqualOp => { token(TokenEqualOp); };
292 NotEqual => { token(TokenNotEqual); };
293 GreaterThanEqual => { token(TokenGreaterThanEq); };
294 LessThanEqual => { token(TokenLessThanEq); };
295 LogicalAnd => { token(TokenAnd); };
296 LogicalOr => { token(TokenOr); };
297 Ellipsis => { token(TokenEllipsis); };
298 FatArrow => { token(TokenFatArrow); };
299 SelfToken => { selfToken() };
300
301 "{" => openBrace;
302 "}" => closeBrace;
303
304 "~}" => closeTemplateSeqEatWhitespace;
305
306 BeginStringTmpl => beginStringTemplate;
307 BeginHeredocTmpl => beginHeredocTemplate;
308
309 BrokenUTF8 => { token(TokenBadUTF8) };
310 AnyUTF8 => { token(TokenInvalid) };
311 *|;
312
313 }%%
314
315 // Ragel state
316 p := 0 // "Pointer" into data
317 pe := len(data) // End-of-data "pointer"
// ts and te are the start and end offsets of the current match; they are
// what the token and selfToken helpers below pass to emitToken.
318 ts := 0
319 te := 0
320 act := 0
321 eof := pe
// stack backs fcall and fret; it is grown and trimmed by the prepush and
// postpop blocks below.
322 var stack []int
323 var top int
324
// Pick the scanner entry state that corresponds to the requested mode.
325 var cs int // current state
326 switch mode {
327 case scanNormal:
328 cs = hcltok_en_main
329 case scanTemplate:
330 cs = hcltok_en_bareTemplate
331 case scanIdentOnly:
332 cs = hcltok_en_identOnly
333 default:
334 panic("invalid scanMode")
335 }
336
// braces counts currently open braces; the brace and template actions
// increment and decrement it.
337 braces := 0
338 var retBraces []int // stack of brace levels that cause us to use fret
339 var heredocs []heredocInProgress // stack of heredocs we're currently processing
340
// prepush makes room on the stack before an fcall pushes onto it, and
// postpop drops the last slot again after an fret.
341 %%{
342 prepush {
343 stack = append(stack, 0);
344 }
345 postpop {
346 stack = stack[:len(stack)-1];
347 }
348 }%%
349
350 // Make Go compiler happy
351 _ = ts
352 _ = te
353 _ = act
354 _ = eof
355
// token emits a token of the given type using the current ts and te as
// its bounds.
356 token := func (ty TokenType) {
357 f.emitToken(ty, ts, te)
358 }
// selfToken emits the single byte at data[ts:te] as a token whose type is
// that byte value. It panics if the current match is not exactly one byte.
359 selfToken := func () {
360 b := data[ts:te]
361 if len(b) != 1 {
362 // should never happen
363 panic("selfToken only works for single-character tokens")
364 }
365 f.emitToken(TokenType(b[0]), ts, te)
366 }
367
// Emit the scanner initialization and execution code here. The nocs option
// leaves cs as chosen by the mode switch above instead of resetting it.
368 %%{
369 write init nocs;
370 write exec;
371 }%%
372
373 // If we fall out here without being in a final state then we've
374 // encountered something that the scanner can't match, which we'll
375 // deal with as an invalid.
376 if cs < hcltok_first_final {
377 if mode == scanTemplate && len(stack) == 0 {
378 // If we're scanning a bare template then any straggling
379 // top-level stuff is actually literal string, rather than
380 // invalid. This handles the case where the template ends
381 // with a single "$" or "%", which trips us up because we
382 // want to see another character to decide if it's a sequence
383 // or an escape.
384 f.emitToken(TokenStringLit, ts, len(data))
385 } else {
386 f.emitToken(TokenInvalid, ts, len(data))
387 }
388 }
389
390 // We always emit a synthetic EOF token at the end, since it gives the
391 // parser position information for an "unexpected EOF" diagnostic.
392 f.emitToken(TokenEOF, len(data), len(data))
393
394 return f.Tokens
395 }