7 "github.com/hashicorp/hcl2/hcl"
10 // This file is generated from scan_tokens.rl. DO NOT EDIT.
12 # (except when you are actually in scan_tokens.rl here, so edit away!)
18 func scanTokens(data []byte, filename string, start hcl.Pos, mode scanMode) []Token {
19 stripData := stripUTF8BOM(data)
20 start.Byte += len(data) - len(stripData)
27 StartByte: start.Byte,
31 include UnicodeDerived "unicode_derived.rl";
33 UTF8Cont = 0x80 .. 0xBF;
36 0xC0..0xDF . UTF8Cont |
37 0xE0..0xEF . UTF8Cont . UTF8Cont |
38 0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
40 BrokenUTF8 = any - AnyUTF8;
42 NumberLitContinue = (digit|'.'|('e'|'E') ('+'|'-')? digit);
43 NumberLit = digit ("" | (NumberLitContinue - '.') | (NumberLitContinue* (NumberLitContinue - '.')));
44 Ident = (ID_Start | '_') (ID_Continue | '-')*;
46 # Symbols that just represent themselves are handled as a single rule.
47 SelfToken = "[" | "]" | "(" | ")" | "." | "," | "*" | "/" | "%" | "+" | "-" | "=" | "<" | ">" | "!" | "?" | ":" | "\n" | "&" | "|" | "~" | "^" | ";" | "`" | "'";
51 GreaterThanEqual = ">=";
59 Newline = '\r' ? '\n';
62 BeginStringTmpl = '"';
63 BeginHeredocTmpl = '<<' ('-')? Ident Newline;
66 # The :>> operator in these is a "finish-guarded concatenation",
67 # which terminates the sequence on its left when it completes
68 # the sequence on its right.
69 # In the single-line comment cases this is allowing us to make
70 # the trailing EndOfLine optional while still having the overall
71 # pattern terminate. In the multi-line case it ensures that
72 # the first comment in the file ends at the first */, rather than
73 # gobbling up all of the "any*" until the _final_ */ in the file.
74 ("#" (any - EndOfLine)* :>> EndOfLine?) |
75 ("//" (any - EndOfLine)* :>> EndOfLine?) |
79 # Note: hclwrite assumes that only ASCII spaces appear between tokens,
80 # and uses this assumption to recreate the spaces between tokens by
81 # looking at byte offset differences. This means it will produce
82 # incorrect results in the presence of tabs, but that's acceptable
83 # because the canonical style (which hclwrite itself can impose
84 # automatically) is to never use tabs.
85 Spaces = (' ' | 0x09)+;
87 action beginStringTemplate {
92 action endStringTemplate {
97 action beginHeredocTemplate {
99 // the token is currently the whole heredoc introducer, like
100 // <<EOT or <<-EOT, followed by a newline. We want to extract
101 // just the "EOT" portion that we'll use as the closing marker.
103 marker := data[ts+2:te-1]
104 if marker[0] == '-' {
107 if marker[len(marker)-1] == '\r' {
108 marker = marker[:len(marker)-1]
111 heredocs = append(heredocs, heredocInProgress{
116 fcall heredocTemplate;
119 action heredocLiteralEOL {
120 // This action is called specifically when a heredoc literal
121 // ends with a newline character.
123 // This might actually be our end marker.
124 topdoc := &heredocs[len(heredocs)-1]
125 if topdoc.StartOfLine {
126 maybeMarker := bytes.TrimSpace(data[ts:te])
127 if bytes.Equal(maybeMarker, topdoc.Marker) {
128 // We actually emit two tokens here: the end-of-heredoc
129 // marker first, and then separately the newline that
130 // follows it. This then avoids issues with the closing
131 // marker consuming a newline that would normally be used
132 // to mark the end of an attribute definition.
133 // We might have either a \n sequence or an \r\n sequence
134 // here, so we must handle both.
138 if data[te-1] == '\r' {
139 // back up one more byte
143 token(TokenCHeredoc);
147 heredocs = heredocs[:len(heredocs)-1]
152 topdoc.StartOfLine = true;
153 token(TokenStringLit);
156 action heredocLiteralMidline {
157 // This action is called when a heredoc literal _doesn't_ end
158 // with a newline character, e.g. because we're about to enter
159 // an interpolation sequence.
160 heredocs[len(heredocs)-1].StartOfLine = false;
161 token(TokenStringLit);
164 action bareTemplateLiteral {
165 token(TokenStringLit);
168 action beginTemplateInterp {
169 token(TokenTemplateInterp);
171 retBraces = append(retBraces, braces);
172 if len(heredocs) > 0 {
173 heredocs[len(heredocs)-1].StartOfLine = false;
178 action beginTemplateControl {
179 token(TokenTemplateControl);
181 retBraces = append(retBraces, braces);
182 if len(heredocs) > 0 {
183 heredocs[len(heredocs)-1].StartOfLine = false;
194 if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
195 token(TokenTemplateSeqEnd);
197 retBraces = retBraces[0:len(retBraces)-1]
205 action closeTemplateSeqEatWhitespace {
206 // Only consume from the retBraces stack and return if we are at
207 // a suitable brace nesting level, otherwise things will get
208 // confused. (Not entering this branch indicates a syntax error,
209 // which we will catch in the parser.)
210 if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
211 token(TokenTemplateSeqEnd);
213 retBraces = retBraces[0:len(retBraces)-1]
216 // We intentionally generate a TokenTemplateSeqEnd here,
217 // even though the user apparently wanted a brace, because
218 // we want to allow the parser to catch the incorrect use
219 // of a ~} to balance a generic opening brace, rather than
220 // a template sequence.
221 token(TokenTemplateSeqEnd);
226 TemplateInterp = "${" ("~")?;
227 TemplateControl = "%{" ("~")?;
229 NewlineChars = ("\r"|"\n");
230 NewlineCharsSeq = NewlineChars+;
231 StringLiteralChars = (AnyUTF8 - NewlineChars);
232 TemplateIgnoredNonBrace = (^'{' %{ fhold; });
233 TemplateNotInterp = '$' (TemplateIgnoredNonBrace | TemplateInterp);
234 TemplateNotControl = '%' (TemplateIgnoredNonBrace | TemplateControl);
235 QuotedStringLiteralWithEsc = ('\\' StringLiteralChars) | (StringLiteralChars - ("$" | '%' | '"' | "\\"));
236 TemplateStringLiteral = (
237 (TemplateNotInterp) |
238 (TemplateNotControl) |
239 (QuotedStringLiteralWithEsc)+
241 HeredocStringLiteral = (
242 (TemplateNotInterp) |
243 (TemplateNotControl) |
244 (StringLiteralChars - ("$" | '%'))*
246 BareStringLiteral = (
247 (TemplateNotInterp) |
248 (TemplateNotControl) |
249 (StringLiteralChars - ("$" | '%'))*
253 TemplateInterp => beginTemplateInterp;
254 TemplateControl => beginTemplateControl;
255 EndStringTmpl => endStringTemplate;
256 TemplateStringLiteral => { token(TokenQuotedLit); };
257 NewlineCharsSeq => { token(TokenQuotedNewline); };
258 AnyUTF8 => { token(TokenInvalid); };
259 BrokenUTF8 => { token(TokenBadUTF8); };
262 heredocTemplate := |*
263 TemplateInterp => beginTemplateInterp;
264 TemplateControl => beginTemplateControl;
265 HeredocStringLiteral EndOfLine => heredocLiteralEOL;
266 HeredocStringLiteral => heredocLiteralMidline;
267 BrokenUTF8 => { token(TokenBadUTF8); };
271 TemplateInterp => beginTemplateInterp;
272 TemplateControl => beginTemplateControl;
273 BareStringLiteral => bareTemplateLiteral;
274 BrokenUTF8 => { token(TokenBadUTF8); };
278 Ident => { token(TokenIdent) };
279 BrokenUTF8 => { token(TokenBadUTF8) };
280 AnyUTF8 => { token(TokenInvalid) };
285 NumberLit => { token(TokenNumberLit) };
286 Ident => { token(TokenIdent) };
288 Comment => { token(TokenComment) };
289 Newline => { token(TokenNewline) };
291 EqualOp => { token(TokenEqualOp); };
292 NotEqual => { token(TokenNotEqual); };
293 GreaterThanEqual => { token(TokenGreaterThanEq); };
294 LessThanEqual => { token(TokenLessThanEq); };
295 LogicalAnd => { token(TokenAnd); };
296 LogicalOr => { token(TokenOr); };
297 Ellipsis => { token(TokenEllipsis); };
298 FatArrow => { token(TokenFatArrow); };
299 SelfToken => { selfToken() };
304 "~}" => closeTemplateSeqEatWhitespace;
306 BeginStringTmpl => beginStringTemplate;
307 BeginHeredocTmpl => beginHeredocTemplate;
309 BrokenUTF8 => { token(TokenBadUTF8) };
310 AnyUTF8 => { token(TokenInvalid) };
316 p := 0 // "Pointer" into data
317 pe := len(data) // End-of-data "pointer"
325 var cs int // current state
330 cs = hcltok_en_bareTemplate
332 cs = hcltok_en_identOnly
334 panic("invalid scanMode")
338 var retBraces []int // stack of brace levels that cause us to use fret
339 var heredocs []heredocInProgress // stack of heredocs we're currently processing
343 stack = append(stack, 0);
346 stack = stack[:len(stack)-1];
350 // Make Go compiler happy
356 token := func (ty TokenType) {
357 f.emitToken(ty, ts, te)
359 selfToken := func () {
362 // should never happen
363 panic("selfToken only works for single-character tokens")
365 f.emitToken(TokenType(b[0]), ts, te)
373 // If we fall out here without being in a final state then we've
374 // encountered something that the scanner can't match, which we'll
375 // deal with as an invalid token.
376 if cs < hcltok_first_final {
377 if mode == scanTemplate && len(stack) == 0 {
378 // If we're scanning a bare template then any straggling
379 // top-level stuff is actually a literal string, rather than
380 // invalid. This handles the case where the template ends
381 // with a single "$" or "%", which trips us up because we
382 // want to see another character to decide if it's a sequence
384 f.emitToken(TokenStringLit, ts, len(data))
386 f.emitToken(TokenInvalid, ts, len(data))
390 // We always emit a synthetic EOF token at the end, since it gives the
391 // parser position information for an "unexpected EOF" diagnostic.
392 f.emitToken(TokenEOF, len(data), len(data))