vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/scan_tokens.rl

   1
   2 package hclsyntax
   3
   4 import (
   5     "bytes"
   6
   7     "github.com/hashicorp/hcl2/hcl"
   8 )
   9
  10 // This file is generated from scan_tokens.rl. DO NOT EDIT.
  11 %%{
  12   # (except when you are actually in scan_tokens.rl here, so edit away!)
  13
  14   machine hcltok;
  15   write data;
  16 }%%
  17
   // scanTokens runs the Ragel-generated hcltok scanner over data and returns
   // the tokens collected in a tokenAccum, always ending with a synthetic
   // TokenEOF.
   //
   // data is first passed through stripUTF8BOM, and start.Byte is advanced by
   // however many bytes that call removed. filename and start are stored on the
   // accumulator (Filename, Pos, StartByte). mode selects the entry state:
   // scanNormal enters main, scanTemplate enters bareTemplate and scanIdentOnly
   // enters identOnly; any other value panics with "invalid scanMode".
   //
   // If the machine stops in a non-final state, everything from ts to the end
   // of data is emitted as one extra token: TokenStringLit when scanning a bare
   // template with an empty call stack, TokenInvalid otherwise.
   18 func scanTokens(data []byte, filename string, start hcl.Pos, mode scanMode) []Token {
   19     stripData := stripUTF8BOM(data)
   20     start.Byte += len(data) - len(stripData)
   21     data = stripData
   22
   23     f := &tokenAccum{
   24         Filename:  filename,
   25         Bytes:     data,
   26         Pos:       start,
   27         StartByte: start.Byte,
   28     }
   29
   30     %%{
   31         include UnicodeDerived "unicode_derived.rl";
   32
   33         UTF8Cont = 0x80 .. 0xBF;
   34         AnyUTF8 = (
   35             0x00..0x7F |
   36             0xC0..0xDF . UTF8Cont |
   37             0xE0..0xEF . UTF8Cont . UTF8Cont |
   38             0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
   39         );
   40         BrokenUTF8 = any - AnyUTF8;
   41
   42         NumberLitContinue = (digit|'.'|('e'|'E') ('+'|'-')? digit);
   43         NumberLit = digit ("" | (NumberLitContinue - '.') | (NumberLitContinue* (NumberLitContinue - '.')));
   44         Ident = (ID_Start | '_') (ID_Continue | '-')*;
   45
   46         # Symbols that just represent themselves are handled as a single rule.
   47         SelfToken = "[" | "]" | "(" | ")" | "." | "," | "*" | "/" | "%" | "+" | "-" | "=" | "<" | ">" | "!" | "?" | ":" | "\n" | "&" | "|" | "~" | "^" | ";" | "`" | "'";
   48
   49         EqualOp = "==";
   50         NotEqual = "!=";
   51         GreaterThanEqual = ">=";
   52         LessThanEqual = "<=";
   53         LogicalAnd = "&&";
   54         LogicalOr = "||";
   55
   56         Ellipsis = "...";
   57         FatArrow = "=>";
   58
   59         Newline = '\r' ? '\n';
   60         EndOfLine = Newline;
   61
   62         BeginStringTmpl = '"';
   63         BeginHeredocTmpl = '<<' ('-')? Ident Newline;
   64
   65         Comment = (
   66             # The :>> operator in these is a "finish-guarded concatenation",
   67             # which terminates the sequence on its left when it completes
   68             # the sequence on its right.
   69             # In the single-line comment cases this is allowing us to make
   70             # the trailing EndOfLine optional while still having the overall
   71             # pattern terminate. In the multi-line case it ensures that
   72             # the first comment in the file ends at the first */, rather than
   73             # gobbling up all of the "any*" until the _final_ */ in the file.
   74             ("#" (any - EndOfLine)* :>> EndOfLine?) |
   75             ("//" (any - EndOfLine)* :>> EndOfLine?) |
   76             ("/*" any* :>> "*/")
   77         );
   78
   79         # Note: hclwrite assumes that only ASCII spaces appear between tokens,
   80         # and uses this assumption to recreate the spaces between tokens by
   81         # looking at byte offset differences. This means it will produce
   82         # incorrect results in the presence of tabs, but that's acceptable
   83         # because the canonical style (which hclwrite itself can impose
   84         # automatically is to never use tabs).
   85         Spaces = (' ' | 0x09)+;
   86
   87         action beginStringTemplate {
   88             token(TokenOQuote);
   89             fcall stringTemplate;
   90         }
   91
   92         action endStringTemplate {
   93             token(TokenCQuote);
   94             fret;
   95         }
   96
   97         action beginHeredocTemplate {
   98             token(TokenOHeredoc);
   99             // the token is currently the whole heredoc introducer, like
  100             // <<EOT or <<-EOT, followed by a newline. We want to extract
  101             // just the "EOT" portion that we'll use as the closing marker.
  102
  103             marker := data[ts+2:te-1]
  104             if marker[0] == '-' {
  105                 marker = marker[1:]
  106             }
  107             if marker[len(marker)-1] == '\r' {
  108                 marker = marker[:len(marker)-1]
  109             }
  110
  111             heredocs = append(heredocs, heredocInProgress{
  112                 Marker:      marker,
  113                 StartOfLine: true,
  114             })
  115
  116             fcall heredocTemplate;
  117         }
  118
  119         action heredocLiteralEOL {
  120             // This action is called specifically when a heredoc literal
  121             // ends with a newline character.
  122
  123             // This might actually be our end marker.
  124             topdoc := &heredocs[len(heredocs)-1]
  125             if topdoc.StartOfLine {
  126                 maybeMarker := bytes.TrimSpace(data[ts:te])
  127                 if bytes.Equal(maybeMarker, topdoc.Marker) {
  128                     // We actually emit two tokens here: the end-of-heredoc
  129                     // marker first, and then separately the newline that
  130                     // follows it. This then avoids issues with the closing
  131                     // marker consuming a newline that would normally be used
  132                     // to mark the end of an attribute definition.
  133                     // We might have either a \n sequence or an \r\n sequence
  134                     // here, so we must handle both.
  135                     nls := te-1
  136                     nle := te
  137                     te--
  138                     if data[te-1] == '\r' {
  139                         // back up one more byte
  140                         nls--
  141                         te--
  142                     }
  143                     token(TokenCHeredoc);
  144                     ts = nls
  145                     te = nle
  146                     token(TokenNewline);
  147                     heredocs = heredocs[:len(heredocs)-1]
  148                     fret;
  149                 }
  150             }
  151
  152             topdoc.StartOfLine = true;
  153             token(TokenStringLit);
  154         }
  155
  156         action heredocLiteralMidline {
  157             // This action is called when a heredoc literal _doesn't_ end
  158             // with a newline character, e.g. because we're about to enter
  159             // an interpolation sequence.
  160             heredocs[len(heredocs)-1].StartOfLine = false;
  161             token(TokenStringLit);
  162         }
  163
  164         action bareTemplateLiteral {
  165             token(TokenStringLit);
  166         }
  167
  168         action beginTemplateInterp {
  169             token(TokenTemplateInterp);
  170             braces++;
  171             retBraces = append(retBraces, braces); // level whose closing brace returns to the template
  172             if len(heredocs) > 0 {
  173                 heredocs[len(heredocs)-1].StartOfLine = false;
  174             }
  175             fcall main;
  176         }
  177
  178         action beginTemplateControl {
  179             token(TokenTemplateControl);
  180             braces++;
  181             retBraces = append(retBraces, braces); // level whose closing brace returns to the template
  182             if len(heredocs) > 0 {
  183                 heredocs[len(heredocs)-1].StartOfLine = false;
  184             }
  185             fcall main;
  186         }
  187
  188         action openBrace {
  189             token(TokenOBrace);
  190             braces++;
  191         }
  192
  193         action closeBrace {
  194             if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
  195                 token(TokenTemplateSeqEnd);
  196                 braces--;
  197                 retBraces = retBraces[0:len(retBraces)-1]
  198                 fret;
  199             } else {
  200                 token(TokenCBrace);
  201                 braces--;
  202             }
  203         }
  204
  205         action closeTemplateSeqEatWhitespace {
  206             // Only consume from the retBraces stack and return if we are at
  207             // a suitable brace nesting level, otherwise things will get
  208             // confused. (Not entering this branch indicates a syntax error,
  209             // which we will catch in the parser.)
  210             if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
  211                 token(TokenTemplateSeqEnd);
  212                 braces--;
  213                 retBraces = retBraces[0:len(retBraces)-1]
  214                 fret;
  215             } else {
  216                 // We intentionally generate a TokenTemplateSeqEnd here,
  217                 // even though the user apparently wanted a brace, because
  218                 // we want to allow the parser to catch the incorrect use
  219                 // of a ~} to balance a generic opening brace, rather than
  220                 // a template sequence.
  221                 token(TokenTemplateSeqEnd);
  222                 braces--;
  223             }
  224         }
  225
  226         TemplateInterp = "${" ("~")?;
  227         TemplateControl = "%{" ("~")?;
  228         EndStringTmpl = '"';
  229         NewlineChars = ("\r"|"\n");
  230         NewlineCharsSeq = NewlineChars+;
  231         StringLiteralChars = (AnyUTF8 - NewlineChars);
  232         TemplateIgnoredNonBrace = (^'{' %{ fhold; });
  233         TemplateNotInterp = '$' (TemplateIgnoredNonBrace | TemplateInterp);
  234         TemplateNotControl = '%' (TemplateIgnoredNonBrace | TemplateControl);
  235         QuotedStringLiteralWithEsc = ('\\' StringLiteralChars) | (StringLiteralChars - ("$" | '%' | '"' | "\\"));
  236         TemplateStringLiteral = (
  237             (TemplateNotInterp) |
  238             (TemplateNotControl) |
  239             (QuotedStringLiteralWithEsc)+
  240         );
  241         HeredocStringLiteral = (
  242             (TemplateNotInterp) |
  243             (TemplateNotControl) |
  244             (StringLiteralChars - ("$" | '%'))*
  245         );
  246         BareStringLiteral = (
  247             (TemplateNotInterp) |
  248             (TemplateNotControl) |
  249             (StringLiteralChars - ("$" | '%'))*
  250         ) Newline?;
  251
  252         stringTemplate := |*
  253             TemplateInterp        => beginTemplateInterp;
  254             TemplateControl       => beginTemplateControl;
  255             EndStringTmpl         => endStringTemplate;
  256             TemplateStringLiteral => { token(TokenQuotedLit); };
  257             NewlineCharsSeq       => { token(TokenQuotedNewline); };
  258             AnyUTF8               => { token(TokenInvalid); };
  259             BrokenUTF8            => { token(TokenBadUTF8); };
  260         *|;
  261
  262         heredocTemplate := |*
  263             TemplateInterp        => beginTemplateInterp;
  264             TemplateControl       => beginTemplateControl;
  265             HeredocStringLiteral EndOfLine => heredocLiteralEOL;
  266             HeredocStringLiteral  => heredocLiteralMidline;
  267             BrokenUTF8            => { token(TokenBadUTF8); };
  268         *|;
  269
  270         bareTemplate := |*
  271             TemplateInterp        => beginTemplateInterp;
  272             TemplateControl       => beginTemplateControl;
  273             BareStringLiteral     => bareTemplateLiteral;
  274             BrokenUTF8            => { token(TokenBadUTF8); };
  275         *|;
  276
  277         identOnly := |*
  278             Ident            => { token(TokenIdent) };
  279             BrokenUTF8       => { token(TokenBadUTF8) };
  280             AnyUTF8          => { token(TokenInvalid) };
  281         *|;
  282
  283         main := |*
  284             Spaces           => {};
  285             NumberLit        => { token(TokenNumberLit) };
  286             Ident            => { token(TokenIdent) };
  287
  288             Comment          => { token(TokenComment) };
  289             Newline          => { token(TokenNewline) };
  290
  291             EqualOp          => { token(TokenEqualOp); };
  292             NotEqual         => { token(TokenNotEqual); };
  293             GreaterThanEqual => { token(TokenGreaterThanEq); };
  294             LessThanEqual    => { token(TokenLessThanEq); };
  295             LogicalAnd       => { token(TokenAnd); };
  296             LogicalOr        => { token(TokenOr); };
  297             Ellipsis         => { token(TokenEllipsis); };
  298             FatArrow         => { token(TokenFatArrow); };
  299             SelfToken        => { selfToken() };
  300
  301             "{"              => openBrace;
  302             "}"              => closeBrace;
  303
  304             "~}"             => closeTemplateSeqEatWhitespace;
  305
  306             BeginStringTmpl  => beginStringTemplate;
  307             BeginHeredocTmpl => beginHeredocTemplate;
  308
  309             BrokenUTF8       => { token(TokenBadUTF8) };
  310             AnyUTF8          => { token(TokenInvalid) };
  311         *|;
  312
  313     }%%
  314
  315     // Ragel state
  316         p := 0  // "Pointer" into data
  317         pe := len(data) // End-of-data "pointer"
  318     ts := 0
  319     te := 0
  320     act := 0
  321     eof := pe // all input is already in data, so end-of-file coincides with pe
  322     var stack []int // return stack for nested scanner calls; grown in prepush, shrunk in postpop
  323     var top int // stack offset managed by the generated call/return code (see the Ragel guide)
  324
  325     var cs int // current state
  326     switch mode {
  327     case scanNormal:
  328         cs = hcltok_en_main
  329     case scanTemplate:
  330         cs = hcltok_en_bareTemplate
  331     case scanIdentOnly:
  332         cs = hcltok_en_identOnly
  333     default:
  334         panic("invalid scanMode")
  335     }
  336
  337     braces := 0
  338     var retBraces []int // stack of brace levels that cause us to use fret
  339     var heredocs []heredocInProgress // stack of heredocs we're currently processing
  340
  341     %%{
  342         prepush {
  343             stack = append(stack, 0);
  344         }
  345         postpop {
  346             stack = stack[:len(stack)-1];
  347         }
  348     }%%
  349
  350     // Make Go compiler happy
  351     _ = ts
  352     _ = te
  353     _ = act
  354     _ = eof
  355
  356     token := func (ty TokenType) { // emits ty over the current ts/te, which an action may adjust first
  357         f.emitToken(ty, ts, te)
  358     }
  359     selfToken := func () { // emits the single matched byte, using its value as the token type
  360         b := data[ts:te]
  361         if len(b) != 1 {
  362             // should never happen
  363             panic("selfToken only works for single-character tokens")
  364         }
  365         f.emitToken(TokenType(b[0]), ts, te)
  366     }
  367
  368     %%{
  369         write init nocs;
  370         write exec;
  371     }%%
  372
  373     // If we fall out here without being in a final state then we've
  374     // encountered something that the scanner can't match, which we'll
  375     // deal with as an invalid.
  376     if cs < hcltok_first_final {
  377         if mode == scanTemplate && len(stack) == 0 {
  378             // If we're scanning a bare template then any straggling
  379             // top-level stuff is actually literal string, rather than
  380             // invalid. This handles the case where the template ends
  381             // with a single "$" or "%", which trips us up because we
  382             // want to see another character to decide if it's a sequence
  383             // or an escape.
  384             f.emitToken(TokenStringLit, ts, len(data))
  385         } else {
  386             f.emitToken(TokenInvalid, ts, len(data))
  387         }
  388     }
  389
  390     // We always emit a synthetic EOF token at the end, since it gives the
  391     // parser position information for an "unexpected EOF" diagnostic.
  392     f.emitToken(TokenEOF, len(data), len(data))
  393
  394     return f.Tokens
  395 }