vendor/github.com/hashicorp/hcl2/hcl/hclsyntax/token.go

   1 package hclsyntax
   2
   3 import (
   4         "bytes"
   5         "fmt"
   6
   7         "github.com/apparentlymart/go-textseg/textseg"
   8         "github.com/hashicorp/hcl2/hcl"
   9 )
  10
// Token represents a sequence of bytes from some HCL code that has been
// tagged with a type and its range within the source file.
type Token struct {
	// Type is the kind of token, one of the Token* constants.
	Type TokenType
	// Bytes holds the raw source bytes covered by the token. Tokens produced
	// by tokenAccum.emitToken sub-slice the accumulator's buffer rather than
	// copying it.
	Bytes []byte
	// Range is the location (filename, start and end position) of Bytes
	// within the source file.
	Range hcl.Range
}
  18
// Tokens is a slice of Token, for example the token sequence that
// checkInvalidTokens walks when looking for tokens that may never appear in
// valid source.
type Tokens []Token
  21
// TokenType is an enumeration used for the Type field on Token.
//
// Its underlying type is rune: every TokenType constant is given a single
// character as its value. Single-character tokens use their own character;
// the remaining values are arbitrary mnemonics meant to be recognizable in
// debug output.
type TokenType rune
  24
// The TokenType values. Blank lines separate the constants into loosely
// related groups: brackets and quotes, arithmetic, comparison, logic,
// punctuation, template sequences, literals, comments, line/file endings and
// finally tokens that exist only so that good error messages can be produced.
const (
	// Single-character tokens are represented by their own character, for
	// convenience in producing these within the scanner. However, the values
	// are otherwise arbitrary and just intended to be mnemonic for humans
	// who might see them in debug output.

	TokenOBrace   TokenType = '{'
	TokenCBrace   TokenType = '}'
	TokenOBrack   TokenType = '['
	TokenCBrack   TokenType = ']'
	TokenOParen   TokenType = '('
	TokenCParen   TokenType = ')'
	TokenOQuote   TokenType = '«'
	TokenCQuote   TokenType = '»'
	TokenOHeredoc TokenType = 'H'
	TokenCHeredoc TokenType = 'h'

	TokenStar    TokenType = '*'
	TokenSlash   TokenType = '/'
	TokenPlus    TokenType = '+'
	TokenMinus   TokenType = '-'
	TokenPercent TokenType = '%'

	TokenEqual         TokenType = '='
	TokenEqualOp       TokenType = '≔'
	TokenNotEqual      TokenType = '≠'
	TokenLessThan      TokenType = '<'
	TokenLessThanEq    TokenType = '≤'
	TokenGreaterThan   TokenType = '>'
	TokenGreaterThanEq TokenType = '≥'

	TokenAnd  TokenType = '∧'
	TokenOr   TokenType = '∨'
	TokenBang TokenType = '!'

	TokenDot   TokenType = '.'
	TokenComma TokenType = ','

	TokenEllipsis TokenType = '…'
	TokenFatArrow TokenType = '⇒'

	TokenQuestion TokenType = '?'
	TokenColon    TokenType = ':'

	TokenTemplateInterp  TokenType = '∫'
	TokenTemplateControl TokenType = 'λ'
	TokenTemplateSeqEnd  TokenType = '∎'

	TokenQuotedLit TokenType = 'Q' // might contain backslash escapes
	TokenStringLit TokenType = 'S' // cannot contain backslash escapes
	TokenNumberLit TokenType = 'N'
	TokenIdent     TokenType = 'I'

	TokenComment TokenType = 'C'

	TokenNewline TokenType = '\n'
	TokenEOF     TokenType = '␄'

	// The rest are not used in the language but recognized by the scanner so
	// we can generate good diagnostics in the parser when users try to write
	// things that might work in other languages they are familiar with, or
	// simply make incorrect assumptions about the HCL language.
	//
	// checkInvalidTokens turns each of these into an error diagnostic.

	TokenBitwiseAnd    TokenType = '&'
	TokenBitwiseOr     TokenType = '|'
	TokenBitwiseNot    TokenType = '~'
	TokenBitwiseXor    TokenType = '^'
	TokenStarStar      TokenType = '➚' // the "**" operator
	TokenApostrophe    TokenType = '\''
	TokenBacktick      TokenType = '`'
	TokenSemicolon     TokenType = ';'
	TokenTabs          TokenType = '␉' // tab characters
	TokenInvalid       TokenType = '�' // a character not used within the language
	TokenBadUTF8       TokenType = '💩' // input that is not valid UTF-8
	TokenQuotedNewline TokenType = '␤' // a quoted string split over multiple lines

	// TokenNil is a placeholder for when a token is required but none is
	// available, e.g. when reporting errors. The scanner will never produce
	// this as part of a token stream.
	TokenNil TokenType = '\x00'
)
 106
 107 func (t TokenType) GoString() string {
 108         return fmt.Sprintf("hclsyntax.%s", t.String())
 109 }
 110
// scanMode identifies a mode of operation for the scanner.
//
// NOTE(review): the scanner is not part of this file, so the exact effect of
// each mode should be confirmed there.
type scanMode int

const (
	// scanNormal is the zero value of scanMode.
	scanNormal scanMode = iota
	// scanTemplate is presumably used when scanning template content —
	// TODO confirm against the scanner.
	scanTemplate
	// scanIdentOnly is presumably used when only identifiers are expected —
	// TODO confirm against the scanner.
	scanIdentOnly
)
 118
// tokenAccum collects the tokens produced while scanning a buffer and keeps
// track of the source position reached so far. Tokens are added through
// emitToken.
type tokenAccum struct {
	// Filename is copied into the Range of every emitted token.
	Filename string
	// Bytes is the buffer being scanned; the offsets passed to emitToken
	// index into it.
	Bytes []byte
	// Pos is the position from which the next token's start is derived.
	// emitToken sets it to the end of each token it emits.
	Pos hcl.Pos
	// Tokens is the list of tokens emitted so far, in emission order.
	Tokens []Token
	// StartByte is added to offsets into Bytes to obtain the byte positions
	// recorded in token ranges.
	StartByte int
}
 126
 127 func (f *tokenAccum) emitToken(ty TokenType, startOfs, endOfs int) {
 128         // Walk through our buffer to figure out how much we need to adjust
 129         // the start pos to get our end pos.
 130
 131         start := f.Pos
 132         start.Column += startOfs + f.StartByte - f.Pos.Byte // Safe because only ASCII spaces can be in the offset
 133         start.Byte = startOfs + f.StartByte
 134
 135         end := start
 136         end.Byte = endOfs + f.StartByte
 137         b := f.Bytes[startOfs:endOfs]
 138         for len(b) > 0 {
 139                 advance, seq, _ := textseg.ScanGraphemeClusters(b, true)
 140                 if (len(seq) == 1 && seq[0] == '\n') || (len(seq) == 2 && seq[0] == '\r' && seq[1] == '\n') {
 141                         end.Line++
 142                         end.Column = 1
 143                 } else {
 144                         end.Column++
 145                 }
 146                 b = b[advance:]
 147         }
 148
 149         f.Pos = end
 150
 151         f.Tokens = append(f.Tokens, Token{
 152                 Type:  ty,
 153                 Bytes: f.Bytes[startOfs:endOfs],
 154                 Range: hcl.Range{
 155                         Filename: f.Filename,
 156                         Start:    start,
 157                         End:      end,
 158                 },
 159         })
 160 }
 161
// heredocInProgress holds state about a heredoc.
//
// NOTE(review): nothing in this file reads or writes this type; the field
// descriptions below are inferred from their names and should be confirmed
// against the scanner.
type heredocInProgress struct {
	// Marker is presumably the heredoc's delimiter text — TODO confirm.
	Marker []byte
	// StartOfLine presumably records whether the scanner is at the start of
	// a line within the heredoc — TODO confirm.
	StartOfLine bool
}
 166
 167 func tokenOpensFlushHeredoc(tok Token) bool {
 168         if tok.Type != TokenOHeredoc {
 169                 return false
 170         }
 171         return bytes.HasPrefix(tok.Bytes, []byte{'<', '<', '-'})
 172 }
 173
 174 // checkInvalidTokens does a simple pass across the given tokens and generates
 175 // diagnostics for tokens that should _never_ appear in HCL source. This
 176 // is intended to avoid the need for the parser to have special support
 177 // for them all over.
 178 //
 179 // Returns a diagnostics with no errors if everything seems acceptable.
 180 // Otherwise, returns zero or more error diagnostics, though tries to limit
 181 // repetition of the same information.
 182 func checkInvalidTokens(tokens Tokens) hcl.Diagnostics {
 183         var diags hcl.Diagnostics
 184
 185         toldBitwise := 0
 186         toldExponent := 0
 187         toldBacktick := 0
 188         toldApostrophe := 0
 189         toldSemicolon := 0
 190         toldTabs := 0
 191         toldBadUTF8 := 0
 192
 193         for _, tok := range tokens {
 194                 // copy token so it's safe to point to it
 195                 tok := tok
 196
 197                 switch tok.Type {
 198                 case TokenBitwiseAnd, TokenBitwiseOr, TokenBitwiseXor, TokenBitwiseNot:
 199                         if toldBitwise < 4 {
 200                                 var suggestion string
 201                                 switch tok.Type {
 202                                 case TokenBitwiseAnd:
 203                                         suggestion = " Did you mean boolean AND (\"&&\")?"
 204                                 case TokenBitwiseOr:
 205                                         suggestion = " Did you mean boolean OR (\"&&\")?"
 206                                 case TokenBitwiseNot:
 207                                         suggestion = " Did you mean boolean NOT (\"!\")?"
 208                                 }
 209
 210                                 diags = append(diags, &hcl.Diagnostic{
 211                                         Severity: hcl.DiagError,
 212                                         Summary:  "Unsupported operator",
 213                                         Detail:   fmt.Sprintf("Bitwise operators are not supported.%s", suggestion),
 214                                         Subject:  &tok.Range,
 215                                 })
 216                                 toldBitwise++
 217                         }
 218                 case TokenStarStar:
 219                         if toldExponent < 1 {
 220                                 diags = append(diags, &hcl.Diagnostic{
 221                                         Severity: hcl.DiagError,
 222                                         Summary:  "Unsupported operator",
 223                                         Detail:   "\"**\" is not a supported operator. Exponentiation is not supported as an operator.",
 224                                         Subject:  &tok.Range,
 225                                 })
 226
 227                                 toldExponent++
 228                         }
 229                 case TokenBacktick:
 230                         // Only report for alternating (even) backticks, so we won't report both start and ends of the same
 231                         // backtick-quoted string.
 232                         if (toldBacktick % 2) == 0 {
 233                                 diags = append(diags, &hcl.Diagnostic{
 234                                         Severity: hcl.DiagError,
 235                                         Summary:  "Invalid character",
 236                                         Detail:   "The \"`\" character is not valid. To create a multi-line string, use the \"heredoc\" syntax, like \"<<EOT\".",
 237                                         Subject:  &tok.Range,
 238                                 })
 239                         }
 240                         if toldBacktick <= 2 {
 241                                 toldBacktick++
 242                         }
 243                 case TokenApostrophe:
 244                         if (toldApostrophe % 2) == 0 {
 245                                 newDiag := &hcl.Diagnostic{
 246                                         Severity: hcl.DiagError,
 247                                         Summary:  "Invalid character",
 248                                         Detail:   "Single quotes are not valid. Use double quotes (\") to enclose strings.",
 249                                         Subject:  &tok.Range,
 250                                 }
 251                                 diags = append(diags, newDiag)
 252                         }
 253                         if toldApostrophe <= 2 {
 254                                 toldApostrophe++
 255                         }
 256                 case TokenSemicolon:
 257                         if toldSemicolon < 1 {
 258                                 diags = append(diags, &hcl.Diagnostic{
 259                                         Severity: hcl.DiagError,
 260                                         Summary:  "Invalid character",
 261                                         Detail:   "The \";\" character is not valid. Use newlines to separate arguments and blocks, and commas to separate items in collection values.",
 262                                         Subject:  &tok.Range,
 263                                 })
 264
 265                                 toldSemicolon++
 266                         }
 267                 case TokenTabs:
 268                         if toldTabs < 1 {
 269                                 diags = append(diags, &hcl.Diagnostic{
 270                                         Severity: hcl.DiagError,
 271                                         Summary:  "Invalid character",
 272                                         Detail:   "Tab characters may not be used. The recommended indentation style is two spaces per indent.",
 273                                         Subject:  &tok.Range,
 274                                 })
 275
 276                                 toldTabs++
 277                         }
 278                 case TokenBadUTF8:
 279                         if toldBadUTF8 < 1 {
 280                                 diags = append(diags, &hcl.Diagnostic{
 281                                         Severity: hcl.DiagError,
 282                                         Summary:  "Invalid character encoding",
 283                                         Detail:   "All input files must be UTF-8 encoded. Ensure that UTF-8 encoding is selected in your editor.",
 284                                         Subject:  &tok.Range,
 285                                 })
 286
 287                                 toldBadUTF8++
 288                         }
 289                 case TokenQuotedNewline:
 290                         diags = append(diags, &hcl.Diagnostic{
 291                                 Severity: hcl.DiagError,
 292                                 Summary:  "Invalid multi-line string",
 293                                 Detail:   "Quoted strings may not be split over multiple lines. To produce a multi-line string, either use the \\n escape to represent a newline character or use the \"heredoc\" multi-line template syntax.",
 294                                 Subject:  &tok.Range,
 295                         })
 296                 case TokenInvalid:
 297                         diags = append(diags, &hcl.Diagnostic{
 298                                 Severity: hcl.DiagError,
 299                                 Summary:  "Invalid character",
 300                                 Detail:   "This character is not used within the language.",
 301                                 Subject:  &tok.Range,
 302                         })
 303                 }
 304         }
 305         return diags
 306 }
 307
 308 var utf8BOM = []byte{0xef, 0xbb, 0xbf}
 309
 310 // stripUTF8BOM checks whether the given buffer begins with a UTF-8 byte order
 311 // mark (0xEF 0xBB 0xBF) and, if so, returns a truncated slice with the same
 312 // backing array but with the BOM skipped.
 313 //
 314 // If there is no BOM present, the given slice is returned verbatim.
 315 func stripUTF8BOM(src []byte) []byte {
 316         if bytes.HasPrefix(src, utf8BOM) {
 317                 return src[3:]
 318         }
 319         return src
 320 }