7 "github.com/apparentlymart/go-textseg/textseg"
8 "github.com/hashicorp/hcl2/hcl"
11 // Token represents a sequence of bytes from some HCL code that has been
12 // tagged with a type and its range within the source file.
19 // Tokens is a slice of Token.
22 // TokenType is an enumeration used for the Type field on Token.
26 // Single-character tokens are represented by their own character, for
27 // convenience in producing these within the scanner. However, the values
28 // are otherwise arbitrary and just intended to be mnemonic for humans
29 // who might see them in debug output.
31 TokenOBrace TokenType = '{'
32 TokenCBrace TokenType = '}'
33 TokenOBrack TokenType = '['
34 TokenCBrack TokenType = ']'
35 TokenOParen TokenType = '('
36 TokenCParen TokenType = ')'
37 TokenOQuote TokenType = '«'
38 TokenCQuote TokenType = '»'
39 TokenOHeredoc TokenType = 'H'
40 TokenCHeredoc TokenType = 'h'
42 TokenStar TokenType = '*'
43 TokenSlash TokenType = '/'
44 TokenPlus TokenType = '+'
45 TokenMinus TokenType = '-'
46 TokenPercent TokenType = '%'
48 TokenEqual TokenType = '='
49 TokenEqualOp TokenType = '≔'
50 TokenNotEqual TokenType = '≠'
51 TokenLessThan TokenType = '<'
52 TokenLessThanEq TokenType = '≤'
53 TokenGreaterThan TokenType = '>'
54 TokenGreaterThanEq TokenType = '≥'
56 TokenAnd TokenType = '∧'
57 TokenOr TokenType = '∨'
58 TokenBang TokenType = '!'
60 TokenDot TokenType = '.'
61 TokenComma TokenType = ','
63 TokenEllipsis TokenType = '…'
64 TokenFatArrow TokenType = '⇒'
66 TokenQuestion TokenType = '?'
67 TokenColon TokenType = ':'
69 TokenTemplateInterp TokenType = '∫'
70 TokenTemplateControl TokenType = 'λ'
71 TokenTemplateSeqEnd TokenType = '∎'
73 TokenQuotedLit TokenType = 'Q' // might contain backslash escapes
74 TokenStringLit TokenType = 'S' // cannot contain backslash escapes
75 TokenNumberLit TokenType = 'N'
76 TokenIdent TokenType = 'I'
78 TokenComment TokenType = 'C'
80 TokenNewline TokenType = '\n'
81 TokenEOF TokenType = '␄'
83 // The rest are not used in the language but recognized by the scanner so
84 // we can generate good diagnostics in the parser when users try to write
85 // things that might work in other languages they are familiar with, or
86 // simply make incorrect assumptions about the HCL language.
88 TokenBitwiseAnd TokenType = '&'
89 TokenBitwiseOr TokenType = '|'
90 TokenBitwiseNot TokenType = '~'
91 TokenBitwiseXor TokenType = '^'
92 TokenStarStar TokenType = '➚'
93 TokenApostrophe TokenType = '\''
94 TokenBacktick TokenType = '`'
95 TokenSemicolon TokenType = ';'
96 TokenTabs TokenType = '␉'
97 TokenInvalid TokenType = '�'
98 TokenBadUTF8 TokenType = '💩'
99 TokenQuotedNewline TokenType = ''
101 // TokenNil is a placeholder for when a token is required but none is
102 // available, e.g. when reporting errors. The scanner will never produce
103 // this as part of a token stream.
104 TokenNil TokenType = '\x00'
107 func (t TokenType) GoString() string {
108 return fmt.Sprintf("hclsyntax.%s", t.String())
114 scanNormal scanMode = iota
119 type tokenAccum struct {
127 func (f *tokenAccum) emitToken(ty TokenType, startOfs, endOfs int) {
128 // Walk through our buffer to figure out how much we need to adjust
129 // the start pos to get our end pos.
132 start.Column += startOfs + f.StartByte - f.Pos.Byte // Safe because only ASCII spaces can be in the offset
133 start.Byte = startOfs + f.StartByte
136 end.Byte = endOfs + f.StartByte
137 b := f.Bytes[startOfs:endOfs]
139 advance, seq, _ := textseg.ScanGraphemeClusters(b, true)
140 if (len(seq) == 1 && seq[0] == '\n') || (len(seq) == 2 && seq[0] == '\r' && seq[1] == '\n') {
151 f.Tokens = append(f.Tokens, Token{
153 Bytes: f.Bytes[startOfs:endOfs],
155 Filename: f.Filename,
162 type heredocInProgress struct {
167 func tokenOpensFlushHeredoc(tok Token) bool {
168 if tok.Type != TokenOHeredoc {
171 return bytes.HasPrefix(tok.Bytes, []byte{'<', '<', '-'})
174 // checkInvalidTokens does a simple pass across the given tokens and generates
175 // diagnostics for tokens that should _never_ appear in HCL source. This
176 // is intended to avoid the need for the parser to have special support
177 // for them all over.
179 // Returns diagnostics with no errors if everything seems acceptable.
180 // Otherwise, returns one or more error diagnostics, though it tries to limit
181 // repetition of the same information.
182 func checkInvalidTokens(tokens Tokens) hcl.Diagnostics {
183 var diags hcl.Diagnostics
193 for _, tok := range tokens {
194 // copy token so it's safe to point to it
198 case TokenBitwiseAnd, TokenBitwiseOr, TokenBitwiseXor, TokenBitwiseNot:
200 var suggestion string
202 case TokenBitwiseAnd:
203 suggestion = " Did you mean boolean AND (\"&&\")?"
205 suggestion = " Did you mean boolean OR (\"||\")?"
206 case TokenBitwiseNot:
207 suggestion = " Did you mean boolean NOT (\"!\")?"
210 diags = append(diags, &hcl.Diagnostic{
211 Severity: hcl.DiagError,
212 Summary: "Unsupported operator",
213 Detail: fmt.Sprintf("Bitwise operators are not supported.%s", suggestion),
219 if toldExponent < 1 {
220 diags = append(diags, &hcl.Diagnostic{
221 Severity: hcl.DiagError,
222 Summary: "Unsupported operator",
223 Detail: "\"**\" is not a supported operator. Exponentiation is not supported as an operator.",
230 // Only report for alternating (even) backticks, so we won't report both the start and the end of the same
231 // backtick-quoted string.
232 if (toldBacktick % 2) == 0 {
233 diags = append(diags, &hcl.Diagnostic{
234 Severity: hcl.DiagError,
235 Summary: "Invalid character",
236 Detail: "The \"`\" character is not valid. To create a multi-line string, use the \"heredoc\" syntax, like \"<<EOT\".",
240 if toldBacktick <= 2 {
243 case TokenApostrophe:
244 if (toldApostrophe % 2) == 0 {
245 newDiag := &hcl.Diagnostic{
246 Severity: hcl.DiagError,
247 Summary: "Invalid character",
248 Detail: "Single quotes are not valid. Use double quotes (\") to enclose strings.",
251 diags = append(diags, newDiag)
253 if toldApostrophe <= 2 {
257 if toldSemicolon < 1 {
258 diags = append(diags, &hcl.Diagnostic{
259 Severity: hcl.DiagError,
260 Summary: "Invalid character",
261 Detail: "The \";\" character is not valid. Use newlines to separate arguments and blocks, and commas to separate items in collection values.",
269 diags = append(diags, &hcl.Diagnostic{
270 Severity: hcl.DiagError,
271 Summary: "Invalid character",
272 Detail: "Tab characters may not be used. The recommended indentation style is two spaces per indent.",
280 diags = append(diags, &hcl.Diagnostic{
281 Severity: hcl.DiagError,
282 Summary: "Invalid character encoding",
283 Detail: "All input files must be UTF-8 encoded. Ensure that UTF-8 encoding is selected in your editor.",
289 case TokenQuotedNewline:
290 diags = append(diags, &hcl.Diagnostic{
291 Severity: hcl.DiagError,
292 Summary: "Invalid multi-line string",
293 Detail: "Quoted strings may not be split over multiple lines. To produce a multi-line string, either use the \\n escape to represent a newline character or use the \"heredoc\" multi-line template syntax.",
297 diags = append(diags, &hcl.Diagnostic{
298 Severity: hcl.DiagError,
299 Summary: "Invalid character",
300 Detail: "This character is not used within the language.",
308 var utf8BOM = []byte{0xef, 0xbb, 0xbf}
310 // stripUTF8BOM checks whether the given buffer begins with a UTF-8 byte order
311 // mark (0xEF 0xBB 0xBF) and, if so, returns a truncated slice with the same
312 // backing array but with the BOM skipped.
314 // If there is no BOM present, the given slice is returned verbatim.
315 func stripUTF8BOM(src []byte) []byte {
316 if bytes.HasPrefix(src, utf8BOM) {