]>
Commit | Line | Data |
---|---|---|
bae9f6d2 JC |
1 | package parser |
2 | ||
3 | import ( | |
4 | "strconv" | |
5 | "unicode/utf8" | |
6 | ||
7 | "github.com/hashicorp/hil/ast" | |
8 | "github.com/hashicorp/hil/scanner" | |
9 | ) | |
10 | ||
11 | func Parse(ch <-chan *scanner.Token) (ast.Node, error) { | |
12 | peeker := scanner.NewPeeker(ch) | |
13 | parser := &parser{peeker} | |
14 | output, err := parser.ParseTopLevel() | |
15 | peeker.Close() | |
16 | return output, err | |
17 | } | |
18 | ||
19 | type parser struct { | |
20 | peeker *scanner.Peeker | |
21 | } | |
22 | ||
23 | func (p *parser) ParseTopLevel() (ast.Node, error) { | |
24 | return p.parseInterpolationSeq(false) | |
25 | } | |
26 | ||
27 | func (p *parser) ParseQuoted() (ast.Node, error) { | |
28 | return p.parseInterpolationSeq(true) | |
29 | } | |
30 | ||
31 | // parseInterpolationSeq parses either the top-level sequence of literals | |
32 | // and interpolation expressions or a similar sequence within a quoted | |
33 | // string inside an interpolation expression. The latter case is requested | |
34 | // by setting 'quoted' to true. | |
35 | func (p *parser) parseInterpolationSeq(quoted bool) (ast.Node, error) { | |
36 | literalType := scanner.LITERAL | |
37 | endType := scanner.EOF | |
38 | if quoted { | |
39 | // exceptions for quoted sequences | |
40 | literalType = scanner.STRING | |
41 | endType = scanner.CQUOTE | |
42 | } | |
43 | ||
44 | startPos := p.peeker.Peek().Pos | |
45 | ||
46 | if quoted { | |
47 | tok := p.peeker.Read() | |
48 | if tok.Type != scanner.OQUOTE { | |
49 | return nil, ExpectationError("open quote", tok) | |
50 | } | |
51 | } | |
52 | ||
53 | var exprs []ast.Node | |
54 | for { | |
55 | tok := p.peeker.Read() | |
56 | ||
57 | if tok.Type == endType { | |
58 | break | |
59 | } | |
60 | ||
61 | switch tok.Type { | |
62 | case literalType: | |
63 | val, err := p.parseStringToken(tok) | |
64 | if err != nil { | |
65 | return nil, err | |
66 | } | |
67 | exprs = append(exprs, &ast.LiteralNode{ | |
68 | Value: val, | |
69 | Typex: ast.TypeString, | |
70 | Posx: tok.Pos, | |
71 | }) | |
72 | case scanner.BEGIN: | |
73 | expr, err := p.ParseInterpolation() | |
74 | if err != nil { | |
75 | return nil, err | |
76 | } | |
77 | exprs = append(exprs, expr) | |
78 | default: | |
79 | return nil, ExpectationError(`"${"`, tok) | |
80 | } | |
81 | } | |
82 | ||
83 | if len(exprs) == 0 { | |
84 | // If we have no parts at all then the input must've | |
85 | // been an empty string. | |
86 | exprs = append(exprs, &ast.LiteralNode{ | |
87 | Value: "", | |
88 | Typex: ast.TypeString, | |
89 | Posx: startPos, | |
90 | }) | |
91 | } | |
92 | ||
93 | // As a special case, if our "Output" contains only one expression | |
94 | // and it's a literal string then we'll hoist it up to be our | |
95 | // direct return value, so callers can easily recognize a string | |
96 | // that has no interpolations at all. | |
97 | if len(exprs) == 1 { | |
98 | if lit, ok := exprs[0].(*ast.LiteralNode); ok { | |
99 | if lit.Typex == ast.TypeString { | |
100 | return lit, nil | |
101 | } | |
102 | } | |
103 | } | |
104 | ||
105 | return &ast.Output{ | |
106 | Exprs: exprs, | |
107 | Posx: startPos, | |
108 | }, nil | |
109 | } | |
110 | ||
111 | // parseStringToken takes a token of either LITERAL or STRING type and | |
112 | // returns the interpreted string, after processing any relevant | |
113 | // escape sequences. | |
114 | func (p *parser) parseStringToken(tok *scanner.Token) (string, error) { | |
115 | var backslashes bool | |
116 | switch tok.Type { | |
117 | case scanner.LITERAL: | |
118 | backslashes = false | |
119 | case scanner.STRING: | |
120 | backslashes = true | |
121 | default: | |
122 | panic("unsupported string token type") | |
123 | } | |
124 | ||
125 | raw := []byte(tok.Content) | |
126 | buf := make([]byte, 0, len(raw)) | |
127 | ||
128 | for i := 0; i < len(raw); i++ { | |
129 | b := raw[i] | |
130 | more := len(raw) > (i + 1) | |
131 | ||
132 | if b == '$' { | |
133 | if more && raw[i+1] == '$' { | |
134 | // skip over the second dollar sign | |
135 | i++ | |
136 | } | |
137 | } else if backslashes && b == '\\' { | |
138 | if !more { | |
139 | return "", Errorf( | |
140 | ast.Pos{ | |
141 | Column: tok.Pos.Column + utf8.RuneCount(raw[:i]), | |
142 | Line: tok.Pos.Line, | |
143 | }, | |
144 | `unfinished backslash escape sequence`, | |
145 | ) | |
146 | } | |
147 | escapeType := raw[i+1] | |
148 | switch escapeType { | |
149 | case '\\': | |
150 | // skip over the second slash | |
151 | i++ | |
152 | case 'n': | |
153 | b = '\n' | |
154 | i++ | |
155 | case '"': | |
156 | b = '"' | |
157 | i++ | |
158 | default: | |
159 | return "", Errorf( | |
160 | ast.Pos{ | |
161 | Column: tok.Pos.Column + utf8.RuneCount(raw[:i]), | |
162 | Line: tok.Pos.Line, | |
163 | }, | |
164 | `invalid backslash escape sequence`, | |
165 | ) | |
166 | } | |
167 | } | |
168 | ||
169 | buf = append(buf, b) | |
170 | } | |
171 | ||
172 | return string(buf), nil | |
173 | } | |
174 | ||
175 | func (p *parser) ParseInterpolation() (ast.Node, error) { | |
176 | // By the time we're called, we're already "inside" the ${ sequence | |
177 | // because the caller consumed the ${ token. | |
178 | ||
179 | expr, err := p.ParseExpression() | |
180 | if err != nil { | |
181 | return nil, err | |
182 | } | |
183 | ||
184 | err = p.requireTokenType(scanner.END, `"}"`) | |
185 | if err != nil { | |
186 | return nil, err | |
187 | } | |
188 | ||
189 | return expr, nil | |
190 | } | |
191 | ||
192 | func (p *parser) ParseExpression() (ast.Node, error) { | |
193 | return p.parseTernaryCond() | |
194 | } | |
195 | ||
196 | func (p *parser) parseTernaryCond() (ast.Node, error) { | |
197 | // The ternary condition operator (.. ? .. : ..) behaves somewhat | |
198 | // like a binary operator except that the "operator" is itself | |
199 | // an expression enclosed in two punctuation characters. | |
200 | // The middle expression is parsed as if the ? and : symbols | |
201 | // were parentheses. The "rhs" (the "false expression") is then | |
202 | // treated right-associatively so it behaves similarly to the | |
203 | // middle in terms of precedence. | |
204 | ||
205 | startPos := p.peeker.Peek().Pos | |
206 | ||
207 | var cond, trueExpr, falseExpr ast.Node | |
208 | var err error | |
209 | ||
210 | cond, err = p.parseBinaryOps(binaryOps) | |
211 | if err != nil { | |
212 | return nil, err | |
213 | } | |
214 | ||
215 | next := p.peeker.Peek() | |
216 | if next.Type != scanner.QUESTION { | |
217 | return cond, nil | |
218 | } | |
219 | ||
220 | p.peeker.Read() // eat question mark | |
221 | ||
222 | trueExpr, err = p.ParseExpression() | |
223 | if err != nil { | |
224 | return nil, err | |
225 | } | |
226 | ||
227 | colon := p.peeker.Read() | |
228 | if colon.Type != scanner.COLON { | |
229 | return nil, ExpectationError(":", colon) | |
230 | } | |
231 | ||
232 | falseExpr, err = p.ParseExpression() | |
233 | if err != nil { | |
234 | return nil, err | |
235 | } | |
236 | ||
237 | return &ast.Conditional{ | |
238 | CondExpr: cond, | |
239 | TrueExpr: trueExpr, | |
240 | FalseExpr: falseExpr, | |
241 | Posx: startPos, | |
242 | }, nil | |
243 | } | |
244 | ||
245 | // parseBinaryOps calls itself recursively to work through all of the | |
246 | // operator precedence groups, and then eventually calls ParseExpressionTerm | |
247 | // for each operand. | |
248 | func (p *parser) parseBinaryOps(ops []map[scanner.TokenType]ast.ArithmeticOp) (ast.Node, error) { | |
249 | if len(ops) == 0 { | |
250 | // We've run out of operators, so now we'll just try to parse a term. | |
251 | return p.ParseExpressionTerm() | |
252 | } | |
253 | ||
254 | thisLevel := ops[0] | |
255 | remaining := ops[1:] | |
256 | ||
257 | startPos := p.peeker.Peek().Pos | |
258 | ||
259 | var lhs, rhs ast.Node | |
260 | operator := ast.ArithmeticOpInvalid | |
261 | var err error | |
262 | ||
263 | // parse a term that might be the first operand of a binary | |
264 | // expression or it might just be a standalone term, but | |
265 | // we won't know until we've parsed it and can look ahead | |
266 | // to see if there's an operator token. | |
267 | lhs, err = p.parseBinaryOps(remaining) | |
268 | if err != nil { | |
269 | return nil, err | |
270 | } | |
271 | ||
272 | // We'll keep eating up arithmetic operators until we run | |
273 | // out, so that operators with the same precedence will combine in a | |
274 | // left-associative manner: | |
275 | // a+b+c => (a+b)+c, not a+(b+c) | |
276 | // | |
277 | // Should we later want to have right-associative operators, a way | |
278 | // to achieve that would be to call back up to ParseExpression here | |
279 | // instead of iteratively parsing only the remaining operators. | |
280 | for { | |
281 | next := p.peeker.Peek() | |
282 | var newOperator ast.ArithmeticOp | |
283 | var ok bool | |
284 | if newOperator, ok = thisLevel[next.Type]; !ok { | |
285 | break | |
286 | } | |
287 | ||
288 | // Are we extending an expression started on | |
289 | // the previous iteration? | |
290 | if operator != ast.ArithmeticOpInvalid { | |
291 | lhs = &ast.Arithmetic{ | |
292 | Op: operator, | |
293 | Exprs: []ast.Node{lhs, rhs}, | |
294 | Posx: startPos, | |
295 | } | |
296 | } | |
297 | ||
298 | operator = newOperator | |
299 | p.peeker.Read() // eat operator token | |
300 | rhs, err = p.parseBinaryOps(remaining) | |
301 | if err != nil { | |
302 | return nil, err | |
303 | } | |
304 | } | |
305 | ||
306 | if operator != ast.ArithmeticOpInvalid { | |
307 | return &ast.Arithmetic{ | |
308 | Op: operator, | |
309 | Exprs: []ast.Node{lhs, rhs}, | |
310 | Posx: startPos, | |
311 | }, nil | |
312 | } else { | |
313 | return lhs, nil | |
314 | } | |
315 | } | |
316 | ||
317 | func (p *parser) ParseExpressionTerm() (ast.Node, error) { | |
318 | ||
319 | next := p.peeker.Peek() | |
320 | ||
321 | switch next.Type { | |
322 | ||
323 | case scanner.OPAREN: | |
324 | p.peeker.Read() | |
325 | expr, err := p.ParseExpression() | |
326 | if err != nil { | |
327 | return nil, err | |
328 | } | |
329 | err = p.requireTokenType(scanner.CPAREN, `")"`) | |
330 | return expr, err | |
331 | ||
332 | case scanner.OQUOTE: | |
333 | return p.ParseQuoted() | |
334 | ||
335 | case scanner.INTEGER: | |
336 | tok := p.peeker.Read() | |
337 | val, err := strconv.Atoi(tok.Content) | |
338 | if err != nil { | |
339 | return nil, TokenErrorf(tok, "invalid integer: %s", err) | |
340 | } | |
341 | return &ast.LiteralNode{ | |
342 | Value: val, | |
343 | Typex: ast.TypeInt, | |
344 | Posx: tok.Pos, | |
345 | }, nil | |
346 | ||
347 | case scanner.FLOAT: | |
348 | tok := p.peeker.Read() | |
349 | val, err := strconv.ParseFloat(tok.Content, 64) | |
350 | if err != nil { | |
351 | return nil, TokenErrorf(tok, "invalid float: %s", err) | |
352 | } | |
353 | return &ast.LiteralNode{ | |
354 | Value: val, | |
355 | Typex: ast.TypeFloat, | |
356 | Posx: tok.Pos, | |
357 | }, nil | |
358 | ||
359 | case scanner.BOOL: | |
360 | tok := p.peeker.Read() | |
361 | // the scanner guarantees that tok.Content is either "true" or "false" | |
362 | var val bool | |
363 | if tok.Content[0] == 't' { | |
364 | val = true | |
365 | } else { | |
366 | val = false | |
367 | } | |
368 | return &ast.LiteralNode{ | |
369 | Value: val, | |
370 | Typex: ast.TypeBool, | |
371 | Posx: tok.Pos, | |
372 | }, nil | |
373 | ||
374 | case scanner.MINUS: | |
375 | opTok := p.peeker.Read() | |
376 | // important to use ParseExpressionTerm rather than ParseExpression | |
377 | // here, otherwise we can capture a following binary expression into | |
378 | // our negation. | |
379 | // e.g. -46+5 should parse as (0-46)+5, not 0-(46+5) | |
380 | operand, err := p.ParseExpressionTerm() | |
381 | if err != nil { | |
382 | return nil, err | |
383 | } | |
384 | // The AST currently represents negative numbers as | |
385 | // a binary subtraction of the number from zero. | |
386 | return &ast.Arithmetic{ | |
387 | Op: ast.ArithmeticOpSub, | |
388 | Exprs: []ast.Node{ | |
389 | &ast.LiteralNode{ | |
390 | Value: 0, | |
391 | Typex: ast.TypeInt, | |
392 | Posx: opTok.Pos, | |
393 | }, | |
394 | operand, | |
395 | }, | |
396 | Posx: opTok.Pos, | |
397 | }, nil | |
398 | ||
399 | case scanner.BANG: | |
400 | opTok := p.peeker.Read() | |
401 | // important to use ParseExpressionTerm rather than ParseExpression | |
402 | // here, otherwise we can capture a following binary expression into | |
403 | // our negation. | |
404 | operand, err := p.ParseExpressionTerm() | |
405 | if err != nil { | |
406 | return nil, err | |
407 | } | |
408 | // The AST currently represents binary negation as an equality | |
409 | // test with "false". | |
410 | return &ast.Arithmetic{ | |
411 | Op: ast.ArithmeticOpEqual, | |
412 | Exprs: []ast.Node{ | |
413 | &ast.LiteralNode{ | |
414 | Value: false, | |
415 | Typex: ast.TypeBool, | |
416 | Posx: opTok.Pos, | |
417 | }, | |
418 | operand, | |
419 | }, | |
420 | Posx: opTok.Pos, | |
421 | }, nil | |
422 | ||
423 | case scanner.IDENTIFIER: | |
424 | return p.ParseScopeInteraction() | |
425 | ||
426 | default: | |
427 | return nil, ExpectationError("expression", next) | |
428 | } | |
429 | } | |
430 | ||
431 | // ParseScopeInteraction parses the expression types that interact | |
432 | // with the evaluation scope: variable access, function calls, and | |
433 | // indexing. | |
434 | // | |
435 | // Indexing should actually be a distinct operator in its own right, | |
436 | // so that e.g. it can be applied to the result of a function call, | |
437 | // but for now we're preserving the behavior of the older yacc-based | |
438 | // parser. | |
439 | func (p *parser) ParseScopeInteraction() (ast.Node, error) { | |
440 | first := p.peeker.Read() | |
441 | startPos := first.Pos | |
442 | if first.Type != scanner.IDENTIFIER { | |
443 | return nil, ExpectationError("identifier", first) | |
444 | } | |
445 | ||
446 | next := p.peeker.Peek() | |
447 | if next.Type == scanner.OPAREN { | |
448 | // function call | |
449 | funcName := first.Content | |
450 | p.peeker.Read() // eat paren | |
451 | var args []ast.Node | |
452 | ||
453 | for { | |
454 | if p.peeker.Peek().Type == scanner.CPAREN { | |
455 | break | |
456 | } | |
457 | ||
458 | arg, err := p.ParseExpression() | |
459 | if err != nil { | |
460 | return nil, err | |
461 | } | |
462 | ||
463 | args = append(args, arg) | |
464 | ||
465 | if p.peeker.Peek().Type == scanner.COMMA { | |
466 | p.peeker.Read() // eat comma | |
467 | continue | |
468 | } else { | |
469 | break | |
470 | } | |
471 | } | |
472 | ||
473 | err := p.requireTokenType(scanner.CPAREN, `")"`) | |
474 | if err != nil { | |
475 | return nil, err | |
476 | } | |
477 | ||
478 | return &ast.Call{ | |
479 | Func: funcName, | |
480 | Args: args, | |
481 | Posx: startPos, | |
482 | }, nil | |
483 | } | |
484 | ||
485 | varNode := &ast.VariableAccess{ | |
486 | Name: first.Content, | |
487 | Posx: startPos, | |
488 | } | |
489 | ||
490 | if p.peeker.Peek().Type == scanner.OBRACKET { | |
491 | // index operator | |
492 | startPos := p.peeker.Read().Pos // eat bracket | |
493 | indexExpr, err := p.ParseExpression() | |
494 | if err != nil { | |
495 | return nil, err | |
496 | } | |
497 | err = p.requireTokenType(scanner.CBRACKET, `"]"`) | |
498 | if err != nil { | |
499 | return nil, err | |
500 | } | |
501 | return &ast.Index{ | |
502 | Target: varNode, | |
503 | Key: indexExpr, | |
504 | Posx: startPos, | |
505 | }, nil | |
506 | } | |
507 | ||
508 | return varNode, nil | |
509 | } | |
510 | ||
511 | // requireTokenType consumes the next token an returns an error if its | |
512 | // type does not match the given type. nil is returned if the type matches. | |
513 | // | |
514 | // This is a helper around peeker.Read() for situations where the parser just | |
515 | // wants to assert that a particular token type must be present. | |
516 | func (p *parser) requireTokenType(wantType scanner.TokenType, wantName string) error { | |
517 | token := p.peeker.Read() | |
518 | if token.Type != wantType { | |
519 | return ExpectationError(wantName, token) | |
520 | } | |
521 | return nil | |
522 | } |