package hclwrite

import (
    "fmt"
    "sort"

    "github.com/hashicorp/hcl2/hcl"
    "github.com/hashicorp/hcl2/hcl/hclsyntax"
    "github.com/zclconf/go-cty/cty"
)

// Our "parser" here is actually not doing any parsing of its own. Instead,
// it leans on the native parser in hclsyntax, and then uses the source ranges
// from the AST to partition the raw token sequence, matching the raw tokens
// up to AST nodes.
//
// This strategy feels somewhat counter-intuitive, since most of the work the
// parser does is thrown away here, but it is chosen because normal parsing by
// hclsyntax is the "main case", while modifying and re-printing source is an
// edge case used only in ancillary tools. This way all the main parsing logic
// stays with the main case, and the extra complexity of token wrangling stays
// out of the main parser, which is already rather complex just serving the
// use-cases it already serves.
//
// If the parsing step produces any errors, the returned File is nil because
// we can't reliably extract tokens from the partial AST produced by an
// erroneous parse.
func parse(src []byte, filename string, start hcl.Pos) (*File, hcl.Diagnostics) {
    file, diags := hclsyntax.ParseConfig(src, filename, start)
    if diags.HasErrors() {
        return nil, diags
    }

    // To do our work here, we use the "native" tokens (those from hclsyntax)
    // to match against source ranges in the AST, but ultimately produce
    // slices from our sequence of "writer" tokens, which contain only
    // *relative* position information that is more appropriate for
    // transformation/writing use-cases.
    nativeTokens, diags := hclsyntax.LexConfig(src, filename, start)
    if diags.HasErrors() {
        // should never happen, since we would've caught these diags in
        // the first call above.
        return nil, diags
    }
    writerTokens := writerTokens(nativeTokens)

    from := inputTokens{
        nativeTokens: nativeTokens,
        writerTokens: writerTokens,
    }

    before, root, after := parseBody(file.Body.(*hclsyntax.Body), from)
    ret := &File{
        inTree: newInTree(),

        srcBytes: src,
        body:     root,
    }

    nodes := ret.inTree.children
    nodes.Append(before.Tokens())
    nodes.AppendNode(root)
    nodes.Append(after.Tokens())

    return ret, diags
}
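
// Illustrative sketch (editor's addition, not part of the original source;
// ParseConfig is the exported wrapper in this package that calls parse).
// For input such as
//
//     foo = "bar" # set foo
//
// the returned *File holds a node tree that interleaves unstructured writer
// tokens with structured nodes (body, attributes, expressions), which is
// what allows the file to be re-printed byte-for-byte or edited surgically
// before printing.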

type inputTokens struct {
    nativeTokens hclsyntax.Tokens
    writerTokens Tokens
}

func (it inputTokens) Partition(rng hcl.Range) (before, within, after inputTokens) {
    start, end := partitionTokens(it.nativeTokens, rng)
    before = it.Slice(0, start)
    within = it.Slice(start, end)
    after = it.Slice(end, len(it.nativeTokens))
    return
}

func (it inputTokens) PartitionType(ty hclsyntax.TokenType) (before, within, after inputTokens) {
    for i, t := range it.writerTokens {
        if t.Type == ty {
            return it.Slice(0, i), it.Slice(i, i+1), it.Slice(i+1, len(it.nativeTokens))
        }
    }
    panic(fmt.Sprintf("didn't find any token of type %s", ty))
}

func (it inputTokens) PartitionTypeSingle(ty hclsyntax.TokenType) (before inputTokens, found *Token, after inputTokens) {
    before, within, after := it.PartitionType(ty)
    if within.Len() != 1 {
        panic("PartitionType found more than one token")
    }
    return before, within.Tokens()[0], after
}
// PartitionIncludingComments is like Partition except the returned "within"
// range includes any lead and line comments associated with the range.
func (it inputTokens) PartitionIncludingComments(rng hcl.Range) (before, within, after inputTokens) {
    start, end := partitionTokens(it.nativeTokens, rng)
    start = partitionLeadCommentTokens(it.nativeTokens[:start])
    _, afterNewline := partitionLineEndTokens(it.nativeTokens[end:])
    end += afterNewline

    before = it.Slice(0, start)
    within = it.Slice(start, end)
    after = it.Slice(end, len(it.nativeTokens))
    return
}

// PartitionBlockItem is similar to PartitionIncludingComments but it returns
// the comments as separate token sequences so that they can be captured into
// AST attributes. It makes assumptions that apply only to block items, so
// should not be used for other constructs.
func (it inputTokens) PartitionBlockItem(rng hcl.Range) (before, leadComments, within, lineComments, newline, after inputTokens) {
    before, within, after = it.Partition(rng)
    before, leadComments = before.PartitionLeadComments()
    lineComments, newline, after = after.PartitionLineEndTokens()
    return
}
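
// Illustrative decomposition (editor's sketch, not from the original source)
// of PartitionBlockItem for a body item like:
//
//     # lead comment
//     foo = "bar" # line comment
//
// leadComments covers "# lead comment", within covers `foo = "bar"`, and
// lineComments covers "# line comment"; newline is empty in this case
// because the single-line comment token consumes its own newline.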

func (it inputTokens) PartitionLeadComments() (before, within inputTokens) {
    start := partitionLeadCommentTokens(it.nativeTokens)
    before = it.Slice(0, start)
    within = it.Slice(start, len(it.nativeTokens))
    return
}

func (it inputTokens) PartitionLineEndTokens() (comments, newline, after inputTokens) {
    afterComments, afterNewline := partitionLineEndTokens(it.nativeTokens)
    comments = it.Slice(0, afterComments)
    newline = it.Slice(afterComments, afterNewline)
    after = it.Slice(afterNewline, len(it.nativeTokens))
    return
}

func (it inputTokens) Slice(start, end int) inputTokens {
    // When we slice, we create a new slice with no additional capacity because
    // we expect that these slices will be mutated in order to insert
    // new code into the AST, and we want to ensure that a new underlying
    // array gets allocated in that case, rather than writing into some
    // following slice and corrupting it.
    return inputTokens{
        nativeTokens: it.nativeTokens[start:end:end],
        writerTokens: it.writerTokens[start:end:end],
    }
}
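
// Illustrative note (editor's sketch): the three-index slice form
// s[low:high:max] caps the result's capacity at max-low, so a later append
// must reallocate rather than overwrite neighboring tokens. For example:
//
//     base := []int{1, 2, 3, 4}
//     s := base[0:2:2]  // len 2, cap 2
//     s = append(s, 99) // allocates a new array; base[2] is still 3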

func (it inputTokens) Len() int {
    return len(it.nativeTokens)
}

func (it inputTokens) Tokens() Tokens {
    return it.writerTokens
}

func (it inputTokens) Types() []hclsyntax.TokenType {
    ret := make([]hclsyntax.TokenType, len(it.nativeTokens))
    for i, tok := range it.nativeTokens {
        ret[i] = tok.Type
    }
    return ret
}

// parseBody locates the given body within the given input tokens and returns
// the resulting *Body object as well as the tokens that appeared before and
// after it.
func parseBody(nativeBody *hclsyntax.Body, from inputTokens) (inputTokens, *node, inputTokens) {
    before, within, after := from.PartitionIncludingComments(nativeBody.SrcRange)

    // The main AST doesn't retain the original source ordering of the
    // body items, so we need to reconstruct that ordering by inspecting
    // their source ranges.
    nativeItems := make([]hclsyntax.Node, 0, len(nativeBody.Attributes)+len(nativeBody.Blocks))
    for _, nativeAttr := range nativeBody.Attributes {
        nativeItems = append(nativeItems, nativeAttr)
    }
    for _, nativeBlock := range nativeBody.Blocks {
        nativeItems = append(nativeItems, nativeBlock)
    }
    sort.Sort(nativeNodeSorter{nativeItems})

    body := &Body{
        inTree: newInTree(),
        items:  newNodeSet(),
    }

    remain := within
    for _, nativeItem := range nativeItems {
        beforeItem, item, afterItem := parseBodyItem(nativeItem, remain)

        if beforeItem.Len() > 0 {
            body.AppendUnstructuredTokens(beforeItem.Tokens())
        }
        body.appendItemNode(item)

        remain = afterItem
    }

    if remain.Len() > 0 {
        body.AppendUnstructuredTokens(remain.Tokens())
    }

    return before, newNode(body), after
}
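
// Illustrative note (editor's sketch): nativeBody.Attributes is a map keyed
// by attribute name, so its iteration order is arbitrary; sorting the
// combined item list by source range restores the file order. For a body
// written as
//
//     b { ... }
//     a = 1
//
// the sorted nativeItems sequence is [block "b", attribute "a"], matching
// the source regardless of map iteration order.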

func parseBodyItem(nativeItem hclsyntax.Node, from inputTokens) (inputTokens, *node, inputTokens) {
    before, leadComments, within, lineComments, newline, after := from.PartitionBlockItem(nativeItem.Range())

    var item *node

    switch tItem := nativeItem.(type) {
    case *hclsyntax.Attribute:
        item = parseAttribute(tItem, within, leadComments, lineComments, newline)
    case *hclsyntax.Block:
        item = parseBlock(tItem, within, leadComments, lineComments, newline)
    default:
        // should never happen if caller is behaving
        panic("unsupported native item type")
    }

    return before, item, after
}

func parseAttribute(nativeAttr *hclsyntax.Attribute, from, leadComments, lineComments, newline inputTokens) *node {
    attr := &Attribute{
        inTree: newInTree(),
    }
    children := attr.inTree.children

    {
        cn := newNode(newComments(leadComments.Tokens()))
        attr.leadComments = cn
        children.AppendNode(cn)
    }

    before, nameTokens, from := from.Partition(nativeAttr.NameRange)
    {
        children.AppendUnstructuredTokens(before.Tokens())
        if nameTokens.Len() != 1 {
            // Should never happen with valid input
            panic("attribute name is not exactly one token")
        }
        token := nameTokens.Tokens()[0]
        in := newNode(newIdentifier(token))
        attr.name = in
        children.AppendNode(in)
    }

    before, equalsTokens, from := from.Partition(nativeAttr.EqualsRange)
    children.AppendUnstructuredTokens(before.Tokens())
    children.AppendUnstructuredTokens(equalsTokens.Tokens())

    before, exprTokens, from := from.Partition(nativeAttr.Expr.Range())
    {
        children.AppendUnstructuredTokens(before.Tokens())
        exprNode := parseExpression(nativeAttr.Expr, exprTokens)
        attr.expr = exprNode
        children.AppendNode(exprNode)
    }

    {
        cn := newNode(newComments(lineComments.Tokens()))
        attr.lineComments = cn
        children.AppendNode(cn)
    }

    children.AppendUnstructuredTokens(newline.Tokens())

    // Collect any stragglers, though there shouldn't be any
    children.AppendUnstructuredTokens(from.Tokens())

    return newNode(attr)
}

func parseBlock(nativeBlock *hclsyntax.Block, from, leadComments, lineComments, newline inputTokens) *node {
    block := &Block{
        inTree: newInTree(),
        labels: newNodeSet(),
    }
    children := block.inTree.children

    {
        cn := newNode(newComments(leadComments.Tokens()))
        block.leadComments = cn
        children.AppendNode(cn)
    }

    before, typeTokens, from := from.Partition(nativeBlock.TypeRange)
    {
        children.AppendUnstructuredTokens(before.Tokens())
        if typeTokens.Len() != 1 {
            // Should never happen with valid input
            panic("block type name is not exactly one token")
        }
        token := typeTokens.Tokens()[0]
        in := newNode(newIdentifier(token))
        block.typeName = in
        children.AppendNode(in)
    }

    for _, rng := range nativeBlock.LabelRanges {
        var labelTokens inputTokens
        before, labelTokens, from = from.Partition(rng)
        children.AppendUnstructuredTokens(before.Tokens())
        tokens := labelTokens.Tokens()
        ln := newNode(newQuoted(tokens))
        block.labels.Add(ln)
        children.AppendNode(ln)
    }

    before, oBrace, from := from.Partition(nativeBlock.OpenBraceRange)
    children.AppendUnstructuredTokens(before.Tokens())
    children.AppendUnstructuredTokens(oBrace.Tokens())

    // We go a bit out of order here: we go hunting for the closing brace
    // so that we have a delimited body, but then we'll deal with the body
    // before we actually append the closing brace and any straggling tokens
    // that appear after it.
    bodyTokens, cBrace, from := from.Partition(nativeBlock.CloseBraceRange)
    before, body, after := parseBody(nativeBlock.Body, bodyTokens)
    children.AppendUnstructuredTokens(before.Tokens())
    block.body = body
    children.AppendNode(body)
    children.AppendUnstructuredTokens(after.Tokens())

    children.AppendUnstructuredTokens(cBrace.Tokens())

    // stragglers
    children.AppendUnstructuredTokens(from.Tokens())
    if lineComments.Len() > 0 {
        // blocks don't actually have line comments, so we'll just treat
        // them as extra stragglers
        children.AppendUnstructuredTokens(lineComments.Tokens())
    }
    children.AppendUnstructuredTokens(newline.Tokens())

    return newNode(block)
}

func parseExpression(nativeExpr hclsyntax.Expression, from inputTokens) *node {
    expr := newExpression()
    children := expr.inTree.children

    nativeVars := nativeExpr.Variables()

    for _, nativeTraversal := range nativeVars {
        before, traversal, after := parseTraversal(nativeTraversal, from)
        children.AppendUnstructuredTokens(before.Tokens())
        children.AppendNode(traversal)
        expr.absTraversals.Add(traversal)
        from = after
    }
    // Attach any stragglers that don't belong to a traversal to the expression
    // itself. In an expression with no traversals at all, this is just the
    // entirety of "from".
    children.AppendUnstructuredTokens(from.Tokens())

    return newNode(expr)
}
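
// Illustrative note (editor's sketch): Variables() reports the absolute
// traversals within an expression, so for an expression like
//
//     a + b[0]
//
// parseExpression produces two traversal nodes, one for `a` and one for
// `b[0]`, with the operator and spacing kept as unstructured tokens; this
// is what lets the writer later locate and rewrite variable references
// without re-parsing.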

func parseTraversal(nativeTraversal hcl.Traversal, from inputTokens) (before inputTokens, n *node, after inputTokens) {
    traversal := newTraversal()
    children := traversal.inTree.children
    before, from, after = from.Partition(nativeTraversal.SourceRange())

    stepAfter := from
    for _, nativeStep := range nativeTraversal {
        before, step, after := parseTraversalStep(nativeStep, stepAfter)
        children.AppendUnstructuredTokens(before.Tokens())
        children.AppendNode(step)
        traversal.steps.Add(step)
        stepAfter = after
    }

    return before, newNode(traversal), after
}

func parseTraversalStep(nativeStep hcl.Traverser, from inputTokens) (before inputTokens, n *node, after inputTokens) {
    var children *nodes
    switch tNativeStep := nativeStep.(type) {

    case hcl.TraverseRoot, hcl.TraverseAttr:
        step := newTraverseName()
        children = step.inTree.children
        before, from, after = from.Partition(nativeStep.SourceRange())
        inBefore, token, inAfter := from.PartitionTypeSingle(hclsyntax.TokenIdent)
        name := newIdentifier(token)
        children.AppendUnstructuredTokens(inBefore.Tokens())
        step.name = children.Append(name)
        children.AppendUnstructuredTokens(inAfter.Tokens())
        return before, newNode(step), after

    case hcl.TraverseIndex:
        step := newTraverseIndex()
        children = step.inTree.children
        before, from, after = from.Partition(nativeStep.SourceRange())

        var inBefore, oBrack, keyTokens, cBrack inputTokens
        inBefore, oBrack, from = from.PartitionType(hclsyntax.TokenOBrack)
        children.AppendUnstructuredTokens(inBefore.Tokens())
        children.AppendUnstructuredTokens(oBrack.Tokens())
        keyTokens, cBrack, from = from.PartitionType(hclsyntax.TokenCBrack)

        keyVal := tNativeStep.Key
        switch keyVal.Type() {
        case cty.String:
            key := newQuoted(keyTokens.Tokens())
            step.key = children.Append(key)
        case cty.Number:
            valBefore, valToken, valAfter := keyTokens.PartitionTypeSingle(hclsyntax.TokenNumberLit)
            children.AppendUnstructuredTokens(valBefore.Tokens())
            key := newNumber(valToken)
            step.key = children.Append(key)
            children.AppendUnstructuredTokens(valAfter.Tokens())
        }

        children.AppendUnstructuredTokens(cBrack.Tokens())
        children.AppendUnstructuredTokens(from.Tokens())

        return before, newNode(step), after
    default:
        panic(fmt.Sprintf("unsupported traversal step type %T", nativeStep))
    }
}
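
// Illustrative decomposition (editor's sketch) of an index step such as
// `b[0]` once the traversal has been partitioned: the step's tokens split
// into OBrack "[", a NumberLit key "0", and CBrack "]", with the key
// captured as a structured node so it can be rewritten in place.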

// writerTokens takes a sequence of tokens as produced by the main hclsyntax
// package and transforms it into an equivalent sequence of tokens using
// this package's own token model.
//
// The resulting list contains the same number of tokens and uses the same
// indices as the input, allowing the two sets of tokens to be correlated
// by index.
func writerTokens(nativeTokens hclsyntax.Tokens) Tokens {
    // Ultimately we want a slice of token _pointers_, but since we can
    // predict how much memory we're going to devote to tokens we'll allocate
    // it all as a single flat buffer and thus give the GC less work to do.
    tokBuf := make([]Token, len(nativeTokens))
    var lastByteOffset int
    for i, mainToken := range nativeTokens {
        // Create a copy of the bytes so that we can mutate without
        // corrupting the original token stream.
        bytes := make([]byte, len(mainToken.Bytes))
        copy(bytes, mainToken.Bytes)

        tokBuf[i] = Token{
            Type:  mainToken.Type,
            Bytes: bytes,

            // We assume here that spaces are always ASCII spaces, since
            // that's what the scanner also assumes, and thus the number
            // of bytes skipped is also the number of space characters.
            SpacesBefore: mainToken.Range.Start.Byte - lastByteOffset,
        }

        lastByteOffset = mainToken.Range.End.Byte
    }

    // Now make a slice of pointers into the previous slice.
    ret := make(Tokens, len(tokBuf))
    for i := range ret {
        ret[i] = &tokBuf[i]
    }

    return ret
}
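
// Worked example (editor's sketch): for the source `a  = 1`, the scanner
// reports byte ranges a:[0,1), =:[3,4), 1:[5,6), so the writer tokens carry
// SpacesBefore values of 0, 2, and 1 respectively; absolute offsets are
// discarded in favor of this relative spacing.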

// partitionTokens takes a sequence of tokens and an hcl.Range and returns
// two indices within the token sequence that correspond with the range
// boundaries, such that the slice operator could be used to produce
// three token sequences for before, within, and after respectively:
//
//     start, end := partitionTokens(toks, rng)
//     before := toks[:start]
//     within := toks[start:end]
//     after := toks[end:]
//
// This works best when the range is aligned with token boundaries (e.g.
// because it was produced in terms of the scanner's result) but if that isn't
// true then it will make a best effort that may produce strange results at
// the boundaries.
//
// Native hclsyntax tokens are used here, because they contain the necessary
// absolute position information. However, since writerTokens produces a
// correlatable sequence of writer tokens, the resulting indices can be
// used also to index into its result, allowing the partitioning of writer
// tokens to be driven by the partitioning of native tokens.
//
// The tokens are assumed to be in source order and non-overlapping, which
// will be true if the token sequence from the scanner is used directly.
func partitionTokens(toks hclsyntax.Tokens, rng hcl.Range) (start, end int) {
    // We use a linear search here because we assume that in most cases our
    // target range is close to the beginning of the sequence, and the
    // sequences are generally small for most reasonable files anyway.
    for i := 0; ; i++ {
        if i >= len(toks) {
            // No tokens for the given range at all!
            return len(toks), len(toks)
        }

        if toks[i].Range.Start.Byte >= rng.Start.Byte {
            start = i
            break
        }
    }

    for i := start; ; i++ {
        if i >= len(toks) {
            // The range "hangs off" the end of the token sequence
            return start, len(toks)
        }

        if toks[i].Range.Start.Byte >= rng.End.Byte {
            end = i // end marker is exclusive
            break
        }
    }

    return start, end
}
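
// Worked example (editor's sketch): for the token stream of `a = b` with
// byte ranges a:[0,1), =:[2,3), b:[4,5), a range covering bytes [4,5)
// yields start=2, end=3, so before = toks[:2], within = toks[2:3] (just
// the `b` token), and after = toks[3:].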

// partitionLeadCommentTokens takes a sequence of tokens that is assumed
// to immediately precede a construct that can have lead comment tokens,
// and returns the index into that sequence where the lead comments begin.
//
// Lead comments are defined as whole lines containing only comment tokens
// with no blank lines between. If no such lines are found, the returned
// index will be len(toks).
func partitionLeadCommentTokens(toks hclsyntax.Tokens) int {
    // single-line comments (which is what we're interested in here)
    // consume their trailing newline, so we can just walk backwards
    // until we stop seeing comment tokens.
    for i := len(toks) - 1; i >= 0; i-- {
        if toks[i].Type != hclsyntax.TokenComment {
            return i + 1
        }
    }
    return 0
}
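
// Illustrative note (editor's sketch): because each single-line comment
// token includes its trailing newline, a suffix like
//
//     [newline][comment "# a\n"][comment "# b\n"]
//
// yields index 1: the two trailing comment tokens are the lead comments,
// and the non-comment token before them stops the backwards scan.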

// partitionLineEndTokens takes a sequence of tokens that is assumed
// to immediately follow a construct that can have a line comment, and
// returns first the index where any line comments end and then second
// the index immediately after the trailing newline.
//
// Line comments are defined as comments that appear immediately after
// a construct on the same line where its significant tokens ended.
//
// Since single-line comment tokens (# and //) include the newline that
// terminates them, in the presence of these the two returned indices
// will be the same since the comment itself serves as the line end.
func partitionLineEndTokens(toks hclsyntax.Tokens) (afterComment, afterNewline int) {
    for i := 0; i < len(toks); i++ {
        tok := toks[i]
        if tok.Type != hclsyntax.TokenComment {
            switch tok.Type {
            case hclsyntax.TokenNewline:
                return i, i + 1
            case hclsyntax.TokenEOF:
                // Although this is valid, we mustn't include the EOF
                // itself as our "newline" or else strange things will
                // happen when we try to append new items.
                return i, i
            default:
                // If we have well-formed input here then nothing else should be
                // possible. This path should never happen, because we only try
                // to extract tokens from the sequence if the parser succeeded,
                // and it should catch this problem itself.
                panic("malformed line trailers: expected only comments and newlines")
            }
        }

        if len(tok.Bytes) > 0 && tok.Bytes[len(tok.Bytes)-1] == '\n' {
            // Newline at the end of a single-line comment serves both as
            // the end of comments *and* the end of the line.
            return i + 1, i + 1
        }
    }
    return len(toks), len(toks)
}
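
// Worked examples (editor's sketch):
//
//     [comment "# x\n"][ident ...]  -> (1, 1)  comment doubles as line end
//     [comment "/* x */"][newline]  -> (1, 2)  inline comment, then newline
//     [newline]                     -> (0, 1)  no line comment at all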

// lexConfig uses the hclsyntax scanner to get a token stream and then
// rewrites it into this package's token model.
//
// Any errors produced during scanning are ignored, so the results of this
// function should be used with care.
func lexConfig(src []byte) Tokens {
    mainTokens, _ := hclsyntax.LexConfig(src, "", hcl.Pos{Byte: 0, Line: 1, Column: 1})
    return writerTokens(mainTokens)
}
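
// Illustrative usage (editor's sketch): lexConfig is handy wherever writer
// tokens are needed for a fragment rather than a whole file, e.g.
//
//     toks := lexConfig([]byte(`"example"`))
//
// which yields quoted-string tokens with relative spacing, ready to be
// spliced into an existing node tree.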