vendor/github.com/hashicorp/hcl2/hclwrite/parser.go

   1 package hclwrite
   2
   3 import (
   4         "fmt"
   5         "sort"
   6
   7         "github.com/hashicorp/hcl2/hcl"
   8         "github.com/hashicorp/hcl2/hcl/hclsyntax"
   9         "github.com/zclconf/go-cty/cty"
  10 )
  11
  12 // Our "parser" here is actually not doing any parsing of its own. Instead,
  13 // it leans on the native parser in hclsyntax, and then uses the source ranges
  14 // from the AST to partition the raw token sequence to match the raw tokens
  15 // up to AST nodes.
  16 //
  17 // This strategy feels somewhat counter-intuitive, since most of the work the
  18 // parser does is thrown away here, but this strategy is chosen because the
  19 // normal parsing work done by hclsyntax is considered to be the "main case",
  20 // while modifying and re-printing source is more of an edge case, used only
  21 // in ancillary tools, and so it's good to keep all the main parsing logic
  22 // with the main case but keep all of the extra complexity of token wrangling
  23 // out of the main parser, which is already rather complex just serving the
  24 // use-cases it already serves.
  25 //
  26 // If the parsing step produces any errors, the returned File is nil because
  27 // we can't reliably extract tokens from the partial AST produced by an
  28 // erroneous parse.
  29 func parse(src []byte, filename string, start hcl.Pos) (*File, hcl.Diagnostics) {
  30         file, diags := hclsyntax.ParseConfig(src, filename, start)
  31         if diags.HasErrors() {
  32                 return nil, diags
  33         }
  34
  35         // To do our work here, we use the "native" tokens (those from hclsyntax)
  36         // to match against source ranges in the AST, but ultimately produce
  37         // slices from our sequence of "writer" tokens, which contain only
  38         // *relative* position information that is more appropriate for
  39         // transformation/writing use-cases.
  40         nativeTokens, diags := hclsyntax.LexConfig(src, filename, start)
  41         if diags.HasErrors() {
  42                 // should never happen, since we would've caught these diags in
  43                 // the first call above.
  44                 return nil, diags
  45         }
  46         writerTokens := writerTokens(nativeTokens)
  47
  48         from := inputTokens{
  49                 nativeTokens: nativeTokens,
  50                 writerTokens: writerTokens,
  51         }
  52
  53         before, root, after := parseBody(file.Body.(*hclsyntax.Body), from)
  54         ret := &File{
  55                 inTree: newInTree(),
  56
  57                 srcBytes: src,
  58                 body:     root,
  59         }
  60
  61         nodes := ret.inTree.children
  62         nodes.Append(before.Tokens())
  63         nodes.AppendNode(root)
  64         nodes.Append(after.Tokens())
  65
  66         return ret, diags
  67 }
  68
  69 type inputTokens struct {
  70         nativeTokens hclsyntax.Tokens
  71         writerTokens Tokens
  72 }
  73
  74 func (it inputTokens) Partition(rng hcl.Range) (before, within, after inputTokens) {
  75         start, end := partitionTokens(it.nativeTokens, rng)
  76         before = it.Slice(0, start)
  77         within = it.Slice(start, end)
  78         after = it.Slice(end, len(it.nativeTokens))
  79         return
  80 }
  81
  82 func (it inputTokens) PartitionType(ty hclsyntax.TokenType) (before, within, after inputTokens) {
  83         for i, t := range it.writerTokens {
  84                 if t.Type == ty {
  85                         return it.Slice(0, i), it.Slice(i, i+1), it.Slice(i+1, len(it.nativeTokens))
  86                 }
  87         }
  88         panic(fmt.Sprintf("didn't find any token of type %s", ty))
  89 }
  90
  91 func (it inputTokens) PartitionTypeSingle(ty hclsyntax.TokenType) (before inputTokens, found *Token, after inputTokens) {
  92         before, within, after := it.PartitionType(ty)
  93         if within.Len() != 1 {
  94                 panic("PartitionType found more than one token")
  95         }
  96         return before, within.Tokens()[0], after
  97 }
  98
  99 // PartitionIncludeComments is like Partition except the returned "within"
 100 // range includes any lead and line comments associated with the range.
 101 func (it inputTokens) PartitionIncludingComments(rng hcl.Range) (before, within, after inputTokens) {
 102         start, end := partitionTokens(it.nativeTokens, rng)
 103         start = partitionLeadCommentTokens(it.nativeTokens[:start])
 104         _, afterNewline := partitionLineEndTokens(it.nativeTokens[end:])
 105         end += afterNewline
 106
 107         before = it.Slice(0, start)
 108         within = it.Slice(start, end)
 109         after = it.Slice(end, len(it.nativeTokens))
 110         return
 111
 112 }
 113
 114 // PartitionBlockItem is similar to PartitionIncludeComments but it returns
 115 // the comments as separate token sequences so that they can be captured into
 116 // AST attributes. It makes assumptions that apply only to block items, so
 117 // should not be used for other constructs.
 118 func (it inputTokens) PartitionBlockItem(rng hcl.Range) (before, leadComments, within, lineComments, newline, after inputTokens) {
 119         before, within, after = it.Partition(rng)
 120         before, leadComments = before.PartitionLeadComments()
 121         lineComments, newline, after = after.PartitionLineEndTokens()
 122         return
 123 }
 124
 125 func (it inputTokens) PartitionLeadComments() (before, within inputTokens) {
 126         start := partitionLeadCommentTokens(it.nativeTokens)
 127         before = it.Slice(0, start)
 128         within = it.Slice(start, len(it.nativeTokens))
 129         return
 130 }
 131
 132 func (it inputTokens) PartitionLineEndTokens() (comments, newline, after inputTokens) {
 133         afterComments, afterNewline := partitionLineEndTokens(it.nativeTokens)
 134         comments = it.Slice(0, afterComments)
 135         newline = it.Slice(afterComments, afterNewline)
 136         after = it.Slice(afterNewline, len(it.nativeTokens))
 137         return
 138 }
 139
 140 func (it inputTokens) Slice(start, end int) inputTokens {
 141         // When we slice, we create a new slice with no additional capacity because
 142         // we expect that these slices will be mutated in order to insert
 143         // new code into the AST, and we want to ensure that a new underlying
 144         // array gets allocated in that case, rather than writing into some
 145         // following slice and corrupting it.
 146         return inputTokens{
 147                 nativeTokens: it.nativeTokens[start:end:end],
 148                 writerTokens: it.writerTokens[start:end:end],
 149         }
 150 }
 151
 152 func (it inputTokens) Len() int {
 153         return len(it.nativeTokens)
 154 }
 155
 156 func (it inputTokens) Tokens() Tokens {
 157         return it.writerTokens
 158 }
 159
 160 func (it inputTokens) Types() []hclsyntax.TokenType {
 161         ret := make([]hclsyntax.TokenType, len(it.nativeTokens))
 162         for i, tok := range it.nativeTokens {
 163                 ret[i] = tok.Type
 164         }
 165         return ret
 166 }
 167
 168 // parseBody locates the given body within the given input tokens and returns
 169 // the resulting *Body object as well as the tokens that appeared before and
 170 // after it.
 171 func parseBody(nativeBody *hclsyntax.Body, from inputTokens) (inputTokens, *node, inputTokens) {
 172         before, within, after := from.PartitionIncludingComments(nativeBody.SrcRange)
 173
 174         // The main AST doesn't retain the original source ordering of the
 175         // body items, so we need to reconstruct that ordering by inspecting
 176         // their source ranges.
 177         nativeItems := make([]hclsyntax.Node, 0, len(nativeBody.Attributes)+len(nativeBody.Blocks))
 178         for _, nativeAttr := range nativeBody.Attributes {
 179                 nativeItems = append(nativeItems, nativeAttr)
 180         }
 181         for _, nativeBlock := range nativeBody.Blocks {
 182                 nativeItems = append(nativeItems, nativeBlock)
 183         }
 184         sort.Sort(nativeNodeSorter{nativeItems})
 185
 186         body := &Body{
 187                 inTree: newInTree(),
 188                 items:  newNodeSet(),
 189         }
 190
 191         remain := within
 192         for _, nativeItem := range nativeItems {
 193                 beforeItem, item, afterItem := parseBodyItem(nativeItem, remain)
 194
 195                 if beforeItem.Len() > 0 {
 196                         body.AppendUnstructuredTokens(beforeItem.Tokens())
 197                 }
 198                 body.appendItemNode(item)
 199
 200                 remain = afterItem
 201         }
 202
 203         if remain.Len() > 0 {
 204                 body.AppendUnstructuredTokens(remain.Tokens())
 205         }
 206
 207         return before, newNode(body), after
 208 }
 209
 210 func parseBodyItem(nativeItem hclsyntax.Node, from inputTokens) (inputTokens, *node, inputTokens) {
 211         before, leadComments, within, lineComments, newline, after := from.PartitionBlockItem(nativeItem.Range())
 212
 213         var item *node
 214
 215         switch tItem := nativeItem.(type) {
 216         case *hclsyntax.Attribute:
 217                 item = parseAttribute(tItem, within, leadComments, lineComments, newline)
 218         case *hclsyntax.Block:
 219                 item = parseBlock(tItem, within, leadComments, lineComments, newline)
 220         default:
 221                 // should never happen if caller is behaving
 222                 panic("unsupported native item type")
 223         }
 224
 225         return before, item, after
 226 }
 227
 228 func parseAttribute(nativeAttr *hclsyntax.Attribute, from, leadComments, lineComments, newline inputTokens) *node {
 229         attr := &Attribute{
 230                 inTree: newInTree(),
 231         }
 232         children := attr.inTree.children
 233
 234         {
 235                 cn := newNode(newComments(leadComments.Tokens()))
 236                 attr.leadComments = cn
 237                 children.AppendNode(cn)
 238         }
 239
 240         before, nameTokens, from := from.Partition(nativeAttr.NameRange)
 241         {
 242                 children.AppendUnstructuredTokens(before.Tokens())
 243                 if nameTokens.Len() != 1 {
 244                         // Should never happen with valid input
 245                         panic("attribute name is not exactly one token")
 246                 }
 247                 token := nameTokens.Tokens()[0]
 248                 in := newNode(newIdentifier(token))
 249                 attr.name = in
 250                 children.AppendNode(in)
 251         }
 252
 253         before, equalsTokens, from := from.Partition(nativeAttr.EqualsRange)
 254         children.AppendUnstructuredTokens(before.Tokens())
 255         children.AppendUnstructuredTokens(equalsTokens.Tokens())
 256
 257         before, exprTokens, from := from.Partition(nativeAttr.Expr.Range())
 258         {
 259                 children.AppendUnstructuredTokens(before.Tokens())
 260                 exprNode := parseExpression(nativeAttr.Expr, exprTokens)
 261                 attr.expr = exprNode
 262                 children.AppendNode(exprNode)
 263         }
 264
 265         {
 266                 cn := newNode(newComments(lineComments.Tokens()))
 267                 attr.lineComments = cn
 268                 children.AppendNode(cn)
 269         }
 270
 271         children.AppendUnstructuredTokens(newline.Tokens())
 272
 273         // Collect any stragglers, though there shouldn't be any
 274         children.AppendUnstructuredTokens(from.Tokens())
 275
 276         return newNode(attr)
 277 }
 278
 279 func parseBlock(nativeBlock *hclsyntax.Block, from, leadComments, lineComments, newline inputTokens) *node {
 280         block := &Block{
 281                 inTree: newInTree(),
 282                 labels: newNodeSet(),
 283         }
 284         children := block.inTree.children
 285
 286         {
 287                 cn := newNode(newComments(leadComments.Tokens()))
 288                 block.leadComments = cn
 289                 children.AppendNode(cn)
 290         }
 291
 292         before, typeTokens, from := from.Partition(nativeBlock.TypeRange)
 293         {
 294                 children.AppendUnstructuredTokens(before.Tokens())
 295                 if typeTokens.Len() != 1 {
 296                         // Should never happen with valid input
 297                         panic("block type name is not exactly one token")
 298                 }
 299                 token := typeTokens.Tokens()[0]
 300                 in := newNode(newIdentifier(token))
 301                 block.typeName = in
 302                 children.AppendNode(in)
 303         }
 304
 305         for _, rng := range nativeBlock.LabelRanges {
 306                 var labelTokens inputTokens
 307                 before, labelTokens, from = from.Partition(rng)
 308                 children.AppendUnstructuredTokens(before.Tokens())
 309                 tokens := labelTokens.Tokens()
 310                 ln := newNode(newQuoted(tokens))
 311                 block.labels.Add(ln)
 312                 children.AppendNode(ln)
 313         }
 314
 315         before, oBrace, from := from.Partition(nativeBlock.OpenBraceRange)
 316         children.AppendUnstructuredTokens(before.Tokens())
 317         children.AppendUnstructuredTokens(oBrace.Tokens())
 318
 319         // We go a bit out of order here: we go hunting for the closing brace
 320         // so that we have a delimited body, but then we'll deal with the body
 321         // before we actually append the closing brace and any straggling tokens
 322         // that appear after it.
 323         bodyTokens, cBrace, from := from.Partition(nativeBlock.CloseBraceRange)
 324         before, body, after := parseBody(nativeBlock.Body, bodyTokens)
 325         children.AppendUnstructuredTokens(before.Tokens())
 326         block.body = body
 327         children.AppendNode(body)
 328         children.AppendUnstructuredTokens(after.Tokens())
 329
 330         children.AppendUnstructuredTokens(cBrace.Tokens())
 331
 332         // stragglers
 333         children.AppendUnstructuredTokens(from.Tokens())
 334         if lineComments.Len() > 0 {
 335                 // blocks don't actually have line comments, so we'll just treat
 336                 // them as extra stragglers
 337                 children.AppendUnstructuredTokens(lineComments.Tokens())
 338         }
 339         children.AppendUnstructuredTokens(newline.Tokens())
 340
 341         return newNode(block)
 342 }
 343
 344 func parseExpression(nativeExpr hclsyntax.Expression, from inputTokens) *node {
 345         expr := newExpression()
 346         children := expr.inTree.children
 347
 348         nativeVars := nativeExpr.Variables()
 349
 350         for _, nativeTraversal := range nativeVars {
 351                 before, traversal, after := parseTraversal(nativeTraversal, from)
 352                 children.AppendUnstructuredTokens(before.Tokens())
 353                 children.AppendNode(traversal)
 354                 expr.absTraversals.Add(traversal)
 355                 from = after
 356         }
 357         // Attach any stragglers that don't belong to a traversal to the expression
 358         // itself. In an expression with no traversals at all, this is just the
 359         // entirety of "from".
 360         children.AppendUnstructuredTokens(from.Tokens())
 361
 362         return newNode(expr)
 363 }
 364
 365 func parseTraversal(nativeTraversal hcl.Traversal, from inputTokens) (before inputTokens, n *node, after inputTokens) {
 366         traversal := newTraversal()
 367         children := traversal.inTree.children
 368         before, from, after = from.Partition(nativeTraversal.SourceRange())
 369
 370         stepAfter := from
 371         for _, nativeStep := range nativeTraversal {
 372                 before, step, after := parseTraversalStep(nativeStep, stepAfter)
 373                 children.AppendUnstructuredTokens(before.Tokens())
 374                 children.AppendNode(step)
 375                 traversal.steps.Add(step)
 376                 stepAfter = after
 377         }
 378
 379         return before, newNode(traversal), after
 380 }
 381
 382 func parseTraversalStep(nativeStep hcl.Traverser, from inputTokens) (before inputTokens, n *node, after inputTokens) {
 383         var children *nodes
 384         switch tNativeStep := nativeStep.(type) {
 385
 386         case hcl.TraverseRoot, hcl.TraverseAttr:
 387                 step := newTraverseName()
 388                 children = step.inTree.children
 389                 before, from, after = from.Partition(nativeStep.SourceRange())
 390                 inBefore, token, inAfter := from.PartitionTypeSingle(hclsyntax.TokenIdent)
 391                 name := newIdentifier(token)
 392                 children.AppendUnstructuredTokens(inBefore.Tokens())
 393                 step.name = children.Append(name)
 394                 children.AppendUnstructuredTokens(inAfter.Tokens())
 395                 return before, newNode(step), after
 396
 397         case hcl.TraverseIndex:
 398                 step := newTraverseIndex()
 399                 children = step.inTree.children
 400                 before, from, after = from.Partition(nativeStep.SourceRange())
 401
 402                 var inBefore, oBrack, keyTokens, cBrack inputTokens
 403                 inBefore, oBrack, from = from.PartitionType(hclsyntax.TokenOBrack)
 404                 children.AppendUnstructuredTokens(inBefore.Tokens())
 405                 children.AppendUnstructuredTokens(oBrack.Tokens())
 406                 keyTokens, cBrack, from = from.PartitionType(hclsyntax.TokenCBrack)
 407
 408                 keyVal := tNativeStep.Key
 409                 switch keyVal.Type() {
 410                 case cty.String:
 411                         key := newQuoted(keyTokens.Tokens())
 412                         step.key = children.Append(key)
 413                 case cty.Number:
 414                         valBefore, valToken, valAfter := keyTokens.PartitionTypeSingle(hclsyntax.TokenNumberLit)
 415                         children.AppendUnstructuredTokens(valBefore.Tokens())
 416                         key := newNumber(valToken)
 417                         step.key = children.Append(key)
 418                         children.AppendUnstructuredTokens(valAfter.Tokens())
 419                 }
 420
 421                 children.AppendUnstructuredTokens(cBrack.Tokens())
 422                 children.AppendUnstructuredTokens(from.Tokens())
 423
 424                 return before, newNode(step), after
 425         default:
 426                 panic(fmt.Sprintf("unsupported traversal step type %T", nativeStep))
 427         }
 428
 429 }
 430
 431 // writerTokens takes a sequence of tokens as produced by the main hclsyntax
 432 // package and transforms it into an equivalent sequence of tokens using
 433 // this package's own token model.
 434 //
 435 // The resulting list contains the same number of tokens and uses the same
 436 // indices as the input, allowing the two sets of tokens to be correlated
 437 // by index.
 438 func writerTokens(nativeTokens hclsyntax.Tokens) Tokens {
 439         // Ultimately we want a slice of token _pointers_, but since we can
 440         // predict how much memory we're going to devote to tokens we'll allocate
 441         // it all as a single flat buffer and thus give the GC less work to do.
 442         tokBuf := make([]Token, len(nativeTokens))
 443         var lastByteOffset int
 444         for i, mainToken := range nativeTokens {
 445                 // Create a copy of the bytes so that we can mutate without
 446                 // corrupting the original token stream.
 447                 bytes := make([]byte, len(mainToken.Bytes))
 448                 copy(bytes, mainToken.Bytes)
 449
 450                 tokBuf[i] = Token{
 451                         Type:  mainToken.Type,
 452                         Bytes: bytes,
 453
 454                         // We assume here that spaces are always ASCII spaces, since
 455                         // that's what the scanner also assumes, and thus the number
 456                         // of bytes skipped is also the number of space characters.
 457                         SpacesBefore: mainToken.Range.Start.Byte - lastByteOffset,
 458                 }
 459
 460                 lastByteOffset = mainToken.Range.End.Byte
 461         }
 462
 463         // Now make a slice of pointers into the previous slice.
 464         ret := make(Tokens, len(tokBuf))
 465         for i := range ret {
 466                 ret[i] = &tokBuf[i]
 467         }
 468
 469         return ret
 470 }
 471
 472 // partitionTokens takes a sequence of tokens and a hcl.Range and returns
 473 // two indices within the token sequence that correspond with the range
 474 // boundaries, such that the slice operator could be used to produce
 475 // three token sequences for before, within, and after respectively:
 476 //
 477 //     start, end := partitionTokens(toks, rng)
 478 //     before := toks[:start]
 479 //     within := toks[start:end]
 480 //     after := toks[end:]
 481 //
 482 // This works best when the range is aligned with token boundaries (e.g.
 483 // because it was produced in terms of the scanner's result) but if that isn't
 484 // true then it will make a best effort that may produce strange results at
 485 // the boundaries.
 486 //
 487 // Native hclsyntax tokens are used here, because they contain the necessary
 488 // absolute position information. However, since writerTokens produces a
 489 // correlatable sequence of writer tokens, the resulting indices can be
 490 // used also to index into its result, allowing the partitioning of writer
 491 // tokens to be driven by the partitioning of native tokens.
 492 //
 493 // The tokens are assumed to be in source order and non-overlapping, which
 494 // will be true if the token sequence from the scanner is used directly.
 495 func partitionTokens(toks hclsyntax.Tokens, rng hcl.Range) (start, end int) {
 496         // We us a linear search here because we assume tha in most cases our
 497         // target range is close to the beginning of the sequence, and the seqences
 498         // are generally small for most reasonable files anyway.
 499         for i := 0; ; i++ {
 500                 if i >= len(toks) {
 501                         // No tokens for the given range at all!
 502                         return len(toks), len(toks)
 503                 }
 504
 505                 if toks[i].Range.Start.Byte >= rng.Start.Byte {
 506                         start = i
 507                         break
 508                 }
 509         }
 510
 511         for i := start; ; i++ {
 512                 if i >= len(toks) {
 513                         // The range "hangs off" the end of the token sequence
 514                         return start, len(toks)
 515                 }
 516
 517                 if toks[i].Range.Start.Byte >= rng.End.Byte {
 518                         end = i // end marker is exclusive
 519                         break
 520                 }
 521         }
 522
 523         return start, end
 524 }
 525
 526 // partitionLeadCommentTokens takes a sequence of tokens that is assumed
 527 // to immediately precede a construct that can have lead comment tokens,
 528 // and returns the index into that sequence where the lead comments begin.
 529 //
 530 // Lead comments are defined as whole lines containing only comment tokens
 531 // with no blank lines between. If no such lines are found, the returned
 532 // index will be len(toks).
 533 func partitionLeadCommentTokens(toks hclsyntax.Tokens) int {
 534         // single-line comments (which is what we're interested in here)
 535         // consume their trailing newline, so we can just walk backwards
 536         // until we stop seeing comment tokens.
 537         for i := len(toks) - 1; i >= 0; i-- {
 538                 if toks[i].Type != hclsyntax.TokenComment {
 539                         return i + 1
 540                 }
 541         }
 542         return 0
 543 }
 544
 545 // partitionLineEndTokens takes a sequence of tokens that is assumed
 546 // to immediately follow a construct that can have a line comment, and
 547 // returns first the index where any line comments end and then second
 548 // the index immediately after the trailing newline.
 549 //
 550 // Line comments are defined as comments that appear immediately after
 551 // a construct on the same line where its significant tokens ended.
 552 //
 553 // Since single-line comment tokens (# and //) include the newline that
 554 // terminates them, in the presence of these the two returned indices
 555 // will be the same since the comment itself serves as the line end.
 556 func partitionLineEndTokens(toks hclsyntax.Tokens) (afterComment, afterNewline int) {
 557         for i := 0; i < len(toks); i++ {
 558                 tok := toks[i]
 559                 if tok.Type != hclsyntax.TokenComment {
 560                         switch tok.Type {
 561                         case hclsyntax.TokenNewline:
 562                                 return i, i + 1
 563                         case hclsyntax.TokenEOF:
 564                                 // Although this is valid, we mustn't include the EOF
 565                                 // itself as our "newline" or else strange things will
 566                                 // happen when we try to append new items.
 567                                 return i, i
 568                         default:
 569                                 // If we have well-formed input here then nothing else should be
 570                                 // possible. This path should never happen, because we only try
 571                                 // to extract tokens from the sequence if the parser succeeded,
 572                                 // and it should catch this problem itself.
 573                                 panic("malformed line trailers: expected only comments and newlines")
 574                         }
 575                 }
 576
 577                 if len(tok.Bytes) > 0 && tok.Bytes[len(tok.Bytes)-1] == '\n' {
 578                         // Newline at the end of a single-line comment serves both as
 579                         // the end of comments *and* the end of the line.
 580                         return i + 1, i + 1
 581                 }
 582         }
 583         return len(toks), len(toks)
 584 }
 585
 586 // lexConfig uses the hclsyntax scanner to get a token stream and then
 587 // rewrites it into this package's token model.
 588 //
 589 // Any errors produced during scanning are ignored, so the results of this
 590 // function should be used with care.
 591 func lexConfig(src []byte) Tokens {
 592         mainTokens, _ := hclsyntax.LexConfig(src, "", hcl.Pos{Byte: 0, Line: 1, Column: 1})
 593         return writerTokens(mainTokens)
 594 }