vendor/github.com/hashicorp/hcl/hcl/parser/parser.go

   1 // Package parser implements a parser for HCL (HashiCorp Configuration
   2 // Language)
   3 package parser
   4
   5 import (
   6         "bytes"
   7         "errors"
   8         "fmt"
   9         "strings"
  10
  11         "github.com/hashicorp/hcl/hcl/ast"
  12         "github.com/hashicorp/hcl/hcl/scanner"
  13         "github.com/hashicorp/hcl/hcl/token"
  14 )
  15
  16 type Parser struct {
  17         sc *scanner.Scanner
  18
  19         // Last read token
  20         tok       token.Token
  21         commaPrev token.Token
  22
  23         comments    []*ast.CommentGroup
  24         leadComment *ast.CommentGroup // last lead comment
  25         lineComment *ast.CommentGroup // last line comment
  26
  27         enableTrace bool
  28         indent      int
  29         n           int // buffer size (max = 1)
  30 }
  31
  32 func newParser(src []byte) *Parser {
  33         return &Parser{
  34                 sc: scanner.New(src),
  35         }
  36 }
  37
  38 // Parse returns the fully parsed source and returns the abstract syntax tree.
  39 func Parse(src []byte) (*ast.File, error) {
  40         // normalize all line endings
  41         // since the scanner and output only work with "\n" line endings, we may
  42         // end up with dangling "\r" characters in the parsed data.
  43         src = bytes.Replace(src, []byte("\r\n"), []byte("\n"), -1)
  44
  45         p := newParser(src)
  46         return p.Parse()
  47 }
  48
// errEofToken is the sentinel error returned when an EOF token is read while
// parsing keys or values. objectList treats it as the regular end of input
// rather than as a failure.
var errEofToken = errors.New("EOF token found")
  50
  51 // Parse returns the fully parsed source and returns the abstract syntax tree.
  52 func (p *Parser) Parse() (*ast.File, error) {
  53         f := &ast.File{}
  54         var err, scerr error
  55         p.sc.Error = func(pos token.Pos, msg string) {
  56                 scerr = &PosError{Pos: pos, Err: errors.New(msg)}
  57         }
  58
  59         f.Node, err = p.objectList(false)
  60         if scerr != nil {
  61                 return nil, scerr
  62         }
  63         if err != nil {
  64                 return nil, err
  65         }
  66
  67         f.Comments = p.comments
  68         return f, nil
  69 }
  70
  71 // objectList parses a list of items within an object (generally k/v pairs).
  72 // The parameter" obj" tells this whether to we are within an object (braces:
  73 // '{', '}') or just at the top level. If we're within an object, we end
  74 // at an RBRACE.
  75 func (p *Parser) objectList(obj bool) (*ast.ObjectList, error) {
  76         defer un(trace(p, "ParseObjectList"))
  77         node := &ast.ObjectList{}
  78
  79         for {
  80                 if obj {
  81                         tok := p.scan()
  82                         p.unscan()
  83                         if tok.Type == token.RBRACE {
  84                                 break
  85                         }
  86                 }
  87
  88                 n, err := p.objectItem()
  89                 if err == errEofToken {
  90                         break // we are finished
  91                 }
  92
  93                 // we don't return a nil node, because might want to use already
  94                 // collected items.
  95                 if err != nil {
  96                         return node, err
  97                 }
  98
  99                 node.Add(n)
 100
 101                 // object lists can be optionally comma-delimited e.g. when a list of maps
 102                 // is being expressed, so a comma is allowed here - it's simply consumed
 103                 tok := p.scan()
 104                 if tok.Type != token.COMMA {
 105                         p.unscan()
 106                 }
 107         }
 108         return node, nil
 109 }
 110
 111 func (p *Parser) consumeComment() (comment *ast.Comment, endline int) {
 112         endline = p.tok.Pos.Line
 113
 114         // count the endline if it's multiline comment, ie starting with /*
 115         if len(p.tok.Text) > 1 && p.tok.Text[1] == '*' {
 116                 // don't use range here - no need to decode Unicode code points
 117                 for i := 0; i < len(p.tok.Text); i++ {
 118                         if p.tok.Text[i] == '\n' {
 119                                 endline++
 120                         }
 121                 }
 122         }
 123
 124         comment = &ast.Comment{Start: p.tok.Pos, Text: p.tok.Text}
 125         p.tok = p.sc.Scan()
 126         return
 127 }
 128
 129 func (p *Parser) consumeCommentGroup(n int) (comments *ast.CommentGroup, endline int) {
 130         var list []*ast.Comment
 131         endline = p.tok.Pos.Line
 132
 133         for p.tok.Type == token.COMMENT && p.tok.Pos.Line <= endline+n {
 134                 var comment *ast.Comment
 135                 comment, endline = p.consumeComment()
 136                 list = append(list, comment)
 137         }
 138
 139         // add comment group to the comments list
 140         comments = &ast.CommentGroup{List: list}
 141         p.comments = append(p.comments, comments)
 142
 143         return
 144 }
 145
 146 // objectItem parses a single object item
 147 func (p *Parser) objectItem() (*ast.ObjectItem, error) {
 148         defer un(trace(p, "ParseObjectItem"))
 149
 150         keys, err := p.objectKey()
 151         if len(keys) > 0 && err == errEofToken {
 152                 // We ignore eof token here since it is an error if we didn't
 153                 // receive a value (but we did receive a key) for the item.
 154                 err = nil
 155         }
 156         if len(keys) > 0 && err != nil && p.tok.Type == token.RBRACE {
 157                 // This is a strange boolean statement, but what it means is:
 158                 // We have keys with no value, and we're likely in an object
 159                 // (since RBrace ends an object). For this, we set err to nil so
 160                 // we continue and get the error below of having the wrong value
 161                 // type.
 162                 err = nil
 163
 164                 // Reset the token type so we don't think it completed fine. See
 165                 // objectType which uses p.tok.Type to check if we're done with
 166                 // the object.
 167                 p.tok.Type = token.EOF
 168         }
 169         if err != nil {
 170                 return nil, err
 171         }
 172
 173         o := &ast.ObjectItem{
 174                 Keys: keys,
 175         }
 176
 177         if p.leadComment != nil {
 178                 o.LeadComment = p.leadComment
 179                 p.leadComment = nil
 180         }
 181
 182         switch p.tok.Type {
 183         case token.ASSIGN:
 184                 o.Assign = p.tok.Pos
 185                 o.Val, err = p.object()
 186                 if err != nil {
 187                         return nil, err
 188                 }
 189         case token.LBRACE:
 190                 o.Val, err = p.objectType()
 191                 if err != nil {
 192                         return nil, err
 193                 }
 194         default:
 195                 keyStr := make([]string, 0, len(keys))
 196                 for _, k := range keys {
 197                         keyStr = append(keyStr, k.Token.Text)
 198                 }
 199
 200                 return nil, fmt.Errorf(
 201                         "key '%s' expected start of object ('{') or assignment ('=')",
 202                         strings.Join(keyStr, " "))
 203         }
 204
 205         // do a look-ahead for line comment
 206         p.scan()
 207         if len(keys) > 0 && o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil {
 208                 o.LineComment = p.lineComment
 209                 p.lineComment = nil
 210         }
 211         p.unscan()
 212         return o, nil
 213 }
 214
 215 // objectKey parses an object key and returns a ObjectKey AST
 216 func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
 217         keyCount := 0
 218         keys := make([]*ast.ObjectKey, 0)
 219
 220         for {
 221                 tok := p.scan()
 222                 switch tok.Type {
 223                 case token.EOF:
 224                         // It is very important to also return the keys here as well as
 225                         // the error. This is because we need to be able to tell if we
 226                         // did parse keys prior to finding the EOF, or if we just found
 227                         // a bare EOF.
 228                         return keys, errEofToken
 229                 case token.ASSIGN:
 230                         // assignment or object only, but not nested objects. this is not
 231                         // allowed: `foo bar = {}`
 232                         if keyCount > 1 {
 233                                 return nil, &PosError{
 234                                         Pos: p.tok.Pos,
 235                                         Err: fmt.Errorf("nested object expected: LBRACE got: %s", p.tok.Type),
 236                                 }
 237                         }
 238
 239                         if keyCount == 0 {
 240                                 return nil, &PosError{
 241                                         Pos: p.tok.Pos,
 242                                         Err: errors.New("no object keys found!"),
 243                                 }
 244                         }
 245
 246                         return keys, nil
 247                 case token.LBRACE:
 248                         var err error
 249
 250                         // If we have no keys, then it is a syntax error. i.e. {{}} is not
 251                         // allowed.
 252                         if len(keys) == 0 {
 253                                 err = &PosError{
 254                                         Pos: p.tok.Pos,
 255                                         Err: fmt.Errorf("expected: IDENT | STRING got: %s", p.tok.Type),
 256                                 }
 257                         }
 258
 259                         // object
 260                         return keys, err
 261                 case token.IDENT, token.STRING:
 262                         keyCount++
 263                         keys = append(keys, &ast.ObjectKey{Token: p.tok})
 264                 case token.ILLEGAL:
 265                         return keys, &PosError{
 266                                 Pos: p.tok.Pos,
 267                                 Err: fmt.Errorf("illegal character"),
 268                         }
 269                 default:
 270                         return keys, &PosError{
 271                                 Pos: p.tok.Pos,
 272                                 Err: fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", p.tok.Type),
 273                         }
 274                 }
 275         }
 276 }
 277
 278 // object parses any type of object, such as number, bool, string, object or
 279 // list.
 280 func (p *Parser) object() (ast.Node, error) {
 281         defer un(trace(p, "ParseType"))
 282         tok := p.scan()
 283
 284         switch tok.Type {
 285         case token.NUMBER, token.FLOAT, token.BOOL, token.STRING, token.HEREDOC:
 286                 return p.literalType()
 287         case token.LBRACE:
 288                 return p.objectType()
 289         case token.LBRACK:
 290                 return p.listType()
 291         case token.COMMENT:
 292                 // implement comment
 293         case token.EOF:
 294                 return nil, errEofToken
 295         }
 296
 297         return nil, &PosError{
 298                 Pos: tok.Pos,
 299                 Err: fmt.Errorf("Unknown token: %+v", tok),
 300         }
 301 }
 302
 303 // objectType parses an object type and returns a ObjectType AST
 304 func (p *Parser) objectType() (*ast.ObjectType, error) {
 305         defer un(trace(p, "ParseObjectType"))
 306
 307         // we assume that the currently scanned token is a LBRACE
 308         o := &ast.ObjectType{
 309                 Lbrace: p.tok.Pos,
 310         }
 311
 312         l, err := p.objectList(true)
 313
 314         // if we hit RBRACE, we are good to go (means we parsed all Items), if it's
 315         // not a RBRACE, it's an syntax error and we just return it.
 316         if err != nil && p.tok.Type != token.RBRACE {
 317                 return nil, err
 318         }
 319
 320         // No error, scan and expect the ending to be a brace
 321         if tok := p.scan(); tok.Type != token.RBRACE {
 322                 return nil, fmt.Errorf("object expected closing RBRACE got: %s", tok.Type)
 323         }
 324
 325         o.List = l
 326         o.Rbrace = p.tok.Pos // advanced via parseObjectList
 327         return o, nil
 328 }
 329
 330 // listType parses a list type and returns a ListType AST
 331 func (p *Parser) listType() (*ast.ListType, error) {
 332         defer un(trace(p, "ParseListType"))
 333
 334         // we assume that the currently scanned token is a LBRACK
 335         l := &ast.ListType{
 336                 Lbrack: p.tok.Pos,
 337         }
 338
 339         needComma := false
 340         for {
 341                 tok := p.scan()
 342                 if needComma {
 343                         switch tok.Type {
 344                         case token.COMMA, token.RBRACK:
 345                         default:
 346                                 return nil, &PosError{
 347                                         Pos: tok.Pos,
 348                                         Err: fmt.Errorf(
 349                                                 "error parsing list, expected comma or list end, got: %s",
 350                                                 tok.Type),
 351                                 }
 352                         }
 353                 }
 354                 switch tok.Type {
 355                 case token.BOOL, token.NUMBER, token.FLOAT, token.STRING, token.HEREDOC:
 356                         node, err := p.literalType()
 357                         if err != nil {
 358                                 return nil, err
 359                         }
 360
 361                         // If there is a lead comment, apply it
 362                         if p.leadComment != nil {
 363                                 node.LeadComment = p.leadComment
 364                                 p.leadComment = nil
 365                         }
 366
 367                         l.Add(node)
 368                         needComma = true
 369                 case token.COMMA:
 370                         // get next list item or we are at the end
 371                         // do a look-ahead for line comment
 372                         p.scan()
 373                         if p.lineComment != nil && len(l.List) > 0 {
 374                                 lit, ok := l.List[len(l.List)-1].(*ast.LiteralType)
 375                                 if ok {
 376                                         lit.LineComment = p.lineComment
 377                                         l.List[len(l.List)-1] = lit
 378                                         p.lineComment = nil
 379                                 }
 380                         }
 381                         p.unscan()
 382
 383                         needComma = false
 384                         continue
 385                 case token.LBRACE:
 386                         // Looks like a nested object, so parse it out
 387                         node, err := p.objectType()
 388                         if err != nil {
 389                                 return nil, &PosError{
 390                                         Pos: tok.Pos,
 391                                         Err: fmt.Errorf(
 392                                                 "error while trying to parse object within list: %s", err),
 393                                 }
 394                         }
 395                         l.Add(node)
 396                         needComma = true
 397                 case token.LBRACK:
 398                         node, err := p.listType()
 399                         if err != nil {
 400                                 return nil, &PosError{
 401                                         Pos: tok.Pos,
 402                                         Err: fmt.Errorf(
 403                                                 "error while trying to parse list within list: %s", err),
 404                                 }
 405                         }
 406                         l.Add(node)
 407                 case token.RBRACK:
 408                         // finished
 409                         l.Rbrack = p.tok.Pos
 410                         return l, nil
 411                 default:
 412                         return nil, &PosError{
 413                                 Pos: tok.Pos,
 414                                 Err: fmt.Errorf("unexpected token while parsing list: %s", tok.Type),
 415                         }
 416                 }
 417         }
 418 }
 419
 420 // literalType parses a literal type and returns a LiteralType AST
 421 func (p *Parser) literalType() (*ast.LiteralType, error) {
 422         defer un(trace(p, "ParseLiteral"))
 423
 424         return &ast.LiteralType{
 425                 Token: p.tok,
 426         }, nil
 427 }
 428
 429 // scan returns the next token from the underlying scanner. If a token has
 430 // been unscanned then read that instead. In the process, it collects any
 431 // comment groups encountered, and remembers the last lead and line comments.
 432 func (p *Parser) scan() token.Token {
 433         // If we have a token on the buffer, then return it.
 434         if p.n != 0 {
 435                 p.n = 0
 436                 return p.tok
 437         }
 438
 439         // Otherwise read the next token from the scanner and Save it to the buffer
 440         // in case we unscan later.
 441         prev := p.tok
 442         p.tok = p.sc.Scan()
 443
 444         if p.tok.Type == token.COMMENT {
 445                 var comment *ast.CommentGroup
 446                 var endline int
 447
 448                 // fmt.Printf("p.tok.Pos.Line = %+v prev: %d endline %d \n",
 449                 // p.tok.Pos.Line, prev.Pos.Line, endline)
 450                 if p.tok.Pos.Line == prev.Pos.Line {
 451                         // The comment is on same line as the previous token; it
 452                         // cannot be a lead comment but may be a line comment.
 453                         comment, endline = p.consumeCommentGroup(0)
 454                         if p.tok.Pos.Line != endline {
 455                                 // The next token is on a different line, thus
 456                                 // the last comment group is a line comment.
 457                                 p.lineComment = comment
 458                         }
 459                 }
 460
 461                 // consume successor comments, if any
 462                 endline = -1
 463                 for p.tok.Type == token.COMMENT {
 464                         comment, endline = p.consumeCommentGroup(1)
 465                 }
 466
 467                 if endline+1 == p.tok.Pos.Line && p.tok.Type != token.RBRACE {
 468                         switch p.tok.Type {
 469                         case token.RBRACE, token.RBRACK:
 470                                 // Do not count for these cases
 471                         default:
 472                                 // The next token is following on the line immediately after the
 473                                 // comment group, thus the last comment group is a lead comment.
 474                                 p.leadComment = comment
 475                         }
 476                 }
 477
 478         }
 479
 480         return p.tok
 481 }
 482
// unscan pushes the previously read token back onto the buffer. It only
// marks the one-slot buffer as full; the next call to scan then returns
// p.tok again without reading from the scanner.
func (p *Parser) unscan() {
	p.n = 1
}
 487
 488 // ----------------------------------------------------------------------------
 489 // Parsing support
 490
 491 func (p *Parser) printTrace(a ...interface{}) {
 492         if !p.enableTrace {
 493                 return
 494         }
 495
 496         const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
 497         const n = len(dots)
 498         fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
 499
 500         i := 2 * p.indent
 501         for i > n {
 502                 fmt.Print(dots)
 503                 i -= n
 504         }
 505         // i <= n
 506         fmt.Print(dots[0:i])
 507         fmt.Println(a...)
 508 }
 509
// trace prints msg followed by "(" through printTrace, increases the trace
// indentation by one level and returns p so that the result can be handed
// straight to un.
func trace(p *Parser, msg string) *Parser {
	p.printTrace(msg, "(")
	p.indent++
	return p
}
 515
// un reverts the indentation added by trace and prints the closing ")"
// through printTrace.
//
// Usage pattern: defer un(trace(p, "..."))
func un(p *Parser) {
	p.indent--
	p.printTrace(")")
}