]> git.immae.eu Git - github/fretlink/terraform-provider-statuscake.git/blob - vendor/github.com/hashicorp/hcl/hcl/scanner/scanner.go
Initial transfer of provider code
[github/fretlink/terraform-provider-statuscake.git] / vendor / github.com / hashicorp / hcl / hcl / scanner / scanner.go
1 // Package scanner implements a scanner for HCL (HashiCorp Configuration
2 // Language) source text.
3 package scanner
4
5 import (
6 "bytes"
7 "fmt"
8 "os"
9 "regexp"
10 "unicode"
11 "unicode/utf8"
12
13 "github.com/hashicorp/hcl/hcl/token"
14 )
15
// eof is the sentinel rune handed back by next and peek when no further rune
// can be read. Its value is the NUL rune.
const eof rune = 0
18
19 // Scanner defines a lexical scanner
20 type Scanner struct {
21 buf *bytes.Buffer // Source buffer for advancing and scanning
22 src []byte // Source buffer for immutable access
23
24 // Source Position
25 srcPos token.Pos // current position
26 prevPos token.Pos // previous position, used for peek() method
27
28 lastCharLen int // length of last character in bytes
29 lastLineLen int // length of last line in characters (for correct column reporting)
30
31 tokStart int // token text start position
32 tokEnd int // token text end position
33
34 // Error is called for each error encountered. If no Error
35 // function is set, the error is reported to os.Stderr.
36 Error func(pos token.Pos, msg string)
37
38 // ErrorCount is incremented by one for each error encountered.
39 ErrorCount int
40
41 // tokPos is the start position of most recently scanned token; set by
42 // Scan. The Filename field is always left untouched by the Scanner. If
43 // an error is reported (via Error) and Position is invalid, the scanner is
44 // not inside a token.
45 tokPos token.Pos
46 }
47
48 // New creates and initializes a new instance of Scanner using src as
49 // its source content.
50 func New(src []byte) *Scanner {
51 // even though we accept a src, we read from a io.Reader compatible type
52 // (*bytes.Buffer). So in the future we might easily change it to streaming
53 // read.
54 b := bytes.NewBuffer(src)
55 s := &Scanner{
56 buf: b,
57 src: src,
58 }
59
60 // srcPosition always starts with 1
61 s.srcPos.Line = 1
62 return s
63 }
64
65 // next reads the next rune from the bufferred reader. Returns the rune(0) if
66 // an error occurs (or io.EOF is returned).
67 func (s *Scanner) next() rune {
68 ch, size, err := s.buf.ReadRune()
69 if err != nil {
70 // advance for error reporting
71 s.srcPos.Column++
72 s.srcPos.Offset += size
73 s.lastCharLen = size
74 return eof
75 }
76
77 if ch == utf8.RuneError && size == 1 {
78 s.srcPos.Column++
79 s.srcPos.Offset += size
80 s.lastCharLen = size
81 s.err("illegal UTF-8 encoding")
82 return ch
83 }
84
85 // remember last position
86 s.prevPos = s.srcPos
87
88 s.srcPos.Column++
89 s.lastCharLen = size
90 s.srcPos.Offset += size
91
92 if ch == '\n' {
93 s.srcPos.Line++
94 s.lastLineLen = s.srcPos.Column
95 s.srcPos.Column = 0
96 }
97
98 // If we see a null character with data left, then that is an error
99 if ch == '\x00' && s.buf.Len() > 0 {
100 s.err("unexpected null character (0x00)")
101 return eof
102 }
103
104 // debug
105 // fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
106 return ch
107 }
108
109 // unread unreads the previous read Rune and updates the source position
110 func (s *Scanner) unread() {
111 if err := s.buf.UnreadRune(); err != nil {
112 panic(err) // this is user fault, we should catch it
113 }
114 s.srcPos = s.prevPos // put back last position
115 }
116
117 // peek returns the next rune without advancing the reader.
118 func (s *Scanner) peek() rune {
119 peek, _, err := s.buf.ReadRune()
120 if err != nil {
121 return eof
122 }
123
124 s.buf.UnreadRune()
125 return peek
126 }
127
128 // Scan scans the next token and returns the token.
129 func (s *Scanner) Scan() token.Token {
130 ch := s.next()
131
132 // skip white space
133 for isWhitespace(ch) {
134 ch = s.next()
135 }
136
137 var tok token.Type
138
139 // token text markings
140 s.tokStart = s.srcPos.Offset - s.lastCharLen
141
142 // token position, initial next() is moving the offset by one(size of rune
143 // actually), though we are interested with the starting point
144 s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
145 if s.srcPos.Column > 0 {
146 // common case: last character was not a '\n'
147 s.tokPos.Line = s.srcPos.Line
148 s.tokPos.Column = s.srcPos.Column
149 } else {
150 // last character was a '\n'
151 // (we cannot be at the beginning of the source
152 // since we have called next() at least once)
153 s.tokPos.Line = s.srcPos.Line - 1
154 s.tokPos.Column = s.lastLineLen
155 }
156
157 switch {
158 case isLetter(ch):
159 tok = token.IDENT
160 lit := s.scanIdentifier()
161 if lit == "true" || lit == "false" {
162 tok = token.BOOL
163 }
164 case isDecimal(ch):
165 tok = s.scanNumber(ch)
166 default:
167 switch ch {
168 case eof:
169 tok = token.EOF
170 case '"':
171 tok = token.STRING
172 s.scanString()
173 case '#', '/':
174 tok = token.COMMENT
175 s.scanComment(ch)
176 case '.':
177 tok = token.PERIOD
178 ch = s.peek()
179 if isDecimal(ch) {
180 tok = token.FLOAT
181 ch = s.scanMantissa(ch)
182 ch = s.scanExponent(ch)
183 }
184 case '<':
185 tok = token.HEREDOC
186 s.scanHeredoc()
187 case '[':
188 tok = token.LBRACK
189 case ']':
190 tok = token.RBRACK
191 case '{':
192 tok = token.LBRACE
193 case '}':
194 tok = token.RBRACE
195 case ',':
196 tok = token.COMMA
197 case '=':
198 tok = token.ASSIGN
199 case '+':
200 tok = token.ADD
201 case '-':
202 if isDecimal(s.peek()) {
203 ch := s.next()
204 tok = s.scanNumber(ch)
205 } else {
206 tok = token.SUB
207 }
208 default:
209 s.err("illegal char")
210 }
211 }
212
213 // finish token ending
214 s.tokEnd = s.srcPos.Offset
215
216 // create token literal
217 var tokenText string
218 if s.tokStart >= 0 {
219 tokenText = string(s.src[s.tokStart:s.tokEnd])
220 }
221 s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
222
223 return token.Token{
224 Type: tok,
225 Pos: s.tokPos,
226 Text: tokenText,
227 }
228 }
229
230 func (s *Scanner) scanComment(ch rune) {
231 // single line comments
232 if ch == '#' || (ch == '/' && s.peek() != '*') {
233 if ch == '/' && s.peek() != '/' {
234 s.err("expected '/' for comment")
235 return
236 }
237
238 ch = s.next()
239 for ch != '\n' && ch >= 0 && ch != eof {
240 ch = s.next()
241 }
242 if ch != eof && ch >= 0 {
243 s.unread()
244 }
245 return
246 }
247
248 // be sure we get the character after /* This allows us to find comment's
249 // that are not erminated
250 if ch == '/' {
251 s.next()
252 ch = s.next() // read character after "/*"
253 }
254
255 // look for /* - style comments
256 for {
257 if ch < 0 || ch == eof {
258 s.err("comment not terminated")
259 break
260 }
261
262 ch0 := ch
263 ch = s.next()
264 if ch0 == '*' && ch == '/' {
265 break
266 }
267 }
268 }
269
270 // scanNumber scans a HCL number definition starting with the given rune
271 func (s *Scanner) scanNumber(ch rune) token.Type {
272 if ch == '0' {
273 // check for hexadecimal, octal or float
274 ch = s.next()
275 if ch == 'x' || ch == 'X' {
276 // hexadecimal
277 ch = s.next()
278 found := false
279 for isHexadecimal(ch) {
280 ch = s.next()
281 found = true
282 }
283
284 if !found {
285 s.err("illegal hexadecimal number")
286 }
287
288 if ch != eof {
289 s.unread()
290 }
291
292 return token.NUMBER
293 }
294
295 // now it's either something like: 0421(octal) or 0.1231(float)
296 illegalOctal := false
297 for isDecimal(ch) {
298 ch = s.next()
299 if ch == '8' || ch == '9' {
300 // this is just a possibility. For example 0159 is illegal, but
301 // 0159.23 is valid. So we mark a possible illegal octal. If
302 // the next character is not a period, we'll print the error.
303 illegalOctal = true
304 }
305 }
306
307 if ch == 'e' || ch == 'E' {
308 ch = s.scanExponent(ch)
309 return token.FLOAT
310 }
311
312 if ch == '.' {
313 ch = s.scanFraction(ch)
314
315 if ch == 'e' || ch == 'E' {
316 ch = s.next()
317 ch = s.scanExponent(ch)
318 }
319 return token.FLOAT
320 }
321
322 if illegalOctal {
323 s.err("illegal octal number")
324 }
325
326 if ch != eof {
327 s.unread()
328 }
329 return token.NUMBER
330 }
331
332 s.scanMantissa(ch)
333 ch = s.next() // seek forward
334 if ch == 'e' || ch == 'E' {
335 ch = s.scanExponent(ch)
336 return token.FLOAT
337 }
338
339 if ch == '.' {
340 ch = s.scanFraction(ch)
341 if ch == 'e' || ch == 'E' {
342 ch = s.next()
343 ch = s.scanExponent(ch)
344 }
345 return token.FLOAT
346 }
347
348 if ch != eof {
349 s.unread()
350 }
351 return token.NUMBER
352 }
353
354 // scanMantissa scans the mantissa begining from the rune. It returns the next
355 // non decimal rune. It's used to determine wheter it's a fraction or exponent.
356 func (s *Scanner) scanMantissa(ch rune) rune {
357 scanned := false
358 for isDecimal(ch) {
359 ch = s.next()
360 scanned = true
361 }
362
363 if scanned && ch != eof {
364 s.unread()
365 }
366 return ch
367 }
368
369 // scanFraction scans the fraction after the '.' rune
370 func (s *Scanner) scanFraction(ch rune) rune {
371 if ch == '.' {
372 ch = s.peek() // we peek just to see if we can move forward
373 ch = s.scanMantissa(ch)
374 }
375 return ch
376 }
377
378 // scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
379 // rune.
380 func (s *Scanner) scanExponent(ch rune) rune {
381 if ch == 'e' || ch == 'E' {
382 ch = s.next()
383 if ch == '-' || ch == '+' {
384 ch = s.next()
385 }
386 ch = s.scanMantissa(ch)
387 }
388 return ch
389 }
390
391 // scanHeredoc scans a heredoc string
392 func (s *Scanner) scanHeredoc() {
393 // Scan the second '<' in example: '<<EOF'
394 if s.next() != '<' {
395 s.err("heredoc expected second '<', didn't see it")
396 return
397 }
398
399 // Get the original offset so we can read just the heredoc ident
400 offs := s.srcPos.Offset
401
402 // Scan the identifier
403 ch := s.next()
404
405 // Indented heredoc syntax
406 if ch == '-' {
407 ch = s.next()
408 }
409
410 for isLetter(ch) || isDigit(ch) {
411 ch = s.next()
412 }
413
414 // If we reached an EOF then that is not good
415 if ch == eof {
416 s.err("heredoc not terminated")
417 return
418 }
419
420 // Ignore the '\r' in Windows line endings
421 if ch == '\r' {
422 if s.peek() == '\n' {
423 ch = s.next()
424 }
425 }
426
427 // If we didn't reach a newline then that is also not good
428 if ch != '\n' {
429 s.err("invalid characters in heredoc anchor")
430 return
431 }
432
433 // Read the identifier
434 identBytes := s.src[offs : s.srcPos.Offset-s.lastCharLen]
435 if len(identBytes) == 0 {
436 s.err("zero-length heredoc anchor")
437 return
438 }
439
440 var identRegexp *regexp.Regexp
441 if identBytes[0] == '-' {
442 identRegexp = regexp.MustCompile(fmt.Sprintf(`[[:space:]]*%s\z`, identBytes[1:]))
443 } else {
444 identRegexp = regexp.MustCompile(fmt.Sprintf(`[[:space:]]*%s\z`, identBytes))
445 }
446
447 // Read the actual string value
448 lineStart := s.srcPos.Offset
449 for {
450 ch := s.next()
451
452 // Special newline handling.
453 if ch == '\n' {
454 // Math is fast, so we first compare the byte counts to see if we have a chance
455 // of seeing the same identifier - if the length is less than the number of bytes
456 // in the identifier, this cannot be a valid terminator.
457 lineBytesLen := s.srcPos.Offset - s.lastCharLen - lineStart
458 if lineBytesLen >= len(identBytes) && identRegexp.Match(s.src[lineStart:s.srcPos.Offset-s.lastCharLen]) {
459 break
460 }
461
462 // Not an anchor match, record the start of a new line
463 lineStart = s.srcPos.Offset
464 }
465
466 if ch == eof {
467 s.err("heredoc not terminated")
468 return
469 }
470 }
471
472 return
473 }
474
475 // scanString scans a quoted string
476 func (s *Scanner) scanString() {
477 braces := 0
478 for {
479 // '"' opening already consumed
480 // read character after quote
481 ch := s.next()
482
483 if (ch == '\n' && braces == 0) || ch < 0 || ch == eof {
484 s.err("literal not terminated")
485 return
486 }
487
488 if ch == '"' && braces == 0 {
489 break
490 }
491
492 // If we're going into a ${} then we can ignore quotes for awhile
493 if braces == 0 && ch == '$' && s.peek() == '{' {
494 braces++
495 s.next()
496 } else if braces > 0 && ch == '{' {
497 braces++
498 }
499 if braces > 0 && ch == '}' {
500 braces--
501 }
502
503 if ch == '\\' {
504 s.scanEscape()
505 }
506 }
507
508 return
509 }
510
511 // scanEscape scans an escape sequence
512 func (s *Scanner) scanEscape() rune {
513 // http://en.cppreference.com/w/cpp/language/escape
514 ch := s.next() // read character after '/'
515 switch ch {
516 case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
517 // nothing to do
518 case '0', '1', '2', '3', '4', '5', '6', '7':
519 // octal notation
520 ch = s.scanDigits(ch, 8, 3)
521 case 'x':
522 // hexademical notation
523 ch = s.scanDigits(s.next(), 16, 2)
524 case 'u':
525 // universal character name
526 ch = s.scanDigits(s.next(), 16, 4)
527 case 'U':
528 // universal character name
529 ch = s.scanDigits(s.next(), 16, 8)
530 default:
531 s.err("illegal char escape")
532 }
533 return ch
534 }
535
536 // scanDigits scans a rune with the given base for n times. For example an
537 // octal notation \184 would yield in scanDigits(ch, 8, 3)
538 func (s *Scanner) scanDigits(ch rune, base, n int) rune {
539 start := n
540 for n > 0 && digitVal(ch) < base {
541 ch = s.next()
542 if ch == eof {
543 // If we see an EOF, we halt any more scanning of digits
544 // immediately.
545 break
546 }
547
548 n--
549 }
550 if n > 0 {
551 s.err("illegal char escape")
552 }
553
554 if n != start {
555 // we scanned all digits, put the last non digit char back,
556 // only if we read anything at all
557 s.unread()
558 }
559
560 return ch
561 }
562
563 // scanIdentifier scans an identifier and returns the literal string
564 func (s *Scanner) scanIdentifier() string {
565 offs := s.srcPos.Offset - s.lastCharLen
566 ch := s.next()
567 for isLetter(ch) || isDigit(ch) || ch == '-' || ch == '.' {
568 ch = s.next()
569 }
570
571 if ch != eof {
572 s.unread() // we got identifier, put back latest char
573 }
574
575 return string(s.src[offs:s.srcPos.Offset])
576 }
577
578 // recentPosition returns the position of the character immediately after the
579 // character or token returned by the last call to Scan.
580 func (s *Scanner) recentPosition() (pos token.Pos) {
581 pos.Offset = s.srcPos.Offset - s.lastCharLen
582 switch {
583 case s.srcPos.Column > 0:
584 // common case: last character was not a '\n'
585 pos.Line = s.srcPos.Line
586 pos.Column = s.srcPos.Column
587 case s.lastLineLen > 0:
588 // last character was a '\n'
589 // (we cannot be at the beginning of the source
590 // since we have called next() at least once)
591 pos.Line = s.srcPos.Line - 1
592 pos.Column = s.lastLineLen
593 default:
594 // at the beginning of the source
595 pos.Line = 1
596 pos.Column = 1
597 }
598 return
599 }
600
601 // err prints the error of any scanning to s.Error function. If the function is
602 // not defined, by default it prints them to os.Stderr
603 func (s *Scanner) err(msg string) {
604 s.ErrorCount++
605 pos := s.recentPosition()
606
607 if s.Error != nil {
608 s.Error(pos, msg)
609 return
610 }
611
612 fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
613 }
614
// isLetter returns true if the given rune is an ASCII letter, an underscore
// or a non-ASCII Unicode letter.
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z', ch == '_':
		return true
	case ch >= 0x80:
		return unicode.IsLetter(ch)
	}
	return false
}
619
// isDigit returns true if the given rune is an ASCII digit or a non-ASCII
// Unicode decimal digit.
func isDigit(ch rune) bool {
	if ch >= 0x80 {
		return unicode.IsDigit(ch)
	}
	return '0' <= ch && ch <= '9'
}
624
// isDecimal returns true if the given rune is one of the ASCII digits '0'
// through '9'.
func isDecimal(ch rune) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
629
// isHexadecimal returns true if the given rune is a hexadecimal digit:
// '0'-'9', 'a'-'f' or 'A'-'F'.
func isHexadecimal(ch rune) bool {
	switch {
	case '0' <= ch && ch <= '9':
		return true
	case 'a' <= ch && ch <= 'f':
		return true
	case 'A' <= ch && ch <= 'F':
		return true
	}
	return false
}
634
// isWhitespace returns true if the rune is a space, tab, newline or carriage
// return.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
639
// digitVal returns the integer value of a given octal, decimal or hexadecimal
// rune. Any other rune yields 16, which is larger than any legal digit value.
func digitVal(ch rune) int {
	if '0' <= ch && ch <= '9' {
		return int(ch - '0')
	}
	// Setting bit 0x20 maps 'A'-'F' onto 'a'-'f' and leaves 'a'-'f' alone;
	// no other rune lands in that range.
	if lower := ch | 0x20; 'a' <= lower && lower <= 'f' {
		return int(lower-'a') + 10
	}
	return 16
}