10 "github.com/hashicorp/hcl/json/token"
13 // eof represents a marker rune for the end of the reader.
16 // Scanner defines a lexical scanner
18 buf *bytes.Buffer // Source buffer for advancing and scanning
19 src []byte // Source buffer for immutable access
22 srcPos token.Pos // current position
23 prevPos token.Pos // previous position, used for peek() method
25 lastCharLen int // length of last character in bytes
26 lastLineLen int // length of last line in characters (for correct column reporting)
28 tokStart int // token text start position
29 tokEnd int // token text end position
31 // Error is called for each error encountered. If no Error
32 // function is set, the error is reported to os.Stderr.
33 Error func(pos token.Pos, msg string)
35 // ErrorCount is incremented by one for each error encountered.
38 // tokPos is the start position of most recently scanned token; set by
39 // Scan. The Filename field is always left untouched by the Scanner. If
40 // an error is reported (via Error) and Position is invalid, the scanner is
41 // not inside a token.
45 // New creates and initializes a new instance of Scanner using src as
46 // its source content.
47 func New(src []byte) *Scanner {
48 // even though we accept a src, we read from an io.Reader compatible type
49 // (*bytes.Buffer). So in the future we might easily change it to streaming
51 b := bytes.NewBuffer(src)
57 // srcPosition always starts with 1
62 // next reads the next rune from the buffered reader. Returns the rune(0) if
63 // an error occurs (or io.EOF is returned).
64 func (s *Scanner) next() rune {
65 ch, size, err := s.buf.ReadRune()
67 // advance for error reporting
69 s.srcPos.Offset += size
74 if ch == utf8.RuneError && size == 1 {
76 s.srcPos.Offset += size
78 s.err("illegal UTF-8 encoding")
82 // remember last position
87 s.srcPos.Offset += size
91 s.lastLineLen = s.srcPos.Column
96 // fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
100 // unread unreads the previous read Rune and updates the source position
101 func (s *Scanner) unread() {
102 if err := s.buf.UnreadRune(); err != nil {
103 panic(err) // this is user fault, we should catch it
105 s.srcPos = s.prevPos // put back last position
108 // peek returns the next rune without advancing the reader.
109 func (s *Scanner) peek() rune {
110 peek, _, err := s.buf.ReadRune()
119 // Scan scans the next token and returns the token.
120 func (s *Scanner) Scan() token.Token {
124 for isWhitespace(ch) {
130 // token text markings
131 s.tokStart = s.srcPos.Offset - s.lastCharLen
133 // token position: the initial next() has already advanced the offset by the
134 // size of the rune just read, but we are interested in the starting point
135 s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
136 if s.srcPos.Column > 0 {
137 // common case: last character was not a '\n'
138 s.tokPos.Line = s.srcPos.Line
139 s.tokPos.Column = s.srcPos.Column
141 // last character was a '\n'
142 // (we cannot be at the beginning of the source
143 // since we have called next() at least once)
144 s.tokPos.Line = s.srcPos.Line - 1
145 s.tokPos.Column = s.lastLineLen
150 lit := s.scanIdentifier()
151 if lit == "true" || lit == "false" {
153 } else if lit == "null" {
156 s.err("illegal char")
159 tok = s.scanNumber(ch)
172 ch = s.scanMantissa(ch)
173 ch = s.scanExponent(ch)
188 if isDecimal(s.peek()) {
190 tok = s.scanNumber(ch)
192 s.err("illegal char")
195 s.err("illegal char: " + string(ch))
199 // finish token ending
200 s.tokEnd = s.srcPos.Offset
202 // create token literal
205 tokenText = string(s.src[s.tokStart:s.tokEnd])
207 s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
216 // scanNumber scans a HCL number definition starting with the given rune
217 func (s *Scanner) scanNumber(ch rune) token.Type {
222 ch = s.next() // seek forward
223 if ch == 'e' || ch == 'E' {
224 ch = s.scanExponent(ch)
229 ch = s.scanFraction(ch)
230 if ch == 'e' || ch == 'E' {
232 ch = s.scanExponent(ch)
241 // If we have a larger number and this is zero, error
242 if zero && pos != s.srcPos {
243 s.err("numbers cannot start with 0")
249 // scanMantissa scans the mantissa beginning from the rune. It returns the next
250 // non decimal rune. It's used to determine whether it's a fraction or exponent.
251 func (s *Scanner) scanMantissa(ch rune) rune {
258 if scanned && ch != eof {
264 // scanFraction scans the fraction after the '.' rune
265 func (s *Scanner) scanFraction(ch rune) rune {
267 ch = s.peek() // we peek just to see if we can move forward
268 ch = s.scanMantissa(ch)
273 // scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
275 func (s *Scanner) scanExponent(ch rune) rune {
276 if ch == 'e' || ch == 'E' {
278 if ch == '-' || ch == '+' {
281 ch = s.scanMantissa(ch)
286 // scanString scans a quoted string
287 func (s *Scanner) scanString() {
290 // '"' opening already consumed
291 // read character after quote
294 if ch == '\n' || ch < 0 || ch == eof {
295 s.err("literal not terminated")
303 // If we're going into a ${} then we can ignore quotes for awhile
304 if braces == 0 && ch == '$' && s.peek() == '{' {
307 } else if braces > 0 && ch == '{' {
310 if braces > 0 && ch == '}' {
322 // scanEscape scans an escape sequence
323 func (s *Scanner) scanEscape() rune {
324 // http://en.cppreference.com/w/cpp/language/escape
325 ch := s.next() // read character after '/'
327 case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
329 case '0', '1', '2', '3', '4', '5', '6', '7':
331 ch = s.scanDigits(ch, 8, 3)
333 // hexadecimal notation
334 ch = s.scanDigits(s.next(), 16, 2)
336 // universal character name
337 ch = s.scanDigits(s.next(), 16, 4)
339 // universal character name
340 ch = s.scanDigits(s.next(), 16, 8)
342 s.err("illegal char escape")
347 // scanDigits scans a rune with the given base for n times. For example, an
348 // octal escape such as \123 would result in scanDigits(ch, 8, 3)
349 func (s *Scanner) scanDigits(ch rune, base, n int) rune {
350 for n > 0 && digitVal(ch) < base {
355 s.err("illegal char escape")
358 // we scanned all digits, put the last non digit char back
363 // scanIdentifier scans an identifier and returns the literal string
364 func (s *Scanner) scanIdentifier() string {
365 offs := s.srcPos.Offset - s.lastCharLen
367 for isLetter(ch) || isDigit(ch) || ch == '-' {
372 s.unread() // we got identifier, put back latest char
375 return string(s.src[offs:s.srcPos.Offset])
378 // recentPosition returns the position of the character immediately after the
379 // character or token returned by the last call to Scan.
380 func (s *Scanner) recentPosition() (pos token.Pos) {
381 pos.Offset = s.srcPos.Offset - s.lastCharLen
383 case s.srcPos.Column > 0:
384 // common case: last character was not a '\n'
385 pos.Line = s.srcPos.Line
386 pos.Column = s.srcPos.Column
387 case s.lastLineLen > 0:
388 // last character was a '\n'
389 // (we cannot be at the beginning of the source
390 // since we have called next() at least once)
391 pos.Line = s.srcPos.Line - 1
392 pos.Column = s.lastLineLen
394 // at the beginning of the source
401 // err reports a scanning error to the s.Error function. If that function is
402 // not set, the error is printed to os.Stderr instead
403 func (s *Scanner) err(msg string) {
405 pos := s.recentPosition()
412 fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
415 // isLetter returns true if the given rune is a letter
416 func isLetter(ch rune) bool {
417 return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
420 // isDigit returns true if the given rune is a decimal digit
421 func isDigit(ch rune) bool {
422 return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
425 // isDecimal returns true if the given rune is an ASCII decimal digit ('0'-'9')
426 func isDecimal(ch rune) bool {
427 return '0' <= ch && ch <= '9'
430 // isHexadecimal returns true if the given rune is a hexadecimal digit
431 func isHexadecimal(ch rune) bool {
432 return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
435 // isWhitespace returns true if the rune is a space, tab, newline or carriage return
436 func isWhitespace(ch rune) bool {
437 return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
440 // digitVal returns the integer value of a given octal, decimal or hexadecimal rune
441 func digitVal(ch rune) int {
443 case '0' <= ch && ch <= '9':
445 case 'a' <= ch && ch <= 'f':
446 return int(ch - 'a' + 10)
447 case 'A' <= ch && ch <= 'F':
448 return int(ch - 'A' + 10)
450 return 16 // larger than any legal digit val