]> git.immae.eu Git - github/fretlink/terraform-provider-statuscake.git/blob - vendor/github.com/hashicorp/hcl/json/scanner/scanner.go
Initial transfer of provider code
[github/fretlink/terraform-provider-statuscake.git] / vendor / github.com / hashicorp / hcl / json / scanner / scanner.go
1 package scanner
2
3 import (
4 "bytes"
5 "fmt"
6 "os"
7 "unicode"
8 "unicode/utf8"
9
10 "github.com/hashicorp/hcl/json/token"
11 )
12
13 // eof represents a marker rune for the end of the reader.
14 const eof = rune(0)
15
16 // Scanner defines a lexical scanner
17 type Scanner struct {
18 buf *bytes.Buffer // Source buffer for advancing and scanning
19 src []byte // Source buffer for immutable access
20
21 // Source Position
22 srcPos token.Pos // current position
23 prevPos token.Pos // previous position, used for peek() method
24
25 lastCharLen int // length of last character in bytes
26 lastLineLen int // length of last line in characters (for correct column reporting)
27
28 tokStart int // token text start position
29 tokEnd int // token text end position
30
31 // Error is called for each error encountered. If no Error
32 // function is set, the error is reported to os.Stderr.
33 Error func(pos token.Pos, msg string)
34
35 // ErrorCount is incremented by one for each error encountered.
36 ErrorCount int
37
38 // tokPos is the start position of most recently scanned token; set by
39 // Scan. The Filename field is always left untouched by the Scanner. If
40 // an error is reported (via Error) and Position is invalid, the scanner is
41 // not inside a token.
42 tokPos token.Pos
43 }
44
45 // New creates and initializes a new instance of Scanner using src as
46 // its source content.
47 func New(src []byte) *Scanner {
48 // even though we accept a src, we read from a io.Reader compatible type
49 // (*bytes.Buffer). So in the future we might easily change it to streaming
50 // read.
51 b := bytes.NewBuffer(src)
52 s := &Scanner{
53 buf: b,
54 src: src,
55 }
56
57 // srcPosition always starts with 1
58 s.srcPos.Line = 1
59 return s
60 }
61
62 // next reads the next rune from the bufferred reader. Returns the rune(0) if
63 // an error occurs (or io.EOF is returned).
64 func (s *Scanner) next() rune {
65 ch, size, err := s.buf.ReadRune()
66 if err != nil {
67 // advance for error reporting
68 s.srcPos.Column++
69 s.srcPos.Offset += size
70 s.lastCharLen = size
71 return eof
72 }
73
74 if ch == utf8.RuneError && size == 1 {
75 s.srcPos.Column++
76 s.srcPos.Offset += size
77 s.lastCharLen = size
78 s.err("illegal UTF-8 encoding")
79 return ch
80 }
81
82 // remember last position
83 s.prevPos = s.srcPos
84
85 s.srcPos.Column++
86 s.lastCharLen = size
87 s.srcPos.Offset += size
88
89 if ch == '\n' {
90 s.srcPos.Line++
91 s.lastLineLen = s.srcPos.Column
92 s.srcPos.Column = 0
93 }
94
95 // debug
96 // fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
97 return ch
98 }
99
100 // unread unreads the previous read Rune and updates the source position
101 func (s *Scanner) unread() {
102 if err := s.buf.UnreadRune(); err != nil {
103 panic(err) // this is user fault, we should catch it
104 }
105 s.srcPos = s.prevPos // put back last position
106 }
107
108 // peek returns the next rune without advancing the reader.
109 func (s *Scanner) peek() rune {
110 peek, _, err := s.buf.ReadRune()
111 if err != nil {
112 return eof
113 }
114
115 s.buf.UnreadRune()
116 return peek
117 }
118
119 // Scan scans the next token and returns the token.
120 func (s *Scanner) Scan() token.Token {
121 ch := s.next()
122
123 // skip white space
124 for isWhitespace(ch) {
125 ch = s.next()
126 }
127
128 var tok token.Type
129
130 // token text markings
131 s.tokStart = s.srcPos.Offset - s.lastCharLen
132
133 // token position, initial next() is moving the offset by one(size of rune
134 // actually), though we are interested with the starting point
135 s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
136 if s.srcPos.Column > 0 {
137 // common case: last character was not a '\n'
138 s.tokPos.Line = s.srcPos.Line
139 s.tokPos.Column = s.srcPos.Column
140 } else {
141 // last character was a '\n'
142 // (we cannot be at the beginning of the source
143 // since we have called next() at least once)
144 s.tokPos.Line = s.srcPos.Line - 1
145 s.tokPos.Column = s.lastLineLen
146 }
147
148 switch {
149 case isLetter(ch):
150 lit := s.scanIdentifier()
151 if lit == "true" || lit == "false" {
152 tok = token.BOOL
153 } else if lit == "null" {
154 tok = token.NULL
155 } else {
156 s.err("illegal char")
157 }
158 case isDecimal(ch):
159 tok = s.scanNumber(ch)
160 default:
161 switch ch {
162 case eof:
163 tok = token.EOF
164 case '"':
165 tok = token.STRING
166 s.scanString()
167 case '.':
168 tok = token.PERIOD
169 ch = s.peek()
170 if isDecimal(ch) {
171 tok = token.FLOAT
172 ch = s.scanMantissa(ch)
173 ch = s.scanExponent(ch)
174 }
175 case '[':
176 tok = token.LBRACK
177 case ']':
178 tok = token.RBRACK
179 case '{':
180 tok = token.LBRACE
181 case '}':
182 tok = token.RBRACE
183 case ',':
184 tok = token.COMMA
185 case ':':
186 tok = token.COLON
187 case '-':
188 if isDecimal(s.peek()) {
189 ch := s.next()
190 tok = s.scanNumber(ch)
191 } else {
192 s.err("illegal char")
193 }
194 default:
195 s.err("illegal char: " + string(ch))
196 }
197 }
198
199 // finish token ending
200 s.tokEnd = s.srcPos.Offset
201
202 // create token literal
203 var tokenText string
204 if s.tokStart >= 0 {
205 tokenText = string(s.src[s.tokStart:s.tokEnd])
206 }
207 s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
208
209 return token.Token{
210 Type: tok,
211 Pos: s.tokPos,
212 Text: tokenText,
213 }
214 }
215
216 // scanNumber scans a HCL number definition starting with the given rune
217 func (s *Scanner) scanNumber(ch rune) token.Type {
218 zero := ch == '0'
219 pos := s.srcPos
220
221 s.scanMantissa(ch)
222 ch = s.next() // seek forward
223 if ch == 'e' || ch == 'E' {
224 ch = s.scanExponent(ch)
225 return token.FLOAT
226 }
227
228 if ch == '.' {
229 ch = s.scanFraction(ch)
230 if ch == 'e' || ch == 'E' {
231 ch = s.next()
232 ch = s.scanExponent(ch)
233 }
234 return token.FLOAT
235 }
236
237 if ch != eof {
238 s.unread()
239 }
240
241 // If we have a larger number and this is zero, error
242 if zero && pos != s.srcPos {
243 s.err("numbers cannot start with 0")
244 }
245
246 return token.NUMBER
247 }
248
249 // scanMantissa scans the mantissa begining from the rune. It returns the next
250 // non decimal rune. It's used to determine wheter it's a fraction or exponent.
251 func (s *Scanner) scanMantissa(ch rune) rune {
252 scanned := false
253 for isDecimal(ch) {
254 ch = s.next()
255 scanned = true
256 }
257
258 if scanned && ch != eof {
259 s.unread()
260 }
261 return ch
262 }
263
264 // scanFraction scans the fraction after the '.' rune
265 func (s *Scanner) scanFraction(ch rune) rune {
266 if ch == '.' {
267 ch = s.peek() // we peek just to see if we can move forward
268 ch = s.scanMantissa(ch)
269 }
270 return ch
271 }
272
273 // scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
274 // rune.
275 func (s *Scanner) scanExponent(ch rune) rune {
276 if ch == 'e' || ch == 'E' {
277 ch = s.next()
278 if ch == '-' || ch == '+' {
279 ch = s.next()
280 }
281 ch = s.scanMantissa(ch)
282 }
283 return ch
284 }
285
286 // scanString scans a quoted string
287 func (s *Scanner) scanString() {
288 braces := 0
289 for {
290 // '"' opening already consumed
291 // read character after quote
292 ch := s.next()
293
294 if ch == '\n' || ch < 0 || ch == eof {
295 s.err("literal not terminated")
296 return
297 }
298
299 if ch == '"' {
300 break
301 }
302
303 // If we're going into a ${} then we can ignore quotes for awhile
304 if braces == 0 && ch == '$' && s.peek() == '{' {
305 braces++
306 s.next()
307 } else if braces > 0 && ch == '{' {
308 braces++
309 }
310 if braces > 0 && ch == '}' {
311 braces--
312 }
313
314 if ch == '\\' {
315 s.scanEscape()
316 }
317 }
318
319 return
320 }
321
322 // scanEscape scans an escape sequence
323 func (s *Scanner) scanEscape() rune {
324 // http://en.cppreference.com/w/cpp/language/escape
325 ch := s.next() // read character after '/'
326 switch ch {
327 case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
328 // nothing to do
329 case '0', '1', '2', '3', '4', '5', '6', '7':
330 // octal notation
331 ch = s.scanDigits(ch, 8, 3)
332 case 'x':
333 // hexademical notation
334 ch = s.scanDigits(s.next(), 16, 2)
335 case 'u':
336 // universal character name
337 ch = s.scanDigits(s.next(), 16, 4)
338 case 'U':
339 // universal character name
340 ch = s.scanDigits(s.next(), 16, 8)
341 default:
342 s.err("illegal char escape")
343 }
344 return ch
345 }
346
347 // scanDigits scans a rune with the given base for n times. For example an
348 // octal notation \184 would yield in scanDigits(ch, 8, 3)
349 func (s *Scanner) scanDigits(ch rune, base, n int) rune {
350 for n > 0 && digitVal(ch) < base {
351 ch = s.next()
352 n--
353 }
354 if n > 0 {
355 s.err("illegal char escape")
356 }
357
358 // we scanned all digits, put the last non digit char back
359 s.unread()
360 return ch
361 }
362
363 // scanIdentifier scans an identifier and returns the literal string
364 func (s *Scanner) scanIdentifier() string {
365 offs := s.srcPos.Offset - s.lastCharLen
366 ch := s.next()
367 for isLetter(ch) || isDigit(ch) || ch == '-' {
368 ch = s.next()
369 }
370
371 if ch != eof {
372 s.unread() // we got identifier, put back latest char
373 }
374
375 return string(s.src[offs:s.srcPos.Offset])
376 }
377
378 // recentPosition returns the position of the character immediately after the
379 // character or token returned by the last call to Scan.
380 func (s *Scanner) recentPosition() (pos token.Pos) {
381 pos.Offset = s.srcPos.Offset - s.lastCharLen
382 switch {
383 case s.srcPos.Column > 0:
384 // common case: last character was not a '\n'
385 pos.Line = s.srcPos.Line
386 pos.Column = s.srcPos.Column
387 case s.lastLineLen > 0:
388 // last character was a '\n'
389 // (we cannot be at the beginning of the source
390 // since we have called next() at least once)
391 pos.Line = s.srcPos.Line - 1
392 pos.Column = s.lastLineLen
393 default:
394 // at the beginning of the source
395 pos.Line = 1
396 pos.Column = 1
397 }
398 return
399 }
400
401 // err prints the error of any scanning to s.Error function. If the function is
402 // not defined, by default it prints them to os.Stderr
403 func (s *Scanner) err(msg string) {
404 s.ErrorCount++
405 pos := s.recentPosition()
406
407 if s.Error != nil {
408 s.Error(pos, msg)
409 return
410 }
411
412 fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
413 }
414
415 // isHexadecimal returns true if the given rune is a letter
416 func isLetter(ch rune) bool {
417 return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
418 }
419
420 // isHexadecimal returns true if the given rune is a decimal digit
421 func isDigit(ch rune) bool {
422 return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
423 }
424
425 // isHexadecimal returns true if the given rune is a decimal number
426 func isDecimal(ch rune) bool {
427 return '0' <= ch && ch <= '9'
428 }
429
430 // isHexadecimal returns true if the given rune is an hexadecimal number
431 func isHexadecimal(ch rune) bool {
432 return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
433 }
434
435 // isWhitespace returns true if the rune is a space, tab, newline or carriage return
436 func isWhitespace(ch rune) bool {
437 return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
438 }
439
440 // digitVal returns the integer value of a given octal,decimal or hexadecimal rune
441 func digitVal(ch rune) int {
442 switch {
443 case '0' <= ch && ch <= '9':
444 return int(ch - '0')
445 case 'a' <= ch && ch <= 'f':
446 return int(ch - 'a' + 10)
447 case 'A' <= ch && ch <= 'F':
448 return int(ch - 'A' + 10)
449 }
450 return 16 // larger than any legal digit val
451 }