diff options
Diffstat (limited to 'vendor/github.com/hashicorp/hcl/json/scanner/scanner.go')
-rw-r--r-- | vendor/github.com/hashicorp/hcl/json/scanner/scanner.go | 451 |
1 files changed, 451 insertions, 0 deletions
diff --git a/vendor/github.com/hashicorp/hcl/json/scanner/scanner.go b/vendor/github.com/hashicorp/hcl/json/scanner/scanner.go new file mode 100644 index 0000000..dd5c72b --- /dev/null +++ b/vendor/github.com/hashicorp/hcl/json/scanner/scanner.go | |||
@@ -0,0 +1,451 @@ | |||
1 | package scanner | ||
2 | |||
3 | import ( | ||
4 | "bytes" | ||
5 | "fmt" | ||
6 | "os" | ||
7 | "unicode" | ||
8 | "unicode/utf8" | ||
9 | |||
10 | "github.com/hashicorp/hcl/json/token" | ||
11 | ) | ||
12 | |||
// eof is the sentinel rune handed back by the scanner once the underlying
// reader has nothing more to give (or a read fails).
const eof rune = 0
15 | |||
16 | // Scanner defines a lexical scanner | ||
17 | type Scanner struct { | ||
18 | buf *bytes.Buffer // Source buffer for advancing and scanning | ||
19 | src []byte // Source buffer for immutable access | ||
20 | |||
21 | // Source Position | ||
22 | srcPos token.Pos // current position | ||
23 | prevPos token.Pos // previous position, used for peek() method | ||
24 | |||
25 | lastCharLen int // length of last character in bytes | ||
26 | lastLineLen int // length of last line in characters (for correct column reporting) | ||
27 | |||
28 | tokStart int // token text start position | ||
29 | tokEnd int // token text end position | ||
30 | |||
31 | // Error is called for each error encountered. If no Error | ||
32 | // function is set, the error is reported to os.Stderr. | ||
33 | Error func(pos token.Pos, msg string) | ||
34 | |||
35 | // ErrorCount is incremented by one for each error encountered. | ||
36 | ErrorCount int | ||
37 | |||
38 | // tokPos is the start position of most recently scanned token; set by | ||
39 | // Scan. The Filename field is always left untouched by the Scanner. If | ||
40 | // an error is reported (via Error) and Position is invalid, the scanner is | ||
41 | // not inside a token. | ||
42 | tokPos token.Pos | ||
43 | } | ||
44 | |||
45 | // New creates and initializes a new instance of Scanner using src as | ||
46 | // its source content. | ||
47 | func New(src []byte) *Scanner { | ||
48 | // even though we accept a src, we read from a io.Reader compatible type | ||
49 | // (*bytes.Buffer). So in the future we might easily change it to streaming | ||
50 | // read. | ||
51 | b := bytes.NewBuffer(src) | ||
52 | s := &Scanner{ | ||
53 | buf: b, | ||
54 | src: src, | ||
55 | } | ||
56 | |||
57 | // srcPosition always starts with 1 | ||
58 | s.srcPos.Line = 1 | ||
59 | return s | ||
60 | } | ||
61 | |||
62 | // next reads the next rune from the bufferred reader. Returns the rune(0) if | ||
63 | // an error occurs (or io.EOF is returned). | ||
64 | func (s *Scanner) next() rune { | ||
65 | ch, size, err := s.buf.ReadRune() | ||
66 | if err != nil { | ||
67 | // advance for error reporting | ||
68 | s.srcPos.Column++ | ||
69 | s.srcPos.Offset += size | ||
70 | s.lastCharLen = size | ||
71 | return eof | ||
72 | } | ||
73 | |||
74 | if ch == utf8.RuneError && size == 1 { | ||
75 | s.srcPos.Column++ | ||
76 | s.srcPos.Offset += size | ||
77 | s.lastCharLen = size | ||
78 | s.err("illegal UTF-8 encoding") | ||
79 | return ch | ||
80 | } | ||
81 | |||
82 | // remember last position | ||
83 | s.prevPos = s.srcPos | ||
84 | |||
85 | s.srcPos.Column++ | ||
86 | s.lastCharLen = size | ||
87 | s.srcPos.Offset += size | ||
88 | |||
89 | if ch == '\n' { | ||
90 | s.srcPos.Line++ | ||
91 | s.lastLineLen = s.srcPos.Column | ||
92 | s.srcPos.Column = 0 | ||
93 | } | ||
94 | |||
95 | // debug | ||
96 | // fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column) | ||
97 | return ch | ||
98 | } | ||
99 | |||
100 | // unread unreads the previous read Rune and updates the source position | ||
101 | func (s *Scanner) unread() { | ||
102 | if err := s.buf.UnreadRune(); err != nil { | ||
103 | panic(err) // this is user fault, we should catch it | ||
104 | } | ||
105 | s.srcPos = s.prevPos // put back last position | ||
106 | } | ||
107 | |||
108 | // peek returns the next rune without advancing the reader. | ||
109 | func (s *Scanner) peek() rune { | ||
110 | peek, _, err := s.buf.ReadRune() | ||
111 | if err != nil { | ||
112 | return eof | ||
113 | } | ||
114 | |||
115 | s.buf.UnreadRune() | ||
116 | return peek | ||
117 | } | ||
118 | |||
119 | // Scan scans the next token and returns the token. | ||
120 | func (s *Scanner) Scan() token.Token { | ||
121 | ch := s.next() | ||
122 | |||
123 | // skip white space | ||
124 | for isWhitespace(ch) { | ||
125 | ch = s.next() | ||
126 | } | ||
127 | |||
128 | var tok token.Type | ||
129 | |||
130 | // token text markings | ||
131 | s.tokStart = s.srcPos.Offset - s.lastCharLen | ||
132 | |||
133 | // token position, initial next() is moving the offset by one(size of rune | ||
134 | // actually), though we are interested with the starting point | ||
135 | s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen | ||
136 | if s.srcPos.Column > 0 { | ||
137 | // common case: last character was not a '\n' | ||
138 | s.tokPos.Line = s.srcPos.Line | ||
139 | s.tokPos.Column = s.srcPos.Column | ||
140 | } else { | ||
141 | // last character was a '\n' | ||
142 | // (we cannot be at the beginning of the source | ||
143 | // since we have called next() at least once) | ||
144 | s.tokPos.Line = s.srcPos.Line - 1 | ||
145 | s.tokPos.Column = s.lastLineLen | ||
146 | } | ||
147 | |||
148 | switch { | ||
149 | case isLetter(ch): | ||
150 | lit := s.scanIdentifier() | ||
151 | if lit == "true" || lit == "false" { | ||
152 | tok = token.BOOL | ||
153 | } else if lit == "null" { | ||
154 | tok = token.NULL | ||
155 | } else { | ||
156 | s.err("illegal char") | ||
157 | } | ||
158 | case isDecimal(ch): | ||
159 | tok = s.scanNumber(ch) | ||
160 | default: | ||
161 | switch ch { | ||
162 | case eof: | ||
163 | tok = token.EOF | ||
164 | case '"': | ||
165 | tok = token.STRING | ||
166 | s.scanString() | ||
167 | case '.': | ||
168 | tok = token.PERIOD | ||
169 | ch = s.peek() | ||
170 | if isDecimal(ch) { | ||
171 | tok = token.FLOAT | ||
172 | ch = s.scanMantissa(ch) | ||
173 | ch = s.scanExponent(ch) | ||
174 | } | ||
175 | case '[': | ||
176 | tok = token.LBRACK | ||
177 | case ']': | ||
178 | tok = token.RBRACK | ||
179 | case '{': | ||
180 | tok = token.LBRACE | ||
181 | case '}': | ||
182 | tok = token.RBRACE | ||
183 | case ',': | ||
184 | tok = token.COMMA | ||
185 | case ':': | ||
186 | tok = token.COLON | ||
187 | case '-': | ||
188 | if isDecimal(s.peek()) { | ||
189 | ch := s.next() | ||
190 | tok = s.scanNumber(ch) | ||
191 | } else { | ||
192 | s.err("illegal char") | ||
193 | } | ||
194 | default: | ||
195 | s.err("illegal char: " + string(ch)) | ||
196 | } | ||
197 | } | ||
198 | |||
199 | // finish token ending | ||
200 | s.tokEnd = s.srcPos.Offset | ||
201 | |||
202 | // create token literal | ||
203 | var tokenText string | ||
204 | if s.tokStart >= 0 { | ||
205 | tokenText = string(s.src[s.tokStart:s.tokEnd]) | ||
206 | } | ||
207 | s.tokStart = s.tokEnd // ensure idempotency of tokenText() call | ||
208 | |||
209 | return token.Token{ | ||
210 | Type: tok, | ||
211 | Pos: s.tokPos, | ||
212 | Text: tokenText, | ||
213 | } | ||
214 | } | ||
215 | |||
216 | // scanNumber scans a HCL number definition starting with the given rune | ||
217 | func (s *Scanner) scanNumber(ch rune) token.Type { | ||
218 | zero := ch == '0' | ||
219 | pos := s.srcPos | ||
220 | |||
221 | s.scanMantissa(ch) | ||
222 | ch = s.next() // seek forward | ||
223 | if ch == 'e' || ch == 'E' { | ||
224 | ch = s.scanExponent(ch) | ||
225 | return token.FLOAT | ||
226 | } | ||
227 | |||
228 | if ch == '.' { | ||
229 | ch = s.scanFraction(ch) | ||
230 | if ch == 'e' || ch == 'E' { | ||
231 | ch = s.next() | ||
232 | ch = s.scanExponent(ch) | ||
233 | } | ||
234 | return token.FLOAT | ||
235 | } | ||
236 | |||
237 | if ch != eof { | ||
238 | s.unread() | ||
239 | } | ||
240 | |||
241 | // If we have a larger number and this is zero, error | ||
242 | if zero && pos != s.srcPos { | ||
243 | s.err("numbers cannot start with 0") | ||
244 | } | ||
245 | |||
246 | return token.NUMBER | ||
247 | } | ||
248 | |||
249 | // scanMantissa scans the mantissa begining from the rune. It returns the next | ||
250 | // non decimal rune. It's used to determine wheter it's a fraction or exponent. | ||
251 | func (s *Scanner) scanMantissa(ch rune) rune { | ||
252 | scanned := false | ||
253 | for isDecimal(ch) { | ||
254 | ch = s.next() | ||
255 | scanned = true | ||
256 | } | ||
257 | |||
258 | if scanned && ch != eof { | ||
259 | s.unread() | ||
260 | } | ||
261 | return ch | ||
262 | } | ||
263 | |||
264 | // scanFraction scans the fraction after the '.' rune | ||
265 | func (s *Scanner) scanFraction(ch rune) rune { | ||
266 | if ch == '.' { | ||
267 | ch = s.peek() // we peek just to see if we can move forward | ||
268 | ch = s.scanMantissa(ch) | ||
269 | } | ||
270 | return ch | ||
271 | } | ||
272 | |||
273 | // scanExponent scans the remaining parts of an exponent after the 'e' or 'E' | ||
274 | // rune. | ||
275 | func (s *Scanner) scanExponent(ch rune) rune { | ||
276 | if ch == 'e' || ch == 'E' { | ||
277 | ch = s.next() | ||
278 | if ch == '-' || ch == '+' { | ||
279 | ch = s.next() | ||
280 | } | ||
281 | ch = s.scanMantissa(ch) | ||
282 | } | ||
283 | return ch | ||
284 | } | ||
285 | |||
286 | // scanString scans a quoted string | ||
287 | func (s *Scanner) scanString() { | ||
288 | braces := 0 | ||
289 | for { | ||
290 | // '"' opening already consumed | ||
291 | // read character after quote | ||
292 | ch := s.next() | ||
293 | |||
294 | if ch == '\n' || ch < 0 || ch == eof { | ||
295 | s.err("literal not terminated") | ||
296 | return | ||
297 | } | ||
298 | |||
299 | if ch == '"' { | ||
300 | break | ||
301 | } | ||
302 | |||
303 | // If we're going into a ${} then we can ignore quotes for awhile | ||
304 | if braces == 0 && ch == '$' && s.peek() == '{' { | ||
305 | braces++ | ||
306 | s.next() | ||
307 | } else if braces > 0 && ch == '{' { | ||
308 | braces++ | ||
309 | } | ||
310 | if braces > 0 && ch == '}' { | ||
311 | braces-- | ||
312 | } | ||
313 | |||
314 | if ch == '\\' { | ||
315 | s.scanEscape() | ||
316 | } | ||
317 | } | ||
318 | |||
319 | return | ||
320 | } | ||
321 | |||
322 | // scanEscape scans an escape sequence | ||
323 | func (s *Scanner) scanEscape() rune { | ||
324 | // http://en.cppreference.com/w/cpp/language/escape | ||
325 | ch := s.next() // read character after '/' | ||
326 | switch ch { | ||
327 | case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"': | ||
328 | // nothing to do | ||
329 | case '0', '1', '2', '3', '4', '5', '6', '7': | ||
330 | // octal notation | ||
331 | ch = s.scanDigits(ch, 8, 3) | ||
332 | case 'x': | ||
333 | // hexademical notation | ||
334 | ch = s.scanDigits(s.next(), 16, 2) | ||
335 | case 'u': | ||
336 | // universal character name | ||
337 | ch = s.scanDigits(s.next(), 16, 4) | ||
338 | case 'U': | ||
339 | // universal character name | ||
340 | ch = s.scanDigits(s.next(), 16, 8) | ||
341 | default: | ||
342 | s.err("illegal char escape") | ||
343 | } | ||
344 | return ch | ||
345 | } | ||
346 | |||
347 | // scanDigits scans a rune with the given base for n times. For example an | ||
348 | // octal notation \184 would yield in scanDigits(ch, 8, 3) | ||
349 | func (s *Scanner) scanDigits(ch rune, base, n int) rune { | ||
350 | for n > 0 && digitVal(ch) < base { | ||
351 | ch = s.next() | ||
352 | n-- | ||
353 | } | ||
354 | if n > 0 { | ||
355 | s.err("illegal char escape") | ||
356 | } | ||
357 | |||
358 | // we scanned all digits, put the last non digit char back | ||
359 | s.unread() | ||
360 | return ch | ||
361 | } | ||
362 | |||
363 | // scanIdentifier scans an identifier and returns the literal string | ||
364 | func (s *Scanner) scanIdentifier() string { | ||
365 | offs := s.srcPos.Offset - s.lastCharLen | ||
366 | ch := s.next() | ||
367 | for isLetter(ch) || isDigit(ch) || ch == '-' { | ||
368 | ch = s.next() | ||
369 | } | ||
370 | |||
371 | if ch != eof { | ||
372 | s.unread() // we got identifier, put back latest char | ||
373 | } | ||
374 | |||
375 | return string(s.src[offs:s.srcPos.Offset]) | ||
376 | } | ||
377 | |||
378 | // recentPosition returns the position of the character immediately after the | ||
379 | // character or token returned by the last call to Scan. | ||
380 | func (s *Scanner) recentPosition() (pos token.Pos) { | ||
381 | pos.Offset = s.srcPos.Offset - s.lastCharLen | ||
382 | switch { | ||
383 | case s.srcPos.Column > 0: | ||
384 | // common case: last character was not a '\n' | ||
385 | pos.Line = s.srcPos.Line | ||
386 | pos.Column = s.srcPos.Column | ||
387 | case s.lastLineLen > 0: | ||
388 | // last character was a '\n' | ||
389 | // (we cannot be at the beginning of the source | ||
390 | // since we have called next() at least once) | ||
391 | pos.Line = s.srcPos.Line - 1 | ||
392 | pos.Column = s.lastLineLen | ||
393 | default: | ||
394 | // at the beginning of the source | ||
395 | pos.Line = 1 | ||
396 | pos.Column = 1 | ||
397 | } | ||
398 | return | ||
399 | } | ||
400 | |||
401 | // err prints the error of any scanning to s.Error function. If the function is | ||
402 | // not defined, by default it prints them to os.Stderr | ||
403 | func (s *Scanner) err(msg string) { | ||
404 | s.ErrorCount++ | ||
405 | pos := s.recentPosition() | ||
406 | |||
407 | if s.Error != nil { | ||
408 | s.Error(pos, msg) | ||
409 | return | ||
410 | } | ||
411 | |||
412 | fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg) | ||
413 | } | ||
414 | |||
// isLetter returns true if the given rune is an ASCII letter, an underscore
// or a non-ASCII Unicode letter.
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z', ch == '_':
		return true
	case ch >= 0x80:
		return unicode.IsLetter(ch)
	}
	return false
}
419 | |||
// isDigit returns true if the given rune is an ASCII decimal digit or a
// non-ASCII Unicode digit.
func isDigit(ch rune) bool {
	if ch < 0x80 {
		return '0' <= ch && ch <= '9'
	}
	return unicode.IsDigit(ch)
}
424 | |||
// isDecimal returns true if the given rune is one of the ASCII digits '0'
// through '9'.
func isDecimal(ch rune) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
429 | |||
// isHexadecimal returns true if the given rune is a hexadecimal digit, that
// is '0'-'9', 'a'-'f' or 'A'-'F'.
func isHexadecimal(ch rune) bool {
	switch {
	case '0' <= ch && ch <= '9':
		return true
	case 'a' <= ch && ch <= 'f':
		return true
	case 'A' <= ch && ch <= 'F':
		return true
	}
	return false
}
434 | |||
// isWhitespace returns true if the rune is a space, tab, newline or carriage
// return.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
439 | |||
// digitVal returns the numeric value of an octal, decimal or hexadecimal
// digit rune. Any other rune yields 16, which is larger than every legal
// digit value.
func digitVal(ch rune) int {
	if '0' <= ch && ch <= '9' {
		return int(ch - '0')
	}
	if 'a' <= ch && ch <= 'f' {
		return int(ch - 'a' + 10)
	}
	if 'A' <= ch && ch <= 'F' {
		return int(ch - 'A' + 10)
	}
	return 16
}