diff options
author | Jake Champlin <jake.champlin.27@gmail.com> | 2017-06-06 12:40:07 -0400 |
---|---|---|
committer | Jake Champlin <jake.champlin.27@gmail.com> | 2017-06-06 12:40:07 -0400 |
commit | bae9f6d2fd5eb5bc80929bd393932b23f14d7c93 (patch) | |
tree | ca9ab12a7d78b1fc27a8f734729081357ce6d252 /vendor/github.com/hashicorp/hil/scanner/scanner.go | |
parent | 254c495b6bebab3fb72a243c4bce858d79e6ee99 (diff) | |
download | terraform-provider-statuscake-bae9f6d2fd5eb5bc80929bd393932b23f14d7c93.tar.gz terraform-provider-statuscake-bae9f6d2fd5eb5bc80929bd393932b23f14d7c93.tar.zst terraform-provider-statuscake-bae9f6d2fd5eb5bc80929bd393932b23f14d7c93.zip |
Initial transfer of provider code
Diffstat (limited to 'vendor/github.com/hashicorp/hil/scanner/scanner.go')
-rw-r--r-- | vendor/github.com/hashicorp/hil/scanner/scanner.go | 550 |
1 files changed, 550 insertions, 0 deletions
diff --git a/vendor/github.com/hashicorp/hil/scanner/scanner.go b/vendor/github.com/hashicorp/hil/scanner/scanner.go new file mode 100644 index 0000000..bab86c6 --- /dev/null +++ b/vendor/github.com/hashicorp/hil/scanner/scanner.go | |||
@@ -0,0 +1,550 @@ | |||
1 | package scanner | ||
2 | |||
3 | import ( | ||
4 | "unicode" | ||
5 | "unicode/utf8" | ||
6 | |||
7 | "github.com/hashicorp/hil/ast" | ||
8 | ) | ||
9 | |||
10 | // Scan returns a channel that recieves Tokens from the given input string. | ||
11 | // | ||
12 | // The scanner's job is just to partition the string into meaningful parts. | ||
13 | // It doesn't do any transformation of the raw input string, so the caller | ||
14 | // must deal with any further interpretation required, such as parsing INTEGER | ||
15 | // tokens into real ints, or dealing with escape sequences in LITERAL or | ||
16 | // STRING tokens. | ||
17 | // | ||
18 | // Strings in the returned tokens are slices from the original string. | ||
19 | // | ||
20 | // startPos should be set to ast.InitPos unless the caller knows that | ||
21 | // this interpolation string is part of a larger file and knows the position | ||
22 | // of the first character in that larger file. | ||
23 | func Scan(s string, startPos ast.Pos) <-chan *Token { | ||
24 | ch := make(chan *Token) | ||
25 | go scan(s, ch, startPos) | ||
26 | return ch | ||
27 | } | ||
28 | |||
// scan is the goroutine body started by Scan. It walks the input
// string, sending one Token at a time on ch, and closes ch once the
// whole input has been consumed or an INVALID token has forced a
// synthetic EOF.
func scan(s string, ch chan<- *Token, pos ast.Pos) {
	// 'remain' starts off as the whole string but we gradually
	// slice off the front of it as we work our way through.
	remain := s

	// nesting keeps track of how many ${ .. } sequences we are
	// inside, so we can recognize the minor differences in syntax
	// between outer string literals (LITERAL tokens) and quoted
	// string literals (STRING tokens).
	nesting := 0

	// We're going to flip back and forth between parsing literals/strings
	// and parsing interpolation sequences ${ .. } until we reach EOF or
	// some INVALID token.
All:
	for {
		startPos := pos
		// Literal string processing first, since the beginning of
		// a string is always outside of an interpolation sequence.
		literalVal, terminator := scanLiteral(remain, pos, nesting > 0)

		if len(literalVal) > 0 {
			litType := LITERAL
			if nesting > 0 {
				litType = STRING
			}
			ch <- &Token{
				Type:    litType,
				Content: literalVal,
				Pos:     startPos,
			}
			remain = remain[len(literalVal):]
		}

		// The terminator token (BEGIN, CQUOTE, EOF or INVALID) is
		// emitted even when the preceding literal was empty.
		ch <- terminator
		remain = remain[len(terminator.Content):]
		pos = terminator.Pos
		// Safe to use len() here because none of the terminator tokens
		// can contain UTF-8 sequences.
		pos.Column = pos.Column + len(terminator.Content)

		switch terminator.Type {
		case INVALID:
			// Synthetic EOF after invalid token, since further scanning
			// is likely to just produce more garbage.
			ch <- &Token{
				Type:    EOF,
				Content: "",
				Pos:     pos,
			}
			break All
		case EOF:
			// All done!
			break All
		case BEGIN:
			nesting++
		case CQUOTE:
			// nothing special to do
		default:
			// Should never happen: scanLiteral only returns the four
			// terminator types handled above.
			panic("invalid string/literal terminator")
		}

		// Now we do the processing of the insides of ${ .. } sequences.
		// This loop terminates when we encounter either a closing } or
		// an opening ", which will cause us to return to literal processing.
	Interpolation:
		for {

			token, size, newPos := scanInterpolationToken(remain, pos)
			ch <- token
			remain = remain[size:]
			pos = newPos

			switch token.Type {
			case INVALID:
				// Synthetic EOF after invalid token, since further scanning
				// is likely to just produce more garbage.
				ch <- &Token{
					Type:    EOF,
					Content: "",
					Pos:     pos,
				}
				break All
			case EOF:
				// All done
				// (though a syntax error that we'll catch in the parser)
				break All
			case END:
				nesting--
				if nesting < 0 {
					// Can happen if there are unbalanced ${ and } sequences
					// in the input, which we'll catch in the parser.
					nesting = 0
				}
				break Interpolation
			case OQUOTE:
				// Beginning of nested quoted string
				break Interpolation
			}
		}
	}

	close(ch)
}
134 | |||
135 | // Returns the token found at the start of the given string, followed by | ||
136 | // the number of bytes that were consumed from the string and the adjusted | ||
137 | // source position. | ||
138 | // | ||
139 | // Note that the number of bytes consumed can be more than the length of | ||
140 | // the returned token contents if the string begins with whitespace, since | ||
141 | // it will be silently consumed before reading the token. | ||
142 | func scanInterpolationToken(s string, startPos ast.Pos) (*Token, int, ast.Pos) { | ||
143 | pos := startPos | ||
144 | size := 0 | ||
145 | |||
146 | // Consume whitespace, if any | ||
147 | for len(s) > 0 && byteIsSpace(s[0]) { | ||
148 | if s[0] == '\n' { | ||
149 | pos.Column = 1 | ||
150 | pos.Line++ | ||
151 | } else { | ||
152 | pos.Column++ | ||
153 | } | ||
154 | size++ | ||
155 | s = s[1:] | ||
156 | } | ||
157 | |||
158 | // Unexpected EOF during sequence | ||
159 | if len(s) == 0 { | ||
160 | return &Token{ | ||
161 | Type: EOF, | ||
162 | Content: "", | ||
163 | Pos: pos, | ||
164 | }, size, pos | ||
165 | } | ||
166 | |||
167 | next := s[0] | ||
168 | var token *Token | ||
169 | |||
170 | switch next { | ||
171 | case '(', ')', '[', ']', ',', '.', '+', '-', '*', '/', '%', '?', ':': | ||
172 | // Easy punctuation symbols that don't have any special meaning | ||
173 | // during scanning, and that stand for themselves in the | ||
174 | // TokenType enumeration. | ||
175 | token = &Token{ | ||
176 | Type: TokenType(next), | ||
177 | Content: s[:1], | ||
178 | Pos: pos, | ||
179 | } | ||
180 | case '}': | ||
181 | token = &Token{ | ||
182 | Type: END, | ||
183 | Content: s[:1], | ||
184 | Pos: pos, | ||
185 | } | ||
186 | case '"': | ||
187 | token = &Token{ | ||
188 | Type: OQUOTE, | ||
189 | Content: s[:1], | ||
190 | Pos: pos, | ||
191 | } | ||
192 | case '!': | ||
193 | if len(s) >= 2 && s[:2] == "!=" { | ||
194 | token = &Token{ | ||
195 | Type: NOTEQUAL, | ||
196 | Content: s[:2], | ||
197 | Pos: pos, | ||
198 | } | ||
199 | } else { | ||
200 | token = &Token{ | ||
201 | Type: BANG, | ||
202 | Content: s[:1], | ||
203 | Pos: pos, | ||
204 | } | ||
205 | } | ||
206 | case '<': | ||
207 | if len(s) >= 2 && s[:2] == "<=" { | ||
208 | token = &Token{ | ||
209 | Type: LTE, | ||
210 | Content: s[:2], | ||
211 | Pos: pos, | ||
212 | } | ||
213 | } else { | ||
214 | token = &Token{ | ||
215 | Type: LT, | ||
216 | Content: s[:1], | ||
217 | Pos: pos, | ||
218 | } | ||
219 | } | ||
220 | case '>': | ||
221 | if len(s) >= 2 && s[:2] == ">=" { | ||
222 | token = &Token{ | ||
223 | Type: GTE, | ||
224 | Content: s[:2], | ||
225 | Pos: pos, | ||
226 | } | ||
227 | } else { | ||
228 | token = &Token{ | ||
229 | Type: GT, | ||
230 | Content: s[:1], | ||
231 | Pos: pos, | ||
232 | } | ||
233 | } | ||
234 | case '=': | ||
235 | if len(s) >= 2 && s[:2] == "==" { | ||
236 | token = &Token{ | ||
237 | Type: EQUAL, | ||
238 | Content: s[:2], | ||
239 | Pos: pos, | ||
240 | } | ||
241 | } else { | ||
242 | // A single equals is not a valid operator | ||
243 | token = &Token{ | ||
244 | Type: INVALID, | ||
245 | Content: s[:1], | ||
246 | Pos: pos, | ||
247 | } | ||
248 | } | ||
249 | case '&': | ||
250 | if len(s) >= 2 && s[:2] == "&&" { | ||
251 | token = &Token{ | ||
252 | Type: AND, | ||
253 | Content: s[:2], | ||
254 | Pos: pos, | ||
255 | } | ||
256 | } else { | ||
257 | token = &Token{ | ||
258 | Type: INVALID, | ||
259 | Content: s[:1], | ||
260 | Pos: pos, | ||
261 | } | ||
262 | } | ||
263 | case '|': | ||
264 | if len(s) >= 2 && s[:2] == "||" { | ||
265 | token = &Token{ | ||
266 | Type: OR, | ||
267 | Content: s[:2], | ||
268 | Pos: pos, | ||
269 | } | ||
270 | } else { | ||
271 | token = &Token{ | ||
272 | Type: INVALID, | ||
273 | Content: s[:1], | ||
274 | Pos: pos, | ||
275 | } | ||
276 | } | ||
277 | default: | ||
278 | if next >= '0' && next <= '9' { | ||
279 | num, numType := scanNumber(s) | ||
280 | token = &Token{ | ||
281 | Type: numType, | ||
282 | Content: num, | ||
283 | Pos: pos, | ||
284 | } | ||
285 | } else if stringStartsWithIdentifier(s) { | ||
286 | ident, runeLen := scanIdentifier(s) | ||
287 | tokenType := IDENTIFIER | ||
288 | if ident == "true" || ident == "false" { | ||
289 | tokenType = BOOL | ||
290 | } | ||
291 | token = &Token{ | ||
292 | Type: tokenType, | ||
293 | Content: ident, | ||
294 | Pos: pos, | ||
295 | } | ||
296 | // Skip usual token handling because it doesn't | ||
297 | // know how to deal with UTF-8 sequences. | ||
298 | pos.Column = pos.Column + runeLen | ||
299 | return token, size + len(ident), pos | ||
300 | } else { | ||
301 | _, byteLen := utf8.DecodeRuneInString(s) | ||
302 | token = &Token{ | ||
303 | Type: INVALID, | ||
304 | Content: s[:byteLen], | ||
305 | Pos: pos, | ||
306 | } | ||
307 | // Skip usual token handling because it doesn't | ||
308 | // know how to deal with UTF-8 sequences. | ||
309 | pos.Column = pos.Column + 1 | ||
310 | return token, size + byteLen, pos | ||
311 | } | ||
312 | } | ||
313 | |||
314 | // Here we assume that the token content contains no UTF-8 sequences, | ||
315 | // because we dealt with UTF-8 characters as a special case where | ||
316 | // necessary above. | ||
317 | size = size + len(token.Content) | ||
318 | pos.Column = pos.Column + len(token.Content) | ||
319 | |||
320 | return token, size, pos | ||
321 | } | ||
322 | |||
// scanLiteral returns the (possibly-empty) prefix of the given string
// that represents a literal, followed by the token that marks the end
// of the literal.
//
// When nested is true the literal is being scanned inside a quoted
// string, which changes what terminates it: a closing quote produces a
// CQUOTE terminator, and reaching end-of-input is an error (INVALID)
// rather than a normal EOF.
func scanLiteral(s string, startPos ast.Pos, nested bool) (string, *Token) {
	litLen := 0
	pos := startPos
	var terminator *Token
	for {

		if litLen >= len(s) {
			if nested {
				// We've ended in the middle of a quoted string,
				// which means this token is actually invalid.
				return "", &Token{
					Type:    INVALID,
					Content: s,
					Pos:     startPos,
				}
			}
			terminator = &Token{
				Type:    EOF,
				Content: "",
				Pos:     pos,
			}
			break
		}

		next := s[litLen]

		// "${" begins an interpolation sequence and so terminates the
		// literal; "$$" is an escape for a literal dollar sign.
		if next == '$' && len(s) > litLen+1 {
			follow := s[litLen+1]

			if follow == '{' {
				terminator = &Token{
					Type:    BEGIN,
					Content: s[litLen : litLen+2],
					Pos:     pos,
				}
				pos.Column = pos.Column + 2
				break
			} else if follow == '$' {
				// Double-$ escapes the special processing of $,
				// so we will consume both characters here.
				pos.Column = pos.Column + 2
				litLen = litLen + 2
				continue
			}
		}

		// special handling that applies only to quoted strings
		if nested {
			if next == '"' {
				terminator = &Token{
					Type:    CQUOTE,
					Content: s[litLen : litLen+1],
					Pos:     pos,
				}
				pos.Column = pos.Column + 1
				break
			}

			// Escaped quote marks do not terminate the string.
			//
			// All we do here in the scanner is avoid terminating a string
			// due to an escaped quote. The parser is responsible for the
			// full handling of escape sequences, since it's able to produce
			// better error messages than we can produce in here.
			if next == '\\' && len(s) > litLen+1 {
				follow := s[litLen+1]

				if follow == '"' {
					// \" escapes the special processing of ",
					// so we will consume both characters here.
					pos.Column = pos.Column + 2
					litLen = litLen + 2
					continue
				}
			}
		}

		if next == '\n' {
			pos.Column = 1
			pos.Line++
			litLen++
		} else {
			pos.Column++

			// "Column" measures runes, so we need to actually consume
			// a valid UTF-8 character here.
			_, size := utf8.DecodeRuneInString(s[litLen:])
			litLen = litLen + size
		}

	}

	return s[:litLen], terminator
}
419 | |||
420 | // scanNumber returns the extent of the prefix of the string that represents | ||
421 | // a valid number, along with what type of number it represents: INT or FLOAT. | ||
422 | // | ||
423 | // scanNumber does only basic character analysis: numbers consist of digits | ||
424 | // and periods, with at least one period signalling a FLOAT. It's the parser's | ||
425 | // responsibility to validate the form and range of the number, such as ensuring | ||
426 | // that a FLOAT actually contains only one period, etc. | ||
427 | func scanNumber(s string) (string, TokenType) { | ||
428 | period := -1 | ||
429 | byteLen := 0 | ||
430 | numType := INTEGER | ||
431 | for { | ||
432 | if byteLen >= len(s) { | ||
433 | break | ||
434 | } | ||
435 | |||
436 | next := s[byteLen] | ||
437 | if next != '.' && (next < '0' || next > '9') { | ||
438 | // If our last value was a period, then we're not a float, | ||
439 | // we're just an integer that ends in a period. | ||
440 | if period == byteLen-1 { | ||
441 | byteLen-- | ||
442 | numType = INTEGER | ||
443 | } | ||
444 | |||
445 | break | ||
446 | } | ||
447 | |||
448 | if next == '.' { | ||
449 | // If we've already seen a period, break out | ||
450 | if period >= 0 { | ||
451 | break | ||
452 | } | ||
453 | |||
454 | period = byteLen | ||
455 | numType = FLOAT | ||
456 | } | ||
457 | |||
458 | byteLen++ | ||
459 | } | ||
460 | |||
461 | return s[:byteLen], numType | ||
462 | } | ||
463 | |||
// scanIdentifier returns the extent of the prefix of the string that
// represents a valid identifier, along with the length of that prefix
// in runes.
//
// Identifiers may contain utf8-encoded non-Latin letters, which will
// cause the returned "rune length" to be shorter than the byte length
// of the returned string.
//
// A star ('*') is accepted only between periods; a star that doesn't
// fit that pattern ends the identifier (and, together with the period
// that preceded it, is excluded from the result).
func scanIdentifier(s string) (string, int) {
	byteLen := 0
	runeLen := 0
	for byteLen < len(s) {
		nextRune, size := utf8.DecodeRuneInString(s[byteLen:])
		if !(nextRune == '_' ||
			nextRune == '-' ||
			nextRune == '.' ||
			nextRune == '*' ||
			unicode.IsNumber(nextRune) ||
			unicode.IsLetter(nextRune) ||
			unicode.IsMark(nextRune)) {
			break
		}

		// If we reach a star, it must follow a period to be part
		// of the same identifier. (The byteLen == 0 guard also
		// prevents an out-of-range index when the string begins
		// with a star.)
		if nextRune == '*' && (byteLen == 0 || s[byteLen-1] != '.') {
			break
		}

		// If our previous character was a star, then the current must
		// be a period. Otherwise, undo the star (and the period that
		// introduced it) and exit. Both counters are rolled back so
		// the returned rune length stays consistent with the string.
		if byteLen > 0 && s[byteLen-1] == '*' && nextRune != '.' {
			byteLen--
			runeLen--
			if byteLen > 0 && s[byteLen-1] == '.' {
				byteLen--
				runeLen--
			}

			break
		}

		byteLen = byteLen + size
		runeLen = runeLen + 1
	}

	return s[:byteLen], runeLen
}
513 | |||
// byteIsSpace implements a restrictive interpretation of spaces that
// includes only what's valid inside interpolation sequences: spaces,
// tabs, carriage returns and newlines.
func byteIsSpace(b byte) bool {
	return b == ' ' || b == '\t' || b == '\r' || b == '\n'
}
524 | |||
// stringStartsWithIdentifier returns true if the given string begins
// with a character that is a legal start of an identifier: an
// underscore or any character that Unicode considers to be a letter.
func stringStartsWithIdentifier(s string) bool {
	if len(s) == 0 {
		return false
	}

	first := s[0]

	// Easy ASCII cases first
	switch {
	case first == '_':
		return true
	case first >= 'a' && first <= 'z':
		return true
	case first >= 'A' && first <= 'Z':
		return true
	}

	// If our first byte begins a UTF-8 sequence then the sequence
	// might be a unicode letter.
	if !utf8.RuneStart(first) {
		return false
	}
	firstRune, _ := utf8.DecodeRuneInString(s)
	return unicode.IsLetter(firstRune)
}