Diffstat (limited to 'vendor/github.com/hashicorp/hil/scanner')
 vendor/github.com/hashicorp/hil/scanner/peeker.go           |  55 +
 vendor/github.com/hashicorp/hil/scanner/scanner.go          | 550 +
 vendor/github.com/hashicorp/hil/scanner/token.go            | 105 +
 vendor/github.com/hashicorp/hil/scanner/tokentype_string.go |  51 +
 4 files changed, 761 insertions(+), 0 deletions(-)
diff --git a/vendor/github.com/hashicorp/hil/scanner/peeker.go b/vendor/github.com/hashicorp/hil/scanner/peeker.go
new file mode 100644
index 0000000..4de3728
--- /dev/null
+++ b/vendor/github.com/hashicorp/hil/scanner/peeker.go
@@ -0,0 +1,55 @@
package scanner

// Peeker is a utility that wraps a token channel returned by Scan and
// provides an interface that allows a caller (e.g. the parser) to
// work with the token stream in a mode that allows one token of lookahead,
// and provides utilities for more convenient processing of the stream.
type Peeker struct {
	ch     <-chan *Token
	peeked *Token
}

func NewPeeker(ch <-chan *Token) *Peeker {
	return &Peeker{
		ch: ch,
	}
}

// Peek returns the next token in the stream without consuming it. A
// subsequent call to Read will return the same token.
func (p *Peeker) Peek() *Token {
	if p.peeked == nil {
		p.peeked = <-p.ch
	}
	return p.peeked
}

// Read consumes the next token in the stream and returns it.
func (p *Peeker) Read() *Token {
	token := p.Peek()

	// As a special case, we will produce the EOF token forever once
	// it is reached.
	if token.Type != EOF {
		p.peeked = nil
	}

	return token
}

// Close ensures that the token stream has been exhausted, to prevent
// the goroutine in the underlying scanner from leaking.
//
// It's not necessary to call this if the caller reads the token stream
// to EOF, since that implicitly closes the scanner.
func (p *Peeker) Close() {
	for range p.ch {
		// discard
	}
	// Install a synthetic EOF token in 'peeked' in case someone
	// erroneously calls Peek() or Read() after we've closed.
	p.peeked = &Token{
		Type:    EOF,
		Content: "",
	}
}
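
For orientation, here is a minimal sketch of how a caller might drive this Peeker. The input string is illustrative only and is not part of the vendored change:

package main

import (
	"fmt"

	"github.com/hashicorp/hil/ast"
	"github.com/hashicorp/hil/scanner"
)

func main() {
	tokens := scanner.Scan("hello ${name}", ast.InitPos)
	p := scanner.NewPeeker(tokens)
	defer p.Close() // drain the stream so the scanner goroutine cannot leak

	// Peek inspects the next token without consuming it; the
	// following Read returns that same token.
	fmt.Println("lookahead:", p.Peek())

	for {
		tok := p.Read()
		fmt.Printf("%s at line %d, column %d\n", tok, tok.Pos.Line, tok.Pos.Column)
		if tok.Type == scanner.EOF {
			break // Read keeps returning EOF from here on
		}
	}
}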
diff --git a/vendor/github.com/hashicorp/hil/scanner/scanner.go b/vendor/github.com/hashicorp/hil/scanner/scanner.go
new file mode 100644
index 0000000..bab86c6
--- /dev/null
+++ b/vendor/github.com/hashicorp/hil/scanner/scanner.go
@@ -0,0 +1,550 @@
package scanner

import (
	"unicode"
	"unicode/utf8"

	"github.com/hashicorp/hil/ast"
)

// Scan returns a channel that receives Tokens from the given input string.
//
// The scanner's job is just to partition the string into meaningful parts.
// It doesn't do any transformation of the raw input string, so the caller
// must deal with any further interpretation required, such as parsing INTEGER
// tokens into real ints, or dealing with escape sequences in LITERAL or
// STRING tokens.
//
// Strings in the returned tokens are slices from the original string.
//
// startPos should be set to ast.InitPos unless the caller knows that
// this interpolation string is part of a larger file and knows the position
// of the first character in that larger file.
func Scan(s string, startPos ast.Pos) <-chan *Token {
	ch := make(chan *Token)
	go scan(s, ch, startPos)
	return ch
}

func scan(s string, ch chan<- *Token, pos ast.Pos) {
	// 'remain' starts off as the whole string, but we gradually
	// slice off the front of it as we work our way through.
	remain := s

	// nesting keeps track of how many ${ .. } sequences we are
	// inside, so we can recognize the minor differences in syntax
	// between outer string literals (LITERAL tokens) and quoted
	// string literals (STRING tokens).
	nesting := 0

	// We're going to flip back and forth between parsing literals/strings
	// and parsing interpolation sequences ${ .. } until we reach EOF or
	// some INVALID token.
All:
	for {
		startPos := pos
		// Literal string processing first, since the beginning of
		// a string is always outside of an interpolation sequence.
		literalVal, terminator := scanLiteral(remain, pos, nesting > 0)

		if len(literalVal) > 0 {
			litType := LITERAL
			if nesting > 0 {
				litType = STRING
			}
			ch <- &Token{
				Type:    litType,
				Content: literalVal,
				Pos:     startPos,
			}
			remain = remain[len(literalVal):]
		}

		ch <- terminator
		remain = remain[len(terminator.Content):]
		pos = terminator.Pos
		// Safe to use len() here because none of the terminator tokens
		// can contain UTF-8 sequences.
		pos.Column = pos.Column + len(terminator.Content)

		switch terminator.Type {
		case INVALID:
			// Synthetic EOF after invalid token, since further scanning
			// is likely to just produce more garbage.
			ch <- &Token{
				Type:    EOF,
				Content: "",
				Pos:     pos,
			}
			break All
		case EOF:
			// All done!
			break All
		case BEGIN:
			nesting++
		case CQUOTE:
			// nothing special to do
		default:
			// Should never happen
			panic("invalid string/literal terminator")
		}

		// Now we do the processing of the insides of ${ .. } sequences.
		// This loop terminates when we encounter either a closing } or
		// an opening ", which will cause us to return to literal processing.
	Interpolation:
		for {
			token, size, newPos := scanInterpolationToken(remain, pos)
			ch <- token
			remain = remain[size:]
			pos = newPos

			switch token.Type {
			case INVALID:
				// Synthetic EOF after invalid token, since further scanning
				// is likely to just produce more garbage.
				ch <- &Token{
					Type:    EOF,
					Content: "",
					Pos:     pos,
				}
				break All
			case EOF:
				// All done
				// (though a syntax error that we'll catch in the parser)
				break All
			case END:
				nesting--
				if nesting < 0 {
					// Can happen if there are unbalanced ${ and } sequences
					// in the input, which we'll catch in the parser.
					nesting = 0
				}
				break Interpolation
			case OQUOTE:
				// Beginning of nested quoted string
				break Interpolation
			}
		}
	}

	close(ch)
}

// Returns the token found at the start of the given string, followed by
// the number of bytes that were consumed from the string and the adjusted
// source position.
//
// Note that the number of bytes consumed can be more than the length of
// the returned token contents if the string begins with whitespace, since
// it will be silently consumed before reading the token.
func scanInterpolationToken(s string, startPos ast.Pos) (*Token, int, ast.Pos) {
	pos := startPos
	size := 0

	// Consume whitespace, if any
	for len(s) > 0 && byteIsSpace(s[0]) {
		if s[0] == '\n' {
			pos.Column = 1
			pos.Line++
		} else {
			pos.Column++
		}
		size++
		s = s[1:]
	}

	// Unexpected EOF during sequence
	if len(s) == 0 {
		return &Token{
			Type:    EOF,
			Content: "",
			Pos:     pos,
		}, size, pos
	}

	next := s[0]
	var token *Token

	switch next {
	case '(', ')', '[', ']', ',', '.', '+', '-', '*', '/', '%', '?', ':':
		// Easy punctuation symbols that don't have any special meaning
		// during scanning, and that stand for themselves in the
		// TokenType enumeration.
		token = &Token{
			Type:    TokenType(next),
			Content: s[:1],
			Pos:     pos,
		}
	case '}':
		token = &Token{
			Type:    END,
			Content: s[:1],
			Pos:     pos,
		}
	case '"':
		token = &Token{
			Type:    OQUOTE,
			Content: s[:1],
			Pos:     pos,
		}
	case '!':
		if len(s) >= 2 && s[:2] == "!=" {
			token = &Token{
				Type:    NOTEQUAL,
				Content: s[:2],
				Pos:     pos,
			}
		} else {
			token = &Token{
				Type:    BANG,
				Content: s[:1],
				Pos:     pos,
			}
		}
	case '<':
		if len(s) >= 2 && s[:2] == "<=" {
			token = &Token{
				Type:    LTE,
				Content: s[:2],
				Pos:     pos,
			}
		} else {
			token = &Token{
				Type:    LT,
				Content: s[:1],
				Pos:     pos,
			}
		}
	case '>':
		if len(s) >= 2 && s[:2] == ">=" {
			token = &Token{
				Type:    GTE,
				Content: s[:2],
				Pos:     pos,
			}
		} else {
			token = &Token{
				Type:    GT,
				Content: s[:1],
				Pos:     pos,
			}
		}
	case '=':
		if len(s) >= 2 && s[:2] == "==" {
			token = &Token{
				Type:    EQUAL,
				Content: s[:2],
				Pos:     pos,
			}
		} else {
			// A single equals is not a valid operator
			token = &Token{
				Type:    INVALID,
				Content: s[:1],
				Pos:     pos,
			}
		}
	case '&':
		if len(s) >= 2 && s[:2] == "&&" {
			token = &Token{
				Type:    AND,
				Content: s[:2],
				Pos:     pos,
			}
		} else {
			token = &Token{
				Type:    INVALID,
				Content: s[:1],
				Pos:     pos,
			}
		}
	case '|':
		if len(s) >= 2 && s[:2] == "||" {
			token = &Token{
				Type:    OR,
				Content: s[:2],
				Pos:     pos,
			}
		} else {
			token = &Token{
				Type:    INVALID,
				Content: s[:1],
				Pos:     pos,
			}
		}
	default:
		if next >= '0' && next <= '9' {
			num, numType := scanNumber(s)
			token = &Token{
				Type:    numType,
				Content: num,
				Pos:     pos,
			}
		} else if stringStartsWithIdentifier(s) {
			ident, runeLen := scanIdentifier(s)
			tokenType := IDENTIFIER
			if ident == "true" || ident == "false" {
				tokenType = BOOL
			}
			token = &Token{
				Type:    tokenType,
				Content: ident,
				Pos:     pos,
			}
			// Skip usual token handling because it doesn't
			// know how to deal with UTF-8 sequences.
			pos.Column = pos.Column + runeLen
			return token, size + len(ident), pos
		} else {
			_, byteLen := utf8.DecodeRuneInString(s)
			token = &Token{
				Type:    INVALID,
				Content: s[:byteLen],
				Pos:     pos,
			}
			// Skip usual token handling because it doesn't
			// know how to deal with UTF-8 sequences.
			pos.Column = pos.Column + 1
			return token, size + byteLen, pos
		}
	}

	// Here we assume that the token content contains no UTF-8 sequences,
	// because we dealt with UTF-8 characters as a special case where
	// necessary above.
	size = size + len(token.Content)
	pos.Column = pos.Column + len(token.Content)

	return token, size, pos
}

// Returns the (possibly-empty) prefix of the given string that represents
// a literal, followed by the token that marks the end of the literal.
func scanLiteral(s string, startPos ast.Pos, nested bool) (string, *Token) {
	litLen := 0
	pos := startPos
	var terminator *Token
	for {
		if litLen >= len(s) {
			if nested {
				// We've ended in the middle of a quoted string,
				// which means this token is actually invalid.
				return "", &Token{
					Type:    INVALID,
					Content: s,
					Pos:     startPos,
				}
			}
			terminator = &Token{
				Type:    EOF,
				Content: "",
				Pos:     pos,
			}
			break
		}

		next := s[litLen]

		if next == '$' && len(s) > litLen+1 {
			follow := s[litLen+1]

			if follow == '{' {
				terminator = &Token{
					Type:    BEGIN,
					Content: s[litLen : litLen+2],
					Pos:     pos,
				}
				pos.Column = pos.Column + 2
				break
			} else if follow == '$' {
				// Double-$ escapes the special processing of $,
				// so we will consume both characters here.
				pos.Column = pos.Column + 2
				litLen = litLen + 2
				continue
			}
		}

		// special handling that applies only to quoted strings
		if nested {
			if next == '"' {
				terminator = &Token{
					Type:    CQUOTE,
					Content: s[litLen : litLen+1],
					Pos:     pos,
				}
				pos.Column = pos.Column + 1
				break
			}

			// Escaped quote marks do not terminate the string.
			//
			// All we do here in the scanner is avoid terminating a string
			// due to an escaped quote. The parser is responsible for the
			// full handling of escape sequences, since it's able to produce
			// better error messages than we can produce in here.
			if next == '\\' && len(s) > litLen+1 {
				follow := s[litLen+1]

				if follow == '"' {
					// \" escapes the special processing of ",
					// so we will consume both characters here.
					pos.Column = pos.Column + 2
					litLen = litLen + 2
					continue
				}
			}
		}

		if next == '\n' {
			pos.Column = 1
			pos.Line++
			litLen++
		} else {
			pos.Column++

			// "Column" measures runes, so we need to actually consume
			// a valid UTF-8 character here.
			_, size := utf8.DecodeRuneInString(s[litLen:])
			litLen = litLen + size
		}
	}

	return s[:litLen], terminator
}

// scanNumber returns the extent of the prefix of the string that represents
// a valid number, along with what type of number it represents: INTEGER or FLOAT.
//
// scanNumber does only basic character analysis: numbers consist of digits
// and periods, with at least one period signalling a FLOAT. It's the parser's
// responsibility to validate the form and range of the number, such as ensuring
// that a FLOAT actually contains only one period, etc.
func scanNumber(s string) (string, TokenType) {
	period := -1
	byteLen := 0
	numType := INTEGER
	for {
		if byteLen >= len(s) {
			break
		}

		next := s[byteLen]
		if next != '.' && (next < '0' || next > '9') {
			// If our last value was a period, then we're not a float,
			// we're just an integer that ends in a period.
			if period == byteLen-1 {
				byteLen--
				numType = INTEGER
			}

			break
		}

		if next == '.' {
			// If we've already seen a period, break out
			if period >= 0 {
				break
			}

			period = byteLen
			numType = FLOAT
		}

		byteLen++
	}

	return s[:byteLen], numType
}

// scanIdentifier returns the extent of the prefix of the string that
// represents a valid identifier, along with the length of that prefix
// in runes.
//
// Identifiers may contain utf8-encoded non-Latin letters, which will
// cause the returned "rune length" to be shorter than the byte length
// of the returned string.
func scanIdentifier(s string) (string, int) {
	byteLen := 0
	runeLen := 0
	for {
		if byteLen >= len(s) {
			break
		}

		nextRune, size := utf8.DecodeRuneInString(s[byteLen:])
		if !(nextRune == '_' ||
			nextRune == '-' ||
			nextRune == '.' ||
			nextRune == '*' ||
			unicode.IsNumber(nextRune) ||
			unicode.IsLetter(nextRune) ||
			unicode.IsMark(nextRune)) {
			break
		}

		// If we reach a star, it must be between periods to be part
		// of the same identifier.
		if nextRune == '*' && s[byteLen-1] != '.' {
			break
		}

		// If our previous character was a star, then the current must
		// be a period. Otherwise, undo that and exit.
		if byteLen > 0 && s[byteLen-1] == '*' && nextRune != '.' {
			byteLen--
			if s[byteLen-1] == '.' {
				byteLen--
			}

			break
		}

		byteLen = byteLen + size
		runeLen = runeLen + 1
	}

	return s[:byteLen], runeLen
}

// byteIsSpace implements a restrictive interpretation of spaces that includes
// only what's valid inside interpolation sequences: spaces, tabs,
// carriage returns, and newlines.
func byteIsSpace(b byte) bool {
	switch b {
	case ' ', '\t', '\r', '\n':
		return true
	default:
		return false
	}
}

// stringStartsWithIdentifier returns true if the given string begins with
// a character that is a legal start of an identifier: an underscore or
// any character that Unicode considers to be a letter.
func stringStartsWithIdentifier(s string) bool {
	if len(s) == 0 {
		return false
	}

	first := s[0]

	// Easy ASCII cases first
	if (first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_' {
		return true
	}

	// If our first byte begins a UTF-8 sequence then the sequence might
	// be a unicode letter.
	if utf8.RuneStart(first) {
		firstRune, _ := utf8.DecodeRuneInString(s)
		if unicode.IsLetter(firstRune) {
			return true
		}
	}

	return false
}
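
As a sketch of the scanner's output shape (reusing the imports from the Peeker example above; `upper` is just an illustrative identifier, not an API of this package), a caller can also drain the channel directly:

ch := scanner.Scan(`a ${upper("b")}`, ast.InitPos)
for tok := range ch { // the scan goroutine closes ch once the EOF token is sent
	fmt.Printf("%-10v %q\n", tok.Type, tok.Content)
}
// Given the rules above, this input should yield roughly:
//   LITERAL "a ", BEGIN "${", IDENTIFIER "upper", OPAREN "(",
//   OQUOTE "\"", STRING "b", CQUOTE "\"", CPAREN ")", END "}", EOF ""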
diff --git a/vendor/github.com/hashicorp/hil/scanner/token.go b/vendor/github.com/hashicorp/hil/scanner/token.go
new file mode 100644
index 0000000..b6c82ae
--- /dev/null
+++ b/vendor/github.com/hashicorp/hil/scanner/token.go
@@ -0,0 +1,105 @@
package scanner

import (
	"fmt"

	"github.com/hashicorp/hil/ast"
)

type Token struct {
	Type    TokenType
	Content string
	Pos     ast.Pos
}

//go:generate stringer -type=TokenType
type TokenType rune

const (
	// Raw string data outside of ${ .. } sequences
	LITERAL TokenType = 'o'

	// STRING is like a LITERAL but it's inside a quoted string
	// within a ${ ... } sequence, and so it can contain backslash
	// escaping.
	STRING TokenType = 'S'

	// Other Literals
	INTEGER TokenType = 'I'
	FLOAT   TokenType = 'F'
	BOOL    TokenType = 'B'

	BEGIN    TokenType = '$' // actually "${"
	END      TokenType = '}'
	OQUOTE   TokenType = '“' // Opening quote of a nested quoted sequence
	CQUOTE   TokenType = '”' // Closing quote of a nested quoted sequence
	OPAREN   TokenType = '('
	CPAREN   TokenType = ')'
	OBRACKET TokenType = '['
	CBRACKET TokenType = ']'
	COMMA    TokenType = ','

	IDENTIFIER TokenType = 'i'

	PERIOD  TokenType = '.'
	PLUS    TokenType = '+'
	MINUS   TokenType = '-'
	STAR    TokenType = '*'
	SLASH   TokenType = '/'
	PERCENT TokenType = '%'

	AND  TokenType = '∧'
	OR   TokenType = '∨'
	BANG TokenType = '!'

	EQUAL    TokenType = '='
	NOTEQUAL TokenType = '≠'
	GT       TokenType = '>'
	LT       TokenType = '<'
	GTE      TokenType = '≥'
	LTE      TokenType = '≤'

	QUESTION TokenType = '?'
	COLON    TokenType = ':'

	EOF TokenType = '␄'

	// Produced for sequences that cannot be understood as valid tokens
	// e.g. due to use of unrecognized punctuation.
	INVALID TokenType = '�'
)

func (t *Token) String() string {
	switch t.Type {
	case EOF:
		return "end of string"
	case INVALID:
		return fmt.Sprintf("invalid sequence %q", t.Content)
	case INTEGER:
		return fmt.Sprintf("integer %s", t.Content)
	case FLOAT:
		return fmt.Sprintf("float %s", t.Content)
	case STRING:
		return fmt.Sprintf("string %q", t.Content)
	case LITERAL:
		return fmt.Sprintf("literal %q", t.Content)
	case OQUOTE:
		return "opening quote"
	case CQUOTE:
		return "closing quote"
	case AND:
		return "&&"
	case OR:
		return "||"
	case NOTEQUAL:
		return "!="
	case GTE:
		return ">="
	case LTE:
		return "<="
	default:
		// The remaining token types have content that
		// speaks for itself.
		return fmt.Sprintf("%q", t.Content)
	}
}
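
A small sketch of how these descriptions read in practice (the token values are chosen arbitrarily for illustration):

tok := &scanner.Token{Type: scanner.INTEGER, Content: "42", Pos: ast.InitPos}
fmt.Println(tok) // integer 42

fmt.Println(&scanner.Token{Type: scanner.OQUOTE, Content: `"`}) // opening quote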
diff --git a/vendor/github.com/hashicorp/hil/scanner/tokentype_string.go b/vendor/github.com/hashicorp/hil/scanner/tokentype_string.go
new file mode 100644
index 0000000..a602f5f
--- /dev/null
+++ b/vendor/github.com/hashicorp/hil/scanner/tokentype_string.go
@@ -0,0 +1,51 @@
// Code generated by "stringer -type=TokenType"; DO NOT EDIT

package scanner

import "fmt"

const _TokenType_name = "BANGBEGINPERCENTOPARENCPARENSTARPLUSCOMMAMINUSPERIODSLASHCOLONLTEQUALGTQUESTIONBOOLFLOATINTEGERSTRINGOBRACKETCBRACKETIDENTIFIERLITERALENDOQUOTECQUOTEANDORNOTEQUALLTEGTEEOFINVALID"

var _TokenType_map = map[TokenType]string{
	33:    _TokenType_name[0:4],
	36:    _TokenType_name[4:9],
	37:    _TokenType_name[9:16],
	40:    _TokenType_name[16:22],
	41:    _TokenType_name[22:28],
	42:    _TokenType_name[28:32],
	43:    _TokenType_name[32:36],
	44:    _TokenType_name[36:41],
	45:    _TokenType_name[41:46],
	46:    _TokenType_name[46:52],
	47:    _TokenType_name[52:57],
	58:    _TokenType_name[57:62],
	60:    _TokenType_name[62:64],
	61:    _TokenType_name[64:69],
	62:    _TokenType_name[69:71],
	63:    _TokenType_name[71:79],
	66:    _TokenType_name[79:83],
	70:    _TokenType_name[83:88],
	73:    _TokenType_name[88:95],
	83:    _TokenType_name[95:101],
	91:    _TokenType_name[101:109],
	93:    _TokenType_name[109:117],
	105:   _TokenType_name[117:127],
	111:   _TokenType_name[127:134],
	125:   _TokenType_name[134:137],
	8220:  _TokenType_name[137:143],
	8221:  _TokenType_name[143:149],
	8743:  _TokenType_name[149:152],
	8744:  _TokenType_name[152:154],
	8800:  _TokenType_name[154:162],
	8804:  _TokenType_name[162:165],
	8805:  _TokenType_name[165:168],
	9220:  _TokenType_name[168:171],
	65533: _TokenType_name[171:178],
}

func (i TokenType) String() string {
	if str, ok := _TokenType_map[i]; ok {
		return str
	}
	return fmt.Sprintf("TokenType(%d)", i)
}
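
And the generated TokenType.String in action: named constants print their names, while anything outside the map falls through to the numeric form:

fmt.Println(scanner.NOTEQUAL)       // NOTEQUAL
fmt.Println(scanner.TokenType('@')) // TokenType(64)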