diff options
Diffstat (limited to 'vendor/github.com/hashicorp/hcl2/hcl/json/scanner.go')
-rw-r--r-- | vendor/github.com/hashicorp/hcl2/hcl/json/scanner.go | 293 |
1 files changed, 293 insertions, 0 deletions
diff --git a/vendor/github.com/hashicorp/hcl2/hcl/json/scanner.go b/vendor/github.com/hashicorp/hcl2/hcl/json/scanner.go new file mode 100644 index 0000000..0a8378b --- /dev/null +++ b/vendor/github.com/hashicorp/hcl2/hcl/json/scanner.go | |||
@@ -0,0 +1,293 @@ | |||
1 | package json | ||
2 | |||
3 | import ( | ||
4 | "fmt" | ||
5 | |||
6 | "github.com/apparentlymart/go-textseg/textseg" | ||
7 | "github.com/hashicorp/hcl2/hcl" | ||
8 | ) | ||
9 | |||
// tokenType classifies the primary tokens produced by scan. Single-byte
// punctuation tokens use the punctuation byte itself as their type value;
// the multi-byte token classes use mnemonic rune values instead.
//
//go:generate stringer -type tokenType scanner.go
type tokenType rune

const (
	tokenBraceO  tokenType = '{'
	tokenBraceC  tokenType = '}'
	tokenBrackO  tokenType = '['
	tokenBrackC  tokenType = ']'
	tokenComma   tokenType = ','
	tokenColon   tokenType = ':'
	tokenKeyword tokenType = 'K' // e.g. true, false, null (validated in the parser)
	tokenString  tokenType = 'S'
	tokenNumber  tokenType = 'N'
	tokenEOF     tokenType = '␄' // end of buffer
	tokenInvalid tokenType = 0   // byte that cannot start any token
	tokenEquals  tokenType = '=' // used only for reminding the user of JSON syntax
)
27 | |||
// token is one lexical unit of the JSON buffer: its type, the raw bytes
// it covers (a sub-slice of the scanned buffer, nil for EOF), and its
// source range for diagnostics.
type token struct {
	Type  tokenType
	Bytes []byte
	Range hcl.Range
}
33 | |||
34 | // scan returns the primary tokens for the given JSON buffer in sequence. | ||
35 | // | ||
36 | // The responsibility of this pass is to just mark the slices of the buffer | ||
37 | // as being of various types. It is lax in how it interprets the multi-byte | ||
38 | // token types keyword, string and number, preferring to capture erroneous | ||
39 | // extra bytes that we presume the user intended to be part of the token | ||
40 | // so that we can generate more helpful diagnostics in the parser. | ||
41 | func scan(buf []byte, start pos) []token { | ||
42 | var tokens []token | ||
43 | p := start | ||
44 | for { | ||
45 | if len(buf) == 0 { | ||
46 | tokens = append(tokens, token{ | ||
47 | Type: tokenEOF, | ||
48 | Bytes: nil, | ||
49 | Range: posRange(p, p), | ||
50 | }) | ||
51 | return tokens | ||
52 | } | ||
53 | |||
54 | buf, p = skipWhitespace(buf, p) | ||
55 | |||
56 | if len(buf) == 0 { | ||
57 | tokens = append(tokens, token{ | ||
58 | Type: tokenEOF, | ||
59 | Bytes: nil, | ||
60 | Range: posRange(p, p), | ||
61 | }) | ||
62 | return tokens | ||
63 | } | ||
64 | |||
65 | start = p | ||
66 | |||
67 | first := buf[0] | ||
68 | switch { | ||
69 | case first == '{' || first == '}' || first == '[' || first == ']' || first == ',' || first == ':' || first == '=': | ||
70 | p.Pos.Column++ | ||
71 | p.Pos.Byte++ | ||
72 | tokens = append(tokens, token{ | ||
73 | Type: tokenType(first), | ||
74 | Bytes: buf[0:1], | ||
75 | Range: posRange(start, p), | ||
76 | }) | ||
77 | buf = buf[1:] | ||
78 | case first == '"': | ||
79 | var tokBuf []byte | ||
80 | tokBuf, buf, p = scanString(buf, p) | ||
81 | tokens = append(tokens, token{ | ||
82 | Type: tokenString, | ||
83 | Bytes: tokBuf, | ||
84 | Range: posRange(start, p), | ||
85 | }) | ||
86 | case byteCanStartNumber(first): | ||
87 | var tokBuf []byte | ||
88 | tokBuf, buf, p = scanNumber(buf, p) | ||
89 | tokens = append(tokens, token{ | ||
90 | Type: tokenNumber, | ||
91 | Bytes: tokBuf, | ||
92 | Range: posRange(start, p), | ||
93 | }) | ||
94 | case byteCanStartKeyword(first): | ||
95 | var tokBuf []byte | ||
96 | tokBuf, buf, p = scanKeyword(buf, p) | ||
97 | tokens = append(tokens, token{ | ||
98 | Type: tokenKeyword, | ||
99 | Bytes: tokBuf, | ||
100 | Range: posRange(start, p), | ||
101 | }) | ||
102 | default: | ||
103 | tokens = append(tokens, token{ | ||
104 | Type: tokenInvalid, | ||
105 | Bytes: buf[:1], | ||
106 | Range: start.Range(1, 1), | ||
107 | }) | ||
108 | // If we've encountered an invalid then we might as well stop | ||
109 | // scanning since the parser won't proceed beyond this point. | ||
110 | return tokens | ||
111 | } | ||
112 | } | ||
113 | } | ||
114 | |||
// byteCanStartNumber reports whether b may begin a number token.
//
// We are slightly more tolerant than JSON requires here ('+' and a
// leading '.' are accepted) since the parser makes a stricter
// interpretation of the number bytes. We specifically exclude 'e' and
// 'E', though, so the scanner treats those as the start of an invalid
// keyword instead, producing more intelligible error messages.
func byteCanStartNumber(b byte) bool {
	return b == '-' || b == '+' || b == '.' || ('0' <= b && b <= '9')
}
128 | |||
129 | func scanNumber(buf []byte, start pos) ([]byte, []byte, pos) { | ||
130 | // The scanner doesn't check that the sequence of digit-ish bytes is | ||
131 | // in a valid order. The parser must do this when decoding a number | ||
132 | // token. | ||
133 | var i int | ||
134 | p := start | ||
135 | Byte: | ||
136 | for i = 0; i < len(buf); i++ { | ||
137 | switch buf[i] { | ||
138 | case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': | ||
139 | p.Pos.Byte++ | ||
140 | p.Pos.Column++ | ||
141 | default: | ||
142 | break Byte | ||
143 | } | ||
144 | } | ||
145 | return buf[:i], buf[i:], p | ||
146 | } | ||
147 | |||
// byteCanStartKeyword reports whether b may begin a keyword token.
//
// We allow any sequence of alphabetical characters here, even though
// JSON is more constrained, so that we can collect what we presume
// the user intended to be a single keyword and then check its validity
// in the parser, where we can generate better diagnostics.
// So e.g. we want to be able to say:
//	unrecognized keyword "True". Did you mean "true"?
//
// BUG FIX: the original condition used '||' between the range bounds
// (b >= 'a' || b <= 'z' || ...), which is true for every possible byte,
// so scan's tokenInvalid branch was unreachable and arbitrary garbage
// bytes were swept into "keyword" tokens. Each range needs '&&'.
func byteCanStartKeyword(b byte) bool {
	switch {
	case (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z'):
		return true
	default:
		return false
	}
}
162 | |||
163 | func scanKeyword(buf []byte, start pos) ([]byte, []byte, pos) { | ||
164 | var i int | ||
165 | p := start | ||
166 | Byte: | ||
167 | for i = 0; i < len(buf); i++ { | ||
168 | b := buf[i] | ||
169 | switch { | ||
170 | case (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_': | ||
171 | p.Pos.Byte++ | ||
172 | p.Pos.Column++ | ||
173 | default: | ||
174 | break Byte | ||
175 | } | ||
176 | } | ||
177 | return buf[:i], buf[i:], p | ||
178 | } | ||
179 | |||
// scanString consumes a string token beginning at the opening quote in
// buf[0], returning the token bytes (including both quotes when the
// string is terminated), the remaining buffer, and the updated position.
//
// The scanner doesn't validate correct use of escapes, etc. It pays
// attention to escapes only for the purpose of identifying the closing
// quote character. It's the parser's responsibility to do proper
// validation.
//
// The scanner also doesn't specifically detect unterminated string
// literals, though they can be identified in the parser by checking if
// the final byte in a string token is the double-quote character.
func scanString(buf []byte, start pos) ([]byte, []byte, pos) {
	// Skip the opening quote symbol
	i := 1
	p := start
	p.Pos.Byte++
	p.Pos.Column++
	escaping := false
Byte:
	for i < len(buf) {
		b := buf[i]

		switch {
		case b == '\\':
			// Toggling (rather than setting) makes a doubled backslash
			// cancel itself out, so the quote in `\\"` still terminates.
			escaping = !escaping
			p.Pos.Byte++
			p.Pos.Column++
			i++
		case b == '"':
			p.Pos.Byte++
			p.Pos.Column++
			i++
			// An unescaped quote ends the token; it has already been
			// counted and included in the token bytes.
			if !escaping {
				break Byte
			}
			escaping = false
		case b < 32:
			// A raw control character ends the token WITHOUT being
			// consumed, leaving it for the caller to classify.
			break Byte
		default:
			// Advance by one grapheme cluster, so that we consider each
			// grapheme to be a "column".
			// Ignoring error because this scanner cannot produce errors.
			advance, _, _ := textseg.ScanGraphemeClusters(buf[i:], true)

			p.Pos.Byte += advance
			p.Pos.Column++
			i += advance

			escaping = false
		}
	}
	return buf[:i], buf[i:], p
}
231 | |||
232 | func skipWhitespace(buf []byte, start pos) ([]byte, pos) { | ||
233 | var i int | ||
234 | p := start | ||
235 | Byte: | ||
236 | for i = 0; i < len(buf); i++ { | ||
237 | switch buf[i] { | ||
238 | case ' ': | ||
239 | p.Pos.Byte++ | ||
240 | p.Pos.Column++ | ||
241 | case '\n': | ||
242 | p.Pos.Byte++ | ||
243 | p.Pos.Column = 1 | ||
244 | p.Pos.Line++ | ||
245 | case '\r': | ||
246 | // For the purpose of line/column counting we consider a | ||
247 | // carriage return to take up no space, assuming that it will | ||
248 | // be paired up with a newline (on Windows, for example) that | ||
249 | // will account for both of them. | ||
250 | p.Pos.Byte++ | ||
251 | case '\t': | ||
252 | // We arbitrarily count a tab as if it were two spaces, because | ||
253 | // we need to choose _some_ number here. This means any system | ||
254 | // that renders code on-screen with markers must itself treat | ||
255 | // tabs as a pair of spaces for rendering purposes, or instead | ||
256 | // use the byte offset and back into its own column position. | ||
257 | p.Pos.Byte++ | ||
258 | p.Pos.Column += 2 | ||
259 | default: | ||
260 | break Byte | ||
261 | } | ||
262 | } | ||
263 | return buf[i:], p | ||
264 | } | ||
265 | |||
// pos pairs an hcl.Pos with the filename it belongs to, so the scanner
// can build complete hcl.Ranges without threading the filename through
// every call separately.
type pos struct {
	Filename string
	Pos      hcl.Pos
}
270 | |||
271 | func (p *pos) Range(byteLen, charLen int) hcl.Range { | ||
272 | start := p.Pos | ||
273 | end := p.Pos | ||
274 | end.Byte += byteLen | ||
275 | end.Column += charLen | ||
276 | return hcl.Range{ | ||
277 | Filename: p.Filename, | ||
278 | Start: start, | ||
279 | End: end, | ||
280 | } | ||
281 | } | ||
282 | |||
283 | func posRange(start, end pos) hcl.Range { | ||
284 | return hcl.Range{ | ||
285 | Filename: start.Filename, | ||
286 | Start: start.Pos, | ||
287 | End: end.Pos, | ||
288 | } | ||
289 | } | ||
290 | |||
// GoString implements fmt.GoStringer, rendering the token in Go-like
// syntax for debug output via the %#v verb. The token type is printed
// using its stringer name (see the go:generate directive above).
func (t token) GoString() string {
	return fmt.Sprintf("json.token{json.%s, []byte(%q), %#v}", t.Type, t.Bytes, t.Range)
}