package json

import (
	"fmt"

	"github.com/apparentlymart/go-textseg/textseg"
	"github.com/hashicorp/hcl2/hcl"
)

//go:generate stringer -type tokenType scanner.go
type tokenType rune

const (
	tokenBraceO  tokenType = '{'
	tokenBraceC  tokenType = '}'
	tokenBrackO  tokenType = '['
	tokenBrackC  tokenType = ']'
	tokenComma   tokenType = ','
	tokenColon   tokenType = ':'
	tokenKeyword tokenType = 'K'
	tokenString  tokenType = 'S'
	tokenNumber  tokenType = 'N'
	tokenEOF     tokenType = '␄'
	tokenInvalid tokenType = 0
	tokenEquals  tokenType = '=' // used only for reminding the user of JSON syntax
)

type token struct {
	Type  tokenType
	Bytes []byte
	Range hcl.Range
}

// scan returns the primary tokens for the given JSON buffer in sequence.
//
// The responsibility of this pass is to just mark the slices of the buffer
// as being of various types. It is lax in how it interprets the multi-byte
// token types keyword, string and number, preferring to capture erroneous
// extra bytes that we presume the user intended to be part of the token
// so that we can generate more helpful diagnostics in the parser.
func scan(buf []byte, start pos) []token {
	var tokens []token
	p := start
	for {
		if len(buf) == 0 {
			tokens = append(tokens, token{
				Type:  tokenEOF,
				Bytes: nil,
				Range: posRange(p, p),
			})
			return tokens
		}

		buf, p = skipWhitespace(buf, p)

		if len(buf) == 0 {
			tokens = append(tokens, token{
				Type:  tokenEOF,
				Bytes: nil,
				Range: posRange(p, p),
			})
			return tokens
		}

		start = p

		first := buf[0]
		switch {
		case first == '{' || first == '}' || first == '[' || first == ']' || first == ',' || first == ':' || first == '=':
			p.Pos.Column++
			p.Pos.Byte++
			tokens = append(tokens, token{
				Type:  tokenType(first),
				Bytes: buf[0:1],
				Range: posRange(start, p),
			})
			buf = buf[1:]
		case first == '"':
			var tokBuf []byte
			tokBuf, buf, p = scanString(buf, p)
			tokens = append(tokens, token{
				Type:  tokenString,
				Bytes: tokBuf,
				Range: posRange(start, p),
			})
		case byteCanStartNumber(first):
			var tokBuf []byte
			tokBuf, buf, p = scanNumber(buf, p)
			tokens = append(tokens, token{
				Type:  tokenNumber,
				Bytes: tokBuf,
				Range: posRange(start, p),
			})
		case byteCanStartKeyword(first):
			var tokBuf []byte
			tokBuf, buf, p = scanKeyword(buf, p)
			tokens = append(tokens, token{
				Type:  tokenKeyword,
				Bytes: tokBuf,
				Range: posRange(start, p),
			})
		default:
			tokens = append(tokens, token{
				Type:  tokenInvalid,
				Bytes: buf[:1],
				Range: start.Range(1, 1),
			})
			// If we've encountered an invalid byte then we might as well stop
			// scanning, since the parser won't proceed beyond this point.
			return tokens
		}
	}
}
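
// exampleScanUsage is an illustrative sketch, not part of the upstream
// file: a hypothetical helper showing the token stream that scan
// produces for a small JSON document. Each token records its type, the
// raw bytes it covers, and its source range.
func exampleScanUsage() {
	src := []byte(`{"a": 12}`)
	start := pos{Filename: "example.json", Pos: hcl.Pos{Line: 1, Column: 1, Byte: 0}}
	for _, tok := range scan(src, start) {
		// token's GoString method (defined below) renders each token
		// in a debugging-friendly form.
		fmt.Println(tok.GoString())
	}
	// Token types seen, in order: '{', S (string), ':', N (number),
	// '}', and finally ␄ (EOF).
}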

func byteCanStartNumber(b byte) bool {
	switch b {
	// We are slightly more tolerant than JSON requires here since we
	// expect the parser will make a stricter interpretation of the
	// number bytes, but we specifically don't allow 'e' or 'E' here
	// since we want the scanner to treat that as the start of an
	// invalid keyword instead, to produce more intelligible error messages.
	case '-', '+', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return true
	default:
		return false
	}
}
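
// An illustrative sketch, not part of the upstream file: because 'e'
// and 'E' cannot start a number, a stray exponent such as "e5" scans
// as a keyword token, letting the parser report an unrecognized keyword
// rather than a confusing number error.
func exampleNumberStart() {
	fmt.Println(byteCanStartNumber('e')) // false: "e5" would become a keyword token
	fmt.Println(byteCanStartNumber('-')) // true: a leading sign is tolerated
}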

func scanNumber(buf []byte, start pos) ([]byte, []byte, pos) {
	// The scanner doesn't check that the sequence of digit-ish bytes is
	// in a valid order. The parser must do this when decoding a number
	// token.
	var i int
	p := start
Byte:
	for i = 0; i < len(buf); i++ {
		switch buf[i] {
		case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			p.Pos.Byte++
			p.Pos.Column++
		default:
			break Byte
		}
	}
	return buf[:i], buf[i:], p
}
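
// An illustrative sketch, not part of the upstream file: scanNumber
// deliberately accepts digit-ish bytes in any order, so a malformed
// sequence like "1.2.3" is captured as a single number token that the
// parser can then reject with a precise diagnostic.
func exampleScanNumberLaxness() {
	start := pos{Filename: "example.json", Pos: hcl.Pos{Line: 1, Column: 1, Byte: 0}}
	tokBuf, remain, _ := scanNumber([]byte("1.2.3,"), start)
	fmt.Printf("number bytes %q, remaining %q\n", tokBuf, remain)
	// Prints: number bytes "1.2.3", remaining ","
}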

func byteCanStartKeyword(b byte) bool {
	switch {
	// We allow any sequence of alphabetical characters here, even though
	// JSON is more constrained, so that we can collect what we presume
	// the user intended to be a single keyword and then check its validity
	// in the parser, where we can generate better diagnostics.
	// So e.g. we want to be able to say:
	//   unrecognized keyword "True". Did you mean "true"?
	case isAlphabetical(b):
		return true
	default:
		return false
	}
}

func scanKeyword(buf []byte, start pos) ([]byte, []byte, pos) {
	var i int
	p := start
Byte:
	for i = 0; i < len(buf); i++ {
		b := buf[i]
		switch {
		case isAlphabetical(b) || b == '_':
			p.Pos.Byte++
			p.Pos.Column++
		default:
			break Byte
		}
	}
	return buf[:i], buf[i:], p
}
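
// An illustrative sketch, not part of the upstream file: as the comment
// on byteCanStartKeyword explains, the scanner collects a whole
// alphabetical run such as "True" even though JSON only permits "true",
// "false", and "null", leaving validation (and the friendly suggestion)
// to the parser.
func exampleScanKeyword() {
	start := pos{Filename: "example.json", Pos: hcl.Pos{Line: 1, Column: 1, Byte: 0}}
	tokBuf, _, _ := scanKeyword([]byte("True,"), start)
	fmt.Printf("keyword bytes %q\n", tokBuf) // Prints: keyword bytes "True"
}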

func scanString(buf []byte, start pos) ([]byte, []byte, pos) {
	// The scanner doesn't validate correct use of escapes, etc. It pays
	// attention to escapes only for the purpose of identifying the closing
	// quote character. It's the parser's responsibility to do proper
	// validation.
	//
	// The scanner also doesn't specifically detect unterminated string
	// literals, though they can be identified in the parser by checking if
	// the final byte in a string token is the double-quote character.

	// Skip the opening quote symbol
	i := 1
	p := start
	p.Pos.Byte++
	p.Pos.Column++
	escaping := false
Byte:
	for i < len(buf) {
		b := buf[i]

		switch {
		case b == '\\':
			escaping = !escaping
			p.Pos.Byte++
			p.Pos.Column++
			i++
		case b == '"':
			p.Pos.Byte++
			p.Pos.Column++
			i++
			if !escaping {
				break Byte
			}
			escaping = false
		case b < 32:
			break Byte
		default:
			// Advance by one grapheme cluster, so that we consider each
			// grapheme to be a "column".
			// Ignoring error because this scanner cannot produce errors.
			advance, _, _ := textseg.ScanGraphemeClusters(buf[i:], true)

			p.Pos.Byte += advance
			p.Pos.Column++
			i += advance

			escaping = false
		}
	}
	return buf[:i], buf[i:], p
}
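
// An illustrative sketch, not part of the upstream file: scanString
// tracks backslash escapes only far enough to find the closing quote,
// so an escaped quote inside the string does not terminate the token.
func exampleScanString() {
	start := pos{Filename: "example.json", Pos: hcl.Pos{Line: 1, Column: 1, Byte: 0}}
	tokBuf, remain, _ := scanString([]byte(`"a\"b" :`), start)
	fmt.Printf("string bytes %q, remaining %q\n", tokBuf, remain)
	// Prints: string bytes "\"a\\\"b\"", remaining " :"
}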

func skipWhitespace(buf []byte, start pos) ([]byte, pos) {
	var i int
	p := start
Byte:
	for i = 0; i < len(buf); i++ {
		switch buf[i] {
		case ' ':
			p.Pos.Byte++
			p.Pos.Column++
		case '\n':
			p.Pos.Byte++
			p.Pos.Column = 1
			p.Pos.Line++
		case '\r':
			// For the purpose of line/column counting we consider a
			// carriage return to take up no space, assuming that it will
			// be paired up with a newline (on Windows, for example) that
			// will account for both of them.
			p.Pos.Byte++
		case '\t':
			// We arbitrarily count a tab as if it were two spaces, because
			// we need to choose _some_ number here. This means any system
			// that renders code on-screen with markers must itself treat
			// tabs as a pair of spaces for rendering purposes, or instead
			// use the byte offset to derive its own column position.
			p.Pos.Byte++
			p.Pos.Column += 2
		default:
			break Byte
		}
	}
	return buf[i:], p
}
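
// An illustrative sketch, not part of the upstream file: skipWhitespace
// applies the counting rules above, so a CRLF pair advances to the next
// line with the carriage return contributing only to the byte offset,
// and a tab advances the column by two.
func exampleSkipWhitespace() {
	start := pos{Filename: "example.json", Pos: hcl.Pos{Line: 1, Column: 1, Byte: 0}}
	remain, p := skipWhitespace([]byte("\r\n\t{"), start)
	fmt.Printf("remaining %q, line %d, column %d, byte %d\n",
		remain, p.Pos.Line, p.Pos.Column, p.Pos.Byte)
	// Prints: remaining "{", line 2, column 3, byte 3
}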

type pos struct {
	Filename string
	Pos      hcl.Pos
}

func (p *pos) Range(byteLen, charLen int) hcl.Range {
	start := p.Pos
	end := p.Pos
	end.Byte += byteLen
	end.Column += charLen
	return hcl.Range{
		Filename: p.Filename,
		Start:    start,
		End:      end,
	}
}

func posRange(start, end pos) hcl.Range {
	return hcl.Range{
		Filename: start.Filename,
		Start:    start.Pos,
		End:      end.Pos,
	}
}

func (t token) GoString() string {
	return fmt.Sprintf("json.token{json.%s, []byte(%q), %#v}", t.Type, t.Bytes, t.Range)
}

func isAlphabetical(b byte) bool {
	return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')
}