package json

import (
	"fmt"

	"github.com/apparentlymart/go-textseg/textseg"
	"github.com/hashicorp/hcl2/hcl"
)

//go:generate stringer -type tokenType scanner.go
type tokenType rune

const (
	tokenBraceO  tokenType = '{'
	tokenBraceC  tokenType = '}'
	tokenBrackO  tokenType = '['
	tokenBrackC  tokenType = ']'
	tokenComma   tokenType = ','
	tokenColon   tokenType = ':'
	tokenKeyword tokenType = 'K'
	tokenString  tokenType = 'S'
	tokenNumber  tokenType = 'N'
	tokenEOF     tokenType = '␄'
	tokenInvalid tokenType = 0
	tokenEquals  tokenType = '=' // used only for reminding the user of JSON syntax
)

type token struct {
	Type  tokenType
	Bytes []byte
	Range hcl.Range
}

// scan returns the primary tokens for the given JSON buffer in sequence.
//
// The responsibility of this pass is to just mark the slices of the buffer
// as being of various types. It is lax in how it interprets the multi-byte
// token types keyword, string and number, preferring to capture erroneous
// extra bytes that we presume the user intended to be part of the token
// so that we can generate more helpful diagnostics in the parser.
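//
// For example, scanning the buffer {"a":1} produces tokens of types
// tokenBraceO, tokenString, tokenColon, tokenNumber and tokenBraceC,
// followed by a zero-length tokenEOF.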
func scan(buf []byte, start pos) []token {
	var tokens []token
	p := start
	for {
		if len(buf) == 0 {
			tokens = append(tokens, token{
				Type:  tokenEOF,
				Bytes: nil,
				Range: posRange(p, p),
			})
			return tokens
		}

		buf, p = skipWhitespace(buf, p)

		if len(buf) == 0 {
			tokens = append(tokens, token{
				Type:  tokenEOF,
				Bytes: nil,
				Range: posRange(p, p),
			})
			return tokens
		}

		start = p

		first := buf[0]
		switch {
		case first == '{' || first == '}' || first == '[' || first == ']' || first == ',' || first == ':' || first == '=':
			p.Pos.Column++
			p.Pos.Byte++
			tokens = append(tokens, token{
				Type:  tokenType(first),
				Bytes: buf[0:1],
				Range: posRange(start, p),
			})
			buf = buf[1:]
		case first == '"':
			var tokBuf []byte
			tokBuf, buf, p = scanString(buf, p)
			tokens = append(tokens, token{
				Type:  tokenString,
				Bytes: tokBuf,
				Range: posRange(start, p),
			})
		case byteCanStartNumber(first):
			var tokBuf []byte
			tokBuf, buf, p = scanNumber(buf, p)
			tokens = append(tokens, token{
				Type:  tokenNumber,
				Bytes: tokBuf,
				Range: posRange(start, p),
			})
		case byteCanStartKeyword(first):
			var tokBuf []byte
			tokBuf, buf, p = scanKeyword(buf, p)
			tokens = append(tokens, token{
				Type:  tokenKeyword,
				Bytes: tokBuf,
				Range: posRange(start, p),
			})
		default:
			tokens = append(tokens, token{
				Type:  tokenInvalid,
				Bytes: buf[:1],
				Range: start.Range(1, 1),
			})
			// If we've encountered an invalid token then we might as well
			// stop scanning, since the parser won't proceed beyond this point.
			return tokens
		}
	}
}

func byteCanStartNumber(b byte) bool {
	switch b {
	// We are slightly more tolerant than JSON requires here since we
	// expect the parser will make a stricter interpretation of the
	// number bytes, but we specifically don't allow 'e' or 'E' here
	// since we want the scanner to treat that as the start of an
	// invalid keyword instead, to produce more intelligible error messages.
	case '-', '+', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return true
	default:
		return false
	}
}

func scanNumber(buf []byte, start pos) ([]byte, []byte, pos) {
	// The scanner doesn't check that the sequence of digit-ish bytes is
	// in a valid order. The parser must do this when decoding a number
	// token.
	var i int
	p := start
Byte:
	for i = 0; i < len(buf); i++ {
		switch buf[i] {
		case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			p.Pos.Byte++
			p.Pos.Column++
		default:
			break Byte
		}
	}
	return buf[:i], buf[i:], p
}

func byteCanStartKeyword(b byte) bool {
	switch {
	// We allow any sequence of alphabetical characters here, even though
	// JSON is more constrained, so that we can collect what we presume
	// the user intended to be a single keyword and then check its validity
	// in the parser, where we can generate better diagnostics.
	// So e.g. we want to be able to say:
	//   unrecognized keyword "True". Did you mean "true"?
	case isAlphabetical(b):
		return true
	default:
		return false
	}
}

func scanKeyword(buf []byte, start pos) ([]byte, []byte, pos) {
	var i int
	p := start
Byte:
	for i = 0; i < len(buf); i++ {
		b := buf[i]
		switch {
		case isAlphabetical(b) || b == '_':
			p.Pos.Byte++
			p.Pos.Column++
		default:
			break Byte
		}
	}
	return buf[:i], buf[i:], p
}

func scanString(buf []byte, start pos) ([]byte, []byte, pos) {
	// The scanner doesn't validate correct use of escapes, etc. It pays
	// attention to escapes only for the purpose of identifying the closing
	// quote character. It's the parser's responsibility to do proper
	// validation.
	//
	// The scanner also doesn't specifically detect unterminated string
	// literals, though they can be identified in the parser by checking if
	// the final byte in a string token is the double-quote character.

	// Skip the opening quote symbol
	i := 1
	p := start
	p.Pos.Byte++
	p.Pos.Column++
	escaping := false
Byte:
	for i < len(buf) {
		b := buf[i]

		switch {
		case b == '\\':
			escaping = !escaping
			p.Pos.Byte++
			p.Pos.Column++
			i++
		case b == '"':
			p.Pos.Byte++
			p.Pos.Column++
			i++
			if !escaping {
				break Byte
			}
			escaping = false
		case b < 32:
			break Byte
		default:
			// Advance by one grapheme cluster, so that we consider each
			// grapheme to be a "column".
			// Ignoring error because this scanner cannot produce errors.
			advance, _, _ := textseg.ScanGraphemeClusters(buf[i:], true)

			p.Pos.Byte += advance
			p.Pos.Column++
			i += advance

			escaping = false
		}
	}
	return buf[:i], buf[i:], p
}

func skipWhitespace(buf []byte, start pos) ([]byte, pos) {
	var i int
	p := start
Byte:
	for i = 0; i < len(buf); i++ {
		switch buf[i] {
		case ' ':
			p.Pos.Byte++
			p.Pos.Column++
		case '\n':
			p.Pos.Byte++
			p.Pos.Column = 1
			p.Pos.Line++
		case '\r':
			// For the purpose of line/column counting we consider a
			// carriage return to take up no space, assuming that it will
			// be paired up with a newline (on Windows, for example) that
			// will account for both of them.
			p.Pos.Byte++
		case '\t':
			// We arbitrarily count a tab as if it were two spaces, because
			// we need to choose _some_ number here. This means any system
			// that renders code on-screen with markers must itself treat
			// tabs as a pair of spaces for rendering purposes, or instead
			// use the byte offset and back into its own column position.
			p.Pos.Byte++
			p.Pos.Column += 2
		default:
			break Byte
		}
	}
	return buf[i:], p
}

type pos struct {
	Filename string
	Pos      hcl.Pos
}

func (p *pos) Range(byteLen, charLen int) hcl.Range {
	start := p.Pos
	end := p.Pos
	end.Byte += byteLen
	end.Column += charLen
	return hcl.Range{
		Filename: p.Filename,
		Start:    start,
		End:      end,
	}
}

func posRange(start, end pos) hcl.Range {
	return hcl.Range{
		Filename: start.Filename,
		Start:    start.Pos,
		End:      end.Pos,
	}
}

func (t token) GoString() string {
	return fmt.Sprintf("json.token{json.%s, []byte(%q), %#v}", t.Type, t.Bytes, t.Range)
}

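// isAlphabetical returns true if the given byte is an ASCII letter, in
// either case.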
func isAlphabetical(b byte) bool {
	return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')
}