Diffstat (limited to 'vendor/github.com/jmespath/go-jmespath/lexer.go')
 vendor/github.com/jmespath/go-jmespath/lexer.go | 420 ++++++++++++++++++++++++
 1 file changed, 420 insertions(+), 0 deletions(-)
diff --git a/vendor/github.com/jmespath/go-jmespath/lexer.go b/vendor/github.com/jmespath/go-jmespath/lexer.go
new file mode 100644
index 0000000..817900c
--- /dev/null
+++ b/vendor/github.com/jmespath/go-jmespath/lexer.go
@@ -0,0 +1,420 @@
package jmespath

import (
	"bytes"
	"encoding/json"
	"fmt"
	"strconv"
	"strings"
	"unicode/utf8"
)

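// token represents a single lexed token: its type, its (possibly decoded)
// value, and where it occurred in the source expression.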
type token struct {
	tokenType tokType
	value     string
	position  int
	length    int
}

type tokType int

const eof = -1

// Lexer contains information about the expression being tokenized.
type Lexer struct {
	expression string       // The expression provided by the user.
	currentPos int          // The current position in the string.
	lastWidth  int          // The width of the current rune.
	buf        bytes.Buffer // Internal buffer used for building up values.
}

// SyntaxError is the main error used whenever a lexing or parsing error occurs.
type SyntaxError struct {
	msg        string // Error message displayed to user
	Expression string // Expression that generated a SyntaxError
	Offset     int    // The location in the string where the error occurred
}

func (e SyntaxError) Error() string {
	// In the future, it would be good to underline the specific
	// location where the error occurred.
	return "SyntaxError: " + e.msg
}

// HighlightLocation will show where the syntax error occurred.
// It will place a "^" character on a line below the expression
// at the point where the syntax error occurred.
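//
// For example, with Expression "foo.bar" and Offset 4, the result is:
//
//	foo.bar
//	    ^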
func (e SyntaxError) HighlightLocation() string {
	return e.Expression + "\n" + strings.Repeat(" ", e.Offset) + "^"
}

//go:generate stringer -type=tokType
const (
	tUnknown tokType = iota
	tStar
	tDot
	tFilter
	tFlatten
	tLparen
	tRparen
	tLbracket
	tRbracket
	tLbrace
	tRbrace
	tOr
	tPipe
	tNumber
	tUnquotedIdentifier
	tQuotedIdentifier
	tComma
	tColon
	tLT
	tLTE
	tGT
	tGTE
	tEQ
	tNE
	tJSONLiteral
	tStringLiteral
	tCurrent
	tExpref
	tAnd
	tNot
	tEOF
)

var basicTokens = map[rune]tokType{
	'.': tDot,
	'*': tStar,
	',': tComma,
	':': tColon,
	'{': tLbrace,
	'}': tRbrace,
	']': tRbracket, // tLbracket not included because it could be "[]"
	'(': tLparen,
	')': tRparen,
	'@': tCurrent,
}

// Bit mask for [a-zA-Z_] shifted down 64 bits to fit in a single uint64.
// When using this bitmask just be sure to shift the rune down 64 bits
// before checking against identifierStartBits.
const identifierStartBits uint64 = 576460745995190270

// Bit mask for [a-zA-Z0-9_], 128 bits -> 2 uint64s.
var identifierTrailingBits = [2]uint64{287948901175001088, 576460745995190270}
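// For example, 'a' (97) has bit 97-64 = 33 set in identifierStartBits, so it
// can start an identifier, while '0' (48) cannot: the unsigned subtraction
// wraps around and the oversized shift yields 0. '0' does have bit 48 set in
// identifierTrailingBits[0], so digits may appear after the first rune.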

var whiteSpace = map[rune]bool{
	' ': true, '\t': true, '\n': true, '\r': true,
}

func (t token) String() string {
	return fmt.Sprintf("Token{%+v, %s, %d, %d}",
		t.tokenType, t.value, t.position, t.length)
}

// NewLexer creates a new JMESPath lexer.
func NewLexer() *Lexer {
	lexer := Lexer{}
	return &lexer
}

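// next returns the next rune in the expression and advances the lexer,
// returning eof once the end of the input has been reached.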
func (lexer *Lexer) next() rune {
	if lexer.currentPos >= len(lexer.expression) {
		lexer.lastWidth = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(lexer.expression[lexer.currentPos:])
	lexer.lastWidth = w
	lexer.currentPos += w
	return r
}

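// back steps the lexer back by the rune most recently returned from next.
// Because only lastWidth is tracked, it can only undo a single call to next.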
func (lexer *Lexer) back() {
	lexer.currentPos -= lexer.lastWidth
}

func (lexer *Lexer) peek() rune {
	t := lexer.next()
	lexer.back()
	return t
}

// tokenize takes an expression and returns corresponding tokens.
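// For example, tokenizing "foo.bar" yields tUnquotedIdentifier ("foo"),
// tDot ("."), tUnquotedIdentifier ("bar"), and a trailing tEOF token.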
func (lexer *Lexer) tokenize(expression string) ([]token, error) {
	var tokens []token
	lexer.expression = expression
	lexer.currentPos = 0
	lexer.lastWidth = 0
loop:
	for {
		r := lexer.next()
		if identifierStartBits&(1<<(uint64(r)-64)) > 0 {
			t := lexer.consumeUnquotedIdentifier()
			tokens = append(tokens, t)
		} else if val, ok := basicTokens[r]; ok {
			// Basic single char token.
			t := token{
				tokenType: val,
				value:     string(r),
				position:  lexer.currentPos - lexer.lastWidth,
				length:    1,
			}
			tokens = append(tokens, t)
		} else if r == '-' || (r >= '0' && r <= '9') {
			t := lexer.consumeNumber()
			tokens = append(tokens, t)
		} else if r == '[' {
			t := lexer.consumeLBracket()
			tokens = append(tokens, t)
		} else if r == '"' {
			t, err := lexer.consumeQuotedIdentifier()
			if err != nil {
				return tokens, err
			}
			tokens = append(tokens, t)
		} else if r == '\'' {
			t, err := lexer.consumeRawStringLiteral()
			if err != nil {
				return tokens, err
			}
			tokens = append(tokens, t)
		} else if r == '`' {
			t, err := lexer.consumeLiteral()
			if err != nil {
				return tokens, err
			}
			tokens = append(tokens, t)
		} else if r == '|' {
			t := lexer.matchOrElse(r, '|', tOr, tPipe)
			tokens = append(tokens, t)
		} else if r == '<' {
			t := lexer.matchOrElse(r, '=', tLTE, tLT)
			tokens = append(tokens, t)
		} else if r == '>' {
			t := lexer.matchOrElse(r, '=', tGTE, tGT)
			tokens = append(tokens, t)
		} else if r == '!' {
			t := lexer.matchOrElse(r, '=', tNE, tNot)
			tokens = append(tokens, t)
		} else if r == '=' {
			t := lexer.matchOrElse(r, '=', tEQ, tUnknown)
			tokens = append(tokens, t)
		} else if r == '&' {
			t := lexer.matchOrElse(r, '&', tAnd, tExpref)
			tokens = append(tokens, t)
		} else if r == eof {
			break loop
		} else if _, ok := whiteSpace[r]; ok {
			// Ignore whitespace
		} else {
			return tokens, lexer.syntaxError(fmt.Sprintf("Unknown char: %s", strconv.QuoteRuneToASCII(r)))
		}
	}
	tokens = append(tokens, token{tEOF, "", len(lexer.expression), 0})
	return tokens, nil
}

// Consume characters until the ending rune "end" is reached.
// If the end of the expression is reached before seeing the
// terminating rune "end", then an error is returned.
// If no error occurs then the matching substring is returned.
// The returned string will not include the ending rune.
func (lexer *Lexer) consumeUntil(end rune) (string, error) {
	start := lexer.currentPos
	current := lexer.next()
	for current != end && current != eof {
		if current == '\\' && lexer.peek() != eof {
			lexer.next()
		}
		current = lexer.next()
	}
	if lexer.lastWidth == 0 {
		// Then we hit an EOF so we never reached the closing
		// delimiter.
		return "", SyntaxError{
			msg:        "Unclosed delimiter: " + string(end),
			Expression: lexer.expression,
			Offset:     len(lexer.expression),
		}
	}
	return lexer.expression[start : lexer.currentPos-lexer.lastWidth], nil
}

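// consumeLiteral consumes a JSON literal delimited by backticks.
// Escaped backticks within the literal are unescaped in the returned value.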
func (lexer *Lexer) consumeLiteral() (token, error) {
	start := lexer.currentPos
	value, err := lexer.consumeUntil('`')
	if err != nil {
		return token{}, err
	}
	value = strings.Replace(value, "\\`", "`", -1)
	return token{
		tokenType: tJSONLiteral,
		value:     value,
		position:  start,
		length:    len(value),
	}, nil
}

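// consumeRawStringLiteral consumes a raw string literal delimited by single
// quotes, unescaping any embedded \' sequences along the way.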
func (lexer *Lexer) consumeRawStringLiteral() (token, error) {
	start := lexer.currentPos
	currentIndex := start
	current := lexer.next()
	for current != '\'' && lexer.peek() != eof {
		if current == '\\' && lexer.peek() == '\'' {
			chunk := lexer.expression[currentIndex : lexer.currentPos-1]
			lexer.buf.WriteString(chunk)
			lexer.buf.WriteString("'")
			lexer.next()
			currentIndex = lexer.currentPos
		}
		current = lexer.next()
	}
	if lexer.lastWidth == 0 {
		// Then we hit an EOF so we never reached the closing
		// delimiter.
		return token{}, SyntaxError{
			msg:        "Unclosed delimiter: '",
			Expression: lexer.expression,
			Offset:     len(lexer.expression),
		}
	}
	if currentIndex < lexer.currentPos {
		lexer.buf.WriteString(lexer.expression[currentIndex : lexer.currentPos-1])
	}
	value := lexer.buf.String()
	// Reset the buffer so it can be reused again.
	lexer.buf.Reset()
	return token{
		tokenType: tStringLiteral,
		value:     value,
		position:  start,
		length:    len(value),
	}, nil
}

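// syntaxError builds a SyntaxError whose Offset points at the most
// recently consumed character in the expression.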
func (lexer *Lexer) syntaxError(msg string) SyntaxError {
	return SyntaxError{
		msg:        msg,
		Expression: lexer.expression,
		Offset:     lexer.currentPos - 1,
	}
}

// Checks for a two-char token, otherwise matches a single-char
// token. This is used whenever a two-char token overlaps a single-char
// token, e.g. "||" -> tOr, "|" -> tPipe.
func (lexer *Lexer) matchOrElse(first rune, second rune, matchedType tokType, singleCharType tokType) token {
	start := lexer.currentPos - lexer.lastWidth
	nextRune := lexer.next()
	var t token
	if nextRune == second {
		t = token{
			tokenType: matchedType,
			value:     string(first) + string(second),
			position:  start,
			length:    2,
		}
	} else {
		lexer.back()
		t = token{
			tokenType: singleCharType,
			value:     string(first),
			position:  start,
			length:    1,
		}
	}
	return t
}

func (lexer *Lexer) consumeLBracket() token {
	// There are three options here:
	// 1. A filter expression "[?"
	// 2. A flatten operator "[]"
	// 3. A bare lbracket "["
	start := lexer.currentPos - lexer.lastWidth
	nextRune := lexer.next()
	var t token
	if nextRune == '?' {
		t = token{
			tokenType: tFilter,
			value:     "[?",
			position:  start,
			length:    2,
		}
	} else if nextRune == ']' {
		t = token{
			tokenType: tFlatten,
			value:     "[]",
			position:  start,
			length:    2,
		}
	} else {
		t = token{
			tokenType: tLbracket,
			value:     "[",
			position:  start,
			length:    1,
		}
		lexer.back()
	}
	return t
}

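// consumeQuotedIdentifier consumes a double-quoted identifier, decoding any
// JSON escape sequences (e.g. \n or \u005c) via json.Unmarshal.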
func (lexer *Lexer) consumeQuotedIdentifier() (token, error) {
	start := lexer.currentPos
	value, err := lexer.consumeUntil('"')
	if err != nil {
		return token{}, err
	}
	var decoded string
	asJSON := []byte("\"" + value + "\"")
	if err := json.Unmarshal(asJSON, &decoded); err != nil {
		return token{}, err
	}
	return token{
		tokenType: tQuotedIdentifier,
		value:     decoded,
		position:  start - 1,
		length:    len(decoded),
	}, nil
}

func (lexer *Lexer) consumeUnquotedIdentifier() token {
	// Consume runes until we reach the end of an unquoted
	// identifier.
	start := lexer.currentPos - lexer.lastWidth
	for {
		r := lexer.next()
		// Runes outside 0-127 are not covered by identifierTrailingBits,
		// so they always end the identifier.
		if r < 0 || r > 127 || identifierTrailingBits[uint64(r)/64]&(1<<(uint64(r)%64)) == 0 {
			lexer.back()
			break
		}
	}
	value := lexer.expression[start:lexer.currentPos]
	return token{
		tokenType: tUnquotedIdentifier,
		value:     value,
		position:  start,
		length:    lexer.currentPos - start,
	}
}

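// consumeNumber consumes the digits of a number token. The leading rune
// (a digit or '-') has already been consumed by the tokenize loop.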
func (lexer *Lexer) consumeNumber() token {
	// Consume runes until we reach something that's not a number.
	start := lexer.currentPos - lexer.lastWidth
	for {
		r := lexer.next()
		if r < '0' || r > '9' {
			lexer.back()
			break
		}
	}
	value := lexer.expression[start:lexer.currentPos]
	return token{
		tokenType: tNumber,
		value:     value,
		position:  start,
		length:    lexer.currentPos - start,
	}
}