diff options
Diffstat (limited to 'vendor/github.com/hashicorp/hcl2/hcl/pos_scanner.go')
-rw-r--r-- | vendor/github.com/hashicorp/hcl2/hcl/pos_scanner.go | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/vendor/github.com/hashicorp/hcl2/hcl/pos_scanner.go b/vendor/github.com/hashicorp/hcl2/hcl/pos_scanner.go new file mode 100644 index 0000000..7c8f2df --- /dev/null +++ b/vendor/github.com/hashicorp/hcl2/hcl/pos_scanner.go | |||
@@ -0,0 +1,148 @@ | |||
1 | package hcl | ||
2 | |||
3 | import ( | ||
4 | "bufio" | ||
5 | "bytes" | ||
6 | |||
7 | "github.com/apparentlymart/go-textseg/textseg" | ||
8 | ) | ||
9 | |||
10 | // RangeScanner is a helper that will scan over a buffer using a bufio.SplitFunc | ||
11 | // and visit a source range for each token matched. | ||
12 | // | ||
13 | // For example, this can be used with bufio.ScanLines to find the source range | ||
14 | // for each line in the file, skipping over the actual newline characters, which | ||
15 | // may be useful when printing source code snippets as part of diagnostic | ||
16 | // messages. | ||
17 | // | ||
18 | // The line and column information in the returned ranges is produced by | ||
19 | // counting newline characters and grapheme clusters respectively, which | ||
20 | // mimics the behavior we expect from a parser when producing ranges. | ||
21 | type RangeScanner struct { | ||
22 | filename string | ||
23 | b []byte | ||
24 | cb bufio.SplitFunc | ||
25 | |||
26 | pos Pos // position of next byte to process in b | ||
27 | cur Range // latest range | ||
28 | tok []byte // slice of b that is covered by cur | ||
29 | err error // error from last scan, if any | ||
30 | } | ||
31 | |||
32 | // Create a new RangeScanner for the given buffer, producing ranges for the | ||
33 | // given filename. | ||
34 | // | ||
35 | // Since ranges have grapheme-cluster granularity rather than byte granularity, | ||
36 | // the scanner will produce incorrect results if the given SplitFunc creates | ||
37 | // tokens between grapheme cluster boundaries. In particular, it is incorrect | ||
38 | // to use RangeScanner with bufio.ScanRunes because it will produce tokens | ||
39 | // around individual UTF-8 sequences, which will split any multi-sequence | ||
40 | // grapheme clusters. | ||
41 | func NewRangeScanner(b []byte, filename string, cb bufio.SplitFunc) *RangeScanner { | ||
42 | return &RangeScanner{ | ||
43 | filename: filename, | ||
44 | b: b, | ||
45 | cb: cb, | ||
46 | pos: Pos{ | ||
47 | Byte: 0, | ||
48 | Line: 1, | ||
49 | Column: 1, | ||
50 | }, | ||
51 | } | ||
52 | } | ||
53 | |||
54 | func (sc *RangeScanner) Scan() bool { | ||
55 | if sc.pos.Byte >= len(sc.b) || sc.err != nil { | ||
56 | // All done | ||
57 | return false | ||
58 | } | ||
59 | |||
60 | // Since we're operating on an in-memory buffer, we always pass the whole | ||
61 | // remainder of the buffer to our SplitFunc and set isEOF to let it know | ||
62 | // that it has the whole thing. | ||
63 | advance, token, err := sc.cb(sc.b[sc.pos.Byte:], true) | ||
64 | |||
65 | // Since we are setting isEOF to true this should never happen, but | ||
66 | // if it does we will just abort and assume the SplitFunc is misbehaving. | ||
67 | if advance == 0 && token == nil && err == nil { | ||
68 | return false | ||
69 | } | ||
70 | |||
71 | if err != nil { | ||
72 | sc.err = err | ||
73 | sc.cur = Range{ | ||
74 | Filename: sc.filename, | ||
75 | Start: sc.pos, | ||
76 | End: sc.pos, | ||
77 | } | ||
78 | sc.tok = nil | ||
79 | return false | ||
80 | } | ||
81 | |||
82 | sc.tok = token | ||
83 | start := sc.pos | ||
84 | end := sc.pos | ||
85 | new := sc.pos | ||
86 | |||
87 | // adv is similar to token but it also includes any subsequent characters | ||
88 | // we're being asked to skip over by the SplitFunc. | ||
89 | // adv is a slice covering any additional bytes we are skipping over, based | ||
90 | // on what the SplitFunc told us to do with advance. | ||
91 | adv := sc.b[sc.pos.Byte : sc.pos.Byte+advance] | ||
92 | |||
93 | // We now need to scan over our token to count the grapheme clusters | ||
94 | // so we can correctly advance Column, and count the newlines so we | ||
95 | // can correctly advance Line. | ||
96 | advR := bytes.NewReader(adv) | ||
97 | gsc := bufio.NewScanner(advR) | ||
98 | advanced := 0 | ||
99 | gsc.Split(textseg.ScanGraphemeClusters) | ||
100 | for gsc.Scan() { | ||
101 | gr := gsc.Bytes() | ||
102 | new.Byte += len(gr) | ||
103 | new.Column++ | ||
104 | |||
105 | // We rely here on the fact that \r\n is considered a grapheme cluster | ||
106 | // and so we don't need to worry about miscounting additional lines | ||
107 | // on files with Windows-style line endings. | ||
108 | if len(gr) != 0 && (gr[0] == '\r' || gr[0] == '\n') { | ||
109 | new.Column = 1 | ||
110 | new.Line++ | ||
111 | } | ||
112 | |||
113 | if advanced < len(token) { | ||
114 | // If we've not yet found the end of our token then we'll | ||
115 | // also push our "end" marker along. | ||
116 | // (if advance > len(token) then we'll stop moving "end" early | ||
117 | // so that the caller only sees the range covered by token.) | ||
118 | end = new | ||
119 | } | ||
120 | advanced += len(gr) | ||
121 | } | ||
122 | |||
123 | sc.cur = Range{ | ||
124 | Filename: sc.filename, | ||
125 | Start: start, | ||
126 | End: end, | ||
127 | } | ||
128 | sc.pos = new | ||
129 | return true | ||
130 | } | ||
131 | |||
132 | // Range returns a range that covers the latest token obtained after a call | ||
133 | // to Scan returns true. | ||
134 | func (sc *RangeScanner) Range() Range { | ||
135 | return sc.cur | ||
136 | } | ||
137 | |||
138 | // Bytes returns the slice of the input buffer that is covered by the range | ||
139 | // that would be returned by Range. | ||
140 | func (sc *RangeScanner) Bytes() []byte { | ||
141 | return sc.tok | ||
142 | } | ||
143 | |||
144 | // Err can be called after Scan returns false to determine if the latest read | ||
145 | // resulted in an error, and obtain that error if so. | ||
146 | func (sc *RangeScanner) Err() error { | ||
147 | return sc.err | ||
148 | } | ||