aboutsummaryrefslogtreecommitdiffhomepage
path: root/vendor/github.com/hashicorp/hcl2/hcl/pos_scanner.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/hashicorp/hcl2/hcl/pos_scanner.go')
-rw-r--r--vendor/github.com/hashicorp/hcl2/hcl/pos_scanner.go148
1 files changed, 148 insertions, 0 deletions
diff --git a/vendor/github.com/hashicorp/hcl2/hcl/pos_scanner.go b/vendor/github.com/hashicorp/hcl2/hcl/pos_scanner.go
new file mode 100644
index 0000000..7c8f2df
--- /dev/null
+++ b/vendor/github.com/hashicorp/hcl2/hcl/pos_scanner.go
@@ -0,0 +1,148 @@
1package hcl
2
3import (
4 "bufio"
5 "bytes"
6
7 "github.com/apparentlymart/go-textseg/textseg"
8)
9
// RangeScanner is a helper that will scan over a buffer using a bufio.SplitFunc
// and visit a source range for each token matched.
//
// For example, this can be used with bufio.ScanLines to find the source range
// for each line in the file, skipping over the actual newline characters, which
// may be useful when printing source code snippets as part of diagnostic
// messages.
//
// The line and column information in the returned ranges is produced by
// counting newline characters and grapheme clusters respectively, which
// mimics the behavior we expect from a parser when producing ranges.
type RangeScanner struct {
	filename string          // filename recorded in each produced Range
	b        []byte          // the entire source buffer being scanned
	cb       bufio.SplitFunc // caller-provided tokenizer

	pos Pos   // position of next byte to process in b
	cur Range // latest range
	tok []byte // slice of b that is covered by cur
	err error  // error from last scan, if any
}
31
// NewRangeScanner creates a new RangeScanner for the given buffer, producing
// ranges for the given filename.
//
// Since ranges have grapheme-cluster granularity rather than byte granularity,
// the scanner will produce incorrect results if the given SplitFunc creates
// tokens between grapheme cluster boundaries. In particular, it is incorrect
// to use RangeScanner with bufio.ScanRunes because it will produce tokens
// around individual UTF-8 sequences, which will split any multi-sequence
// grapheme clusters.
func NewRangeScanner(b []byte, filename string, cb bufio.SplitFunc) *RangeScanner {
	return &RangeScanner{
		filename: filename,
		b:        b,
		cb:       cb,
		pos: Pos{
			// Line and Column are 1-based, per the usual convention
			// for source positions; Byte is a 0-based offset.
			Byte:   0,
			Line:   1,
			Column: 1,
		},
	}
}
53
54func (sc *RangeScanner) Scan() bool {
55 if sc.pos.Byte >= len(sc.b) || sc.err != nil {
56 // All done
57 return false
58 }
59
60 // Since we're operating on an in-memory buffer, we always pass the whole
61 // remainder of the buffer to our SplitFunc and set isEOF to let it know
62 // that it has the whole thing.
63 advance, token, err := sc.cb(sc.b[sc.pos.Byte:], true)
64
65 // Since we are setting isEOF to true this should never happen, but
66 // if it does we will just abort and assume the SplitFunc is misbehaving.
67 if advance == 0 && token == nil && err == nil {
68 return false
69 }
70
71 if err != nil {
72 sc.err = err
73 sc.cur = Range{
74 Filename: sc.filename,
75 Start: sc.pos,
76 End: sc.pos,
77 }
78 sc.tok = nil
79 return false
80 }
81
82 sc.tok = token
83 start := sc.pos
84 end := sc.pos
85 new := sc.pos
86
87 // adv is similar to token but it also includes any subsequent characters
88 // we're being asked to skip over by the SplitFunc.
89 // adv is a slice covering any additional bytes we are skipping over, based
90 // on what the SplitFunc told us to do with advance.
91 adv := sc.b[sc.pos.Byte : sc.pos.Byte+advance]
92
93 // We now need to scan over our token to count the grapheme clusters
94 // so we can correctly advance Column, and count the newlines so we
95 // can correctly advance Line.
96 advR := bytes.NewReader(adv)
97 gsc := bufio.NewScanner(advR)
98 advanced := 0
99 gsc.Split(textseg.ScanGraphemeClusters)
100 for gsc.Scan() {
101 gr := gsc.Bytes()
102 new.Byte += len(gr)
103 new.Column++
104
105 // We rely here on the fact that \r\n is considered a grapheme cluster
106 // and so we don't need to worry about miscounting additional lines
107 // on files with Windows-style line endings.
108 if len(gr) != 0 && (gr[0] == '\r' || gr[0] == '\n') {
109 new.Column = 1
110 new.Line++
111 }
112
113 if advanced < len(token) {
114 // If we've not yet found the end of our token then we'll
115 // also push our "end" marker along.
116 // (if advance > len(token) then we'll stop moving "end" early
117 // so that the caller only sees the range covered by token.)
118 end = new
119 }
120 advanced += len(gr)
121 }
122
123 sc.cur = Range{
124 Filename: sc.filename,
125 Start: start,
126 End: end,
127 }
128 sc.pos = new
129 return true
130}
131
// Range returns a range that covers the latest token obtained after a call
// to Scan returns true.
//
// The returned value is only meaningful for the most recent successful Scan;
// each subsequent call to Scan overwrites it.
func (sc *RangeScanner) Range() Range {
	return sc.cur
}
137
// Bytes returns the slice of the input buffer that is covered by the range
// that would be returned by Range.
//
// NOTE(review): for typical in-place SplitFuncs this aliases the buffer
// passed to NewRangeScanner rather than being a copy, so callers should not
// modify it — confirm against the SplitFunc in use.
func (sc *RangeScanner) Bytes() []byte {
	return sc.tok
}
143
// Err can be called after Scan returns false to determine if the latest read
// resulted in an error, and obtain that error if so.
//
// It returns nil when scanning simply stopped at the end of the buffer.
func (sc *RangeScanner) Err() error {
	return sc.err
}