7 "github.com/apparentlymart/go-textseg/textseg"
10 // RangeScanner is a helper that will scan over a buffer using a bufio.SplitFunc
11 // and visit a source range for each token matched.
13 // For example, this can be used with bufio.ScanLines to find the source range
14 // for each line in the file, skipping over the actual newline characters, which
15 // may be useful when printing source code snippets as part of diagnostic
18 // The line and column information in the returned ranges is produced by
19 // counting newline characters and grapheme clusters respectively, which
20 // mimics the behavior we expect from a parser when producing ranges.
21 type RangeScanner struct {
26 pos Pos // position of next byte to process in b
27 cur Range // latest range
28 tok []byte // slice of b that is covered by cur
29 err error // error from last scan, if any
32 // NewRangeScanner creates a new RangeScanner for the given buffer, producing
33 // ranges for the given filename.
35 // Since ranges have grapheme-cluster granularity rather than byte granularity,
36 // the scanner will produce incorrect results if the given SplitFunc creates
37 // tokens between grapheme cluster boundaries. In particular, it is incorrect
38 // to use RangeScanner with bufio.ScanRunes because it will produce tokens
39 // around individual UTF-8 sequences, which will split any multi-sequence
41 func NewRangeScanner(b []byte, filename string, cb bufio.SplitFunc) *RangeScanner {
42 return NewRangeScannerFragment(b, filename, InitialPos, cb)
45 // NewRangeScannerFragment is like NewRangeScanner but the ranges it produces
46 // will be offset by the given starting position, which is appropriate for
47 // sub-slices of a file, whereas NewRangeScanner assumes it is scanning an
49 func NewRangeScannerFragment(b []byte, filename string, start Pos, cb bufio.SplitFunc) *RangeScanner {
58 func (sc *RangeScanner) Scan() bool {
59 if sc.pos.Byte >= len(sc.b) || sc.err != nil {
64 // Since we're operating on an in-memory buffer, we always pass the whole
65 // remainder of the buffer to our SplitFunc and set isEOF to let it know
66 // that it has the whole thing.
67 advance, token, err := sc.cb(sc.b[sc.pos.Byte:], true)
69 // Since we are setting isEOF to true this should never happen, but
70 // if it does we will just abort and assume the SplitFunc is misbehaving.
71 if advance == 0 && token == nil && err == nil {
78 Filename: sc.filename,
// adv is similar to token, but it covers every byte the SplitFunc asked us
// to consume via advance: the token itself plus any subsequent bytes we're
// being asked to skip over.
95 adv := sc.b[sc.pos.Byte : sc.pos.Byte+advance]
// We now need to scan over adv (the token plus any skipped bytes) to count
// the grapheme clusters so we can correctly advance Column, and count the
// newlines so we can correctly advance Line.
100 advR := bytes.NewReader(adv)
101 gsc := bufio.NewScanner(advR)
103 gsc.Split(textseg.ScanGraphemeClusters)
109 // We rely here on the fact that \r\n is considered a grapheme cluster
110 // and so we don't need to worry about miscounting additional lines
111 // on files with Windows-style line endings.
112 if len(gr) != 0 && (gr[0] == '\r' || gr[0] == '\n') {
117 if advanced < len(token) {
118 // If we've not yet found the end of our token then we'll
119 // also push our "end" marker along.
120 // (if advance > len(token) then we'll stop moving "end" early
121 // so that the caller only sees the range covered by token.)
128 Filename: sc.filename,
136 // Range returns a range that covers the latest token obtained after a call
137 // to Scan returns true.
138 func (sc *RangeScanner) Range() Range {
142 // Bytes returns the slice of the input buffer that is covered by the range
143 // that would be returned by Range.
144 func (sc *RangeScanner) Bytes() []byte {
148 // Err can be called after Scan returns false to determine if the latest read
149 // resulted in an error, and obtain that error if so.
150 func (sc *RangeScanner) Err() error {