vendor/github.com/hashicorp/hcl2/hcl/pos_scanner.go

   1 package hcl
   2
   3 import (
   4         "bufio"
   5         "bytes"
   6
   7         "github.com/apparentlymart/go-textseg/textseg"
   8 )
   9
  10 // RangeScanner is a helper that will scan over a buffer using a bufio.SplitFunc
  11 // and visit a source range for each token matched.
  12 //
  13 // For example, this can be used with bufio.ScanLines to find the source range
  14 // for each line in the file, skipping over the actual newline characters, which
  15 // may be useful when printing source code snippets as part of diagnostic
  16 // messages.
  17 //
  18 // The line and column information in the returned ranges is produced by
  19 // counting newline characters and grapheme clusters respectively, which
  20 // mimics the behavior we expect from a parser when producing ranges.
  21 type RangeScanner struct {
  22         filename string
  23         b        []byte
  24         cb       bufio.SplitFunc
  25
  26         pos Pos    // position of next byte to process in b
  27         cur Range  // latest range
  28         tok []byte // slice of b that is covered by cur
  29         err error  // error from last scan, if any
  30 }
  31
  32 // NewRangeScanner creates a new RangeScanner for the given buffer, producing
  33 // ranges for the given filename.
  34 //
  35 // Since ranges have grapheme-cluster granularity rather than byte granularity,
  36 // the scanner will produce incorrect results if the given SplitFunc creates
  37 // tokens between grapheme cluster boundaries. In particular, it is incorrect
  38 // to use RangeScanner with bufio.ScanRunes because it will produce tokens
  39 // around individual UTF-8 sequences, which will split any multi-sequence
  40 // grapheme clusters.
  41 func NewRangeScanner(b []byte, filename string, cb bufio.SplitFunc) *RangeScanner {
  42         return NewRangeScannerFragment(b, filename, InitialPos, cb)
  43 }
  44
  45 // NewRangeScannerFragment is like NewRangeScanner but the ranges it produces
  46 // will be offset by the given starting position, which is appropriate for
  47 // sub-slices of a file, whereas NewRangeScanner assumes it is scanning an
  48 // entire file.
  49 func NewRangeScannerFragment(b []byte, filename string, start Pos, cb bufio.SplitFunc) *RangeScanner {
  50         return &RangeScanner{
  51                 filename: filename,
  52                 b:        b,
  53                 cb:       cb,
  54                 pos:      start,
  55         }
  56 }
  57
  58 func (sc *RangeScanner) Scan() bool {
  59         if sc.pos.Byte >= len(sc.b) || sc.err != nil {
  60                 // All done
  61                 return false
  62         }
  63
  64         // Since we're operating on an in-memory buffer, we always pass the whole
  65         // remainder of the buffer to our SplitFunc and set isEOF to let it know
  66         // that it has the whole thing.
  67         advance, token, err := sc.cb(sc.b[sc.pos.Byte:], true)
  68
  69         // Since we are setting isEOF to true this should never happen, but
  70         // if it does we will just abort and assume the SplitFunc is misbehaving.
  71         if advance == 0 && token == nil && err == nil {
  72                 return false
  73         }
  74
  75         if err != nil {
  76                 sc.err = err
  77                 sc.cur = Range{
  78                         Filename: sc.filename,
  79                         Start:    sc.pos,
  80                         End:      sc.pos,
  81                 }
  82                 sc.tok = nil
  83                 return false
  84         }
  85
  86         sc.tok = token
  87         start := sc.pos
  88         end := sc.pos
  89         new := sc.pos
  90
  91         // adv is similar to token but it also includes any subsequent characters
  92         // we're being asked to skip over by the SplitFunc.
  93         // adv is a slice covering any additional bytes we are skipping over, based
  94         // on what the SplitFunc told us to do with advance.
  95         adv := sc.b[sc.pos.Byte : sc.pos.Byte+advance]
  96
  97         // We now need to scan over our token to count the grapheme clusters
  98         // so we can correctly advance Column, and count the newlines so we
  99         // can correctly advance Line.
 100         advR := bytes.NewReader(adv)
 101         gsc := bufio.NewScanner(advR)
 102         advanced := 0
 103         gsc.Split(textseg.ScanGraphemeClusters)
 104         for gsc.Scan() {
 105                 gr := gsc.Bytes()
 106                 new.Byte += len(gr)
 107                 new.Column++
 108
 109                 // We rely here on the fact that \r\n is considered a grapheme cluster
 110                 // and so we don't need to worry about miscounting additional lines
 111                 // on files with Windows-style line endings.
 112                 if len(gr) != 0 && (gr[0] == '\r' || gr[0] == '\n') {
 113                         new.Column = 1
 114                         new.Line++
 115                 }
 116
 117                 if advanced < len(token) {
 118                         // If we've not yet found the end of our token then we'll
 119                         // also push our "end" marker along.
 120                         // (if advance > len(token) then we'll stop moving "end" early
 121                         // so that the caller only sees the range covered by token.)
 122                         end = new
 123                 }
 124                 advanced += len(gr)
 125         }
 126
 127         sc.cur = Range{
 128                 Filename: sc.filename,
 129                 Start:    start,
 130                 End:      end,
 131         }
 132         sc.pos = new
 133         return true
 134 }
 135
 136 // Range returns a range that covers the latest token obtained after a call
 137 // to Scan returns true.
 138 func (sc *RangeScanner) Range() Range {
 139         return sc.cur
 140 }
 141
 142 // Bytes returns the slice of the input buffer that is covered by the range
 143 // that would be returned by Range.
 144 func (sc *RangeScanner) Bytes() []byte {
 145         return sc.tok
 146 }
 147
 148 // Err can be called after Scan returns false to determine if the latest read
 149 // resulted in an error, and obtain that error if so.
 150 func (sc *RangeScanner) Err() error {
 151         return sc.err
 152 }