aboutsummaryrefslogtreecommitdiffhomepage
path: root/vendor/github.com/apparentlymart/go-textseg/textseg/utf8_seqs.go
blob: 6b14bef0e25f8c4780b8c30144c0634a1daf6ac6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
package textseg

import "unicode/utf8"

// ScanUTF8Sequences is a split function for bufio.Scanner that splits
// on UTF-8 sequence boundaries.
//
// Each token is the raw bytes of one encoded sequence. A byte that cannot
// start or continue a valid sequence is returned as a single-byte token,
// unmodified, so that no input is lost or rewritten.
//
// When data ends in a prefix that could still become a valid sequence and
// atEOF is false, the function returns (0, nil, nil) to ask the scanner for
// more input. At EOF such a truncated prefix is emitted one byte at a time.
//
// This is included largely for completeness, since this behavior is already
// built in to Go when ranging over a string.
func ScanUTF8Sequences(data []byte, atEOF bool) (int, []byte, error) {
	if len(data) == 0 {
		return 0, nil, nil
	}

	// Only wait for more input when the leading bytes are a genuinely
	// incomplete sequence. Comparing the decoded rune against utf8.RuneError
	// is not enough: DecodeRune also yields RuneError for definitively
	// invalid bytes and for a well-formed U+FFFD already present in the
	// input, and stalling on those would make the scanner buffer everything
	// up to EOF (or fail with bufio.ErrTooLong).
	if !atEOF && !utf8.FullRune(data) {
		return 0, nil, nil
	}

	// For invalid or truncated input DecodeRune reports a width of 1, so the
	// offending byte is passed through as its own token.
	_, seqLen := utf8.DecodeRune(data)
	return seqLen, data[:seqLen], nil
}