[github/fretlink/terraform-provider-statuscake.git] / vendor / golang.org / x / text / unicode / norm / normalize.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Note: the file data_test.go that is generated should not be checked in.
//go:generate go run maketables.go triegen.go
//go:generate go test -tags test

// Package norm contains types and functions for normalizing Unicode strings.
package norm // import "golang.org/x/text/unicode/norm"

import (
	"unicode/utf8"

	"golang.org/x/text/transform"
)

// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
//
//	NFC   Unicode Normalization Form C
//	NFD   Unicode Normalization Form D
//	NFKC  Unicode Normalization Form KC
//	NFKD  Unicode Normalization Form KD
//
// For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
//
//	f(x) == append(f(x[0:n]), f(x[n:])...)
//
// The methods of Form look up their tables by indexing formTable with the
// Form value, so a Form is expected to be one of the constants NFC, NFD,
// NFKC or NFKD.
//
// References: https://unicode.org/reports/tr15/ and
// https://unicode.org/notes/tn5/.
type Form int

// The normalization forms. The values are consecutive integers starting at
// zero and are used by the Form methods as indices into formTable.
const (
	// NFC is Unicode Normalization Form C.
	NFC Form = iota
	// NFD is Unicode Normalization Form D.
	NFD
	// NFKC is Unicode Normalization Form KC.
	NFKC
	// NFKD is Unicode Normalization Form KD.
	NFKD
)

// Bytes returns f(b), the bytes of b converted to form f. When the initial
// quick scan reports that nothing in b needs to change, b itself is returned
// rather than a copy, so the result may alias the argument.
func (f Form) Bytes(b []byte) []byte {
	total := len(b)
	in := inputBytes(b)
	table := formTable[f]
	prefix, done := table.quickSpan(in, 0, total, true)
	if done {
		return b
	}
	// Seed the output with the prefix that needs no work, then normalize
	// the remainder of the input onto it.
	dst := make([]byte, prefix, total)
	copy(dst, b[:prefix])
	buf := reorderBuffer{f: *table, src: in, nsrc: total, out: dst, flushF: appendFlush}
	return doAppendInner(&buf, prefix)
}

// String returns f(s), the string s converted to form f. When the initial
// quick scan reports that nothing in s needs to change, s is returned as is.
func (f Form) String(s string) string {
	total := len(s)
	in := inputString(s)
	table := formTable[f]
	prefix, done := table.quickSpan(in, 0, total, true)
	if done {
		return s
	}
	// Seed the output with the prefix that needs no work, then normalize
	// the remainder of the input onto it.
	dst := make([]byte, prefix, total)
	copy(dst, s[:prefix])
	buf := reorderBuffer{f: *table, src: in, nsrc: total, out: dst, flushF: appendFlush}
	return string(doAppendInner(&buf, prefix))
}

// IsNormal reports whether b == f(b).
//
// A quick scan handles the common case. For the remainder, each segment is
// decomposed into a reorder buffer whose flush function (cmpNormalBytes)
// compares the buffered runes with the corresponding bytes of b.
func (f Form) IsNormal(b []byte) bool {
	in := inputBytes(b)
	table := formTable[f]
	pos, done := table.quickSpan(in, 0, len(b), true)
	if done {
		return true
	}
	buf := reorderBuffer{f: *table, src: in, nsrc: len(b)}
	buf.setFlusher(nil, cmpNormalBytes)
	for pos < len(b) {
		// cmpNormalBytes compares against rb.out, so point it at the
		// not-yet-verified part of b.
		buf.out = b[pos:]
		pos = decomposeSegment(&buf, pos, true)
		if pos < 0 {
			return false
		}
		pos, _ = buf.f.quickSpan(buf.src, pos, len(b), true)
	}
	return true
}

// cmpNormalBytes is the flush function installed by IsNormal. It reports
// whether the bytes of the runes currently held in rb are identical to the
// leading bytes of rb.out. It does not modify rb.out.
func cmpNormalBytes(rb *reorderBuffer) bool {
	rest := rb.out
	for k := 0; k < rb.nrune; k++ {
		r := rb.rune[k]
		if len(rest) < int(r.size) {
			// Not enough bytes left to hold this rune.
			return false
		}
		for q, stop := r.pos, r.pos+r.size; q < stop; q++ {
			if rest[0] != rb.byte[q] {
				return false
			}
			rest = rest[1:]
		}
	}
	return true
}

// IsNormalString reports whether s == f(s).
//
// It follows the same scheme as IsNormal, but compares against s directly
// through a closure that shares the current position with the main loop.
func (f Form) IsNormalString(s string) bool {
	in := inputString(s)
	table := formTable[f]
	pos, done := table.quickSpan(in, 0, len(s), true)
	if done {
		return true
	}
	// sameAsInput compares the runes buffered in rb with s starting at pos,
	// advancing pos past every byte that matches.
	sameAsInput := func(rb *reorderBuffer) bool {
		for k := 0; k < rb.nrune; k++ {
			r := rb.rune[k]
			if pos+int(r.size) > len(s) {
				return false
			}
			for q, stop := r.pos, r.pos+r.size; q < stop; q++ {
				if s[pos] != rb.byte[q] {
					return false
				}
				pos++
			}
		}
		return true
	}
	buf := reorderBuffer{f: *table, src: in, nsrc: len(s)}
	buf.setFlusher(nil, sameAsInput)
	for pos < len(s) {
		pos = decomposeSegment(&buf, pos, true)
		if pos < 0 {
			return false
		}
		pos, _ = buf.f.quickSpan(buf.src, pos, len(s), true)
	}
	return true
}

// patchTail fixes a case where a rune at the end of rb.out may be incorrectly
// normalized if it is followed by illegal continuation bytes. It modifies
// rb in place and reports whether the caller should still merge the tail of
// rb.out with the source (doAppend stores the result in doMerge): false when
// stray bytes followed the last rune and the tail has been flushed, true
// otherwise.
func patchTail(rb *reorderBuffer) bool {
	last, start := lastRuneStart(&rb.f, rb.out)
	if start == -1 || last.size == 0 {
		return true
	}
	runeEnd := start + int(last.size)
	if len(rb.out) > runeEnd {
		// Bytes follow the last rune. Copying them potentially allocates
		// memory; however, this only happens with ill-formed UTF-8.
		stray := append([]byte(nil), rb.out[runeEnd:]...)
		rb.out = rb.out[:runeEnd]
		decomposeToLastBoundary(rb)
		rb.doFlush()
		rb.out = append(rb.out, stray...)
		return false
	}
	// The last rune reaches the end of rb.out: take it off the output, load
	// the segment before it into the buffer, then insert the rune itself.
	tail := rb.out[start:]
	rb.out = rb.out[:start]
	decomposeToLastBoundary(rb)
	switch rb.ss.next(last) {
	case ssStarter:
		rb.doFlush()
		rb.ss.first(last)
	case ssOverflow:
		rb.doFlush()
		rb.insertCGJ()
		rb.ss = 0
	}
	rb.insertUnsafe(inputBytes(tail), 0, last)
	return true
}

// appendQuick appends to rb.out the part of rb.src, starting at i, that
// quickSpan accepts without further processing, and returns the position
// where that part ends. If i is already at the end of the source it returns i
// and leaves rb.out untouched.
func appendQuick(rb *reorderBuffer, i int) int {
	if i == rb.nsrc {
		return i
	}
	stop, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
	rb.out = rb.src.appendSlice(rb.out, i, stop)
	return stop
}

// Append returns f(append(out, src...)).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) Append(out []byte, src ...byte) []byte {
	in := inputBytes(src)
	return f.doAppend(out, in, len(src))
}

// doAppend is the shared implementation of Append and AppendString. It
// appends the first n bytes of src, converted to form f, to out and returns
// the extended buffer. An empty source leaves out untouched.
func (f Form) doAppend(out []byte, src input, n int) []byte {
	if n == 0 {
		return out
	}
	table := formTable[f]
	if len(out) != 0 {
		// Existing output may have to be merged with the start of src.
		merge := reorderBuffer{f: *table, src: src, nsrc: n}
		return doAppend(&merge, out, 0)
	}
	// Nothing to merge with: attempt a quickSpan first so the reorderBuffer
	// is only initialized when part of the source actually needs work.
	spanEnd, _ := table.quickSpan(src, 0, n, true)
	out = src.appendSlice(out, 0, spanEnd)
	if spanEnd == n {
		return out
	}
	rest := reorderBuffer{f: *table, src: src, nsrc: n, out: out, flushF: appendFlush}
	return doAppendInner(&rest, spanEnd)
}

// doAppend normalizes rb.src[p:rb.nsrc] and appends the result to out, which
// is installed as rb.out. It returns the final rb.out. When out is non-empty,
// the end of out and the start of the source may belong to the same segment
// and are then decomposed together before the regular append loop runs.
func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
	rb.setFlusher(out, appendFlush)
	src, n := rb.src, rb.nsrc
	// Merging is only a concern when there is existing output.
	doMerge := len(out) > 0
	if q := src.skipContinuationBytes(p); q > p {
		// Move leading non-starters to destination.
		rb.out = src.appendSlice(rb.out, p, q)
		p = q
		// patchTail repairs the end of rb.out and reports whether a merge
		// is still required.
		doMerge = patchTail(rb)
	}
	fd := &rb.f
	if doMerge {
		var info Properties
		if p < n {
			info = fd.info(src, p)
			if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
				// p is still 0 only if no leading continuation bytes were
				// skipped above (the caller in this file passes p == 0), in
				// which case patchTail did not run and the open segment at
				// the end of rb.out has not been loaded into rb yet.
				if p == 0 {
					decomposeToLastBoundary(rb)
				}
				p = decomposeSegment(rb, p, true)
			}
		}
		// info.size is 0 either because p >= n (info was never assigned) or
		// because fd.info reported a size of 0 for the bytes at p.
		if info.size == 0 {
			rb.doFlush()
			// Append incomplete UTF-8 encoding.
			return src.appendSlice(rb.out, p, n)
		}
		// Runes are still pending in the buffer: continue with the
		// decomposing loop rather than the quick path.
		if rb.nrune > 0 {
			return doAppendInner(rb, p)
		}
	}
	p = appendQuick(rb, p)
	return doAppendInner(rb, p)
}

// doAppendInner processes rb.src from position p up to rb.nsrc, alternating
// between decomposing one segment and copying the following stretch that
// appendQuick accepts unchanged. It returns rb.out.
func doAppendInner(rb *reorderBuffer, p int) []byte {
	limit := rb.nsrc
	for p < limit {
		p = appendQuick(rb, decomposeSegment(rb, p, true))
	}
	return rb.out
}

// AppendString returns f(append(out, []byte(src)...)).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) AppendString(out []byte, src string) []byte {
	in := inputString(src)
	return f.doAppend(out, in, len(src))
}

// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpan(b []byte) int {
	table := formTable[f]
	end, _ := table.quickSpan(inputBytes(b), 0, len(b), true)
	return end
}

// Span implements transform.SpanningTransformer. It returns a boundary n such
// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
//
// err is nil when n == len(b). Otherwise err is transform.ErrEndOfSpan if the
// underlying scan reported a failure, and transform.ErrShortSrc if the scan
// succeeded but stopped before the end of b.
func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
	n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
	switch {
	case n >= len(b):
		// The whole input was spanned; err stays nil.
	case ok:
		err = transform.ErrShortSrc
	default:
		err = transform.ErrEndOfSpan
	}
	return n, err
}

// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
//
// err is nil when n == len(s). Otherwise err is transform.ErrEndOfSpan if the
// underlying scan reported a failure, and transform.ErrShortSrc if the scan
// succeeded but stopped before the end of s.
func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
	n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
	switch {
	case n >= len(s):
		// The whole input was spanned; err stays nil.
	case ok:
		err = transform.ErrShortSrc
	default:
		err = transform.ErrEndOfSpan
	}
	return n, err
}

// quickSpan scans src from i up to end and returns a boundary n such that
// src[0:n] == f(src[0:n]), together with ok. ok is false when the scan
// stopped early because of a rune that fails the isYesC/isYesD check, a rune
// whose ccc is lower than that of the rune before it in the same segment, or
// a stream-safe overflow; in those cases n is the start of the last segment.
// ok is true otherwise. If atEOF is false, n will not point past the last
// segment if this segment might become non-normalized by appending other
// runes.
func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
	var lastCC uint8 // ccc of the previously accepted rune
	ss := streamSafe(0)
	lastSegStart := i
	for n = end; i < n; {
		if j := src.skipASCII(i, n); i != j {
			// Some bytes were skipped as ASCII. The byte just before the
			// new position starts the current segment, and the
			// combining-class and stream-safe state start afresh.
			i = j
			lastSegStart = i - 1
			lastCC = 0
			ss = 0
			continue
		}
		info := f.info(src, i)
		if info.size == 0 {
			if atEOF {
				// include incomplete runes
				return n, true
			}
			return lastSegStart, true
		}
		// This block needs to be before the next, because it is possible to
		// have an overflow for runes that are starters (e.g. with U+FF9E).
		switch ss.next(info) {
		case ssStarter:
			lastSegStart = i
		case ssOverflow:
			return lastSegStart, false
		case ssSuccess:
			if lastCC > info.ccc {
				return lastSegStart, false
			}
		}
		// The breaks below leave the for loop with i < n, so the function
		// falls through to the final "return lastSegStart, false".
		if f.composing {
			if !info.isYesC() {
				break
			}
		} else {
			if !info.isYesD() {
				break
			}
		}
		lastCC = info.ccc
		i += int(info.size)
	}
	if i == n {
		// The scan reached the end without finding a problem.
		if !atEOF {
			n = lastSegStart
		}
		return n, true
	}
	return lastSegStart, false
}

// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpanString(s string) int {
	table := formTable[f]
	end, _ := table.quickSpan(inputString(s), 0, len(s), true)
	return end
}

// FirstBoundary returns the position i of the first boundary in b
// or -1 if b contains no boundary.
func (f Form) FirstBoundary(b []byte) int {
	in := inputBytes(b)
	return f.firstBoundary(in, len(b))
}

// firstBoundary is the shared implementation of FirstBoundary and
// FirstBoundaryInString. It returns the position of the first boundary in
// the first nsrc bytes of src, or -1 if none is found.
func (f Form) firstBoundary(src input, nsrc int) int {
	pos := src.skipContinuationBytes(0)
	if pos >= nsrc {
		return -1
	}
	table := formTable[f]
	// We should call ss.first here, but we can't as the first rune is
	// skipped already. This means FirstBoundary can't really determine
	// CGJ insertion points correctly. Luckily it doesn't have to.
	var ss streamSafe
	for {
		props := table.info(src, pos)
		if props.size == 0 {
			return -1
		}
		if ss.next(props) != ssSuccess {
			return pos
		}
		pos += int(props.size)
		if pos < nsrc {
			continue
		}
		// End of input: it counts as a boundary only if the last rune
		// allows one after it or the stream-safe limit has been reached.
		if props.BoundaryAfter() || ss.isMax() {
			return nsrc
		}
		return -1
	}
}

// FirstBoundaryInString returns the position i of the first boundary in s
// or -1 if s contains no boundary.
func (f Form) FirstBoundaryInString(s string) int {
	in := inputString(s)
	return f.firstBoundary(in, len(s))
}

// NextBoundary reports the index of the boundary between the first and next
// segment in b or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func (f Form) NextBoundary(b []byte, atEOF bool) int {
	in := inputBytes(b)
	return f.nextBoundary(in, len(b), atEOF)
}

// NextBoundaryInString reports the index of the boundary between the first and
// next segment in s or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func (f Form) NextBoundaryInString(s string, atEOF bool) int {
	in := inputString(s)
	return f.nextBoundary(in, len(s), atEOF)
}

// nextBoundary is the shared implementation of NextBoundary and
// NextBoundaryInString. It returns the index of the boundary that ends the
// first segment in the first nsrc bytes of src, or -1 if atEOF is false and
// more input is needed to decide.
func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
	// undecided is the answer whenever the available input is insufficient:
	// the given fallback at end of input, -1 otherwise.
	undecided := func(atEnd int) int {
		if atEOF {
			return atEnd
		}
		return -1
	}
	if nsrc == 0 {
		return undecided(0)
	}
	table := formTable[f]
	props := table.info(src, 0)
	if props.size == 0 {
		return undecided(1)
	}
	var ss streamSafe
	ss.first(props)

	pos := int(props.size)
	for pos < nsrc {
		props = table.info(src, pos)
		if props.size == 0 {
			return undecided(pos)
		}
		// TODO: Using streamSafe to determine the boundary isn't the same as
		// using BoundaryBefore. Determine which should be used.
		if ss.next(props) != ssSuccess {
			return pos
		}
		pos += int(props.size)
	}
	if atEOF || props.BoundaryAfter() || ss.isMax() {
		return nsrc
	}
	return -1
}

// LastBoundary returns the position i of the last boundary in b
// or -1 if b contains no boundary.
func (f Form) LastBoundary(b []byte) int {
	table := formTable[f]
	return lastBoundary(table, b)
}

// lastBoundary returns the position of the last boundary in b according to
// fd, or -1 if no boundary is found. It inspects b from the end, stepping
// backwards one rune at a time with lastRuneStart.
func lastBoundary(fd *formInfo, b []byte) int {
	i := len(b)
	info, p := lastRuneStart(fd, b)
	if p == -1 {
		// b contains no rune start byte at all.
		return -1
	}
	if info.size == 0 { // ends with incomplete rune
		if p == 0 { // starts with incomplete rune
			return -1
		}
		// Ignore the incomplete rune and look at the rune before it.
		i = p
		info, p = lastRuneStart(fd, b[:i])
		if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
			return i
		}
	}
	if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
		return i
	}
	if info.BoundaryAfter() {
		return i
	}
	// Walk backwards until ss.backwards reports a starter or an overflow, or
	// until malformed UTF-8 is encountered.
	ss := streamSafe(0)
	v := ss.backwards(info)
	for i = p; i >= 0 && v != ssStarter; i = p {
		info, p = lastRuneStart(fd, b[:i])
		if v = ss.backwards(info); v == ssOverflow {
			break
		}
		if p+int(info.size) != i {
			if p == -1 { // no boundary found
				return -1
			}
			return i // boundary after an illegal UTF-8 encoding
		}
	}
	return i
}

// decomposeSegment scans the segment of rb.src that starts at sp into rb and
// flushes it. It inserts 0x034f (Grapheme Joiner) when it encounters a
// sequence of more than 30 non-starters.
//
// On success it returns the position in rb.src just past the bytes it
// consumed (sp advanced by the size of every rune inserted). It returns 0 if
// the rune at sp has size 0, and otherwise iShortDst, iShortSrc or the error
// code of insertFlush converted to int. Callers in this file treat a
// negative result as failure, so these codes are presumably negative.
func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
	// Force one character to be consumed.
	info := rb.f.info(rb.src, sp)
	if info.size == 0 {
		return 0
	}
	if s := rb.ss.next(info); s == ssStarter {
		// TODO: this could be removed if we don't support merging.
		// A starter while runes are already buffered: flush what is
		// buffered without consuming the starter.
		if rb.nrune > 0 {
			goto end
		}
	} else if s == ssOverflow {
		rb.insertCGJ()
		goto end
	}
	if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
		return int(err)
	}
	// Keep consuming runes until the next starter, an overflow, or the end
	// of the source.
	for {
		sp += int(info.size)
		if sp >= rb.nsrc {
			// Out of input: without atEOF the segment may still continue,
			// unless the last rune allows a boundary after it.
			if !atEOF && !info.BoundaryAfter() {
				return int(iShortSrc)
			}
			break
		}
		info = rb.f.info(rb.src, sp)
		if info.size == 0 {
			if !atEOF {
				return int(iShortSrc)
			}
			break
		}
		if s := rb.ss.next(info); s == ssStarter {
			break
		} else if s == ssOverflow {
			rb.insertCGJ()
			break
		}
		if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
			return int(err)
		}
	}
end:
	if !rb.doFlush() {
		return int(iShortDst)
	}
	return sp
}

// lastRuneStart searches buf backwards for the last byte that can start a
// UTF-8 encoded rune. It returns the properties fd reports for that position
// together with the position itself, or the zero Properties and -1 if buf
// contains no such byte.
func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
	for p := len(buf) - 1; p >= 0; p-- {
		if utf8.RuneStart(buf[p]) {
			return fd.info(inputBytes(buf), p), p
		}
	}
	return Properties{}, -1
}

// decomposeToLastBoundary finds an open segment at the end of rb.out
// and scans it into rb. It has no return value: rb.out is truncated in place
// to exclude the runes moved into rb, and rb.ss is replaced by the
// stream-safe state computed during the backward scan. rb is left untouched
// if rb.out ends in illegal continuation bytes or its last rune allows a
// boundary after it.
func decomposeToLastBoundary(rb *reorderBuffer) {
	fd := &rb.f
	info, i := lastRuneStart(fd, rb.out)
	if int(info.size) != len(rb.out)-i {
		// illegal trailing continuation bytes
		return
	}
	if info.BoundaryAfter() {
		return
	}
	var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
	padd := 0
	ss := streamSafe(0)
	p := len(rb.out)
	// Walk backwards over rb.out, recording each rune in add, until a
	// starter, an overflow, or malformed UTF-8 is reached.
	for {
		add[padd] = info
		v := ss.backwards(info)
		if v == ssOverflow {
			// Note that if we have an overflow, the string we are appending to
			// is not correctly normalized. In this case the behavior is undefined.
			break
		}
		padd++
		p -= int(info.size)
		if v == ssStarter || p < 0 {
			break
		}
		info, i = lastRuneStart(fd, rb.out[:p])
		if int(info.size) != p-i {
			break
		}
	}
	rb.ss = ss
	// Copy bytes for insertion as we may need to overwrite rb.out.
	var buf [maxBufferSize * utf8.UTFMax]byte
	cp := buf[:copy(buf[:], rb.out[p:])]
	rb.out = rb.out[:p]
	// add holds the runes last-to-first; insert them in source order.
	for padd--; padd >= 0; padd-- {
		info = add[padd]
		rb.insertUnsafe(inputBytes(cp), 0, info)
		cp = cp[info.size:]
	}
}
Commit	Line	Data
15c0b25d AP	1	// Copyright 2011 The Go Authors. All rights reserved.
	2	// Use of this source code is governed by a BSD-style
	3	// license that can be found in the LICENSE file.
	4
	5	// Note: the file data_test.go that is generated should not be checked in.
	6	//go:generate go run maketables.go triegen.go
	7	//go:generate go test -tags test
	8
	9	// Package norm contains types and functions for normalizing Unicode strings.
	10	package norm // import "golang.org/x/text/unicode/norm"
	11
	12	import (
	13	"unicode/utf8"
	14
	15	"golang.org/x/text/transform"
	16	)
	17
	18	// A Form denotes a canonical representation of Unicode code points.
	19	// The Unicode-defined normalization and equivalence forms are:
	20	//
	21	// NFC Unicode Normalization Form C
	22	// NFD Unicode Normalization Form D
	23	// NFKC Unicode Normalization Form KC
	24	// NFKD Unicode Normalization Form KD
	25	//
	26	// For a Form f, this documentation uses the notation f(x) to mean
	27	// the bytes or string x converted to the given form.
	28	// A position n in x is called a boundary if conversion to the form can
	29	// proceed independently on both sides:
	30	// f(x) == append(f(x[0:n]), f(x[n:])...)
	31	//
107c1cdb ND	32	// References: https://unicode.org/reports/tr15/ and
107c1cdb ND	33	// https://unicode.org/notes/tn5/.
15c0b25d AP	34	type Form int
	35
	36	const (
	37	NFC Form = iota
	38	NFD
	39	NFKC
	40	NFKD
	41	)
	42
	43	// Bytes returns f(b). May return b if f(b) = b.
	44	func (f Form) Bytes(b []byte) []byte {
	45	src := inputBytes(b)
	46	ft := formTable[f]
	47	n, ok := ft.quickSpan(src, 0, len(b), true)
	48	if ok {
	49	return b
	50	}
	51	out := make([]byte, n, len(b))
	52	copy(out, b[0:n])
	53	rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush}
	54	return doAppendInner(&rb, n)
	55	}
	56
	57	// String returns f(s).
	58	func (f Form) String(s string) string {
	59	src := inputString(s)
	60	ft := formTable[f]
	61	n, ok := ft.quickSpan(src, 0, len(s), true)
	62	if ok {
	63	return s
	64	}
	65	out := make([]byte, n, len(s))
	66	copy(out, s[0:n])
	67	rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush}
	68	return string(doAppendInner(&rb, n))
	69	}
	70
	71	// IsNormal returns true if b == f(b).
	72	func (f Form) IsNormal(b []byte) bool {
	73	src := inputBytes(b)
	74	ft := formTable[f]
	75	bp, ok := ft.quickSpan(src, 0, len(b), true)
	76	if ok {
	77	return true
	78	}
	79	rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
	80	rb.setFlusher(nil, cmpNormalBytes)
	81	for bp < len(b) {
	82	rb.out = b[bp:]
	83	if bp = decomposeSegment(&rb, bp, true); bp < 0 {
	84	return false
	85	}
	86	bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true)
	87	}
	88	return true
	89	}
	90
	91	func cmpNormalBytes(rb *reorderBuffer) bool {
	92	b := rb.out
	93	for i := 0; i < rb.nrune; i++ {
	94	info := rb.rune[i]
	95	if int(info.size) > len(b) {
	96	return false
	97	}
98	p := info.pos
99	pe := p + info.size
100	for ; p < pe; p++ {
101	if b[0] != rb.byte[p] {
102	return false
103	}
104	b = b[1:]
105	}
106	}
107	return true
108	}
109
110	// IsNormalString returns true if s == f(s).
111	func (f Form) IsNormalString(s string) bool {
112	src := inputString(s)
113	ft := formTable[f]
114	bp, ok := ft.quickSpan(src, 0, len(s), true)
115	if ok {
116	return true
117	}
118	rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
119	rb.setFlusher(nil, func(rb *reorderBuffer) bool {
120	for i := 0; i < rb.nrune; i++ {
121	info := rb.rune[i]
122	if bp+int(info.size) > len(s) {
123	return false
124	}
125	p := info.pos
126	pe := p + info.size
127	for ; p < pe; p++ {
128	if s[bp] != rb.byte[p] {
129	return false
130	}
131	bp++
132	}
133	}
134	return true
135	})
136	for bp < len(s) {
137	if bp = decomposeSegment(&rb, bp, true); bp < 0 {
138	return false
139	}
140	bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true)
141	}
142	return true
143	}
144
145	// patchTail fixes a case where a rune may be incorrectly normalized
146	// if it is followed by illegal continuation bytes. It returns the
147	// patched buffer and whether the decomposition is still in progress.
148	func patchTail(rb *reorderBuffer) bool {
149	info, p := lastRuneStart(&rb.f, rb.out)
150	if p == -1 \|\| info.size == 0 {
151	return true
152	}
153	end := p + int(info.size)
154	extra := len(rb.out) - end
155	if extra > 0 {
156	// Potentially allocating memory. However, this only
157	// happens with ill-formed UTF-8.
158	x := make([]byte, 0)
159	x = append(x, rb.out[len(rb.out)-extra:]...)
160	rb.out = rb.out[:end]
161	decomposeToLastBoundary(rb)
162	rb.doFlush()
163	rb.out = append(rb.out, x...)
164	return false
165	}
166	buf := rb.out[p:]
167	rb.out = rb.out[:p]
168	decomposeToLastBoundary(rb)
169	if s := rb.ss.next(info); s == ssStarter {
170	rb.doFlush()
171	rb.ss.first(info)
172	} else if s == ssOverflow {
173	rb.doFlush()
174	rb.insertCGJ()
175	rb.ss = 0
176	}
177	rb.insertUnsafe(inputBytes(buf), 0, info)
178	return true
179	}
180
181	func appendQuick(rb *reorderBuffer, i int) int {
182	if rb.nsrc == i {
183	return i
184	}
185	end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
186	rb.out = rb.src.appendSlice(rb.out, i, end)
187	return end
188	}
189
190	// Append returns f(append(out, b...)).
191	// The buffer out must be nil, empty, or equal to f(out).
192	func (f Form) Append(out []byte, src ...byte) []byte {
193	return f.doAppend(out, inputBytes(src), len(src))
194	}
195
196	func (f Form) doAppend(out []byte, src input, n int) []byte {
197	if n == 0 {
198	return out
199	}
200	ft := formTable[f]
201	// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
202	if len(out) == 0 {
203	p, _ := ft.quickSpan(src, 0, n, true)
204	out = src.appendSlice(out, 0, p)
205	if p == n {
206	return out
207	}
208	rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush}
209	return doAppendInner(&rb, p)
210	}
211	rb := reorderBuffer{f: *ft, src: src, nsrc: n}
212	return doAppend(&rb, out, 0)
213	}
214
215	func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
216	rb.setFlusher(out, appendFlush)
217	src, n := rb.src, rb.nsrc
218	doMerge := len(out) > 0
219	if q := src.skipContinuationBytes(p); q > p {
220	// Move leading non-starters to destination.
221	rb.out = src.appendSlice(rb.out, p, q)
222	p = q
223	doMerge = patchTail(rb)
224	}
225	fd := &rb.f
226	if doMerge {
227	var info Properties
228	if p < n {
229	info = fd.info(src, p)
230	if !info.BoundaryBefore() \|\| info.nLeadingNonStarters() > 0 {
231	if p == 0 {
232	decomposeToLastBoundary(rb)
233	}
234	p = decomposeSegment(rb, p, true)
235	}
236	}
237	if info.size == 0 {
238	rb.doFlush()
239	// Append incomplete UTF-8 encoding.
240	return src.appendSlice(rb.out, p, n)
241	}
242	if rb.nrune > 0 {
243	return doAppendInner(rb, p)
244	}
245	}
246	p = appendQuick(rb, p)
247	return doAppendInner(rb, p)
248	}
249
250	func doAppendInner(rb *reorderBuffer, p int) []byte {
251	for n := rb.nsrc; p < n; {
252	p = decomposeSegment(rb, p, true)
253	p = appendQuick(rb, p)
254	}
255	return rb.out
256	}
257
258	// AppendString returns f(append(out, []byte(s))).
259	// The buffer out must be nil, empty, or equal to f(out).
260	func (f Form) AppendString(out []byte, src string) []byte {
261	return f.doAppend(out, inputString(src), len(src))
262	}
263
264	// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
265	// It is not guaranteed to return the largest such n.
266	func (f Form) QuickSpan(b []byte) int {
267	n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true)
268	return n
269	}
270
271	// Span implements transform.SpanningTransformer. It returns a boundary n such
272	// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
273	func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
274	n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
275	if n < len(b) {
276	if !ok {
277	err = transform.ErrEndOfSpan
278	} else {
279	err = transform.ErrShortSrc
280	}
281	}
282	return n, err
283	}
284
285	// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
286	// It is not guaranteed to return the largest such n.
287	func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
288	n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
289	if n < len(s) {
290	if !ok {
291	err = transform.ErrEndOfSpan
292	} else {
293	err = transform.ErrShortSrc
294	}
295	}
296	return n, err
297	}
298
299	// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
300	// whether any non-normalized parts were found. If atEOF is false, n will
301	// not point past the last segment if this segment might be become
302	// non-normalized by appending other runes.
303	func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
304	var lastCC uint8
305	ss := streamSafe(0)
306	lastSegStart := i
307	for n = end; i < n; {
308	if j := src.skipASCII(i, n); i != j {
309	i = j
310	lastSegStart = i - 1
311	lastCC = 0
312	ss = 0
313	continue
314	}
315	info := f.info(src, i)
316	if info.size == 0 {
317	if atEOF {
318	// include incomplete runes
319	return n, true
320	}
321	return lastSegStart, true
322	}
323	// This block needs to be before the next, because it is possible to
324	// have an overflow for runes that are starters (e.g. with U+FF9E).
325	switch ss.next(info) {
326	case ssStarter:
327	lastSegStart = i
328	case ssOverflow:
329	return lastSegStart, false
330	case ssSuccess:
331	if lastCC > info.ccc {
332	return lastSegStart, false
333	}
334	}
335	if f.composing {
336	if !info.isYesC() {
337	break
338	}
339	} else {
340	if !info.isYesD() {
341	break
342	}
343	}
344	lastCC = info.ccc
345	i += int(info.size)
346	}
347	if i == n {
348	if !atEOF {
349	n = lastSegStart
350	}
351	return n, true
352	}
353	return lastSegStart, false
354	}
355
356	// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
357	// It is not guaranteed to return the largest such n.
358	func (f Form) QuickSpanString(s string) int {
359	n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)
360	return n
361	}
362
363	// FirstBoundary returns the position i of the first boundary in b
364	// or -1 if b contains no boundary.
365	func (f Form) FirstBoundary(b []byte) int {
366	return f.firstBoundary(inputBytes(b), len(b))
367	}
368
369	func (f Form) firstBoundary(src input, nsrc int) int {
370	i := src.skipContinuationBytes(0)
371	if i >= nsrc {
372	return -1
373	}
374	fd := formTable[f]
375	ss := streamSafe(0)
376	// We should call ss.first here, but we can't as the first rune is
377	// skipped already. This means FirstBoundary can't really determine
378	// CGJ insertion points correctly. Luckily it doesn't have to.
379	for {
380	info := fd.info(src, i)
381	if info.size == 0 {
382	return -1
383	}
384	if s := ss.next(info); s != ssSuccess {
385	return i
386	}
387	i += int(info.size)
388	if i >= nsrc {
389	if !info.BoundaryAfter() && !ss.isMax() {
390	return -1
391	}
392	return nsrc
393	}
394	}
395	}
396
397	// FirstBoundaryInString returns the position i of the first boundary in s
398	// or -1 if s contains no boundary.
399	func (f Form) FirstBoundaryInString(s string) int {
400	return f.firstBoundary(inputString(s), len(s))
401	}
402
403	// NextBoundary reports the index of the boundary between the first and next
404	// segment in b or -1 if atEOF is false and there are not enough bytes to
405	// determine this boundary.
406	func (f Form) NextBoundary(b []byte, atEOF bool) int {
407	return f.nextBoundary(inputBytes(b), len(b), atEOF)
408	}
409
410	// NextBoundaryInString reports the index of the boundary between the first and
411	// next segment in b or -1 if atEOF is false and there are not enough bytes to
412	// determine this boundary.
413	func (f Form) NextBoundaryInString(s string, atEOF bool) int {
414	return f.nextBoundary(inputString(s), len(s), atEOF)
415	}
416
417	func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
418	if nsrc == 0 {
419	if atEOF {
420	return 0
421	}
422	return -1
423	}
424	fd := formTable[f]
425	info := fd.info(src, 0)
426	if info.size == 0 {
427	if atEOF {
428	return 1
429	}
430	return -1
431	}
432	ss := streamSafe(0)
433	ss.first(info)
434
435	for i := int(info.size); i < nsrc; i += int(info.size) {
436	info = fd.info(src, i)
437	if info.size == 0 {
438	if atEOF {
439	return i
440	}
441	return -1
442	}
443	// TODO: Using streamSafe to determine the boundary isn't the same as
444	// using BoundaryBefore. Determine which should be used.
445	if s := ss.next(info); s != ssSuccess {
446	return i
447	}
448	}
449	if !atEOF && !info.BoundaryAfter() && !ss.isMax() {
450	return -1
451	}
452	return nsrc
453	}
454
455	// LastBoundary returns the position i of the last boundary in b
456	// or -1 if b contains no boundary.
457	func (f Form) LastBoundary(b []byte) int {
458	return lastBoundary(formTable[f], b)
459	}
460
461	func lastBoundary(fd *formInfo, b []byte) int {
462	i := len(b)
463	info, p := lastRuneStart(fd, b)
464	if p == -1 {
465	return -1
466	}
467	if info.size == 0 { // ends with incomplete rune
468	if p == 0 { // starts with incomplete rune
469	return -1
470	}
471	i = p
472	info, p = lastRuneStart(fd, b[:i])
473	if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
474	return i
475	}
476	}
477	if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
478	return i
479	}
480	if info.BoundaryAfter() {
481	return i
482	}
483	ss := streamSafe(0)
484	v := ss.backwards(info)
485	for i = p; i >= 0 && v != ssStarter; i = p {
486	info, p = lastRuneStart(fd, b[:i])
487	if v = ss.backwards(info); v == ssOverflow {
488	break
489	}
490	if p+int(info.size) != i {
491	if p == -1 { // no boundary found
492	return -1
493	}
494	return i // boundary after an illegal UTF-8 encoding
495	}
496	}
497	return i
498	}
499
500	// decomposeSegment scans the first segment in src into rb. It inserts 0x034f
501	// (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters
502	// and returns the number of bytes consumed from src or iShortDst or iShortSrc.
503	func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
504	// Force one character to be consumed.
505	info := rb.f.info(rb.src, sp)
506	if info.size == 0 {
507	return 0
508	}
509	if s := rb.ss.next(info); s == ssStarter {
510	// TODO: this could be removed if we don't support merging.
511	if rb.nrune > 0 {
512	goto end
513	}
514	} else if s == ssOverflow {
515	rb.insertCGJ()
516	goto end
517	}
518	if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
519	return int(err)
520	}
521	for {
522	sp += int(info.size)
523	if sp >= rb.nsrc {
524	if !atEOF && !info.BoundaryAfter() {
525	return int(iShortSrc)
526	}
527	break
528	}
529	info = rb.f.info(rb.src, sp)
530	if info.size == 0 {
531	if !atEOF {
532	return int(iShortSrc)
533	}
534	break
535	}
536	if s := rb.ss.next(info); s == ssStarter {
537	break
538	} else if s == ssOverflow {
539	rb.insertCGJ()
540	break
541	}
542	if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
543	return int(err)
544	}
545	}
546	end:
547	if !rb.doFlush() {
548	return int(iShortDst)
549	}
550	return sp
551	}
552
553	// lastRuneStart returns the runeInfo and position of the last
554	// rune in buf or the zero runeInfo and -1 if no rune was found.
555	func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
556	p := len(buf) - 1
557	for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
558	}
559	if p < 0 {
560	return Properties{}, -1
561	}
562	return fd.info(inputBytes(buf), p), p
563	}
564
565	// decomposeToLastBoundary finds an open segment at the end of the buffer
566	// and scans it into rb. Returns the buffer minus the last segment.
567	func decomposeToLastBoundary(rb *reorderBuffer) {
568	fd := &rb.f
569	info, i := lastRuneStart(fd, rb.out)
570	if int(info.size) != len(rb.out)-i {
571	// illegal trailing continuation bytes
572	return
573	}
574	if info.BoundaryAfter() {
575	return
576	}
577	var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
578	padd := 0
579	ss := streamSafe(0)
580	p := len(rb.out)
581	for {
582	add[padd] = info
583	v := ss.backwards(info)
584	if v == ssOverflow {
585	// Note that if we have an overflow, it the string we are appending to
586	// is not correctly normalized. In this case the behavior is undefined.
587	break
588	}
589	padd++
590	p -= int(info.size)
591	if v == ssStarter \|\| p < 0 {
592	break
593	}
594	info, i = lastRuneStart(fd, rb.out[:p])
595	if int(info.size) != p-i {
596	break
597	}
598	}
599	rb.ss = ss
600	// Copy bytes for insertion as we may need to overwrite rb.out.
601	var buf [maxBufferSize * utf8.UTFMax]byte
602	cp := buf[:copy(buf[:], rb.out[p:])]
603	rb.out = rb.out[:p]
604	for padd--; padd >= 0; padd-- {
605	info = add[padd]
606	rb.insertUnsafe(inputBytes(cp), 0, info)
607	cp = cp[info.size:]
608	}
609	}