1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
9 // Properties provides access to BiDi properties of runes.
// NOTE(review): the struct body is not visible in this listing. Other code in
// this file reads and sets two fields: entry (its low four bits are converted
// to a Class, its high four bits are tested by IsBracket) and last (set to the
// third byte of a 3-byte encoding and used by Class to index
// controlByteToClass). Field types are not shown here — confirm against the
// full source.
10 type Properties struct {
15 var trie = newBidiTrie(0)
// TODO: using this for bidirule reduces the running time by about 5%. Consider
// if this is worth exposing or if we can find a way to speed up the Class
// method.
//
// // CompactClass is like Class, but maps all of the BiDi control classes
// // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
// func (p Properties) CompactClass() Class {
// 	return Class(p.entry & 0x0F)
// }
27 // Class returns the Bidi class for p.
28 func (p Properties) Class() Class {
29 c := Class(p.entry & 0x0F)
31 c = controlByteToClass[p.last&0xF]
36 // IsBracket reports whether the rune is a bracket.
37 func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
39 // IsOpeningBracket reports whether the rune is an opening bracket.
40 // IsBracket must return true.
41 func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
43 // TODO: find a better API and expose.
44 func (p Properties) reverseBracket(r rune) rune {
45 return xorMasks[p.entry>>xorMaskShift] ^ r
48 var controlByteToClass = [16]Class{
49 0xD: LRO, // U+202D LeftToRightOverride,
50 0xE: RLO, // U+202E RightToLeftOverride,
51 0xA: LRE, // U+202A LeftToRightEmbedding,
52 0xB: RLE, // U+202B RightToLeftEmbedding,
53 0xC: PDF, // U+202C PopDirectionalFormat,
54 0x6: LRI, // U+2066 LeftToRightIsolate,
55 0x7: RLI, // U+2067 RightToLeftIsolate,
56 0x8: FSI, // U+2068 FirstStrongIsolate,
57 0x9: PDI, // U+2069 PopDirectionalIsolate,
60 // LookupRune returns properties for r.
61 func LookupRune(r rune) (p Properties, size int) {
63 n := utf8.EncodeRune(buf[:], r)
64 return Lookup(buf[:n])
// TODO: these lookup methods are based on the generated trie code. The returned
// sizes have slightly different semantics from the generated code, in that it
// always returns size==1 for an illegal UTF-8 byte (instead of the length
// of the maximum invalid subsequence). Most Transformers, like unicode/norm,
// leave invalid UTF-8 untouched, in which case it has performance benefits to
// do so (without changing the semantics). Bidi requires the semantics used here
// for the bidirule implementation to be compatible with the Go semantics.
// They ultimately should perhaps be adopted by all trie implementations.
//
// This unrolled code also boosts performance of the secure/bidirule package.
//
// So, to remove this code:
//   - add option to trie generator to define return type.
//   - always return 1 byte size for ill-formed UTF-8 runes.
// Lookup returns properties for the first rune in s and the width in bytes of
// its encoding. The size will be 0 if s does not hold enough bytes to complete
// the encoding.
85 func Lookup(s []byte) (p Properties, sz int) {
// NOTE(review): this listing omits source lines (the embedded line numbers
// jump, e.g. from 85 to 88). The enclosing switch, the definitions of c0, c1,
// c2, c3 and i, and the conditions guarding several returns are not visible.
// The comments below describe only what the visible lines show.
//
// ASCII lead byte: the entry is read directly from bidiValues.
88 case c0 < 0x80: // is ASCII
89 return Properties{entry: bidiValues[c0]}, 1
// Zero Properties with size 1; the case that guards this return is not visible.
91 return Properties{}, 1
92 case c0 < 0xE0: // 2-byte UTF-8
// Size 0 — presumably the "not enough bytes" result from the doc comment; the
// guarding condition is not visible.
94 return Properties{}, 0
// c1 outside 0x80..0xBF is not a continuation byte: report size 1.
98 if c1 < 0x80 || 0xC0 <= c1 {
99 return Properties{}, 1
// Valid 2-byte sequence: look the entry up in the trie.
101 return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
102 case c0 < 0xF0: // 3-byte UTF-8
104 return Properties{}, 0
108 if c1 < 0x80 || 0xC0 <= c1 {
109 return Properties{}, 1
// Combine the current index with the next byte; o is not used on any visible
// line.
111 o := uint32(i)<<6 + uint32(c1)
114 if c2 < 0x80 || 0xC0 <= c2 {
115 return Properties{}, 1
// Only the 3-byte case records the final byte in last; Class uses p.last&0xF
// as an index into controlByteToClass.
117 return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
118 case c0 < 0xF8: // 4-byte UTF-8
120 return Properties{}, 0
124 if c1 < 0x80 || 0xC0 <= c1 {
125 return Properties{}, 1
127 o := uint32(i)<<6 + uint32(c1)
130 if c2 < 0x80 || 0xC0 <= c2 {
131 return Properties{}, 1
133 o = uint32(i)<<6 + uint32(c2)
136 if c3 < 0x80 || 0xC0 <= c3 {
137 return Properties{}, 1
139 return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
// Final return: zero Properties, size 1. Presumably reached for lead bytes that
// match none of the cases above — confirm against the full source.
142 return Properties{}, 1
145 // LookupString returns properties for the first rune in s and the width in
146 // bytes of its encoding. The size will be 0 if s does not hold enough bytes to
147 // complete the encoding.
148 func LookupString(s string) (p Properties, sz int) {
// NOTE(review): this listing omits source lines (the embedded line numbers
// jump, e.g. from 148 to 151). The enclosing switch, the definitions of c0, c1,
// c2, c3 and i, and the conditions guarding several returns are not visible.
// The visible lines are the same as those of Lookup, applied to a string
// instead of a byte slice.
//
// ASCII lead byte: the entry is read directly from bidiValues.
151 case c0 < 0x80: // is ASCII
152 return Properties{entry: bidiValues[c0]}, 1
// Zero Properties with size 1; the case that guards this return is not visible.
154 return Properties{}, 1
155 case c0 < 0xE0: // 2-byte UTF-8
// Size 0 — presumably the "not enough bytes" result from the doc comment; the
// guarding condition is not visible.
157 return Properties{}, 0
// c1 outside 0x80..0xBF is not a continuation byte: report size 1.
161 if c1 < 0x80 || 0xC0 <= c1 {
162 return Properties{}, 1
// Valid 2-byte sequence: look the entry up in the trie.
164 return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
165 case c0 < 0xF0: // 3-byte UTF-8
167 return Properties{}, 0
171 if c1 < 0x80 || 0xC0 <= c1 {
172 return Properties{}, 1
// Combine the current index with the next byte; o is not used on any visible
// line.
174 o := uint32(i)<<6 + uint32(c1)
177 if c2 < 0x80 || 0xC0 <= c2 {
178 return Properties{}, 1
// Only the 3-byte case records the final byte in last; Class uses p.last&0xF
// as an index into controlByteToClass.
180 return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
181 case c0 < 0xF8: // 4-byte UTF-8
183 return Properties{}, 0
187 if c1 < 0x80 || 0xC0 <= c1 {
188 return Properties{}, 1
190 o := uint32(i)<<6 + uint32(c1)
193 if c2 < 0x80 || 0xC0 <= c2 {
194 return Properties{}, 1
196 o = uint32(i)<<6 + uint32(c2)
199 if c3 < 0x80 || 0xC0 <= c3 {
200 return Properties{}, 1
202 return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
// Final return: zero Properties, size 1. Presumably reached for lead bytes that
// match none of the cases above — confirm against the full source.
205 return Properties{}, 1