diff options
Diffstat (limited to 'vendor/golang.org/x/text/unicode/bidi/core.go')
-rw-r--r-- | vendor/golang.org/x/text/unicode/bidi/core.go | 1058 |
1 files changed, 1058 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/unicode/bidi/core.go b/vendor/golang.org/x/text/unicode/bidi/core.go new file mode 100644 index 0000000..d4c1399 --- /dev/null +++ b/vendor/golang.org/x/text/unicode/bidi/core.go | |||
@@ -0,0 +1,1058 @@ | |||
1 | // Copyright 2015 The Go Authors. All rights reserved. | ||
2 | // Use of this source code is governed by a BSD-style | ||
3 | // license that can be found in the LICENSE file. | ||
4 | |||
5 | package bidi | ||
6 | |||
7 | import "log" | ||
8 | |||
9 | // This implementation is a port based on the reference implementation found at: | ||
10 | // http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/ | ||
11 | // | ||
12 | // described in Unicode Bidirectional Algorithm (UAX #9). | ||
13 | // | ||
14 | // Input: | ||
15 | // There are two levels of input to the algorithm, since clients may prefer to | ||
16 | // supply some information from out-of-band sources rather than relying on the | ||
17 | // default behavior. | ||
18 | // | ||
19 | // - Bidi class array | ||
20 | // - Bidi class array, with externally supplied base line direction | ||
21 | // | ||
22 | // Output: | ||
23 | // Output is separated into several stages: | ||
24 | // | ||
25 | // - levels array over entire paragraph | ||
26 | // - reordering array over entire paragraph | ||
27 | // - levels array over line | ||
28 | // - reordering array over line | ||
29 | // | ||
30 | // Note that for conformance to the Unicode Bidirectional Algorithm, | ||
31 | // implementations are only required to generate correct reordering and | ||
32 | // character directionality (odd or even levels) over a line. Generating | ||
33 | // identical level arrays over a line is not required. Bidi explicit format | ||
34 | // codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and | ||
35 | // positions as long as the rest of the input is properly reordered. | ||
36 | // | ||
37 | // As the algorithm is defined to operate on a single paragraph at a time, this | ||
38 | // implementation is written to handle single paragraphs. Thus rule P1 is | ||
39 | // presumed by this implementation-- the data provided to the implementation is | ||
40 | // assumed to be a single paragraph, and either contains no 'B' codes, or a | ||
41 | // single 'B' code at the end of the input. 'B' is allowed as input to | ||
42 | // illustrate how the algorithm assigns it a level. | ||
43 | // | ||
44 | // Also note that rules L3 and L4 depend on the rendering engine that uses the | ||
45 | // result of the bidi algorithm. This implementation assumes that the rendering | ||
46 | // engine expects combining marks in visual order (e.g. to the left of their | ||
47 | // base character in RTL runs) and that it adjusts the glyphs used to render | ||
48 | // mirrored characters that are in RTL runs so that they render appropriately. | ||
49 | |||
50 | // level is the embedding level of a character. Even embedding levels indicate | ||
51 | // left-to-right order and odd levels indicate right-to-left order. The special | ||
52 | // level of -1 is reserved for undefined order. | ||
53 | type level int8 | ||
54 | |||
55 | const implicitLevel level = -1 | ||
56 | |||
57 | // in returns if x is equal to any of the values in set. | ||
58 | func (c Class) in(set ...Class) bool { | ||
59 | for _, s := range set { | ||
60 | if c == s { | ||
61 | return true | ||
62 | } | ||
63 | } | ||
64 | return false | ||
65 | } | ||
66 | |||
67 | // A paragraph contains the state of a paragraph. | ||
68 | type paragraph struct { | ||
69 | initialTypes []Class | ||
70 | |||
71 | // Arrays of properties needed for paired bracket evaluation in N0 | ||
72 | pairTypes []bracketType // paired Bracket types for paragraph | ||
73 | pairValues []rune // rune for opening bracket or pbOpen and pbClose; 0 for pbNone | ||
74 | |||
75 | embeddingLevel level // default: = implicitLevel; | ||
76 | |||
77 | // at the paragraph levels | ||
78 | resultTypes []Class | ||
79 | resultLevels []level | ||
80 | |||
81 | // Index of matching PDI for isolate initiator characters. For other | ||
82 | // characters, the value of matchingPDI will be set to -1. For isolate | ||
83 | // initiators with no matching PDI, matchingPDI will be set to the length of | ||
84 | // the input string. | ||
85 | matchingPDI []int | ||
86 | |||
87 | // Index of matching isolate initiator for PDI characters. For other | ||
88 | // characters, and for PDIs with no matching isolate initiator, the value of | ||
89 | // matchingIsolateInitiator will be set to -1. | ||
90 | matchingIsolateInitiator []int | ||
91 | } | ||
92 | |||
93 | // newParagraph initializes a paragraph. The user needs to supply a few arrays | ||
94 | // corresponding to the preprocessed text input. The types correspond to the | ||
95 | // Unicode BiDi classes for each rune. pairTypes indicates the bracket type for | ||
96 | // each rune. pairValues provides a unique bracket class identifier for each | ||
97 | // rune (suggested is the rune of the open bracket for opening and matching | ||
98 | // close brackets, after normalization). The embedding levels are optional, but | ||
99 | // may be supplied to encode embedding levels of styled text. | ||
100 | // | ||
101 | // TODO: return an error. | ||
102 | func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph { | ||
103 | validateTypes(types) | ||
104 | validatePbTypes(pairTypes) | ||
105 | validatePbValues(pairValues, pairTypes) | ||
106 | validateParagraphEmbeddingLevel(levels) | ||
107 | |||
108 | p := ¶graph{ | ||
109 | initialTypes: append([]Class(nil), types...), | ||
110 | embeddingLevel: levels, | ||
111 | |||
112 | pairTypes: pairTypes, | ||
113 | pairValues: pairValues, | ||
114 | |||
115 | resultTypes: append([]Class(nil), types...), | ||
116 | } | ||
117 | p.run() | ||
118 | return p | ||
119 | } | ||
120 | |||
121 | func (p *paragraph) Len() int { return len(p.initialTypes) } | ||
122 | |||
123 | // The algorithm. Does not include line-based processing (Rules L1, L2). | ||
124 | // These are applied later in the line-based phase of the algorithm. | ||
125 | func (p *paragraph) run() { | ||
126 | p.determineMatchingIsolates() | ||
127 | |||
128 | // 1) determining the paragraph level | ||
129 | // Rule P1 is the requirement for entering this algorithm. | ||
130 | // Rules P2, P3. | ||
131 | // If no externally supplied paragraph embedding level, use default. | ||
132 | if p.embeddingLevel == implicitLevel { | ||
133 | p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len()) | ||
134 | } | ||
135 | |||
136 | // Initialize result levels to paragraph embedding level. | ||
137 | p.resultLevels = make([]level, p.Len()) | ||
138 | setLevels(p.resultLevels, p.embeddingLevel) | ||
139 | |||
140 | // 2) Explicit levels and directions | ||
141 | // Rules X1-X8. | ||
142 | p.determineExplicitEmbeddingLevels() | ||
143 | |||
144 | // Rule X9. | ||
145 | // We do not remove the embeddings, the overrides, the PDFs, and the BNs | ||
146 | // from the string explicitly. But they are not copied into isolating run | ||
147 | // sequences when they are created, so they are removed for all | ||
148 | // practical purposes. | ||
149 | |||
150 | // Rule X10. | ||
151 | // Run remainder of algorithm one isolating run sequence at a time | ||
152 | for _, seq := range p.determineIsolatingRunSequences() { | ||
153 | // 3) resolving weak types | ||
154 | // Rules W1-W7. | ||
155 | seq.resolveWeakTypes() | ||
156 | |||
157 | // 4a) resolving paired brackets | ||
158 | // Rule N0 | ||
159 | resolvePairedBrackets(seq) | ||
160 | |||
161 | // 4b) resolving neutral types | ||
162 | // Rules N1-N3. | ||
163 | seq.resolveNeutralTypes() | ||
164 | |||
165 | // 5) resolving implicit embedding levels | ||
166 | // Rules I1, I2. | ||
167 | seq.resolveImplicitLevels() | ||
168 | |||
169 | // Apply the computed levels and types | ||
170 | seq.applyLevelsAndTypes() | ||
171 | } | ||
172 | |||
173 | // Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and | ||
174 | // BNs. This is for convenience, so the resulting level array will have | ||
175 | // a value for every character. | ||
176 | p.assignLevelsToCharactersRemovedByX9() | ||
177 | } | ||
178 | |||
179 | // determineMatchingIsolates determines the matching PDI for each isolate | ||
180 | // initiator and vice versa. | ||
181 | // | ||
182 | // Definition BD9. | ||
183 | // | ||
184 | // At the end of this function: | ||
185 | // | ||
186 | // - The member variable matchingPDI is set to point to the index of the | ||
187 | // matching PDI character for each isolate initiator character. If there is | ||
188 | // no matching PDI, it is set to the length of the input text. For other | ||
189 | // characters, it is set to -1. | ||
190 | // - The member variable matchingIsolateInitiator is set to point to the | ||
191 | // index of the matching isolate initiator character for each PDI character. | ||
192 | // If there is no matching isolate initiator, or the character is not a PDI, | ||
193 | // it is set to -1. | ||
194 | func (p *paragraph) determineMatchingIsolates() { | ||
195 | p.matchingPDI = make([]int, p.Len()) | ||
196 | p.matchingIsolateInitiator = make([]int, p.Len()) | ||
197 | |||
198 | for i := range p.matchingIsolateInitiator { | ||
199 | p.matchingIsolateInitiator[i] = -1 | ||
200 | } | ||
201 | |||
202 | for i := range p.matchingPDI { | ||
203 | p.matchingPDI[i] = -1 | ||
204 | |||
205 | if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) { | ||
206 | depthCounter := 1 | ||
207 | for j := i + 1; j < p.Len(); j++ { | ||
208 | if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) { | ||
209 | depthCounter++ | ||
210 | } else if u == PDI { | ||
211 | if depthCounter--; depthCounter == 0 { | ||
212 | p.matchingPDI[i] = j | ||
213 | p.matchingIsolateInitiator[j] = i | ||
214 | break | ||
215 | } | ||
216 | } | ||
217 | } | ||
218 | if p.matchingPDI[i] == -1 { | ||
219 | p.matchingPDI[i] = p.Len() | ||
220 | } | ||
221 | } | ||
222 | } | ||
223 | } | ||
224 | |||
225 | // determineParagraphEmbeddingLevel reports the resolved paragraph direction of | ||
226 | // the substring limited by the given range [start, end). | ||
227 | // | ||
228 | // Determines the paragraph level based on rules P2, P3. This is also used | ||
229 | // in rule X5c to find if an FSI should resolve to LRI or RLI. | ||
230 | func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level { | ||
231 | var strongType Class = unknownClass | ||
232 | |||
233 | // Rule P2. | ||
234 | for i := start; i < end; i++ { | ||
235 | if t := p.resultTypes[i]; t.in(L, AL, R) { | ||
236 | strongType = t | ||
237 | break | ||
238 | } else if t.in(FSI, LRI, RLI) { | ||
239 | i = p.matchingPDI[i] // skip over to the matching PDI | ||
240 | if i > end { | ||
241 | log.Panic("assert (i <= end)") | ||
242 | } | ||
243 | } | ||
244 | } | ||
245 | // Rule P3. | ||
246 | switch strongType { | ||
247 | case unknownClass: // none found | ||
248 | // default embedding level when no strong types found is 0. | ||
249 | return 0 | ||
250 | case L: | ||
251 | return 0 | ||
252 | default: // AL, R | ||
253 | return 1 | ||
254 | } | ||
255 | } | ||
256 | |||
257 | const maxDepth = 125 | ||
258 | |||
259 | // This stack will store the embedding levels and override and isolated | ||
260 | // statuses | ||
261 | type directionalStatusStack struct { | ||
262 | stackCounter int | ||
263 | embeddingLevelStack [maxDepth + 1]level | ||
264 | overrideStatusStack [maxDepth + 1]Class | ||
265 | isolateStatusStack [maxDepth + 1]bool | ||
266 | } | ||
267 | |||
268 | func (s *directionalStatusStack) empty() { s.stackCounter = 0 } | ||
269 | func (s *directionalStatusStack) pop() { s.stackCounter-- } | ||
270 | func (s *directionalStatusStack) depth() int { return s.stackCounter } | ||
271 | |||
272 | func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) { | ||
273 | s.embeddingLevelStack[s.stackCounter] = level | ||
274 | s.overrideStatusStack[s.stackCounter] = overrideStatus | ||
275 | s.isolateStatusStack[s.stackCounter] = isolateStatus | ||
276 | s.stackCounter++ | ||
277 | } | ||
278 | |||
279 | func (s *directionalStatusStack) lastEmbeddingLevel() level { | ||
280 | return s.embeddingLevelStack[s.stackCounter-1] | ||
281 | } | ||
282 | |||
283 | func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class { | ||
284 | return s.overrideStatusStack[s.stackCounter-1] | ||
285 | } | ||
286 | |||
287 | func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool { | ||
288 | return s.isolateStatusStack[s.stackCounter-1] | ||
289 | } | ||
290 | |||
291 | // Determine explicit levels using rules X1 - X8 | ||
292 | func (p *paragraph) determineExplicitEmbeddingLevels() { | ||
293 | var stack directionalStatusStack | ||
294 | var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int | ||
295 | |||
296 | // Rule X1. | ||
297 | stack.push(p.embeddingLevel, ON, false) | ||
298 | |||
299 | for i, t := range p.resultTypes { | ||
300 | // Rules X2, X3, X4, X5, X5a, X5b, X5c | ||
301 | switch t { | ||
302 | case RLE, LRE, RLO, LRO, RLI, LRI, FSI: | ||
303 | isIsolate := t.in(RLI, LRI, FSI) | ||
304 | isRTL := t.in(RLE, RLO, RLI) | ||
305 | |||
306 | // override if this is an FSI that resolves to RLI | ||
307 | if t == FSI { | ||
308 | isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1) | ||
309 | } | ||
310 | if isIsolate { | ||
311 | p.resultLevels[i] = stack.lastEmbeddingLevel() | ||
312 | if stack.lastDirectionalOverrideStatus() != ON { | ||
313 | p.resultTypes[i] = stack.lastDirectionalOverrideStatus() | ||
314 | } | ||
315 | } | ||
316 | |||
317 | var newLevel level | ||
318 | if isRTL { | ||
319 | // least greater odd | ||
320 | newLevel = (stack.lastEmbeddingLevel() + 1) | 1 | ||
321 | } else { | ||
322 | // least greater even | ||
323 | newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1 | ||
324 | } | ||
325 | |||
326 | if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 { | ||
327 | if isIsolate { | ||
328 | validIsolateCount++ | ||
329 | } | ||
330 | // Push new embedding level, override status, and isolated | ||
331 | // status. | ||
332 | // No check for valid stack counter, since the level check | ||
333 | // suffices. | ||
334 | switch t { | ||
335 | case LRO: | ||
336 | stack.push(newLevel, L, isIsolate) | ||
337 | case RLO: | ||
338 | stack.push(newLevel, R, isIsolate) | ||
339 | default: | ||
340 | stack.push(newLevel, ON, isIsolate) | ||
341 | } | ||
342 | // Not really part of the spec | ||
343 | if !isIsolate { | ||
344 | p.resultLevels[i] = newLevel | ||
345 | } | ||
346 | } else { | ||
347 | // This is an invalid explicit formatting character, | ||
348 | // so apply the "Otherwise" part of rules X2-X5b. | ||
349 | if isIsolate { | ||
350 | overflowIsolateCount++ | ||
351 | } else { // !isIsolate | ||
352 | if overflowIsolateCount == 0 { | ||
353 | overflowEmbeddingCount++ | ||
354 | } | ||
355 | } | ||
356 | } | ||
357 | |||
358 | // Rule X6a | ||
359 | case PDI: | ||
360 | if overflowIsolateCount > 0 { | ||
361 | overflowIsolateCount-- | ||
362 | } else if validIsolateCount == 0 { | ||
363 | // do nothing | ||
364 | } else { | ||
365 | overflowEmbeddingCount = 0 | ||
366 | for !stack.lastDirectionalIsolateStatus() { | ||
367 | stack.pop() | ||
368 | } | ||
369 | stack.pop() | ||
370 | validIsolateCount-- | ||
371 | } | ||
372 | p.resultLevels[i] = stack.lastEmbeddingLevel() | ||
373 | |||
374 | // Rule X7 | ||
375 | case PDF: | ||
376 | // Not really part of the spec | ||
377 | p.resultLevels[i] = stack.lastEmbeddingLevel() | ||
378 | |||
379 | if overflowIsolateCount > 0 { | ||
380 | // do nothing | ||
381 | } else if overflowEmbeddingCount > 0 { | ||
382 | overflowEmbeddingCount-- | ||
383 | } else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 { | ||
384 | stack.pop() | ||
385 | } | ||
386 | |||
387 | case B: // paragraph separator. | ||
388 | // Rule X8. | ||
389 | |||
390 | // These values are reset for clarity, in this implementation B | ||
391 | // can only occur as the last code in the array. | ||
392 | stack.empty() | ||
393 | overflowIsolateCount = 0 | ||
394 | overflowEmbeddingCount = 0 | ||
395 | validIsolateCount = 0 | ||
396 | p.resultLevels[i] = p.embeddingLevel | ||
397 | |||
398 | default: | ||
399 | p.resultLevels[i] = stack.lastEmbeddingLevel() | ||
400 | if stack.lastDirectionalOverrideStatus() != ON { | ||
401 | p.resultTypes[i] = stack.lastDirectionalOverrideStatus() | ||
402 | } | ||
403 | } | ||
404 | } | ||
405 | } | ||
406 | |||
407 | type isolatingRunSequence struct { | ||
408 | p *paragraph | ||
409 | |||
410 | indexes []int // indexes to the original string | ||
411 | |||
412 | types []Class // type of each character using the index | ||
413 | resolvedLevels []level // resolved levels after application of rules | ||
414 | level level | ||
415 | sos, eos Class | ||
416 | } | ||
417 | |||
418 | func (i *isolatingRunSequence) Len() int { return len(i.indexes) } | ||
419 | |||
420 | func maxLevel(a, b level) level { | ||
421 | if a > b { | ||
422 | return a | ||
423 | } | ||
424 | return b | ||
425 | } | ||
426 | |||
427 | // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types, | ||
428 | // either L or R, for each isolating run sequence. | ||
429 | func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence { | ||
430 | length := len(indexes) | ||
431 | types := make([]Class, length) | ||
432 | for i, x := range indexes { | ||
433 | types[i] = p.resultTypes[x] | ||
434 | } | ||
435 | |||
436 | // assign level, sos and eos | ||
437 | prevChar := indexes[0] - 1 | ||
438 | for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) { | ||
439 | prevChar-- | ||
440 | } | ||
441 | prevLevel := p.embeddingLevel | ||
442 | if prevChar >= 0 { | ||
443 | prevLevel = p.resultLevels[prevChar] | ||
444 | } | ||
445 | |||
446 | var succLevel level | ||
447 | lastType := types[length-1] | ||
448 | if lastType.in(LRI, RLI, FSI) { | ||
449 | succLevel = p.embeddingLevel | ||
450 | } else { | ||
451 | // the first character after the end of run sequence | ||
452 | limit := indexes[length-1] + 1 | ||
453 | for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ { | ||
454 | |||
455 | } | ||
456 | succLevel = p.embeddingLevel | ||
457 | if limit < p.Len() { | ||
458 | succLevel = p.resultLevels[limit] | ||
459 | } | ||
460 | } | ||
461 | level := p.resultLevels[indexes[0]] | ||
462 | return &isolatingRunSequence{ | ||
463 | p: p, | ||
464 | indexes: indexes, | ||
465 | types: types, | ||
466 | level: level, | ||
467 | sos: typeForLevel(maxLevel(prevLevel, level)), | ||
468 | eos: typeForLevel(maxLevel(succLevel, level)), | ||
469 | } | ||
470 | } | ||
471 | |||
472 | // Resolving weak types Rules W1-W7. | ||
473 | // | ||
474 | // Note that some weak types (EN, AN) remain after this processing is | ||
475 | // complete. | ||
476 | func (s *isolatingRunSequence) resolveWeakTypes() { | ||
477 | |||
478 | // on entry, only these types remain | ||
479 | s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI) | ||
480 | |||
481 | // Rule W1. | ||
482 | // Changes all NSMs. | ||
483 | preceedingCharacterType := s.sos | ||
484 | for i, t := range s.types { | ||
485 | if t == NSM { | ||
486 | s.types[i] = preceedingCharacterType | ||
487 | } else { | ||
488 | if t.in(LRI, RLI, FSI, PDI) { | ||
489 | preceedingCharacterType = ON | ||
490 | } | ||
491 | preceedingCharacterType = t | ||
492 | } | ||
493 | } | ||
494 | |||
495 | // Rule W2. | ||
496 | // EN does not change at the start of the run, because sos != AL. | ||
497 | for i, t := range s.types { | ||
498 | if t == EN { | ||
499 | for j := i - 1; j >= 0; j-- { | ||
500 | if t := s.types[j]; t.in(L, R, AL) { | ||
501 | if t == AL { | ||
502 | s.types[i] = AN | ||
503 | } | ||
504 | break | ||
505 | } | ||
506 | } | ||
507 | } | ||
508 | } | ||
509 | |||
510 | // Rule W3. | ||
511 | for i, t := range s.types { | ||
512 | if t == AL { | ||
513 | s.types[i] = R | ||
514 | } | ||
515 | } | ||
516 | |||
517 | // Rule W4. | ||
518 | // Since there must be values on both sides for this rule to have an | ||
519 | // effect, the scan skips the first and last value. | ||
520 | // | ||
521 | // Although the scan proceeds left to right, and changes the type | ||
522 | // values in a way that would appear to affect the computations | ||
523 | // later in the scan, there is actually no problem. A change in the | ||
524 | // current value can only affect the value to its immediate right, | ||
525 | // and only affect it if it is ES or CS. But the current value can | ||
526 | // only change if the value to its right is not ES or CS. Thus | ||
527 | // either the current value will not change, or its change will have | ||
528 | // no effect on the remainder of the analysis. | ||
529 | |||
530 | for i := 1; i < s.Len()-1; i++ { | ||
531 | t := s.types[i] | ||
532 | if t == ES || t == CS { | ||
533 | prevSepType := s.types[i-1] | ||
534 | succSepType := s.types[i+1] | ||
535 | if prevSepType == EN && succSepType == EN { | ||
536 | s.types[i] = EN | ||
537 | } else if s.types[i] == CS && prevSepType == AN && succSepType == AN { | ||
538 | s.types[i] = AN | ||
539 | } | ||
540 | } | ||
541 | } | ||
542 | |||
543 | // Rule W5. | ||
544 | for i, t := range s.types { | ||
545 | if t == ET { | ||
546 | // locate end of sequence | ||
547 | runStart := i | ||
548 | runEnd := s.findRunLimit(runStart, ET) | ||
549 | |||
550 | // check values at ends of sequence | ||
551 | t := s.sos | ||
552 | if runStart > 0 { | ||
553 | t = s.types[runStart-1] | ||
554 | } | ||
555 | if t != EN { | ||
556 | t = s.eos | ||
557 | if runEnd < len(s.types) { | ||
558 | t = s.types[runEnd] | ||
559 | } | ||
560 | } | ||
561 | if t == EN { | ||
562 | setTypes(s.types[runStart:runEnd], EN) | ||
563 | } | ||
564 | // continue at end of sequence | ||
565 | i = runEnd | ||
566 | } | ||
567 | } | ||
568 | |||
569 | // Rule W6. | ||
570 | for i, t := range s.types { | ||
571 | if t.in(ES, ET, CS) { | ||
572 | s.types[i] = ON | ||
573 | } | ||
574 | } | ||
575 | |||
576 | // Rule W7. | ||
577 | for i, t := range s.types { | ||
578 | if t == EN { | ||
579 | // set default if we reach start of run | ||
580 | prevStrongType := s.sos | ||
581 | for j := i - 1; j >= 0; j-- { | ||
582 | t = s.types[j] | ||
583 | if t == L || t == R { // AL's have been changed to R | ||
584 | prevStrongType = t | ||
585 | break | ||
586 | } | ||
587 | } | ||
588 | if prevStrongType == L { | ||
589 | s.types[i] = L | ||
590 | } | ||
591 | } | ||
592 | } | ||
593 | } | ||
594 | |||
595 | // 6) resolving neutral types Rules N1-N2. | ||
596 | func (s *isolatingRunSequence) resolveNeutralTypes() { | ||
597 | |||
598 | // on entry, only these types can be in resultTypes | ||
599 | s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI) | ||
600 | |||
601 | for i, t := range s.types { | ||
602 | switch t { | ||
603 | case WS, ON, B, S, RLI, LRI, FSI, PDI: | ||
604 | // find bounds of run of neutrals | ||
605 | runStart := i | ||
606 | runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI) | ||
607 | |||
608 | // determine effective types at ends of run | ||
609 | var leadType, trailType Class | ||
610 | |||
611 | // Note that the character found can only be L, R, AN, or | ||
612 | // EN. | ||
613 | if runStart == 0 { | ||
614 | leadType = s.sos | ||
615 | } else { | ||
616 | leadType = s.types[runStart-1] | ||
617 | if leadType.in(AN, EN) { | ||
618 | leadType = R | ||
619 | } | ||
620 | } | ||
621 | if runEnd == len(s.types) { | ||
622 | trailType = s.eos | ||
623 | } else { | ||
624 | trailType = s.types[runEnd] | ||
625 | if trailType.in(AN, EN) { | ||
626 | trailType = R | ||
627 | } | ||
628 | } | ||
629 | |||
630 | var resolvedType Class | ||
631 | if leadType == trailType { | ||
632 | // Rule N1. | ||
633 | resolvedType = leadType | ||
634 | } else { | ||
635 | // Rule N2. | ||
636 | // Notice the embedding level of the run is used, not | ||
637 | // the paragraph embedding level. | ||
638 | resolvedType = typeForLevel(s.level) | ||
639 | } | ||
640 | |||
641 | setTypes(s.types[runStart:runEnd], resolvedType) | ||
642 | |||
643 | // skip over run of (former) neutrals | ||
644 | i = runEnd | ||
645 | } | ||
646 | } | ||
647 | } | ||
648 | |||
649 | func setLevels(levels []level, newLevel level) { | ||
650 | for i := range levels { | ||
651 | levels[i] = newLevel | ||
652 | } | ||
653 | } | ||
654 | |||
655 | func setTypes(types []Class, newType Class) { | ||
656 | for i := range types { | ||
657 | types[i] = newType | ||
658 | } | ||
659 | } | ||
660 | |||
661 | // 7) resolving implicit embedding levels Rules I1, I2. | ||
662 | func (s *isolatingRunSequence) resolveImplicitLevels() { | ||
663 | |||
664 | // on entry, only these types can be in resultTypes | ||
665 | s.assertOnly(L, R, EN, AN) | ||
666 | |||
667 | s.resolvedLevels = make([]level, len(s.types)) | ||
668 | setLevels(s.resolvedLevels, s.level) | ||
669 | |||
670 | if (s.level & 1) == 0 { // even level | ||
671 | for i, t := range s.types { | ||
672 | // Rule I1. | ||
673 | if t == L { | ||
674 | // no change | ||
675 | } else if t == R { | ||
676 | s.resolvedLevels[i] += 1 | ||
677 | } else { // t == AN || t == EN | ||
678 | s.resolvedLevels[i] += 2 | ||
679 | } | ||
680 | } | ||
681 | } else { // odd level | ||
682 | for i, t := range s.types { | ||
683 | // Rule I2. | ||
684 | if t == R { | ||
685 | // no change | ||
686 | } else { // t == L || t == AN || t == EN | ||
687 | s.resolvedLevels[i] += 1 | ||
688 | } | ||
689 | } | ||
690 | } | ||
691 | } | ||
692 | |||
693 | // Applies the levels and types resolved in rules W1-I2 to the | ||
694 | // resultLevels array. | ||
695 | func (s *isolatingRunSequence) applyLevelsAndTypes() { | ||
696 | for i, x := range s.indexes { | ||
697 | s.p.resultTypes[x] = s.types[i] | ||
698 | s.p.resultLevels[x] = s.resolvedLevels[i] | ||
699 | } | ||
700 | } | ||
701 | |||
702 | // Return the limit of the run consisting only of the types in validSet | ||
703 | // starting at index. This checks the value at index, and will return | ||
704 | // index if that value is not in validSet. | ||
705 | func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int { | ||
706 | loop: | ||
707 | for ; index < len(s.types); index++ { | ||
708 | t := s.types[index] | ||
709 | for _, valid := range validSet { | ||
710 | if t == valid { | ||
711 | continue loop | ||
712 | } | ||
713 | } | ||
714 | return index // didn't find a match in validSet | ||
715 | } | ||
716 | return len(s.types) | ||
717 | } | ||
718 | |||
719 | // Algorithm validation. Assert that all values in types are in the | ||
720 | // provided set. | ||
721 | func (s *isolatingRunSequence) assertOnly(codes ...Class) { | ||
722 | loop: | ||
723 | for i, t := range s.types { | ||
724 | for _, c := range codes { | ||
725 | if t == c { | ||
726 | continue loop | ||
727 | } | ||
728 | } | ||
729 | log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i]) | ||
730 | } | ||
731 | } | ||
732 | |||
733 | // determineLevelRuns returns an array of level runs. Each level run is | ||
734 | // described as an array of indexes into the input string. | ||
735 | // | ||
736 | // Determines the level runs. Rule X9 will be applied in determining the | ||
737 | // runs, in the way that makes sure the characters that are supposed to be | ||
738 | // removed are not included in the runs. | ||
739 | func (p *paragraph) determineLevelRuns() [][]int { | ||
740 | run := []int{} | ||
741 | allRuns := [][]int{} | ||
742 | currentLevel := implicitLevel | ||
743 | |||
744 | for i := range p.initialTypes { | ||
745 | if !isRemovedByX9(p.initialTypes[i]) { | ||
746 | if p.resultLevels[i] != currentLevel { | ||
747 | // we just encountered a new run; wrap up last run | ||
748 | if currentLevel >= 0 { // only wrap it up if there was a run | ||
749 | allRuns = append(allRuns, run) | ||
750 | run = nil | ||
751 | } | ||
752 | // Start new run | ||
753 | currentLevel = p.resultLevels[i] | ||
754 | } | ||
755 | run = append(run, i) | ||
756 | } | ||
757 | } | ||
758 | // Wrap up the final run, if any | ||
759 | if len(run) > 0 { | ||
760 | allRuns = append(allRuns, run) | ||
761 | } | ||
762 | return allRuns | ||
763 | } | ||
764 | |||
765 | // Definition BD13. Determine isolating run sequences. | ||
766 | func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence { | ||
767 | levelRuns := p.determineLevelRuns() | ||
768 | |||
769 | // Compute the run that each character belongs to | ||
770 | runForCharacter := make([]int, p.Len()) | ||
771 | for i, run := range levelRuns { | ||
772 | for _, index := range run { | ||
773 | runForCharacter[index] = i | ||
774 | } | ||
775 | } | ||
776 | |||
777 | sequences := []*isolatingRunSequence{} | ||
778 | |||
779 | var currentRunSequence []int | ||
780 | |||
781 | for _, run := range levelRuns { | ||
782 | first := run[0] | ||
783 | if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 { | ||
784 | currentRunSequence = nil | ||
785 | // int run = i; | ||
786 | for { | ||
787 | // Copy this level run into currentRunSequence | ||
788 | currentRunSequence = append(currentRunSequence, run...) | ||
789 | |||
790 | last := currentRunSequence[len(currentRunSequence)-1] | ||
791 | lastT := p.initialTypes[last] | ||
792 | if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() { | ||
793 | run = levelRuns[runForCharacter[p.matchingPDI[last]]] | ||
794 | } else { | ||
795 | break | ||
796 | } | ||
797 | } | ||
798 | sequences = append(sequences, p.isolatingRunSequence(currentRunSequence)) | ||
799 | } | ||
800 | } | ||
801 | return sequences | ||
802 | } | ||
803 | |||
804 | // Assign level information to characters removed by rule X9. This is for | ||
805 | // ease of relating the level information to the original input data. Note | ||
806 | // that the levels assigned to these codes are arbitrary, they're chosen so | ||
807 | // as to avoid breaking level runs. | ||
808 | func (p *paragraph) assignLevelsToCharactersRemovedByX9() { | ||
809 | for i, t := range p.initialTypes { | ||
810 | if t.in(LRE, RLE, LRO, RLO, PDF, BN) { | ||
811 | p.resultTypes[i] = t | ||
812 | p.resultLevels[i] = -1 | ||
813 | } | ||
814 | } | ||
815 | // now propagate forward the levels information (could have | ||
816 | // propagated backward, the main thing is not to introduce a level | ||
817 | // break where one doesn't already exist). | ||
818 | |||
819 | if p.resultLevels[0] == -1 { | ||
820 | p.resultLevels[0] = p.embeddingLevel | ||
821 | } | ||
822 | for i := 1; i < len(p.initialTypes); i++ { | ||
823 | if p.resultLevels[i] == -1 { | ||
824 | p.resultLevels[i] = p.resultLevels[i-1] | ||
825 | } | ||
826 | } | ||
827 | // Embedding information is for informational purposes only so need not be | ||
828 | // adjusted. | ||
829 | } | ||
830 | |||
831 | // | ||
832 | // Output | ||
833 | // | ||
834 | |||
835 | // getLevels computes levels array breaking lines at offsets in linebreaks. | ||
836 | // Rule L1. | ||
837 | // | ||
838 | // The linebreaks array must include at least one value. The values must be | ||
839 | // in strictly increasing order (no duplicates) between 1 and the length of | ||
840 | // the text, inclusive. The last value must be the length of the text. | ||
841 | func (p *paragraph) getLevels(linebreaks []int) []level { | ||
842 | // Note that since the previous processing has removed all | ||
843 | // P, S, and WS values from resultTypes, the values referred to | ||
844 | // in these rules are the initial types, before any processing | ||
845 | // has been applied (including processing of overrides). | ||
846 | // | ||
847 | // This example implementation has reinserted explicit format codes | ||
848 | // and BN, in order that the levels array correspond to the | ||
849 | // initial text. Their final placement is not normative. | ||
850 | // These codes are treated like WS in this implementation, | ||
851 | // so they don't interrupt sequences of WS. | ||
852 | |||
853 | validateLineBreaks(linebreaks, p.Len()) | ||
854 | |||
855 | result := append([]level(nil), p.resultLevels...) | ||
856 | |||
857 | // don't worry about linebreaks since if there is a break within | ||
858 | // a series of WS values preceding S, the linebreak itself | ||
859 | // causes the reset. | ||
860 | for i, t := range p.initialTypes { | ||
861 | if t.in(B, S) { | ||
862 | // Rule L1, clauses one and two. | ||
863 | result[i] = p.embeddingLevel | ||
864 | |||
865 | // Rule L1, clause three. | ||
866 | for j := i - 1; j >= 0; j-- { | ||
867 | if isWhitespace(p.initialTypes[j]) { // including format codes | ||
868 | result[j] = p.embeddingLevel | ||
869 | } else { | ||
870 | break | ||
871 | } | ||
872 | } | ||
873 | } | ||
874 | } | ||
875 | |||
876 | // Rule L1, clause four. | ||
877 | start := 0 | ||
878 | for _, limit := range linebreaks { | ||
879 | for j := limit - 1; j >= start; j-- { | ||
880 | if isWhitespace(p.initialTypes[j]) { // including format codes | ||
881 | result[j] = p.embeddingLevel | ||
882 | } else { | ||
883 | break | ||
884 | } | ||
885 | } | ||
886 | start = limit | ||
887 | } | ||
888 | |||
889 | return result | ||
890 | } | ||
891 | |||
892 | // getReordering returns the reordering of lines from a visual index to a | ||
893 | // logical index for line breaks at the given offsets. | ||
894 | // | ||
895 | // Lines are concatenated from left to right. So for example, the fifth | ||
896 | // character from the left on the third line is | ||
897 | // | ||
898 | // getReordering(linebreaks)[linebreaks[1] + 4] | ||
899 | // | ||
900 | // (linebreaks[1] is the position after the last character of the second | ||
901 | // line, which is also the index of the first character on the third line, | ||
902 | // and adding four gets the fifth character from the left). | ||
903 | // | ||
904 | // The linebreaks array must include at least one value. The values must be | ||
905 | // in strictly increasing order (no duplicates) between 1 and the length of | ||
906 | // the text, inclusive. The last value must be the length of the text. | ||
907 | func (p *paragraph) getReordering(linebreaks []int) []int { | ||
908 | validateLineBreaks(linebreaks, p.Len()) | ||
909 | |||
910 | return computeMultilineReordering(p.getLevels(linebreaks), linebreaks) | ||
911 | } | ||
912 | |||
913 | // Return multiline reordering array for a given level array. Reordering | ||
914 | // does not occur across a line break. | ||
915 | func computeMultilineReordering(levels []level, linebreaks []int) []int { | ||
916 | result := make([]int, len(levels)) | ||
917 | |||
918 | start := 0 | ||
919 | for _, limit := range linebreaks { | ||
920 | tempLevels := make([]level, limit-start) | ||
921 | copy(tempLevels, levels[start:]) | ||
922 | |||
923 | for j, order := range computeReordering(tempLevels) { | ||
924 | result[start+j] = order + start | ||
925 | } | ||
926 | start = limit | ||
927 | } | ||
928 | return result | ||
929 | } | ||
930 | |||
931 | // Return reordering array for a given level array. This reorders a single | ||
932 | // line. The reordering is a visual to logical map. For example, the | ||
933 | // leftmost char is string.charAt(order[0]). Rule L2. | ||
934 | func computeReordering(levels []level) []int { | ||
935 | result := make([]int, len(levels)) | ||
936 | // initialize order | ||
937 | for i := range result { | ||
938 | result[i] = i | ||
939 | } | ||
940 | |||
941 | // locate highest level found on line. | ||
942 | // Note the rules say text, but no reordering across line bounds is | ||
943 | // performed, so this is sufficient. | ||
944 | highestLevel := level(0) | ||
945 | lowestOddLevel := level(maxDepth + 2) | ||
946 | for _, level := range levels { | ||
947 | if level > highestLevel { | ||
948 | highestLevel = level | ||
949 | } | ||
950 | if level&1 != 0 && level < lowestOddLevel { | ||
951 | lowestOddLevel = level | ||
952 | } | ||
953 | } | ||
954 | |||
955 | for level := highestLevel; level >= lowestOddLevel; level-- { | ||
956 | for i := 0; i < len(levels); i++ { | ||
957 | if levels[i] >= level { | ||
958 | // find range of text at or above this level | ||
959 | start := i | ||
960 | limit := i + 1 | ||
961 | for limit < len(levels) && levels[limit] >= level { | ||
962 | limit++ | ||
963 | } | ||
964 | |||
965 | for j, k := start, limit-1; j < k; j, k = j+1, k-1 { | ||
966 | result[j], result[k] = result[k], result[j] | ||
967 | } | ||
968 | // skip to end of level run | ||
969 | i = limit | ||
970 | } | ||
971 | } | ||
972 | } | ||
973 | |||
974 | return result | ||
975 | } | ||
976 | |||
977 | // isWhitespace reports whether the type is considered a whitespace type for the | ||
978 | // line break rules. | ||
979 | func isWhitespace(c Class) bool { | ||
980 | switch c { | ||
981 | case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS: | ||
982 | return true | ||
983 | } | ||
984 | return false | ||
985 | } | ||
986 | |||
987 | // isRemovedByX9 reports whether the type is one of the types removed in X9. | ||
988 | func isRemovedByX9(c Class) bool { | ||
989 | switch c { | ||
990 | case LRE, RLE, LRO, RLO, PDF, BN: | ||
991 | return true | ||
992 | } | ||
993 | return false | ||
994 | } | ||
995 | |||
996 | // typeForLevel reports the strong type (L or R) corresponding to the level. | ||
997 | func typeForLevel(level level) Class { | ||
998 | if (level & 0x1) == 0 { | ||
999 | return L | ||
1000 | } | ||
1001 | return R | ||
1002 | } | ||
1003 | |||
1004 | // TODO: change validation to not panic | ||
1005 | |||
1006 | func validateTypes(types []Class) { | ||
1007 | if len(types) == 0 { | ||
1008 | log.Panic("types is null") | ||
1009 | } | ||
1010 | for i, t := range types[:len(types)-1] { | ||
1011 | if t == B { | ||
1012 | log.Panicf("B type before end of paragraph at index: %d", i) | ||
1013 | } | ||
1014 | } | ||
1015 | } | ||
1016 | |||
1017 | func validateParagraphEmbeddingLevel(embeddingLevel level) { | ||
1018 | if embeddingLevel != implicitLevel && | ||
1019 | embeddingLevel != 0 && | ||
1020 | embeddingLevel != 1 { | ||
1021 | log.Panicf("illegal paragraph embedding level: %d", embeddingLevel) | ||
1022 | } | ||
1023 | } | ||
1024 | |||
1025 | func validateLineBreaks(linebreaks []int, textLength int) { | ||
1026 | prev := 0 | ||
1027 | for i, next := range linebreaks { | ||
1028 | if next <= prev { | ||
1029 | log.Panicf("bad linebreak: %d at index: %d", next, i) | ||
1030 | } | ||
1031 | prev = next | ||
1032 | } | ||
1033 | if prev != textLength { | ||
1034 | log.Panicf("last linebreak was %d, want %d", prev, textLength) | ||
1035 | } | ||
1036 | } | ||
1037 | |||
1038 | func validatePbTypes(pairTypes []bracketType) { | ||
1039 | if len(pairTypes) == 0 { | ||
1040 | log.Panic("pairTypes is null") | ||
1041 | } | ||
1042 | for i, pt := range pairTypes { | ||
1043 | switch pt { | ||
1044 | case bpNone, bpOpen, bpClose: | ||
1045 | default: | ||
1046 | log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i]) | ||
1047 | } | ||
1048 | } | ||
1049 | } | ||
1050 | |||
1051 | func validatePbValues(pairValues []rune, pairTypes []bracketType) { | ||
1052 | if pairValues == nil { | ||
1053 | log.Panic("pairValues is null") | ||
1054 | } | ||
1055 | if len(pairTypes) != len(pairValues) { | ||
1056 | log.Panic("pairTypes is different length from pairValues") | ||
1057 | } | ||
1058 | } | ||