vendor/golang.org/x/text/unicode/bidi/core.go

   1 // Copyright 2015 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package bidi
   6
   7 import "log"
   8
   9 // This implementation is a port based on the reference implementation found at:
  10 // https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
  11 //
  12 // described in Unicode Bidirectional Algorithm (UAX #9).
  13 //
  14 // Input:
  15 // There are two levels of input to the algorithm, since clients may prefer to
  16 // supply some information from out-of-band sources rather than relying on the
  17 // default behavior.
  18 //
  19 // - Bidi class array
  20 // - Bidi class array, with externally supplied base line direction
  21 //
  22 // Output:
  23 // Output is separated into several stages:
  24 //
  25 //  - levels array over entire paragraph
  26 //  - reordering array over entire paragraph
  27 //  - levels array over line
  28 //  - reordering array over line
  29 //
  30 // Note that for conformance to the Unicode Bidirectional Algorithm,
  31 // implementations are only required to generate correct reordering and
  32 // character directionality (odd or even levels) over a line. Generating
  33 // identical level arrays over a line is not required. Bidi explicit format
  34 // codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
  35 // positions as long as the rest of the input is properly reordered.
  36 //
  37 // As the algorithm is defined to operate on a single paragraph at a time, this
  38 // implementation is written to handle single paragraphs. Thus rule P1 is
  39 // presumed by this implementation-- the data provided to the implementation is
  40 // assumed to be a single paragraph, and either contains no 'B' codes, or a
  41 // single 'B' code at the end of the input. 'B' is allowed as input to
  42 // illustrate how the algorithm assigns it a level.
  43 //
  44 // Also note that rules L3 and L4 depend on the rendering engine that uses the
  45 // result of the bidi algorithm. This implementation assumes that the rendering
  46 // engine expects combining marks in visual order (e.g. to the left of their
  47 // base character in RTL runs) and that it adjusts the glyphs used to render
  48 // mirrored characters that are in RTL runs so that they render appropriately.
  49
  50 // level is the embedding level of a character. Even embedding levels indicate
  51 // left-to-right order and odd levels indicate right-to-left order. The special
  52 // level of -1 is reserved for undefined order.
  53 type level int8
  54
  55 const implicitLevel level = -1
  56
  57 // in returns if x is equal to any of the values in set.
  58 func (c Class) in(set ...Class) bool {
  59         for _, s := range set {
  60                 if c == s {
  61                         return true
  62                 }
  63         }
  64         return false
  65 }
  66
  67 // A paragraph contains the state of a paragraph.
  68 type paragraph struct {
  69         initialTypes []Class
  70
  71         // Arrays of properties needed for paired bracket evaluation in N0
  72         pairTypes  []bracketType // paired Bracket types for paragraph
  73         pairValues []rune        // rune for opening bracket or pbOpen and pbClose; 0 for pbNone
  74
  75         embeddingLevel level // default: = implicitLevel;
  76
  77         // at the paragraph levels
  78         resultTypes  []Class
  79         resultLevels []level
  80
  81         // Index of matching PDI for isolate initiator characters. For other
  82         // characters, the value of matchingPDI will be set to -1. For isolate
  83         // initiators with no matching PDI, matchingPDI will be set to the length of
  84         // the input string.
  85         matchingPDI []int
  86
  87         // Index of matching isolate initiator for PDI characters. For other
  88         // characters, and for PDIs with no matching isolate initiator, the value of
  89         // matchingIsolateInitiator will be set to -1.
  90         matchingIsolateInitiator []int
  91 }
  92
  93 // newParagraph initializes a paragraph. The user needs to supply a few arrays
  94 // corresponding to the preprocessed text input. The types correspond to the
  95 // Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
  96 // each rune. pairValues provides a unique bracket class identifier for each
  97 // rune (suggested is the rune of the open bracket for opening and matching
  98 // close brackets, after normalization). The embedding levels are optional, but
  99 // may be supplied to encode embedding levels of styled text.
 100 //
 101 // TODO: return an error.
 102 func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph {
 103         validateTypes(types)
 104         validatePbTypes(pairTypes)
 105         validatePbValues(pairValues, pairTypes)
 106         validateParagraphEmbeddingLevel(levels)
 107
 108         p := &paragraph{
 109                 initialTypes:   append([]Class(nil), types...),
 110                 embeddingLevel: levels,
 111
 112                 pairTypes:  pairTypes,
 113                 pairValues: pairValues,
 114
 115                 resultTypes: append([]Class(nil), types...),
 116         }
 117         p.run()
 118         return p
 119 }
 120
 121 func (p *paragraph) Len() int { return len(p.initialTypes) }
 122
 123 // The algorithm. Does not include line-based processing (Rules L1, L2).
 124 // These are applied later in the line-based phase of the algorithm.
 125 func (p *paragraph) run() {
 126         p.determineMatchingIsolates()
 127
 128         // 1) determining the paragraph level
 129         // Rule P1 is the requirement for entering this algorithm.
 130         // Rules P2, P3.
 131         // If no externally supplied paragraph embedding level, use default.
 132         if p.embeddingLevel == implicitLevel {
 133                 p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
 134         }
 135
 136         // Initialize result levels to paragraph embedding level.
 137         p.resultLevels = make([]level, p.Len())
 138         setLevels(p.resultLevels, p.embeddingLevel)
 139
 140         // 2) Explicit levels and directions
 141         // Rules X1-X8.
 142         p.determineExplicitEmbeddingLevels()
 143
 144         // Rule X9.
 145         // We do not remove the embeddings, the overrides, the PDFs, and the BNs
 146         // from the string explicitly. But they are not copied into isolating run
 147         // sequences when they are created, so they are removed for all
 148         // practical purposes.
 149
 150         // Rule X10.
 151         // Run remainder of algorithm one isolating run sequence at a time
 152         for _, seq := range p.determineIsolatingRunSequences() {
 153                 // 3) resolving weak types
 154                 // Rules W1-W7.
 155                 seq.resolveWeakTypes()
 156
 157                 // 4a) resolving paired brackets
 158                 // Rule N0
 159                 resolvePairedBrackets(seq)
 160
 161                 // 4b) resolving neutral types
 162                 // Rules N1-N3.
 163                 seq.resolveNeutralTypes()
 164
 165                 // 5) resolving implicit embedding levels
 166                 // Rules I1, I2.
 167                 seq.resolveImplicitLevels()
 168
 169                 // Apply the computed levels and types
 170                 seq.applyLevelsAndTypes()
 171         }
 172
 173         // Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
 174         // BNs. This is for convenience, so the resulting level array will have
 175         // a value for every character.
 176         p.assignLevelsToCharactersRemovedByX9()
 177 }
 178
 179 // determineMatchingIsolates determines the matching PDI for each isolate
 180 // initiator and vice versa.
 181 //
 182 // Definition BD9.
 183 //
 184 // At the end of this function:
 185 //
 186 //  - The member variable matchingPDI is set to point to the index of the
 187 //    matching PDI character for each isolate initiator character. If there is
 188 //    no matching PDI, it is set to the length of the input text. For other
 189 //    characters, it is set to -1.
 190 //  - The member variable matchingIsolateInitiator is set to point to the
 191 //    index of the matching isolate initiator character for each PDI character.
 192 //    If there is no matching isolate initiator, or the character is not a PDI,
 193 //    it is set to -1.
 194 func (p *paragraph) determineMatchingIsolates() {
 195         p.matchingPDI = make([]int, p.Len())
 196         p.matchingIsolateInitiator = make([]int, p.Len())
 197
 198         for i := range p.matchingIsolateInitiator {
 199                 p.matchingIsolateInitiator[i] = -1
 200         }
 201
 202         for i := range p.matchingPDI {
 203                 p.matchingPDI[i] = -1
 204
 205                 if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
 206                         depthCounter := 1
 207                         for j := i + 1; j < p.Len(); j++ {
 208                                 if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
 209                                         depthCounter++
 210                                 } else if u == PDI {
 211                                         if depthCounter--; depthCounter == 0 {
 212                                                 p.matchingPDI[i] = j
 213                                                 p.matchingIsolateInitiator[j] = i
 214                                                 break
 215                                         }
 216                                 }
 217                         }
 218                         if p.matchingPDI[i] == -1 {
 219                                 p.matchingPDI[i] = p.Len()
 220                         }
 221                 }
 222         }
 223 }
 224
 225 // determineParagraphEmbeddingLevel reports the resolved paragraph direction of
 226 // the substring limited by the given range [start, end).
 227 //
 228 // Determines the paragraph level based on rules P2, P3. This is also used
 229 // in rule X5c to find if an FSI should resolve to LRI or RLI.
 230 func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
 231         var strongType Class = unknownClass
 232
 233         // Rule P2.
 234         for i := start; i < end; i++ {
 235                 if t := p.resultTypes[i]; t.in(L, AL, R) {
 236                         strongType = t
 237                         break
 238                 } else if t.in(FSI, LRI, RLI) {
 239                         i = p.matchingPDI[i] // skip over to the matching PDI
 240                         if i > end {
 241                                 log.Panic("assert (i <= end)")
 242                         }
 243                 }
 244         }
 245         // Rule P3.
 246         switch strongType {
 247         case unknownClass: // none found
 248                 // default embedding level when no strong types found is 0.
 249                 return 0
 250         case L:
 251                 return 0
 252         default: // AL, R
 253                 return 1
 254         }
 255 }
 256
 257 const maxDepth = 125
 258
 259 // This stack will store the embedding levels and override and isolated
 260 // statuses
 261 type directionalStatusStack struct {
 262         stackCounter        int
 263         embeddingLevelStack [maxDepth + 1]level
 264         overrideStatusStack [maxDepth + 1]Class
 265         isolateStatusStack  [maxDepth + 1]bool
 266 }
 267
 268 func (s *directionalStatusStack) empty()     { s.stackCounter = 0 }
 269 func (s *directionalStatusStack) pop()       { s.stackCounter-- }
 270 func (s *directionalStatusStack) depth() int { return s.stackCounter }
 271
 272 func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
 273         s.embeddingLevelStack[s.stackCounter] = level
 274         s.overrideStatusStack[s.stackCounter] = overrideStatus
 275         s.isolateStatusStack[s.stackCounter] = isolateStatus
 276         s.stackCounter++
 277 }
 278
 279 func (s *directionalStatusStack) lastEmbeddingLevel() level {
 280         return s.embeddingLevelStack[s.stackCounter-1]
 281 }
 282
 283 func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
 284         return s.overrideStatusStack[s.stackCounter-1]
 285 }
 286
 287 func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
 288         return s.isolateStatusStack[s.stackCounter-1]
 289 }
 290
 291 // Determine explicit levels using rules X1 - X8
 292 func (p *paragraph) determineExplicitEmbeddingLevels() {
 293         var stack directionalStatusStack
 294         var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int
 295
 296         // Rule X1.
 297         stack.push(p.embeddingLevel, ON, false)
 298
 299         for i, t := range p.resultTypes {
 300                 // Rules X2, X3, X4, X5, X5a, X5b, X5c
 301                 switch t {
 302                 case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
 303                         isIsolate := t.in(RLI, LRI, FSI)
 304                         isRTL := t.in(RLE, RLO, RLI)
 305
 306                         // override if this is an FSI that resolves to RLI
 307                         if t == FSI {
 308                                 isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
 309                         }
 310                         if isIsolate {
 311                                 p.resultLevels[i] = stack.lastEmbeddingLevel()
 312                                 if stack.lastDirectionalOverrideStatus() != ON {
 313                                         p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
 314                                 }
 315                         }
 316
 317                         var newLevel level
 318                         if isRTL {
 319                                 // least greater odd
 320                                 newLevel = (stack.lastEmbeddingLevel() + 1) | 1
 321                         } else {
 322                                 // least greater even
 323                                 newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
 324                         }
 325
 326                         if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
 327                                 if isIsolate {
 328                                         validIsolateCount++
 329                                 }
 330                                 // Push new embedding level, override status, and isolated
 331                                 // status.
 332                                 // No check for valid stack counter, since the level check
 333                                 // suffices.
 334                                 switch t {
 335                                 case LRO:
 336                                         stack.push(newLevel, L, isIsolate)
 337                                 case RLO:
 338                                         stack.push(newLevel, R, isIsolate)
 339                                 default:
 340                                         stack.push(newLevel, ON, isIsolate)
 341                                 }
 342                                 // Not really part of the spec
 343                                 if !isIsolate {
 344                                         p.resultLevels[i] = newLevel
 345                                 }
 346                         } else {
 347                                 // This is an invalid explicit formatting character,
 348                                 // so apply the "Otherwise" part of rules X2-X5b.
 349                                 if isIsolate {
 350                                         overflowIsolateCount++
 351                                 } else { // !isIsolate
 352                                         if overflowIsolateCount == 0 {
 353                                                 overflowEmbeddingCount++
 354                                         }
 355                                 }
 356                         }
 357
 358                 // Rule X6a
 359                 case PDI:
 360                         if overflowIsolateCount > 0 {
 361                                 overflowIsolateCount--
 362                         } else if validIsolateCount == 0 {
 363                                 // do nothing
 364                         } else {
 365                                 overflowEmbeddingCount = 0
 366                                 for !stack.lastDirectionalIsolateStatus() {
 367                                         stack.pop()
 368                                 }
 369                                 stack.pop()
 370                                 validIsolateCount--
 371                         }
 372                         p.resultLevels[i] = stack.lastEmbeddingLevel()
 373
 374                 // Rule X7
 375                 case PDF:
 376                         // Not really part of the spec
 377                         p.resultLevels[i] = stack.lastEmbeddingLevel()
 378
 379                         if overflowIsolateCount > 0 {
 380                                 // do nothing
 381                         } else if overflowEmbeddingCount > 0 {
 382                                 overflowEmbeddingCount--
 383                         } else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
 384                                 stack.pop()
 385                         }
 386
 387                 case B: // paragraph separator.
 388                         // Rule X8.
 389
 390                         // These values are reset for clarity, in this implementation B
 391                         // can only occur as the last code in the array.
 392                         stack.empty()
 393                         overflowIsolateCount = 0
 394                         overflowEmbeddingCount = 0
 395                         validIsolateCount = 0
 396                         p.resultLevels[i] = p.embeddingLevel
 397
 398                 default:
 399                         p.resultLevels[i] = stack.lastEmbeddingLevel()
 400                         if stack.lastDirectionalOverrideStatus() != ON {
 401                                 p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
 402                         }
 403                 }
 404         }
 405 }
 406
 407 type isolatingRunSequence struct {
 408         p *paragraph
 409
 410         indexes []int // indexes to the original string
 411
 412         types          []Class // type of each character using the index
 413         resolvedLevels []level // resolved levels after application of rules
 414         level          level
 415         sos, eos       Class
 416 }
 417
 418 func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
 419
 420 func maxLevel(a, b level) level {
 421         if a > b {
 422                 return a
 423         }
 424         return b
 425 }
 426
 427 // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
 428 //                       either L or R, for each isolating run sequence.
 429 func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
 430         length := len(indexes)
 431         types := make([]Class, length)
 432         for i, x := range indexes {
 433                 types[i] = p.resultTypes[x]
 434         }
 435
 436         // assign level, sos and eos
 437         prevChar := indexes[0] - 1
 438         for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
 439                 prevChar--
 440         }
 441         prevLevel := p.embeddingLevel
 442         if prevChar >= 0 {
 443                 prevLevel = p.resultLevels[prevChar]
 444         }
 445
 446         var succLevel level
 447         lastType := types[length-1]
 448         if lastType.in(LRI, RLI, FSI) {
 449                 succLevel = p.embeddingLevel
 450         } else {
 451                 // the first character after the end of run sequence
 452                 limit := indexes[length-1] + 1
 453                 for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
 454
 455                 }
 456                 succLevel = p.embeddingLevel
 457                 if limit < p.Len() {
 458                         succLevel = p.resultLevels[limit]
 459                 }
 460         }
 461         level := p.resultLevels[indexes[0]]
 462         return &isolatingRunSequence{
 463                 p:       p,
 464                 indexes: indexes,
 465                 types:   types,
 466                 level:   level,
 467                 sos:     typeForLevel(maxLevel(prevLevel, level)),
 468                 eos:     typeForLevel(maxLevel(succLevel, level)),
 469         }
 470 }
 471
 472 // Resolving weak types Rules W1-W7.
 473 //
 474 // Note that some weak types (EN, AN) remain after this processing is
 475 // complete.
 476 func (s *isolatingRunSequence) resolveWeakTypes() {
 477
 478         // on entry, only these types remain
 479         s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
 480
 481         // Rule W1.
 482         // Changes all NSMs.
 483         preceedingCharacterType := s.sos
 484         for i, t := range s.types {
 485                 if t == NSM {
 486                         s.types[i] = preceedingCharacterType
 487                 } else {
 488                         if t.in(LRI, RLI, FSI, PDI) {
 489                                 preceedingCharacterType = ON
 490                         }
 491                         preceedingCharacterType = t
 492                 }
 493         }
 494
 495         // Rule W2.
 496         // EN does not change at the start of the run, because sos != AL.
 497         for i, t := range s.types {
 498                 if t == EN {
 499                         for j := i - 1; j >= 0; j-- {
 500                                 if t := s.types[j]; t.in(L, R, AL) {
 501                                         if t == AL {
 502                                                 s.types[i] = AN
 503                                         }
 504                                         break
 505                                 }
 506                         }
 507                 }
 508         }
 509
 510         // Rule W3.
 511         for i, t := range s.types {
 512                 if t == AL {
 513                         s.types[i] = R
 514                 }
 515         }
 516
 517         // Rule W4.
 518         // Since there must be values on both sides for this rule to have an
 519         // effect, the scan skips the first and last value.
 520         //
 521         // Although the scan proceeds left to right, and changes the type
 522         // values in a way that would appear to affect the computations
 523         // later in the scan, there is actually no problem. A change in the
 524         // current value can only affect the value to its immediate right,
 525         // and only affect it if it is ES or CS. But the current value can
 526         // only change if the value to its right is not ES or CS. Thus
 527         // either the current value will not change, or its change will have
 528         // no effect on the remainder of the analysis.
 529
 530         for i := 1; i < s.Len()-1; i++ {
 531                 t := s.types[i]
 532                 if t == ES || t == CS {
 533                         prevSepType := s.types[i-1]
 534                         succSepType := s.types[i+1]
 535                         if prevSepType == EN && succSepType == EN {
 536                                 s.types[i] = EN
 537                         } else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
 538                                 s.types[i] = AN
 539                         }
 540                 }
 541         }
 542
 543         // Rule W5.
 544         for i, t := range s.types {
 545                 if t == ET {
 546                         // locate end of sequence
 547                         runStart := i
 548                         runEnd := s.findRunLimit(runStart, ET)
 549
 550                         // check values at ends of sequence
 551                         t := s.sos
 552                         if runStart > 0 {
 553                                 t = s.types[runStart-1]
 554                         }
 555                         if t != EN {
 556                                 t = s.eos
 557                                 if runEnd < len(s.types) {
 558                                         t = s.types[runEnd]
 559                                 }
 560                         }
 561                         if t == EN {
 562                                 setTypes(s.types[runStart:runEnd], EN)
 563                         }
 564                         // continue at end of sequence
 565                         i = runEnd
 566                 }
 567         }
 568
 569         // Rule W6.
 570         for i, t := range s.types {
 571                 if t.in(ES, ET, CS) {
 572                         s.types[i] = ON
 573                 }
 574         }
 575
 576         // Rule W7.
 577         for i, t := range s.types {
 578                 if t == EN {
 579                         // set default if we reach start of run
 580                         prevStrongType := s.sos
 581                         for j := i - 1; j >= 0; j-- {
 582                                 t = s.types[j]
 583                                 if t == L || t == R { // AL's have been changed to R
 584                                         prevStrongType = t
 585                                         break
 586                                 }
 587                         }
 588                         if prevStrongType == L {
 589                                 s.types[i] = L
 590                         }
 591                 }
 592         }
 593 }
 594
 595 // 6) resolving neutral types Rules N1-N2.
 596 func (s *isolatingRunSequence) resolveNeutralTypes() {
 597
 598         // on entry, only these types can be in resultTypes
 599         s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
 600
 601         for i, t := range s.types {
 602                 switch t {
 603                 case WS, ON, B, S, RLI, LRI, FSI, PDI:
 604                         // find bounds of run of neutrals
 605                         runStart := i
 606                         runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
 607
 608                         // determine effective types at ends of run
 609                         var leadType, trailType Class
 610
 611                         // Note that the character found can only be L, R, AN, or
 612                         // EN.
 613                         if runStart == 0 {
 614                                 leadType = s.sos
 615                         } else {
 616                                 leadType = s.types[runStart-1]
 617                                 if leadType.in(AN, EN) {
 618                                         leadType = R
 619                                 }
 620                         }
 621                         if runEnd == len(s.types) {
 622                                 trailType = s.eos
 623                         } else {
 624                                 trailType = s.types[runEnd]
 625                                 if trailType.in(AN, EN) {
 626                                         trailType = R
 627                                 }
 628                         }
 629
 630                         var resolvedType Class
 631                         if leadType == trailType {
 632                                 // Rule N1.
 633                                 resolvedType = leadType
 634                         } else {
 635                                 // Rule N2.
 636                                 // Notice the embedding level of the run is used, not
 637                                 // the paragraph embedding level.
 638                                 resolvedType = typeForLevel(s.level)
 639                         }
 640
 641                         setTypes(s.types[runStart:runEnd], resolvedType)
 642
 643                         // skip over run of (former) neutrals
 644                         i = runEnd
 645                 }
 646         }
 647 }
 648
 649 func setLevels(levels []level, newLevel level) {
 650         for i := range levels {
 651                 levels[i] = newLevel
 652         }
 653 }
 654
 655 func setTypes(types []Class, newType Class) {
 656         for i := range types {
 657                 types[i] = newType
 658         }
 659 }
 660
 661 // 7) resolving implicit embedding levels Rules I1, I2.
 662 func (s *isolatingRunSequence) resolveImplicitLevels() {
 663
 664         // on entry, only these types can be in resultTypes
 665         s.assertOnly(L, R, EN, AN)
 666
 667         s.resolvedLevels = make([]level, len(s.types))
 668         setLevels(s.resolvedLevels, s.level)
 669
 670         if (s.level & 1) == 0 { // even level
 671                 for i, t := range s.types {
 672                         // Rule I1.
 673                         if t == L {
 674                                 // no change
 675                         } else if t == R {
 676                                 s.resolvedLevels[i] += 1
 677                         } else { // t == AN || t == EN
 678                                 s.resolvedLevels[i] += 2
 679                         }
 680                 }
 681         } else { // odd level
 682                 for i, t := range s.types {
 683                         // Rule I2.
 684                         if t == R {
 685                                 // no change
 686                         } else { // t == L || t == AN || t == EN
 687                                 s.resolvedLevels[i] += 1
 688                         }
 689                 }
 690         }
 691 }
 692
 693 // Applies the levels and types resolved in rules W1-I2 to the
 694 // resultLevels array.
 695 func (s *isolatingRunSequence) applyLevelsAndTypes() {
 696         for i, x := range s.indexes {
 697                 s.p.resultTypes[x] = s.types[i]
 698                 s.p.resultLevels[x] = s.resolvedLevels[i]
 699         }
 700 }
 701
 702 // Return the limit of the run consisting only of the types in validSet
 703 // starting at index. This checks the value at index, and will return
 704 // index if that value is not in validSet.
 705 func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
 706 loop:
 707         for ; index < len(s.types); index++ {
 708                 t := s.types[index]
 709                 for _, valid := range validSet {
 710                         if t == valid {
 711                                 continue loop
 712                         }
 713                 }
 714                 return index // didn't find a match in validSet
 715         }
 716         return len(s.types)
 717 }
 718
 719 // Algorithm validation. Assert that all values in types are in the
 720 // provided set.
 721 func (s *isolatingRunSequence) assertOnly(codes ...Class) {
 722 loop:
 723         for i, t := range s.types {
 724                 for _, c := range codes {
 725                         if t == c {
 726                                 continue loop
 727                         }
 728                 }
 729                 log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
 730         }
 731 }
 732
 733 // determineLevelRuns returns an array of level runs. Each level run is
 734 // described as an array of indexes into the input string.
 735 //
 736 // Determines the level runs. Rule X9 will be applied in determining the
 737 // runs, in the way that makes sure the characters that are supposed to be
 738 // removed are not included in the runs.
 739 func (p *paragraph) determineLevelRuns() [][]int {
 740         run := []int{}
 741         allRuns := [][]int{}
 742         currentLevel := implicitLevel
 743
 744         for i := range p.initialTypes {
 745                 if !isRemovedByX9(p.initialTypes[i]) {
 746                         if p.resultLevels[i] != currentLevel {
 747                                 // we just encountered a new run; wrap up last run
 748                                 if currentLevel >= 0 { // only wrap it up if there was a run
 749                                         allRuns = append(allRuns, run)
 750                                         run = nil
 751                                 }
 752                                 // Start new run
 753                                 currentLevel = p.resultLevels[i]
 754                         }
 755                         run = append(run, i)
 756                 }
 757         }
 758         // Wrap up the final run, if any
 759         if len(run) > 0 {
 760                 allRuns = append(allRuns, run)
 761         }
 762         return allRuns
 763 }
 764
 765 // Definition BD13. Determine isolating run sequences.
 766 func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
 767         levelRuns := p.determineLevelRuns()
 768
 769         // Compute the run that each character belongs to
 770         runForCharacter := make([]int, p.Len())
 771         for i, run := range levelRuns {
 772                 for _, index := range run {
 773                         runForCharacter[index] = i
 774                 }
 775         }
 776
 777         sequences := []*isolatingRunSequence{}
 778
 779         var currentRunSequence []int
 780
 781         for _, run := range levelRuns {
 782                 first := run[0]
 783                 if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
 784                         currentRunSequence = nil
 785                         // int run = i;
 786                         for {
 787                                 // Copy this level run into currentRunSequence
 788                                 currentRunSequence = append(currentRunSequence, run...)
 789
 790                                 last := currentRunSequence[len(currentRunSequence)-1]
 791                                 lastT := p.initialTypes[last]
 792                                 if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
 793                                         run = levelRuns[runForCharacter[p.matchingPDI[last]]]
 794                                 } else {
 795                                         break
 796                                 }
 797                         }
 798                         sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
 799                 }
 800         }
 801         return sequences
 802 }
 803
 804 // Assign level information to characters removed by rule X9. This is for
 805 // ease of relating the level information to the original input data. Note
 806 // that the levels assigned to these codes are arbitrary, they're chosen so
 807 // as to avoid breaking level runs.
 808 func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
 809         for i, t := range p.initialTypes {
 810                 if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
 811                         p.resultTypes[i] = t
 812                         p.resultLevels[i] = -1
 813                 }
 814         }
 815         // now propagate forward the levels information (could have
 816         // propagated backward, the main thing is not to introduce a level
 817         // break where one doesn't already exist).
 818
 819         if p.resultLevels[0] == -1 {
 820                 p.resultLevels[0] = p.embeddingLevel
 821         }
 822         for i := 1; i < len(p.initialTypes); i++ {
 823                 if p.resultLevels[i] == -1 {
 824                         p.resultLevels[i] = p.resultLevels[i-1]
 825                 }
 826         }
 827         // Embedding information is for informational purposes only so need not be
 828         // adjusted.
 829 }
 830
 831 //
 832 // Output
 833 //
 834
 835 // getLevels computes levels array breaking lines at offsets in linebreaks.
 836 // Rule L1.
 837 //
 838 // The linebreaks array must include at least one value. The values must be
 839 // in strictly increasing order (no duplicates) between 1 and the length of
 840 // the text, inclusive. The last value must be the length of the text.
 841 func (p *paragraph) getLevels(linebreaks []int) []level {
 842         // Note that since the previous processing has removed all
 843         // P, S, and WS values from resultTypes, the values referred to
 844         // in these rules are the initial types, before any processing
 845         // has been applied (including processing of overrides).
 846         //
 847         // This example implementation has reinserted explicit format codes
 848         // and BN, in order that the levels array correspond to the
 849         // initial text. Their final placement is not normative.
 850         // These codes are treated like WS in this implementation,
 851         // so they don't interrupt sequences of WS.
 852
 853         validateLineBreaks(linebreaks, p.Len())
 854
 855         result := append([]level(nil), p.resultLevels...)
 856
 857         // don't worry about linebreaks since if there is a break within
 858         // a series of WS values preceding S, the linebreak itself
 859         // causes the reset.
 860         for i, t := range p.initialTypes {
 861                 if t.in(B, S) {
 862                         // Rule L1, clauses one and two.
 863                         result[i] = p.embeddingLevel
 864
 865                         // Rule L1, clause three.
 866                         for j := i - 1; j >= 0; j-- {
 867                                 if isWhitespace(p.initialTypes[j]) { // including format codes
 868                                         result[j] = p.embeddingLevel
 869                                 } else {
 870                                         break
 871                                 }
 872                         }
 873                 }
 874         }
 875
 876         // Rule L1, clause four.
 877         start := 0
 878         for _, limit := range linebreaks {
 879                 for j := limit - 1; j >= start; j-- {
 880                         if isWhitespace(p.initialTypes[j]) { // including format codes
 881                                 result[j] = p.embeddingLevel
 882                         } else {
 883                                 break
 884                         }
 885                 }
 886                 start = limit
 887         }
 888
 889         return result
 890 }
 891
 892 // getReordering returns the reordering of lines from a visual index to a
 893 // logical index for line breaks at the given offsets.
 894 //
 895 // Lines are concatenated from left to right. So for example, the fifth
 896 // character from the left on the third line is
 897 //
 898 //              getReordering(linebreaks)[linebreaks[1] + 4]
 899 //
 900 // (linebreaks[1] is the position after the last character of the second
 901 // line, which is also the index of the first character on the third line,
 902 // and adding four gets the fifth character from the left).
 903 //
 904 // The linebreaks array must include at least one value. The values must be
 905 // in strictly increasing order (no duplicates) between 1 and the length of
 906 // the text, inclusive. The last value must be the length of the text.
 907 func (p *paragraph) getReordering(linebreaks []int) []int {
 908         validateLineBreaks(linebreaks, p.Len())
 909
 910         return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
 911 }
 912
 913 // Return multiline reordering array for a given level array. Reordering
 914 // does not occur across a line break.
 915 func computeMultilineReordering(levels []level, linebreaks []int) []int {
 916         result := make([]int, len(levels))
 917
 918         start := 0
 919         for _, limit := range linebreaks {
 920                 tempLevels := make([]level, limit-start)
 921                 copy(tempLevels, levels[start:])
 922
 923                 for j, order := range computeReordering(tempLevels) {
 924                         result[start+j] = order + start
 925                 }
 926                 start = limit
 927         }
 928         return result
 929 }
 930
 931 // Return reordering array for a given level array. This reorders a single
 932 // line. The reordering is a visual to logical map. For example, the
 933 // leftmost char is string.charAt(order[0]). Rule L2.
 934 func computeReordering(levels []level) []int {
 935         result := make([]int, len(levels))
 936         // initialize order
 937         for i := range result {
 938                 result[i] = i
 939         }
 940
 941         // locate highest level found on line.
 942         // Note the rules say text, but no reordering across line bounds is
 943         // performed, so this is sufficient.
 944         highestLevel := level(0)
 945         lowestOddLevel := level(maxDepth + 2)
 946         for _, level := range levels {
 947                 if level > highestLevel {
 948                         highestLevel = level
 949                 }
 950                 if level&1 != 0 && level < lowestOddLevel {
 951                         lowestOddLevel = level
 952                 }
 953         }
 954
 955         for level := highestLevel; level >= lowestOddLevel; level-- {
 956                 for i := 0; i < len(levels); i++ {
 957                         if levels[i] >= level {
 958                                 // find range of text at or above this level
 959                                 start := i
 960                                 limit := i + 1
 961                                 for limit < len(levels) && levels[limit] >= level {
 962                                         limit++
 963                                 }
 964
 965                                 for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
 966                                         result[j], result[k] = result[k], result[j]
 967                                 }
 968                                 // skip to end of level run
 969                                 i = limit
 970                         }
 971                 }
 972         }
 973
 974         return result
 975 }
 976
 977 // isWhitespace reports whether the type is considered a whitespace type for the
 978 // line break rules.
 979 func isWhitespace(c Class) bool {
 980         switch c {
 981         case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
 982                 return true
 983         }
 984         return false
 985 }
 986
 987 // isRemovedByX9 reports whether the type is one of the types removed in X9.
 988 func isRemovedByX9(c Class) bool {
 989         switch c {
 990         case LRE, RLE, LRO, RLO, PDF, BN:
 991                 return true
 992         }
 993         return false
 994 }
 995
 996 // typeForLevel reports the strong type (L or R) corresponding to the level.
 997 func typeForLevel(level level) Class {
 998         if (level & 0x1) == 0 {
 999                 return L
1000         }
1001         return R
1002 }
1003
1004 // TODO: change validation to not panic
1005
1006 func validateTypes(types []Class) {
1007         if len(types) == 0 {
1008                 log.Panic("types is null")
1009         }
1010         for i, t := range types[:len(types)-1] {
1011                 if t == B {
1012                         log.Panicf("B type before end of paragraph at index: %d", i)
1013                 }
1014         }
1015 }
1016
1017 func validateParagraphEmbeddingLevel(embeddingLevel level) {
1018         if embeddingLevel != implicitLevel &&
1019                 embeddingLevel != 0 &&
1020                 embeddingLevel != 1 {
1021                 log.Panicf("illegal paragraph embedding level: %d", embeddingLevel)
1022         }
1023 }
1024
// validateLineBreaks panics (through log.Panicf) unless linebreaks is
// strictly increasing, starts above zero, and ends exactly at textLength.
// An empty linebreaks slice is therefore accepted only when textLength is 0.
func validateLineBreaks(linebreaks []int, textLength int) {
	last := 0
	for idx := 0; idx < len(linebreaks); idx++ {
		cur := linebreaks[idx]
		if cur <= last {
			log.Panicf("bad linebreak: %d at index: %d", cur, idx)
		}
		last = cur
	}
	if last == textLength {
		return
	}
	log.Panicf("last linebreak was %d, want %d", last, textLength)
}
1037
1038 func validatePbTypes(pairTypes []bracketType) {
1039         if len(pairTypes) == 0 {
1040                 log.Panic("pairTypes is null")
1041         }
1042         for i, pt := range pairTypes {
1043                 switch pt {
1044                 case bpNone, bpOpen, bpClose:
1045                 default:
1046                         log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i])
1047                 }
1048         }
1049 }
1050
1051 func validatePbValues(pairValues []rune, pairTypes []bracketType) {
1052         if pairValues == nil {
1053                 log.Panic("pairValues is null")
1054         }
1055         if len(pairTypes) != len(pairValues) {
1056                 log.Panic("pairTypes is different length from pairValues")
1057         }
1058 }