Bumping manifests a=b2g-bump
[gecko.git] / layout / base / nsBidi.cpp
blob1903b86488a5c7ee7cf56292520b548ce98ce397
1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsBidi.h"
8 #include "nsUnicodeProperties.h"
9 #include "nsCRTGlue.h"
11 using namespace mozilla::unicode;
13 // These are #defined in <sys/regset.h> under Solaris 10 x86
14 #undef CS
15 #undef ES
17 /* Comparing the description of the Bidi algorithm with this implementation
18 is easier with the same names for the Bidi types in the code as there.
20 enum {
21 L = eCharType_LeftToRight,
22 R = eCharType_RightToLeft,
23 EN = eCharType_EuropeanNumber,
24 ES = eCharType_EuropeanNumberSeparator,
25 ET = eCharType_EuropeanNumberTerminator,
26 AN = eCharType_ArabicNumber,
27 CS = eCharType_CommonNumberSeparator,
28 B = eCharType_BlockSeparator,
29 S = eCharType_SegmentSeparator,
30 WS = eCharType_WhiteSpaceNeutral,
31 O_N = eCharType_OtherNeutral,
32 LRE = eCharType_LeftToRightEmbedding,
33 LRO = eCharType_LeftToRightOverride,
34 AL = eCharType_RightToLeftArabic,
35 RLE = eCharType_RightToLeftEmbedding,
36 RLO = eCharType_RightToLeftOverride,
37 PDF = eCharType_PopDirectionalFormat,
38 NSM = eCharType_DirNonSpacingMark,
39 BN = eCharType_BoundaryNeutral,
40 LRI = eCharType_LeftToRightIsolate,
41 RLI = eCharType_RightToLeftIsolate,
42 FSI = eCharType_FirstStrongIsolate,
43 PDI = eCharType_PopDirectionalIsolate,
44 dirPropCount
47 /* to avoid some conditional statements, use tiny constant arrays */
48 static Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
49 static Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
50 static Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
52 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
53 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
54 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
57 * General implementation notes:
59 * Throughout the implementation, there are comments like (W2) that refer to
60 * rules of the Bidi algorithm in its version 5, in this example to the second
61 * rule of the resolution of weak types.
63 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
64 * character according to UTF-16, the second UChar gets the directional property of
65 * the entire character assigned, while the first one gets a BN, a boundary
66 * neutral, type, which is ignored by most of the algorithm according to
67 * rule (X9) and the implementation suggestions of the Bidi algorithm.
69 * Later, AdjustWSLevels() will set the level for each BN to that of the
70 * following character (UChar), which results in surrogate pairs getting the
71 * same level on each of their surrogates.
73 * In a UTF-8 implementation, the same thing could be done: the last byte of
74 * a multi-byte sequence would get the "real" property, while all previous
75 * bytes of that sequence would get BN.
77 * It is not possible to assign all those parts of a character the same real
78 * property because this would fail in the resolution of weak types with rules
79 * that look at immediately surrounding types.
81 * As a related topic, this implementation does not remove Boundary Neutral
82 * types from the input, but ignores them whenever this is relevant.
83 * For example, the loop for the resolution of the weak types reads
84 * types until it finds a non-BN.
85 * Also, explicit embedding codes are neither changed into BN nor removed.
86 * They are only treated the same way real BNs are.
87 * As stated before, AdjustWSLevels() takes care of them at the end.
88 * For the purpose of conformance, the levels of all these codes
89 * do not matter.
91 * Note that this implementation never modifies the dirProps
92 * after the initial setup, except for FSI which is changed to either
93 * LRI or RLI in GetDirProps(), and paired brackets which may be changed
94 * to L or R according to N0.
97 * In this implementation, the resolution of weak types (Wn),
98 * neutrals (Nn), and the assignment of the resolved level (In)
99 * are all done in one single loop, in ResolveImplicitLevels().
100 * Changes of dirProp values are done on the fly, without writing
101 * them back to the dirProps array.
104 * This implementation contains code that allows to bypass steps of the
105 * algorithm that are not needed on the specific paragraph
106 * in order to speed up the most common cases considerably,
107 * like text that is entirely LTR, or RTL text without numbers.
109 * Most of this is done by setting a bit for each directional property
110 * in a flags variable and later checking for whether there are
111 * any LTR characters or any RTL characters, or both, whether
112 * there are any explicit embedding codes, etc.
114 * If the (Xn) steps are performed, then the flags are re-evaluated,
115 * because they will then not contain the embedding codes any more
116 * and will be adjusted for override codes, so that subsequently
117 * more bypassing may be possible than what the initial flags suggested.
119 * If the text is not mixed-directional, then the
120 * algorithm steps for the weak type resolution are not performed,
121 * and all levels are set to the paragraph level.
123 * If there are no explicit embedding codes, then the (Xn) steps
124 * are not performed.
126 * If embedding levels are supplied as a parameter, then all
127 * explicit embedding codes are ignored, and the (Xn) steps
128 * are not performed.
130 * White Space types could get the level of the run they belong to,
131 * and are checked with a test of (flags&MASK_EMBEDDING) to
132 * consider if the paragraph direction should be considered in
133 * the flags variable.
135 * If there are no White Space types in the paragraph, then
136 * (L1) is not necessary in AdjustWSLevels().
138 nsBidi::nsBidi()
140 Init();
142 mMayAllocateText=true;
143 mMayAllocateRuns=true;
146 nsBidi::~nsBidi()
148 Free();
151 void nsBidi::Init()
153 /* reset the object, all pointers nullptr, all flags false, all sizes 0 */
154 mLength = 0;
155 mParaLevel = 0;
156 mFlags = 0;
157 mDirection = NSBIDI_LTR;
158 mTrailingWSStart = 0;
160 mDirPropsSize = 0;
161 mLevelsSize = 0;
162 mRunsSize = 0;
163 mIsolatesSize = 0;
165 mRunCount = -1;
166 mIsolateCount = -1;
168 mDirProps=nullptr;
169 mLevels=nullptr;
170 mRuns=nullptr;
171 mIsolates=nullptr;
173 mDirPropsMemory=nullptr;
174 mLevelsMemory=nullptr;
175 mRunsMemory=nullptr;
176 mIsolatesMemory=nullptr;
178 mMayAllocateText=false;
179 mMayAllocateRuns=false;
183 * We are allowed to allocate memory if aMemory==nullptr or
184 * aMayAllocate==true for each array that we need.
185 * We also try to grow and shrink memory as needed if we
186 * allocate it.
188 * Assume aSizeNeeded>0.
189 * If *aMemory!=nullptr, then assume *aSize>0.
191 * ### this realloc() may unnecessarily copy the old data,
192 * which we know we don't need any more;
193 * is this the best way to do this??
195 bool nsBidi::GetMemory(void **aMemory, size_t *aSize, bool aMayAllocate, size_t aSizeNeeded)
197 /* check for existing memory */
198 if(*aMemory==nullptr) {
199 /* we need to allocate memory */
200 if(!aMayAllocate) {
201 return false;
202 } else {
203 *aMemory=moz_malloc(aSizeNeeded);
204 if (*aMemory!=nullptr) {
205 *aSize=aSizeNeeded;
206 return true;
207 } else {
208 *aSize=0;
209 return false;
212 } else {
213 /* there is some memory, is it enough or too much? */
214 if(aSizeNeeded>*aSize && !aMayAllocate) {
215 /* not enough memory, and we must not allocate */
216 return false;
217 } else if(aSizeNeeded!=*aSize && aMayAllocate) {
218 /* we may try to grow or shrink */
219 void *memory=moz_realloc(*aMemory, aSizeNeeded);
221 if(memory!=nullptr) {
222 *aMemory=memory;
223 *aSize=aSizeNeeded;
224 return true;
225 } else {
226 /* we failed to grow */
227 return false;
229 } else {
230 /* we have at least enough memory and must not allocate */
231 return true;
236 void nsBidi::Free()
238 moz_free(mDirPropsMemory);
239 mDirPropsMemory = nullptr;
240 moz_free(mLevelsMemory);
241 mLevelsMemory = nullptr;
242 moz_free(mRunsMemory);
243 mRunsMemory = nullptr;
244 free(mIsolatesMemory);
245 mIsolatesMemory = nullptr;
248 /* SetPara ------------------------------------------------------------ */
250 nsresult nsBidi::SetPara(const char16_t *aText, int32_t aLength,
251 nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels)
253 nsBidiDirection direction;
255 /* check the argument values */
256 if(aText==nullptr ||
257 ((NSBIDI_MAX_EXPLICIT_LEVEL<aParaLevel) && !IS_DEFAULT_LEVEL(aParaLevel)) ||
258 aLength<-1
260 return NS_ERROR_INVALID_ARG;
263 if(aLength==-1) {
264 aLength = NS_strlen(aText);
267 /* initialize member data */
268 mLength = aLength;
269 mParaLevel=aParaLevel;
270 mDirection=aParaLevel & 1 ? NSBIDI_RTL : NSBIDI_LTR;
271 mTrailingWSStart=aLength; /* the levels[] will reflect the WS run */
273 mDirProps=nullptr;
274 mLevels=nullptr;
275 mRuns=nullptr;
277 if(aLength==0) {
279 * For an empty paragraph, create an nsBidi object with the aParaLevel and
280 * the flags and the direction set but without allocating zero-length arrays.
281 * There is nothing more to do.
283 if(IS_DEFAULT_LEVEL(aParaLevel)) {
284 mParaLevel&=1;
286 mFlags=DIRPROP_FLAG_LR(aParaLevel);
287 mRunCount=0;
288 return NS_OK;
291 mRunCount=-1;
294 * Get the directional properties,
295 * the flags bit-set, and
296 * determine the partagraph level if necessary.
298 if(GETDIRPROPSMEMORY(aLength)) {
299 mDirProps=mDirPropsMemory;
300 GetDirProps(aText);
301 } else {
302 return NS_ERROR_OUT_OF_MEMORY;
305 /* are explicit levels specified? */
306 if(aEmbeddingLevels==nullptr) {
307 /* no: determine explicit levels according to the (Xn) rules */\
308 if(GETLEVELSMEMORY(aLength)) {
309 mLevels=mLevelsMemory;
310 ResolveExplicitLevels(&direction);
311 } else {
312 return NS_ERROR_OUT_OF_MEMORY;
314 } else {
315 /* set BN for all explicit codes, check that all levels are aParaLevel..NSBIDI_MAX_EXPLICIT_LEVEL */
316 mLevels=aEmbeddingLevels;
317 nsresult rv = CheckExplicitLevels(&direction);
318 if(NS_FAILED(rv)) {
319 return rv;
323 /* allocate isolate memory */
324 if (mIsolateCount <= SIMPLE_ISOLATES_SIZE) {
325 mIsolates = mSimpleIsolates;
326 } else {
327 if (mIsolateCount * sizeof(Isolate) <= mIsolatesSize) {
328 mIsolates = mIsolatesMemory;
329 } else {
330 if (GETINITIALISOLATESMEMORY(mIsolateCount)) {
331 mIsolates = mIsolatesMemory;
332 } else {
333 return NS_ERROR_OUT_OF_MEMORY;
337 mIsolateCount = -1; /* current isolates stack entry == none */
340 * The steps after (X9) in the Bidi algorithm are performed only if
341 * the paragraph text has mixed directionality!
343 mDirection = direction;
344 switch(direction) {
345 case NSBIDI_LTR:
346 /* make sure paraLevel is even */
347 mParaLevel=(mParaLevel+1)&~1;
349 /* all levels are implicitly at paraLevel (important for GetLevels()) */
350 mTrailingWSStart=0;
351 break;
352 case NSBIDI_RTL:
353 /* make sure paraLevel is odd */
354 mParaLevel|=1;
356 /* all levels are implicitly at paraLevel (important for GetLevels()) */
357 mTrailingWSStart=0;
358 break;
359 default:
361 * If there are no external levels specified and there
362 * are no significant explicit level codes in the text,
363 * then we can treat the entire paragraph as one run.
364 * Otherwise, we need to perform the following rules on runs of
365 * the text with the same embedding levels. (X10)
366 * "Significant" explicit level codes are ones that actually
367 * affect non-BN characters.
368 * Examples for "insignificant" ones are empty embeddings
369 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
371 if(aEmbeddingLevels==nullptr && !(mFlags&DIRPROP_FLAG_MULTI_RUNS)) {
372 ResolveImplicitLevels(0, aLength,
373 GET_LR_FROM_LEVEL(mParaLevel),
374 GET_LR_FROM_LEVEL(mParaLevel));
375 } else {
376 /* sor, eor: start and end types of same-level-run */
377 nsBidiLevel *levels=mLevels;
378 int32_t start, limit=0;
379 nsBidiLevel level, nextLevel;
380 DirProp sor, eor;
382 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
383 level=mParaLevel;
384 nextLevel=levels[0];
385 if(level<nextLevel) {
386 eor=GET_LR_FROM_LEVEL(nextLevel);
387 } else {
388 eor=GET_LR_FROM_LEVEL(level);
391 do {
392 /* determine start and limit of the run (end points just behind the run) */
394 /* the values for this run's start are the same as for the previous run's end */
395 sor=eor;
396 start=limit;
397 level=nextLevel;
399 /* search for the limit of this run */
400 while(++limit<aLength && levels[limit]==level) {}
402 /* get the correct level of the next run */
403 if(limit<aLength) {
404 nextLevel=levels[limit];
405 } else {
406 nextLevel=mParaLevel;
409 /* determine eor from max(level, nextLevel); sor is last run's eor */
410 if((level&~NSBIDI_LEVEL_OVERRIDE)<(nextLevel&~NSBIDI_LEVEL_OVERRIDE)) {
411 eor=GET_LR_FROM_LEVEL(nextLevel);
412 } else {
413 eor=GET_LR_FROM_LEVEL(level);
416 /* if the run consists of overridden directional types, then there
417 are no implicit types to be resolved */
418 if(!(level&NSBIDI_LEVEL_OVERRIDE)) {
419 ResolveImplicitLevels(start, limit, sor, eor);
420 } else {
421 do {
422 levels[start++] &= ~NSBIDI_LEVEL_OVERRIDE;
423 } while (start < limit);
425 } while(limit<aLength);
428 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
429 AdjustWSLevels();
430 break;
433 return NS_OK;
436 /* perform (P2)..(P3) ------------------------------------------------------- */
439 * Get the directional properties for the text,
440 * calculate the flags bit-set, and
441 * determine the partagraph level if necessary.
443 void nsBidi::GetDirProps(const char16_t *aText)
445 DirProp *dirProps=mDirPropsMemory; /* mDirProps is const */
447 int32_t i=0, length=mLength;
448 Flags flags=0; /* collect all directionalities in the text */
449 char16_t uchar;
450 DirProp dirProp;
452 bool isDefaultLevel = IS_DEFAULT_LEVEL(mParaLevel);
454 enum State {
455 NOT_SEEKING_STRONG, /* 0: not after FSI */
456 SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */
457 SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */
458 LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */
460 State state;
462 /* The following stacks are used to manage isolate sequences. Those
463 sequences may be nested, but obviously never more deeply than the
464 maximum explicit embedding level.
465 lastStack is the index of the last used entry in the stack. A value of -1
466 means that there is no open isolate sequence. */
467 /* The following stack contains the position of the initiator of
468 each open isolate sequence */
469 int32_t isolateStartStack[NSBIDI_MAX_EXPLICIT_LEVEL + 1];
470 /* The following stack contains the last known state before
471 encountering the initiator of an isolate sequence */
472 State previousStateStack[NSBIDI_MAX_EXPLICIT_LEVEL + 1];
473 int32_t stackLast = -1;
475 if(isDefaultLevel) {
477 * see comment in nsBidi.h:
478 * the DEFAULT_XXX values are designed so that
479 * their bit 0 alone yields the intended default
481 mParaLevel &= 1;
482 state = SEEKING_STRONG_FOR_PARA;
483 } else {
484 state = NOT_SEEKING_STRONG;
487 /* determine the paragraph level (P2..P3) */
488 for(/* i = 0 above */; i < length;) {
489 uchar=aText[i];
490 if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(aText[i+1])) {
491 /* not a surrogate pair */
492 flags|=DIRPROP_FLAG(dirProps[i]=dirProp=GetBidiCat((uint32_t)uchar));
493 } else {
494 /* a surrogate pair */
495 dirProps[i++]=BN; /* first surrogate in the pair gets the BN type */
496 flags|=DIRPROP_FLAG(dirProps[i]=dirProp=GetBidiCat(GET_UTF_32(uchar, aText[i])))|DIRPROP_FLAG(BN);
498 ++i;
500 switch (dirProp) {
501 case L:
502 if (state == SEEKING_STRONG_FOR_PARA) {
503 mParaLevel = 0;
504 state = NOT_SEEKING_STRONG;
505 } else if (state == SEEKING_STRONG_FOR_FSI) {
506 if (stackLast <= NSBIDI_MAX_EXPLICIT_LEVEL) {
507 dirProps[isolateStartStack[stackLast]] = LRI;
508 flags |= LRI;
510 state = LOOKING_FOR_PDI;
512 break;
514 case R: case AL:
515 if (state == SEEKING_STRONG_FOR_PARA) {
516 mParaLevel = 1;
517 state = NOT_SEEKING_STRONG;
518 } else if (state == SEEKING_STRONG_FOR_FSI) {
519 if (stackLast <= NSBIDI_MAX_EXPLICIT_LEVEL) {
520 dirProps[isolateStartStack[stackLast]] = RLI;
521 flags |= RLI;
523 state = LOOKING_FOR_PDI;
525 break;
527 case FSI: case LRI: case RLI:
528 stackLast++;
529 if (stackLast <= NSBIDI_MAX_EXPLICIT_LEVEL) {
530 isolateStartStack[stackLast] = i - 1;
531 previousStateStack[stackLast] = state;
533 if (dirProp == FSI) {
534 state = SEEKING_STRONG_FOR_FSI;
535 } else {
536 state = LOOKING_FOR_PDI;
538 break;
540 case PDI:
541 if (state == SEEKING_STRONG_FOR_FSI) {
542 if (stackLast <= NSBIDI_MAX_EXPLICIT_LEVEL) {
543 dirProps[isolateStartStack[stackLast]] = LRI;
544 flags |= DIRPROP_FLAG(LRI);
547 if (stackLast >= 0) {
548 if (stackLast <= NSBIDI_MAX_EXPLICIT_LEVEL) {
549 state = previousStateStack[stackLast];
551 stackLast--;
553 break;
555 case B:
556 // This shouldn't happen, since we don't support multiple paragraphs.
557 NS_NOTREACHED("Unexpected paragraph separator");
558 break;
560 default:
561 break;
565 /* Ignore still open isolate sequences with overflow */
566 if (stackLast > NSBIDI_MAX_EXPLICIT_LEVEL) {
567 stackLast = NSBIDI_MAX_EXPLICIT_LEVEL;
568 if (dirProps[previousStateStack[NSBIDI_MAX_EXPLICIT_LEVEL]] != FSI) {
569 state = LOOKING_FOR_PDI;
573 /* Resolve direction of still unresolved open FSI sequences */
574 while (stackLast >= 0) {
575 if (state == SEEKING_STRONG_FOR_FSI) {
576 dirProps[isolateStartStack[stackLast]] = LRI;
577 flags |= DIRPROP_FLAG(LRI);
579 state = previousStateStack[stackLast];
580 stackLast--;
583 flags|=DIRPROP_FLAG_LR(mParaLevel);
585 mFlags = flags;
588 /* perform (X1)..(X9) ------------------------------------------------------- */
591 * Resolve the explicit levels as specified by explicit embedding codes.
592 * Recalculate the flags to have them reflect the real properties
593 * after taking the explicit embeddings into account.
595 * The Bidi algorithm is designed to result in the same behavior whether embedding
596 * levels are externally specified (from "styled text", supposedly the preferred
597 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
598 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
599 * However, in a real implementation, this removal of these codes and their index
600 * positions in the plain text is undesirable since it would result in
601 * reallocated, reindexed text.
602 * Instead, this implementation leaves the codes in there and just ignores them
603 * in the subsequent processing.
604 * In order to get the same reordering behavior, positions with a BN or a not-isolate
605 * explicit embedding code just get the same level assigned as the last "real"
606 * character.
608 * Some implementations, not this one, then overwrite some of these
609 * directionality properties at "real" same-level-run boundaries by
610 * L or R codes so that the resolution of weak types can be performed on the
611 * entire paragraph at once instead of having to parse it once more and
612 * perform that resolution on same-level-runs.
613 * This limits the scope of the implicit rules in effectively
614 * the same way as the run limits.
616 * Instead, this implementation does not modify these codes.
617 * On one hand, the paragraph has to be scanned for same-level-runs, but
618 * on the other hand, this saves another loop to reset these codes,
619 * or saves making and modifying a copy of dirProps[].
622 * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
625 * Handling the stack of explicit levels (Xn):
627 * With the Bidi stack of explicit levels, as pushed with each
628 * LRE, RLE, LRO, and RLO, LRI, RLI, and FSI and popped with each PDF and PDI,
629 * the explicit level must never exceed NSBIDI_MAX_EXPLICIT_LEVEL.
631 * In order to have a correct push-pop semantics even in the case of overflows,
632 * overflow counters and a valid isolate counter are used as described in UAX#9
633 * section 3.3.2 "Explicit Levels and Direction".
635 * This implementation assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd.
638 void nsBidi::ResolveExplicitLevels(nsBidiDirection *aDirection)
640 DirProp *dirProps=mDirProps;
641 nsBidiLevel *levels=mLevels;
643 int32_t i=0, length=mLength;
644 Flags flags=mFlags; /* collect all directionalities in the text */
645 DirProp dirProp;
646 nsBidiLevel level=mParaLevel;
647 nsBidiDirection direction;
649 mIsolateCount = 0;
651 /* determine if the text is mixed-directional or single-directional */
652 direction=DirectionFromFlags(flags);
654 /* we may not need to resolve any explicit levels */
655 if(direction!=NSBIDI_MIXED) {
656 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
657 } else if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
658 /* no embeddings, set all levels to the paragraph level */
659 for(i=0; i<length; ++i) {
660 levels[i]=level;
662 } else {
663 /* continue to perform (Xn) */
665 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
666 /* both variables may carry the NSBIDI_LEVEL_OVERRIDE flag to indicate the override status */
667 nsBidiLevel embeddingLevel = level, newLevel;
668 nsBidiLevel previousLevel = level; /* previous level for regular (not CC) characters */
670 uint16_t stack[NSBIDI_MAX_EXPLICIT_LEVEL + 2]; /* we never push anything >=NSBIDI_MAX_EXPLICIT_LEVEL
671 but we need one more entry as base */
672 int32_t stackLast = 0;
673 int32_t overflowIsolateCount = 0;
674 int32_t overflowEmbeddingCount = 0;
675 int32_t validIsolateCount = 0;
677 stack[0] = level;
679 /* recalculate the flags */
680 flags=0;
682 /* since we assume that this is a single paragraph, we ignore (X8) */
683 for(i=0; i<length; ++i) {
684 dirProp=dirProps[i];
685 switch(dirProp) {
686 case LRE:
687 case RLE:
688 case LRO:
689 case RLO:
690 /* (X2, X3, X4, X5) */
691 flags |= DIRPROP_FLAG(BN);
692 if (dirProp == LRE || dirProp == LRO) {
693 newLevel = (embeddingLevel + 2) & ~(NSBIDI_LEVEL_OVERRIDE | 1); /* least greater even level */
694 } else {
695 newLevel = ((embeddingLevel & ~NSBIDI_LEVEL_OVERRIDE) + 1) | 1; /* least greater odd level */
697 if(newLevel <= NSBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0 && overflowEmbeddingCount == 0) {
698 embeddingLevel = newLevel;
699 if (dirProp == LRO || dirProp == RLO) {
700 embeddingLevel |= NSBIDI_LEVEL_OVERRIDE;
702 stackLast++;
703 stack[stackLast] = embeddingLevel;
704 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
705 since this has already been done for newLevel which is
706 the source for embeddingLevel.
708 } else {
709 dirProps[i] |= IGNORE_CC;
710 if (overflowIsolateCount == 0) {
711 overflowEmbeddingCount++;
714 break;
716 case PDF:
717 /* (X7) */
718 flags |= DIRPROP_FLAG(BN);
719 /* handle all the overflow cases first */
720 if (overflowIsolateCount) {
721 dirProps[i] |= IGNORE_CC;
722 break;
724 if (overflowEmbeddingCount) {
725 dirProps[i] |= IGNORE_CC;
726 overflowEmbeddingCount--;
727 break;
729 if (stackLast > 0 && stack[stackLast] < ISOLATE) { /* not an isolate entry */
730 stackLast--;
731 embeddingLevel = stack[stackLast];
732 } else {
733 dirProps[i] |= IGNORE_CC;
735 break;
737 case LRI:
738 case RLI:
739 if (embeddingLevel != previousLevel) {
740 previousLevel = embeddingLevel;
742 /* (X5a, X5b) */
743 flags |= DIRPROP_FLAG(O_N) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel);
744 level = embeddingLevel;
745 if (dirProp == LRI) {
746 newLevel = (embeddingLevel + 2) & ~(NSBIDI_LEVEL_OVERRIDE | 1); /* least greater even level */
747 } else {
748 newLevel = ((embeddingLevel & ~NSBIDI_LEVEL_OVERRIDE) + 1) | 1; /* least greater odd level */
750 if (newLevel <= NSBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0 && overflowEmbeddingCount == 0) {
751 previousLevel = embeddingLevel;
752 validIsolateCount++;
753 if (validIsolateCount > mIsolateCount) {
754 mIsolateCount = validIsolateCount;
756 embeddingLevel = newLevel;
757 stackLast++;
758 stack[stackLast] = embeddingLevel + ISOLATE;
759 } else {
760 dirProps[i] |= IGNORE_CC;
761 overflowIsolateCount++;
763 break;
765 case PDI:
766 /* (X6a) */
767 if (overflowIsolateCount) {
768 dirProps[i] |= IGNORE_CC;
769 overflowIsolateCount--;
770 } else if (validIsolateCount) {
771 overflowEmbeddingCount = 0;
772 while (stack[stackLast] < ISOLATE) {
773 /* pop embedding entries */
774 /* until the last isolate entry */
775 stackLast--;
777 // Since validIsolateCount is true, there must be an isolate entry
778 // on the stack, so the stack is guaranteed to not be empty.
779 // Still, to eliminate a warning from coverity, we use an assertion.
780 MOZ_ASSERT(stackLast > 0);
782 stackLast--; /* pop also the last isolate entry */
783 MOZ_ASSERT(stackLast >= 0); // For coverity
784 validIsolateCount--;
785 } else {
786 dirProps[i] |= IGNORE_CC;
788 embeddingLevel = stack[stackLast] & ~ISOLATE;
789 previousLevel = level = embeddingLevel;
790 flags |= DIRPROP_FLAG(O_N) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel);
791 break;
793 case B:
795 * We do not expect to see a paragraph separator (B),
797 NS_NOTREACHED("Unexpected paragraph separator");
798 break;
800 case BN:
801 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
802 /* they will get their levels set correctly in AdjustWSLevels() */
803 flags|=DIRPROP_FLAG(BN);
804 break;
806 default:
807 /* all other types get the "real" level */
808 level = embeddingLevel;
809 if(embeddingLevel != previousLevel) {
810 previousLevel = embeddingLevel;
813 if (level & NSBIDI_LEVEL_OVERRIDE) {
814 flags |= DIRPROP_FLAG_LR(level);
815 } else {
816 flags |= DIRPROP_FLAG(dirProp);
818 break;
822 * We need to set reasonable levels even on BN codes and
823 * explicit codes because we will later look at same-level runs (X10).
825 levels[i]=level;
826 if (i > 0 && levels[i - 1] != level) {
827 flags |= DIRPROP_FLAG_MULTI_RUNS;
828 if (level & NSBIDI_LEVEL_OVERRIDE) {
829 flags |= DIRPROP_FLAG_O(level);
830 } else {
831 flags |= DIRPROP_FLAG_E(level);
834 if (DIRPROP_FLAG(dirProp) & MASK_ISO) {
835 level = embeddingLevel;
839 if(flags&MASK_EMBEDDING) {
840 flags|=DIRPROP_FLAG_LR(mParaLevel);
843 /* subsequently, ignore the explicit codes and BN (X9) */
845 /* again, determine if the text is mixed-directional or single-directional */
846 mFlags=flags;
847 direction=DirectionFromFlags(flags);
850 *aDirection = direction;
854 * Use a pre-specified embedding levels array:
856 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
857 * ignore all explicit codes (X9),
858 * and check all the preset levels.
860 * Recalculate the flags to have them reflect the real properties
861 * after taking the explicit embeddings into account.
863 nsresult nsBidi::CheckExplicitLevels(nsBidiDirection *aDirection)
865 const DirProp *dirProps=mDirProps;
866 DirProp dirProp;
867 nsBidiLevel *levels=mLevels;
868 int32_t isolateCount = 0;
870 int32_t i, length=mLength;
871 Flags flags=0; /* collect all directionalities in the text */
872 nsBidiLevel level, paraLevel=mParaLevel;
873 mIsolateCount = 0;
875 for(i=0; i<length; ++i) {
876 level=levels[i];
877 dirProp = dirProps[i];
878 if (dirProp == LRI || dirProp == RLI) {
879 isolateCount++;
880 if (isolateCount > mIsolateCount) {
881 mIsolateCount = isolateCount;
883 } else if (dirProp == PDI) {
884 isolateCount--;
886 if(level&NSBIDI_LEVEL_OVERRIDE) {
887 /* keep the override flag in levels[i] but adjust the flags */
888 level&=~NSBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */
889 flags|=DIRPROP_FLAG_O(level);
890 } else {
891 /* set the flags */
892 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
894 if(level<paraLevel || NSBIDI_MAX_EXPLICIT_LEVEL<level) {
895 /* level out of bounds */
896 *aDirection = NSBIDI_LTR;
897 return NS_ERROR_INVALID_ARG;
900 if(flags&MASK_EMBEDDING) {
901 flags|=DIRPROP_FLAG_LR(mParaLevel);
904 /* determine if the text is mixed-directional or single-directional */
905 mFlags=flags;
906 *aDirection = DirectionFromFlags(flags);
907 return NS_OK;
910 /* determine if the text is mixed-directional or single-directional */
911 nsBidiDirection nsBidi::DirectionFromFlags(Flags aFlags)
913 /* if the text contains AN and neutrals, then some neutrals may become RTL */
914 if(!(aFlags&MASK_RTL || (aFlags&DIRPROP_FLAG(AN) && aFlags&MASK_POSSIBLE_N))) {
915 return NSBIDI_LTR;
916 } else if(!(aFlags&MASK_LTR)) {
917 return NSBIDI_RTL;
918 } else {
919 return NSBIDI_MIXED;
923 /******************************************************************
924 The Properties state machine table
925 *******************************************************************
927 All table cells are 8 bits:
928 bits 0..4: next state
929 bits 5..7: action to perform (if > 0)
931 Cells may be of format "n" where n represents the next state
932 (except for the rightmost column).
933 Cells may also be of format "s(x,y)" where x represents an action
934 to perform and y represents the next state.
936 *******************************************************************
937 Definitions and type for properties state table
938 *******************************************************************
940 #define IMPTABPROPS_COLUMNS 16
941 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
942 #define GET_STATEPROPS(cell) ((cell)&0x1f)
943 #define GET_ACTIONPROPS(cell) ((cell)>>5)
944 #undef s
945 #define s(action, newState) ((uint8_t)(newState+(action<<5)))
947 static const uint8_t groupProp[] = /* dirProp regrouped */
949 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */
950 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14
953 /******************************************************************
955 PROPERTIES STATE TABLE
957 In table impTabProps,
958 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
959 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
960 - the Res column is the reduced property assigned to a run
962 Action 1: process current run1, init new run1
963 2: init new run2
964 3: process run1, process run2, init new run1
965 4: process run1, set run1=run2, init new run2
967 Notes:
968 1) This table is used in ResolveImplicitLevels().
969 2) This table triggers actions when there is a change in the Bidi
970 property of incoming characters (action 1).
971 3) Most such property sequences are processed immediately (in
972 fact, passed to ProcessPropertySeq().
973 4) However, numbers are assembled as one sequence. This means
974 that undefined situations (like CS following digits, until
975 it is known if the next char will be a digit) are held until
976 following chars define them.
977 Example: digits followed by CS, then comes another CS or ON;
978 the digits will be processed, then the CS assigned
979 as the start of an ON sequence (action 3).
980 5) There are cases where more than one sequence must be
981 processed, for instance digits followed by CS followed by L:
982 the digits must be processed as one sequence, and the CS
983 must be processed as an ON sequence, all this before starting
984 assembling chars for the opening L sequence.
988 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
990 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
991 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON },
992 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L },
993 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R },
994 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R },
995 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN },
996 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
997 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN },
998 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
999 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1000 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON },
1001 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN },
1002 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN },
1003 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1004 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN },
1005 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1006 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S },
1007 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S },
1008 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B },
1009 /*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L },
1010 /*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L },
1011 /*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L },
1012 /*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN },
1013 /*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN },
1014 /*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN }
1017 /* we must undef macro s because the levels table have a different
1018 * structure (4 bits for action and 4 bits for next state.
1020 #undef s
1022 /******************************************************************
1023 The levels state machine tables
1024 *******************************************************************
1026 All table cells are 8 bits:
1027 bits 0..3: next state
1028 bits 4..7: action to perform (if > 0)
1030 Cells may be of format "n" where n represents the next state
1031 (except for the rightmost column).
1032 Cells may also be of format "s(x,y)" where x represents an action
1033 to perform and y represents the next state.
1035 This format limits each table to 16 states each and to 15 actions.
1037 *******************************************************************
1038 Definitions and type for levels state tables
1039 *******************************************************************
1041 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
1042 #define GET_STATE(cell) ((cell)&0x0f)
1043 #define GET_ACTION(cell) ((cell)>>4)
1044 #define s(action, newState) ((uint8_t)(newState+(action<<4)))
1046 /******************************************************************
1048 LEVELS STATE TABLES
1050 In all levels state tables,
1051 - state 0 is the initial state
1052 - the Res column is the increment to add to the text level
1053 for this property sequence.
1055 The impAct arrays for each table of a pair map the local action
1056 numbers of the table to the total list of actions. For instance,
1057 action 2 in a given table corresponds to the action number which
1058 appears in entry [2] of the impAct array for that table.
1059 The first entry of all impAct arrays must be 0.
1061 Action 1: init conditional sequence
1062 2: prepend conditional sequence to current sequence
1063 3: set ON sequence to new level - 1
1064 4: init EN/AN/ON sequence
1065 5: fix EN/AN/ON sequence followed by R
1066 6: set previous level sequence to level 2
1068 Notes:
1069 1) These tables are used in ProcessPropertySeq(). The input
1070 is property sequences as determined by ResolveImplicitLevels.
1071 2) Most such property sequences are processed immediately
1072 (levels are assigned).
1073 3) However, some sequences cannot be assigned a final level till
1074 one or more following sequences are received. For instance,
1075 ON following an R sequence within an even-level paragraph.
1076 If the following sequence is R, the ON sequence will be
1077 assigned basic run level+1, and so will the R sequence.
1078 4) S is generally handled like ON, since its level will be fixed
1079 to paragraph level in AdjustWSLevels().
1083 static const ImpTab impTabL = /* Even paragraph level */
1084 /* In this table, conditional sequences receive the higher possible level
1085 until proven otherwise.
1088 /* L , R , EN , AN , ON , S , B , Res */
1089 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
1090 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
1091 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
1092 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
1093 /* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 },
1094 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 }
1096 static const ImpTab impTabR = /* Odd paragraph level */
1097 /* In this table, conditional sequences receive the lower possible level
1098 until proven otherwise.
1101 /* L , R , EN , AN , ON , S , B , Res */
1102 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1103 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
1104 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1105 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
1106 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
1107 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
1110 #undef s
1112 static ImpAct impAct0 = {0,1,2,3,4,5,6};
1113 static PImpTab impTab[2] = {impTabL, impTabR};
1115 /*------------------------------------------------------------------------*/
1117 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1120 * This implementation of the (Wn) rules applies all rules in one pass.
1121 * In order to do so, it needs a look-ahead of typically 1 character
1122 * (except for W5: sequences of ET) and keeps track of changes
1123 * in a rule Wp that affect a later Wq (p<q).
1125 * The (Nn) and (In) rules are also performed in that same single loop,
1126 * but effectively one iteration behind for white space.
1128 * Since all implicit rules are performed in one step, it is not necessary
1129 * to actually store the intermediate directional properties in dirProps[].
1132 void nsBidi::ProcessPropertySeq(LevState *pLevState, uint8_t _prop, int32_t start, int32_t limit)
1134 uint8_t cell, oldStateSeq, actionSeq;
1135 PImpTab pImpTab = pLevState->pImpTab;
1136 PImpAct pImpAct = pLevState->pImpAct;
1137 nsBidiLevel* levels = mLevels;
1138 nsBidiLevel level, addLevel;
1139 int32_t start0, k;
1141 start0 = start; /* save original start position */
1142 oldStateSeq = (uint8_t)pLevState->state;
1143 cell = pImpTab[oldStateSeq][_prop];
1144 pLevState->state = GET_STATE(cell); /* isolate the new state */
1145 actionSeq = pImpAct[GET_ACTION(cell)]; /* isolate the action */
1146 addLevel = pImpTab[pLevState->state][IMPTABLEVELS_RES];
1148 if(actionSeq) {
1149 switch(actionSeq) {
1150 case 1: /* init ON seq */
1151 pLevState->startON = start0;
1152 break;
1154 case 2: /* prepend ON seq to current seq */
1155 start = pLevState->startON;
1156 break;
1158 default: /* we should never get here */
1159 MOZ_ASSERT(false);
1160 break;
1163 if(addLevel || (start < start0)) {
1164 level = pLevState->runLevel + addLevel;
1165 if (start >= pLevState->runStart) {
1166 for (k = start; k < limit; k++) {
1167 levels[k] = level;
1169 } else {
1170 DirProp *dirProps = mDirProps, dirProp;
1171 int32_t isolateCount = 0;
1172 for (k = start; k < limit; k++) {
1173 dirProp = dirProps[k];
1174 if (dirProp == PDI) {
1175 isolateCount--;
1177 if (isolateCount == 0) {
1178 levels[k]=level;
1180 if (dirProp == LRI || dirProp == RLI) {
1181 isolateCount++;
1188 void nsBidi::ResolveImplicitLevels(int32_t aStart, int32_t aLimit,
1189 DirProp aSOR, DirProp aEOR)
1191 const DirProp *dirProps = mDirProps;
1192 DirProp dirProp;
1193 LevState levState;
1194 int32_t i, start1, start2;
1195 uint16_t oldStateImp, stateImp, actionImp;
1196 uint8_t gprop, resProp, cell;
1198 /* initialize for property and levels state tables */
1199 levState.startON = -1;
1200 levState.runStart = aStart;
1201 levState.runLevel = mLevels[aStart];
1202 levState.pImpTab = impTab[levState.runLevel & 1];
1203 levState.pImpAct = impAct0;
1205 /* The isolates[] entries contain enough information to
1206 resume the bidi algorithm in the same state as it was
1207 when it was interrupted by an isolate sequence. */
1208 if (dirProps[aStart] == PDI) {
1209 start1 = mIsolates[mIsolateCount].start1;
1210 stateImp = mIsolates[mIsolateCount].stateImp;
1211 levState.state = mIsolates[mIsolateCount].state;
1212 mIsolateCount--;
1213 } else {
1214 start1 = aStart;
1215 if (dirProps[aStart] == NSM) {
1216 stateImp = 1 + aSOR;
1217 } else {
1218 stateImp = 0;
1220 levState.state = 0;
1221 ProcessPropertySeq(&levState, aSOR, aStart, aStart);
1223 start2 = aStart;
1225 for (i = aStart; i <= aLimit; i++) {
1226 if (i >= aLimit) {
1227 if (aLimit > aStart) {
1228 dirProp = mDirProps[aLimit - 1];
1229 if (dirProp == LRI || dirProp == RLI) {
1230 break; /* no forced closing for sequence ending with LRI/RLI */
1233 gprop = aEOR;
1234 } else {
1235 DirProp prop;
1236 prop = PURE_DIRPROP(dirProps[i]);
1237 gprop = groupProp[prop];
1239 oldStateImp = stateImp;
1240 cell = impTabProps[oldStateImp][gprop];
1241 stateImp = GET_STATEPROPS(cell); /* isolate the new state */
1242 actionImp = GET_ACTIONPROPS(cell); /* isolate the action */
1243 if ((i == aLimit) && (actionImp == 0)) {
1244 /* there is an unprocessed sequence if its property == eor */
1245 actionImp = 1; /* process the last sequence */
1247 if (actionImp) {
1248 resProp = impTabProps[oldStateImp][IMPTABPROPS_RES];
1249 switch (actionImp) {
1250 case 1: /* process current seq1, init new seq1 */
1251 ProcessPropertySeq(&levState, resProp, start1, i);
1252 start1 = i;
1253 break;
1254 case 2: /* init new seq2 */
1255 start2 = i;
1256 break;
1257 case 3: /* process seq1, process seq2, init new seq1 */
1258 ProcessPropertySeq(&levState, resProp, start1, start2);
1259 ProcessPropertySeq(&levState, DirProp_ON, start2, i);
1260 start1 = i;
1261 break;
1262 case 4: /* process seq1, set seq1=seq2, init new seq2 */
1263 ProcessPropertySeq(&levState, resProp, start1, start2);
1264 start1 = start2;
1265 start2 = i;
1266 break;
1267 default: /* we should never get here */
1268 MOZ_ASSERT(false);
1269 break;
1274 dirProp = dirProps[aLimit - 1];
1275 if ((dirProp == LRI || dirProp == RLI) && aLimit < mLength) {
1276 mIsolateCount++;
1277 mIsolates[mIsolateCount].stateImp = stateImp;
1278 mIsolates[mIsolateCount].state = levState.state;
1279 mIsolates[mIsolateCount].start1 = start1;
1280 } else {
1281 ProcessPropertySeq(&levState, aEOR, aLimit, aLimit);
1286 /* perform (L1) and (X9) ---------------------------------------------------- */
1289 * Reset the embedding levels for some non-graphic characters (L1).
1290 * This function also sets appropriate levels for BN, and
1291 * explicit embedding types that are supposed to have been removed
1292 * from the paragraph in (X9).
1294 void nsBidi::AdjustWSLevels()
1296 const DirProp *dirProps=mDirProps;
1297 nsBidiLevel *levels=mLevels;
1298 int32_t i;
1300 if(mFlags&MASK_WS) {
1301 nsBidiLevel paraLevel=mParaLevel;
1302 Flags flag;
1304 i=mTrailingWSStart;
1305 while(i>0) {
1306 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
1307 while (i > 0 && DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])) & MASK_WS) {
1308 levels[i]=paraLevel;
1311 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1312 /* here, i+1 is guaranteed to be <length */
1313 while(i>0) {
1314 flag = DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i]));
1315 if(flag&MASK_BN_EXPLICIT) {
1316 levels[i]=levels[i+1];
1317 } else if(flag&MASK_B_S) {
1318 levels[i]=paraLevel;
1319 break;
1326 nsresult nsBidi::GetDirection(nsBidiDirection* aDirection)
1328 *aDirection = mDirection;
1329 return NS_OK;
1332 nsresult nsBidi::GetParaLevel(nsBidiLevel* aParaLevel)
1334 *aParaLevel = mParaLevel;
1335 return NS_OK;
1337 #ifdef FULL_BIDI_ENGINE
1339 /* -------------------------------------------------------------------------- */
1341 nsresult nsBidi::GetLength(int32_t* aLength)
1343 *aLength = mLength;
1344 return NS_OK;
1348 * General remarks about the functions in this section:
1350 * These functions deal with the aspects of potentially mixed-directional
1351 * text in a single paragraph or in a line of a single paragraph
1352 * which has already been processed according to
1353 * the Unicode 6.3 Bidi algorithm as defined in
1354 * http://www.unicode.org/unicode/reports/tr9/ , version 28,
1355 * also described in The Unicode Standard, Version 6.3.0 .
1357 * This means that there is a nsBidi object with a levels
1358 * and a dirProps array.
1359 * paraLevel and direction are also set.
1360 * Only if the length of the text is zero, then levels==dirProps==nullptr.
1362 * The overall directionality of the paragraph
1363 * or line is used to bypass the reordering steps if possible.
1364 * Even purely RTL text does not need reordering there because
1365 * the getLogical/VisualIndex() functions can compute the
1366 * index on the fly in such a case.
1368 * The implementation of the access to same-level-runs and of the reordering
1369 * do attempt to provide better performance and less memory usage compared to
1370 * a direct implementation of especially rule (L2) with an array of
1371 * one (32-bit) integer per text character.
1373 * Here, the levels array is scanned as soon as necessary, and a vector of
1374 * same-level-runs is created. Reordering then is done on this vector.
1375 * For each run of text positions that were resolved to the same level,
1376 * only 8 bytes are stored: the first text position of the run and the visual
1377 * position behind the run after reordering.
1378 * One sign bit is used to hold the directionality of the run.
1379 * This is inefficient if there are many very short runs. If the average run
1380 * length is <2, then this uses more memory.
1382 * In a further attempt to save memory, the levels array is never changed
1383 * after all the resolution rules (Xn, Wn, Nn, In).
1384 * Many functions have to consider the field trailingWSStart:
1385 * if it is less than length, then there is an implicit trailing run
1386 * at the paraLevel,
1387 * which is not reflected in the levels array.
1388 * This allows a line nsBidi object to use the same levels array as
1389 * its paragraph parent object.
1391 * When a nsBidi object is created for a line of a paragraph, then the
1392 * paragraph's levels and dirProps arrays are reused by way of setting
1393 * a pointer into them, not by copying. This again saves memory and forbids to
1394 * change the now shared levels for (L1).
1396 nsresult nsBidi::SetLine(const nsBidi* aParaBidi, int32_t aStart, int32_t aLimit)
1398 nsBidi* pParent = (nsBidi*)aParaBidi;
1399 int32_t length;
1401 /* check the argument values */
1402 if(pParent==nullptr) {
1403 return NS_ERROR_INVALID_POINTER;
1404 } else if(aStart < 0 || aStart >= aLimit || aLimit > pParent->mLength) {
1405 return NS_ERROR_INVALID_ARG;
1408 /* set members from our aParaBidi parent */
1409 length = mLength = aLimit - aStart;
1410 mParaLevel=pParent->mParaLevel;
1412 mRuns=nullptr;
1413 mFlags=0;
1415 mDirProps=pParent->mDirProps+aStart;
1416 mLevels=pParent->mLevels+aStart;
1417 mRunCount=-1;
1419 if(pParent->mDirection!=NSBIDI_MIXED) {
1420 /* the parent is already trivial */
1421 mDirection=pParent->mDirection;
1424 * The parent's levels are all either
1425 * implicitly or explicitly ==paraLevel;
1426 * do the same here.
1428 if(pParent->mTrailingWSStart<=aStart) {
1429 mTrailingWSStart=0;
1430 } else if(pParent->mTrailingWSStart<aLimit) {
1431 mTrailingWSStart=pParent->mTrailingWSStart-aStart;
1432 } else {
1433 mTrailingWSStart=length;
1435 } else {
1436 const nsBidiLevel *levels=mLevels;
1437 int32_t i, trailingWSStart;
1438 nsBidiLevel level;
1440 SetTrailingWSStart();
1441 trailingWSStart=mTrailingWSStart;
1443 /* recalculate pLineBidi->direction */
1444 if(trailingWSStart==0) {
1445 /* all levels are at paraLevel */
1446 mDirection=(nsBidiDirection)(mParaLevel&1);
1447 } else {
1448 /* get the level of the first character */
1449 level=levels[0]&1;
1451 /* if there is anything of a different level, then the line is mixed */
1452 if(trailingWSStart<length && (mParaLevel&1)!=level) {
1453 /* the trailing WS is at paraLevel, which differs from levels[0] */
1454 mDirection=NSBIDI_MIXED;
1455 } else {
1456 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
1457 i=1;
1458 for(;;) {
1459 if(i==trailingWSStart) {
1460 /* the direction values match those in level */
1461 mDirection=(nsBidiDirection)level;
1462 break;
1463 } else if((levels[i]&1)!=level) {
1464 mDirection=NSBIDI_MIXED;
1465 break;
1467 ++i;
1472 switch(mDirection) {
1473 case NSBIDI_LTR:
1474 /* make sure paraLevel is even */
1475 mParaLevel=(mParaLevel+1)&~1;
1477 /* all levels are implicitly at paraLevel (important for GetLevels()) */
1478 mTrailingWSStart=0;
1479 break;
1480 case NSBIDI_RTL:
1481 /* make sure paraLevel is odd */
1482 mParaLevel|=1;
1484 /* all levels are implicitly at paraLevel (important for GetLevels()) */
1485 mTrailingWSStart=0;
1486 break;
1487 default:
1488 break;
1491 return NS_OK;
1494 /* handle trailing WS (L1) -------------------------------------------------- */
1497 * SetTrailingWSStart() sets the start index for a trailing
1498 * run of WS in the line. This is necessary because we do not modify
1499 * the paragraph's levels array that we just point into.
1500 * Using trailingWSStart is another form of performing (L1).
1502 * To make subsequent operations easier, we also include the run
1503 * before the WS if it is at the paraLevel - we merge the two here.
1505 void nsBidi::SetTrailingWSStart() {
1506 /* mDirection!=NSBIDI_MIXED */
1508 const DirProp *dirProps=mDirProps;
1509 nsBidiLevel *levels=mLevels;
1510 int32_t start=mLength;
1511 nsBidiLevel paraLevel=mParaLevel;
1513 /* go backwards across all WS, BN, explicit codes */
1514 while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) {
1515 --start;
1518 /* if the WS run can be merged with the previous run then do so here */
1519 while(start>0 && levels[start-1]==paraLevel) {
1520 --start;
1523 mTrailingWSStart=start;
1526 nsresult nsBidi::GetLevelAt(int32_t aCharIndex, nsBidiLevel* aLevel)
1528 /* return paraLevel if in the trailing WS run, otherwise the real level */
1529 if(aCharIndex<0 || mLength<=aCharIndex) {
1530 *aLevel = 0;
1531 } else if(mDirection!=NSBIDI_MIXED || aCharIndex>=mTrailingWSStart) {
1532 *aLevel = mParaLevel;
1533 } else {
1534 *aLevel = mLevels[aCharIndex];
1536 return NS_OK;
1539 nsresult nsBidi::GetLevels(nsBidiLevel** aLevels)
1541 int32_t start, length;
1543 length = mLength;
1544 if(length<=0) {
1545 *aLevels = nullptr;
1546 return NS_ERROR_INVALID_ARG;
1549 start = mTrailingWSStart;
1550 if(start==length) {
1551 /* the current levels array reflects the WS run */
1552 *aLevels = mLevels;
1553 return NS_OK;
1557 * After the previous if(), we know that the levels array
1558 * has an implicit trailing WS run and therefore does not fully
1559 * reflect itself all the levels.
1560 * This must be a nsBidi object for a line, and
1561 * we need to create a new levels array.
1564 if(GETLEVELSMEMORY(length)) {
1565 nsBidiLevel *levels=mLevelsMemory;
1567 if(start>0 && levels!=mLevels) {
1568 memcpy(levels, mLevels, start);
1570 memset(levels+start, mParaLevel, length-start);
1572 /* this new levels array is set for the line and reflects the WS run */
1573 mTrailingWSStart=length;
1574 *aLevels=mLevels=levels;
1575 return NS_OK;
1576 } else {
1577 /* out of memory */
1578 *aLevels = nullptr;
1579 return NS_ERROR_OUT_OF_MEMORY;
1582 #endif // FULL_BIDI_ENGINE
1584 nsresult nsBidi::GetCharTypeAt(int32_t aCharIndex, nsCharType* pType)
1586 if(aCharIndex<0 || mLength<=aCharIndex) {
1587 return NS_ERROR_INVALID_ARG;
1589 *pType = (nsCharType)mDirProps[aCharIndex];
1590 return NS_OK;
1593 nsresult nsBidi::GetLogicalRun(int32_t aLogicalStart, int32_t *aLogicalLimit, nsBidiLevel *aLevel)
1595 int32_t length = mLength;
1597 if(aLogicalStart<0 || length<=aLogicalStart) {
1598 return NS_ERROR_INVALID_ARG;
1601 int32_t runCount, visualStart, logicalLimit, logicalFirst, i;
1602 Run iRun;
1604 /* CountRuns will check VALID_PARA_OR_LINE */
1605 nsresult rv = CountRuns(&runCount);
1606 if (NS_FAILED(rv)) {
1607 return rv;
1610 visualStart = logicalLimit = 0;
1611 iRun = mRuns[0];
1613 for (i = 0; i < runCount; i++) {
1614 iRun = mRuns[i];
1615 logicalFirst = GET_INDEX(iRun.logicalStart);
1616 logicalLimit = logicalFirst + iRun.visualLimit - visualStart;
1617 if ((aLogicalStart >= logicalFirst) && (aLogicalStart < logicalLimit)) {
1618 break;
1620 visualStart = iRun.visualLimit;
1622 if (aLogicalLimit) {
1623 *aLogicalLimit = logicalLimit;
1625 if (aLevel) {
1626 if (mDirection != NSBIDI_MIXED || aLogicalStart >= mTrailingWSStart) {
1627 *aLevel = mParaLevel;
1628 } else {
1629 *aLevel = mLevels[aLogicalStart];
1632 return NS_OK;
1635 /* runs API functions ------------------------------------------------------- */
1637 nsresult nsBidi::CountRuns(int32_t* aRunCount)
1639 if(mRunCount<0 && !GetRuns()) {
1640 return NS_ERROR_OUT_OF_MEMORY;
1641 } else {
1642 if (aRunCount)
1643 *aRunCount = mRunCount;
1644 return NS_OK;
1648 nsresult nsBidi::GetVisualRun(int32_t aRunIndex, int32_t *aLogicalStart, int32_t *aLength, nsBidiDirection *aDirection)
1650 if( aRunIndex<0 ||
1651 (mRunCount==-1 && !GetRuns()) ||
1652 aRunIndex>=mRunCount
1654 *aDirection = NSBIDI_LTR;
1655 return NS_OK;
1656 } else {
1657 int32_t start=mRuns[aRunIndex].logicalStart;
1658 if(aLogicalStart!=nullptr) {
1659 *aLogicalStart=GET_INDEX(start);
1661 if(aLength!=nullptr) {
1662 if(aRunIndex>0) {
1663 *aLength=mRuns[aRunIndex].visualLimit-
1664 mRuns[aRunIndex-1].visualLimit;
1665 } else {
1666 *aLength=mRuns[0].visualLimit;
1669 *aDirection = (nsBidiDirection)GET_ODD_BIT(start);
1670 return NS_OK;
1674 /* compute the runs array --------------------------------------------------- */
1677 * Compute the runs array from the levels array.
1678 * After GetRuns() returns true, runCount is guaranteed to be >0
1679 * and the runs are reordered.
1680 * Odd-level runs have visualStart on their visual right edge and
1681 * they progress visually to the left.
1683 bool nsBidi::GetRuns()
1686 * This method returns immediately if the runs are already set. This
1687 * includes the case of length==0 (handled in setPara)..
1689 if (mRunCount >= 0) {
1690 return true;
1693 if(mDirection!=NSBIDI_MIXED) {
1694 /* simple, single-run case - this covers length==0 */
1695 GetSingleRun(mParaLevel);
1696 } else /* NSBIDI_MIXED, length>0 */ {
1697 /* mixed directionality */
1698 int32_t length=mLength, limit=mTrailingWSStart;
1701 * If there are WS characters at the end of the line
1702 * and the run preceding them has a level different from
1703 * paraLevel, then they will form their own run at paraLevel (L1).
1704 * Count them separately.
1705 * We need some special treatment for this in order to not
1706 * modify the levels array which a line nsBidi object shares
1707 * with its paragraph parent and its other line siblings.
1708 * In other words, for the trailing WS, it may be
1709 * levels[]!=paraLevel but we have to treat it like it were so.
1711 nsBidiLevel *levels=mLevels;
1712 int32_t i, runCount;
1713 nsBidiLevel level=NSBIDI_DEFAULT_LTR; /* initialize with no valid level */
1715 /* count the runs, there is at least one non-WS run, and limit>0 */
1716 runCount=0;
1717 for(i=0; i<limit; ++i) {
1718 /* increment runCount at the start of each run */
1719 if(levels[i]!=level) {
1720 ++runCount;
1721 level=levels[i];
1726 * We don't need to see if the last run can be merged with a trailing
1727 * WS run because SetTrailingWSStart() would have done that.
1729 if(runCount==1 && limit==length) {
1730 /* There is only one non-WS run and no trailing WS-run. */
1731 GetSingleRun(levels[0]);
1732 } else /* runCount>1 || limit<length */ {
1733 /* allocate and set the runs */
1734 Run *runs;
1735 int32_t runIndex, start;
1736 nsBidiLevel minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0;
1738 /* now, count a (non-mergable) WS run */
1739 if(limit<length) {
1740 ++runCount;
1743 /* runCount>1 */
1744 if(GETRUNSMEMORY(runCount)) {
1745 runs=mRunsMemory;
1746 } else {
1747 return false;
1750 /* set the runs */
1751 /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */
1752 /* however, that would take longer and make other functions more complicated */
1753 runIndex=0;
1755 /* search for the run ends */
1756 i = 0;
1757 do {
1758 /* prepare this run */
1759 start = i;
1760 level = levels[i];
1761 if(level<minLevel) {
1762 minLevel=level;
1764 if(level>maxLevel) {
1765 maxLevel=level;
1768 /* look for the run limit */
1769 while (++i < limit && levels[i] == level) {
1772 /* i is another run limit */
1773 runs[runIndex].logicalStart = start;
1774 runs[runIndex].visualLimit = i - start;
1775 ++runIndex;
1776 } while (i < limit);
1778 if(limit<length) {
1779 /* there is a separate WS run */
1780 runs[runIndex].logicalStart=limit;
1781 runs[runIndex].visualLimit=length-limit;
1782 if(mParaLevel<minLevel) {
1783 minLevel=mParaLevel;
1787 /* set the object fields */
1788 mRuns=runs;
1789 mRunCount=runCount;
1791 ReorderLine(minLevel, maxLevel);
1793 /* now add the direction flags and adjust the visualLimit's to be just that */
1794 /* this loop will also handling the trailing WS run */
1795 limit = 0;
1796 for (i = 0; i < runCount; ++i) {
1797 ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]);
1798 limit += runs[i].visualLimit;
1799 runs[i].visualLimit = limit;
1802 /* Set the "odd" bit for the trailing WS run. */
1803 /* For a RTL paragraph, it will be the *first* run in visual order. */
1804 if (runIndex < runCount) {
1805 int32_t trailingRun = (mParaLevel & 1) ? 0 : runIndex;
1806 ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, mParaLevel);
1811 return true;
1814 /* in trivial cases there is only one trivial run; called by GetRuns() */
1815 void nsBidi::GetSingleRun(nsBidiLevel aLevel)
1817 /* simple, single-run case */
1818 mRuns=mSimpleRuns;
1819 mRunCount=1;
1821 /* fill and reorder the single run */
1822 mRuns[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, aLevel);
1823 mRuns[0].visualLimit=mLength;
1826 /* reorder the runs array (L2) ---------------------------------------------- */
1829 * Reorder the same-level runs in the runs array.
1830 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
1831 * All the visualStart fields=logical start before reordering.
1832 * The "odd" bits are not set yet.
1834 * Reordering with this data structure lends itself to some handy shortcuts:
1836 * Since each run is moved but not modified, and since at the initial maxLevel
1837 * each sequence of same-level runs consists of only one run each, we
1838 * don't need to do anything there and can predecrement maxLevel.
1839 * In many simple cases, the reordering is thus done entirely in the
1840 * index mapping.
1841 * Also, reordering occurs only down to the lowest odd level that occurs,
1842 * which is minLevel|1. However, if the lowest level itself is odd, then
1843 * in the last reordering the sequence of the runs at this level or higher
1844 * will be all runs, and we don't need the elaborate loop to search for them.
1845 * This is covered by ++minLevel instead of minLevel|=1 followed
1846 * by an extra reorder-all after the reorder-some loop.
1847 * About a trailing WS run:
1848 * Such a run would need special treatment because its level is not
1849 * reflected in levels[] if this is not a paragraph object.
1850 * Instead, all characters from trailingWSStart on are implicitly at
1851 * paraLevel.
1852 * However, for all maxLevel>paraLevel, this run will never be reordered
1853 * and does not need to be taken into account. maxLevel==paraLevel is only reordered
1854 * if minLevel==paraLevel is odd, which is done in the extra segment.
1855 * This means that for the main reordering loop we don't need to consider
1856 * this run and can --runCount. If it is later part of the all-runs
1857 * reordering, then runCount is adjusted accordingly.
1859 void nsBidi::ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel)
1861 Run *runs, tempRun;
1862 nsBidiLevel *levels;
1863 int32_t firstRun, endRun, limitRun, runCount;
1865 /* nothing to do? */
1866 if(aMaxLevel<=(aMinLevel|1)) {
1867 return;
1871 * Reorder only down to the lowest odd level
1872 * and reorder at an odd aMinLevel in a separate, simpler loop.
1873 * See comments above for why aMinLevel is always incremented.
1875 ++aMinLevel;
1877 runs=mRuns;
1878 levels=mLevels;
1879 runCount=mRunCount;
1881 /* do not include the WS run at paraLevel<=old aMinLevel except in the simple loop */
1882 if(mTrailingWSStart<mLength) {
1883 --runCount;
1886 while(--aMaxLevel>=aMinLevel) {
1887 firstRun=0;
1889 /* loop for all sequences of runs */
1890 for(;;) {
1891 /* look for a sequence of runs that are all at >=aMaxLevel */
1892 /* look for the first run of such a sequence */
1893 while(firstRun<runCount && levels[runs[firstRun].logicalStart]<aMaxLevel) {
1894 ++firstRun;
1896 if(firstRun>=runCount) {
1897 break; /* no more such runs */
1900 /* look for the limit run of such a sequence (the run behind it) */
1901 for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=aMaxLevel;) {}
1903 /* Swap the entire sequence of runs from firstRun to limitRun-1. */
1904 endRun=limitRun-1;
1905 while(firstRun<endRun) {
1906 tempRun = runs[firstRun];
1907 runs[firstRun] = runs[endRun];
1908 runs[endRun] = tempRun;
1909 ++firstRun;
1910 --endRun;
1913 if(limitRun==runCount) {
1914 break; /* no more such runs */
1915 } else {
1916 firstRun=limitRun+1;
1921 /* now do aMaxLevel==old aMinLevel (==odd!), see above */
1922 if(!(aMinLevel&1)) {
1923 firstRun=0;
1925 /* include the trailing WS run in this complete reordering */
1926 if(mTrailingWSStart==mLength) {
1927 --runCount;
1930 /* Swap the entire sequence of all runs. (endRun==runCount) */
1931 while(firstRun<runCount) {
1932 tempRun = runs[firstRun];
1933 runs[firstRun] = runs[runCount];
1934 runs[runCount] = tempRun;
1935 ++firstRun;
1936 --runCount;
1941 nsresult nsBidi::ReorderVisual(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap)
1943 int32_t start, end, limit, temp;
1944 nsBidiLevel minLevel, maxLevel;
1946 if(aIndexMap==nullptr ||
1947 !PrepareReorder(aLevels, aLength, aIndexMap, &minLevel, &maxLevel)) {
1948 return NS_OK;
1951 /* nothing to do? */
1952 if(minLevel==maxLevel && (minLevel&1)==0) {
1953 return NS_OK;
1956 /* reorder only down to the lowest odd level */
1957 minLevel|=1;
1959 /* loop maxLevel..minLevel */
1960 do {
1961 start=0;
1963 /* loop for all sequences of levels to reorder at the current maxLevel */
1964 for(;;) {
1965 /* look for a sequence of levels that are all at >=maxLevel */
1966 /* look for the first index of such a sequence */
1967 while(start<aLength && aLevels[start]<maxLevel) {
1968 ++start;
1970 if(start>=aLength) {
1971 break; /* no more such runs */
1974 /* look for the limit of such a sequence (the index behind it) */
1975 for(limit=start; ++limit<aLength && aLevels[limit]>=maxLevel;) {}
1978 * Swap the entire interval of indexes from start to limit-1.
1979 * We don't need to swap the levels for the purpose of this
1980 * algorithm: the sequence of levels that we look at does not
1981 * move anyway.
1983 end=limit-1;
1984 while(start<end) {
1985 temp=aIndexMap[start];
1986 aIndexMap[start]=aIndexMap[end];
1987 aIndexMap[end]=temp;
1989 ++start;
1990 --end;
1993 if(limit==aLength) {
1994 break; /* no more such sequences */
1995 } else {
1996 start=limit+1;
1999 } while(--maxLevel>=minLevel);
2001 return NS_OK;
2004 bool nsBidi::PrepareReorder(const nsBidiLevel *aLevels, int32_t aLength,
2005 int32_t *aIndexMap,
2006 nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel)
2008 int32_t start;
2009 nsBidiLevel level, minLevel, maxLevel;
2011 if(aLevels==nullptr || aLength<=0) {
2012 return false;
2015 /* determine minLevel and maxLevel */
2016 minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1;
2017 maxLevel=0;
2018 for(start=aLength; start>0;) {
2019 level=aLevels[--start];
2020 if(level>NSBIDI_MAX_EXPLICIT_LEVEL+1) {
2021 return false;
2023 if(level<minLevel) {
2024 minLevel=level;
2026 if(level>maxLevel) {
2027 maxLevel=level;
2030 *aMinLevel=minLevel;
2031 *aMaxLevel=maxLevel;
2033 /* initialize the index map */
2034 for(start=aLength; start>0;) {
2035 --start;
2036 aIndexMap[start]=start;
2039 return true;
2042 #ifdef FULL_BIDI_ENGINE
2043 /* API functions for logical<->visual mapping ------------------------------- */
2045 nsresult nsBidi::GetVisualIndex(int32_t aLogicalIndex, int32_t* aVisualIndex) {
2046 int32_t visualIndex = NSBIDI_MAP_NOWHERE;
2048 if(aLogicalIndex<0 || mLength<=aLogicalIndex) {
2049 return NS_ERROR_INVALID_ARG;
2050 } else {
2051 /* we can do the trivial cases without the runs array */
2052 switch(mDirection) {
2053 case NSBIDI_LTR:
2054 *aVisualIndex = aLogicalIndex;
2055 return NS_OK;
2056 case NSBIDI_RTL:
2057 *aVisualIndex = mLength-aLogicalIndex-1;
2058 return NS_OK;
2059 default:
2060 if(mRunCount<0 && !GetRuns()) {
2061 return NS_ERROR_OUT_OF_MEMORY;
2062 } else {
2063 Run *runs=mRuns;
2064 int32_t i, visualStart=0, offset, length;
2066 /* linear search for the run, search on the visual runs */
2067 for (i = 0; i < mRunCount; ++i) {
2068 length=runs[i].visualLimit-visualStart;
2069 offset=aLogicalIndex-GET_INDEX(runs[i].logicalStart);
2070 if(offset>=0 && offset<length) {
2071 if(IS_EVEN_RUN(runs[i].logicalStart)) {
2072 /* LTR */
2073 visualIndex = visualStart + offset;
2074 } else {
2075 /* RTL */
2076 visualIndex = visualStart + length - offset - 1;
2078 break;
2080 visualStart+=length;
2082 if (i >= mRunCount) {
2083 *aVisualIndex = NSBIDI_MAP_NOWHERE;
2084 return NS_OK;
2090 *aVisualIndex = visualIndex;
2091 return NS_OK;
2094 nsresult nsBidi::GetLogicalIndex(int32_t aVisualIndex, int32_t *aLogicalIndex)
2096 if(aVisualIndex<0 || mLength<=aVisualIndex) {
2097 return NS_ERROR_INVALID_ARG;
2100 /* we can do the trivial cases without the runs array */
2101 if (mDirection == NSBIDI_LTR) {
2102 *aLogicalIndex = aVisualIndex;
2103 return NS_OK;
2104 } else if (mDirection == NSBIDI_RTL) {
2105 *aLogicalIndex = mLength - aVisualIndex - 1;
2106 return NS_OK;
2109 if(mRunCount<0 && !GetRuns()) {
2110 return NS_ERROR_OUT_OF_MEMORY;
2113 Run *runs=mRuns;
2114 int32_t i, runCount=mRunCount, start;
2116 if(runCount<=10) {
2117 /* linear search for the run */
2118 for(i=0; aVisualIndex>=runs[i].visualLimit; ++i) {}
2119 } else {
2120 /* binary search for the run */
2121 int32_t start=0, limit=runCount;
2123 /* the middle if() will guaranteed find the run, we don't need a loop limit */
2124 for(;;) {
2125 i=(start+limit)/2;
2126 if(aVisualIndex>=runs[i].visualLimit) {
2127 start=i+1;
2128 } else if(i==0 || aVisualIndex>=runs[i-1].visualLimit) {
2129 break;
2130 } else {
2131 limit=i;
2136 start=runs[i].logicalStart;
2137 if(IS_EVEN_RUN(start)) {
2138 /* LTR */
2139 /* the offset in runs[i] is aVisualIndex-runs[i-1].visualLimit */
2140 if(i>0) {
2141 aVisualIndex-=runs[i-1].visualLimit;
2143 *aLogicalIndex = GET_INDEX(start)+aVisualIndex;
2144 return NS_OK;
2145 } else {
2146 /* RTL */
2147 *aLogicalIndex = GET_INDEX(start)+runs[i].visualLimit-aVisualIndex-1;
2148 return NS_OK;
2152 nsresult nsBidi::GetLogicalMap(int32_t *aIndexMap)
2154 nsresult rv;
2156 /* CountRuns() checks for VALID_PARA_OR_LINE */
2157 rv = CountRuns(nullptr);
2158 if(NS_FAILED(rv)) {
2159 return rv;
2160 } else if(aIndexMap==nullptr) {
2161 return NS_ERROR_INVALID_ARG;
2162 } else {
2163 /* fill a logical-to-visual index map using the runs[] */
2164 int32_t visualStart, visualLimit, j;
2165 int32_t logicalStart;
2166 Run *runs = mRuns;
2167 if (mLength <= 0) {
2168 return NS_OK;
2171 visualStart = 0;
2172 for (j = 0; j < mRunCount; ++j) {
2173 logicalStart = GET_INDEX(runs[j].logicalStart);
2174 visualLimit = runs[j].visualLimit;
2175 if (IS_EVEN_RUN(runs[j].logicalStart)) {
2176 do { /* LTR */
2177 aIndexMap[logicalStart++] = visualStart++;
2178 } while (visualStart < visualLimit);
2179 } else {
2180 logicalStart += visualLimit-visualStart; /* logicalLimit */
2181 do { /* RTL */
2182 aIndexMap[--logicalStart] = visualStart++;
2183 } while (visualStart < visualLimit);
2185 /* visualStart==visualLimit; */
2188 return NS_OK;
2191 nsresult nsBidi::GetVisualMap(int32_t *aIndexMap)
2193 int32_t* runCount=nullptr;
2194 nsresult rv;
2196 if(aIndexMap==nullptr) {
2197 return NS_ERROR_INVALID_ARG;
2200 /* CountRuns() checks all of its and our arguments */
2201 rv = CountRuns(runCount);
2202 if(NS_FAILED(rv)) {
2203 return rv;
2204 } else {
2205 /* fill a visual-to-logical index map using the runs[] */
2206 Run *runs=mRuns, *runsLimit=runs+mRunCount;
2207 int32_t logicalStart, visualStart, visualLimit;
2209 visualStart=0;
2210 for(; runs<runsLimit; ++runs) {
2211 logicalStart=runs->logicalStart;
2212 visualLimit=runs->visualLimit;
2213 if(IS_EVEN_RUN(logicalStart)) {
2214 do { /* LTR */
2215 *aIndexMap++ = logicalStart++;
2216 } while(++visualStart<visualLimit);
2217 } else {
2218 REMOVE_ODD_BIT(logicalStart);
2219 logicalStart+=visualLimit-visualStart; /* logicalLimit */
2220 do { /* RTL */
2221 *aIndexMap++ = --logicalStart;
2222 } while(++visualStart<visualLimit);
2224 /* visualStart==visualLimit; */
2226 return NS_OK;
2230 /* reorder a line based on a levels array (L2) ------------------------------ */
2232 nsresult nsBidi::ReorderLogical(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap)
2234 int32_t start, limit, sumOfSosEos;
2235 nsBidiLevel minLevel, maxLevel;
2237 if(aIndexMap==nullptr ||
2238 !PrepareReorder(aLevels, aLength, aIndexMap, &minLevel, &maxLevel)) {
2239 return NS_OK;
2242 /* nothing to do? */
2243 if(minLevel==maxLevel && (minLevel&1)==0) {
2244 return NS_OK;
2247 /* reorder only down to the lowest odd level */
2248 minLevel|=1;
2250 /* loop maxLevel..minLevel */
2251 do {
2252 start=0;
2254 /* loop for all sequences of levels to reorder at the current maxLevel */
2255 for(;;) {
2256 /* look for a sequence of levels that are all at >=maxLevel */
2257 /* look for the first index of such a sequence */
2258 while(start<aLength && aLevels[start]<maxLevel) {
2259 ++start;
2261 if(start>=aLength) {
2262 break; /* no more such sequences */
2265 /* look for the limit of such a sequence (the index behind it) */
2266 for(limit=start; ++limit<aLength && aLevels[limit]>=maxLevel;) {}
2269 * sos=start of sequence, eos=end of sequence
2271 * The closed (inclusive) interval from sos to eos includes all the logical
2272 * and visual indexes within this sequence. They are logically and
2273 * visually contiguous and in the same range.
2275 * For each run, the new visual index=sos+eos-old visual index;
2276 * we pre-add sos+eos into sumOfSosEos ->
2277 * new visual index=sumOfSosEos-old visual index;
2279 sumOfSosEos=start+limit-1;
2281 /* reorder each index in the sequence */
2282 do {
2283 aIndexMap[start]=sumOfSosEos-aIndexMap[start];
2284 } while(++start<limit);
2286 /* start==limit */
2287 if(limit==aLength) {
2288 break; /* no more such sequences */
2289 } else {
2290 start=limit+1;
2293 } while(--maxLevel>=minLevel);
2295 return NS_OK;
2298 nsresult nsBidi::InvertMap(const int32_t *aSrcMap, int32_t *aDestMap, int32_t aLength)
2300 if(aSrcMap!=nullptr && aDestMap!=nullptr && aLength > 0) {
2301 const int32_t *pi;
2302 int32_t destLength = -1, count = 0;
2303 /* find highest value and count positive indexes in srcMap */
2304 pi = aSrcMap + aLength;
2305 while (pi > aSrcMap) {
2306 if (*--pi > destLength) {
2307 destLength = *pi;
2309 if (*pi >= 0) {
2310 count++;
2313 destLength++; /* add 1 for origin 0 */
2314 if (count < destLength) {
2315 /* we must fill unmatched destMap entries with -1 */
2316 memset(aDestMap, 0xFF, destLength * sizeof(int32_t));
2318 pi = aSrcMap + aLength;
2319 while (aLength > 0) {
2320 if (*--pi >= 0) {
2321 aDestMap[*pi] = --aLength;
2322 } else {
2323 --aLength;
2327 return NS_OK;
2330 int32_t nsBidi::doWriteReverse(const char16_t *src, int32_t srcLength,
2331 char16_t *dest, uint16_t options) {
2333 * RTL run -
2335 * RTL runs need to be copied to the destination in reverse order
2336 * of code points, not code units, to keep Unicode characters intact.
2338 * The general strategy for this is to read the source text
2339 * in backward order, collect all code units for a code point
2340 * (and optionally following combining characters, see below),
2341 * and copy all these code units in ascending order
2342 * to the destination for this run.
2344 * Several options request whether combining characters
2345 * should be kept after their base characters,
2346 * whether Bidi control characters should be removed, and
2347 * whether characters should be replaced by their mirror-image
2348 * equivalent Unicode characters.
2350 int32_t i, j, destSize;
2351 uint32_t c;
2353 /* optimize for several combinations of options */
2354 switch(options&(NSBIDI_REMOVE_BIDI_CONTROLS|NSBIDI_DO_MIRRORING|NSBIDI_KEEP_BASE_COMBINING)) {
2355 case 0:
2357 * With none of the "complicated" options set, the destination
2358 * run will have the same length as the source run,
2359 * and there is no mirroring and no keeping combining characters
2360 * with their base characters.
2362 destSize=srcLength;
2364 /* preserve character integrity */
2365 do {
2366 /* i is always after the last code unit known to need to be kept in this segment */
2367 i=srcLength;
2369 /* collect code units for one base character */
2370 UTF_BACK_1(src, 0, srcLength);
2372 /* copy this base character */
2373 j=srcLength;
2374 do {
2375 *dest++=src[j++];
2376 } while(j<i);
2377 } while(srcLength>0);
2378 break;
2379 case NSBIDI_KEEP_BASE_COMBINING:
2381 * Here, too, the destination
2382 * run will have the same length as the source run,
2383 * and there is no mirroring.
2384 * We do need to keep combining characters with their base characters.
2386 destSize=srcLength;
2388 /* preserve character integrity */
2389 do {
2390 /* i is always after the last code unit known to need to be kept in this segment */
2391 i=srcLength;
2393 /* collect code units and modifier letters for one base character */
2394 do {
2395 UTF_PREV_CHAR(src, 0, srcLength, c);
2396 } while(srcLength>0 && GetBidiCat(c) == eCharType_DirNonSpacingMark);
2398 /* copy this "user character" */
2399 j=srcLength;
2400 do {
2401 *dest++=src[j++];
2402 } while(j<i);
2403 } while(srcLength>0);
2404 break;
2405 default:
2407 * With several "complicated" options set, this is the most
2408 * general and the slowest copying of an RTL run.
2409 * We will do mirroring, remove Bidi controls, and
2410 * keep combining characters with their base characters
2411 * as requested.
2413 if(!(options&NSBIDI_REMOVE_BIDI_CONTROLS)) {
2414 i=srcLength;
2415 } else {
2416 /* we need to find out the destination length of the run,
2417 which will not include the Bidi control characters */
2418 int32_t length=srcLength;
2419 char16_t ch;
2421 i=0;
2422 do {
2423 ch=*src++;
2424 if (!IsBidiControl((uint32_t)ch)) {
2425 ++i;
2427 } while(--length>0);
2428 src-=srcLength;
2430 destSize=i;
2432 /* preserve character integrity */
2433 do {
2434 /* i is always after the last code unit known to need to be kept in this segment */
2435 i=srcLength;
2437 /* collect code units for one base character */
2438 UTF_PREV_CHAR(src, 0, srcLength, c);
2439 if(options&NSBIDI_KEEP_BASE_COMBINING) {
2440 /* collect modifier letters for this base character */
2441 while(srcLength>0 && GetBidiCat(c) == eCharType_DirNonSpacingMark) {
2442 UTF_PREV_CHAR(src, 0, srcLength, c);
2446 if(options&NSBIDI_REMOVE_BIDI_CONTROLS && IsBidiControl(c)) {
2447 /* do not copy this Bidi control character */
2448 continue;
2451 /* copy this "user character" */
2452 j=srcLength;
2453 if(options&NSBIDI_DO_MIRRORING) {
2454 /* mirror only the base character */
2455 c = GetMirroredChar(c);
2457 int32_t k=0;
2458 UTF_APPEND_CHAR_UNSAFE(dest, k, c);
2459 dest+=k;
2460 j+=k;
2462 while(j<i) {
2463 *dest++=src[j++];
2465 } while(srcLength>0);
2466 break;
2467 } /* end of switch */
2468 return destSize;
2471 nsresult nsBidi::WriteReverse(const char16_t *aSrc, int32_t aSrcLength, char16_t *aDest, uint16_t aOptions, int32_t *aDestSize)
2473 if( aSrc==nullptr || aSrcLength<0 ||
2474 aDest==nullptr
2476 return NS_ERROR_INVALID_ARG;
2479 /* do input and output overlap? */
2480 if( aSrc>=aDest && aSrc<aDest+aSrcLength ||
2481 aDest>=aSrc && aDest<aSrc+aSrcLength
2483 return NS_ERROR_INVALID_ARG;
2486 if(aSrcLength>0) {
2487 *aDestSize = doWriteReverse(aSrc, aSrcLength, aDest, aOptions);
2489 return NS_OK;
2491 #endif // FULL_BIDI_ENGINE