1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 #include "nsUnicodeProperties.h"
11 using namespace mozilla::unicode
;
13 // These are #defined in <sys/regset.h> under Solaris 10 x86
17 /* Comparing the description of the Bidi algorithm with this implementation
18 is easier with the same names for the Bidi types in the code as there.
21 L
= eCharType_LeftToRight
,
22 R
= eCharType_RightToLeft
,
23 EN
= eCharType_EuropeanNumber
,
24 ES
= eCharType_EuropeanNumberSeparator
,
25 ET
= eCharType_EuropeanNumberTerminator
,
26 AN
= eCharType_ArabicNumber
,
27 CS
= eCharType_CommonNumberSeparator
,
28 B
= eCharType_BlockSeparator
,
29 S
= eCharType_SegmentSeparator
,
30 WS
= eCharType_WhiteSpaceNeutral
,
31 O_N
= eCharType_OtherNeutral
,
32 LRE
= eCharType_LeftToRightEmbedding
,
33 LRO
= eCharType_LeftToRightOverride
,
34 AL
= eCharType_RightToLeftArabic
,
35 RLE
= eCharType_RightToLeftEmbedding
,
36 RLO
= eCharType_RightToLeftOverride
,
37 PDF
= eCharType_PopDirectionalFormat
,
38 NSM
= eCharType_DirNonSpacingMark
,
39 BN
= eCharType_BoundaryNeutral
,
40 LRI
= eCharType_LeftToRightIsolate
,
41 RLI
= eCharType_RightToLeftIsolate
,
42 FSI
= eCharType_FirstStrongIsolate
,
43 PDI
= eCharType_PopDirectionalIsolate
,
47 /* to avoid some conditional statements, use tiny constant arrays */
48 static Flags flagLR
[2]={ DIRPROP_FLAG(L
), DIRPROP_FLAG(R
) };
49 static Flags flagE
[2]={ DIRPROP_FLAG(LRE
), DIRPROP_FLAG(RLE
) };
50 static Flags flagO
[2]={ DIRPROP_FLAG(LRO
), DIRPROP_FLAG(RLO
) };
52 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
53 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
54 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
57 * General implementation notes:
59 * Throughout the implementation, there are comments like (W2) that refer to
60 * rules of the Bidi algorithm in its version 5, in this example to the second
61 * rule of the resolution of weak types.
63 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
64 * character according to UTF-16, the second UChar gets the directional property of
65 * the entire character assigned, while the first one gets a BN, a boundary
66 * neutral, type, which is ignored by most of the algorithm according to
67 * rule (X9) and the implementation suggestions of the Bidi algorithm.
69 * Later, AdjustWSLevels() will set the level for each BN to that of the
70 * following character (UChar), which results in surrogate pairs getting the
71 * same level on each of their surrogates.
73 * In a UTF-8 implementation, the same thing could be done: the last byte of
74 * a multi-byte sequence would get the "real" property, while all previous
75 * bytes of that sequence would get BN.
77 * It is not possible to assign all those parts of a character the same real
78 * property because this would fail in the resolution of weak types with rules
79 * that look at immediately surrounding types.
81 * As a related topic, this implementation does not remove Boundary Neutral
82 * types from the input, but ignores them whenever this is relevant.
83 * For example, the loop for the resolution of the weak types reads
84 * types until it finds a non-BN.
85 * Also, explicit embedding codes are neither changed into BN nor removed.
86 * They are only treated the same way real BNs are.
87 * As stated before, AdjustWSLevels() takes care of them at the end.
88 * For the purpose of conformance, the levels of all these codes
91 * Note that this implementation never modifies the dirProps
92 * after the initial setup, except for FSI which is changed to either
93 * LRI or RLI in GetDirProps(), and paired brackets which may be changed
94 * to L or R according to N0.
97 * In this implementation, the resolution of weak types (Wn),
98 * neutrals (Nn), and the assignment of the resolved level (In)
99 * are all done in one single loop, in ResolveImplicitLevels().
100 * Changes of dirProp values are done on the fly, without writing
101 * them back to the dirProps array.
104 * This implementation contains code that allows to bypass steps of the
105 * algorithm that are not needed on the specific paragraph
106 * in order to speed up the most common cases considerably,
107 * like text that is entirely LTR, or RTL text without numbers.
109 * Most of this is done by setting a bit for each directional property
110 * in a flags variable and later checking for whether there are
111 * any LTR characters or any RTL characters, or both, whether
112 * there are any explicit embedding codes, etc.
114 * If the (Xn) steps are performed, then the flags are re-evaluated,
115 * because they will then not contain the embedding codes any more
116 * and will be adjusted for override codes, so that subsequently
117 * more bypassing may be possible than what the initial flags suggested.
119 * If the text is not mixed-directional, then the
120 * algorithm steps for the weak type resolution are not performed,
121 * and all levels are set to the paragraph level.
123 * If there are no explicit embedding codes, then the (Xn) steps
126 * If embedding levels are supplied as a parameter, then all
127 * explicit embedding codes are ignored, and the (Xn) steps
130 * White Space types could get the level of the run they belong to,
131 * and are checked with a test of (flags&MASK_EMBEDDING) to
132 * consider if the paragraph direction should be considered in
133 * the flags variable.
135 * If there are no White Space types in the paragraph, then
136 * (L1) is not necessary in AdjustWSLevels().
142 mMayAllocateText
=true;
143 mMayAllocateRuns
=true;
153 /* reset the object, all pointers nullptr, all flags false, all sizes 0 */
157 mDirection
= NSBIDI_LTR
;
158 mTrailingWSStart
= 0;
173 mDirPropsMemory
=nullptr;
174 mLevelsMemory
=nullptr;
176 mIsolatesMemory
=nullptr;
178 mMayAllocateText
=false;
179 mMayAllocateRuns
=false;
183 * We are allowed to allocate memory if aMemory==nullptr or
184 * aMayAllocate==true for each array that we need.
185 * We also try to grow and shrink memory as needed if we
188 * Assume aSizeNeeded>0.
189 * If *aMemory!=nullptr, then assume *aSize>0.
191 * ### this realloc() may unnecessarily copy the old data,
192 * which we know we don't need any more;
193 * is this the best way to do this??
195 bool nsBidi::GetMemory(void **aMemory
, size_t *aSize
, bool aMayAllocate
, size_t aSizeNeeded
)
197 /* check for existing memory */
198 if(*aMemory
==nullptr) {
199 /* we need to allocate memory */
203 *aMemory
=moz_malloc(aSizeNeeded
);
204 if (*aMemory
!=nullptr) {
213 /* there is some memory, is it enough or too much? */
214 if(aSizeNeeded
>*aSize
&& !aMayAllocate
) {
215 /* not enough memory, and we must not allocate */
217 } else if(aSizeNeeded
!=*aSize
&& aMayAllocate
) {
218 /* we may try to grow or shrink */
219 void *memory
=moz_realloc(*aMemory
, aSizeNeeded
);
221 if(memory
!=nullptr) {
226 /* we failed to grow */
230 /* we have at least enough memory and must not allocate */
238 moz_free(mDirPropsMemory
);
239 mDirPropsMemory
= nullptr;
240 moz_free(mLevelsMemory
);
241 mLevelsMemory
= nullptr;
242 moz_free(mRunsMemory
);
243 mRunsMemory
= nullptr;
244 free(mIsolatesMemory
);
245 mIsolatesMemory
= nullptr;
248 /* SetPara ------------------------------------------------------------ */
250 nsresult
nsBidi::SetPara(const char16_t
*aText
, int32_t aLength
,
251 nsBidiLevel aParaLevel
, nsBidiLevel
*aEmbeddingLevels
)
253 nsBidiDirection direction
;
255 /* check the argument values */
257 ((NSBIDI_MAX_EXPLICIT_LEVEL
<aParaLevel
) && !IS_DEFAULT_LEVEL(aParaLevel
)) ||
260 return NS_ERROR_INVALID_ARG
;
264 aLength
= NS_strlen(aText
);
267 /* initialize member data */
269 mParaLevel
=aParaLevel
;
270 mDirection
=aParaLevel
& 1 ? NSBIDI_RTL
: NSBIDI_LTR
;
271 mTrailingWSStart
=aLength
; /* the levels[] will reflect the WS run */
279 * For an empty paragraph, create an nsBidi object with the aParaLevel and
280 * the flags and the direction set but without allocating zero-length arrays.
281 * There is nothing more to do.
283 if(IS_DEFAULT_LEVEL(aParaLevel
)) {
286 mFlags
=DIRPROP_FLAG_LR(aParaLevel
);
294 * Get the directional properties,
295 * the flags bit-set, and
296 * determine the partagraph level if necessary.
298 if(GETDIRPROPSMEMORY(aLength
)) {
299 mDirProps
=mDirPropsMemory
;
302 return NS_ERROR_OUT_OF_MEMORY
;
305 /* are explicit levels specified? */
306 if(aEmbeddingLevels
==nullptr) {
307 /* no: determine explicit levels according to the (Xn) rules */\
308 if(GETLEVELSMEMORY(aLength
)) {
309 mLevels
=mLevelsMemory
;
310 ResolveExplicitLevels(&direction
);
312 return NS_ERROR_OUT_OF_MEMORY
;
315 /* set BN for all explicit codes, check that all levels are aParaLevel..NSBIDI_MAX_EXPLICIT_LEVEL */
316 mLevels
=aEmbeddingLevels
;
317 nsresult rv
= CheckExplicitLevels(&direction
);
323 /* allocate isolate memory */
324 if (mIsolateCount
<= SIMPLE_ISOLATES_SIZE
) {
325 mIsolates
= mSimpleIsolates
;
327 if (mIsolateCount
* sizeof(Isolate
) <= mIsolatesSize
) {
328 mIsolates
= mIsolatesMemory
;
330 if (GETINITIALISOLATESMEMORY(mIsolateCount
)) {
331 mIsolates
= mIsolatesMemory
;
333 return NS_ERROR_OUT_OF_MEMORY
;
337 mIsolateCount
= -1; /* current isolates stack entry == none */
340 * The steps after (X9) in the Bidi algorithm are performed only if
341 * the paragraph text has mixed directionality!
343 mDirection
= direction
;
346 /* make sure paraLevel is even */
347 mParaLevel
=(mParaLevel
+1)&~1;
349 /* all levels are implicitly at paraLevel (important for GetLevels()) */
353 /* make sure paraLevel is odd */
356 /* all levels are implicitly at paraLevel (important for GetLevels()) */
361 * If there are no external levels specified and there
362 * are no significant explicit level codes in the text,
363 * then we can treat the entire paragraph as one run.
364 * Otherwise, we need to perform the following rules on runs of
365 * the text with the same embedding levels. (X10)
366 * "Significant" explicit level codes are ones that actually
367 * affect non-BN characters.
368 * Examples for "insignificant" ones are empty embeddings
369 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
371 if(aEmbeddingLevels
==nullptr && !(mFlags
&DIRPROP_FLAG_MULTI_RUNS
)) {
372 ResolveImplicitLevels(0, aLength
,
373 GET_LR_FROM_LEVEL(mParaLevel
),
374 GET_LR_FROM_LEVEL(mParaLevel
));
376 /* sor, eor: start and end types of same-level-run */
377 nsBidiLevel
*levels
=mLevels
;
378 int32_t start
, limit
=0;
379 nsBidiLevel level
, nextLevel
;
382 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
385 if(level
<nextLevel
) {
386 eor
=GET_LR_FROM_LEVEL(nextLevel
);
388 eor
=GET_LR_FROM_LEVEL(level
);
392 /* determine start and limit of the run (end points just behind the run) */
394 /* the values for this run's start are the same as for the previous run's end */
399 /* search for the limit of this run */
400 while(++limit
<aLength
&& levels
[limit
]==level
) {}
402 /* get the correct level of the next run */
404 nextLevel
=levels
[limit
];
406 nextLevel
=mParaLevel
;
409 /* determine eor from max(level, nextLevel); sor is last run's eor */
410 if((level
&~NSBIDI_LEVEL_OVERRIDE
)<(nextLevel
&~NSBIDI_LEVEL_OVERRIDE
)) {
411 eor
=GET_LR_FROM_LEVEL(nextLevel
);
413 eor
=GET_LR_FROM_LEVEL(level
);
416 /* if the run consists of overridden directional types, then there
417 are no implicit types to be resolved */
418 if(!(level
&NSBIDI_LEVEL_OVERRIDE
)) {
419 ResolveImplicitLevels(start
, limit
, sor
, eor
);
422 levels
[start
++] &= ~NSBIDI_LEVEL_OVERRIDE
;
423 } while (start
< limit
);
425 } while(limit
<aLength
);
428 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
436 /* perform (P2)..(P3) ------------------------------------------------------- */
439 * Get the directional properties for the text,
440 * calculate the flags bit-set, and
441 * determine the partagraph level if necessary.
443 void nsBidi::GetDirProps(const char16_t
*aText
)
445 DirProp
*dirProps
=mDirPropsMemory
; /* mDirProps is const */
447 int32_t i
=0, length
=mLength
;
448 Flags flags
=0; /* collect all directionalities in the text */
452 bool isDefaultLevel
= IS_DEFAULT_LEVEL(mParaLevel
);
455 NOT_SEEKING_STRONG
, /* 0: not after FSI */
456 SEEKING_STRONG_FOR_PARA
, /* 1: looking for first strong char in para */
457 SEEKING_STRONG_FOR_FSI
, /* 2: looking for first strong after FSI */
458 LOOKING_FOR_PDI
/* 3: found strong after FSI, looking for PDI */
462 /* The following stacks are used to manage isolate sequences. Those
463 sequences may be nested, but obviously never more deeply than the
464 maximum explicit embedding level.
465 lastStack is the index of the last used entry in the stack. A value of -1
466 means that there is no open isolate sequence. */
467 /* The following stack contains the position of the initiator of
468 each open isolate sequence */
469 int32_t isolateStartStack
[NSBIDI_MAX_EXPLICIT_LEVEL
+ 1];
470 /* The following stack contains the last known state before
471 encountering the initiator of an isolate sequence */
472 State previousStateStack
[NSBIDI_MAX_EXPLICIT_LEVEL
+ 1];
473 int32_t stackLast
= -1;
477 * see comment in nsBidi.h:
478 * the DEFAULT_XXX values are designed so that
479 * their bit 0 alone yields the intended default
482 state
= SEEKING_STRONG_FOR_PARA
;
484 state
= NOT_SEEKING_STRONG
;
487 /* determine the paragraph level (P2..P3) */
488 for(/* i = 0 above */; i
< length
;) {
490 if(!IS_FIRST_SURROGATE(uchar
) || i
+1==length
|| !IS_SECOND_SURROGATE(aText
[i
+1])) {
491 /* not a surrogate pair */
492 flags
|=DIRPROP_FLAG(dirProps
[i
]=dirProp
=GetBidiCat((uint32_t)uchar
));
494 /* a surrogate pair */
495 dirProps
[i
++]=BN
; /* first surrogate in the pair gets the BN type */
496 flags
|=DIRPROP_FLAG(dirProps
[i
]=dirProp
=GetBidiCat(GET_UTF_32(uchar
, aText
[i
])))|DIRPROP_FLAG(BN
);
502 if (state
== SEEKING_STRONG_FOR_PARA
) {
504 state
= NOT_SEEKING_STRONG
;
505 } else if (state
== SEEKING_STRONG_FOR_FSI
) {
506 if (stackLast
<= NSBIDI_MAX_EXPLICIT_LEVEL
) {
507 dirProps
[isolateStartStack
[stackLast
]] = LRI
;
510 state
= LOOKING_FOR_PDI
;
515 if (state
== SEEKING_STRONG_FOR_PARA
) {
517 state
= NOT_SEEKING_STRONG
;
518 } else if (state
== SEEKING_STRONG_FOR_FSI
) {
519 if (stackLast
<= NSBIDI_MAX_EXPLICIT_LEVEL
) {
520 dirProps
[isolateStartStack
[stackLast
]] = RLI
;
523 state
= LOOKING_FOR_PDI
;
527 case FSI
: case LRI
: case RLI
:
529 if (stackLast
<= NSBIDI_MAX_EXPLICIT_LEVEL
) {
530 isolateStartStack
[stackLast
] = i
- 1;
531 previousStateStack
[stackLast
] = state
;
533 if (dirProp
== FSI
) {
534 state
= SEEKING_STRONG_FOR_FSI
;
536 state
= LOOKING_FOR_PDI
;
541 if (state
== SEEKING_STRONG_FOR_FSI
) {
542 if (stackLast
<= NSBIDI_MAX_EXPLICIT_LEVEL
) {
543 dirProps
[isolateStartStack
[stackLast
]] = LRI
;
544 flags
|= DIRPROP_FLAG(LRI
);
547 if (stackLast
>= 0) {
548 if (stackLast
<= NSBIDI_MAX_EXPLICIT_LEVEL
) {
549 state
= previousStateStack
[stackLast
];
556 // This shouldn't happen, since we don't support multiple paragraphs.
557 NS_NOTREACHED("Unexpected paragraph separator");
565 /* Ignore still open isolate sequences with overflow */
566 if (stackLast
> NSBIDI_MAX_EXPLICIT_LEVEL
) {
567 stackLast
= NSBIDI_MAX_EXPLICIT_LEVEL
;
568 if (dirProps
[previousStateStack
[NSBIDI_MAX_EXPLICIT_LEVEL
]] != FSI
) {
569 state
= LOOKING_FOR_PDI
;
573 /* Resolve direction of still unresolved open FSI sequences */
574 while (stackLast
>= 0) {
575 if (state
== SEEKING_STRONG_FOR_FSI
) {
576 dirProps
[isolateStartStack
[stackLast
]] = LRI
;
577 flags
|= DIRPROP_FLAG(LRI
);
579 state
= previousStateStack
[stackLast
];
583 flags
|=DIRPROP_FLAG_LR(mParaLevel
);
588 /* perform (X1)..(X9) ------------------------------------------------------- */
591 * Resolve the explicit levels as specified by explicit embedding codes.
592 * Recalculate the flags to have them reflect the real properties
593 * after taking the explicit embeddings into account.
595 * The Bidi algorithm is designed to result in the same behavior whether embedding
596 * levels are externally specified (from "styled text", supposedly the preferred
597 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
598 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
599 * However, in a real implementation, this removal of these codes and their index
600 * positions in the plain text is undesirable since it would result in
601 * reallocated, reindexed text.
602 * Instead, this implementation leaves the codes in there and just ignores them
603 * in the subsequent processing.
604 * In order to get the same reordering behavior, positions with a BN or a not-isolate
605 * explicit embedding code just get the same level assigned as the last "real"
608 * Some implementations, not this one, then overwrite some of these
609 * directionality properties at "real" same-level-run boundaries by
610 * L or R codes so that the resolution of weak types can be performed on the
611 * entire paragraph at once instead of having to parse it once more and
612 * perform that resolution on same-level-runs.
613 * This limits the scope of the implicit rules in effectively
614 * the same way as the run limits.
616 * Instead, this implementation does not modify these codes.
617 * On one hand, the paragraph has to be scanned for same-level-runs, but
618 * on the other hand, this saves another loop to reset these codes,
619 * or saves making and modifying a copy of dirProps[].
622 * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
625 * Handling the stack of explicit levels (Xn):
627 * With the Bidi stack of explicit levels, as pushed with each
628 * LRE, RLE, LRO, and RLO, LRI, RLI, and FSI and popped with each PDF and PDI,
629 * the explicit level must never exceed NSBIDI_MAX_EXPLICIT_LEVEL.
631 * In order to have a correct push-pop semantics even in the case of overflows,
632 * overflow counters and a valid isolate counter are used as described in UAX#9
633 * section 3.3.2 "Explicit Levels and Direction".
635 * This implementation assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd.
638 void nsBidi::ResolveExplicitLevels(nsBidiDirection
*aDirection
)
640 DirProp
*dirProps
=mDirProps
;
641 nsBidiLevel
*levels
=mLevels
;
643 int32_t i
=0, length
=mLength
;
644 Flags flags
=mFlags
; /* collect all directionalities in the text */
646 nsBidiLevel level
=mParaLevel
;
647 nsBidiDirection direction
;
651 /* determine if the text is mixed-directional or single-directional */
652 direction
=DirectionFromFlags(flags
);
654 /* we may not need to resolve any explicit levels */
655 if(direction
!=NSBIDI_MIXED
) {
656 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
657 } else if(!(flags
&(MASK_EXPLICIT
|MASK_ISO
))) {
658 /* no embeddings, set all levels to the paragraph level */
659 for(i
=0; i
<length
; ++i
) {
663 /* continue to perform (Xn) */
665 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
666 /* both variables may carry the NSBIDI_LEVEL_OVERRIDE flag to indicate the override status */
667 nsBidiLevel embeddingLevel
= level
, newLevel
;
668 nsBidiLevel previousLevel
= level
; /* previous level for regular (not CC) characters */
670 uint16_t stack
[NSBIDI_MAX_EXPLICIT_LEVEL
+ 2]; /* we never push anything >=NSBIDI_MAX_EXPLICIT_LEVEL
671 but we need one more entry as base */
672 int32_t stackLast
= 0;
673 int32_t overflowIsolateCount
= 0;
674 int32_t overflowEmbeddingCount
= 0;
675 int32_t validIsolateCount
= 0;
679 /* recalculate the flags */
682 /* since we assume that this is a single paragraph, we ignore (X8) */
683 for(i
=0; i
<length
; ++i
) {
690 /* (X2, X3, X4, X5) */
691 flags
|= DIRPROP_FLAG(BN
);
692 if (dirProp
== LRE
|| dirProp
== LRO
) {
693 newLevel
= (embeddingLevel
+ 2) & ~(NSBIDI_LEVEL_OVERRIDE
| 1); /* least greater even level */
695 newLevel
= ((embeddingLevel
& ~NSBIDI_LEVEL_OVERRIDE
) + 1) | 1; /* least greater odd level */
697 if(newLevel
<= NSBIDI_MAX_EXPLICIT_LEVEL
&& overflowIsolateCount
== 0 && overflowEmbeddingCount
== 0) {
698 embeddingLevel
= newLevel
;
699 if (dirProp
== LRO
|| dirProp
== RLO
) {
700 embeddingLevel
|= NSBIDI_LEVEL_OVERRIDE
;
703 stack
[stackLast
] = embeddingLevel
;
704 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
705 since this has already been done for newLevel which is
706 the source for embeddingLevel.
709 dirProps
[i
] |= IGNORE_CC
;
710 if (overflowIsolateCount
== 0) {
711 overflowEmbeddingCount
++;
718 flags
|= DIRPROP_FLAG(BN
);
719 /* handle all the overflow cases first */
720 if (overflowIsolateCount
) {
721 dirProps
[i
] |= IGNORE_CC
;
724 if (overflowEmbeddingCount
) {
725 dirProps
[i
] |= IGNORE_CC
;
726 overflowEmbeddingCount
--;
729 if (stackLast
> 0 && stack
[stackLast
] < ISOLATE
) { /* not an isolate entry */
731 embeddingLevel
= stack
[stackLast
];
733 dirProps
[i
] |= IGNORE_CC
;
739 if (embeddingLevel
!= previousLevel
) {
740 previousLevel
= embeddingLevel
;
743 flags
|= DIRPROP_FLAG(O_N
) | DIRPROP_FLAG(BN
) | DIRPROP_FLAG_LR(embeddingLevel
);
744 level
= embeddingLevel
;
745 if (dirProp
== LRI
) {
746 newLevel
= (embeddingLevel
+ 2) & ~(NSBIDI_LEVEL_OVERRIDE
| 1); /* least greater even level */
748 newLevel
= ((embeddingLevel
& ~NSBIDI_LEVEL_OVERRIDE
) + 1) | 1; /* least greater odd level */
750 if (newLevel
<= NSBIDI_MAX_EXPLICIT_LEVEL
&& overflowIsolateCount
== 0 && overflowEmbeddingCount
== 0) {
751 previousLevel
= embeddingLevel
;
753 if (validIsolateCount
> mIsolateCount
) {
754 mIsolateCount
= validIsolateCount
;
756 embeddingLevel
= newLevel
;
758 stack
[stackLast
] = embeddingLevel
+ ISOLATE
;
760 dirProps
[i
] |= IGNORE_CC
;
761 overflowIsolateCount
++;
767 if (overflowIsolateCount
) {
768 dirProps
[i
] |= IGNORE_CC
;
769 overflowIsolateCount
--;
770 } else if (validIsolateCount
) {
771 overflowEmbeddingCount
= 0;
772 while (stack
[stackLast
] < ISOLATE
) {
773 /* pop embedding entries */
774 /* until the last isolate entry */
777 // Since validIsolateCount is true, there must be an isolate entry
778 // on the stack, so the stack is guaranteed to not be empty.
779 // Still, to eliminate a warning from coverity, we use an assertion.
780 MOZ_ASSERT(stackLast
> 0);
782 stackLast
--; /* pop also the last isolate entry */
783 MOZ_ASSERT(stackLast
>= 0); // For coverity
786 dirProps
[i
] |= IGNORE_CC
;
788 embeddingLevel
= stack
[stackLast
] & ~ISOLATE
;
789 previousLevel
= level
= embeddingLevel
;
790 flags
|= DIRPROP_FLAG(O_N
) | DIRPROP_FLAG(BN
) | DIRPROP_FLAG_LR(embeddingLevel
);
795 * We do not expect to see a paragraph separator (B),
797 NS_NOTREACHED("Unexpected paragraph separator");
801 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
802 /* they will get their levels set correctly in AdjustWSLevels() */
803 flags
|=DIRPROP_FLAG(BN
);
807 /* all other types get the "real" level */
808 level
= embeddingLevel
;
809 if(embeddingLevel
!= previousLevel
) {
810 previousLevel
= embeddingLevel
;
813 if (level
& NSBIDI_LEVEL_OVERRIDE
) {
814 flags
|= DIRPROP_FLAG_LR(level
);
816 flags
|= DIRPROP_FLAG(dirProp
);
822 * We need to set reasonable levels even on BN codes and
823 * explicit codes because we will later look at same-level runs (X10).
826 if (i
> 0 && levels
[i
- 1] != level
) {
827 flags
|= DIRPROP_FLAG_MULTI_RUNS
;
828 if (level
& NSBIDI_LEVEL_OVERRIDE
) {
829 flags
|= DIRPROP_FLAG_O(level
);
831 flags
|= DIRPROP_FLAG_E(level
);
834 if (DIRPROP_FLAG(dirProp
) & MASK_ISO
) {
835 level
= embeddingLevel
;
839 if(flags
&MASK_EMBEDDING
) {
840 flags
|=DIRPROP_FLAG_LR(mParaLevel
);
843 /* subsequently, ignore the explicit codes and BN (X9) */
845 /* again, determine if the text is mixed-directional or single-directional */
847 direction
=DirectionFromFlags(flags
);
850 *aDirection
= direction
;
854 * Use a pre-specified embedding levels array:
856 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
857 * ignore all explicit codes (X9),
858 * and check all the preset levels.
860 * Recalculate the flags to have them reflect the real properties
861 * after taking the explicit embeddings into account.
863 nsresult
nsBidi::CheckExplicitLevels(nsBidiDirection
*aDirection
)
865 const DirProp
*dirProps
=mDirProps
;
867 nsBidiLevel
*levels
=mLevels
;
868 int32_t isolateCount
= 0;
870 int32_t i
, length
=mLength
;
871 Flags flags
=0; /* collect all directionalities in the text */
872 nsBidiLevel level
, paraLevel
=mParaLevel
;
875 for(i
=0; i
<length
; ++i
) {
877 dirProp
= dirProps
[i
];
878 if (dirProp
== LRI
|| dirProp
== RLI
) {
880 if (isolateCount
> mIsolateCount
) {
881 mIsolateCount
= isolateCount
;
883 } else if (dirProp
== PDI
) {
886 if(level
&NSBIDI_LEVEL_OVERRIDE
) {
887 /* keep the override flag in levels[i] but adjust the flags */
888 level
&=~NSBIDI_LEVEL_OVERRIDE
; /* make the range check below simpler */
889 flags
|=DIRPROP_FLAG_O(level
);
892 flags
|=DIRPROP_FLAG_E(level
)|DIRPROP_FLAG(dirProp
);
894 if(level
<paraLevel
|| NSBIDI_MAX_EXPLICIT_LEVEL
<level
) {
895 /* level out of bounds */
896 *aDirection
= NSBIDI_LTR
;
897 return NS_ERROR_INVALID_ARG
;
900 if(flags
&MASK_EMBEDDING
) {
901 flags
|=DIRPROP_FLAG_LR(mParaLevel
);
904 /* determine if the text is mixed-directional or single-directional */
906 *aDirection
= DirectionFromFlags(flags
);
910 /* determine if the text is mixed-directional or single-directional */
911 nsBidiDirection
nsBidi::DirectionFromFlags(Flags aFlags
)
913 /* if the text contains AN and neutrals, then some neutrals may become RTL */
914 if(!(aFlags
&MASK_RTL
|| (aFlags
&DIRPROP_FLAG(AN
) && aFlags
&MASK_POSSIBLE_N
))) {
916 } else if(!(aFlags
&MASK_LTR
)) {
923 /******************************************************************
924 The Properties state machine table
925 *******************************************************************
927 All table cells are 8 bits:
928 bits 0..4: next state
929 bits 5..7: action to perform (if > 0)
931 Cells may be of format "n" where n represents the next state
932 (except for the rightmost column).
933 Cells may also be of format "s(x,y)" where x represents an action
934 to perform and y represents the next state.
936 *******************************************************************
937 Definitions and type for properties state table
938 *******************************************************************
940 #define IMPTABPROPS_COLUMNS 16
941 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
942 #define GET_STATEPROPS(cell) ((cell)&0x1f)
943 #define GET_ACTIONPROPS(cell) ((cell)>>5)
945 #define s(action, newState) ((uint8_t)(newState+(action<<5)))
947 static const uint8_t groupProp
[] = /* dirProp regrouped */
949 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */
950 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14
953 /******************************************************************
955 PROPERTIES STATE TABLE
957 In table impTabProps,
958 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
959 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
960 - the Res column is the reduced property assigned to a run
962 Action 1: process current run1, init new run1
964 3: process run1, process run2, init new run1
965 4: process run1, set run1=run2, init new run2
968 1) This table is used in ResolveImplicitLevels().
969 2) This table triggers actions when there is a change in the Bidi
970 property of incoming characters (action 1).
971 3) Most such property sequences are processed immediately (in
972 fact, passed to ProcessPropertySeq().
973 4) However, numbers are assembled as one sequence. This means
974 that undefined situations (like CS following digits, until
975 it is known if the next char will be a digit) are held until
976 following chars define them.
977 Example: digits followed by CS, then comes another CS or ON;
978 the digits will be processed, then the CS assigned
979 as the start of an ON sequence (action 3).
980 5) There are cases where more than one sequence must be
981 processed, for instance digits followed by CS followed by L:
982 the digits must be processed as one sequence, and the CS
983 must be processed as an ON sequence, all this before starting
984 assembling chars for the opening L sequence.
988 static const uint8_t impTabProps
[][IMPTABPROPS_COLUMNS
] =
990 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
991 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON
},
992 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L
},
993 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R
},
994 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R
},
995 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN
},
996 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN
},
997 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN
},
998 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON
},
999 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON
},
1000 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON
},
1001 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN
},
1002 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN
},
1003 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN
},
1004 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN
},
1005 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON
},
1006 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S
},
1007 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S
},
1008 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B
},
1009 /*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L
},
1010 /*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L
},
1011 /*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L
},
1012 /*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN
},
1013 /*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN
},
1014 /*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN
}
1017 /* we must undef macro s because the levels table have a different
1018 * structure (4 bits for action and 4 bits for next state.
1022 /******************************************************************
1023 The levels state machine tables
1024 *******************************************************************
1026 All table cells are 8 bits:
1027 bits 0..3: next state
1028 bits 4..7: action to perform (if > 0)
1030 Cells may be of format "n" where n represents the next state
1031 (except for the rightmost column).
1032 Cells may also be of format "s(x,y)" where x represents an action
1033 to perform and y represents the next state.
1035 This format limits each table to 16 states each and to 15 actions.
1037 *******************************************************************
1038 Definitions and type for levels state tables
1039 *******************************************************************
1041 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
1042 #define GET_STATE(cell) ((cell)&0x0f)
1043 #define GET_ACTION(cell) ((cell)>>4)
1044 #define s(action, newState) ((uint8_t)(newState+(action<<4)))
1046 /******************************************************************
1050 In all levels state tables,
1051 - state 0 is the initial state
1052 - the Res column is the increment to add to the text level
1053 for this property sequence.
1055 The impAct arrays for each table of a pair map the local action
1056 numbers of the table to the total list of actions. For instance,
1057 action 2 in a given table corresponds to the action number which
1058 appears in entry [2] of the impAct array for that table.
1059 The first entry of all impAct arrays must be 0.
1061 Action 1: init conditional sequence
1062 2: prepend conditional sequence to current sequence
1063 3: set ON sequence to new level - 1
1064 4: init EN/AN/ON sequence
1065 5: fix EN/AN/ON sequence followed by R
1066 6: set previous level sequence to level 2
1069 1) These tables are used in ProcessPropertySeq(). The input
1070 is property sequences as determined by ResolveImplicitLevels.
1071 2) Most such property sequences are processed immediately
1072 (levels are assigned).
1073 3) However, some sequences cannot be assigned a final level till
1074 one or more following sequences are received. For instance,
1075 ON following an R sequence within an even-level paragraph.
1076 If the following sequence is R, the ON sequence will be
1077 assigned basic run level+1, and so will the R sequence.
1078 4) S is generally handled like ON, since its level will be fixed
1079 to paragraph level in AdjustWSLevels().
1083 static const ImpTab impTabL
= /* Even paragraph level */
1084 /* In this table, conditional sequences receive the higher possible level
1085 until proven otherwise.
1088 /* L , R , EN , AN , ON , S , B , Res */
1089 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
1090 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
1091 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
1092 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
1093 /* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 },
1094 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 }
1096 static const ImpTab impTabR
= /* Odd paragraph level */
1097 /* In this table, conditional sequences receive the lower possible level
1098 until proven otherwise.
1101 /* L , R , EN , AN , ON , S , B , Res */
1102 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1103 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
1104 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1105 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
1106 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
1107 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
1112 static ImpAct impAct0
= {0,1,2,3,4,5,6};
1113 static PImpTab impTab
[2] = {impTabL
, impTabR
};
1115 /*------------------------------------------------------------------------*/
1117 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1120 * This implementation of the (Wn) rules applies all rules in one pass.
1121 * In order to do so, it needs a look-ahead of typically 1 character
1122 * (except for W5: sequences of ET) and keeps track of changes
1123 * in a rule Wp that affect a later Wq (p<q).
1125 * The (Nn) and (In) rules are also performed in that same single loop,
1126 * but effectively one iteration behind for white space.
1128 * Since all implicit rules are performed in one step, it is not necessary
1129 * to actually store the intermediate directional properties in dirProps[].
1132 void nsBidi::ProcessPropertySeq(LevState
*pLevState
, uint8_t _prop
, int32_t start
, int32_t limit
)
1134 uint8_t cell
, oldStateSeq
, actionSeq
;
1135 PImpTab pImpTab
= pLevState
->pImpTab
;
1136 PImpAct pImpAct
= pLevState
->pImpAct
;
1137 nsBidiLevel
* levels
= mLevels
;
1138 nsBidiLevel level
, addLevel
;
1141 start0
= start
; /* save original start position */
1142 oldStateSeq
= (uint8_t)pLevState
->state
;
1143 cell
= pImpTab
[oldStateSeq
][_prop
];
1144 pLevState
->state
= GET_STATE(cell
); /* isolate the new state */
1145 actionSeq
= pImpAct
[GET_ACTION(cell
)]; /* isolate the action */
1146 addLevel
= pImpTab
[pLevState
->state
][IMPTABLEVELS_RES
];
1150 case 1: /* init ON seq */
1151 pLevState
->startON
= start0
;
1154 case 2: /* prepend ON seq to current seq */
1155 start
= pLevState
->startON
;
1158 default: /* we should never get here */
1163 if(addLevel
|| (start
< start0
)) {
1164 level
= pLevState
->runLevel
+ addLevel
;
1165 if (start
>= pLevState
->runStart
) {
1166 for (k
= start
; k
< limit
; k
++) {
1170 DirProp
*dirProps
= mDirProps
, dirProp
;
1171 int32_t isolateCount
= 0;
1172 for (k
= start
; k
< limit
; k
++) {
1173 dirProp
= dirProps
[k
];
1174 if (dirProp
== PDI
) {
1177 if (isolateCount
== 0) {
1180 if (dirProp
== LRI
|| dirProp
== RLI
) {
1188 void nsBidi::ResolveImplicitLevels(int32_t aStart
, int32_t aLimit
,
1189 DirProp aSOR
, DirProp aEOR
)
1191 const DirProp
*dirProps
= mDirProps
;
1194 int32_t i
, start1
, start2
;
1195 uint16_t oldStateImp
, stateImp
, actionImp
;
1196 uint8_t gprop
, resProp
, cell
;
1198 /* initialize for property and levels state tables */
1199 levState
.startON
= -1;
1200 levState
.runStart
= aStart
;
1201 levState
.runLevel
= mLevels
[aStart
];
1202 levState
.pImpTab
= impTab
[levState
.runLevel
& 1];
1203 levState
.pImpAct
= impAct0
;
1205 /* The isolates[] entries contain enough information to
1206 resume the bidi algorithm in the same state as it was
1207 when it was interrupted by an isolate sequence. */
1208 if (dirProps
[aStart
] == PDI
) {
1209 start1
= mIsolates
[mIsolateCount
].start1
;
1210 stateImp
= mIsolates
[mIsolateCount
].stateImp
;
1211 levState
.state
= mIsolates
[mIsolateCount
].state
;
1215 if (dirProps
[aStart
] == NSM
) {
1216 stateImp
= 1 + aSOR
;
1221 ProcessPropertySeq(&levState
, aSOR
, aStart
, aStart
);
1225 for (i
= aStart
; i
<= aLimit
; i
++) {
1227 if (aLimit
> aStart
) {
1228 dirProp
= mDirProps
[aLimit
- 1];
1229 if (dirProp
== LRI
|| dirProp
== RLI
) {
1230 break; /* no forced closing for sequence ending with LRI/RLI */
1236 prop
= PURE_DIRPROP(dirProps
[i
]);
1237 gprop
= groupProp
[prop
];
1239 oldStateImp
= stateImp
;
1240 cell
= impTabProps
[oldStateImp
][gprop
];
1241 stateImp
= GET_STATEPROPS(cell
); /* isolate the new state */
1242 actionImp
= GET_ACTIONPROPS(cell
); /* isolate the action */
1243 if ((i
== aLimit
) && (actionImp
== 0)) {
1244 /* there is an unprocessed sequence if its property == eor */
1245 actionImp
= 1; /* process the last sequence */
1248 resProp
= impTabProps
[oldStateImp
][IMPTABPROPS_RES
];
1249 switch (actionImp
) {
1250 case 1: /* process current seq1, init new seq1 */
1251 ProcessPropertySeq(&levState
, resProp
, start1
, i
);
1254 case 2: /* init new seq2 */
1257 case 3: /* process seq1, process seq2, init new seq1 */
1258 ProcessPropertySeq(&levState
, resProp
, start1
, start2
);
1259 ProcessPropertySeq(&levState
, DirProp_ON
, start2
, i
);
1262 case 4: /* process seq1, set seq1=seq2, init new seq2 */
1263 ProcessPropertySeq(&levState
, resProp
, start1
, start2
);
1267 default: /* we should never get here */
1274 dirProp
= dirProps
[aLimit
- 1];
1275 if ((dirProp
== LRI
|| dirProp
== RLI
) && aLimit
< mLength
) {
1277 mIsolates
[mIsolateCount
].stateImp
= stateImp
;
1278 mIsolates
[mIsolateCount
].state
= levState
.state
;
1279 mIsolates
[mIsolateCount
].start1
= start1
;
1281 ProcessPropertySeq(&levState
, aEOR
, aLimit
, aLimit
);
1286 /* perform (L1) and (X9) ---------------------------------------------------- */
1289 * Reset the embedding levels for some non-graphic characters (L1).
1290 * This function also sets appropriate levels for BN, and
1291 * explicit embedding types that are supposed to have been removed
1292 * from the paragraph in (X9).
1294 void nsBidi::AdjustWSLevels()
1296 const DirProp
*dirProps
=mDirProps
;
1297 nsBidiLevel
*levels
=mLevels
;
1300 if(mFlags
&MASK_WS
) {
1301 nsBidiLevel paraLevel
=mParaLevel
;
1306 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
1307 while (i
> 0 && DIRPROP_FLAG(PURE_DIRPROP(dirProps
[--i
])) & MASK_WS
) {
1308 levels
[i
]=paraLevel
;
1311 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1312 /* here, i+1 is guaranteed to be <length */
1314 flag
= DIRPROP_FLAG(PURE_DIRPROP(dirProps
[--i
]));
1315 if(flag
&MASK_BN_EXPLICIT
) {
1316 levels
[i
]=levels
[i
+1];
1317 } else if(flag
&MASK_B_S
) {
1318 levels
[i
]=paraLevel
;
1326 nsresult
nsBidi::GetDirection(nsBidiDirection
* aDirection
)
1328 *aDirection
= mDirection
;
1332 nsresult
nsBidi::GetParaLevel(nsBidiLevel
* aParaLevel
)
1334 *aParaLevel
= mParaLevel
;
1337 #ifdef FULL_BIDI_ENGINE
1339 /* -------------------------------------------------------------------------- */
1341 nsresult
nsBidi::GetLength(int32_t* aLength
)
1348 * General remarks about the functions in this section:
1350 * These functions deal with the aspects of potentially mixed-directional
1351 * text in a single paragraph or in a line of a single paragraph
1352 * which has already been processed according to
1353 * the Unicode 6.3 Bidi algorithm as defined in
1354 * http://www.unicode.org/unicode/reports/tr9/ , version 28,
1355 * also described in The Unicode Standard, Version 6.3.0 .
1357 * This means that there is a nsBidi object with a levels
1358 * and a dirProps array.
1359 * paraLevel and direction are also set.
1360 * Only if the length of the text is zero, then levels==dirProps==nullptr.
1362 * The overall directionality of the paragraph
1363 * or line is used to bypass the reordering steps if possible.
1364 * Even purely RTL text does not need reordering there because
1365 * the getLogical/VisualIndex() functions can compute the
1366 * index on the fly in such a case.
1368 * The implementation of the access to same-level-runs and of the reordering
1369 * do attempt to provide better performance and less memory usage compared to
1370 * a direct implementation of especially rule (L2) with an array of
1371 * one (32-bit) integer per text character.
1373 * Here, the levels array is scanned as soon as necessary, and a vector of
1374 * same-level-runs is created. Reordering then is done on this vector.
1375 * For each run of text positions that were resolved to the same level,
1376 * only 8 bytes are stored: the first text position of the run and the visual
1377 * position behind the run after reordering.
1378 * One sign bit is used to hold the directionality of the run.
1379 * This is inefficient if there are many very short runs. If the average run
1380 * length is <2, then this uses more memory.
1382 * In a further attempt to save memory, the levels array is never changed
1383 * after all the resolution rules (Xn, Wn, Nn, In).
1384 * Many functions have to consider the field trailingWSStart:
1385 * if it is less than length, then there is an implicit trailing run
1387 * which is not reflected in the levels array.
1388 * This allows a line nsBidi object to use the same levels array as
1389 * its paragraph parent object.
1391 * When a nsBidi object is created for a line of a paragraph, then the
1392 * paragraph's levels and dirProps arrays are reused by way of setting
1393 * a pointer into them, not by copying. This again saves memory and forbids to
1394 * change the now shared levels for (L1).
1396 nsresult
nsBidi::SetLine(const nsBidi
* aParaBidi
, int32_t aStart
, int32_t aLimit
)
1398 nsBidi
* pParent
= (nsBidi
*)aParaBidi
;
1401 /* check the argument values */
1402 if(pParent
==nullptr) {
1403 return NS_ERROR_INVALID_POINTER
;
1404 } else if(aStart
< 0 || aStart
>= aLimit
|| aLimit
> pParent
->mLength
) {
1405 return NS_ERROR_INVALID_ARG
;
1408 /* set members from our aParaBidi parent */
1409 length
= mLength
= aLimit
- aStart
;
1410 mParaLevel
=pParent
->mParaLevel
;
1415 mDirProps
=pParent
->mDirProps
+aStart
;
1416 mLevels
=pParent
->mLevels
+aStart
;
1419 if(pParent
->mDirection
!=NSBIDI_MIXED
) {
1420 /* the parent is already trivial */
1421 mDirection
=pParent
->mDirection
;
1424 * The parent's levels are all either
1425 * implicitly or explicitly ==paraLevel;
1428 if(pParent
->mTrailingWSStart
<=aStart
) {
1430 } else if(pParent
->mTrailingWSStart
<aLimit
) {
1431 mTrailingWSStart
=pParent
->mTrailingWSStart
-aStart
;
1433 mTrailingWSStart
=length
;
1436 const nsBidiLevel
*levels
=mLevels
;
1437 int32_t i
, trailingWSStart
;
1440 SetTrailingWSStart();
1441 trailingWSStart
=mTrailingWSStart
;
1443 /* recalculate pLineBidi->direction */
1444 if(trailingWSStart
==0) {
1445 /* all levels are at paraLevel */
1446 mDirection
=(nsBidiDirection
)(mParaLevel
&1);
1448 /* get the level of the first character */
1451 /* if there is anything of a different level, then the line is mixed */
1452 if(trailingWSStart
<length
&& (mParaLevel
&1)!=level
) {
1453 /* the trailing WS is at paraLevel, which differs from levels[0] */
1454 mDirection
=NSBIDI_MIXED
;
1456 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
1459 if(i
==trailingWSStart
) {
1460 /* the direction values match those in level */
1461 mDirection
=(nsBidiDirection
)level
;
1463 } else if((levels
[i
]&1)!=level
) {
1464 mDirection
=NSBIDI_MIXED
;
1472 switch(mDirection
) {
1474 /* make sure paraLevel is even */
1475 mParaLevel
=(mParaLevel
+1)&~1;
1477 /* all levels are implicitly at paraLevel (important for GetLevels()) */
1481 /* make sure paraLevel is odd */
1484 /* all levels are implicitly at paraLevel (important for GetLevels()) */
1494 /* handle trailing WS (L1) -------------------------------------------------- */
1497 * SetTrailingWSStart() sets the start index for a trailing
1498 * run of WS in the line. This is necessary because we do not modify
1499 * the paragraph's levels array that we just point into.
1500 * Using trailingWSStart is another form of performing (L1).
1502 * To make subsequent operations easier, we also include the run
1503 * before the WS if it is at the paraLevel - we merge the two here.
1505 void nsBidi::SetTrailingWSStart() {
1506 /* mDirection!=NSBIDI_MIXED */
1508 const DirProp
*dirProps
=mDirProps
;
1509 nsBidiLevel
*levels
=mLevels
;
1510 int32_t start
=mLength
;
1511 nsBidiLevel paraLevel
=mParaLevel
;
1513 /* go backwards across all WS, BN, explicit codes */
1514 while(start
>0 && DIRPROP_FLAG(dirProps
[start
-1])&MASK_WS
) {
1518 /* if the WS run can be merged with the previous run then do so here */
1519 while(start
>0 && levels
[start
-1]==paraLevel
) {
1523 mTrailingWSStart
=start
;
1526 nsresult
nsBidi::GetLevelAt(int32_t aCharIndex
, nsBidiLevel
* aLevel
)
1528 /* return paraLevel if in the trailing WS run, otherwise the real level */
1529 if(aCharIndex
<0 || mLength
<=aCharIndex
) {
1531 } else if(mDirection
!=NSBIDI_MIXED
|| aCharIndex
>=mTrailingWSStart
) {
1532 *aLevel
= mParaLevel
;
1534 *aLevel
= mLevels
[aCharIndex
];
1539 nsresult
nsBidi::GetLevels(nsBidiLevel
** aLevels
)
1541 int32_t start
, length
;
1546 return NS_ERROR_INVALID_ARG
;
1549 start
= mTrailingWSStart
;
1551 /* the current levels array reflects the WS run */
1557 * After the previous if(), we know that the levels array
1558 * has an implicit trailing WS run and therefore does not fully
1559 * reflect itself all the levels.
1560 * This must be a nsBidi object for a line, and
1561 * we need to create a new levels array.
1564 if(GETLEVELSMEMORY(length
)) {
1565 nsBidiLevel
*levels
=mLevelsMemory
;
1567 if(start
>0 && levels
!=mLevels
) {
1568 memcpy(levels
, mLevels
, start
);
1570 memset(levels
+start
, mParaLevel
, length
-start
);
1572 /* this new levels array is set for the line and reflects the WS run */
1573 mTrailingWSStart
=length
;
1574 *aLevels
=mLevels
=levels
;
1579 return NS_ERROR_OUT_OF_MEMORY
;
1582 #endif // FULL_BIDI_ENGINE
1584 nsresult
nsBidi::GetCharTypeAt(int32_t aCharIndex
, nsCharType
* pType
)
1586 if(aCharIndex
<0 || mLength
<=aCharIndex
) {
1587 return NS_ERROR_INVALID_ARG
;
1589 *pType
= (nsCharType
)mDirProps
[aCharIndex
];
1593 nsresult
nsBidi::GetLogicalRun(int32_t aLogicalStart
, int32_t *aLogicalLimit
, nsBidiLevel
*aLevel
)
1595 int32_t length
= mLength
;
1597 if(aLogicalStart
<0 || length
<=aLogicalStart
) {
1598 return NS_ERROR_INVALID_ARG
;
1601 int32_t runCount
, visualStart
, logicalLimit
, logicalFirst
, i
;
1604 /* CountRuns will check VALID_PARA_OR_LINE */
1605 nsresult rv
= CountRuns(&runCount
);
1606 if (NS_FAILED(rv
)) {
1610 visualStart
= logicalLimit
= 0;
1613 for (i
= 0; i
< runCount
; i
++) {
1615 logicalFirst
= GET_INDEX(iRun
.logicalStart
);
1616 logicalLimit
= logicalFirst
+ iRun
.visualLimit
- visualStart
;
1617 if ((aLogicalStart
>= logicalFirst
) && (aLogicalStart
< logicalLimit
)) {
1620 visualStart
= iRun
.visualLimit
;
1622 if (aLogicalLimit
) {
1623 *aLogicalLimit
= logicalLimit
;
1626 if (mDirection
!= NSBIDI_MIXED
|| aLogicalStart
>= mTrailingWSStart
) {
1627 *aLevel
= mParaLevel
;
1629 *aLevel
= mLevels
[aLogicalStart
];
1635 /* runs API functions ------------------------------------------------------- */
1637 nsresult
nsBidi::CountRuns(int32_t* aRunCount
)
1639 if(mRunCount
<0 && !GetRuns()) {
1640 return NS_ERROR_OUT_OF_MEMORY
;
1643 *aRunCount
= mRunCount
;
1648 nsresult
nsBidi::GetVisualRun(int32_t aRunIndex
, int32_t *aLogicalStart
, int32_t *aLength
, nsBidiDirection
*aDirection
)
1651 (mRunCount
==-1 && !GetRuns()) ||
1652 aRunIndex
>=mRunCount
1654 *aDirection
= NSBIDI_LTR
;
1657 int32_t start
=mRuns
[aRunIndex
].logicalStart
;
1658 if(aLogicalStart
!=nullptr) {
1659 *aLogicalStart
=GET_INDEX(start
);
1661 if(aLength
!=nullptr) {
1663 *aLength
=mRuns
[aRunIndex
].visualLimit
-
1664 mRuns
[aRunIndex
-1].visualLimit
;
1666 *aLength
=mRuns
[0].visualLimit
;
1669 *aDirection
= (nsBidiDirection
)GET_ODD_BIT(start
);
1674 /* compute the runs array --------------------------------------------------- */
1677 * Compute the runs array from the levels array.
1678 * After GetRuns() returns true, runCount is guaranteed to be >0
1679 * and the runs are reordered.
1680 * Odd-level runs have visualStart on their visual right edge and
1681 * they progress visually to the left.
1683 bool nsBidi::GetRuns()
1686 * This method returns immediately if the runs are already set. This
1687 * includes the case of length==0 (handled in setPara)..
1689 if (mRunCount
>= 0) {
1693 if(mDirection
!=NSBIDI_MIXED
) {
1694 /* simple, single-run case - this covers length==0 */
1695 GetSingleRun(mParaLevel
);
1696 } else /* NSBIDI_MIXED, length>0 */ {
1697 /* mixed directionality */
1698 int32_t length
=mLength
, limit
=mTrailingWSStart
;
1701 * If there are WS characters at the end of the line
1702 * and the run preceding them has a level different from
1703 * paraLevel, then they will form their own run at paraLevel (L1).
1704 * Count them separately.
1705 * We need some special treatment for this in order to not
1706 * modify the levels array which a line nsBidi object shares
1707 * with its paragraph parent and its other line siblings.
1708 * In other words, for the trailing WS, it may be
1709 * levels[]!=paraLevel but we have to treat it like it were so.
1711 nsBidiLevel
*levels
=mLevels
;
1712 int32_t i
, runCount
;
1713 nsBidiLevel level
=NSBIDI_DEFAULT_LTR
; /* initialize with no valid level */
1715 /* count the runs, there is at least one non-WS run, and limit>0 */
1717 for(i
=0; i
<limit
; ++i
) {
1718 /* increment runCount at the start of each run */
1719 if(levels
[i
]!=level
) {
1726 * We don't need to see if the last run can be merged with a trailing
1727 * WS run because SetTrailingWSStart() would have done that.
1729 if(runCount
==1 && limit
==length
) {
1730 /* There is only one non-WS run and no trailing WS-run. */
1731 GetSingleRun(levels
[0]);
1732 } else /* runCount>1 || limit<length */ {
1733 /* allocate and set the runs */
1735 int32_t runIndex
, start
;
1736 nsBidiLevel minLevel
=NSBIDI_MAX_EXPLICIT_LEVEL
+1, maxLevel
=0;
1738 /* now, count a (non-mergable) WS run */
1744 if(GETRUNSMEMORY(runCount
)) {
1751 /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */
1752 /* however, that would take longer and make other functions more complicated */
1755 /* search for the run ends */
1758 /* prepare this run */
1761 if(level
<minLevel
) {
1764 if(level
>maxLevel
) {
1768 /* look for the run limit */
1769 while (++i
< limit
&& levels
[i
] == level
) {
1772 /* i is another run limit */
1773 runs
[runIndex
].logicalStart
= start
;
1774 runs
[runIndex
].visualLimit
= i
- start
;
1776 } while (i
< limit
);
1779 /* there is a separate WS run */
1780 runs
[runIndex
].logicalStart
=limit
;
1781 runs
[runIndex
].visualLimit
=length
-limit
;
1782 if(mParaLevel
<minLevel
) {
1783 minLevel
=mParaLevel
;
1787 /* set the object fields */
1791 ReorderLine(minLevel
, maxLevel
);
1793 /* now add the direction flags and adjust the visualLimit's to be just that */
1794 /* this loop will also handling the trailing WS run */
1796 for (i
= 0; i
< runCount
; ++i
) {
1797 ADD_ODD_BIT_FROM_LEVEL(runs
[i
].logicalStart
, levels
[runs
[i
].logicalStart
]);
1798 limit
+= runs
[i
].visualLimit
;
1799 runs
[i
].visualLimit
= limit
;
1802 /* Set the "odd" bit for the trailing WS run. */
1803 /* For a RTL paragraph, it will be the *first* run in visual order. */
1804 if (runIndex
< runCount
) {
1805 int32_t trailingRun
= (mParaLevel
& 1) ? 0 : runIndex
;
1806 ADD_ODD_BIT_FROM_LEVEL(runs
[trailingRun
].logicalStart
, mParaLevel
);
1814 /* in trivial cases there is only one trivial run; called by GetRuns() */
1815 void nsBidi::GetSingleRun(nsBidiLevel aLevel
)
1817 /* simple, single-run case */
1821 /* fill and reorder the single run */
1822 mRuns
[0].logicalStart
=MAKE_INDEX_ODD_PAIR(0, aLevel
);
1823 mRuns
[0].visualLimit
=mLength
;
1826 /* reorder the runs array (L2) ---------------------------------------------- */
1829 * Reorder the same-level runs in the runs array.
1830 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
1831 * All the visualStart fields=logical start before reordering.
1832 * The "odd" bits are not set yet.
1834 * Reordering with this data structure lends itself to some handy shortcuts:
1836 * Since each run is moved but not modified, and since at the initial maxLevel
1837 * each sequence of same-level runs consists of only one run each, we
1838 * don't need to do anything there and can predecrement maxLevel.
1839 * In many simple cases, the reordering is thus done entirely in the
1841 * Also, reordering occurs only down to the lowest odd level that occurs,
1842 * which is minLevel|1. However, if the lowest level itself is odd, then
1843 * in the last reordering the sequence of the runs at this level or higher
1844 * will be all runs, and we don't need the elaborate loop to search for them.
1845 * This is covered by ++minLevel instead of minLevel|=1 followed
1846 * by an extra reorder-all after the reorder-some loop.
1847 * About a trailing WS run:
1848 * Such a run would need special treatment because its level is not
1849 * reflected in levels[] if this is not a paragraph object.
1850 * Instead, all characters from trailingWSStart on are implicitly at
1852 * However, for all maxLevel>paraLevel, this run will never be reordered
1853 * and does not need to be taken into account. maxLevel==paraLevel is only reordered
1854 * if minLevel==paraLevel is odd, which is done in the extra segment.
1855 * This means that for the main reordering loop we don't need to consider
1856 * this run and can --runCount. If it is later part of the all-runs
1857 * reordering, then runCount is adjusted accordingly.
1859 void nsBidi::ReorderLine(nsBidiLevel aMinLevel
, nsBidiLevel aMaxLevel
)
1862 nsBidiLevel
*levels
;
1863 int32_t firstRun
, endRun
, limitRun
, runCount
;
1865 /* nothing to do? */
1866 if(aMaxLevel
<=(aMinLevel
|1)) {
1871 * Reorder only down to the lowest odd level
1872 * and reorder at an odd aMinLevel in a separate, simpler loop.
1873 * See comments above for why aMinLevel is always incremented.
1881 /* do not include the WS run at paraLevel<=old aMinLevel except in the simple loop */
1882 if(mTrailingWSStart
<mLength
) {
1886 while(--aMaxLevel
>=aMinLevel
) {
1889 /* loop for all sequences of runs */
1891 /* look for a sequence of runs that are all at >=aMaxLevel */
1892 /* look for the first run of such a sequence */
1893 while(firstRun
<runCount
&& levels
[runs
[firstRun
].logicalStart
]<aMaxLevel
) {
1896 if(firstRun
>=runCount
) {
1897 break; /* no more such runs */
1900 /* look for the limit run of such a sequence (the run behind it) */
1901 for(limitRun
=firstRun
; ++limitRun
<runCount
&& levels
[runs
[limitRun
].logicalStart
]>=aMaxLevel
;) {}
1903 /* Swap the entire sequence of runs from firstRun to limitRun-1. */
1905 while(firstRun
<endRun
) {
1906 tempRun
= runs
[firstRun
];
1907 runs
[firstRun
] = runs
[endRun
];
1908 runs
[endRun
] = tempRun
;
1913 if(limitRun
==runCount
) {
1914 break; /* no more such runs */
1916 firstRun
=limitRun
+1;
1921 /* now do aMaxLevel==old aMinLevel (==odd!), see above */
1922 if(!(aMinLevel
&1)) {
1925 /* include the trailing WS run in this complete reordering */
1926 if(mTrailingWSStart
==mLength
) {
1930 /* Swap the entire sequence of all runs. (endRun==runCount) */
1931 while(firstRun
<runCount
) {
1932 tempRun
= runs
[firstRun
];
1933 runs
[firstRun
] = runs
[runCount
];
1934 runs
[runCount
] = tempRun
;
1941 nsresult
nsBidi::ReorderVisual(const nsBidiLevel
*aLevels
, int32_t aLength
, int32_t *aIndexMap
)
1943 int32_t start
, end
, limit
, temp
;
1944 nsBidiLevel minLevel
, maxLevel
;
1946 if(aIndexMap
==nullptr ||
1947 !PrepareReorder(aLevels
, aLength
, aIndexMap
, &minLevel
, &maxLevel
)) {
1951 /* nothing to do? */
1952 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
1956 /* reorder only down to the lowest odd level */
1959 /* loop maxLevel..minLevel */
1963 /* loop for all sequences of levels to reorder at the current maxLevel */
1965 /* look for a sequence of levels that are all at >=maxLevel */
1966 /* look for the first index of such a sequence */
1967 while(start
<aLength
&& aLevels
[start
]<maxLevel
) {
1970 if(start
>=aLength
) {
1971 break; /* no more such runs */
1974 /* look for the limit of such a sequence (the index behind it) */
1975 for(limit
=start
; ++limit
<aLength
&& aLevels
[limit
]>=maxLevel
;) {}
1978 * Swap the entire interval of indexes from start to limit-1.
1979 * We don't need to swap the levels for the purpose of this
1980 * algorithm: the sequence of levels that we look at does not
1985 temp
=aIndexMap
[start
];
1986 aIndexMap
[start
]=aIndexMap
[end
];
1987 aIndexMap
[end
]=temp
;
1993 if(limit
==aLength
) {
1994 break; /* no more such sequences */
1999 } while(--maxLevel
>=minLevel
);
2004 bool nsBidi::PrepareReorder(const nsBidiLevel
*aLevels
, int32_t aLength
,
2006 nsBidiLevel
*aMinLevel
, nsBidiLevel
*aMaxLevel
)
2009 nsBidiLevel level
, minLevel
, maxLevel
;
2011 if(aLevels
==nullptr || aLength
<=0) {
2015 /* determine minLevel and maxLevel */
2016 minLevel
=NSBIDI_MAX_EXPLICIT_LEVEL
+1;
2018 for(start
=aLength
; start
>0;) {
2019 level
=aLevels
[--start
];
2020 if(level
>NSBIDI_MAX_EXPLICIT_LEVEL
+1) {
2023 if(level
<minLevel
) {
2026 if(level
>maxLevel
) {
2030 *aMinLevel
=minLevel
;
2031 *aMaxLevel
=maxLevel
;
2033 /* initialize the index map */
2034 for(start
=aLength
; start
>0;) {
2036 aIndexMap
[start
]=start
;
2042 #ifdef FULL_BIDI_ENGINE
2043 /* API functions for logical<->visual mapping ------------------------------- */
2045 nsresult
nsBidi::GetVisualIndex(int32_t aLogicalIndex
, int32_t* aVisualIndex
) {
2046 int32_t visualIndex
= NSBIDI_MAP_NOWHERE
;
2048 if(aLogicalIndex
<0 || mLength
<=aLogicalIndex
) {
2049 return NS_ERROR_INVALID_ARG
;
2051 /* we can do the trivial cases without the runs array */
2052 switch(mDirection
) {
2054 *aVisualIndex
= aLogicalIndex
;
2057 *aVisualIndex
= mLength
-aLogicalIndex
-1;
2060 if(mRunCount
<0 && !GetRuns()) {
2061 return NS_ERROR_OUT_OF_MEMORY
;
2064 int32_t i
, visualStart
=0, offset
, length
;
2066 /* linear search for the run, search on the visual runs */
2067 for (i
= 0; i
< mRunCount
; ++i
) {
2068 length
=runs
[i
].visualLimit
-visualStart
;
2069 offset
=aLogicalIndex
-GET_INDEX(runs
[i
].logicalStart
);
2070 if(offset
>=0 && offset
<length
) {
2071 if(IS_EVEN_RUN(runs
[i
].logicalStart
)) {
2073 visualIndex
= visualStart
+ offset
;
2076 visualIndex
= visualStart
+ length
- offset
- 1;
2080 visualStart
+=length
;
2082 if (i
>= mRunCount
) {
2083 *aVisualIndex
= NSBIDI_MAP_NOWHERE
;
2090 *aVisualIndex
= visualIndex
;
2094 nsresult
nsBidi::GetLogicalIndex(int32_t aVisualIndex
, int32_t *aLogicalIndex
)
2096 if(aVisualIndex
<0 || mLength
<=aVisualIndex
) {
2097 return NS_ERROR_INVALID_ARG
;
2100 /* we can do the trivial cases without the runs array */
2101 if (mDirection
== NSBIDI_LTR
) {
2102 *aLogicalIndex
= aVisualIndex
;
2104 } else if (mDirection
== NSBIDI_RTL
) {
2105 *aLogicalIndex
= mLength
- aVisualIndex
- 1;
2109 if(mRunCount
<0 && !GetRuns()) {
2110 return NS_ERROR_OUT_OF_MEMORY
;
2114 int32_t i
, runCount
=mRunCount
, start
;
2117 /* linear search for the run */
2118 for(i
=0; aVisualIndex
>=runs
[i
].visualLimit
; ++i
) {}
2120 /* binary search for the run */
2121 int32_t start
=0, limit
=runCount
;
2123 /* the middle if() will guaranteed find the run, we don't need a loop limit */
2126 if(aVisualIndex
>=runs
[i
].visualLimit
) {
2128 } else if(i
==0 || aVisualIndex
>=runs
[i
-1].visualLimit
) {
2136 start
=runs
[i
].logicalStart
;
2137 if(IS_EVEN_RUN(start
)) {
2139 /* the offset in runs[i] is aVisualIndex-runs[i-1].visualLimit */
2141 aVisualIndex
-=runs
[i
-1].visualLimit
;
2143 *aLogicalIndex
= GET_INDEX(start
)+aVisualIndex
;
2147 *aLogicalIndex
= GET_INDEX(start
)+runs
[i
].visualLimit
-aVisualIndex
-1;
2152 nsresult
nsBidi::GetLogicalMap(int32_t *aIndexMap
)
2156 /* CountRuns() checks for VALID_PARA_OR_LINE */
2157 rv
= CountRuns(nullptr);
2160 } else if(aIndexMap
==nullptr) {
2161 return NS_ERROR_INVALID_ARG
;
2163 /* fill a logical-to-visual index map using the runs[] */
2164 int32_t visualStart
, visualLimit
, j
;
2165 int32_t logicalStart
;
2172 for (j
= 0; j
< mRunCount
; ++j
) {
2173 logicalStart
= GET_INDEX(runs
[j
].logicalStart
);
2174 visualLimit
= runs
[j
].visualLimit
;
2175 if (IS_EVEN_RUN(runs
[j
].logicalStart
)) {
2177 aIndexMap
[logicalStart
++] = visualStart
++;
2178 } while (visualStart
< visualLimit
);
2180 logicalStart
+= visualLimit
-visualStart
; /* logicalLimit */
2182 aIndexMap
[--logicalStart
] = visualStart
++;
2183 } while (visualStart
< visualLimit
);
2185 /* visualStart==visualLimit; */
2191 nsresult
nsBidi::GetVisualMap(int32_t *aIndexMap
)
2193 int32_t* runCount
=nullptr;
2196 if(aIndexMap
==nullptr) {
2197 return NS_ERROR_INVALID_ARG
;
2200 /* CountRuns() checks all of its and our arguments */
2201 rv
= CountRuns(runCount
);
2205 /* fill a visual-to-logical index map using the runs[] */
2206 Run
*runs
=mRuns
, *runsLimit
=runs
+mRunCount
;
2207 int32_t logicalStart
, visualStart
, visualLimit
;
2210 for(; runs
<runsLimit
; ++runs
) {
2211 logicalStart
=runs
->logicalStart
;
2212 visualLimit
=runs
->visualLimit
;
2213 if(IS_EVEN_RUN(logicalStart
)) {
2215 *aIndexMap
++ = logicalStart
++;
2216 } while(++visualStart
<visualLimit
);
2218 REMOVE_ODD_BIT(logicalStart
);
2219 logicalStart
+=visualLimit
-visualStart
; /* logicalLimit */
2221 *aIndexMap
++ = --logicalStart
;
2222 } while(++visualStart
<visualLimit
);
2224 /* visualStart==visualLimit; */
2230 /* reorder a line based on a levels array (L2) ------------------------------ */
2232 nsresult
nsBidi::ReorderLogical(const nsBidiLevel
*aLevels
, int32_t aLength
, int32_t *aIndexMap
)
2234 int32_t start
, limit
, sumOfSosEos
;
2235 nsBidiLevel minLevel
, maxLevel
;
2237 if(aIndexMap
==nullptr ||
2238 !PrepareReorder(aLevels
, aLength
, aIndexMap
, &minLevel
, &maxLevel
)) {
2242 /* nothing to do? */
2243 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
2247 /* reorder only down to the lowest odd level */
2250 /* loop maxLevel..minLevel */
2254 /* loop for all sequences of levels to reorder at the current maxLevel */
2256 /* look for a sequence of levels that are all at >=maxLevel */
2257 /* look for the first index of such a sequence */
2258 while(start
<aLength
&& aLevels
[start
]<maxLevel
) {
2261 if(start
>=aLength
) {
2262 break; /* no more such sequences */
2265 /* look for the limit of such a sequence (the index behind it) */
2266 for(limit
=start
; ++limit
<aLength
&& aLevels
[limit
]>=maxLevel
;) {}
2269 * sos=start of sequence, eos=end of sequence
2271 * The closed (inclusive) interval from sos to eos includes all the logical
2272 * and visual indexes within this sequence. They are logically and
2273 * visually contiguous and in the same range.
2275 * For each run, the new visual index=sos+eos-old visual index;
2276 * we pre-add sos+eos into sumOfSosEos ->
2277 * new visual index=sumOfSosEos-old visual index;
2279 sumOfSosEos
=start
+limit
-1;
2281 /* reorder each index in the sequence */
2283 aIndexMap
[start
]=sumOfSosEos
-aIndexMap
[start
];
2284 } while(++start
<limit
);
2287 if(limit
==aLength
) {
2288 break; /* no more such sequences */
2293 } while(--maxLevel
>=minLevel
);
2298 nsresult
nsBidi::InvertMap(const int32_t *aSrcMap
, int32_t *aDestMap
, int32_t aLength
)
2300 if(aSrcMap
!=nullptr && aDestMap
!=nullptr && aLength
> 0) {
2302 int32_t destLength
= -1, count
= 0;
2303 /* find highest value and count positive indexes in srcMap */
2304 pi
= aSrcMap
+ aLength
;
2305 while (pi
> aSrcMap
) {
2306 if (*--pi
> destLength
) {
2313 destLength
++; /* add 1 for origin 0 */
2314 if (count
< destLength
) {
2315 /* we must fill unmatched destMap entries with -1 */
2316 memset(aDestMap
, 0xFF, destLength
* sizeof(int32_t));
2318 pi
= aSrcMap
+ aLength
;
2319 while (aLength
> 0) {
2321 aDestMap
[*pi
] = --aLength
;
2330 int32_t nsBidi::doWriteReverse(const char16_t
*src
, int32_t srcLength
,
2331 char16_t
*dest
, uint16_t options
) {
2335 * RTL runs need to be copied to the destination in reverse order
2336 * of code points, not code units, to keep Unicode characters intact.
2338 * The general strategy for this is to read the source text
2339 * in backward order, collect all code units for a code point
2340 * (and optionally following combining characters, see below),
2341 * and copy all these code units in ascending order
2342 * to the destination for this run.
2344 * Several options request whether combining characters
2345 * should be kept after their base characters,
2346 * whether Bidi control characters should be removed, and
2347 * whether characters should be replaced by their mirror-image
2348 * equivalent Unicode characters.
2350 int32_t i
, j
, destSize
;
2353 /* optimize for several combinations of options */
2354 switch(options
&(NSBIDI_REMOVE_BIDI_CONTROLS
|NSBIDI_DO_MIRRORING
|NSBIDI_KEEP_BASE_COMBINING
)) {
2357 * With none of the "complicated" options set, the destination
2358 * run will have the same length as the source run,
2359 * and there is no mirroring and no keeping combining characters
2360 * with their base characters.
2364 /* preserve character integrity */
2366 /* i is always after the last code unit known to need to be kept in this segment */
2369 /* collect code units for one base character */
2370 UTF_BACK_1(src
, 0, srcLength
);
2372 /* copy this base character */
2377 } while(srcLength
>0);
2379 case NSBIDI_KEEP_BASE_COMBINING
:
2381 * Here, too, the destination
2382 * run will have the same length as the source run,
2383 * and there is no mirroring.
2384 * We do need to keep combining characters with their base characters.
2388 /* preserve character integrity */
2390 /* i is always after the last code unit known to need to be kept in this segment */
2393 /* collect code units and modifier letters for one base character */
2395 UTF_PREV_CHAR(src
, 0, srcLength
, c
);
2396 } while(srcLength
>0 && GetBidiCat(c
) == eCharType_DirNonSpacingMark
);
2398 /* copy this "user character" */
2403 } while(srcLength
>0);
2407 * With several "complicated" options set, this is the most
2408 * general and the slowest copying of an RTL run.
2409 * We will do mirroring, remove Bidi controls, and
2410 * keep combining characters with their base characters
2413 if(!(options
&NSBIDI_REMOVE_BIDI_CONTROLS
)) {
2416 /* we need to find out the destination length of the run,
2417 which will not include the Bidi control characters */
2418 int32_t length
=srcLength
;
2424 if (!IsBidiControl((uint32_t)ch
)) {
2427 } while(--length
>0);
2432 /* preserve character integrity */
2434 /* i is always after the last code unit known to need to be kept in this segment */
2437 /* collect code units for one base character */
2438 UTF_PREV_CHAR(src
, 0, srcLength
, c
);
2439 if(options
&NSBIDI_KEEP_BASE_COMBINING
) {
2440 /* collect modifier letters for this base character */
2441 while(srcLength
>0 && GetBidiCat(c
) == eCharType_DirNonSpacingMark
) {
2442 UTF_PREV_CHAR(src
, 0, srcLength
, c
);
2446 if(options
&NSBIDI_REMOVE_BIDI_CONTROLS
&& IsBidiControl(c
)) {
2447 /* do not copy this Bidi control character */
2451 /* copy this "user character" */
2453 if(options
&NSBIDI_DO_MIRRORING
) {
2454 /* mirror only the base character */
2455 c
= GetMirroredChar(c
);
2458 UTF_APPEND_CHAR_UNSAFE(dest
, k
, c
);
2465 } while(srcLength
>0);
2467 } /* end of switch */
2471 nsresult
nsBidi::WriteReverse(const char16_t
*aSrc
, int32_t aSrcLength
, char16_t
*aDest
, uint16_t aOptions
, int32_t *aDestSize
)
2473 if( aSrc
==nullptr || aSrcLength
<0 ||
2476 return NS_ERROR_INVALID_ARG
;
2479 /* do input and output overlap? */
2480 if( aSrc
>=aDest
&& aSrc
<aDest
+aSrcLength
||
2481 aDest
>=aSrc
&& aDest
<aSrc
+aSrcLength
2483 return NS_ERROR_INVALID_ARG
;
2487 *aDestSize
= doWriteReverse(aSrc
, aSrcLength
, aDest
, aOptions
);
2491 #endif // FULL_BIDI_ENGINE