1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 #include "nsUnicodeProperties.h"
11 using namespace mozilla::unicode
;
13 // These are #defined in <sys/regset.h> under Solaris 10 x86
17 /* Comparing the description of the Bidi algorithm with this implementation
18 is easier with the same names for the Bidi types in the code as there.
21 L
= eCharType_LeftToRight
,
22 R
= eCharType_RightToLeft
,
23 EN
= eCharType_EuropeanNumber
,
24 ES
= eCharType_EuropeanNumberSeparator
,
25 ET
= eCharType_EuropeanNumberTerminator
,
26 AN
= eCharType_ArabicNumber
,
27 CS
= eCharType_CommonNumberSeparator
,
28 B
= eCharType_BlockSeparator
,
29 S
= eCharType_SegmentSeparator
,
30 WS
= eCharType_WhiteSpaceNeutral
,
31 O_N
= eCharType_OtherNeutral
,
32 LRE
= eCharType_LeftToRightEmbedding
,
33 LRO
= eCharType_LeftToRightOverride
,
34 AL
= eCharType_RightToLeftArabic
,
35 RLE
= eCharType_RightToLeftEmbedding
,
36 RLO
= eCharType_RightToLeftOverride
,
37 PDF
= eCharType_PopDirectionalFormat
,
38 NSM
= eCharType_DirNonSpacingMark
,
39 BN
= eCharType_BoundaryNeutral
,
43 /* to avoid some conditional statements, use tiny constant arrays */
44 static Flags flagLR
[2]={ DIRPROP_FLAG(L
), DIRPROP_FLAG(R
) };
45 static Flags flagE
[2]={ DIRPROP_FLAG(LRE
), DIRPROP_FLAG(RLE
) };
46 static Flags flagO
[2]={ DIRPROP_FLAG(LRO
), DIRPROP_FLAG(RLO
) };
48 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
49 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
50 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
53 * General implementation notes:
55 * Throughout the implementation, there are comments like (W2) that refer to
56 * rules of the Bidi algorithm in its version 5, in this example to the second
57 * rule of the resolution of weak types.
59 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
60 * character according to UTF-16, the second UChar gets the directional property of
61 * the entire character assigned, while the first one gets a BN, a boundary
62 * neutral, type, which is ignored by most of the algorithm according to
63 * rule (X9) and the implementation suggestions of the Bidi algorithm.
65 * Later, AdjustWSLevels() will set the level for each BN to that of the
66 * following character (UChar), which results in surrogate pairs getting the
67 * same level on each of their surrogates.
69 * In a UTF-8 implementation, the same thing could be done: the last byte of
70 * a multi-byte sequence would get the "real" property, while all previous
71 * bytes of that sequence would get BN.
73 * It is not possible to assign all those parts of a character the same real
74 * property because this would fail in the resolution of weak types with rules
75 * that look at immediately surrounding types.
77 * As a related topic, this implementation does not remove Boundary Neutral
78 * types from the input, but ignores them whenever this is relevant.
79 * For example, the loop for the resolution of the weak types reads
80 * types until it finds a non-BN.
81 * Also, explicit embedding codes are neither changed into BN nor removed.
82 * They are only treated the same way real BNs are.
83 * As stated before, AdjustWSLevels() takes care of them at the end.
84 * For the purpose of conformance, the levels of all these codes
87 * Note that this implementation never modifies the dirProps
88 * after the initial setup.
91 * In this implementation, the resolution of weak types (Wn),
92 * neutrals (Nn), and the assignment of the resolved level (In)
93 * are all done in one single loop, in ResolveImplicitLevels().
94 * Changes of dirProp values are done on the fly, without writing
95 * them back to the dirProps array.
98 * This implementation contains code that allows to bypass steps of the
99 * algorithm that are not needed on the specific paragraph
100 * in order to speed up the most common cases considerably,
101 * like text that is entirely LTR, or RTL text without numbers.
103 * Most of this is done by setting a bit for each directional property
104 * in a flags variable and later checking for whether there are
105 * any LTR characters or any RTL characters, or both, whether
106 * there are any explicit embedding codes, etc.
108 * If the (Xn) steps are performed, then the flags are re-evaluated,
109 * because they will then not contain the embedding codes any more
110 * and will be adjusted for override codes, so that subsequently
111 * more bypassing may be possible than what the initial flags suggested.
113 * If the text is not mixed-directional, then the
114 * algorithm steps for the weak type resolution are not performed,
115 * and all levels are set to the paragraph level.
117 * If there are no explicit embedding codes, then the (Xn) steps
120 * If embedding levels are supplied as a parameter, then all
121 * explicit embedding codes are ignored, and the (Xn) steps
124 * White Space types could get the level of the run they belong to,
125 * and are checked with a test of (flags&MASK_EMBEDDING) to
126 * consider if the paragraph direction should be considered in
127 * the flags variable.
129 * If there are no White Space types in the paragraph, then
130 * (L1) is not necessary in AdjustWSLevels().
136 mMayAllocateText
=true;
137 mMayAllocateRuns
=true;
147 /* reset the object, all pointers nullptr, all flags false, all sizes 0 */
151 mDirection
= NSBIDI_LTR
;
152 mTrailingWSStart
= 0;
163 mDirPropsMemory
=nullptr;
164 mLevelsMemory
=nullptr;
167 mMayAllocateText
=false;
168 mMayAllocateRuns
=false;
173 * We are allowed to allocate memory if aMemory==nullptr or
174 * aMayAllocate==true for each array that we need.
175 * We also try to grow and shrink memory as needed if we
178 * Assume aSizeNeeded>0.
179 * If *aMemory!=nullptr, then assume *aSize>0.
181 * ### this realloc() may unnecessarily copy the old data,
182 * which we know we don't need any more;
183 * is this the best way to do this??
185 bool nsBidi::GetMemory(void **aMemory
, size_t *aSize
, bool aMayAllocate
, size_t aSizeNeeded
)
187 /* check for existing memory */
188 if(*aMemory
==nullptr) {
189 /* we need to allocate memory */
193 *aMemory
=moz_malloc(aSizeNeeded
);
194 if (*aMemory
!=nullptr) {
203 /* there is some memory, is it enough or too much? */
204 if(aSizeNeeded
>*aSize
&& !aMayAllocate
) {
205 /* not enough memory, and we must not allocate */
207 } else if(aSizeNeeded
!=*aSize
&& aMayAllocate
) {
208 /* we may try to grow or shrink */
209 void *memory
=moz_realloc(*aMemory
, aSizeNeeded
);
211 if(memory
!=nullptr) {
216 /* we failed to grow */
220 /* we have at least enough memory and must not allocate */
228 moz_free(mDirPropsMemory
);
229 mDirPropsMemory
= nullptr;
230 moz_free(mLevelsMemory
);
231 mLevelsMemory
= nullptr;
232 moz_free(mRunsMemory
);
233 mRunsMemory
= nullptr;
236 /* SetPara ------------------------------------------------------------ */
238 nsresult
nsBidi::SetPara(const char16_t
*aText
, int32_t aLength
,
239 nsBidiLevel aParaLevel
, nsBidiLevel
*aEmbeddingLevels
)
241 nsBidiDirection direction
;
243 /* check the argument values */
245 ((NSBIDI_MAX_EXPLICIT_LEVEL
<aParaLevel
) && !IS_DEFAULT_LEVEL(aParaLevel
)) ||
248 return NS_ERROR_INVALID_ARG
;
252 aLength
= NS_strlen(aText
);
255 /* initialize member data */
257 mParaLevel
=aParaLevel
;
258 mDirection
=NSBIDI_LTR
;
259 mTrailingWSStart
=aLength
; /* the levels[] will reflect the WS run */
267 * For an empty paragraph, create an nsBidi object with the aParaLevel and
268 * the flags and the direction set but without allocating zero-length arrays.
269 * There is nothing more to do.
271 if(IS_DEFAULT_LEVEL(aParaLevel
)) {
275 mFlags
=DIRPROP_FLAG(R
);
276 mDirection
=NSBIDI_RTL
;
278 mFlags
=DIRPROP_FLAG(L
);
279 mDirection
=NSBIDI_LTR
;
289 * Get the directional properties,
290 * the flags bit-set, and
291 * determine the partagraph level if necessary.
293 if(GETDIRPROPSMEMORY(aLength
)) {
294 mDirProps
=mDirPropsMemory
;
297 return NS_ERROR_OUT_OF_MEMORY
;
300 /* are explicit levels specified? */
301 if(aEmbeddingLevels
==nullptr) {
302 /* no: determine explicit levels according to the (Xn) rules */\
303 if(GETLEVELSMEMORY(aLength
)) {
304 mLevels
=mLevelsMemory
;
305 direction
=ResolveExplicitLevels();
307 return NS_ERROR_OUT_OF_MEMORY
;
310 /* set BN for all explicit codes, check that all levels are aParaLevel..NSBIDI_MAX_EXPLICIT_LEVEL */
311 mLevels
=aEmbeddingLevels
;
312 nsresult rv
= CheckExplicitLevels(&direction
);
319 * The steps after (X9) in the Bidi algorithm are performed only if
320 * the paragraph text has mixed directionality!
324 /* make sure paraLevel is even */
325 mParaLevel
=(mParaLevel
+1)&~1;
327 /* all levels are implicitly at paraLevel (important for GetLevels()) */
331 /* make sure paraLevel is odd */
334 /* all levels are implicitly at paraLevel (important for GetLevels()) */
339 * If there are no external levels specified and there
340 * are no significant explicit level codes in the text,
341 * then we can treat the entire paragraph as one run.
342 * Otherwise, we need to perform the following rules on runs of
343 * the text with the same embedding levels. (X10)
344 * "Significant" explicit level codes are ones that actually
345 * affect non-BN characters.
346 * Examples for "insignificant" ones are empty embeddings
347 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
349 if(aEmbeddingLevels
==nullptr && !(mFlags
&DIRPROP_FLAG_MULTI_RUNS
)) {
350 ResolveImplicitLevels(0, aLength
,
351 GET_LR_FROM_LEVEL(mParaLevel
),
352 GET_LR_FROM_LEVEL(mParaLevel
));
354 /* sor, eor: start and end types of same-level-run */
355 nsBidiLevel
*levels
=mLevels
;
356 int32_t start
, limit
=0;
357 nsBidiLevel level
, nextLevel
;
360 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
363 if(level
<nextLevel
) {
364 eor
=GET_LR_FROM_LEVEL(nextLevel
);
366 eor
=GET_LR_FROM_LEVEL(level
);
370 /* determine start and limit of the run (end points just behind the run) */
372 /* the values for this run's start are the same as for the previous run's end */
377 /* search for the limit of this run */
378 while(++limit
<aLength
&& levels
[limit
]==level
) {}
380 /* get the correct level of the next run */
382 nextLevel
=levels
[limit
];
384 nextLevel
=mParaLevel
;
387 /* determine eor from max(level, nextLevel); sor is last run's eor */
388 if((level
&~NSBIDI_LEVEL_OVERRIDE
)<(nextLevel
&~NSBIDI_LEVEL_OVERRIDE
)) {
389 eor
=GET_LR_FROM_LEVEL(nextLevel
);
391 eor
=GET_LR_FROM_LEVEL(level
);
394 /* if the run consists of overridden directional types, then there
395 are no implicit types to be resolved */
396 if(!(level
&NSBIDI_LEVEL_OVERRIDE
)) {
397 ResolveImplicitLevels(start
, limit
, sor
, eor
);
399 } while(limit
<aLength
);
402 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
407 mDirection
=direction
;
411 /* perform (P2)..(P3) ------------------------------------------------------- */
414 * Get the directional properties for the text,
415 * calculate the flags bit-set, and
416 * determine the partagraph level if necessary.
418 void nsBidi::GetDirProps(const char16_t
*aText
)
420 DirProp
*dirProps
=mDirPropsMemory
; /* mDirProps is const */
422 int32_t i
=0, length
=mLength
;
423 Flags flags
=0; /* collect all directionalities in the text */
427 if(IS_DEFAULT_LEVEL(mParaLevel
)) {
428 /* determine the paragraph level (P2..P3) */
431 if(!IS_FIRST_SURROGATE(uchar
) || i
+1==length
|| !IS_SECOND_SURROGATE(aText
[i
+1])) {
432 /* not a surrogate pair */
433 flags
|=DIRPROP_FLAG(dirProps
[i
]=dirProp
=GetBidiCat((uint32_t)uchar
));
435 /* a surrogate pair */
436 dirProps
[i
++]=BN
; /* first surrogate in the pair gets the BN type */
437 flags
|=DIRPROP_FLAG(dirProps
[i
]=dirProp
=GetBidiCat(GET_UTF_32(uchar
, aText
[i
])))|DIRPROP_FLAG(BN
);
443 } else if(dirProp
==R
|| dirProp
==AL
) {
446 } else if(i
==length
) {
448 * see comment in nsIBidi.h:
449 * the DEFAULT_XXX values are designed so that
450 * their bit 0 alone yields the intended default
458 /* get the rest of the directional properties and the flags bits */
461 if(!IS_FIRST_SURROGATE(uchar
) || i
+1==length
|| !IS_SECOND_SURROGATE(aText
[i
+1])) {
462 /* not a surrogate pair */
463 flags
|=DIRPROP_FLAG(dirProps
[i
]=GetBidiCat((uint32_t)uchar
));
465 /* a surrogate pair */
466 dirProps
[i
++]=BN
; /* second surrogate in the pair gets the BN type */
467 flags
|=DIRPROP_FLAG(dirProps
[i
]=GetBidiCat(GET_UTF_32(uchar
, aText
[i
])))|DIRPROP_FLAG(BN
);
471 if(flags
&MASK_EMBEDDING
) {
472 flags
|=DIRPROP_FLAG_LR(mParaLevel
);
477 /* perform (X1)..(X9) ------------------------------------------------------- */
480 * Resolve the explicit levels as specified by explicit embedding codes.
481 * Recalculate the flags to have them reflect the real properties
482 * after taking the explicit embeddings into account.
484 * The Bidi algorithm is designed to result in the same behavior whether embedding
485 * levels are externally specified (from "styled text", supposedly the preferred
486 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
487 * That is why (X9) instructs to remove all explicit codes (and BN).
488 * However, in a real implementation, this removal of these codes and their index
489 * positions in the plain text is undesirable since it would result in
490 * reallocated, reindexed text.
491 * Instead, this implementation leaves the codes in there and just ignores them
492 * in the subsequent processing.
493 * In order to get the same reordering behavior, positions with a BN or an
494 * explicit embedding code just get the same level assigned as the last "real"
497 * Some implementations, not this one, then overwrite some of these
498 * directionality properties at "real" same-level-run boundaries by
499 * L or R codes so that the resolution of weak types can be performed on the
500 * entire paragraph at once instead of having to parse it once more and
501 * perform that resolution on same-level-runs.
502 * This limits the scope of the implicit rules in effectively
503 * the same way as the run limits.
505 * Instead, this implementation does not modify these codes.
506 * On one hand, the paragraph has to be scanned for same-level-runs, but
507 * on the other hand, this saves another loop to reset these codes,
508 * or saves making and modifying a copy of dirProps[].
511 * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
514 * Handling the stack of explicit levels (Xn):
516 * With the Bidi stack of explicit levels,
517 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
518 * the explicit level must never exceed NSBIDI_MAX_EXPLICIT_LEVEL==61.
520 * In order to have a correct push-pop semantics even in the case of overflows,
521 * there are two overflow counters:
522 * - countOver60 is incremented with each LRx at level 60
523 * - from level 60, one RLx increases the level to 61
524 * - countOver61 is incremented with each LRx and RLx at level 61
526 * Popping levels with PDF must work in the opposite order so that level 61
527 * is correct at the correct point. Underflows (too many PDFs) must be checked.
529 * This implementation assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd.
532 nsBidiDirection
nsBidi::ResolveExplicitLevels()
534 const DirProp
*dirProps
=mDirProps
;
535 nsBidiLevel
*levels
=mLevels
;
537 int32_t i
=0, length
=mLength
;
538 Flags flags
=mFlags
; /* collect all directionalities in the text */
540 nsBidiLevel level
=mParaLevel
;
542 nsBidiDirection direction
;
544 /* determine if the text is mixed-directional or single-directional */
545 direction
=DirectionFromFlags(flags
);
547 /* we may not need to resolve any explicit levels */
548 if(direction
!=NSBIDI_MIXED
) {
549 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
550 } else if(!(flags
&MASK_EXPLICIT
)) {
551 /* mixed, but all characters are at the same embedding level */
552 /* set all levels to the paragraph level */
553 for(i
=0; i
<length
; ++i
) {
557 /* continue to perform (Xn) */
559 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
560 /* both variables may carry the NSBIDI_LEVEL_OVERRIDE flag to indicate the override status */
561 nsBidiLevel embeddingLevel
=level
, newLevel
, stackTop
=0;
563 nsBidiLevel stack
[NSBIDI_MAX_EXPLICIT_LEVEL
]; /* we never push anything >=NSBIDI_MAX_EXPLICIT_LEVEL */
564 uint32_t countOver60
=0, countOver61
=0; /* count overflows of explicit levels */
566 /* recalculate the flags */
569 /* since we assume that this is a single paragraph, we ignore (X8) */
570 for(i
=0; i
<length
; ++i
) {
576 newLevel
=(embeddingLevel
+2)&~(NSBIDI_LEVEL_OVERRIDE
|1); /* least greater even level */
577 if(newLevel
<=NSBIDI_MAX_EXPLICIT_LEVEL
) {
578 stack
[stackTop
]=embeddingLevel
;
580 embeddingLevel
=newLevel
;
582 embeddingLevel
|=NSBIDI_LEVEL_OVERRIDE
;
584 embeddingLevel
&=~NSBIDI_LEVEL_OVERRIDE
;
586 } else if((embeddingLevel
&~NSBIDI_LEVEL_OVERRIDE
)==NSBIDI_MAX_EXPLICIT_LEVEL
) {
588 } else /* (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)==NSBIDI_MAX_EXPLICIT_LEVEL-1 */ {
591 flags
|=DIRPROP_FLAG(BN
);
596 newLevel
=((embeddingLevel
&~NSBIDI_LEVEL_OVERRIDE
)+1)|1; /* least greater odd level */
597 if(newLevel
<=NSBIDI_MAX_EXPLICIT_LEVEL
) {
598 stack
[stackTop
]=embeddingLevel
;
600 embeddingLevel
=newLevel
;
602 embeddingLevel
|=NSBIDI_LEVEL_OVERRIDE
;
604 embeddingLevel
&=~NSBIDI_LEVEL_OVERRIDE
;
609 flags
|=DIRPROP_FLAG(BN
);
613 /* handle all the overflow cases first */
616 } else if(countOver60
>0 && (embeddingLevel
&~NSBIDI_LEVEL_OVERRIDE
)!=NSBIDI_MAX_EXPLICIT_LEVEL
) {
617 /* handle LRx overflows from level 60 */
619 } else if(stackTop
>0) {
620 /* this is the pop operation; it also pops level 61 while countOver60>0 */
622 embeddingLevel
=stack
[stackTop
];
623 /* } else { (underflow) */
625 flags
|=DIRPROP_FLAG(BN
);
629 * We do not really expect to see a paragraph separator (B),
630 * but we should do something reasonable with it,
631 * especially at the end of the text.
634 countOver60
=countOver61
=0;
635 embeddingLevel
=level
=mParaLevel
;
636 flags
|=DIRPROP_FLAG(B
);
639 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
640 /* they will get their levels set correctly in AdjustWSLevels() */
641 flags
|=DIRPROP_FLAG(BN
);
644 /* all other types get the "real" level */
645 if(level
!=embeddingLevel
) {
646 level
=embeddingLevel
;
647 if(level
&NSBIDI_LEVEL_OVERRIDE
) {
648 flags
|=DIRPROP_FLAG_O(level
)|DIRPROP_FLAG_MULTI_RUNS
;
650 flags
|=DIRPROP_FLAG_E(level
)|DIRPROP_FLAG_MULTI_RUNS
;
653 if(!(level
&NSBIDI_LEVEL_OVERRIDE
)) {
654 flags
|=DIRPROP_FLAG(dirProp
);
660 * We need to set reasonable levels even on BN codes and
661 * explicit codes because we will later look at same-level runs (X10).
665 if(flags
&MASK_EMBEDDING
) {
666 flags
|=DIRPROP_FLAG_LR(mParaLevel
);
669 /* subsequently, ignore the explicit codes and BN (X9) */
671 /* again, determine if the text is mixed-directional or single-directional */
673 direction
=DirectionFromFlags(flags
);
679 * Use a pre-specified embedding levels array:
681 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
682 * ignore all explicit codes (X9),
683 * and check all the preset levels.
685 * Recalculate the flags to have them reflect the real properties
686 * after taking the explicit embeddings into account.
688 nsresult
nsBidi::CheckExplicitLevels(nsBidiDirection
*aDirection
)
690 const DirProp
*dirProps
=mDirProps
;
691 nsBidiLevel
*levels
=mLevels
;
693 int32_t i
, length
=mLength
;
694 Flags flags
=0; /* collect all directionalities in the text */
695 nsBidiLevel level
, paraLevel
=mParaLevel
;
697 for(i
=0; i
<length
; ++i
) {
699 if(level
&NSBIDI_LEVEL_OVERRIDE
) {
700 /* keep the override flag in levels[i] but adjust the flags */
701 level
&=~NSBIDI_LEVEL_OVERRIDE
; /* make the range check below simpler */
702 flags
|=DIRPROP_FLAG_O(level
);
705 flags
|=DIRPROP_FLAG_E(level
)|DIRPROP_FLAG(dirProps
[i
]);
707 if(level
<paraLevel
|| NSBIDI_MAX_EXPLICIT_LEVEL
<level
) {
708 /* level out of bounds */
709 *aDirection
= NSBIDI_LTR
;
710 return NS_ERROR_INVALID_ARG
;
713 if(flags
&MASK_EMBEDDING
) {
714 flags
|=DIRPROP_FLAG_LR(mParaLevel
);
717 /* determine if the text is mixed-directional or single-directional */
719 *aDirection
= DirectionFromFlags(flags
);
723 /* determine if the text is mixed-directional or single-directional */
724 nsBidiDirection
nsBidi::DirectionFromFlags(Flags aFlags
)
726 /* if the text contains AN and neutrals, then some neutrals may become RTL */
727 if(!(aFlags
&MASK_RTL
|| (aFlags
&DIRPROP_FLAG(AN
) && aFlags
&MASK_POSSIBLE_N
))) {
729 } else if(!(aFlags
&MASK_LTR
)) {
736 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
739 * This implementation of the (Wn) rules applies all rules in one pass.
740 * In order to do so, it needs a look-ahead of typically 1 character
741 * (except for W5: sequences of ET) and keeps track of changes
742 * in a rule Wp that affect a later Wq (p<q).
744 * historyOfEN is a variable-saver: it contains 4 boolean states;
745 * a bit in it set to 1 means:
746 * bit 0: the current code is an EN after W2
747 * bit 1: the current code is an EN after W4
748 * bit 2: the previous code was an EN after W2
749 * bit 3: the previous code was an EN after W4
750 * In other words, b0..1 have transitions of EN in the current iteration,
751 * while b2..3 have the transitions of EN in the previous iteration.
752 * A simple historyOfEN<<=2 suffices for the propagation.
754 * The (Nn) and (In) rules are also performed in that same single loop,
755 * but effectively one iteration behind for white space.
757 * Since all implicit rules are performed in one step, it is not necessary
758 * to actually store the intermediate directional properties in dirProps[].
762 #define EN_AFTER_W2 1
763 #define EN_AFTER_W4 2
765 #define PREV_EN_AFTER_W2 4
766 #define PREV_EN_AFTER_W4 8
768 void nsBidi::ResolveImplicitLevels(int32_t aStart
, int32_t aLimit
,
769 DirProp aSOR
, DirProp aEOR
)
771 const DirProp
*dirProps
=mDirProps
;
772 nsBidiLevel
*levels
=mLevels
;
774 int32_t i
, next
, neutralStart
=-1;
775 DirProp prevDirProp
, dirProp
, nextDirProp
, lastStrong
, beforeNeutral
;
778 /* initialize: current at aSOR, next at aStart (it is aStart<aLimit) */
780 beforeNeutral
=dirProp
=lastStrong
=aSOR
;
781 nextDirProp
=dirProps
[next
];
785 * In all steps of this implementation, BN and explicit embedding codes
786 * must be treated as if they didn't exist (X9).
787 * They will get levels set before a non-neutral character, and remain
788 * undefined before a neutral one, but AdjustWSLevels() will take care
791 while(DIRPROP_FLAG(nextDirProp
)&MASK_BN_EXPLICIT
) {
793 nextDirProp
=dirProps
[next
];
800 /* loop for entire run */
808 nextDirProp
=dirProps
[next
];
813 } while(DIRPROP_FLAG(nextDirProp
)&MASK_BN_EXPLICIT
);
814 historyOfEN
<<=EN_SHIFT
;
830 /* we have to set historyOfEN correctly */
839 /* this EN stays after (W2) and (W4) - at least before (W7) */
844 if( historyOfEN
&PREV_EN_AFTER_W2
&& /* previous was EN before (W4) */
845 nextDirProp
==EN
&& lastStrong
!=AL
/* next is EN and (W2) won't make it AN */
854 historyOfEN
|=EN_AFTER_W4
;
861 if( historyOfEN
&PREV_EN_AFTER_W2
&& /* previous was EN before (W4) */
862 nextDirProp
==EN
&& lastStrong
!=AL
/* next is EN and (W2) won't make it AN */
871 historyOfEN
|=EN_AFTER_W4
;
872 } else if(prevDirProp
==AN
&& /* previous was AN */
873 (nextDirProp
==AN
|| /* next is AN */
874 (nextDirProp
==EN
&& lastStrong
==AL
)) /* or (W2) will make it one */
884 /* get sequence of ET; advance only next, not current, previous or historyOfEN */
885 while(next
<aLimit
&& DIRPROP_FLAG(nextDirProp
)&MASK_ET_NSM_BN
/* (W1), (X9) */) {
887 nextDirProp
=dirProps
[next
];
894 if( historyOfEN
&PREV_EN_AFTER_W4
|| /* previous was EN before (W5) */
895 (nextDirProp
==EN
&& lastStrong
!=AL
) /* next is EN and (W2) won't make it AN */
909 /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */
914 /* set historyOfEN back to prevDirProp's historyOfEN */
915 historyOfEN
>>=EN_SHIFT
;
917 * Technically, this should be done before the switch() in the form
918 * if(nextDirProp==NSM) {
919 * dirProps[next]=nextDirProp=dirProp;
922 * - effectively one iteration ahead.
923 * However, whether the next dirProp is NSM or is equal to the current dirProp
924 * does not change the outcome of any condition in (W2)..(W7).
931 /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */
933 /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */
934 /* this is one iteration late for the neutrals */
935 if(DIRPROP_FLAG(dirProp
)&MASK_N
) {
937 /* start of a sequence of neutrals */
939 beforeNeutral
=prevDirProp
;
941 } else /* not a neutral, can be only one of { L, R, EN, AN } */ {
943 * Note that all levels[] values are still the same at this
944 * point because this function is called for an entire
946 * Therefore, we need to read only one actual level.
948 nsBidiLevel level
=levels
[i
];
950 if(neutralStart
>=0) {
952 /* end of a sequence of neutrals (dirProp is "afterNeutral") */
953 if(beforeNeutral
==L
) {
955 final
=0; /* make all neutrals L (N1) */
957 final
=level
; /* make all neutrals "e" (N2) */
959 } else /* beforeNeutral is one of { R, EN, AN } */ {
961 final
=level
; /* make all neutrals "e" (N2) */
963 final
=1; /* make all neutrals R (N1) */
966 /* perform (In) on the sequence of neutrals */
967 if((level
^final
)&1) {
968 /* do something only if we need to _change_ the level */
970 ++levels
[neutralStart
];
971 } while(++neutralStart
<i
);
976 /* perform (In) on the non-neutral character */
978 * in the cases of (W5), processing a sequence of ET,
979 * and of (X9), skipping BN,
980 * there may be multiple characters from i to <next
981 * that all get (virtually) the same dirProp and (really) the same level
987 i
=next
; /* we keep the levels */
989 } else if(dirProp
==R
) {
993 i
=next
; /* we keep the levels */
995 } else /* EN or AN */ {
996 level
=(level
+2)&~1; /* least greater even level */
999 /* apply the new level to the sequence, if necessary */
1006 /* perform (Nn) - here,
1007 the character after the neutrals is aEOR, which is either L or R */
1008 /* this is one iteration late for the neutrals */
1009 if(neutralStart
>=0) {
1011 * Note that all levels[] values are still the same at this
1012 * point because this function is called for an entire
1014 * Therefore, we need to read only one actual level.
1016 nsBidiLevel level
=levels
[neutralStart
], final
;
1018 /* end of a sequence of neutrals (aEOR is "afterNeutral") */
1019 if(beforeNeutral
==L
) {
1021 final
=0; /* make all neutrals L (N1) */
1023 final
=level
; /* make all neutrals "e" (N2) */
1025 } else /* beforeNeutral is one of { R, EN, AN } */ {
1027 final
=level
; /* make all neutrals "e" (N2) */
1029 final
=1; /* make all neutrals R (N1) */
1032 /* perform (In) on the sequence of neutrals */
1033 if((level
^final
)&1) {
1034 /* do something only if we need to _change_ the level */
1036 ++levels
[neutralStart
];
1037 } while(++neutralStart
<aLimit
);
1042 /* perform (L1) and (X9) ---------------------------------------------------- */
1045 * Reset the embedding levels for some non-graphic characters (L1).
1046 * This function also sets appropriate levels for BN, and
1047 * explicit embedding types that are supposed to have been removed
1048 * from the paragraph in (X9).
1050 void nsBidi::AdjustWSLevels()
1052 const DirProp
*dirProps
=mDirProps
;
1053 nsBidiLevel
*levels
=mLevels
;
1056 if(mFlags
&MASK_WS
) {
1057 nsBidiLevel paraLevel
=mParaLevel
;
1062 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
1063 while(i
>0 && DIRPROP_FLAG(dirProps
[--i
])&MASK_WS
) {
1064 levels
[i
]=paraLevel
;
1067 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1068 /* here, i+1 is guaranteed to be <length */
1070 flag
=DIRPROP_FLAG(dirProps
[--i
]);
1071 if(flag
&MASK_BN_EXPLICIT
) {
1072 levels
[i
]=levels
[i
+1];
1073 } else if(flag
&MASK_B_S
) {
1074 levels
[i
]=paraLevel
;
1081 /* now remove the NSBIDI_LEVEL_OVERRIDE flags, if any */
1082 /* (a separate loop can be optimized more easily by a compiler) */
1083 if(mFlags
&MASK_OVERRIDE
) {
1084 for(i
=mTrailingWSStart
; i
>0;) {
1085 levels
[--i
]&=~NSBIDI_LEVEL_OVERRIDE
;
1090 nsresult
nsBidi::GetDirection(nsBidiDirection
* aDirection
)
1092 *aDirection
= mDirection
;
1096 nsresult
nsBidi::GetParaLevel(nsBidiLevel
* aParaLevel
)
1098 *aParaLevel
= mParaLevel
;
1101 #ifdef FULL_BIDI_ENGINE
1103 /* -------------------------------------------------------------------------- */
1105 nsresult
nsBidi::GetLength(int32_t* aLength
)
1112 * General remarks about the functions in this section:
1114 * These functions deal with the aspects of potentially mixed-directional
1115 * text in a single paragraph or in a line of a single paragraph
1116 * which has already been processed according to
1117 * the Unicode 3.0 Bidi algorithm as defined in
1118 * http://www.unicode.org/unicode/reports/tr9/ , version 5,
1119 * also described in The Unicode Standard, Version 3.0 .
1121 * This means that there is a nsBidi object with a levels
1122 * and a dirProps array.
1123 * paraLevel and direction are also set.
1124 * Only if the length of the text is zero, then levels==dirProps==nullptr.
1126 * The overall directionality of the paragraph
1127 * or line is used to bypass the reordering steps if possible.
1128 * Even purely RTL text does not need reordering there because
1129 * the getLogical/VisualIndex() functions can compute the
1130 * index on the fly in such a case.
1132 * The implementation of the access to same-level-runs and of the reordering
1133 * do attempt to provide better performance and less memory usage compared to
1134 * a direct implementation of especially rule (L2) with an array of
1135 * one (32-bit) integer per text character.
1137 * Here, the levels array is scanned as soon as necessary, and a vector of
1138 * same-level-runs is created. Reordering then is done on this vector.
1139 * For each run of text positions that were resolved to the same level,
1140 * only 8 bytes are stored: the first text position of the run and the visual
1141 * position behind the run after reordering.
1142 * One sign bit is used to hold the directionality of the run.
1143 * This is inefficient if there are many very short runs. If the average run
1144 * length is <2, then this uses more memory.
1146 * In a further attempt to save memory, the levels array is never changed
1147 * after all the resolution rules (Xn, Wn, Nn, In).
1148 * Many functions have to consider the field trailingWSStart:
1149 * if it is less than length, then there is an implicit trailing run
1151 * which is not reflected in the levels array.
1152 * This allows a line nsBidi object to use the same levels array as
1153 * its paragraph parent object.
1155 * When a nsBidi object is created for a line of a paragraph, then the
1156 * paragraph's levels and dirProps arrays are reused by way of setting
1157 * a pointer into them, not by copying. This again saves memory and forbids to
1158 * change the now shared levels for (L1).
1160 nsresult
nsBidi::SetLine(nsIBidi
* aParaBidi
, int32_t aStart
, int32_t aLimit
)
1162 nsBidi
* pParent
= (nsBidi
*)aParaBidi
;
1165 /* check the argument values */
1166 if(pParent
==nullptr) {
1167 return NS_ERROR_INVALID_POINTER
;
1168 } else if(aStart
<0 || aStart
>aLimit
|| aLimit
>pParent
->mLength
) {
1169 return NS_ERROR_INVALID_ARG
;
1172 /* set members from our aParaBidi parent */
1173 length
=mLength
=aLimit
-aStart
;
1174 mParaLevel
=pParent
->mParaLevel
;
1180 mDirProps
=pParent
->mDirProps
+aStart
;
1181 mLevels
=pParent
->mLevels
+aStart
;
1184 if(pParent
->mDirection
!=NSBIDI_MIXED
) {
1185 /* the parent is already trivial */
1186 mDirection
=pParent
->mDirection
;
1189 * The parent's levels are all either
1190 * implicitly or explicitly ==paraLevel;
1193 if(pParent
->mTrailingWSStart
<=aStart
) {
1195 } else if(pParent
->mTrailingWSStart
<aLimit
) {
1196 mTrailingWSStart
=pParent
->mTrailingWSStart
-aStart
;
1198 mTrailingWSStart
=length
;
1201 const nsBidiLevel
*levels
=mLevels
;
1202 int32_t i
, trailingWSStart
;
1206 SetTrailingWSStart();
1207 trailingWSStart
=mTrailingWSStart
;
1209 /* recalculate pLineBidi->direction */
1210 if(trailingWSStart
==0) {
1211 /* all levels are at paraLevel */
1212 mDirection
=(nsBidiDirection
)(mParaLevel
&1);
1214 /* get the level of the first character */
1217 /* if there is anything of a different level, then the line is mixed */
1218 if(trailingWSStart
<length
&& (mParaLevel
&1)!=level
) {
1219 /* the trailing WS is at paraLevel, which differs from levels[0] */
1220 mDirection
=NSBIDI_MIXED
;
1222 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
1225 if(i
==trailingWSStart
) {
1226 /* the direction values match those in level */
1227 mDirection
=(nsBidiDirection
)level
;
1229 } else if((levels
[i
]&1)!=level
) {
1230 mDirection
=NSBIDI_MIXED
;
1238 switch(mDirection
) {
1240 /* make sure paraLevel is even */
1241 mParaLevel
=(mParaLevel
+1)&~1;
1243 /* all levels are implicitly at paraLevel (important for GetLevels()) */
1247 /* make sure paraLevel is odd */
1250 /* all levels are implicitly at paraLevel (important for GetLevels()) */
1258 /* create an object for a zero-length line */
1259 mDirection
=mParaLevel
&1 ? NSBIDI_RTL
: NSBIDI_LTR
;
1260 mTrailingWSStart
=mRunCount
=0;
1268 /* handle trailing WS (L1) -------------------------------------------------- */
1271 * SetTrailingWSStart() sets the start index for a trailing
1272 * run of WS in the line. This is necessary because we do not modify
1273 * the paragraph's levels array that we just point into.
1274 * Using trailingWSStart is another form of performing (L1).
1276 * To make subsequent operations easier, we also include the run
1277 * before the WS if it is at the paraLevel - we merge the two here.
1279 void nsBidi::SetTrailingWSStart() {
1280 /* mDirection!=NSBIDI_MIXED */
1282 const DirProp
*dirProps
=mDirProps
;
1283 nsBidiLevel
*levels
=mLevels
;
1284 int32_t start
=mLength
;
1285 nsBidiLevel paraLevel
=mParaLevel
;
1287 /* go backwards across all WS, BN, explicit codes */
1288 while(start
>0 && DIRPROP_FLAG(dirProps
[start
-1])&MASK_WS
) {
1292 /* if the WS run can be merged with the previous run then do so here */
1293 while(start
>0 && levels
[start
-1]==paraLevel
) {
1297 mTrailingWSStart
=start
;
1300 nsresult
nsBidi::GetLevelAt(int32_t aCharIndex
, nsBidiLevel
* aLevel
)
1302 /* return paraLevel if in the trailing WS run, otherwise the real level */
1303 if(aCharIndex
<0 || mLength
<=aCharIndex
) {
1305 } else if(mDirection
!=NSBIDI_MIXED
|| aCharIndex
>=mTrailingWSStart
) {
1306 *aLevel
= mParaLevel
;
1308 *aLevel
= mLevels
[aCharIndex
];
1313 nsresult
nsBidi::GetLevels(nsBidiLevel
** aLevels
)
1315 int32_t start
, length
;
1320 return NS_ERROR_INVALID_ARG
;
1323 start
= mTrailingWSStart
;
1325 /* the current levels array reflects the WS run */
1331 * After the previous if(), we know that the levels array
1332 * has an implicit trailing WS run and therefore does not fully
1333 * reflect itself all the levels.
1334 * This must be a nsBidi object for a line, and
1335 * we need to create a new levels array.
1338 if(GETLEVELSMEMORY(length
)) {
1339 nsBidiLevel
*levels
=mLevelsMemory
;
1341 if(start
>0 && levels
!=mLevels
) {
1342 memcpy(levels
, mLevels
, start
);
1344 memset(levels
+start
, mParaLevel
, length
-start
);
1346 /* this new levels array is set for the line and reflects the WS run */
1347 mTrailingWSStart
=length
;
1348 *aLevels
=mLevels
=levels
;
1353 return NS_ERROR_OUT_OF_MEMORY
;
1356 #endif // FULL_BIDI_ENGINE
1358 nsresult
nsBidi::GetCharTypeAt(int32_t aCharIndex
, nsCharType
* pType
)
1360 if(aCharIndex
<0 || mLength
<=aCharIndex
) {
1361 return NS_ERROR_INVALID_ARG
;
1363 *pType
= (nsCharType
)mDirProps
[aCharIndex
];
1367 nsresult
nsBidi::GetLogicalRun(int32_t aLogicalStart
, int32_t *aLogicalLimit
, nsBidiLevel
*aLevel
)
1369 int32_t length
= mLength
;
1371 if(aLogicalStart
<0 || length
<=aLogicalStart
) {
1372 return NS_ERROR_INVALID_ARG
;
1375 if(mDirection
!=NSBIDI_MIXED
|| aLogicalStart
>=mTrailingWSStart
) {
1376 if(aLogicalLimit
!=nullptr) {
1377 *aLogicalLimit
=length
;
1379 if(aLevel
!=nullptr) {
1383 nsBidiLevel
*levels
=mLevels
;
1384 nsBidiLevel level
=levels
[aLogicalStart
];
1386 /* search for the end of the run */
1387 length
=mTrailingWSStart
;
1388 while(++aLogicalStart
<length
&& level
==levels
[aLogicalStart
]) {}
1390 if(aLogicalLimit
!=nullptr) {
1391 *aLogicalLimit
=aLogicalStart
;
1393 if(aLevel
!=nullptr) {
1400 /* runs API functions ------------------------------------------------------- */
1402 nsresult
nsBidi::CountRuns(int32_t* aRunCount
)
1404 if(mRunCount
<0 && !GetRuns()) {
1405 return NS_ERROR_OUT_OF_MEMORY
;
1408 *aRunCount
= mRunCount
;
1413 nsresult
nsBidi::GetVisualRun(int32_t aRunIndex
, int32_t *aLogicalStart
, int32_t *aLength
, nsBidiDirection
*aDirection
)
1416 (mRunCount
==-1 && !GetRuns()) ||
1417 aRunIndex
>=mRunCount
1419 *aDirection
= NSBIDI_LTR
;
1422 int32_t start
=mRuns
[aRunIndex
].logicalStart
;
1423 if(aLogicalStart
!=nullptr) {
1424 *aLogicalStart
=GET_INDEX(start
);
1426 if(aLength
!=nullptr) {
1428 *aLength
=mRuns
[aRunIndex
].visualLimit
-
1429 mRuns
[aRunIndex
-1].visualLimit
;
1431 *aLength
=mRuns
[0].visualLimit
;
1434 *aDirection
= (nsBidiDirection
)GET_ODD_BIT(start
);
1439 /* compute the runs array --------------------------------------------------- */
1442 * Compute the runs array from the levels array.
1443 * After GetRuns() returns true, runCount is guaranteed to be >0
1444 * and the runs are reordered.
1445 * Odd-level runs have visualStart on their visual right edge and
1446 * they progress visually to the left.
1448 bool nsBidi::GetRuns()
1450 if(mDirection
!=NSBIDI_MIXED
) {
1451 /* simple, single-run case - this covers length==0 */
1452 GetSingleRun(mParaLevel
);
1453 } else /* NSBIDI_MIXED, length>0 */ {
1454 /* mixed directionality */
1455 int32_t length
=mLength
, limit
=mTrailingWSStart
;
1458 * If there are WS characters at the end of the line
1459 * and the run preceding them has a level different from
1460 * paraLevel, then they will form their own run at paraLevel (L1).
1461 * Count them separately.
1462 * We need some special treatment for this in order to not
1463 * modify the levels array which a line nsBidi object shares
1464 * with its paragraph parent and its other line siblings.
1465 * In other words, for the trailing WS, it may be
1466 * levels[]!=paraLevel but we have to treat it like it were so.
1469 /* there is only WS on this line */
1470 GetSingleRun(mParaLevel
);
1472 nsBidiLevel
*levels
=mLevels
;
1473 int32_t i
, runCount
;
1474 nsBidiLevel level
=NSBIDI_DEFAULT_LTR
; /* initialize with no valid level */
1476 /* count the runs, there is at least one non-WS run, and limit>0 */
1478 for(i
=0; i
<limit
; ++i
) {
1479 /* increment runCount at the start of each run */
1480 if(levels
[i
]!=level
) {
1487 * We don't need to see if the last run can be merged with a trailing
1488 * WS run because SetTrailingWSStart() would have done that.
1490 if(runCount
==1 && limit
==length
) {
1491 /* There is only one non-WS run and no trailing WS-run. */
1492 GetSingleRun(levels
[0]);
1493 } else /* runCount>1 || limit<length */ {
1494 /* allocate and set the runs */
1496 int32_t runIndex
, start
;
1497 nsBidiLevel minLevel
=NSBIDI_MAX_EXPLICIT_LEVEL
+1, maxLevel
=0;
1499 /* now, count a (non-mergable) WS run */
1505 if(GETRUNSMEMORY(runCount
)) {
1512 /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */
1513 /* however, that would take longer and make other functions more complicated */
1516 /* search for the run ends */
1519 if(level
<minLevel
) {
1522 if(level
>maxLevel
) {
1526 /* initialize visualLimit values with the run lengths */
1527 for(i
=1; i
<limit
; ++i
) {
1528 if(levels
[i
]!=level
) {
1529 /* i is another run limit */
1530 runs
[runIndex
].logicalStart
=start
;
1531 runs
[runIndex
].visualLimit
=i
-start
;
1535 if(level
<minLevel
) {
1538 if(level
>maxLevel
) {
1545 /* finish the last run at i==limit */
1546 runs
[runIndex
].logicalStart
=start
;
1547 runs
[runIndex
].visualLimit
=limit
-start
;
1551 /* there is a separate WS run */
1552 runs
[runIndex
].logicalStart
=limit
;
1553 runs
[runIndex
].visualLimit
=length
-limit
;
1554 if(mParaLevel
<minLevel
) {
1555 minLevel
=mParaLevel
;
1559 /* set the object fields */
1563 ReorderLine(minLevel
, maxLevel
);
1565 /* now add the direction flags and adjust the visualLimit's to be just that */
1566 ADD_ODD_BIT_FROM_LEVEL(runs
[0].logicalStart
, levels
[runs
[0].logicalStart
]);
1567 limit
=runs
[0].visualLimit
;
1568 for(i
=1; i
<runIndex
; ++i
) {
1569 ADD_ODD_BIT_FROM_LEVEL(runs
[i
].logicalStart
, levels
[runs
[i
].logicalStart
]);
1570 limit
=runs
[i
].visualLimit
+=limit
;
1573 /* same for the trailing WS run */
1574 if(runIndex
<runCount
) {
1575 ADD_ODD_BIT_FROM_LEVEL(runs
[i
].logicalStart
, mParaLevel
);
1576 runs
[runIndex
].visualLimit
+=limit
;
1584 /* in trivial cases there is only one trivial run; called by GetRuns() */
1585 void nsBidi::GetSingleRun(nsBidiLevel aLevel
)
1587 /* simple, single-run case */
1591 /* fill and reorder the single run */
1592 mRuns
[0].logicalStart
=MAKE_INDEX_ODD_PAIR(0, aLevel
);
1593 mRuns
[0].visualLimit
=mLength
;
1596 /* reorder the runs array (L2) ---------------------------------------------- */
1599 * Reorder the same-level runs in the runs array.
1600 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
1601 * All the visualStart fields=logical start before reordering.
1602 * The "odd" bits are not set yet.
1604 * Reordering with this data structure lends itself to some handy shortcuts:
1606 * Since each run is moved but not modified, and since at the initial maxLevel
1607 * each sequence of same-level runs consists of only one run each, we
1608 * don't need to do anything there and can predecrement maxLevel.
1609 * In many simple cases, the reordering is thus done entirely in the
1611 * Also, reordering occurs only down to the lowest odd level that occurs,
1612 * which is minLevel|1. However, if the lowest level itself is odd, then
1613 * in the last reordering the sequence of the runs at this level or higher
1614 * will be all runs, and we don't need the elaborate loop to search for them.
1615 * This is covered by ++minLevel instead of minLevel|=1 followed
1616 * by an extra reorder-all after the reorder-some loop.
1617 * About a trailing WS run:
1618 * Such a run would need special treatment because its level is not
1619 * reflected in levels[] if this is not a paragraph object.
1620 * Instead, all characters from trailingWSStart on are implicitly at
1622 * However, for all maxLevel>paraLevel, this run will never be reordered
1623 * and does not need to be taken into account. maxLevel==paraLevel is only reordered
1624 * if minLevel==paraLevel is odd, which is done in the extra segment.
1625 * This means that for the main reordering loop we don't need to consider
1626 * this run and can --runCount. If it is later part of the all-runs
1627 * reordering, then runCount is adjusted accordingly.
1629 void nsBidi::ReorderLine(nsBidiLevel aMinLevel
, nsBidiLevel aMaxLevel
)
1632 nsBidiLevel
*levels
;
1633 int32_t firstRun
, endRun
, limitRun
, runCount
, temp
;
1635 /* nothing to do? */
1636 if(aMaxLevel
<=(aMinLevel
|1)) {
1641 * Reorder only down to the lowest odd level
1642 * and reorder at an odd aMinLevel in a separate, simpler loop.
1643 * See comments above for why aMinLevel is always incremented.
1651 /* do not include the WS run at paraLevel<=old aMinLevel except in the simple loop */
1652 if(mTrailingWSStart
<mLength
) {
1656 while(--aMaxLevel
>=aMinLevel
) {
1659 /* loop for all sequences of runs */
1661 /* look for a sequence of runs that are all at >=aMaxLevel */
1662 /* look for the first run of such a sequence */
1663 while(firstRun
<runCount
&& levels
[runs
[firstRun
].logicalStart
]<aMaxLevel
) {
1666 if(firstRun
>=runCount
) {
1667 break; /* no more such runs */
1670 /* look for the limit run of such a sequence (the run behind it) */
1671 for(limitRun
=firstRun
; ++limitRun
<runCount
&& levels
[runs
[limitRun
].logicalStart
]>=aMaxLevel
;) {}
1673 /* Swap the entire sequence of runs from firstRun to limitRun-1. */
1675 while(firstRun
<endRun
) {
1676 temp
=runs
[firstRun
].logicalStart
;
1677 runs
[firstRun
].logicalStart
=runs
[endRun
].logicalStart
;
1678 runs
[endRun
].logicalStart
=temp
;
1680 temp
=runs
[firstRun
].visualLimit
;
1681 runs
[firstRun
].visualLimit
=runs
[endRun
].visualLimit
;
1682 runs
[endRun
].visualLimit
=temp
;
1688 if(limitRun
==runCount
) {
1689 break; /* no more such runs */
1691 firstRun
=limitRun
+1;
1696 /* now do aMaxLevel==old aMinLevel (==odd!), see above */
1697 if(!(aMinLevel
&1)) {
1700 /* include the trailing WS run in this complete reordering */
1701 if(mTrailingWSStart
==mLength
) {
1705 /* Swap the entire sequence of all runs. (endRun==runCount) */
1706 while(firstRun
<runCount
) {
1707 temp
=runs
[firstRun
].logicalStart
;
1708 runs
[firstRun
].logicalStart
=runs
[runCount
].logicalStart
;
1709 runs
[runCount
].logicalStart
=temp
;
1711 temp
=runs
[firstRun
].visualLimit
;
1712 runs
[firstRun
].visualLimit
=runs
[runCount
].visualLimit
;
1713 runs
[runCount
].visualLimit
=temp
;
1721 nsresult
nsBidi::ReorderVisual(const nsBidiLevel
*aLevels
, int32_t aLength
, int32_t *aIndexMap
)
1723 int32_t start
, end
, limit
, temp
;
1724 nsBidiLevel minLevel
, maxLevel
;
1726 if(aIndexMap
==nullptr ||
1727 !PrepareReorder(aLevels
, aLength
, aIndexMap
, &minLevel
, &maxLevel
)) {
1731 /* nothing to do? */
1732 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
1736 /* reorder only down to the lowest odd level */
1739 /* loop maxLevel..minLevel */
1743 /* loop for all sequences of levels to reorder at the current maxLevel */
1745 /* look for a sequence of levels that are all at >=maxLevel */
1746 /* look for the first index of such a sequence */
1747 while(start
<aLength
&& aLevels
[start
]<maxLevel
) {
1750 if(start
>=aLength
) {
1751 break; /* no more such runs */
1754 /* look for the limit of such a sequence (the index behind it) */
1755 for(limit
=start
; ++limit
<aLength
&& aLevels
[limit
]>=maxLevel
;) {}
1758 * Swap the entire interval of indexes from start to limit-1.
1759 * We don't need to swap the levels for the purpose of this
1760 * algorithm: the sequence of levels that we look at does not
1765 temp
=aIndexMap
[start
];
1766 aIndexMap
[start
]=aIndexMap
[end
];
1767 aIndexMap
[end
]=temp
;
1773 if(limit
==aLength
) {
1774 break; /* no more such sequences */
1779 } while(--maxLevel
>=minLevel
);
1784 bool nsBidi::PrepareReorder(const nsBidiLevel
*aLevels
, int32_t aLength
,
1786 nsBidiLevel
*aMinLevel
, nsBidiLevel
*aMaxLevel
)
1789 nsBidiLevel level
, minLevel
, maxLevel
;
1791 if(aLevels
==nullptr || aLength
<=0) {
1795 /* determine minLevel and maxLevel */
1796 minLevel
=NSBIDI_MAX_EXPLICIT_LEVEL
+1;
1798 for(start
=aLength
; start
>0;) {
1799 level
=aLevels
[--start
];
1800 if(level
>NSBIDI_MAX_EXPLICIT_LEVEL
+1) {
1803 if(level
<minLevel
) {
1806 if(level
>maxLevel
) {
1810 *aMinLevel
=minLevel
;
1811 *aMaxLevel
=maxLevel
;
1813 /* initialize the index map */
1814 for(start
=aLength
; start
>0;) {
1816 aIndexMap
[start
]=start
;
1822 #ifdef FULL_BIDI_ENGINE
1823 /* API functions for logical<->visual mapping ------------------------------- */
1825 nsresult
nsBidi::GetVisualIndex(int32_t aLogicalIndex
, int32_t* aVisualIndex
) {
1826 if(aLogicalIndex
<0 || mLength
<=aLogicalIndex
) {
1827 return NS_ERROR_INVALID_ARG
;
1829 /* we can do the trivial cases without the runs array */
1830 switch(mDirection
) {
1832 *aVisualIndex
= aLogicalIndex
;
1835 *aVisualIndex
= mLength
-aLogicalIndex
-1;
1838 if(mRunCount
<0 && !GetRuns()) {
1839 return NS_ERROR_OUT_OF_MEMORY
;
1842 int32_t i
, visualStart
=0, offset
, length
;
1844 /* linear search for the run, search on the visual runs */
1846 length
=runs
[i
].visualLimit
-visualStart
;
1847 offset
=aLogicalIndex
-GET_INDEX(runs
[i
].logicalStart
);
1848 if(offset
>=0 && offset
<length
) {
1849 if(IS_EVEN_RUN(runs
[i
].logicalStart
)) {
1851 *aVisualIndex
= visualStart
+offset
;
1855 *aVisualIndex
= visualStart
+length
-offset
-1;
1859 visualStart
+=length
;
1866 nsresult
nsBidi::GetLogicalIndex(int32_t aVisualIndex
, int32_t *aLogicalIndex
)
1868 if(aVisualIndex
<0 || mLength
<=aVisualIndex
) {
1869 return NS_ERROR_INVALID_ARG
;
1871 /* we can do the trivial cases without the runs array */
1872 switch(mDirection
) {
1874 *aLogicalIndex
= aVisualIndex
;
1877 *aLogicalIndex
= mLength
-aVisualIndex
-1;
1880 if(mRunCount
<0 && !GetRuns()) {
1881 return NS_ERROR_OUT_OF_MEMORY
;
1884 int32_t i
, runCount
=mRunCount
, start
;
1887 /* linear search for the run */
1888 for(i
=0; aVisualIndex
>=runs
[i
].visualLimit
; ++i
) {}
1890 /* binary search for the run */
1891 int32_t start
=0, limit
=runCount
;
1893 /* the middle if() will guaranteed find the run, we don't need a loop limit */
1896 if(aVisualIndex
>=runs
[i
].visualLimit
) {
1898 } else if(i
==0 || aVisualIndex
>=runs
[i
-1].visualLimit
) {
1906 start
=runs
[i
].logicalStart
;
1907 if(IS_EVEN_RUN(start
)) {
1909 /* the offset in runs[i] is aVisualIndex-runs[i-1].visualLimit */
1911 aVisualIndex
-=runs
[i
-1].visualLimit
;
1913 *aLogicalIndex
= GET_INDEX(start
)+aVisualIndex
;
1917 *aLogicalIndex
= GET_INDEX(start
)+runs
[i
].visualLimit
-aVisualIndex
-1;
1925 nsresult
nsBidi::GetLogicalMap(int32_t *aIndexMap
)
1927 nsBidiLevel
*levels
;
1930 /* GetLevels() checks all of its and our arguments */
1931 rv
= GetLevels(&levels
);
1934 } else if(aIndexMap
==nullptr) {
1935 return NS_ERROR_INVALID_ARG
;
1937 return ReorderLogical(levels
, mLength
, aIndexMap
);
1941 nsresult
nsBidi::GetVisualMap(int32_t *aIndexMap
)
1943 int32_t* runCount
=nullptr;
1946 /* CountRuns() checks all of its and our arguments */
1947 rv
= CountRuns(runCount
);
1950 } else if(aIndexMap
==nullptr) {
1951 return NS_ERROR_INVALID_ARG
;
1953 /* fill a visual-to-logical index map using the runs[] */
1954 Run
*runs
=mRuns
, *runsLimit
=runs
+mRunCount
;
1955 int32_t logicalStart
, visualStart
, visualLimit
;
1958 for(; runs
<runsLimit
; ++runs
) {
1959 logicalStart
=runs
->logicalStart
;
1960 visualLimit
=runs
->visualLimit
;
1961 if(IS_EVEN_RUN(logicalStart
)) {
1963 *aIndexMap
++ = logicalStart
++;
1964 } while(++visualStart
<visualLimit
);
1966 REMOVE_ODD_BIT(logicalStart
);
1967 logicalStart
+=visualLimit
-visualStart
; /* logicalLimit */
1969 *aIndexMap
++ = --logicalStart
;
1970 } while(++visualStart
<visualLimit
);
1972 /* visualStart==visualLimit; */
1978 /* reorder a line based on a levels array (L2) ------------------------------ */
1980 nsresult
nsBidi::ReorderLogical(const nsBidiLevel
*aLevels
, int32_t aLength
, int32_t *aIndexMap
)
1982 int32_t start
, limit
, sumOfSosEos
;
1983 nsBidiLevel minLevel
, maxLevel
;
1985 if(aIndexMap
==nullptr ||
1986 !PrepareReorder(aLevels
, aLength
, aIndexMap
, &minLevel
, &maxLevel
)) {
1990 /* nothing to do? */
1991 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
1995 /* reorder only down to the lowest odd level */
1998 /* loop maxLevel..minLevel */
2002 /* loop for all sequences of levels to reorder at the current maxLevel */
2004 /* look for a sequence of levels that are all at >=maxLevel */
2005 /* look for the first index of such a sequence */
2006 while(start
<aLength
&& aLevels
[start
]<maxLevel
) {
2009 if(start
>=aLength
) {
2010 break; /* no more such sequences */
2013 /* look for the limit of such a sequence (the index behind it) */
2014 for(limit
=start
; ++limit
<aLength
&& aLevels
[limit
]>=maxLevel
;) {}
2017 * sos=start of sequence, eos=end of sequence
2019 * The closed (inclusive) interval from sos to eos includes all the logical
2020 * and visual indexes within this sequence. They are logically and
2021 * visually contiguous and in the same range.
2023 * For each run, the new visual index=sos+eos-old visual index;
2024 * we pre-add sos+eos into sumOfSosEos ->
2025 * new visual index=sumOfSosEos-old visual index;
2027 sumOfSosEos
=start
+limit
-1;
2029 /* reorder each index in the sequence */
2031 aIndexMap
[start
]=sumOfSosEos
-aIndexMap
[start
];
2032 } while(++start
<limit
);
2035 if(limit
==aLength
) {
2036 break; /* no more such sequences */
2041 } while(--maxLevel
>=minLevel
);
2046 nsresult
nsBidi::InvertMap(const int32_t *aSrcMap
, int32_t *aDestMap
, int32_t aLength
)
2048 if(aSrcMap
!=nullptr && aDestMap
!=nullptr) {
2051 aDestMap
[*--aSrcMap
]=--aLength
;
2057 int32_t nsBidi::doWriteReverse(const char16_t
*src
, int32_t srcLength
,
2058 char16_t
*dest
, uint16_t options
) {
2062 * RTL runs need to be copied to the destination in reverse order
2063 * of code points, not code units, to keep Unicode characters intact.
2065 * The general strategy for this is to read the source text
2066 * in backward order, collect all code units for a code point
2067 * (and optionally following combining characters, see below),
2068 * and copy all these code units in ascending order
2069 * to the destination for this run.
2071 * Several options request whether combining characters
2072 * should be kept after their base characters,
2073 * whether Bidi control characters should be removed, and
2074 * whether characters should be replaced by their mirror-image
2075 * equivalent Unicode characters.
2077 int32_t i
, j
, destSize
;
2080 /* optimize for several combinations of options */
2081 switch(options
&(NSBIDI_REMOVE_BIDI_CONTROLS
|NSBIDI_DO_MIRRORING
|NSBIDI_KEEP_BASE_COMBINING
)) {
2084 * With none of the "complicated" options set, the destination
2085 * run will have the same length as the source run,
2086 * and there is no mirroring and no keeping combining characters
2087 * with their base characters.
2091 /* preserve character integrity */
2093 /* i is always after the last code unit known to need to be kept in this segment */
2096 /* collect code units for one base character */
2097 UTF_BACK_1(src
, 0, srcLength
);
2099 /* copy this base character */
2104 } while(srcLength
>0);
2106 case NSBIDI_KEEP_BASE_COMBINING
:
2108 * Here, too, the destination
2109 * run will have the same length as the source run,
2110 * and there is no mirroring.
2111 * We do need to keep combining characters with their base characters.
2115 /* preserve character integrity */
2117 /* i is always after the last code unit known to need to be kept in this segment */
2120 /* collect code units and modifier letters for one base character */
2122 UTF_PREV_CHAR(src
, 0, srcLength
, c
);
2123 } while(srcLength
>0 && IsBidiCategory(c
, eBidiCat_NSM
));
2125 /* copy this "user character" */
2130 } while(srcLength
>0);
2134 * With several "complicated" options set, this is the most
2135 * general and the slowest copying of an RTL run.
2136 * We will do mirroring, remove Bidi controls, and
2137 * keep combining characters with their base characters
2140 if(!(options
&NSBIDI_REMOVE_BIDI_CONTROLS
)) {
2143 /* we need to find out the destination length of the run,
2144 which will not include the Bidi control characters */
2145 int32_t length
=srcLength
;
2151 if (!IsBidiControl((uint32_t)ch
)) {
2154 } while(--length
>0);
2159 /* preserve character integrity */
2161 /* i is always after the last code unit known to need to be kept in this segment */
2164 /* collect code units for one base character */
2165 UTF_PREV_CHAR(src
, 0, srcLength
, c
);
2166 if(options
&NSBIDI_KEEP_BASE_COMBINING
) {
2167 /* collect modifier letters for this base character */
2168 while(srcLength
>0 && IsBidiCategory(c
, eBidiCat_NSM
)) {
2169 UTF_PREV_CHAR(src
, 0, srcLength
, c
);
2173 if(options
&NSBIDI_REMOVE_BIDI_CONTROLS
&& IsBidiControl(c
)) {
2174 /* do not copy this Bidi control character */
2178 /* copy this "user character" */
2180 if(options
&NSBIDI_DO_MIRRORING
) {
2181 /* mirror only the base character */
2185 UTF_APPEND_CHAR_UNSAFE(dest
, k
, c
);
2192 } while(srcLength
>0);
2194 } /* end of switch */
2198 nsresult
nsBidi::WriteReverse(const char16_t
*aSrc
, int32_t aSrcLength
, char16_t
*aDest
, uint16_t aOptions
, int32_t *aDestSize
)
2200 if( aSrc
==nullptr || aSrcLength
<0 ||
2203 return NS_ERROR_INVALID_ARG
;
2206 /* do input and output overlap? */
2207 if( aSrc
>=aDest
&& aSrc
<aDest
+aSrcLength
||
2208 aDest
>=aSrc
&& aDest
<aSrc
+aSrcLength
2210 return NS_ERROR_INVALID_ARG
;
2214 *aDestSize
= doWriteReverse(aSrc
, aSrcLength
, aDest
, aOptions
);
2218 #endif // FULL_BIDI_ENGINE