Bug 846320 - Remove SimpleTest.expectAssertions in test_seek.html. r=dbaron
[gecko.git] / layout / base / nsBidi.h
blob6e76503cf6a2803ccb48968e6561a45c3e035474
1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef nsBidi_h__
8 #define nsBidi_h__
10 #include "nsCOMPtr.h"
11 #include "nsString.h"
12 #include "nsBidiUtils.h"
// Bidi reordering engine from ICU
/*
 * javadoc-style comments are intended to be transformed into HTML
 * using DOC++ - see
 * http://www.zib.de/Visual/software/doc++/index.html .
 *
 * The HTML documentation is created with
 * doc++ -H nsBidi.h
 */
/**
 * @mainpage BIDI algorithm for Mozilla (from ICU)
 *
 * <h2>BIDI algorithm for Mozilla</h2>
 *
 * This is an implementation of the Unicode Bidirectional algorithm.
 * The algorithm is defined in the
 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
 * version 5, also described in The Unicode Standard, Version 3.0 .<p>
 *
 * <h3>General remarks about the API:</h3>
 *
 * The <quote>limit</quote> of a sequence of characters is the position just after their
 * last character, i.e., one more than that position.<p>
 *
 * Some of the API functions provide access to <quote>runs</quote>.
 * Such a <quote>run</quote> is defined as a sequence of characters
 * that are at the same embedding level
 * after performing the BIDI algorithm.<p>
 *
 * @author Markus W. Scherer. Ported to Mozilla by Simon Montagu
 * @version 1.0
 */
/**
 * nsBidiLevel is the type of the level values in this
 * Bidi implementation.
 * It holds an embedding level and indicates the visual direction
 * by its bit 0 (even/odd value).<p>
 *
 * It can also hold non-level values for the
 * <code>aParaLevel</code> and <code>aEmbeddingLevels</code>
 * arguments of <code>SetPara</code>; there:
 * <ul>
 * <li>bit 7 of an <code>aEmbeddingLevels[]</code>
 * value indicates whether the using application is
 * specifying the level of a character to <i>override</i> whatever the
 * Bidi implementation would resolve it to.</li>
 * <li><code>aParaLevel</code> can be set to the
 * pseudo-level values <code>NSBIDI_DEFAULT_LTR</code>
 * and <code>NSBIDI_DEFAULT_RTL</code>.</li></ul>
 *
 * @see nsBidi::SetPara
 *
 * <p>The related constants are not real, valid level values.
 * <code>NSBIDI_DEFAULT_XXX</code> can be used to specify
 * a default for the paragraph level for
 * when the <code>SetPara</code> function
 * shall determine it but there is no
 * strongly typed character in the input.<p>
 *
 * Note that the value for <code>NSBIDI_DEFAULT_LTR</code> is even
 * and the one for <code>NSBIDI_DEFAULT_RTL</code> is odd,
 * just like with normal LTR and RTL level values -
 * these special values are designed that way. Also, the implementation
 * assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd.
 *
 * @see NSBIDI_DEFAULT_LTR
 * @see NSBIDI_DEFAULT_RTL
 * @see NSBIDI_LEVEL_OVERRIDE
 * @see NSBIDI_MAX_EXPLICIT_LEVEL
 */
typedef uint8_t nsBidiLevel;
/** Paragraph level setting.
 * If there is no strong character, then set the paragraph level to 0 (left-to-right).
 */
#define NSBIDI_DEFAULT_LTR 0xfe

/** Paragraph level setting.
 * If there is no strong character, then set the paragraph level to 1 (right-to-left).
 */
#define NSBIDI_DEFAULT_RTL 0xff

/**
 * Maximum explicit embedding level.
 * (The maximum resolved level can be up to <code>NSBIDI_MAX_EXPLICIT_LEVEL+1</code>).
 */
#define NSBIDI_MAX_EXPLICIT_LEVEL 61

/** Bit flag for level input.
 * Overrides directional properties.
 * This is bit 7 of a level value.
 */
#define NSBIDI_LEVEL_OVERRIDE 0x80
/**
 * <code>nsBidiDirection</code> values indicate the text direction.
 */
enum nsBidiDirection {
  NSBIDI_LTR = 0,   /**< All left-to-right text. This is a 0 value. */
  NSBIDI_RTL = 1,   /**< All right-to-left text. This is a 1 value. */
  NSBIDI_MIXED = 2  /**< Mixed-directional text. */
};

typedef enum nsBidiDirection nsBidiDirection;
/* miscellaneous definitions ------------------------------------------------ */
/** option flags for WriteReverse() */
/**
 * option bit for WriteReverse():
 * keep combining characters after their base characters in RTL runs
 *
 * @see WriteReverse
 */
#define NSBIDI_KEEP_BASE_COMBINING 1

/**
 * option bit for WriteReverse():
 * replace characters with the "mirrored" property in RTL runs
 * by their mirror-image mappings
 *
 * @see WriteReverse
 */
#define NSBIDI_DO_MIRRORING 2

/**
 * option bit for WriteReverse():
 * remove Bidi control characters
 *
 * @see WriteReverse
 */
#define NSBIDI_REMOVE_BIDI_CONTROLS 8
/*
 * Helper macros for each allocated array member.
 * Each one forwards to GetMemory() with the address of the member buffer,
 * the address of its recorded size, the applicable "may allocate" flag and
 * the requested size.  They can only be expanded where the nsBidi members
 * they name are in scope.
 */
#define GETDIRPROPSMEMORY(length) \
  GetMemory((void **)&mDirPropsMemory, &mDirPropsSize, mMayAllocateText, (length))

#define GETLEVELSMEMORY(length) \
  GetMemory((void **)&mLevelsMemory, &mLevelsSize, mMayAllocateText, (length))

/* the runs buffer is sized in Run elements, so scale to bytes here */
#define GETRUNSMEMORY(length) \
  GetMemory((void **)&mRunsMemory, &mRunsSize, mMayAllocateRuns, (length)*sizeof(Run))

/* additional macros used by constructor - always allow allocation */
#define GETINITIALDIRPROPSMEMORY(length) \
  GetMemory((void **)&mDirPropsMemory, &mDirPropsSize, true, (length))

#define GETINITIALLEVELSMEMORY(length) \
  GetMemory((void **)&mLevelsMemory, &mLevelsSize, true, (length))

#define GETINITIALRUNSMEMORY(length) \
  GetMemory((void **)&mRunsMemory, &mRunsSize, true, (length)*sizeof(Run))
/*
 * Sometimes, bit values are more appropriate
 * to deal with directionality properties.
 * Abbreviations in these macro names refer to names
 * used in the Bidi algorithm.
 */
typedef uint8_t DirProp;

/* one bit per directional property value */
#define DIRPROP_FLAG(dir) (1UL<<(dir))

/* special flag for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)

/* are there any characters that are LTR or RTL? */
#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))

/* explicit embedding codes */
#define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))

#define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)

/* paragraph and segment separators */
#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
#define MASK_N (DIRPROP_FLAG(O_N)|MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)

/*
 * These types may be changed to "e",
 * the embedding type (L or R) of the run,
 * in the Bidi algorithm (N2)
 */
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)

/* the dirProp's L and R are defined to 0 and 1 values in nsCharType */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))

/* true for the two pseudo-levels 0xfe and 0xff (NSBIDI_DEFAULT_LTR/RTL) */
#define IS_DEFAULT_LEVEL(level) (((level)&0xfe)==0xfe)
/* handle surrogate pairs --------------------------------------------------- */

/* a first (lead) surrogate has its top six bits equal to those of 0xd800 */
#define IS_FIRST_SURROGATE(uchar) (((uchar)&0xfc00)==0xd800)
/* a second (trail) surrogate has its top six bits equal to those of 0xdc00 */
#define IS_SECOND_SURROGATE(uchar) (((uchar)&0xfc00)==0xdc00)

/* get the UTF-32 value directly from the surrogate pseudo-characters */
#define SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
#define GET_UTF_32(first, second) (((first)<<10UL)+(second)-SURROGATE_OFFSET)

/* value written or returned by the _SAFE macros when they detect an error */
#define UTF_ERROR_VALUE 0xffff
/* definitions with forward iteration --------------------------------------- */

/*
 * all the macros that go forward assume that
 * the initial offset is 0<=i<length;
 * they update the offset
 */

/* fast versions, no error-checking */

/*
 * Append the code point c to the buffer s at offset i and advance i:
 * one code unit when c<=0xffff, otherwise a surrogate pair (two units).
 * Neither the buffer capacity nor the range of c is checked.
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
  if((uint32_t)(c)>0xffff) { \
    /* lead unit = 0xd7c0 + (c>>10), trail unit = 0xdc00 | low ten bits */ \
    (s)[(i)++]=((PRUnichar)((c)>>10))+0xd7c0; \
    (s)[(i)++]=(((PRUnichar)(c))&0x3ff)|0xdc00; \
  } else { \
    (s)[(i)++]=(PRUnichar)(c); \
  } \
}
/* safe versions with error-checking and optional regularity-checking */

/*
 * Append the code point c to the buffer s (capacity: length code units)
 * at offset i and advance i.
 *  - c<=0xffff is stored as a single code unit.
 *  - c<=0x10ffff is stored as a surrogate pair when two units still fit
 *    ((i)+1<(length)); otherwise a single UTF_ERROR_VALUE is stored.
 *  - c>0x10ffff stores a single UTF_ERROR_VALUE.
 * Like every forward macro here, the caller must ensure i<length on entry;
 * at least one unit is always written.
 *
 * Uses uint32_t for the range checks, matching UTF16_APPEND_CHAR_UNSAFE
 * (the former PRUInt32 spelling is not used anywhere else in this header).
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
  if((uint32_t)(c)<=0xffff) { \
    (s)[(i)++]=(PRUnichar)(c); \
  } else if((uint32_t)(c)<=0x10ffff) { \
    if((i)+1<(length)) { \
      (s)[(i)++]=((PRUnichar)((c)>>10))+0xd7c0; \
      (s)[(i)++]=(((PRUnichar)(c))&0x3ff)|0xdc00; \
    } else /* not enough space */ { \
      (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
  } else /* c>0x10ffff, write error value */ { \
    (s)[(i)++]=UTF_ERROR_VALUE; \
  } \
}
/* definitions with backward iteration -------------------------------------- */

/*
 * all the macros that go backward assume that
 * the valid buffer range starts at offset 0
 * and that the initial offset is 0<i<=length;
 * they update the offset
 */

/* fast versions, no error-checking */

/*
 * Get a single code point from an offset that points behind the last
 * of the code units that belong to that code point.
 * Assume 0<i<=length (the offset is decremented before it is used).
 * If the unit read is a second surrogate, the unit before it is consumed
 * as well without checking that it is a first surrogate.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
  (c)=(s)[--(i)]; \
  if(IS_SECOND_SURROGATE(c)) { \
    /* combine with the preceding (lead) unit */ \
    (c)=GET_UTF_32((s)[--(i)], (c)); \
  } \
}
/*
 * Move offset i back by one code point: one code unit, or two when the
 * unit stepped over is a second surrogate.  No bounds checking.
 */
#define UTF16_BACK_1_UNSAFE(s, i) { \
  --(i); \
  if(IS_SECOND_SURROGATE((s)[(i)])) { \
    --(i); \
  } \
}
/*
 * Move offset i back by n code points using UTF16_BACK_1_UNSAFE.
 * n is evaluated once; nothing happens when n<=0.  No bounds checking.
 * The loop counter avoids a double-underscore name, which is reserved
 * for the implementation.
 */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
  int32_t backCount_=(n); \
  while(backCount_>0) { \
    UTF16_BACK_1_UNSAFE(s, i); \
    --backCount_; \
  } \
}
/* safe versions with error-checking and optional regularity-checking */

/*
 * Read the code point that ends just before offset i into c and move i
 * back over it; never reads below offset start.
 *  - A second surrogate preceded (at an offset >= start) by a first
 *    surrogate is combined into one supplementary code point.
 *  - An unpaired surrogate is returned as-is when strict is false and
 *    replaced by UTF_ERROR_VALUE when strict is true.
 * The temporary avoids a double-underscore name, which is reserved for
 * the implementation.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
  (c)=(s)[--(i)]; \
  if(IS_SECOND_SURROGATE(c)) { \
    PRUnichar leadUnit_; \
    if((i)>(start) && IS_FIRST_SURROGATE(leadUnit_=(s)[(i)-1])) { \
      --(i); \
      (c)=GET_UTF_32(leadUnit_, (c)); \
      /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() */ \
    } else if(strict) {\
      /* unmatched second surrogate */ \
      (c)=UTF_ERROR_VALUE; \
    } \
  } else if(strict && IS_FIRST_SURROGATE(c)) { \
    /* unmatched first surrogate */ \
    (c)=UTF_ERROR_VALUE; \
    /* else strict: (c)==0xfffe is caught by UTF_IS_ERROR() */ \
  } \
}
/*
 * Move offset i back by one code point without reading below start:
 * always steps over one code unit, and over a second one only when the
 * first was a second surrogate that is preceded, at an offset >= start,
 * by a first surrogate.
 */
#define UTF16_BACK_1_SAFE(s, start, i) { \
  if(IS_SECOND_SURROGATE((s)[--(i)])) { \
    if((i)>(start) && IS_FIRST_SURROGATE((s)[(i)-1])) { \
      --(i); \
    } \
  } \
}
/*
 * Move offset i back by up to n code points using UTF16_BACK_1_SAFE,
 * stopping early once i reaches start.  n is evaluated once.
 * The loop counter avoids a double-underscore name, which is reserved
 * for the implementation.
 */
#define UTF16_BACK_N_SAFE(s, start, i, n) { \
  int32_t backCount_=(n); \
  while(backCount_>0 && (i)>(start)) { \
    UTF16_BACK_1_SAFE(s, start, i); \
    --backCount_; \
  } \
}
/* encoding-neutral names: each UTF_* macro forwards to its UTF16_* counterpart */
#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)
#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)
#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/* default names: these select the _SAFE variants (non-strict for UTF_PREV_CHAR) */
#define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, false)
#define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i)
#define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n)
#define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c)
/* Run structure for reordering --------------------------------------------- */

/* One run of text used while reordering. */
typedef struct Run {
  /* first character of the run; b31 indicates even/odd level */
  int32_t logicalStart;
  /* last visual position of the run +1 */
  int32_t visualLimit;
} Run;
/* in a Run, logicalStart will get this bit set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)

/*
 * Helpers for packing a run's odd/even level bit into bit 31 of its index.
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. a|b or level&1) are grouped as the caller wrote them.
 */

/* combine an index with bit 0 of level moved to bit 31 (level must be 0 or 1) */
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((uint32_t)(level)<<31))
/* set bit 31 of the lvalue x from level (level must be 0 or 1) */
#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((uint32_t)(level)<<31))
/* clear bit 31 of the lvalue x */
#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT)

/* x with bit 31 cleared; the result has the type of INDEX_ODD_BIT's operand promotion */
#define GET_INDEX(x) ((x)&~INDEX_ODD_BIT)
/* bit 31 of x as 0 or 1 */
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x) (((x)&INDEX_ODD_BIT)!=0)
#define IS_EVEN_RUN(x) (((x)&INDEX_ODD_BIT)==0)
379 typedef uint32_t Flags;
382 * This class holds information about a paragraph of text
383 * with Bidi-algorithm-related details, or about one line of
384 * such a paragraph.<p>
385 * Reordering can be done on a line, or on a paragraph which is
386 * then interpreted as one single line.<p>
388 * On construction, the class is initially empty. It is assigned
389 * the Bidi properties of a paragraph by <code>SetPara</code>
390 * or the Bidi properties of a line of a paragraph by
391 * <code>SetLine</code>.<p>
392 * A Bidi class can be reused for as long as it is not deallocated
393 * by calling its destructor.<p>
394 * <code>SetPara</code> will allocate additional memory for
395 * internal structures as necessary.
397 class nsBidi
399 public:
  /** @brief Default constructor.
   *
   * The nsBidi object is initially empty. It is assigned
   * the Bidi properties of a paragraph by <code>SetPara()</code>
   * or the Bidi properties of a line of a paragraph by
   * <code>SetLine()</code>.<p>
   * This object can be reused for as long as it is not destroyed.<p>
   * <code>SetPara()</code> will allocate additional memory for
   * internal structures as necessary.
   */
411 nsBidi();
413 /** @brief Destructor. */
414 virtual ~nsBidi();
418 * Perform the Unicode Bidi algorithm. It is defined in the
419 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
420 * version 5,
421 * also described in The Unicode Standard, Version 3.0 .<p>
423 * This function takes a single plain text paragraph with or without
424 * externally specified embedding levels from <quote>styled</quote> text
425 * and computes the left-right-directionality of each character.<p>
427 * If the entire paragraph consists of text of only one direction, then
428 * the function may not perform all the steps described by the algorithm,
429 * i.e., some levels may not be the same as if all steps were performed.
430 * This is not relevant for unidirectional text.<br>
431 * For example, in pure LTR text with numbers the numbers would get
432 * a resolved level of 2 higher than the surrounding text according to
433 * the algorithm. This implementation may set all resolved levels to
434 * the same value in such a case.<p>
436 * The text must be externally split into separate paragraphs (rule P1).
437 * Paragraph separators (B) should appear at most at the very end.
439 * @param aText is a pointer to the single-paragraph text that the
440 * Bidi algorithm will be performed on
441 * (step (P1) of the algorithm is performed externally).
442 * <strong>The text must be (at least) <code>aLength</code> long.</strong>
444 * @param aLength is the length of the text; if <code>aLength==-1</code> then
445 * the text must be zero-terminated.
447 * @param aParaLevel specifies the default level for the paragraph;
448 * it is typically 0 (LTR) or 1 (RTL).
449 * If the function shall determine the paragraph level from the text,
450 * then <code>aParaLevel</code> can be set to
451 * either <code>NSBIDI_DEFAULT_LTR</code>
452 * or <code>NSBIDI_DEFAULT_RTL</code>;
453 * if there is no strongly typed character, then
454 * the desired default is used (0 for LTR or 1 for RTL).
455 * Any other value between 0 and <code>NSBIDI_MAX_EXPLICIT_LEVEL</code> is also valid,
456 * with odd levels indicating RTL.
458 * @param aEmbeddingLevels (in) may be used to preset the embedding and override levels,
459 * ignoring characters like LRE and PDF in the text.
460 * A level overrides the directional property of its corresponding
461 * (same index) character if the level has the
462 * <code>NSBIDI_LEVEL_OVERRIDE</code> bit set.<p>
463 * Except for that bit, it must be
464 * <code>aParaLevel<=aEmbeddingLevels[]<=NSBIDI_MAX_EXPLICIT_LEVEL</code>.<p>
465 * <strong>Caution: </strong>A copy of this pointer, not of the levels,
466 * will be stored in the <code>nsBidi</code> object;
467 * the <code>aEmbeddingLevels</code> array must not be
468 * deallocated before the <code>nsBidi</code> object is destroyed or reused,
469 * and the <code>aEmbeddingLevels</code>
470 * should not be modified to avoid unexpected results on subsequent Bidi operations.
471 * However, the <code>SetPara</code> and
472 * <code>SetLine</code> functions may modify some or all of the levels.<p>
473 * After the <code>nsBidi</code> object is reused or destroyed, the caller
474 * must take care of the deallocation of the <code>aEmbeddingLevels</code> array.<p>
475 * <strong>The <code>aEmbeddingLevels</code> array must be
476 * at least <code>aLength</code> long.</strong>
478 nsresult SetPara(const PRUnichar *aText, int32_t aLength, nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels);
481 * Get the directionality of the text.
483 * @param aDirection receives a <code>NSBIDI_XXX</code> value that indicates if the entire text
484 * represented by this object is unidirectional,
485 * and which direction, or if it is mixed-directional.
487 * @see nsBidiDirection
489 nsresult GetDirection(nsBidiDirection* aDirection);
492 * Get the paragraph level of the text.
494 * @param aParaLevel receives a <code>NSBIDI_XXX</code> value indicating the paragraph level
496 * @see nsBidiLevel
498 nsresult GetParaLevel(nsBidiLevel* aParaLevel);
500 #ifdef FULL_BIDI_ENGINE
502 * <code>SetLine</code> sets an <code>nsBidi</code> to
503 * contain the reordering information, especially the resolved levels,
504 * for all the characters in a line of text. This line of text is
505 * specified by referring to an <code>nsBidi</code> object representing
506 * this information for a paragraph of text, and by specifying
507 * a range of indexes in this paragraph.<p>
508 * In the new line object, the indexes will range from 0 to <code>aLimit-aStart</code>.<p>
510 * This is used after calling <code>SetPara</code>
511 * for a paragraph, and after line-breaking on that paragraph.
512 * It is not necessary if the paragraph is treated as a single line.<p>
514 * After line-breaking, rules (L1) and (L2) for the treatment of
515 * trailing WS and for reordering are performed on
516 * an <code>nsBidi</code> object that represents a line.<p>
518 * <strong>Important:</strong> the line <code>nsBidi</code> object shares data with
519 * <code>aParaBidi</code>.
520 * You must destroy or reuse this object before <code>aParaBidi</code>.
521 * In other words, you must destroy or reuse the <code>nsBidi</code> object for a line
522 * before the object for its parent paragraph.
524 * @param aParaBidi is the parent paragraph object.
526 * @param aStart is the line's first index into the paragraph text.
528 * @param aLimit is just behind the line's last index into the paragraph text
529 * (its last index +1).<br>
530 * It must be <code>0<=aStart<=aLimit<=</code>paragraph length.
532 * @see SetPara
534 nsresult SetLine(nsIBidi* aParaBidi, int32_t aStart, int32_t aLimit);
537 * Get the length of the text.
539 * @param aLength receives the length of the text that the nsBidi object was created for.
541 nsresult GetLength(int32_t* aLength);
544 * Get the level for one character.
546 * @param aCharIndex the index of a character.
548 * @param aLevel receives the level for the character at aCharIndex.
550 * @see nsBidiLevel
552 nsresult GetLevelAt(int32_t aCharIndex, nsBidiLevel* aLevel);
555 * Get an array of levels for each character.<p>
557 * Note that this function may allocate memory under some
558 * circumstances, unlike <code>GetLevelAt</code>.
560 * @param aLevels receives a pointer to the levels array for the text,
561 * or <code>NULL</code> if an error occurs.
563 * @see nsBidiLevel
565 nsresult GetLevels(nsBidiLevel** aLevels);
566 #endif // FULL_BIDI_ENGINE
568 * Get the bidirectional type for one character.
570 * @param aCharIndex the index of a character.
572 * @param aType receives the bidirectional type of the character at aCharIndex.
574 nsresult GetCharTypeAt(int32_t aCharIndex, nsCharType* aType);
577 * Get a logical run.
578 * This function returns information about a run and is used
579 * to retrieve runs in logical order.<p>
580 * This is especially useful for line-breaking on a paragraph.
582 * @param aLogicalStart is the first character of the run.
584 * @param aLogicalLimit will receive the limit of the run.
585 * The l-value that you point to here may be the
586 * same expression (variable) as the one for
587 * <code>aLogicalStart</code>.
588 * This pointer can be <code>NULL</code> if this
589 * value is not necessary.
591 * @param aLevel will receive the level of the run.
592 * This pointer can be <code>NULL</code> if this
593 * value is not necessary.
595 nsresult GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimit, nsBidiLevel* aLevel);
598 * Get the number of runs.
599 * This function may invoke the actual reordering on the
600 * <code>nsBidi</code> object, after <code>SetPara</code>
601 * may have resolved only the levels of the text. Therefore,
602 * <code>CountRuns</code> may have to allocate memory,
603 * and may fail doing so.
605 * @param aRunCount will receive the number of runs.
607 nsresult CountRuns(int32_t* aRunCount);
610 * Get one run's logical start, length, and directionality,
611 * which can be 0 for LTR or 1 for RTL.
612 * In an RTL run, the character at the logical start is
613 * visually on the right of the displayed run.
614 * The length is the number of characters in the run.<p>
615 * <code>CountRuns</code> should be called
616 * before the runs are retrieved.
618 * @param aRunIndex is the number of the run in visual order, in the
619 * range <code>[0..CountRuns-1]</code>.
621 * @param aLogicalStart is the first logical character index in the text.
622 * The pointer may be <code>NULL</code> if this index is not needed.
624 * @param aLength is the number of characters (at least one) in the run.
625 * The pointer may be <code>NULL</code> if this is not needed.
627 * @param aDirection will receive the directionality of the run,
628 * <code>NSBIDI_LTR==0</code> or <code>NSBIDI_RTL==1</code>,
629 * never <code>NSBIDI_MIXED</code>.
631 * @see CountRuns<p>
633 * Example:
634 * @code
635 * int32_t i, count, logicalStart, visualIndex=0, length;
636 * nsBidiDirection dir;
637 * pBidi->CountRuns(&count);
638 * for(i=0; i<count; ++i) {
639 * pBidi->GetVisualRun(i, &logicalStart, &length, &dir);
640 * if(NSBIDI_LTR==dir) {
641 * do { // LTR
642 * show_char(text[logicalStart++], visualIndex++);
643 * } while(--length>0);
644 * } else {
645 * logicalStart+=length; // logicalLimit
646 * do { // RTL
647 * show_char(text[--logicalStart], visualIndex++);
648 * } while(--length>0);
651 * @endcode
653 * Note that in right-to-left runs, code like this places
654 * modifier letters before base characters and second surrogates
655 * before first ones.
657 nsresult GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart, int32_t* aLength, nsBidiDirection* aDirection);
659 #ifdef FULL_BIDI_ENGINE
661 * Get the visual position from a logical text position.
662 * If such a mapping is used many times on the same
663 * <code>nsBidi</code> object, then calling
664 * <code>GetLogicalMap</code> is more efficient.<p>
666 * Note that in right-to-left runs, this mapping places
667 * modifier letters before base characters and second surrogates
668 * before first ones.
670 * @param aLogicalIndex is the index of a character in the text.
672 * @param aVisualIndex will receive the visual position of this character.
674 * @see GetLogicalMap
675 * @see GetLogicalIndex
677 nsresult GetVisualIndex(int32_t aLogicalIndex, int32_t* aVisualIndex);
680 * Get the logical text position from a visual position.
681 * If such a mapping is used many times on the same
682 * <code>nsBidi</code> object, then calling
683 * <code>GetVisualMap</code> is more efficient.<p>
685 * This is the inverse function to <code>GetVisualIndex</code>.
687 * @param aVisualIndex is the visual position of a character.
689 * @param aLogicalIndex will receive the index of this character in the text.
691 * @see GetVisualMap
692 * @see GetVisualIndex
694 nsresult GetLogicalIndex(int32_t aVisualIndex, int32_t* aLogicalIndex);
697 * Get a logical-to-visual index map (array) for the characters in the nsBidi
698 * (paragraph or line) object.
700 * @param aIndexMap is a pointer to an array of <code>GetLength</code>
701 * indexes which will reflect the reordering of the characters.
702 * The array does not need to be initialized.<p>
703 * The index map will result in <code>aIndexMap[aLogicalIndex]==aVisualIndex</code>.<p>
705 * @see GetVisualMap
706 * @see GetVisualIndex
708 nsresult GetLogicalMap(int32_t *aIndexMap);
711 * Get a visual-to-logical index map (array) for the characters in the nsBidi
712 * (paragraph or line) object.
714 * @param aIndexMap is a pointer to an array of <code>GetLength</code>
715 * indexes which will reflect the reordering of the characters.
716 * The array does not need to be initialized.<p>
717 * The index map will result in <code>aIndexMap[aVisualIndex]==aLogicalIndex</code>.<p>
719 * @see GetLogicalMap
720 * @see GetLogicalIndex
722 nsresult GetVisualMap(int32_t *aIndexMap);
725 * This is a convenience function that does not use a nsBidi object.
726 * It is intended to be used for when an application has determined the levels
727 * of objects (character sequences) and just needs to have them reordered (L2).
728 * This is equivalent to using <code>GetLogicalMap</code> on a
729 * <code>nsBidi</code> object.
731 * @param aLevels is an array with <code>aLength</code> levels that have been determined by
732 * the application.
734 * @param aLength is the number of levels in the array, or, semantically,
735 * the number of objects to be reordered.
736 * It must be <code>aLength>0</code>.
738 * @param aIndexMap is a pointer to an array of <code>aLength</code>
739 * indexes which will reflect the reordering of the characters.
740 * The array does not need to be initialized.<p>
741 * The index map will result in <code>aIndexMap[aLogicalIndex]==aVisualIndex</code>.
743 static nsresult ReorderLogical(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap);
744 #endif // FULL_BIDI_ENGINE
746 * This is a convenience function that does not use a nsBidi object.
747 * It is intended to be used for when an application has determined the levels
748 * of objects (character sequences) and just needs to have them reordered (L2).
749 * This is equivalent to using <code>GetVisualMap</code> on a
750 * <code>nsBidi</code> object.
752 * @param aLevels is an array with <code>aLength</code> levels that have been determined by
753 * the application.
755 * @param aLength is the number of levels in the array, or, semantically,
756 * the number of objects to be reordered.
757 * It must be <code>aLength>0</code>.
759 * @param aIndexMap is a pointer to an array of <code>aLength</code>
760 * indexes which will reflect the reordering of the characters.
761 * The array does not need to be initialized.<p>
762 * The index map will result in <code>aIndexMap[aVisualIndex]==aLogicalIndex</code>.
764 static nsresult ReorderVisual(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap);
766 #ifdef FULL_BIDI_ENGINE
768 * Invert an index map.
769 * The one-to-one index mapping of the first map is inverted and written to
770 * the second one.
772 * @param aSrcMap is an array with <code>aLength</code> indexes
773 * which define the original mapping.
775 * @param aDestMap is an array with <code>aLength</code> indexes
776 * which will be filled with the inverse mapping.
778 * @param aLength is the length of each array.
780 nsresult InvertMap(const int32_t *aSrcMap, int32_t *aDestMap, int32_t aLength);
781 #endif // FULL_BIDI_ENGINE
783 * Reverse a Right-To-Left run of Unicode text.
785 * This function preserves the integrity of characters with multiple
786 * code units and (optionally) modifier letters.
787 * Characters can be replaced by mirror-image characters
788 * in the destination buffer. Note that "real" mirroring has
789 * to be done in a rendering engine by glyph selection
790 * and that for many "mirrored" characters there are no
791 * Unicode characters as mirror-image equivalents.
792 * There are also options to insert or remove Bidi control
793 * characters; see the description of the <code>aDestSize</code>
794 * and <code>aOptions</code> parameters and of the option bit flags.
796 * Since no Bidi controls are inserted here, this function will never
797 * write more than <code>aSrcLength</code> characters to <code>aDest</code>.
799 * @param aSrc A pointer to the RTL run text.
801 * @param aSrcLength The length of the RTL run.
802 * If the <code>NSBIDI_REMOVE_BIDI_CONTROLS</code> option
803 * is set, then the destination length may be less than
804 * <code>aSrcLength</code>.
805 * If this option is not set, then the destination length
806 * will be exactly <code>aSrcLength</code>.
808 * @param aDest A pointer to where the reordered text is to be copied.
809 * <code>aSrc[aSrcLength]</code> and <code>aDest[aSrcLength]</code>
810 * must not overlap.
812 * @param aOptions A bit set of options for the reordering that control
813 * how the reordered text is written.
815 * @param aDestSize will receive the number of characters that were written to <code>aDest</code>.
817 nsresult WriteReverse(const PRUnichar *aSrc, int32_t aSrcLength, PRUnichar *aDest, uint16_t aOptions, int32_t *aDestSize);
819 protected:
820 friend class nsBidiPresUtils;
822 /** length of the current text */
823 int32_t mLength;
825 /** memory sizes in bytes */
826 size_t mDirPropsSize, mLevelsSize, mRunsSize;
828 /** allocated memory */
829 DirProp* mDirPropsMemory;
830 nsBidiLevel* mLevelsMemory;
831 Run* mRunsMemory;
833 /** indicators for whether memory may be allocated after construction */
834 bool mMayAllocateText, mMayAllocateRuns;
836 const DirProp* mDirProps;
837 nsBidiLevel* mLevels;
839 /** the paragraph level */
840 nsBidiLevel mParaLevel;
842 /** flags is a bit set for which directional properties are in the text */
843 Flags mFlags;
845 /** the overall paragraph or line directionality - see nsBidiDirection */
846 nsBidiDirection mDirection;
848 /** characters after trailingWSStart are WS and are */
849 /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
850 int32_t mTrailingWSStart;
852 /** fields for line reordering */
853 int32_t mRunCount; /* ==-1: runs not set up yet */
854 Run* mRuns;
856 /** for non-mixed text, we only need a tiny array of runs (no malloc()) */
857 Run mSimpleRuns[1];
859 private:
861 void Init();
863 bool GetMemory(void **aMemory, size_t* aSize, bool aMayAllocate, size_t aSizeNeeded);
865 void Free();
867 void GetDirProps(const PRUnichar *aText);
869 nsBidiDirection ResolveExplicitLevels();
871 nsresult CheckExplicitLevels(nsBidiDirection *aDirection);
873 nsBidiDirection DirectionFromFlags(Flags aFlags);
875 void ResolveImplicitLevels(int32_t aStart, int32_t aLimit, DirProp aSOR, DirProp aEOR);
877 void AdjustWSLevels();
879 void SetTrailingWSStart();
881 bool GetRuns();
883 void GetSingleRun(nsBidiLevel aLevel);
885 void ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel);
887 static bool PrepareReorder(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap, nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel);
889 int32_t doWriteReverse(const PRUnichar *src, int32_t srcLength,
890 PRUnichar *dest, uint16_t options);
#endif // nsBidi_h__