uiautomationcore: Put general purpose helper functions into separate source file.
[wine.git] / dlls / dwrite / bidi.c
blob796f276e2e9c0368daff25bfb344fa0bcd7376cd
1 /*
2 * Unicode Bidirectional Algorithm implementation
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include <stdarg.h>
46 #include "windef.h"
47 #include "winbase.h"
48 #include "wine/debug.h"
50 #include "dwrite_private.h"
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
54 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
55 extern const unsigned short bidi_direction_table[] DECLSPEC_HIDDEN;
57 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
58 #define MAX_DEPTH 125
60 #define odd(x) ((x) & 1)
62 /*------------------------------------------------------------------------
63 Bidirectional Character Types
65 as defined by the Unicode Bidirectional Algorithm Table 3-7.
67 Note:
69 The list of bidirectional character types here is not grouped the
70 same way as the table 3-7, since the numeric values for the types
71 are chosen to keep the state and action tables compact.
72 ------------------------------------------------------------------------*/
73 enum directions
75 /* input types */
76 /* ON MUST be zero, code relies on ON = NI = 0 */
77 ON = 0, /* Other Neutral */
78 L, /* Left Letter */
79 R, /* Right Letter */
80 AN, /* Arabic Number */
81 EN, /* European Number */
82 AL, /* Arabic Letter (Right-to-left) */
83 NSM, /* Non-spacing Mark */
84 CS, /* Common Separator */
85 ES, /* European Separator */
86 ET, /* European Terminator (post/prefix e.g. $ and %) */
88 /* resolved types */
89 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
91 /* input types, */
92 S, /* Segment Separator (TAB) // used only in L1 */
93 WS, /* White space // used only in L1 */
94 B, /* Paragraph Separator (aka as PS) */
96 /* types for explicit controls */
97 RLO, /* these are used only in X1-X9 */
98 RLE,
99 LRO,
100 LRE,
101 PDF,
103 LRI, /* Isolate formatting characters new with 6.3 */
104 RLI,
105 FSI,
106 PDI,
108 /* resolved types, also resolved directions */
109 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
112 static const char debug_type[][4] =
114 "ON", /* Other Neutral */
115 "L", /* Left Letter */
116 "R", /* Right Letter */
117 "AN", /* Arabic Number */
118 "EN", /* European Number */
119 "AL", /* Arabic Letter (Right-to-left) */
120 "NSM", /* Non-spacing Mark */
121 "CS", /* Common Separator */
122 "ES", /* European Separator */
123 "ET", /* European Terminator (post/prefix e.g. $ and %) */
124 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
125 "S", /* Segment Separator (TAB) used only in L1 */
126 "WS", /* White space used only in L1 */
127 "B", /* Paragraph Separator (aka as PS) */
128 "RLO", /* these are used only in X1-X9 */
129 "RLE",
130 "LRO",
131 "LRE",
132 "PDF",
133 "LRI", /* Isolate formatting characters new with 6.3 */
134 "RLI",
135 "FSI",
136 "PDI",
139 static inline void bidi_dump_types(const char* header, const struct bidi_char *chars, UINT32 start, UINT32 end)
141 int i, len = 0;
142 TRACE("%s:", header);
143 for (i = start; i < end && len < 200; i++) {
144 TRACE(" %s", debug_type[chars[i].bidi_class]);
145 len += strlen(debug_type[chars[i].bidi_class]) + 1;
147 if (i != end)
148 TRACE("...");
149 TRACE("\n");
152 /* RESOLVE EXPLICIT */
154 static inline UINT8 get_greater_even_level(UINT8 level)
156 return odd(level) ? level + 1 : level + 2;
159 static inline UINT8 get_greater_odd_level(UINT8 level)
161 return odd(level) ? level + 2 : level + 1;
164 static inline UINT8 get_embedding_direction(UINT8 level)
166 return odd(level) ? R : L;
169 /*------------------------------------------------------------------------
170 Function: bidi_resolve_explicit
172 Recursively resolves explicit embedding levels and overrides.
173 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
175 Note: The function uses two simple counters to keep track of
176 matching explicit codes and PDF. Use the default argument for
177 the outermost call. The nesting counter counts the recursion
178 depth and not the embedding level.
179 ------------------------------------------------------------------------*/
180 typedef struct tagStackItem
182 UINT8 level;
183 UINT8 override;
184 BOOL isolate;
185 } StackItem;
187 #define push_stack(l,o,i) \
188 do { stack_top--; \
189 stack[stack_top].level = l; \
190 stack[stack_top].override = o; \
191 stack[stack_top].isolate = i;} while(0)
193 #define pop_stack() do { stack_top++; } while(0)
195 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
197 static void bidi_resolve_explicit(struct bidi_char *chars, unsigned int count, UINT8 baselevel)
199 /* X1 */
200 int overflow_isolate_count = 0;
201 int overflow_embedding_count = 0;
202 int valid_isolate_count = 0;
203 unsigned int i;
205 StackItem stack[MAX_DEPTH+2];
206 int stack_top = MAX_DEPTH+1;
208 stack[stack_top].level = baselevel;
209 stack[stack_top].override = NI;
210 stack[stack_top].isolate = FALSE;
212 for (i = 0; i < count; ++i)
214 struct bidi_char *c = &chars[i];
215 UINT8 least_odd, least_even;
217 switch (c->bidi_class)
220 /* X2 */
221 case RLE:
222 least_odd = get_greater_odd_level(stack[stack_top].level);
223 c->resolved = valid_level(least_odd) ? least_odd : stack[stack_top].level;
224 if (valid_level(least_odd))
225 push_stack(least_odd, NI, FALSE);
226 else if (overflow_isolate_count == 0)
227 overflow_embedding_count++;
228 break;
230 /* X3 */
231 case LRE:
232 least_even = get_greater_even_level(stack[stack_top].level);
233 c->resolved = valid_level(least_even) ? least_even : stack[stack_top].level;
234 if (valid_level(least_even))
235 push_stack(least_even, NI, FALSE);
236 else if (overflow_isolate_count == 0)
237 overflow_embedding_count++;
238 break;
240 /* X4 */
241 case RLO:
242 least_odd = get_greater_odd_level(stack[stack_top].level);
243 c->resolved = stack[stack_top].level;
244 if (valid_level(least_odd))
245 push_stack(least_odd, R, FALSE);
246 else if (overflow_isolate_count == 0)
247 overflow_embedding_count++;
248 break;
250 /* X5 */
251 case LRO:
252 least_even = get_greater_even_level(stack[stack_top].level);
253 c->resolved = stack[stack_top].level;
254 if (valid_level(least_even))
255 push_stack(least_even, L, FALSE);
256 else if (overflow_isolate_count == 0)
257 overflow_embedding_count++;
258 break;
260 /* X5a */
261 case RLI:
262 least_odd = get_greater_odd_level(stack[stack_top].level);
263 c->resolved = stack[stack_top].level;
264 if (valid_level(least_odd))
266 valid_isolate_count++;
267 push_stack(least_odd, NI, TRUE);
269 else
270 overflow_isolate_count++;
271 break;
273 /* X5b */
274 case LRI:
275 least_even = get_greater_even_level(stack[stack_top].level);
276 c->resolved = stack[stack_top].level;
277 if (valid_level(least_even))
279 valid_isolate_count++;
280 push_stack(least_even, NI, TRUE);
282 else
283 overflow_isolate_count++;
284 break;
286 /* X5c */
287 case FSI:
289 UINT8 new_level = 0;
290 int skipping = 0;
291 int j;
293 c->resolved = stack[stack_top].level;
294 for (j = i+1; j < count; j++)
296 const struct bidi_char *p = &chars[j];
298 if (p->bidi_class == LRI || p->bidi_class == RLI || p->bidi_class == FSI)
300 skipping++;
301 continue;
303 else if (p->bidi_class == PDI)
305 if (skipping)
306 skipping --;
307 else
308 break;
309 continue;
312 if (skipping) continue;
314 if (p->bidi_class == L)
316 new_level = 0;
317 break;
319 else if (p->bidi_class == R || p->bidi_class == AL)
321 new_level = 1;
322 break;
325 if (odd(new_level))
327 least_odd = get_greater_odd_level(stack[stack_top].level);
328 if (valid_level(least_odd))
330 valid_isolate_count++;
331 push_stack(least_odd, NI, TRUE);
333 else
334 overflow_isolate_count++;
336 else
338 least_even = get_greater_even_level(stack[stack_top].level);
339 if (valid_level(least_even))
341 valid_isolate_count++;
342 push_stack(least_even, NI, TRUE);
344 else
345 overflow_isolate_count++;
347 break;
350 /* X6 */
351 case ON:
352 case L:
353 case R:
354 case AN:
355 case EN:
356 case AL:
357 case NSM:
358 case CS:
359 case ES:
360 case ET:
361 case S:
362 case WS:
363 c->resolved = stack[stack_top].level;
364 if (stack[stack_top].override != NI)
365 c->resolved = stack[stack_top].override;
366 break;
368 /* X6a */
369 case PDI:
370 if (overflow_isolate_count) overflow_isolate_count--;
371 else if (!valid_isolate_count) {/* do nothing */}
372 else
374 overflow_embedding_count = 0;
375 while (!stack[stack_top].isolate) pop_stack();
376 pop_stack();
377 valid_isolate_count--;
379 c->resolved = stack[stack_top].level;
380 break;
382 /* X7 */
383 case PDF:
384 c->resolved = stack[stack_top].level;
385 if (overflow_isolate_count) {/* do nothing */}
386 else if (overflow_embedding_count) overflow_embedding_count--;
387 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
388 pop_stack();
389 break;
391 /* X8 */
392 default:
393 c->resolved = baselevel;
394 break;
397 c->explicit = c->resolved;
400 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
401 for (i = 0; i < count; ++i)
403 switch (chars[i].bidi_class)
405 case RLE:
406 case LRE:
407 case RLO:
408 case LRO:
409 case PDF:
410 chars[i].bidi_class = BN;
411 break;
412 default:
418 static inline int get_prev_valid_char_index(const struct bidi_char *chars, int index, int back_fence)
420 if (index == -1 || index == back_fence) return index;
421 index--;
422 while (index > back_fence && chars[index].bidi_class == BN) index--;
423 return index;
426 static inline int get_next_valid_char_index(const struct bidi_char *chars, int index, int front_fence)
428 if (index == front_fence) return index;
429 index++;
430 while (index < front_fence && chars[index].bidi_class == BN) index++;
431 return index;
434 typedef struct tagRun
436 int start;
437 int end;
438 UINT8 e;
439 } Run;
441 typedef struct tagRunChar
443 WCHAR ch;
444 UINT8 *class;
445 } RunChar;
447 typedef struct tagIsolatedRun
449 struct list entry;
450 int length;
451 UINT8 sos;
452 UINT8 eos;
453 UINT8 e;
455 RunChar item[1];
456 } IsolatedRun;
458 static inline int get_next_valid_char_from_run(IsolatedRun *run, int index)
460 if (index >= (run->length-1)) return -1;
461 index++;
462 while (index < run->length && *run->item[index].class == BN) index++;
463 if (index == run->length) return -1;
464 return index;
467 static inline int get_prev_valid_char_from_run(IsolatedRun *run, int index)
469 if (index <= 0) return -1;
470 index--;
471 while (index > -1 && *run->item[index].class == BN) index--;
472 return index;
475 static inline void iso_dump_types(const char* header, IsolatedRun *run)
477 int i, len = 0;
478 TRACE("%s:",header);
479 TRACE("[ ");
480 for (i = 0; i < run->length && len < 200; i++) {
481 TRACE(" %s", debug_type[*run->item[i].class]);
482 len += strlen(debug_type[*run->item[i].class])+1;
484 if (i != run->length)
485 TRACE("...");
486 TRACE(" ]\n");
489 /*------------------------------------------------------------------------
490 Function: bidi_resolve_weak
492 Resolves the directionality of numeric and other weak character types
494 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
496 Input: Array of embedding levels
497 Character count
499 In/Out: Array of directional classes
501 Note: On input only these directional classes are expected
502 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
503 ------------------------------------------------------------------------*/
504 static BOOL bidi_is_isolate(UINT8 class)
506 return class == LRI || class == RLI || class == FSI || class == PDI;
509 static void bidi_resolve_weak(IsolatedRun *iso_run)
511 int i;
513 /* W1 */
514 for (i=0; i < iso_run->length; i++) {
515 if (*iso_run->item[i].class == NSM) {
516 int j = get_prev_valid_char_from_run(iso_run, i);
517 if (j == -1)
518 *iso_run->item[i].class = iso_run->sos;
519 else if (bidi_is_isolate(*iso_run->item[j].class))
520 *iso_run->item[i].class = ON;
521 else
522 *iso_run->item[i].class = *iso_run->item[j].class;
526 /* W2 */
527 for (i = 0; i < iso_run->length; i++) {
528 if (*iso_run->item[i].class == EN) {
529 int j = get_prev_valid_char_from_run(iso_run, i);
530 while (j > -1) {
531 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L || *iso_run->item[j].class == AL) {
532 if (*iso_run->item[j].class == AL)
533 *iso_run->item[i].class = AN;
534 break;
536 j = get_prev_valid_char_from_run(iso_run, j);
541 /* W3 */
542 for (i = 0; i < iso_run->length; i++) {
543 if (*iso_run->item[i].class == AL)
544 *iso_run->item[i].class = R;
547 /* W4 */
548 for (i = 0; i < iso_run->length; i++) {
549 if (*iso_run->item[i].class == ES) {
550 int b = get_prev_valid_char_from_run(iso_run, i);
551 int f = get_next_valid_char_from_run(iso_run, i);
553 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
554 *iso_run->item[i].class = EN;
556 else if (*iso_run->item[i].class == CS) {
557 int b = get_prev_valid_char_from_run(iso_run, i);
558 int f = get_next_valid_char_from_run(iso_run, i);
560 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
561 *iso_run->item[i].class = EN;
562 else if (b > -1 && f > -1 && *iso_run->item[b].class == AN && *iso_run->item[f].class == AN)
563 *iso_run->item[i].class = AN;
567 /* W5 */
568 for (i = 0; i < iso_run->length; i++) {
569 if (*iso_run->item[i].class == ET) {
570 int j;
571 for (j = i-1 ; j > -1; j--) {
572 if (*iso_run->item[j].class == BN) continue;
573 if (*iso_run->item[j].class == ET) continue;
574 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
575 else break;
577 if (*iso_run->item[i].class == ET) {
578 for (j = i+1; j < iso_run->length; j++) {
579 if (*iso_run->item[j].class == BN) continue;
580 if (*iso_run->item[j].class == ET) continue;
581 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
582 else break;
588 /* W6 */
589 for (i = 0; i < iso_run->length; i++) {
590 if (*iso_run->item[i].class == ET || *iso_run->item[i].class == ES || *iso_run->item[i].class == CS || *iso_run->item[i].class == ON)
592 int b = i-1;
593 int f = i+1;
594 if (b > -1 && *iso_run->item[b].class == BN)
595 *iso_run->item[b].class = ON;
596 if (f < iso_run->length && *iso_run->item[f].class == BN)
597 *iso_run->item[f].class = ON;
599 *iso_run->item[i].class = ON;
603 /* W7 */
604 for (i = 0; i < iso_run->length; i++) {
605 if (*iso_run->item[i].class == EN) {
606 int j;
607 for (j = get_prev_valid_char_from_run(iso_run, i); j > -1; j = get_prev_valid_char_from_run(iso_run, j))
608 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L) {
609 if (*iso_run->item[j].class == L)
610 *iso_run->item[i].class = L;
611 break;
613 if (iso_run->sos == L && j == -1)
614 *iso_run->item[i].class = L;
619 typedef struct tagBracketPair
621 int start;
622 int end;
623 } BracketPair;
625 static int __cdecl bracketpair_compr(const void *a, const void* b)
627 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
630 static BracketPair *bidi_compute_bracket_pairs(IsolatedRun *iso_run)
632 WCHAR *open_stack;
633 int *stack_index;
634 int stack_top = iso_run->length;
635 BracketPair *out;
636 int pair_count = 0;
637 int i;
639 open_stack = malloc(sizeof(WCHAR) * iso_run->length);
640 stack_index = malloc(sizeof(int) * iso_run->length);
641 out = malloc(sizeof(BracketPair) * iso_run->length);
643 if (!open_stack || !stack_index || !out) {
644 free(open_stack);
645 free(stack_index);
646 free(out);
647 return NULL;
650 out[0].start = -1;
652 for (i = 0; i < iso_run->length; i++) {
653 unsigned short ubv = get_table_entry_16(bidi_bracket_table, iso_run->item[i].ch);
654 if (ubv)
656 if ((ubv >> 8) == 0) {
657 stack_top--;
658 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
659 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
660 if (open_stack[stack_top] == 0x232A)
661 open_stack[stack_top] = 0x3009;
662 stack_index[stack_top] = i;
664 else if ((ubv >> 8) == 1) {
665 int j;
667 if (stack_top == iso_run->length) continue;
668 for (j = stack_top; j < iso_run->length; j++) {
669 WCHAR c = iso_run->item[i].ch;
670 if (c == 0x232A) c = 0x3009;
671 if (c == open_stack[j]) {
672 out[pair_count].start = stack_index[j];
673 out[pair_count].end = i;
674 pair_count++;
675 out[pair_count].start = -1;
676 stack_top = j+1;
677 break;
683 if (pair_count == 0)
685 free(out);
686 out = NULL;
688 else if (pair_count > 1)
689 qsort(out, pair_count, sizeof(BracketPair), bracketpair_compr);
691 free(open_stack);
692 free(stack_index);
693 return out;
696 static inline UINT8 get_rule_N0_class(UINT8 class)
698 return (class == AN || class == EN) ? R : class;
701 /*------------------------------------------------------------------------
702 Function: bidi_resolve_neutrals
704 Resolves the directionality of neutral character types.
706 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
708 Input: Array of embedding levels
709 Character count
710 Baselevel
712 In/Out: Array of directional classes
714 Note: On input only these directional classes are expected
715 R, L, NI, AN, EN and BN
717 W8 resolves a number of ENs to L
718 ------------------------------------------------------------------------*/
719 static void bidi_resolve_neutrals(IsolatedRun *run)
721 BracketPair *pairs;
722 int i;
724 /* Translate isolates into NI */
725 for (i = 0; i < run->length; i++) {
726 switch (*run->item[i].class) {
727 case B:
728 case S:
729 case WS:
730 case FSI:
731 case LRI:
732 case RLI:
733 case PDI: *run->item[i].class = NI;
736 /* "Only NI, L, R, AN, EN and BN are allowed" */
737 ASSERT(*run->item[i].class <= EN || *run->item[i].class == BN);
740 /* N0: Skipping bracketed pairs for now */
741 pairs = bidi_compute_bracket_pairs(run);
742 if (pairs) {
743 BracketPair *p = pairs;
744 int i = 0;
745 while (p->start >= 0) {
746 UINT8 e = get_embedding_direction(run->e);
747 UINT8 o = get_embedding_direction(run->e + 1);
748 BOOL flag_o = FALSE;
749 int j;
751 TRACE("Bracket Pair [%i - %i]\n", p->start, p->end);
753 /* N0.b */
754 for (j = p->start+1; j < p->end; j++) {
755 if (get_rule_N0_class(*run->item[j].class) == e) {
756 *run->item[p->start].class = e;
757 *run->item[p->end].class = e;
758 break;
760 else if (get_rule_N0_class(*run->item[j].class) == o)
761 flag_o = TRUE;
763 /* N0.c */
764 if (j == p->end && flag_o) {
765 for (j = p->start; j >= 0; j--) {
766 if (get_rule_N0_class(*run->item[j].class) == o) {
767 *run->item[p->start].class = o;
768 *run->item[p->end].class = o;
769 break;
771 else if (get_rule_N0_class(*run->item[j].class) == e) {
772 *run->item[p->start].class = e;
773 *run->item[p->end].class = e;
774 break;
777 if (j < 0) {
778 *run->item[p->start].class = run->sos;
779 *run->item[p->end].class = run->sos;
783 i++;
784 p = &pairs[i];
786 free(pairs);
789 /* N1 */
790 for (i = 0; i < run->length; i++) {
791 UINT8 l, r;
793 if (*run->item[i].class == NI) {
794 int b = get_prev_valid_char_from_run(run, i);
795 int j;
797 if (b == -1) {
798 l = run->sos;
799 b = 0;
801 else {
802 if (*run->item[b].class == R || *run->item[b].class == AN || *run->item[b].class == EN)
803 l = R;
804 else if (*run->item[b].class == L)
805 l = L;
806 else /* No string type */
807 continue;
809 j = get_next_valid_char_from_run(run, i);
810 while (j > -1 && *run->item[j].class == NI) j = get_next_valid_char_from_run(run, j);
811 if (j == -1) {
812 r = run->eos;
813 j = run->length;
815 else if (*run->item[j].class == R || *run->item[j].class == AN || *run->item[j].class == EN)
816 r = R;
817 else if (*run->item[j].class == L)
818 r = L;
819 else /* No string type */
820 continue;
822 if (r == l) {
823 for (b = i; b < j && b < run->length; b++)
824 *run->item[b].class = r;
829 /* N2 */
830 for (i = 0; i < run->length; i++) {
831 if (*run->item[i].class == NI) {
832 int b = i-1;
833 int f = i+1;
835 *run->item[i].class = get_embedding_direction(run->e);
836 if (b > -1 && *run->item[b].class == BN)
837 *run->item[b].class = get_embedding_direction(run->e);
838 if (f < run->length && *run->item[f].class == BN)
839 *run->item[f].class = get_embedding_direction(run->e);
844 /*------------------------------------------------------------------------
845 Function: bidi_resolve_implicit
847 Recursively resolves implicit embedding levels.
848 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
850 Note: levels may exceed 15 on output.
851 Accepted subset of direction classes
852 R, L, AN, EN
853 ------------------------------------------------------------------------*/
854 static void bidi_resolve_implicit(struct bidi_char *chars, unsigned int count)
856 unsigned int i;
858 /* I1/2 */
859 for (i = 0; i < count; ++i)
861 struct bidi_char *c = &chars[i];
863 if (c->bidi_class == BN)
864 continue;
866 ASSERT(c->bidi_class != ON); /* "No Neutrals allowed to survive here." */
867 ASSERT(c->bidi_class <= EN); /* "Out of range." */
869 if (odd(c->resolved) && (c->bidi_class == L || c->bidi_class == EN || c->bidi_class == AN))
870 c->resolved++;
871 else if (!odd(c->resolved) && c->bidi_class == R)
872 c->resolved++;
873 else if (!odd(c->resolved) && (c->bidi_class == EN || c->bidi_class == AN))
874 c->resolved += 2;
878 static inline BOOL is_rule_L1_reset_class(UINT8 class)
880 switch (class) {
881 case WS:
882 case FSI:
883 case LRI:
884 case RLI:
885 case PDI:
886 case LRE:
887 case RLE:
888 case LRO:
889 case RLO:
890 case PDF:
891 case BN:
892 return TRUE;
893 default:
894 return FALSE;
898 static void bidi_resolve_resolved(struct bidi_char *chars, unsigned int count, UINT8 baselevel)
900 int i, sos = 0, eos = count - 1;
902 /* L1 */
903 for (i = sos; i <= eos; i++)
905 switch (chars[i].nominal_bidi_class)
907 case B:
908 case S:
910 int j = i - 1;
911 while (i > sos && j >= sos && is_rule_L1_reset_class(chars[j].nominal_bidi_class))
912 chars[j--].resolved = baselevel;
913 chars[i].resolved = baselevel;
915 break;
916 case LRE: case RLE: case LRO: case RLO: case PDF: case BN:
917 chars[i].resolved = i ? chars[i - 1].resolved : baselevel;
918 break;
919 default:
923 if (i == eos && is_rule_L1_reset_class(chars[i].nominal_bidi_class))
925 int j = i;
926 while (j >= sos && is_rule_L1_reset_class(chars[j].nominal_bidi_class))
927 chars[j--].resolved = baselevel;
932 static HRESULT bidi_compute_isolating_runs_set(struct bidi_char *chars, unsigned int count, UINT8 baselevel, struct list *set)
934 int run_start, run_end, i;
935 int run_count = 0;
936 HRESULT hr = S_OK;
937 Run *runs;
939 if (!(runs = calloc(count, sizeof(*runs))))
940 return E_OUTOFMEMORY;
942 list_init(set);
944 /* Build Runs */
945 run_start = 0;
946 while (run_start < count)
948 run_end = get_next_valid_char_index(chars, run_start, count);
949 while (run_end < count && chars[run_end].resolved == chars[run_start].resolved)
950 run_end = get_next_valid_char_index(chars, run_end, count);
951 run_end--;
952 runs[run_count].start = run_start;
953 runs[run_count].end = run_end;
954 runs[run_count].e = chars[run_start].resolved;
955 run_start = get_next_valid_char_index(chars, run_end, count);
956 run_count++;
959 /* Build Isolating Runs */
960 i = 0;
961 while (i < run_count)
963 int k = i;
964 if (runs[k].start >= 0)
966 IsolatedRun *current_isolated;
967 int type_fence, real_end;
968 int j;
970 if (!(current_isolated = malloc(sizeof(IsolatedRun) + sizeof(RunChar)*count)))
972 hr = E_OUTOFMEMORY;
973 break;
976 run_start = runs[k].start;
977 current_isolated->e = runs[k].e;
978 current_isolated->length = (runs[k].end - runs[k].start)+1;
980 for (j = 0; j < current_isolated->length; ++j)
982 current_isolated->item[j].class = &chars[runs[k].start+j].bidi_class;
983 current_isolated->item[j].ch = chars[runs[k].start+j].ch;
986 run_end = runs[k].end;
988 TRACE("{ [%i -- %i]",run_start, run_end);
990 if (chars[run_end].bidi_class == BN)
991 run_end = get_prev_valid_char_index(chars, run_end, runs[k].start);
993 while (run_end < count && (chars[run_end].bidi_class == RLI || chars[run_end].bidi_class == LRI || chars[run_end].bidi_class == FSI))
995 j = k+1;
996 search:
997 while (j < run_count && chars[runs[j].start].bidi_class != PDI) j++;
998 if (j < run_count && runs[i].e != runs[j].e) {
999 j++;
1000 goto search;
1003 if (j != run_count)
1005 int l = current_isolated->length;
1006 int m;
1008 current_isolated->length += (runs[j].end - runs[j].start)+1;
1009 for (m = 0; l < current_isolated->length; l++, m++) {
1010 current_isolated->item[l].class = &chars[runs[j].start + m].bidi_class;
1011 current_isolated->item[l].ch = chars[runs[j].start + m].ch;
1014 TRACE("[%i -- %i]", runs[j].start, runs[j].end);
1016 run_end = runs[j].end;
1017 if (chars[run_end].bidi_class == BN)
1018 run_end = get_prev_valid_char_index(chars, run_end, runs[i].start);
1019 runs[j].start = -1;
1020 k = j;
1022 else {
1023 run_end = count;
1024 break;
1028 type_fence = get_prev_valid_char_index(chars, run_start, -1);
1030 if (type_fence == -1)
1031 current_isolated->sos = max(baselevel, chars[run_start].resolved);
1032 else
1033 current_isolated->sos = max(chars[type_fence].resolved, chars[run_start].resolved);
1035 current_isolated->sos = get_embedding_direction(current_isolated->sos);
1037 if (run_end == count)
1038 current_isolated->eos = current_isolated->sos;
1039 else
1041 /* eos could be an BN */
1042 if (chars[run_end].resolved == BN)
1044 real_end = get_prev_valid_char_index(chars, run_end, run_start - 1);
1045 if (real_end < run_start)
1046 real_end = run_start;
1048 else
1049 real_end = run_end;
1051 type_fence = get_next_valid_char_index(chars, run_end, count);
1052 if (type_fence == count)
1053 current_isolated->eos = max(baselevel, chars[real_end].resolved);
1054 else
1055 current_isolated->eos = max(chars[type_fence].resolved, chars[real_end].resolved);
1057 current_isolated->eos = get_embedding_direction(current_isolated->eos);
1060 list_add_tail(set, &current_isolated->entry);
1061 TRACE(" } level %i {%s <--> %s}\n", current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1063 i++;
1066 free(runs);
1067 return hr;
1070 HRESULT bidi_computelevels(struct bidi_char *chars, unsigned int count, UINT8 baselevel)
1072 IsolatedRun *iso_run, *next;
1073 struct list IsolatingRuns;
1074 HRESULT hr;
1076 if (TRACE_ON(bidi)) bidi_dump_types("start ", chars, 0, count);
1078 bidi_resolve_explicit(chars, count, baselevel);
1080 if (TRACE_ON(bidi)) bidi_dump_types("after explicit", chars, 0, count);
1082 /* X10/BD13: Compute Isolating runs */
1083 if (FAILED(hr = bidi_compute_isolating_runs_set(chars, count, baselevel, &IsolatingRuns)))
1085 WARN("Failed to compute isolating runs set, hr %#lx.\n", hr);
1086 return hr;
1089 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1091 if (TRACE_ON(bidi)) iso_dump_types("run", iso_run);
1093 bidi_resolve_weak(iso_run);
1094 if (TRACE_ON(bidi)) iso_dump_types("after weak", iso_run);
1096 bidi_resolve_neutrals(iso_run);
1097 if (TRACE_ON(bidi)) iso_dump_types("after neutrals", iso_run);
1099 list_remove(&iso_run->entry);
1100 free(iso_run);
1103 if (TRACE_ON(bidi)) bidi_dump_types("before implicit", chars, 0, count);
1104 bidi_resolve_implicit(chars, count);
1106 bidi_resolve_resolved(chars, count, baselevel);
1108 return S_OK;