wbemprox: Tweak a comment.
[wine.git] / dlls / dwrite / bidi.c
blobd4ee2ca0f7ccc8d14519a5a93d124c5b6aafef59
1 /*
2 * Unicode Bidirectional Algorithm implementation
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include <stdarg.h>
46 #include "windef.h"
47 #include "winbase.h"
48 #include "wine/debug.h"
50 #include "dwrite_private.h"
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
54 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
56 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
57 #define MAX_DEPTH 125
59 #define odd(x) ((x) & 1)
61 /*------------------------------------------------------------------------
62 Bidirectional Character Types
64 as defined by the Unicode Bidirectional Algorithm Table 3-7.
66 Note:
68 The list of bidirectional character types here is not grouped the
69 same way as the table 3-7, since the numberic values for the types
70 are chosen to keep the state and action tables compact.
71 ------------------------------------------------------------------------*/
72 enum directions
74 /* input types */
75 /* ON MUST be zero, code relies on ON = NI = 0 */
76 ON = 0, /* Other Neutral */
77 L, /* Left Letter */
78 R, /* Right Letter */
79 AN, /* Arabic Number */
80 EN, /* European Number */
81 AL, /* Arabic Letter (Right-to-left) */
82 NSM, /* Non-spacing Mark */
83 CS, /* Common Separator */
84 ES, /* European Separator */
85 ET, /* European Terminator (post/prefix e.g. $ and %) */
87 /* resolved types */
88 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
90 /* input types, */
91 S, /* Segment Separator (TAB) // used only in L1 */
92 WS, /* White space // used only in L1 */
93 B, /* Paragraph Separator (aka as PS) */
95 /* types for explicit controls */
96 RLO, /* these are used only in X1-X9 */
97 RLE,
98 LRO,
99 LRE,
100 PDF,
102 LRI, /* Isolate formatting characters new with 6.3 */
103 RLI,
104 FSI,
105 PDI,
107 /* resolved types, also resolved directions */
108 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
111 static const char debug_type[][4] =
113 "ON", /* Other Neutral */
114 "L", /* Left Letter */
115 "R", /* Right Letter */
116 "AN", /* Arabic Number */
117 "EN", /* European Number */
118 "AL", /* Arabic Letter (Right-to-left) */
119 "NSM", /* Non-spacing Mark */
120 "CS", /* Common Separator */
121 "ES", /* European Separator */
122 "ET", /* European Terminator (post/prefix e.g. $ and %) */
123 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
124 "S", /* Segment Separator (TAB) used only in L1 */
125 "WS", /* White space used only in L1 */
126 "B", /* Paragraph Separator (aka as PS) */
127 "RLO", /* these are used only in X1-X9 */
128 "RLE",
129 "LRO",
130 "LRE",
131 "PDF",
132 "LRI", /* Isolate formatting characters new with 6.3 */
133 "RLI",
134 "FSI",
135 "PDI",
138 static inline void bidi_dump_types(const char* header, const UINT8 *types, UINT32 start, UINT32 end)
140 int i, len = 0;
141 TRACE("%s:", header);
142 for (i = start; i < end && len < 200; i++) {
143 TRACE(" %s", debug_type[types[i]]);
144 len += strlen(debug_type[types[i]])+1;
146 if (i != end)
147 TRACE("...");
148 TRACE("\n");
151 /* Convert the libwine information to the direction enum */
152 static void bidi_classify(const WCHAR *string, UINT8 *chartype, UINT32 count)
154 static const enum directions dir_map[16] =
156 L, /* unassigned defaults to L */
169 NSM,
171 PDF /* also LRE, LRO, RLE, RLO */
174 UINT32 i;
176 for (i = 0; i < count; ++i) {
177 chartype[i] = dir_map[get_char_typeW(string[i]) >> 12];
179 switch (chartype[i]) {
180 case ES:
181 break;
182 case PDF:
183 switch (string[i]) {
184 case 0x202a: chartype[i] = LRE; break;
185 case 0x202b: chartype[i] = RLE; break;
186 case 0x202c: chartype[i] = PDF; break;
187 case 0x202d: chartype[i] = LRO; break;
188 case 0x202e: chartype[i] = RLO; break;
189 case 0x2066: chartype[i] = LRI; break;
190 case 0x2067: chartype[i] = RLI; break;
191 case 0x2068: chartype[i] = FSI; break;
192 case 0x2069: chartype[i] = PDI; break;
194 break;
199 WCHAR bidi_get_mirrored_char(WCHAR ch)
201 extern const WCHAR wine_mirror_map[] DECLSPEC_HIDDEN;
202 return ch + wine_mirror_map[wine_mirror_map[ch >> 8] + (ch & 0xff)];
205 /* RESOLVE EXPLICIT */
207 static inline UINT8 get_greater_even_level(UINT8 level)
209 return odd(level) ? level + 1 : level + 2;
212 static inline UINT8 get_greater_odd_level(UINT8 level)
214 return odd(level) ? level + 2 : level + 1;
217 static inline UINT8 get_embedding_direction(UINT8 level)
219 return odd(level) ? R : L;
222 /*------------------------------------------------------------------------
223 Function: bidi_resolve_explicit
225 Recursively resolves explicit embedding levels and overrides.
226 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
228 Input: Base embedding level and direction
229 Character count
231 Output: Array of embedding levels
233 In/Out: Array of direction classes
236 Note: The function uses two simple counters to keep track of
237 matching explicit codes and PDF. Use the default argument for
238 the outermost call. The nesting counter counts the recursion
239 depth and not the embedding level.
240 ------------------------------------------------------------------------*/
241 typedef struct tagStackItem
243 UINT8 level;
244 UINT8 override;
245 BOOL isolate;
246 } StackItem;
248 #define push_stack(l,o,i) \
249 do { stack_top--; \
250 stack[stack_top].level = l; \
251 stack[stack_top].override = o; \
252 stack[stack_top].isolate = i;} while(0)
254 #define pop_stack() do { stack_top++; } while(0)
256 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
258 static void bidi_resolve_explicit(UINT8 baselevel, UINT8 *classes, UINT8 *levels, UINT32 count)
260 /* X1 */
261 int overflow_isolate_count = 0;
262 int overflow_embedding_count = 0;
263 int valid_isolate_count = 0;
264 UINT32 i;
266 StackItem stack[MAX_DEPTH+2];
267 int stack_top = MAX_DEPTH+1;
269 stack[stack_top].level = baselevel;
270 stack[stack_top].override = NI;
271 stack[stack_top].isolate = FALSE;
273 for (i = 0; i < count; i++) {
274 UINT8 least_odd, least_even;
276 switch (classes[i]) {
278 /* X2 */
279 case RLE:
280 least_odd = get_greater_odd_level(stack[stack_top].level);
281 levels[i] = valid_level(least_odd) ? least_odd : stack[stack_top].level;
282 if (valid_level(least_odd))
283 push_stack(least_odd, NI, FALSE);
284 else if (overflow_isolate_count == 0)
285 overflow_embedding_count++;
286 break;
288 /* X3 */
289 case LRE:
290 least_even = get_greater_even_level(stack[stack_top].level);
291 levels[i] = valid_level(least_even) ? least_even : stack[stack_top].level;
292 if (valid_level(least_even))
293 push_stack(least_even, NI, FALSE);
294 else if (overflow_isolate_count == 0)
295 overflow_embedding_count++;
296 break;
298 /* X4 */
299 case RLO:
300 least_odd = get_greater_odd_level(stack[stack_top].level);
301 levels[i] = stack[stack_top].level;
302 if (valid_level(least_odd))
303 push_stack(least_odd, R, FALSE);
304 else if (overflow_isolate_count == 0)
305 overflow_embedding_count++;
306 break;
308 /* X5 */
309 case LRO:
310 least_even = get_greater_even_level(stack[stack_top].level);
311 levels[i] = stack[stack_top].level;
312 if (valid_level(least_even))
313 push_stack(least_even, L, FALSE);
314 else if (overflow_isolate_count == 0)
315 overflow_embedding_count++;
316 break;
318 /* X5a */
319 case RLI:
320 least_odd = get_greater_odd_level(stack[stack_top].level);
321 levels[i] = stack[stack_top].level;
322 if (valid_level(least_odd))
324 valid_isolate_count++;
325 push_stack(least_odd, NI, TRUE);
327 else
328 overflow_isolate_count++;
329 break;
331 /* X5b */
332 case LRI:
333 least_even = get_greater_even_level(stack[stack_top].level);
334 levels[i] = stack[stack_top].level;
335 if (valid_level(least_even))
337 valid_isolate_count++;
338 push_stack(least_even, NI, TRUE);
340 else
341 overflow_isolate_count++;
342 break;
344 /* X5c */
345 case FSI:
347 UINT8 new_level = 0;
348 int skipping = 0;
349 int j;
351 levels[i] = stack[stack_top].level;
352 for (j = i+1; j < count; j++)
354 if (classes[j] == LRI || classes[j] == RLI || classes[j] == FSI)
356 skipping++;
357 continue;
359 else if (classes[j] == PDI)
361 if (skipping)
362 skipping --;
363 else
364 break;
365 continue;
368 if (skipping) continue;
370 if (classes[j] == L)
372 new_level = 0;
373 break;
375 else if (classes[j] == R || classes[j] == AL)
377 new_level = 1;
378 break;
381 if (odd(new_level))
383 least_odd = get_greater_odd_level(stack[stack_top].level);
384 if (valid_level(least_odd))
386 valid_isolate_count++;
387 push_stack(least_odd, NI, TRUE);
389 else
390 overflow_isolate_count++;
392 else
394 least_even = get_greater_even_level(stack[stack_top].level);
395 if (valid_level(least_even))
397 valid_isolate_count++;
398 push_stack(least_even, NI, TRUE);
400 else
401 overflow_isolate_count++;
403 break;
406 /* X6 */
407 case ON:
408 case L:
409 case R:
410 case AN:
411 case EN:
412 case AL:
413 case NSM:
414 case CS:
415 case ES:
416 case ET:
417 case S:
418 case WS:
419 levels[i] = stack[stack_top].level;
420 if (stack[stack_top].override != NI)
421 classes[i] = stack[stack_top].override;
422 break;
424 /* X6a */
425 case PDI:
426 if (overflow_isolate_count) overflow_isolate_count--;
427 else if (!valid_isolate_count) {/* do nothing */}
428 else
430 overflow_embedding_count = 0;
431 while (!stack[stack_top].isolate) pop_stack();
432 pop_stack();
433 valid_isolate_count--;
435 levels[i] = stack[stack_top].level;
436 break;
438 /* X7 */
439 case PDF:
440 levels[i] = stack[stack_top].level;
441 if (overflow_isolate_count) {/* do nothing */}
442 else if (overflow_embedding_count) overflow_embedding_count--;
443 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
444 pop_stack();
445 break;
447 /* X8 */
448 default:
449 levels[i] = baselevel;
450 break;
453 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
454 for (i = 0; i < count ; i++)
455 if (classes[i] == RLE || classes[i] == LRE || classes[i] == RLO || classes[i] == LRO || classes[i] == PDF)
456 classes[i] = BN;
459 static inline int get_prev_valid_char_index(const UINT8 *classes, int index, int back_fence)
461 if (index == -1 || index == back_fence) return index;
462 index--;
463 while (index > back_fence && classes[index] == BN) index--;
464 return index;
467 static inline int get_next_valid_char_index(const UINT8 *classes, int index, int front_fence)
469 if (index == front_fence) return index;
470 index++;
471 while (index < front_fence && classes[index] == BN) index++;
472 return index;
475 typedef struct tagRun
477 int start;
478 int end;
479 UINT8 e;
480 } Run;
482 typedef struct tagRunChar
484 WCHAR ch;
485 UINT8 *class;
486 } RunChar;
488 typedef struct tagIsolatedRun
490 struct list entry;
491 int length;
492 UINT8 sos;
493 UINT8 eos;
494 UINT8 e;
496 RunChar item[1];
497 } IsolatedRun;
499 static inline int get_next_valid_char_from_run(IsolatedRun *run, int index)
501 if (index >= (run->length-1)) return -1;
502 index++;
503 while (index < run->length && *run->item[index].class == BN) index++;
504 if (index == run->length) return -1;
505 return index;
508 static inline int get_prev_valid_char_from_run(IsolatedRun *run, int index)
510 if (index <= 0) return -1;
511 index--;
512 while (index > -1 && *run->item[index].class == BN) index--;
513 return index;
516 static inline void iso_dump_types(const char* header, IsolatedRun *run)
518 int i, len = 0;
519 TRACE("%s:",header);
520 TRACE("[ ");
521 for (i = 0; i < run->length && len < 200; i++) {
522 TRACE(" %s", debug_type[*run->item[i].class]);
523 len += strlen(debug_type[*run->item[i].class])+1;
525 if (i != run->length)
526 TRACE("...");
527 TRACE(" ]\n");
530 /*------------------------------------------------------------------------
531 Function: bidi_resolve_weak
533 Resolves the directionality of numeric and other weak character types
535 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
537 Input: Array of embedding levels
538 Character count
540 In/Out: Array of directional classes
542 Note: On input only these directional classes are expected
543 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
544 ------------------------------------------------------------------------*/
545 static BOOL bidi_is_isolate(UINT8 class)
547 return class == LRI || class == RLI || class == FSI || class == PDI;
550 static void bidi_resolve_weak(IsolatedRun *iso_run)
552 int i;
554 /* W1 */
555 for (i=0; i < iso_run->length; i++) {
556 if (*iso_run->item[i].class == NSM) {
557 int j = get_prev_valid_char_from_run(iso_run, i);
558 if (j == -1)
559 *iso_run->item[i].class = iso_run->sos;
560 else if (bidi_is_isolate(*iso_run->item[j].class))
561 *iso_run->item[i].class = ON;
562 else
563 *iso_run->item[i].class = *iso_run->item[j].class;
567 /* W2 */
568 for (i = 0; i < iso_run->length; i++) {
569 if (*iso_run->item[i].class == EN) {
570 int j = get_prev_valid_char_from_run(iso_run, i);
571 while (j > -1) {
572 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L || *iso_run->item[j].class == AL) {
573 if (*iso_run->item[j].class == AL)
574 *iso_run->item[i].class = AN;
575 break;
577 j = get_prev_valid_char_from_run(iso_run, j);
582 /* W3 */
583 for (i = 0; i < iso_run->length; i++) {
584 if (*iso_run->item[i].class == AL)
585 *iso_run->item[i].class = R;
588 /* W4 */
589 for (i = 0; i < iso_run->length; i++) {
590 if (*iso_run->item[i].class == ES) {
591 int b = get_prev_valid_char_from_run(iso_run, i);
592 int f = get_next_valid_char_from_run(iso_run, i);
594 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
595 *iso_run->item[i].class = EN;
597 else if (*iso_run->item[i].class == CS) {
598 int b = get_prev_valid_char_from_run(iso_run, i);
599 int f = get_next_valid_char_from_run(iso_run, i);
601 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
602 *iso_run->item[i].class = EN;
603 else if (b > -1 && f > -1 && *iso_run->item[b].class == AN && *iso_run->item[f].class == AN)
604 *iso_run->item[i].class = AN;
608 /* W5 */
609 for (i = 0; i < iso_run->length; i++) {
610 if (*iso_run->item[i].class == ET) {
611 int j;
612 for (j = i-1 ; j > -1; j--) {
613 if (*iso_run->item[j].class == BN) continue;
614 if (*iso_run->item[j].class == ET) continue;
615 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
616 else break;
618 if (*iso_run->item[i].class == ET) {
619 for (j = i+1; j < iso_run->length; j++) {
620 if (*iso_run->item[j].class == BN) continue;
621 if (*iso_run->item[j].class == ET) continue;
622 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
623 else break;
629 /* W6 */
630 for (i = 0; i < iso_run->length; i++) {
631 if (*iso_run->item[i].class == ET || *iso_run->item[i].class == ES || *iso_run->item[i].class == CS || *iso_run->item[i].class == ON)
633 int b = i-1;
634 int f = i+1;
635 if (b > -1 && *iso_run->item[b].class == BN)
636 *iso_run->item[b].class = ON;
637 if (f < iso_run->length && *iso_run->item[f].class == BN)
638 *iso_run->item[f].class = ON;
640 *iso_run->item[i].class = ON;
644 /* W7 */
645 for (i = 0; i < iso_run->length; i++) {
646 if (*iso_run->item[i].class == EN) {
647 int j;
648 for (j = get_prev_valid_char_from_run(iso_run, i); j > -1; j = get_prev_valid_char_from_run(iso_run, j))
649 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L) {
650 if (*iso_run->item[j].class == L)
651 *iso_run->item[i].class = L;
652 break;
654 if (iso_run->sos == L && j == -1)
655 *iso_run->item[i].class = L;
660 typedef struct tagBracketPair
662 int start;
663 int end;
664 } BracketPair;
666 static int bracketpair_compr(const void *a, const void* b)
668 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
671 static BracketPair *bidi_compute_bracket_pairs(IsolatedRun *iso_run)
673 WCHAR *open_stack;
674 int *stack_index;
675 int stack_top = iso_run->length;
676 BracketPair *out = NULL;
677 int pair_count = 0;
678 int i;
680 open_stack = heap_alloc(sizeof(WCHAR) * iso_run->length);
681 stack_index = heap_alloc(sizeof(int) * iso_run->length);
683 for (i = 0; i < iso_run->length; i++) {
684 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
685 if (ubv) {
686 if (!out) {
687 out = heap_alloc(sizeof(BracketPair));
688 out[0].start = -1;
691 if ((ubv >> 8) == 0) {
692 stack_top--;
693 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
694 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
695 if (open_stack[stack_top] == 0x232A)
696 open_stack[stack_top] = 0x3009;
697 stack_index[stack_top] = i;
699 else if ((ubv >> 8) == 1) {
700 int j;
702 if (stack_top == iso_run->length) continue;
703 for (j = stack_top; j < iso_run->length; j++) {
704 WCHAR c = iso_run->item[i].ch;
705 if (c == 0x232A) c = 0x3009;
706 if (c == open_stack[j]) {
707 out[pair_count].start = stack_index[j];
708 out[pair_count].end = i;
709 pair_count++;
710 out = heap_realloc(out, sizeof(BracketPair) * (pair_count+1));
711 out[pair_count].start = -1;
712 stack_top = j+1;
713 break;
719 if (pair_count == 0) {
720 heap_free(out);
721 out = NULL;
723 else if (pair_count > 1)
724 qsort(out, pair_count, sizeof(BracketPair), bracketpair_compr);
726 heap_free(open_stack);
727 heap_free(stack_index);
728 return out;
731 static inline UINT8 get_rule_N0_class(UINT8 class)
733 return (class == AN || class == EN) ? R : class;
736 /*------------------------------------------------------------------------
737 Function: bidi_resolve_neutrals
739 Resolves the directionality of neutral character types.
741 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
743 Input: Array of embedding levels
744 Character count
745 Baselevel
747 In/Out: Array of directional classes
749 Note: On input only these directional classes are expected
750 R, L, NI, AN, EN and BN
752 W8 resolves a number of ENs to L
753 ------------------------------------------------------------------------*/
754 static void bidi_resolve_neutrals(IsolatedRun *run)
756 BracketPair *pairs;
757 int i;
759 /* Translate isolates into NI */
760 for (i = 0; i < run->length; i++) {
761 switch (*run->item[i].class) {
762 case B:
763 case S:
764 case WS:
765 case FSI:
766 case LRI:
767 case RLI:
768 case PDI: *run->item[i].class = NI;
771 /* "Only NI, L, R, AN, EN and BN are allowed" */
772 ASSERT(*run->item[i].class <= EN || *run->item[i].class == BN);
775 /* N0: Skipping bracketed pairs for now */
776 pairs = bidi_compute_bracket_pairs(run);
777 if (pairs) {
778 BracketPair *p = pairs;
779 int i = 0;
780 while (p->start >= 0) {
781 UINT8 e = get_embedding_direction(run->e);
782 UINT8 o = get_embedding_direction(run->e + 1);
783 BOOL flag_o = FALSE;
784 int j;
786 TRACE("Bracket Pair [%i - %i]\n", p->start, p->end);
788 /* N0.b */
789 for (j = p->start+1; j < p->end; j++) {
790 if (get_rule_N0_class(*run->item[j].class) == e) {
791 *run->item[p->start].class = e;
792 *run->item[p->end].class = e;
793 break;
795 else if (get_rule_N0_class(*run->item[j].class) == o)
796 flag_o = TRUE;
798 /* N0.c */
799 if (j == p->end && flag_o) {
800 for (j = p->start; j >= 0; j--) {
801 if (get_rule_N0_class(*run->item[j].class) == o) {
802 *run->item[p->start].class = o;
803 *run->item[p->end].class = o;
804 break;
806 else if (get_rule_N0_class(*run->item[j].class) == e) {
807 *run->item[p->start].class = e;
808 *run->item[p->end].class = e;
809 break;
812 if (j < 0) {
813 *run->item[p->start].class = run->sos;
814 *run->item[p->end].class = run->sos;
818 i++;
819 p = &pairs[i];
821 heap_free(pairs);
824 /* N1 */
825 for (i = 0; i < run->length; i++) {
826 UINT8 l, r;
828 if (*run->item[i].class == NI) {
829 int b = get_prev_valid_char_from_run(run, i);
830 int j;
832 if (b == -1) {
833 l = run->sos;
834 b = 0;
836 else {
837 if (*run->item[b].class == R || *run->item[b].class == AN || *run->item[b].class == EN)
838 l = R;
839 else if (*run->item[b].class == L)
840 l = L;
841 else /* No string type */
842 continue;
844 j = get_next_valid_char_from_run(run, i);
845 while (j > -1 && *run->item[j].class == NI) j = get_next_valid_char_from_run(run, j);
846 if (j == -1) {
847 r = run->eos;
848 j = run->length;
850 else if (*run->item[j].class == R || *run->item[j].class == AN || *run->item[j].class == EN)
851 r = R;
852 else if (*run->item[j].class == L)
853 r = L;
854 else /* No string type */
855 continue;
857 if (r == l) {
858 for (b = i; b < j && b < run->length; b++)
859 *run->item[b].class = r;
864 /* N2 */
865 for (i = 0; i < run->length; i++) {
866 if (*run->item[i].class == NI) {
867 int b = i-1;
868 int f = i+1;
870 *run->item[i].class = get_embedding_direction(run->e);
871 if (b > -1 && *run->item[b].class == BN)
872 *run->item[b].class = get_embedding_direction(run->e);
873 if (f < run->length && *run->item[f].class == BN)
874 *run->item[f].class = get_embedding_direction(run->e);
879 /*------------------------------------------------------------------------
880 Function: bidi_resolve_implicit
882 Recursively resolves implicit embedding levels.
883 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
885 Input: Array of direction classes
886 Character count
887 Base level
889 In/Out: Array of embedding levels
891 Note: levels may exceed 15 on output.
892 Accepted subset of direction classes
893 R, L, AN, EN
894 ------------------------------------------------------------------------*/
895 static void bidi_resolve_implicit(const UINT8 *classes, UINT8 *levels, int sos, int eos)
897 int i;
899 /* I1/2 */
900 for (i = sos; i <= eos; i++) {
901 if (classes[i] == BN)
902 continue;
904 ASSERT(classes[i] != ON); /* "No Neutrals allowed to survive here." */
905 ASSERT(classes[i] <= EN); /* "Out of range." */
907 if (odd(levels[i]) && (classes[i] == L || classes[i] == EN || classes[i] == AN))
908 levels[i]++;
909 else if (!odd(levels[i]) && classes[i] == R)
910 levels[i]++;
911 else if (!odd(levels[i]) && (classes[i] == EN || classes[i] == AN))
912 levels[i] += 2;
916 static inline BOOL is_rule_L1_reset_class(UINT8 class)
918 switch (class) {
919 case WS:
920 case FSI:
921 case LRI:
922 case RLI:
923 case PDI:
924 case LRE:
925 case RLE:
926 case LRO:
927 case RLO:
928 case PDF:
929 case BN:
930 return TRUE;
931 default:
932 return FALSE;
936 static void bidi_resolve_resolved(UINT8 baselevel, const UINT8 *classes, UINT8 *levels, int sos, int eos)
938 int i;
940 /* L1 */
941 for (i = sos; i <= eos; i++) {
942 if (classes[i] == B || classes[i] == S) {
943 int j = i - 1;
944 while (i > sos && j >= sos && is_rule_L1_reset_class(classes[j]))
945 levels[j--] = baselevel;
946 levels[i] = baselevel;
948 else if (classes[i] == LRE || classes[i] == RLE || classes[i] == LRO || classes[i] == RLO ||
949 classes[i] == PDF || classes[i] == BN) {
950 levels[i] = i ? levels[i - 1] : baselevel;
952 if (i == eos && is_rule_L1_reset_class(classes[i])) {
953 int j = i;
954 while (j >= sos && is_rule_L1_reset_class(classes[j]))
955 levels[j--] = baselevel;
960 static HRESULT bidi_compute_isolating_runs_set(UINT8 baselevel, UINT8 *classes, UINT8 *levels, const WCHAR *string, UINT32 count, struct list *set)
962 int run_start, run_end, i;
963 int run_count = 0;
964 HRESULT hr = S_OK;
965 Run *runs;
967 runs = heap_alloc(count * sizeof(Run));
968 if (!runs)
969 return E_OUTOFMEMORY;
971 list_init(set);
973 /* Build Runs */
974 run_start = 0;
975 while (run_start < count) {
976 run_end = get_next_valid_char_index(classes, run_start, count);
977 while (run_end < count && levels[run_end] == levels[run_start])
978 run_end = get_next_valid_char_index(classes, run_end, count);
979 run_end--;
980 runs[run_count].start = run_start;
981 runs[run_count].end = run_end;
982 runs[run_count].e = levels[run_start];
983 run_start = get_next_valid_char_index(classes, run_end, count);
984 run_count++;
987 /* Build Isolating Runs */
988 i = 0;
989 while (i < run_count) {
990 int k = i;
991 if (runs[k].start >= 0) {
992 IsolatedRun *current_isolated;
993 int type_fence, real_end;
994 int j;
996 current_isolated = heap_alloc(sizeof(IsolatedRun) + sizeof(RunChar)*count);
997 if (!current_isolated) {
998 hr = E_OUTOFMEMORY;
999 break;
1002 run_start = runs[k].start;
1003 current_isolated->e = runs[k].e;
1004 current_isolated->length = (runs[k].end - runs[k].start)+1;
1006 for (j = 0; j < current_isolated->length; j++) {
1007 current_isolated->item[j].class = &classes[runs[k].start+j];
1008 current_isolated->item[j].ch = string[runs[k].start+j];
1011 run_end = runs[k].end;
1013 TRACE("{ [%i -- %i]",run_start, run_end);
1015 if (classes[run_end] == BN)
1016 run_end = get_prev_valid_char_index(classes, run_end, runs[k].start);
1018 while (run_end < count && (classes[run_end] == RLI || classes[run_end] == LRI || classes[run_end] == FSI)) {
1019 j = k+1;
1020 search:
1021 while (j < run_count && classes[runs[j].start] != PDI) j++;
1022 if (j < run_count && runs[i].e != runs[j].e) {
1023 j++;
1024 goto search;
1027 if (j != run_count) {
1028 int l = current_isolated->length;
1029 int m;
1031 current_isolated->length += (runs[j].end - runs[j].start)+1;
1032 for (m = 0; l < current_isolated->length; l++, m++) {
1033 current_isolated->item[l].class = &classes[runs[j].start+m];
1034 current_isolated->item[l].ch = string[runs[j].start+m];
1037 TRACE("[%i -- %i]", runs[j].start, runs[j].end);
1039 run_end = runs[j].end;
1040 if (classes[run_end] == BN)
1041 run_end = get_prev_valid_char_index(classes, run_end, runs[i].start);
1042 runs[j].start = -1;
1043 k = j;
1045 else {
1046 run_end = count;
1047 break;
1051 type_fence = get_prev_valid_char_index(classes, run_start, -1);
1053 if (type_fence == -1)
1054 current_isolated->sos = (baselevel > levels[run_start]) ? baselevel : levels[run_start];
1055 else
1056 current_isolated->sos = (levels[type_fence] > levels[run_start]) ? levels[type_fence] : levels[run_start];
1058 current_isolated->sos = get_embedding_direction(current_isolated->sos);
1060 if (run_end == count)
1061 current_isolated->eos = current_isolated->sos;
1062 else {
1063 /* eos could be an BN */
1064 if (classes[run_end] == BN) {
1065 real_end = get_prev_valid_char_index(classes, run_end, run_start-1);
1066 if (real_end < run_start)
1067 real_end = run_start;
1069 else
1070 real_end = run_end;
1072 type_fence = get_next_valid_char_index(classes, run_end, count);
1073 if (type_fence == count)
1074 current_isolated->eos = (baselevel > levels[real_end]) ? baselevel : levels[real_end];
1075 else
1076 current_isolated->eos = (levels[type_fence] > levels[real_end]) ? levels[type_fence] : levels[real_end];
1078 current_isolated->eos = get_embedding_direction(current_isolated->eos);
1081 list_add_tail(set, &current_isolated->entry);
1082 TRACE(" } level %i {%s <--> %s}\n", current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1084 i++;
1087 heap_free(runs);
1088 return hr;
1091 HRESULT bidi_computelevels(const WCHAR *string, UINT32 count, UINT8 baselevel, UINT8 *explicit, UINT8 *levels)
1093 IsolatedRun *iso_run, *next;
1094 struct list IsolatingRuns;
1095 UINT8 *chartype;
1096 HRESULT hr;
1098 TRACE("%s, %u\n", debugstr_wn(string, count), count);
1100 chartype = heap_alloc(count*sizeof(*chartype));
1101 if (!chartype)
1102 return E_OUTOFMEMORY;
1104 bidi_classify(string, chartype, count);
1105 if (TRACE_ON(bidi)) bidi_dump_types("start ", chartype, 0, count);
1107 bidi_resolve_explicit(baselevel, chartype, levels, count);
1108 memcpy(explicit, levels, count*sizeof(*explicit));
1110 if (TRACE_ON(bidi)) bidi_dump_types("after explicit", chartype, 0, count);
1112 /* X10/BD13: Compute Isolating runs */
1113 hr = bidi_compute_isolating_runs_set(baselevel, chartype, levels, string, count, &IsolatingRuns);
1114 if (FAILED(hr))
1115 goto done;
1117 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1119 if (TRACE_ON(bidi)) iso_dump_types("run", iso_run);
1121 bidi_resolve_weak(iso_run);
1122 if (TRACE_ON(bidi)) iso_dump_types("after weak", iso_run);
1124 bidi_resolve_neutrals(iso_run);
1125 if (TRACE_ON(bidi)) iso_dump_types("after neutrals", iso_run);
1127 list_remove(&iso_run->entry);
1128 heap_free(iso_run);
1131 if (TRACE_ON(bidi)) bidi_dump_types("before implicit", chartype, 0, count);
1132 bidi_resolve_implicit(chartype, levels, 0, count-1);
1134 bidi_classify(string, chartype, count);
1135 bidi_resolve_resolved(baselevel, chartype, levels, 0, count-1);
1137 done:
1138 heap_free(chartype);
1139 return hr;