imm32: Simplify the helper functions to retrieve the thread data.
[wine.git] / dlls / dwrite / bidi.c
blob69eae5a2bdbf86d6b0e44b9fea6803c0d8b65631
1 /*
2 * Unicode Bidirectional Algorithm implementation
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include <stdarg.h>
46 #include "windef.h"
47 #include "winbase.h"
48 #include "wine/debug.h"
49 #include "wine/list.h"
51 #include "dwrite_private.h"
53 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
55 extern const unsigned short bidi_bracket_table[];
57 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
58 #define MAX_DEPTH 125
60 #define odd(x) ((x) & 1)
62 /*------------------------------------------------------------------------
63 Bidirectional Character Types
65 as defined by the Unicode Bidirectional Algorithm Table 3-7.
67 Note:
69 The list of bidirectional character types here is not grouped the
70 same way as the table 3-7, since the numberic values for the types
71 are chosen to keep the state and action tables compact.
72 ------------------------------------------------------------------------*/
73 enum directions
75 /* input types */
76 /* ON MUST be zero, code relies on ON = NI = 0 */
77 ON = 0, /* Other Neutral */
78 L, /* Left Letter */
79 R, /* Right Letter */
80 AN, /* Arabic Number */
81 EN, /* European Number */
82 AL, /* Arabic Letter (Right-to-left) */
83 NSM, /* Non-spacing Mark */
84 CS, /* Common Separator */
85 ES, /* European Separator */
86 ET, /* European Terminator (post/prefix e.g. $ and %) */
88 /* resolved types */
89 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
91 /* input types, */
92 S, /* Segment Separator (TAB) // used only in L1 */
93 WS, /* White space // used only in L1 */
94 B, /* Paragraph Separator (aka as PS) */
96 /* types for explicit controls */
97 RLO, /* these are used only in X1-X9 */
98 RLE,
99 LRO,
100 LRE,
101 PDF,
103 LRI, /* Isolate formatting characters new with 6.3 */
104 RLI,
105 FSI,
106 PDI,
108 /* resolved types, also resolved directions */
109 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
112 static const char debug_type[][4] =
114 "ON", /* Other Neutral */
115 "L", /* Left Letter */
116 "R", /* Right Letter */
117 "AN", /* Arabic Number */
118 "EN", /* European Number */
119 "AL", /* Arabic Letter (Right-to-left) */
120 "NSM", /* Non-spacing Mark */
121 "CS", /* Common Separator */
122 "ES", /* European Separator */
123 "ET", /* European Terminator (post/prefix e.g. $ and %) */
124 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
125 "S", /* Segment Separator (TAB) used only in L1 */
126 "WS", /* White space used only in L1 */
127 "B", /* Paragraph Separator (aka as PS) */
128 "RLO", /* these are used only in X1-X9 */
129 "RLE",
130 "LRO",
131 "LRE",
132 "PDF",
133 "LRI", /* Isolate formatting characters new with 6.3 */
134 "RLI",
135 "FSI",
136 "PDI",
139 static inline void bidi_dump_types(const char* header, const UINT8 *types, UINT32 start, UINT32 end)
141 int i, len = 0;
142 TRACE("%s:", header);
143 for (i = start; i < end && len < 200; i++) {
144 TRACE(" %s", debug_type[types[i]]);
145 len += strlen(debug_type[types[i]])+1;
147 if (i != end)
148 TRACE("...");
149 TRACE("\n");
152 /* Convert the libwine information to the direction enum */
153 static void bidi_classify(const WCHAR *string, UINT8 *chartype, UINT32 count)
155 static const enum directions dir_map[16] =
157 L, /* unassigned defaults to L */
170 NSM,
172 PDF /* also LRE, LRO, RLE, RLO */
175 UINT32 i;
177 for (i = 0; i < count; ++i) {
178 chartype[i] = dir_map[get_char_typeW(string[i]) >> 12];
180 switch (chartype[i]) {
181 case ES:
182 break;
183 case PDF:
184 switch (string[i]) {
185 case 0x202a: chartype[i] = LRE; break;
186 case 0x202b: chartype[i] = RLE; break;
187 case 0x202c: chartype[i] = PDF; break;
188 case 0x202d: chartype[i] = LRO; break;
189 case 0x202e: chartype[i] = RLO; break;
190 case 0x2066: chartype[i] = LRI; break;
191 case 0x2067: chartype[i] = RLI; break;
192 case 0x2068: chartype[i] = FSI; break;
193 case 0x2069: chartype[i] = PDI; break;
195 break;
200 WCHAR bidi_get_mirrored_char(WCHAR ch)
202 extern const WCHAR wine_mirror_map[];
203 return ch + wine_mirror_map[wine_mirror_map[ch >> 8] + (ch & 0xff)];
206 /* RESOLVE EXPLICIT */
208 static inline UINT8 get_greater_even_level(UINT8 level)
210 return odd(level) ? level + 1 : level + 2;
213 static inline UINT8 get_greater_odd_level(UINT8 level)
215 return odd(level) ? level + 2 : level + 1;
218 static inline UINT8 get_embedding_direction(UINT8 level)
220 return odd(level) ? R : L;
223 /*------------------------------------------------------------------------
224 Function: bidi_resolve_explicit
226 Recursively resolves explicit embedding levels and overrides.
227 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
229 Input: Base embedding level and direction
230 Character count
232 Output: Array of embedding levels
234 In/Out: Array of direction classes
237 Note: The function uses two simple counters to keep track of
238 matching explicit codes and PDF. Use the default argument for
239 the outermost call. The nesting counter counts the recursion
240 depth and not the embedding level.
241 ------------------------------------------------------------------------*/
242 typedef struct tagStackItem
244 UINT8 level;
245 UINT8 override;
246 BOOL isolate;
247 } StackItem;
249 #define push_stack(l,o,i) \
250 do { stack_top--; \
251 stack[stack_top].level = l; \
252 stack[stack_top].override = o; \
253 stack[stack_top].isolate = i;} while(0)
255 #define pop_stack() do { stack_top++; } while(0)
257 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
259 static void bidi_resolve_explicit(UINT8 baselevel, UINT8 *classes, UINT8 *levels, UINT32 count)
261 /* X1 */
262 int overflow_isolate_count = 0;
263 int overflow_embedding_count = 0;
264 int valid_isolate_count = 0;
265 UINT32 i;
267 StackItem stack[MAX_DEPTH+2];
268 int stack_top = MAX_DEPTH+1;
270 stack[stack_top].level = baselevel;
271 stack[stack_top].override = NI;
272 stack[stack_top].isolate = FALSE;
274 for (i = 0; i < count; i++) {
275 UINT8 least_odd, least_even;
277 switch (classes[i]) {
279 /* X2 */
280 case RLE:
281 least_odd = get_greater_odd_level(stack[stack_top].level);
282 levels[i] = stack[stack_top].level;
283 if (valid_level(least_odd))
284 push_stack(least_odd, NI, FALSE);
285 else if (overflow_isolate_count == 0)
286 overflow_embedding_count++;
287 break;
289 /* X3 */
290 case LRE:
291 least_even = get_greater_even_level(stack[stack_top].level);
292 levels[i] = stack[stack_top].level;
293 if (valid_level(least_even))
294 push_stack(least_even, NI, FALSE);
295 else if (overflow_isolate_count == 0)
296 overflow_embedding_count++;
297 break;
299 /* X4 */
300 case RLO:
301 least_odd = get_greater_odd_level(stack[stack_top].level);
302 levels[i] = stack[stack_top].level;
303 if (valid_level(least_odd))
304 push_stack(least_odd, R, FALSE);
305 else if (overflow_isolate_count == 0)
306 overflow_embedding_count++;
307 break;
309 /* X5 */
310 case LRO:
311 least_even = get_greater_even_level(stack[stack_top].level);
312 levels[i] = stack[stack_top].level;
313 if (valid_level(least_even))
314 push_stack(least_even, L, FALSE);
315 else if (overflow_isolate_count == 0)
316 overflow_embedding_count++;
317 break;
319 /* X5a */
320 case RLI:
321 least_odd = get_greater_odd_level(stack[stack_top].level);
322 levels[i] = stack[stack_top].level;
323 if (valid_level(least_odd))
325 valid_isolate_count++;
326 push_stack(least_odd, NI, TRUE);
328 else
329 overflow_isolate_count++;
330 break;
332 /* X5b */
333 case LRI:
334 least_even = get_greater_even_level(stack[stack_top].level);
335 levels[i] = stack[stack_top].level;
336 if (valid_level(least_even))
338 valid_isolate_count++;
339 push_stack(least_even, NI, TRUE);
341 else
342 overflow_isolate_count++;
343 break;
345 /* X5c */
346 case FSI:
348 UINT8 new_level = 0;
349 int skipping = 0;
350 int j;
352 levels[i] = stack[stack_top].level;
353 for (j = i+1; j < count; j++)
355 if (classes[j] == LRI || classes[j] == RLI || classes[j] == FSI)
357 skipping++;
358 continue;
360 else if (classes[j] == PDI)
362 if (skipping)
363 skipping --;
364 else
365 break;
366 continue;
369 if (skipping) continue;
371 if (classes[j] == L)
373 new_level = 0;
374 break;
376 else if (classes[j] == R || classes[j] == AL)
378 new_level = 1;
379 break;
382 if (odd(new_level))
384 least_odd = get_greater_odd_level(stack[stack_top].level);
385 if (valid_level(least_odd))
387 valid_isolate_count++;
388 push_stack(least_odd, NI, TRUE);
390 else
391 overflow_isolate_count++;
393 else
395 least_even = get_greater_even_level(stack[stack_top].level);
396 if (valid_level(least_even))
398 valid_isolate_count++;
399 push_stack(least_even, NI, TRUE);
401 else
402 overflow_isolate_count++;
404 break;
407 /* X6 */
408 case ON:
409 case L:
410 case R:
411 case AN:
412 case EN:
413 case AL:
414 case NSM:
415 case CS:
416 case ES:
417 case ET:
418 case S:
419 case WS:
420 levels[i] = stack[stack_top].level;
421 if (stack[stack_top].override != NI)
422 classes[i] = stack[stack_top].override;
423 break;
425 /* X6a */
426 case PDI:
427 if (overflow_isolate_count) overflow_isolate_count--;
428 else if (!valid_isolate_count) {/* do nothing */}
429 else
431 overflow_embedding_count = 0;
432 while (!stack[stack_top].isolate) pop_stack();
433 pop_stack();
434 valid_isolate_count--;
436 levels[i] = stack[stack_top].level;
437 break;
439 /* X7 */
440 case PDF:
441 levels[i] = stack[stack_top].level;
442 if (overflow_isolate_count) {/* do nothing */}
443 else if (overflow_embedding_count) overflow_embedding_count--;
444 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
445 pop_stack();
446 break;
448 /* X8: Nothing */
449 default:
450 break;
453 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
454 for (i = 0; i < count ; i++)
455 if (classes[i] == RLE || classes[i] == LRE || classes[i] == RLO || classes[i] == LRO || classes[i] == PDF)
456 classes[i] = BN;
459 static inline int get_prev_valid_char_index(const UINT8 *classes, int index, int back_fence)
461 if (index == -1 || index == back_fence) return index;
462 index--;
463 while (index > back_fence && classes[index] == BN) index--;
464 return index;
467 static inline int get_next_valid_char_index(const UINT8 *classes, int index, int front_fence)
469 if (index == front_fence) return index;
470 index++;
471 while (index < front_fence && classes[index] == BN) index++;
472 return index;
475 typedef struct tagRun
477 int start;
478 int end;
479 UINT8 e;
480 } Run;
482 typedef struct tagRunChar
484 WCHAR ch;
485 UINT8 *class;
486 } RunChar;
488 typedef struct tagIsolatedRun
490 struct list entry;
491 int length;
492 UINT8 sos;
493 UINT8 eos;
494 UINT8 e;
496 RunChar item[1];
497 } IsolatedRun;
499 static inline int get_next_valid_char_from_run(IsolatedRun *run, int index)
501 if (index >= (run->length-1)) return -1;
502 index++;
503 while (index < run->length && *run->item[index].class == BN) index++;
504 if (index == run->length) return -1;
505 return index;
508 static inline int get_prev_valid_char_from_run(IsolatedRun *run, int index)
510 if (index <= 0) return -1;
511 index--;
512 while (index > -1 && *run->item[index].class == BN) index--;
513 return index;
516 static inline void iso_dump_types(const char* header, IsolatedRun *run)
518 int i, len = 0;
519 TRACE("%s:",header);
520 TRACE("[ ");
521 for (i = 0; i < run->length && len < 200; i++) {
522 TRACE(" %s", debug_type[*run->item[i].class]);
523 len += strlen(debug_type[*run->item[i].class])+1;
525 if (i != run->length)
526 TRACE("...");
527 TRACE(" ]\n");
530 /*------------------------------------------------------------------------
531 Function: bidi_resolve_weak
533 Resolves the directionality of numeric and other weak character types
535 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
537 Input: Array of embedding levels
538 Character count
540 In/Out: Array of directional classes
542 Note: On input only these directional classes are expected
543 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
544 ------------------------------------------------------------------------*/
545 static void bidi_resolve_weak(IsolatedRun *iso_run)
547 int i;
549 /* W1 */
550 for (i=0; i < iso_run->length; i++) {
551 if (*iso_run->item[i].class == NSM) {
552 int j = get_prev_valid_char_from_run(iso_run, i);
553 if (j == -1)
554 *iso_run->item[i].class = iso_run->sos;
555 else if (*iso_run->item[j].class >= LRI)
556 *iso_run->item[i].class = ON;
557 else
558 *iso_run->item[i].class = *iso_run->item[j].class;
562 /* W2 */
563 for (i = 0; i < iso_run->length; i++) {
564 if (*iso_run->item[i].class == EN) {
565 int j = get_prev_valid_char_from_run(iso_run, i);
566 while (j > -1) {
567 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L || *iso_run->item[j].class == AL) {
568 if (*iso_run->item[j].class == AL)
569 *iso_run->item[i].class = AN;
570 break;
572 j = get_prev_valid_char_from_run(iso_run, j);
577 /* W3 */
578 for (i = 0; i < iso_run->length; i++) {
579 if (*iso_run->item[i].class == AL)
580 *iso_run->item[i].class = R;
583 /* W4 */
584 for (i = 0; i < iso_run->length; i++) {
585 if (*iso_run->item[i].class == ES) {
586 int b = get_prev_valid_char_from_run(iso_run, i);
587 int f = get_next_valid_char_from_run(iso_run, i);
589 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
590 *iso_run->item[i].class = EN;
592 else if (*iso_run->item[i].class == CS) {
593 int b = get_prev_valid_char_from_run(iso_run, i);
594 int f = get_next_valid_char_from_run(iso_run, i);
596 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
597 *iso_run->item[i].class = EN;
598 else if (b > -1 && f > -1 && *iso_run->item[b].class == AN && *iso_run->item[f].class == AN)
599 *iso_run->item[i].class = AN;
603 /* W5 */
604 for (i = 0; i < iso_run->length; i++) {
605 if (*iso_run->item[i].class == ET) {
606 int j;
607 for (j = i-1 ; j > -1; j--) {
608 if (*iso_run->item[j].class == BN) continue;
609 if (*iso_run->item[j].class == ET) continue;
610 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
611 else break;
613 if (*iso_run->item[i].class == ET) {
614 for (j = i+1; j < iso_run->length; j++) {
615 if (*iso_run->item[j].class == BN) continue;
616 if (*iso_run->item[j].class == ET) continue;
617 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
618 else break;
624 /* W6 */
625 for (i = 0; i < iso_run->length; i++) {
626 if (*iso_run->item[i].class == ET || *iso_run->item[i].class == ES || *iso_run->item[i].class == CS || *iso_run->item[i].class == ON)
628 int b = i-1;
629 int f = i+1;
630 if (b > -1 && *iso_run->item[b].class == BN)
631 *iso_run->item[b].class = ON;
632 if (f < iso_run->length && *iso_run->item[f].class == BN)
633 *iso_run->item[f].class = ON;
635 *iso_run->item[i].class = ON;
639 /* W7 */
640 for (i = 0; i < iso_run->length; i++) {
641 if (*iso_run->item[i].class == EN) {
642 int j;
643 for (j = get_prev_valid_char_from_run(iso_run, i); j > -1; j = get_prev_valid_char_from_run(iso_run, j))
644 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L) {
645 if (*iso_run->item[j].class == L)
646 *iso_run->item[i].class = L;
647 break;
649 if (iso_run->sos == L && j == -1)
650 *iso_run->item[i].class = L;
655 typedef struct tagBracketPair
657 int start;
658 int end;
659 } BracketPair;
661 static int bracketpair_compr(const void *a, const void* b)
663 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
666 static BracketPair *bidi_compute_bracket_pairs(IsolatedRun *iso_run)
668 WCHAR *open_stack;
669 int *stack_index;
670 int stack_top = iso_run->length;
671 BracketPair *out = NULL;
672 int pair_count = 0;
673 int i;
675 open_stack = heap_alloc(sizeof(WCHAR) * iso_run->length);
676 stack_index = heap_alloc(sizeof(int) * iso_run->length);
678 for (i = 0; i < iso_run->length; i++) {
679 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
680 if (ubv) {
681 if (!out) {
682 out = heap_alloc(sizeof(BracketPair));
683 out[0].start = -1;
686 if ((ubv >> 8) == 0) {
687 stack_top--;
688 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
689 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
690 if (open_stack[stack_top] == 0x232A)
691 open_stack[stack_top] = 0x3009;
692 stack_index[stack_top] = i;
694 else if ((ubv >> 8) == 1) {
695 int j;
697 if (stack_top == iso_run->length) continue;
698 for (j = stack_top; j < iso_run->length; j++) {
699 WCHAR c = iso_run->item[i].ch;
700 if (c == 0x232A) c = 0x3009;
701 if (c == open_stack[j]) {
702 out[pair_count].start = stack_index[j];
703 out[pair_count].end = i;
704 pair_count++;
705 out = heap_realloc(out, sizeof(BracketPair) * (pair_count+1));
706 out[pair_count].start = -1;
707 stack_top = j+1;
708 break;
714 if (pair_count == 0) {
715 heap_free(out);
716 out = NULL;
718 else if (pair_count > 1)
719 qsort(out, pair_count, sizeof(BracketPair), bracketpair_compr);
721 heap_free(open_stack);
722 heap_free(stack_index);
723 return out;
726 static inline UINT8 get_rule_N0_class(UINT8 class)
728 return (class == AN || class == EN) ? R : class;
731 /*------------------------------------------------------------------------
732 Function: bidi_resolve_neutrals
734 Resolves the directionality of neutral character types.
736 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
738 Input: Array of embedding levels
739 Character count
740 Baselevel
742 In/Out: Array of directional classes
744 Note: On input only these directional classes are expected
745 R, L, NI, AN, EN and BN
747 W8 resolves a number of ENs to L
748 ------------------------------------------------------------------------*/
749 static void bidi_resolve_neutrals(IsolatedRun *run)
751 BracketPair *pairs;
752 int i;
754 /* Translate isolates into NI */
755 for (i = 0; i < run->length; i++) {
756 if (*run->item[i].class >= LRI)
757 *run->item[i].class = NI;
759 switch (*run->item[i].class) {
760 case B:
761 case S:
762 case WS: *run->item[i].class = NI;
765 ASSERT(*run->item[i].class < 5 || *run->item[i].class == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
768 /* N0: Skipping bracketed pairs for now */
769 pairs = bidi_compute_bracket_pairs(run);
770 if (pairs) {
771 BracketPair *p = pairs;
772 int i = 0;
773 while (p->start >= 0) {
774 UINT8 e = get_embedding_direction(run->e);
775 UINT8 o = get_embedding_direction(run->e + 1);
776 BOOL flag_o = FALSE;
777 int j;
779 TRACE("Bracket Pair [%i - %i]\n", p->start, p->end);
781 /* N0.b */
782 for (j = p->start+1; j < p->end; j++) {
783 if (get_rule_N0_class(*run->item[j].class) == e) {
784 *run->item[p->start].class = e;
785 *run->item[p->end].class = e;
786 break;
788 else if (get_rule_N0_class(*run->item[j].class) == o)
789 flag_o = TRUE;
791 /* N0.c */
792 if (j == p->end && flag_o) {
793 for (j = p->start; j >= 0; j--) {
794 if (get_rule_N0_class(*run->item[j].class) == o) {
795 *run->item[p->start].class = o;
796 *run->item[p->end].class = o;
797 break;
799 else if (get_rule_N0_class(*run->item[j].class) == e) {
800 *run->item[p->start].class = e;
801 *run->item[p->end].class = e;
802 break;
805 if (j < 0) {
806 *run->item[p->start].class = run->sos;
807 *run->item[p->end].class = run->sos;
811 i++;
812 p = &pairs[i];
814 heap_free(pairs);
817 /* N1 */
818 for (i = 0; i < run->length; i++) {
819 UINT8 l, r;
821 if (*run->item[i].class == NI) {
822 int b = get_prev_valid_char_from_run(run, i);
823 int j;
825 if (b == -1) {
826 l = run->sos;
827 b = 0;
829 else {
830 if (*run->item[b].class == R || *run->item[b].class == AN || *run->item[b].class == EN)
831 l = R;
832 else if (*run->item[b].class == L)
833 l = L;
834 else /* No string type */
835 continue;
837 j = get_next_valid_char_from_run(run, i);
838 while (j > -1 && *run->item[j].class == NI) j = get_next_valid_char_from_run(run, j);
839 if (j == -1) {
840 r = run->eos;
841 j = run->length;
843 else if (*run->item[j].class == R || *run->item[j].class == AN || *run->item[j].class == EN)
844 r = R;
845 else if (*run->item[j].class == L)
846 r = L;
847 else /* No string type */
848 continue;
850 if (r == l) {
851 for (b = i; b < j && b < run->length; b++)
852 *run->item[b].class = r;
857 /* N2 */
858 for (i = 0; i < run->length; i++) {
859 if (*run->item[i].class == NI) {
860 int b = i-1;
861 int f = i+1;
863 *run->item[i].class = get_embedding_direction(run->e);
864 if (b > -1 && *run->item[b].class == BN)
865 *run->item[b].class = get_embedding_direction(run->e);
866 if (f < run->length && *run->item[f].class == BN)
867 *run->item[f].class = get_embedding_direction(run->e);
872 /*------------------------------------------------------------------------
873 Function: bidi_resolve_implicit
875 Recursively resolves implicit embedding levels.
876 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
878 Input: Array of direction classes
879 Character count
880 Base level
882 In/Out: Array of embedding levels
884 Note: levels may exceed 15 on output.
885 Accepted subset of direction classes
886 R, L, AN, EN
887 ------------------------------------------------------------------------*/
888 static void bidi_resolve_implicit(const UINT8 *classes, UINT8 *levels, int sos, int eos)
890 int i;
892 /* I1/2 */
893 for (i = sos; i <= eos; i++) {
894 if (classes[i] == BN)
895 continue;
897 ASSERT(classes[i] > 0); /* "No Neutrals allowed to survive here." */
898 ASSERT(classes[i] < 5); /* "Out of range." */
900 if (odd(levels[i]) && (classes[i] == L || classes[i] == EN || classes[i] == AN))
901 levels[i]++;
902 else if (!odd(levels[i]) && classes[i] == R)
903 levels[i]++;
904 else if (!odd(levels[i]) && (classes[i] == EN || classes[i] == AN))
905 levels[i] += 2;
909 static inline BOOL is_rule_L1_reset_class(UINT8 class)
911 switch (class) {
912 case WS:
913 case FSI:
914 case LRI:
915 case RLI:
916 case PDI:
917 case LRE:
918 case RLE:
919 case LRO:
920 case RLO:
921 case PDF:
922 case BN:
923 return TRUE;
924 default:
925 return FALSE;
929 static void bidi_resolve_resolved(UINT8 baselevel, const UINT8 *classes, UINT8 *levels, int sos, int eos)
931 int i;
933 /* L1 */
934 for (i = sos; i <= eos; i++) {
935 if (classes[i] == B || classes[i] == S) {
936 int j = i - 1;
937 while (i > sos && j >= sos && is_rule_L1_reset_class(classes[j]))
938 levels[j--] = baselevel;
939 levels[i] = baselevel;
941 if (i == eos && is_rule_L1_reset_class(classes[i])) {
942 int j = i;
943 while (j >= sos && is_rule_L1_reset_class(classes[j]))
944 levels[j--] = baselevel;
949 static HRESULT bidi_compute_isolating_runs_set(UINT8 baselevel, UINT8 *classes, UINT8 *levels, const WCHAR *string, UINT32 count, struct list *set)
951 int run_start, run_end, i;
952 int run_count = 0;
953 HRESULT hr = S_OK;
954 Run *runs;
956 runs = heap_alloc(count * sizeof(Run));
957 if (!runs)
958 return E_OUTOFMEMORY;
960 list_init(set);
962 /* Build Runs */
963 run_start = 0;
964 while (run_start < count) {
965 run_end = get_next_valid_char_index(classes, run_start, count);
966 while (run_end < count && levels[run_end] == levels[run_start])
967 run_end = get_next_valid_char_index(classes, run_end, count);
968 run_end--;
969 runs[run_count].start = run_start;
970 runs[run_count].end = run_end;
971 runs[run_count].e = levels[run_start];
972 run_start = get_next_valid_char_index(classes, run_end, count);
973 run_count++;
976 /* Build Isolating Runs */
977 i = 0;
978 while (i < run_count) {
979 int k = i;
980 if (runs[k].start >= 0) {
981 IsolatedRun *current_isolated;
982 int type_fence, real_end;
983 int j;
985 current_isolated = heap_alloc(sizeof(IsolatedRun) + sizeof(RunChar)*count);
986 if (!current_isolated) {
987 hr = E_OUTOFMEMORY;
988 break;
991 run_start = runs[k].start;
992 current_isolated->e = runs[k].e;
993 current_isolated->length = (runs[k].end - runs[k].start)+1;
995 for (j = 0; j < current_isolated->length; j++) {
996 current_isolated->item[j].class = &classes[runs[k].start+j];
997 current_isolated->item[j].ch = string[runs[k].start+j];
1000 run_end = runs[k].end;
1002 TRACE("{ [%i -- %i]",run_start, run_end);
1004 if (classes[run_end] == BN)
1005 run_end = get_prev_valid_char_index(classes, run_end, runs[k].start);
1007 while (run_end < count && (classes[run_end] == RLI || classes[run_end] == LRI || classes[run_end] == FSI)) {
1008 j = k+1;
1009 search:
1010 while (j < run_count && classes[runs[j].start] != PDI) j++;
1011 if (j < run_count && runs[i].e != runs[j].e) {
1012 j++;
1013 goto search;
1016 if (j != run_count) {
1017 int l = current_isolated->length;
1018 int m;
1020 current_isolated->length += (runs[j].end - runs[j].start)+1;
1021 for (m = 0; l < current_isolated->length; l++, m++) {
1022 current_isolated->item[l].class = &classes[runs[j].start+m];
1023 current_isolated->item[l].ch = string[runs[j].start+m];
1026 TRACE("[%i -- %i]", runs[j].start, runs[j].end);
1028 run_end = runs[j].end;
1029 if (classes[run_end] == BN)
1030 run_end = get_prev_valid_char_index(classes, run_end, runs[i].start);
1031 runs[j].start = -1;
1032 k = j;
1034 else {
1035 run_end = count;
1036 break;
1040 type_fence = get_prev_valid_char_index(classes, run_start, -1);
1042 if (type_fence == -1)
1043 current_isolated->sos = (baselevel > levels[run_start]) ? baselevel : levels[run_start];
1044 else
1045 current_isolated->sos = (levels[type_fence] > levels[run_start]) ? levels[type_fence] : levels[run_start];
1047 current_isolated->sos = get_embedding_direction(current_isolated->sos);
1049 if (run_end == count)
1050 current_isolated->eos = current_isolated->sos;
1051 else {
1052 /* eos could be an BN */
1053 if (classes[run_end] == BN) {
1054 real_end = get_prev_valid_char_index(classes, run_end, run_start-1);
1055 if (real_end < run_start)
1056 real_end = run_start;
1058 else
1059 real_end = run_end;
1061 type_fence = get_next_valid_char_index(classes, run_end, count);
1062 if (type_fence == count)
1063 current_isolated->eos = (baselevel > levels[real_end]) ? baselevel : levels[real_end];
1064 else
1065 current_isolated->eos = (levels[type_fence] > levels[real_end]) ? levels[type_fence] : levels[real_end];
1067 current_isolated->eos = get_embedding_direction(current_isolated->eos);
1070 list_add_tail(set, &current_isolated->entry);
1071 TRACE(" } level %i {%s <--> %s}\n", current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1073 i++;
1076 heap_free(runs);
1077 return hr;
1080 HRESULT bidi_computelevels(const WCHAR *string, UINT32 count, UINT8 baselevel, UINT8 *explicit, UINT8 *levels)
1082 IsolatedRun *iso_run, *next;
1083 struct list IsolatingRuns;
1084 UINT8 *chartype;
1085 HRESULT hr;
1087 TRACE("%s, %u\n", debugstr_wn(string, count), count);
1089 chartype = heap_alloc(count*sizeof(*chartype));
1090 if (!chartype)
1091 return E_OUTOFMEMORY;
1093 bidi_classify(string, chartype, count);
1094 if (TRACE_ON(bidi)) bidi_dump_types("start ", chartype, 0, count);
1096 bidi_resolve_explicit(baselevel, chartype, levels, count);
1097 memcpy(explicit, levels, count*sizeof(*explicit));
1099 if (TRACE_ON(bidi)) bidi_dump_types("after explicit", chartype, 0, count);
1101 /* X10/BD13: Compute Isolating runs */
1102 hr = bidi_compute_isolating_runs_set(baselevel, chartype, levels, string, count, &IsolatingRuns);
1103 if (FAILED(hr))
1104 goto done;
1106 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1108 if (TRACE_ON(bidi)) iso_dump_types("run", iso_run);
1110 bidi_resolve_weak(iso_run);
1111 if (TRACE_ON(bidi)) iso_dump_types("after weak", iso_run);
1113 bidi_resolve_neutrals(iso_run);
1114 if (TRACE_ON(bidi)) iso_dump_types("after neutrals", iso_run);
1116 list_remove(&iso_run->entry);
1117 heap_free(iso_run);
1120 if (TRACE_ON(bidi)) bidi_dump_types("before implicit", chartype, 0, count);
1121 bidi_resolve_implicit(chartype, levels, 0, count-1);
1123 bidi_classify(string, chartype, count);
1124 bidi_resolve_resolved(baselevel, chartype, levels, 0, count-1);
1126 done:
1127 heap_free(chartype);
1128 return hr;