win32u: Don't report cloned monitors in EnumDisplayMonitors().
[wine.git] / dlls / dwrite / bidi.c
blobcc62509b5574254aa547d22ca452f63dc942bc9b
1 /*
2 * Unicode Bidirectional Algorithm implementation
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include <stdarg.h>
46 #include "windef.h"
47 #include "winbase.h"
48 #include "wine/debug.h"
50 #include "dwrite_private.h"
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
54 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
55 extern const unsigned short bidi_direction_table[] DECLSPEC_HIDDEN;
57 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
58 #define MAX_DEPTH 125
60 #define odd(x) ((x) & 1)
62 /*------------------------------------------------------------------------
63 Bidirectional Character Types
65 as defined by the Unicode Bidirectional Algorithm Table 3-7.
67 Note:
69 The list of bidirectional character types here is not grouped the
70 same way as the table 3-7, since the numeric values for the types
71 are chosen to keep the state and action tables compact.
72 ------------------------------------------------------------------------*/
73 enum directions
75 /* input types */
76 /* ON MUST be zero, code relies on ON = NI = 0 */
77 ON = 0, /* Other Neutral */
78 L, /* Left Letter */
79 R, /* Right Letter */
80 AN, /* Arabic Number */
81 EN, /* European Number */
82 AL, /* Arabic Letter (Right-to-left) */
83 NSM, /* Non-spacing Mark */
84 CS, /* Common Separator */
85 ES, /* European Separator */
86 ET, /* European Terminator (post/prefix e.g. $ and %) */
88 /* resolved types */
89 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
91 /* input types, */
92 S, /* Segment Separator (TAB) // used only in L1 */
93 WS, /* White space // used only in L1 */
94 B, /* Paragraph Separator (aka as PS) */
96 /* types for explicit controls */
97 RLO, /* these are used only in X1-X9 */
98 RLE,
99 LRO,
100 LRE,
101 PDF,
103 LRI, /* Isolate formatting characters new with 6.3 */
104 RLI,
105 FSI,
106 PDI,
108 /* resolved types, also resolved directions */
109 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
112 static const char debug_type[][4] =
114 "ON", /* Other Neutral */
115 "L", /* Left Letter */
116 "R", /* Right Letter */
117 "AN", /* Arabic Number */
118 "EN", /* European Number */
119 "AL", /* Arabic Letter (Right-to-left) */
120 "NSM", /* Non-spacing Mark */
121 "CS", /* Common Separator */
122 "ES", /* European Separator */
123 "ET", /* European Terminator (post/prefix e.g. $ and %) */
124 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
125 "S", /* Segment Separator (TAB) used only in L1 */
126 "WS", /* White space used only in L1 */
127 "B", /* Paragraph Separator (aka as PS) */
128 "RLO", /* these are used only in X1-X9 */
129 "RLE",
130 "LRO",
131 "LRE",
132 "PDF",
133 "LRI", /* Isolate formatting characters new with 6.3 */
134 "RLI",
135 "FSI",
136 "PDI",
139 static inline void bidi_dump_types(const char* header, const UINT8 *types, UINT32 start, UINT32 end)
141 int i, len = 0;
142 TRACE("%s:", header);
143 for (i = start; i < end && len < 200; i++) {
144 TRACE(" %s", debug_type[types[i]]);
145 len += strlen(debug_type[types[i]])+1;
147 if (i != end)
148 TRACE("...");
149 TRACE("\n");
152 /* Convert the libwine information to the direction enum */
153 static void bidi_classify(const WCHAR *string, UINT8 *chartype, UINT32 count)
155 UINT32 i;
157 for (i = 0; i < count; ++i)
158 chartype[i] = get_table_entry( bidi_direction_table, string[i] );
161 /* RESOLVE EXPLICIT */
163 static inline UINT8 get_greater_even_level(UINT8 level)
165 return odd(level) ? level + 1 : level + 2;
168 static inline UINT8 get_greater_odd_level(UINT8 level)
170 return odd(level) ? level + 2 : level + 1;
173 static inline UINT8 get_embedding_direction(UINT8 level)
175 return odd(level) ? R : L;
178 /*------------------------------------------------------------------------
179 Function: bidi_resolve_explicit
181 Recursively resolves explicit embedding levels and overrides.
182 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
184 Input: Base embedding level and direction
185 Character count
187 Output: Array of embedding levels
189 In/Out: Array of direction classes
192 Note: The function uses two simple counters to keep track of
193 matching explicit codes and PDF. Use the default argument for
194 the outermost call. The nesting counter counts the recursion
195 depth and not the embedding level.
196 ------------------------------------------------------------------------*/
197 typedef struct tagStackItem
199 UINT8 level;
200 UINT8 override;
201 BOOL isolate;
202 } StackItem;
204 #define push_stack(l,o,i) \
205 do { stack_top--; \
206 stack[stack_top].level = l; \
207 stack[stack_top].override = o; \
208 stack[stack_top].isolate = i;} while(0)
210 #define pop_stack() do { stack_top++; } while(0)
212 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
214 static void bidi_resolve_explicit(UINT8 baselevel, UINT8 *classes, UINT8 *levels, UINT32 count)
216 /* X1 */
217 int overflow_isolate_count = 0;
218 int overflow_embedding_count = 0;
219 int valid_isolate_count = 0;
220 UINT32 i;
222 StackItem stack[MAX_DEPTH+2];
223 int stack_top = MAX_DEPTH+1;
225 stack[stack_top].level = baselevel;
226 stack[stack_top].override = NI;
227 stack[stack_top].isolate = FALSE;
229 for (i = 0; i < count; i++) {
230 UINT8 least_odd, least_even;
232 switch (classes[i]) {
234 /* X2 */
235 case RLE:
236 least_odd = get_greater_odd_level(stack[stack_top].level);
237 levels[i] = valid_level(least_odd) ? least_odd : stack[stack_top].level;
238 if (valid_level(least_odd))
239 push_stack(least_odd, NI, FALSE);
240 else if (overflow_isolate_count == 0)
241 overflow_embedding_count++;
242 break;
244 /* X3 */
245 case LRE:
246 least_even = get_greater_even_level(stack[stack_top].level);
247 levels[i] = valid_level(least_even) ? least_even : stack[stack_top].level;
248 if (valid_level(least_even))
249 push_stack(least_even, NI, FALSE);
250 else if (overflow_isolate_count == 0)
251 overflow_embedding_count++;
252 break;
254 /* X4 */
255 case RLO:
256 least_odd = get_greater_odd_level(stack[stack_top].level);
257 levels[i] = stack[stack_top].level;
258 if (valid_level(least_odd))
259 push_stack(least_odd, R, FALSE);
260 else if (overflow_isolate_count == 0)
261 overflow_embedding_count++;
262 break;
264 /* X5 */
265 case LRO:
266 least_even = get_greater_even_level(stack[stack_top].level);
267 levels[i] = stack[stack_top].level;
268 if (valid_level(least_even))
269 push_stack(least_even, L, FALSE);
270 else if (overflow_isolate_count == 0)
271 overflow_embedding_count++;
272 break;
274 /* X5a */
275 case RLI:
276 least_odd = get_greater_odd_level(stack[stack_top].level);
277 levels[i] = stack[stack_top].level;
278 if (valid_level(least_odd))
280 valid_isolate_count++;
281 push_stack(least_odd, NI, TRUE);
283 else
284 overflow_isolate_count++;
285 break;
287 /* X5b */
288 case LRI:
289 least_even = get_greater_even_level(stack[stack_top].level);
290 levels[i] = stack[stack_top].level;
291 if (valid_level(least_even))
293 valid_isolate_count++;
294 push_stack(least_even, NI, TRUE);
296 else
297 overflow_isolate_count++;
298 break;
300 /* X5c */
301 case FSI:
303 UINT8 new_level = 0;
304 int skipping = 0;
305 int j;
307 levels[i] = stack[stack_top].level;
308 for (j = i+1; j < count; j++)
310 if (classes[j] == LRI || classes[j] == RLI || classes[j] == FSI)
312 skipping++;
313 continue;
315 else if (classes[j] == PDI)
317 if (skipping)
318 skipping --;
319 else
320 break;
321 continue;
324 if (skipping) continue;
326 if (classes[j] == L)
328 new_level = 0;
329 break;
331 else if (classes[j] == R || classes[j] == AL)
333 new_level = 1;
334 break;
337 if (odd(new_level))
339 least_odd = get_greater_odd_level(stack[stack_top].level);
340 if (valid_level(least_odd))
342 valid_isolate_count++;
343 push_stack(least_odd, NI, TRUE);
345 else
346 overflow_isolate_count++;
348 else
350 least_even = get_greater_even_level(stack[stack_top].level);
351 if (valid_level(least_even))
353 valid_isolate_count++;
354 push_stack(least_even, NI, TRUE);
356 else
357 overflow_isolate_count++;
359 break;
362 /* X6 */
363 case ON:
364 case L:
365 case R:
366 case AN:
367 case EN:
368 case AL:
369 case NSM:
370 case CS:
371 case ES:
372 case ET:
373 case S:
374 case WS:
375 levels[i] = stack[stack_top].level;
376 if (stack[stack_top].override != NI)
377 classes[i] = stack[stack_top].override;
378 break;
380 /* X6a */
381 case PDI:
382 if (overflow_isolate_count) overflow_isolate_count--;
383 else if (!valid_isolate_count) {/* do nothing */}
384 else
386 overflow_embedding_count = 0;
387 while (!stack[stack_top].isolate) pop_stack();
388 pop_stack();
389 valid_isolate_count--;
391 levels[i] = stack[stack_top].level;
392 break;
394 /* X7 */
395 case PDF:
396 levels[i] = stack[stack_top].level;
397 if (overflow_isolate_count) {/* do nothing */}
398 else if (overflow_embedding_count) overflow_embedding_count--;
399 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
400 pop_stack();
401 break;
403 /* X8 */
404 default:
405 levels[i] = baselevel;
406 break;
409 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
410 for (i = 0; i < count ; i++)
411 if (classes[i] == RLE || classes[i] == LRE || classes[i] == RLO || classes[i] == LRO || classes[i] == PDF)
412 classes[i] = BN;
415 static inline int get_prev_valid_char_index(const UINT8 *classes, int index, int back_fence)
417 if (index == -1 || index == back_fence) return index;
418 index--;
419 while (index > back_fence && classes[index] == BN) index--;
420 return index;
423 static inline int get_next_valid_char_index(const UINT8 *classes, int index, int front_fence)
425 if (index == front_fence) return index;
426 index++;
427 while (index < front_fence && classes[index] == BN) index++;
428 return index;
431 typedef struct tagRun
433 int start;
434 int end;
435 UINT8 e;
436 } Run;
438 typedef struct tagRunChar
440 WCHAR ch;
441 UINT8 *class;
442 } RunChar;
444 typedef struct tagIsolatedRun
446 struct list entry;
447 int length;
448 UINT8 sos;
449 UINT8 eos;
450 UINT8 e;
452 RunChar item[1];
453 } IsolatedRun;
455 static inline int get_next_valid_char_from_run(IsolatedRun *run, int index)
457 if (index >= (run->length-1)) return -1;
458 index++;
459 while (index < run->length && *run->item[index].class == BN) index++;
460 if (index == run->length) return -1;
461 return index;
464 static inline int get_prev_valid_char_from_run(IsolatedRun *run, int index)
466 if (index <= 0) return -1;
467 index--;
468 while (index > -1 && *run->item[index].class == BN) index--;
469 return index;
472 static inline void iso_dump_types(const char* header, IsolatedRun *run)
474 int i, len = 0;
475 TRACE("%s:",header);
476 TRACE("[ ");
477 for (i = 0; i < run->length && len < 200; i++) {
478 TRACE(" %s", debug_type[*run->item[i].class]);
479 len += strlen(debug_type[*run->item[i].class])+1;
481 if (i != run->length)
482 TRACE("...");
483 TRACE(" ]\n");
486 /*------------------------------------------------------------------------
487 Function: bidi_resolve_weak
489 Resolves the directionality of numeric and other weak character types
491 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
493 Input: Array of embedding levels
494 Character count
496 In/Out: Array of directional classes
498 Note: On input only these directional classes are expected
499 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
500 ------------------------------------------------------------------------*/
501 static BOOL bidi_is_isolate(UINT8 class)
503 return class == LRI || class == RLI || class == FSI || class == PDI;
506 static void bidi_resolve_weak(IsolatedRun *iso_run)
508 int i;
510 /* W1 */
511 for (i=0; i < iso_run->length; i++) {
512 if (*iso_run->item[i].class == NSM) {
513 int j = get_prev_valid_char_from_run(iso_run, i);
514 if (j == -1)
515 *iso_run->item[i].class = iso_run->sos;
516 else if (bidi_is_isolate(*iso_run->item[j].class))
517 *iso_run->item[i].class = ON;
518 else
519 *iso_run->item[i].class = *iso_run->item[j].class;
523 /* W2 */
524 for (i = 0; i < iso_run->length; i++) {
525 if (*iso_run->item[i].class == EN) {
526 int j = get_prev_valid_char_from_run(iso_run, i);
527 while (j > -1) {
528 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L || *iso_run->item[j].class == AL) {
529 if (*iso_run->item[j].class == AL)
530 *iso_run->item[i].class = AN;
531 break;
533 j = get_prev_valid_char_from_run(iso_run, j);
538 /* W3 */
539 for (i = 0; i < iso_run->length; i++) {
540 if (*iso_run->item[i].class == AL)
541 *iso_run->item[i].class = R;
544 /* W4 */
545 for (i = 0; i < iso_run->length; i++) {
546 if (*iso_run->item[i].class == ES) {
547 int b = get_prev_valid_char_from_run(iso_run, i);
548 int f = get_next_valid_char_from_run(iso_run, i);
550 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
551 *iso_run->item[i].class = EN;
553 else if (*iso_run->item[i].class == CS) {
554 int b = get_prev_valid_char_from_run(iso_run, i);
555 int f = get_next_valid_char_from_run(iso_run, i);
557 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
558 *iso_run->item[i].class = EN;
559 else if (b > -1 && f > -1 && *iso_run->item[b].class == AN && *iso_run->item[f].class == AN)
560 *iso_run->item[i].class = AN;
564 /* W5 */
565 for (i = 0; i < iso_run->length; i++) {
566 if (*iso_run->item[i].class == ET) {
567 int j;
568 for (j = i-1 ; j > -1; j--) {
569 if (*iso_run->item[j].class == BN) continue;
570 if (*iso_run->item[j].class == ET) continue;
571 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
572 else break;
574 if (*iso_run->item[i].class == ET) {
575 for (j = i+1; j < iso_run->length; j++) {
576 if (*iso_run->item[j].class == BN) continue;
577 if (*iso_run->item[j].class == ET) continue;
578 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
579 else break;
585 /* W6 */
586 for (i = 0; i < iso_run->length; i++) {
587 if (*iso_run->item[i].class == ET || *iso_run->item[i].class == ES || *iso_run->item[i].class == CS || *iso_run->item[i].class == ON)
589 int b = i-1;
590 int f = i+1;
591 if (b > -1 && *iso_run->item[b].class == BN)
592 *iso_run->item[b].class = ON;
593 if (f < iso_run->length && *iso_run->item[f].class == BN)
594 *iso_run->item[f].class = ON;
596 *iso_run->item[i].class = ON;
600 /* W7 */
601 for (i = 0; i < iso_run->length; i++) {
602 if (*iso_run->item[i].class == EN) {
603 int j;
604 for (j = get_prev_valid_char_from_run(iso_run, i); j > -1; j = get_prev_valid_char_from_run(iso_run, j))
605 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L) {
606 if (*iso_run->item[j].class == L)
607 *iso_run->item[i].class = L;
608 break;
610 if (iso_run->sos == L && j == -1)
611 *iso_run->item[i].class = L;
616 typedef struct tagBracketPair
618 int start;
619 int end;
620 } BracketPair;
622 static int __cdecl bracketpair_compr(const void *a, const void* b)
624 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
627 static BracketPair *bidi_compute_bracket_pairs(IsolatedRun *iso_run)
629 WCHAR *open_stack;
630 int *stack_index;
631 int stack_top = iso_run->length;
632 BracketPair *out = NULL;
633 int pair_count = 0;
634 int i;
636 open_stack = malloc(sizeof(WCHAR) * iso_run->length);
637 stack_index = malloc(sizeof(int) * iso_run->length);
639 for (i = 0; i < iso_run->length; i++) {
640 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
641 if (ubv)
643 if (!out)
645 out = malloc(sizeof(BracketPair));
646 out[0].start = -1;
649 if ((ubv >> 8) == 0) {
650 stack_top--;
651 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
652 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
653 if (open_stack[stack_top] == 0x232A)
654 open_stack[stack_top] = 0x3009;
655 stack_index[stack_top] = i;
657 else if ((ubv >> 8) == 1) {
658 int j;
660 if (stack_top == iso_run->length) continue;
661 for (j = stack_top; j < iso_run->length; j++) {
662 WCHAR c = iso_run->item[i].ch;
663 if (c == 0x232A) c = 0x3009;
664 if (c == open_stack[j]) {
665 out[pair_count].start = stack_index[j];
666 out[pair_count].end = i;
667 pair_count++;
668 out = realloc(out, sizeof(BracketPair) * (pair_count+1));
669 out[pair_count].start = -1;
670 stack_top = j+1;
671 break;
677 if (pair_count == 0)
679 free(out);
680 out = NULL;
682 else if (pair_count > 1)
683 qsort(out, pair_count, sizeof(BracketPair), bracketpair_compr);
685 free(open_stack);
686 free(stack_index);
687 return out;
690 static inline UINT8 get_rule_N0_class(UINT8 class)
692 return (class == AN || class == EN) ? R : class;
695 /*------------------------------------------------------------------------
696 Function: bidi_resolve_neutrals
698 Resolves the directionality of neutral character types.
700 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
702 Input: Array of embedding levels
703 Character count
704 Baselevel
706 In/Out: Array of directional classes
708 Note: On input only these directional classes are expected
709 R, L, NI, AN, EN and BN
711 W8 resolves a number of ENs to L
712 ------------------------------------------------------------------------*/
713 static void bidi_resolve_neutrals(IsolatedRun *run)
715 BracketPair *pairs;
716 int i;
718 /* Translate isolates into NI */
719 for (i = 0; i < run->length; i++) {
720 switch (*run->item[i].class) {
721 case B:
722 case S:
723 case WS:
724 case FSI:
725 case LRI:
726 case RLI:
727 case PDI: *run->item[i].class = NI;
730 /* "Only NI, L, R, AN, EN and BN are allowed" */
731 ASSERT(*run->item[i].class <= EN || *run->item[i].class == BN);
734 /* N0: Skipping bracketed pairs for now */
735 pairs = bidi_compute_bracket_pairs(run);
736 if (pairs) {
737 BracketPair *p = pairs;
738 int i = 0;
739 while (p->start >= 0) {
740 UINT8 e = get_embedding_direction(run->e);
741 UINT8 o = get_embedding_direction(run->e + 1);
742 BOOL flag_o = FALSE;
743 int j;
745 TRACE("Bracket Pair [%i - %i]\n", p->start, p->end);
747 /* N0.b */
748 for (j = p->start+1; j < p->end; j++) {
749 if (get_rule_N0_class(*run->item[j].class) == e) {
750 *run->item[p->start].class = e;
751 *run->item[p->end].class = e;
752 break;
754 else if (get_rule_N0_class(*run->item[j].class) == o)
755 flag_o = TRUE;
757 /* N0.c */
758 if (j == p->end && flag_o) {
759 for (j = p->start; j >= 0; j--) {
760 if (get_rule_N0_class(*run->item[j].class) == o) {
761 *run->item[p->start].class = o;
762 *run->item[p->end].class = o;
763 break;
765 else if (get_rule_N0_class(*run->item[j].class) == e) {
766 *run->item[p->start].class = e;
767 *run->item[p->end].class = e;
768 break;
771 if (j < 0) {
772 *run->item[p->start].class = run->sos;
773 *run->item[p->end].class = run->sos;
777 i++;
778 p = &pairs[i];
780 free(pairs);
783 /* N1 */
784 for (i = 0; i < run->length; i++) {
785 UINT8 l, r;
787 if (*run->item[i].class == NI) {
788 int b = get_prev_valid_char_from_run(run, i);
789 int j;
791 if (b == -1) {
792 l = run->sos;
793 b = 0;
795 else {
796 if (*run->item[b].class == R || *run->item[b].class == AN || *run->item[b].class == EN)
797 l = R;
798 else if (*run->item[b].class == L)
799 l = L;
800 else /* No string type */
801 continue;
803 j = get_next_valid_char_from_run(run, i);
804 while (j > -1 && *run->item[j].class == NI) j = get_next_valid_char_from_run(run, j);
805 if (j == -1) {
806 r = run->eos;
807 j = run->length;
809 else if (*run->item[j].class == R || *run->item[j].class == AN || *run->item[j].class == EN)
810 r = R;
811 else if (*run->item[j].class == L)
812 r = L;
813 else /* No string type */
814 continue;
816 if (r == l) {
817 for (b = i; b < j && b < run->length; b++)
818 *run->item[b].class = r;
823 /* N2 */
824 for (i = 0; i < run->length; i++) {
825 if (*run->item[i].class == NI) {
826 int b = i-1;
827 int f = i+1;
829 *run->item[i].class = get_embedding_direction(run->e);
830 if (b > -1 && *run->item[b].class == BN)
831 *run->item[b].class = get_embedding_direction(run->e);
832 if (f < run->length && *run->item[f].class == BN)
833 *run->item[f].class = get_embedding_direction(run->e);
838 /*------------------------------------------------------------------------
839 Function: bidi_resolve_implicit
841 Recursively resolves implicit embedding levels.
842 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
844 Input: Array of direction classes
845 Character count
846 Base level
848 In/Out: Array of embedding levels
850 Note: levels may exceed 15 on output.
851 Accepted subset of direction classes
852 R, L, AN, EN
853 ------------------------------------------------------------------------*/
854 static void bidi_resolve_implicit(const UINT8 *classes, UINT8 *levels, int sos, int eos)
856 int i;
858 /* I1/2 */
859 for (i = sos; i <= eos; i++) {
860 if (classes[i] == BN)
861 continue;
863 ASSERT(classes[i] != ON); /* "No Neutrals allowed to survive here." */
864 ASSERT(classes[i] <= EN); /* "Out of range." */
866 if (odd(levels[i]) && (classes[i] == L || classes[i] == EN || classes[i] == AN))
867 levels[i]++;
868 else if (!odd(levels[i]) && classes[i] == R)
869 levels[i]++;
870 else if (!odd(levels[i]) && (classes[i] == EN || classes[i] == AN))
871 levels[i] += 2;
875 static inline BOOL is_rule_L1_reset_class(UINT8 class)
877 switch (class) {
878 case WS:
879 case FSI:
880 case LRI:
881 case RLI:
882 case PDI:
883 case LRE:
884 case RLE:
885 case LRO:
886 case RLO:
887 case PDF:
888 case BN:
889 return TRUE;
890 default:
891 return FALSE;
895 static void bidi_resolve_resolved(UINT8 baselevel, const UINT8 *classes, UINT8 *levels, int sos, int eos)
897 int i;
899 /* L1 */
900 for (i = sos; i <= eos; i++) {
901 if (classes[i] == B || classes[i] == S) {
902 int j = i - 1;
903 while (i > sos && j >= sos && is_rule_L1_reset_class(classes[j]))
904 levels[j--] = baselevel;
905 levels[i] = baselevel;
907 else if (classes[i] == LRE || classes[i] == RLE || classes[i] == LRO || classes[i] == RLO ||
908 classes[i] == PDF || classes[i] == BN) {
909 levels[i] = i ? levels[i - 1] : baselevel;
911 if (i == eos && is_rule_L1_reset_class(classes[i])) {
912 int j = i;
913 while (j >= sos && is_rule_L1_reset_class(classes[j]))
914 levels[j--] = baselevel;
919 static HRESULT bidi_compute_isolating_runs_set(UINT8 baselevel, UINT8 *classes, UINT8 *levels, const WCHAR *string, UINT32 count, struct list *set)
921 int run_start, run_end, i;
922 int run_count = 0;
923 HRESULT hr = S_OK;
924 Run *runs;
926 if (!(runs = calloc(count, sizeof(*runs))))
927 return E_OUTOFMEMORY;
929 list_init(set);
931 /* Build Runs */
932 run_start = 0;
933 while (run_start < count) {
934 run_end = get_next_valid_char_index(classes, run_start, count);
935 while (run_end < count && levels[run_end] == levels[run_start])
936 run_end = get_next_valid_char_index(classes, run_end, count);
937 run_end--;
938 runs[run_count].start = run_start;
939 runs[run_count].end = run_end;
940 runs[run_count].e = levels[run_start];
941 run_start = get_next_valid_char_index(classes, run_end, count);
942 run_count++;
945 /* Build Isolating Runs */
946 i = 0;
947 while (i < run_count) {
948 int k = i;
949 if (runs[k].start >= 0) {
950 IsolatedRun *current_isolated;
951 int type_fence, real_end;
952 int j;
954 if (!(current_isolated = malloc(sizeof(IsolatedRun) + sizeof(RunChar)*count)))
956 hr = E_OUTOFMEMORY;
957 break;
960 run_start = runs[k].start;
961 current_isolated->e = runs[k].e;
962 current_isolated->length = (runs[k].end - runs[k].start)+1;
964 for (j = 0; j < current_isolated->length; j++) {
965 current_isolated->item[j].class = &classes[runs[k].start+j];
966 current_isolated->item[j].ch = string[runs[k].start+j];
969 run_end = runs[k].end;
971 TRACE("{ [%i -- %i]",run_start, run_end);
973 if (classes[run_end] == BN)
974 run_end = get_prev_valid_char_index(classes, run_end, runs[k].start);
976 while (run_end < count && (classes[run_end] == RLI || classes[run_end] == LRI || classes[run_end] == FSI)) {
977 j = k+1;
978 search:
979 while (j < run_count && classes[runs[j].start] != PDI) j++;
980 if (j < run_count && runs[i].e != runs[j].e) {
981 j++;
982 goto search;
985 if (j != run_count) {
986 int l = current_isolated->length;
987 int m;
989 current_isolated->length += (runs[j].end - runs[j].start)+1;
990 for (m = 0; l < current_isolated->length; l++, m++) {
991 current_isolated->item[l].class = &classes[runs[j].start+m];
992 current_isolated->item[l].ch = string[runs[j].start+m];
995 TRACE("[%i -- %i]", runs[j].start, runs[j].end);
997 run_end = runs[j].end;
998 if (classes[run_end] == BN)
999 run_end = get_prev_valid_char_index(classes, run_end, runs[i].start);
1000 runs[j].start = -1;
1001 k = j;
1003 else {
1004 run_end = count;
1005 break;
1009 type_fence = get_prev_valid_char_index(classes, run_start, -1);
1011 if (type_fence == -1)
1012 current_isolated->sos = (baselevel > levels[run_start]) ? baselevel : levels[run_start];
1013 else
1014 current_isolated->sos = (levels[type_fence] > levels[run_start]) ? levels[type_fence] : levels[run_start];
1016 current_isolated->sos = get_embedding_direction(current_isolated->sos);
1018 if (run_end == count)
1019 current_isolated->eos = current_isolated->sos;
1020 else {
1021 /* eos could be an BN */
1022 if (classes[run_end] == BN) {
1023 real_end = get_prev_valid_char_index(classes, run_end, run_start-1);
1024 if (real_end < run_start)
1025 real_end = run_start;
1027 else
1028 real_end = run_end;
1030 type_fence = get_next_valid_char_index(classes, run_end, count);
1031 if (type_fence == count)
1032 current_isolated->eos = (baselevel > levels[real_end]) ? baselevel : levels[real_end];
1033 else
1034 current_isolated->eos = (levels[type_fence] > levels[real_end]) ? levels[type_fence] : levels[real_end];
1036 current_isolated->eos = get_embedding_direction(current_isolated->eos);
1039 list_add_tail(set, &current_isolated->entry);
1040 TRACE(" } level %i {%s <--> %s}\n", current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1042 i++;
1045 free(runs);
1046 return hr;
1049 HRESULT bidi_computelevels(const WCHAR *string, UINT32 count, UINT8 baselevel, UINT8 *explicit, UINT8 *levels)
1051 IsolatedRun *iso_run, *next;
1052 struct list IsolatingRuns;
1053 UINT8 *chartype;
1054 HRESULT hr;
1056 TRACE("%s, %u\n", debugstr_wn(string, count), count);
1058 if (!(chartype = malloc(count * sizeof(*chartype))))
1059 return E_OUTOFMEMORY;
1061 bidi_classify(string, chartype, count);
1062 if (TRACE_ON(bidi)) bidi_dump_types("start ", chartype, 0, count);
1064 bidi_resolve_explicit(baselevel, chartype, levels, count);
1065 memcpy(explicit, levels, count*sizeof(*explicit));
1067 if (TRACE_ON(bidi)) bidi_dump_types("after explicit", chartype, 0, count);
1069 /* X10/BD13: Compute Isolating runs */
1070 hr = bidi_compute_isolating_runs_set(baselevel, chartype, levels, string, count, &IsolatingRuns);
1071 if (FAILED(hr))
1072 goto done;
1074 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1076 if (TRACE_ON(bidi)) iso_dump_types("run", iso_run);
1078 bidi_resolve_weak(iso_run);
1079 if (TRACE_ON(bidi)) iso_dump_types("after weak", iso_run);
1081 bidi_resolve_neutrals(iso_run);
1082 if (TRACE_ON(bidi)) iso_dump_types("after neutrals", iso_run);
1084 list_remove(&iso_run->entry);
1085 free(iso_run);
1088 if (TRACE_ON(bidi)) bidi_dump_types("before implicit", chartype, 0, count);
1089 bidi_resolve_implicit(chartype, levels, 0, count-1);
1091 bidi_classify(string, chartype, count);
1092 bidi_resolve_resolved(baselevel, chartype, levels, 0, count-1);
1094 done:
1095 free(chartype);
1096 return hr;