mshtml: Added nsIURI::GetPrePath implementation.
[wine.git] / dlls / dwrite / bidi.c
blob48a7a06de1eba3d46d8dd2be90596ccad8ce7114
1 /*
2 * Unicode Bidirectional Algorithm implementation
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include <stdarg.h>
46 #include "windef.h"
47 #include "winbase.h"
48 #include "wine/debug.h"
49 #include "wine/list.h"
51 #include "dwrite_private.h"
53 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
55 extern const unsigned short bidi_bracket_table[];
57 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
58 #define MAX_DEPTH 125
60 #define odd(x) ((x) & 1)
62 /*------------------------------------------------------------------------
63 Bidirectional Character Types
65 as defined by the Unicode Bidirectional Algorithm Table 3-7.
67 Note:
69 The list of bidirectional character types here is not grouped the
70 same way as the table 3-7, since the numberic values for the types
71 are chosen to keep the state and action tables compact.
72 ------------------------------------------------------------------------*/
73 enum directions
75 /* input types */
76 /* ON MUST be zero, code relies on ON = NI = 0 */
77 ON = 0, /* Other Neutral */
78 L, /* Left Letter */
79 R, /* Right Letter */
80 AN, /* Arabic Number */
81 EN, /* European Number */
82 AL, /* Arabic Letter (Right-to-left) */
83 NSM, /* Non-spacing Mark */
84 CS, /* Common Separator */
85 ES, /* European Separator */
86 ET, /* European Terminator (post/prefix e.g. $ and %) */
88 /* resolved types */
89 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
91 /* input types, */
92 S, /* Segment Separator (TAB) // used only in L1 */
93 WS, /* White space // used only in L1 */
94 B, /* Paragraph Separator (aka as PS) */
96 /* types for explicit controls */
97 RLO, /* these are used only in X1-X9 */
98 RLE,
99 LRO,
100 LRE,
101 PDF,
103 LRI, /* Isolate formatting characters new with 6.3 */
104 RLI,
105 FSI,
106 PDI,
108 /* resolved types, also resolved directions */
109 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
112 static const char debug_type[][4] =
114 "ON", /* Other Neutral */
115 "L", /* Left Letter */
116 "R", /* Right Letter */
117 "AN", /* Arabic Number */
118 "EN", /* European Number */
119 "AL", /* Arabic Letter (Right-to-left) */
120 "NSM", /* Non-spacing Mark */
121 "CS", /* Common Separator */
122 "ES", /* European Separator */
123 "ET", /* European Terminator (post/prefix e.g. $ and %) */
124 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
125 "S", /* Segment Separator (TAB) used only in L1 */
126 "WS", /* White space used only in L1 */
127 "B", /* Paragraph Separator (aka as PS) */
128 "RLO", /* these are used only in X1-X9 */
129 "RLE",
130 "LRO",
131 "LRE",
132 "PDF",
133 "LRI", /* Isolate formatting characters new with 6.3 */
134 "RLI",
135 "FSI",
136 "PDI",
139 static inline void bidi_dump_types(const char* header, const UINT8 *types, UINT32 start, UINT32 end)
141 int i, len = 0;
142 TRACE("%s:", header);
143 for (i = start; i < end && len < 200; i++) {
144 TRACE(" %s", debug_type[types[i]]);
145 len += strlen(debug_type[types[i]])+1;
147 if (i != end)
148 TRACE("...");
149 TRACE("\n");
152 /* Convert the libwine information to the direction enum */
153 static void bidi_classify(const WCHAR *string, UINT8 *chartype, UINT32 count)
155 static const enum directions dir_map[16] =
157 L, /* unassigned defaults to L */
170 NSM,
172 PDF /* also LRE, LRO, RLE, RLO */
175 UINT32 i;
177 for (i = 0; i < count; ++i) {
178 chartype[i] = dir_map[get_char_typeW(string[i]) >> 12];
180 switch (chartype[i]) {
181 case ES:
182 break;
183 case PDF:
184 switch (string[i]) {
185 case 0x202a: chartype[i] = LRE; break;
186 case 0x202b: chartype[i] = RLE; break;
187 case 0x202c: chartype[i] = PDF; break;
188 case 0x202d: chartype[i] = LRO; break;
189 case 0x202e: chartype[i] = RLO; break;
190 case 0x2066: chartype[i] = LRI; break;
191 case 0x2067: chartype[i] = RLI; break;
192 case 0x2068: chartype[i] = FSI; break;
193 case 0x2069: chartype[i] = PDI; break;
195 break;
200 WCHAR bidi_get_mirrored_char(WCHAR ch)
202 extern const WCHAR wine_mirror_map[];
203 return ch + wine_mirror_map[wine_mirror_map[ch >> 8] + (ch & 0xff)];
206 /* RESOLVE EXPLICIT */
208 static inline UINT8 get_greater_even_level(UINT8 level)
210 return odd(level) ? level + 1 : level + 2;
213 static inline UINT8 get_greater_odd_level(UINT8 level)
215 return odd(level) ? level + 2 : level + 1;
218 static inline UINT8 get_embedding_direction(UINT8 level)
220 return odd(level) ? R : L;
223 /*------------------------------------------------------------------------
224 Function: bidi_resolve_explicit
226 Recursively resolves explicit embedding levels and overrides.
227 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
229 Input: Base embedding level and direction
230 Character count
232 Output: Array of embedding levels
234 In/Out: Array of direction classes
237 Note: The function uses two simple counters to keep track of
238 matching explicit codes and PDF. Use the default argument for
239 the outermost call. The nesting counter counts the recursion
240 depth and not the embedding level.
241 ------------------------------------------------------------------------*/
242 typedef struct tagStackItem
244 UINT8 level;
245 UINT8 override;
246 BOOL isolate;
247 } StackItem;
249 #define push_stack(l,o,i) \
250 do { stack_top--; \
251 stack[stack_top].level = l; \
252 stack[stack_top].override = o; \
253 stack[stack_top].isolate = i;} while(0)
255 #define pop_stack() do { stack_top++; } while(0)
257 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
259 static void bidi_resolve_explicit(UINT8 baselevel, UINT8 *classes, UINT8 *levels, UINT32 count)
261 /* X1 */
262 int overflow_isolate_count = 0;
263 int overflow_embedding_count = 0;
264 int valid_isolate_count = 0;
265 UINT32 i;
267 StackItem stack[MAX_DEPTH+2];
268 int stack_top = MAX_DEPTH+1;
270 stack[stack_top].level = baselevel;
271 stack[stack_top].override = NI;
272 stack[stack_top].isolate = FALSE;
274 for (i = 0; i < count; i++) {
275 UINT8 least_odd, least_even;
277 switch (classes[i]) {
279 /* X2 */
280 case RLE:
281 least_odd = get_greater_odd_level(stack[stack_top].level);
282 levels[i] = stack[stack_top].level;
283 if (valid_level(least_odd))
284 push_stack(least_odd, NI, FALSE);
285 else if (overflow_isolate_count == 0)
286 overflow_embedding_count++;
287 break;
289 /* X3 */
290 case LRE:
291 least_even = get_greater_even_level(stack[stack_top].level);
292 levels[i] = stack[stack_top].level;
293 if (valid_level(least_even))
294 push_stack(least_even, NI, FALSE);
295 else if (overflow_isolate_count == 0)
296 overflow_embedding_count++;
297 break;
299 /* X4 */
300 case RLO:
301 least_odd = get_greater_odd_level(stack[stack_top].level);
302 levels[i] = stack[stack_top].level;
303 if (valid_level(least_odd))
304 push_stack(least_odd, R, FALSE);
305 else if (overflow_isolate_count == 0)
306 overflow_embedding_count++;
307 break;
309 /* X5 */
310 case LRO:
311 least_even = get_greater_even_level(stack[stack_top].level);
312 levels[i] = stack[stack_top].level;
313 if (valid_level(least_even))
314 push_stack(least_even, L, FALSE);
315 else if (overflow_isolate_count == 0)
316 overflow_embedding_count++;
317 break;
319 /* X5a */
320 case RLI:
321 least_odd = get_greater_odd_level(stack[stack_top].level);
322 levels[i] = stack[stack_top].level;
323 if (valid_level(least_odd))
325 valid_isolate_count++;
326 push_stack(least_odd, NI, TRUE);
328 else
329 overflow_isolate_count++;
330 break;
332 /* X5b */
333 case LRI:
334 least_even = get_greater_even_level(stack[stack_top].level);
335 levels[i] = stack[stack_top].level;
336 if (valid_level(least_even))
338 valid_isolate_count++;
339 push_stack(least_even, NI, TRUE);
341 else
342 overflow_isolate_count++;
343 break;
345 /* X5c */
346 case FSI:
348 UINT8 new_level = 0;
349 int skipping = 0;
350 int j;
352 levels[i] = stack[stack_top].level;
353 for (j = i+1; j < count; j++)
355 if (classes[j] == LRI || classes[j] == RLI || classes[j] == FSI)
357 skipping++;
358 continue;
360 else if (classes[j] == PDI)
362 if (skipping)
363 skipping --;
364 else
365 break;
366 continue;
369 if (skipping) continue;
371 if (classes[j] == L)
373 new_level = 0;
374 break;
376 else if (classes[j] == R || classes[j] == AL)
378 new_level = 1;
379 break;
382 if (odd(new_level))
384 least_odd = get_greater_odd_level(stack[stack_top].level);
385 if (valid_level(least_odd))
387 valid_isolate_count++;
388 push_stack(least_odd, NI, TRUE);
390 else
391 overflow_isolate_count++;
393 else
395 least_even = get_greater_even_level(stack[stack_top].level);
396 if (valid_level(least_even))
398 valid_isolate_count++;
399 push_stack(least_even, NI, TRUE);
401 else
402 overflow_isolate_count++;
404 break;
407 /* X6 */
408 case ON:
409 case L:
410 case R:
411 case AN:
412 case EN:
413 case AL:
414 case NSM:
415 case CS:
416 case ES:
417 case ET:
418 case S:
419 case WS:
420 levels[i] = stack[stack_top].level;
421 if (stack[stack_top].override != NI)
422 classes[i] = stack[stack_top].override;
423 break;
425 /* X6a */
426 case PDI:
427 if (overflow_isolate_count) overflow_isolate_count--;
428 else if (!valid_isolate_count) {/* do nothing */}
429 else
431 overflow_embedding_count = 0;
432 while (!stack[stack_top].isolate) pop_stack();
433 pop_stack();
434 valid_isolate_count--;
436 levels[i] = stack[stack_top].level;
437 break;
439 /* X7 */
440 case PDF:
441 levels[i] = stack[stack_top].level;
442 if (overflow_isolate_count) {/* do nothing */}
443 else if (overflow_embedding_count) overflow_embedding_count--;
444 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
445 pop_stack();
446 break;
448 /* X8: Nothing */
449 default:
450 break;
453 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
454 for (i = 0; i < count ; i++)
455 if (classes[i] == RLE || classes[i] == LRE || classes[i] == RLO || classes[i] == LRO || classes[i] == PDF)
456 classes[i] = BN;
459 static inline int get_prev_valid_char_index(const UINT8 *classes, int index, int back_fence)
461 if (index == -1 || index == back_fence) return index;
462 index--;
463 while (index > back_fence && classes[index] == BN) index--;
464 return index;
467 static inline int get_next_valid_char_index(const UINT8 *classes, int index, int front_fence)
469 if (index == front_fence) return index;
470 index++;
471 while (index < front_fence && classes[index] == BN) index++;
472 return index;
475 typedef struct tagRun
477 int start;
478 int end;
479 UINT8 e;
480 } Run;
482 typedef struct tagRunChar
484 WCHAR ch;
485 UINT8 *class;
486 } RunChar;
488 typedef struct tagIsolatedRun
490 struct list entry;
491 int length;
492 UINT8 sos;
493 UINT8 eos;
494 UINT8 e;
496 RunChar item[1];
497 } IsolatedRun;
499 static inline int get_next_valid_char_from_run(IsolatedRun *run, int index)
501 if (index >= (run->length-1)) return -1;
502 index++;
503 while (index < run->length && *run->item[index].class == BN) index++;
504 if (index == run->length) return -1;
505 return index;
508 static inline int get_prev_valid_char_from_run(IsolatedRun *run, int index)
510 if (index <= 0) return -1;
511 index--;
512 while (index > -1 && *run->item[index].class == BN) index--;
513 return index;
516 static inline int iso_previousChar(IsolatedRun *run, int index)
518 if (index <= 0) return -1;
519 return index--;
522 static inline void iso_dump_types(const char* header, IsolatedRun *run)
524 int i, len = 0;
525 TRACE("%s:",header);
526 TRACE("[ ");
527 for (i = 0; i < run->length && len < 200; i++) {
528 TRACE(" %s", debug_type[*run->item[i].class]);
529 len += strlen(debug_type[*run->item[i].class])+1;
531 if (i != run->length)
532 TRACE("...");
533 TRACE(" ]\n");
536 /*------------------------------------------------------------------------
537 Function: bidi_resolve_weak
539 Resolves the directionality of numeric and other weak character types
541 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
543 Input: Array of embedding levels
544 Character count
546 In/Out: Array of directional classes
548 Note: On input only these directional classes are expected
549 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
550 ------------------------------------------------------------------------*/
551 static void bidi_resolve_weak(IsolatedRun *iso_run)
553 int i;
555 /* W1 */
556 for (i=0; i < iso_run->length; i++) {
557 if (*iso_run->item[i].class == NSM) {
558 int j = get_prev_valid_char_from_run(iso_run, i);
559 if (j == -1)
560 *iso_run->item[i].class = iso_run->sos;
561 else if (*iso_run->item[j].class >= LRI)
562 *iso_run->item[i].class = ON;
563 else
564 *iso_run->item[i].class = *iso_run->item[j].class;
568 /* W2 */
569 for (i = 0; i < iso_run->length; i++) {
570 if (*iso_run->item[i].class == EN) {
571 int j = get_prev_valid_char_from_run(iso_run, i);
572 while (j > -1) {
573 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L || *iso_run->item[j].class == AL) {
574 if (*iso_run->item[j].class == AL)
575 *iso_run->item[i].class = AN;
576 break;
578 j = get_prev_valid_char_from_run(iso_run, j);
583 /* W3 */
584 for (i = 0; i < iso_run->length; i++) {
585 if (*iso_run->item[i].class == AL)
586 *iso_run->item[i].class = R;
589 /* W4 */
590 for (i = 0; i < iso_run->length; i++) {
591 if (*iso_run->item[i].class == ES) {
592 int b = get_prev_valid_char_from_run(iso_run, i);
593 int f = get_next_valid_char_from_run(iso_run, i);
595 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
596 *iso_run->item[i].class = EN;
598 else if (*iso_run->item[i].class == CS) {
599 int b = get_prev_valid_char_from_run(iso_run, i);
600 int f = get_next_valid_char_from_run(iso_run, i);
602 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
603 *iso_run->item[i].class = EN;
604 else if (b > -1 && f > -1 && *iso_run->item[b].class == AN && *iso_run->item[f].class == AN)
605 *iso_run->item[i].class = AN;
609 /* W5 */
610 for (i = 0; i < iso_run->length; i++) {
611 if (*iso_run->item[i].class == ET) {
612 int j;
613 for (j = i-1 ; j > -1; j--) {
614 if (*iso_run->item[j].class == BN) continue;
615 if (*iso_run->item[j].class == ET) continue;
616 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
617 else break;
619 if (*iso_run->item[i].class == ET) {
620 for (j = i+1; j < iso_run->length; j++) {
621 if (*iso_run->item[j].class == BN) continue;
622 if (*iso_run->item[j].class == ET) continue;
623 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
624 else break;
630 /* W6 */
631 for (i = 0; i < iso_run->length; i++) {
632 if (*iso_run->item[i].class == ET || *iso_run->item[i].class == ES || *iso_run->item[i].class == CS || *iso_run->item[i].class == ON)
634 int b = i-1;
635 int f = i+1;
636 if (b > -1 && *iso_run->item[b].class == BN)
637 *iso_run->item[b].class = ON;
638 if (f < iso_run->length && *iso_run->item[f].class == BN)
639 *iso_run->item[f].class = ON;
641 *iso_run->item[i].class = ON;
645 /* W7 */
646 for (i = 0; i < iso_run->length; i++) {
647 if (*iso_run->item[i].class == EN) {
648 int j;
649 for (j = get_prev_valid_char_from_run(iso_run, i); j > -1; j = get_prev_valid_char_from_run(iso_run, j))
650 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L) {
651 if (*iso_run->item[j].class == L)
652 *iso_run->item[i].class = L;
653 break;
655 if (iso_run->sos == L && j == -1)
656 *iso_run->item[i].class = L;
661 typedef struct tagBracketPair
663 int start;
664 int end;
665 } BracketPair;
667 static int bracketpair_compr(const void *a, const void* b)
669 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
672 static BracketPair *bidi_compute_bracket_pairs(IsolatedRun *iso_run)
674 WCHAR *open_stack;
675 int *stack_index;
676 int stack_top = iso_run->length;
677 BracketPair *out = NULL;
678 int pair_count = 0;
679 int i;
681 open_stack = heap_alloc(sizeof(WCHAR) * iso_run->length);
682 stack_index = heap_alloc(sizeof(int) * iso_run->length);
684 for (i = 0; i < iso_run->length; i++) {
685 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
686 if (ubv) {
687 if (!out) {
688 out = heap_alloc(sizeof(BracketPair));
689 out[0].start = -1;
692 if ((ubv >> 8) == 0) {
693 stack_top--;
694 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
695 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
696 if (open_stack[stack_top] == 0x232A)
697 open_stack[stack_top] = 0x3009;
698 stack_index[stack_top] = i;
700 else if ((ubv >> 8) == 1) {
701 int j;
703 if (stack_top == iso_run->length) continue;
704 for (j = stack_top; j < iso_run->length; j++) {
705 WCHAR c = iso_run->item[i].ch;
706 if (c == 0x232A) c = 0x3009;
707 if (c == open_stack[j]) {
708 out[pair_count].start = stack_index[j];
709 out[pair_count].end = i;
710 pair_count++;
711 out = heap_realloc(out, sizeof(BracketPair) * (pair_count+1));
712 out[pair_count].start = -1;
713 stack_top = j+1;
714 break;
720 if (pair_count == 0) {
721 heap_free(out);
722 out = NULL;
724 else if (pair_count > 1)
725 qsort(out, pair_count, sizeof(BracketPair), bracketpair_compr);
727 heap_free(open_stack);
728 heap_free(stack_index);
729 return out;
732 static inline UINT8 get_rule_N0_class(UINT8 class)
734 return (class == AN || class == EN) ? R : class;
737 /*------------------------------------------------------------------------
738 Function: bidi_resolve_neutrals
740 Resolves the directionality of neutral character types.
742 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
744 Input: Array of embedding levels
745 Character count
746 Baselevel
748 In/Out: Array of directional classes
750 Note: On input only these directional classes are expected
751 R, L, NI, AN, EN and BN
753 W8 resolves a number of ENs to L
754 ------------------------------------------------------------------------*/
755 static void bidi_resolve_neutrals(IsolatedRun *run)
757 BracketPair *pairs;
758 int i;
760 /* Translate isolates into NI */
761 for (i = 0; i < run->length; i++) {
762 if (*run->item[i].class >= LRI)
763 *run->item[i].class = NI;
765 switch (*run->item[i].class) {
766 case B:
767 case S:
768 case WS: *run->item[i].class = NI;
771 ASSERT(*run->item[i].class < 5 || *run->item[i].class == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
774 /* N0: Skipping bracketed pairs for now */
775 pairs = bidi_compute_bracket_pairs(run);
776 if (pairs) {
777 BracketPair *p = pairs;
778 int i = 0;
779 while (p->start >= 0) {
780 UINT8 e = get_embedding_direction(run->e);
781 UINT8 o = get_embedding_direction(run->e + 1);
782 BOOL flag_o = FALSE;
783 int j;
785 TRACE("Bracket Pair [%i - %i]\n", p->start, p->end);
787 /* N0.b */
788 for (j = p->start+1; j < p->end; j++) {
789 if (get_rule_N0_class(*run->item[j].class) == e) {
790 *run->item[p->start].class = e;
791 *run->item[p->end].class = e;
792 break;
794 else if (get_rule_N0_class(*run->item[j].class) == o)
795 flag_o = TRUE;
797 /* N0.c */
798 if (j == p->end && flag_o) {
799 for (j = p->start; j >= 0; j--) {
800 if (get_rule_N0_class(*run->item[j].class) == o) {
801 *run->item[p->start].class = o;
802 *run->item[p->end].class = o;
803 break;
805 else if (get_rule_N0_class(*run->item[j].class) == e) {
806 *run->item[p->start].class = e;
807 *run->item[p->end].class = e;
808 break;
811 if (j < 0) {
812 *run->item[p->start].class = run->sos;
813 *run->item[p->end].class = run->sos;
817 i++;
818 p = &pairs[i];
820 heap_free(pairs);
823 /* N1 */
824 for (i = 0; i < run->length; i++) {
825 UINT8 l, r;
827 if (*run->item[i].class == NI) {
828 int b = get_prev_valid_char_from_run(run, i);
829 int j;
831 if (b == -1) {
832 l = run->sos;
833 b = 0;
835 else {
836 if (*run->item[b].class == R || *run->item[b].class == AN || *run->item[b].class == EN)
837 l = R;
838 else if (*run->item[b].class == L)
839 l = L;
840 else /* No string type */
841 continue;
843 j = get_next_valid_char_from_run(run, i);
844 while (j > -1 && *run->item[j].class == NI) j = get_next_valid_char_from_run(run, j);
845 if (j == -1) {
846 r = run->eos;
847 j = run->length;
849 else if (*run->item[j].class == R || *run->item[j].class == AN || *run->item[j].class == EN)
850 r = R;
851 else if (*run->item[j].class == L)
852 r = L;
853 else /* No string type */
854 continue;
856 if (r == l) {
857 for (b = i; b < j && b < run->length; b++)
858 *run->item[b].class = r;
863 /* N2 */
864 for (i = 0; i < run->length; i++) {
865 if (*run->item[i].class == NI) {
866 int b = i-1;
867 int f = i+1;
869 *run->item[i].class = get_embedding_direction(run->e);
870 if (b > -1 && *run->item[b].class == BN)
871 *run->item[b].class = get_embedding_direction(run->e);
872 if (f < run->length && *run->item[f].class == BN)
873 *run->item[f].class = get_embedding_direction(run->e);
878 /*------------------------------------------------------------------------
879 Function: bidi_resolve_implicit
881 Recursively resolves implicit embedding levels.
882 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
884 Input: Array of direction classes
885 Character count
886 Base level
888 In/Out: Array of embedding levels
890 Note: levels may exceed 15 on output.
891 Accepted subset of direction classes
892 R, L, AN, EN
893 ------------------------------------------------------------------------*/
894 static void bidi_resolve_implicit(const UINT8 *classes, UINT8 *levels, int sos, int eos)
896 int i;
898 /* I1/2 */
899 for (i = sos; i <= eos; i++) {
900 if (classes[i] == BN)
901 continue;
903 ASSERT(classes[i] > 0); /* "No Neutrals allowed to survive here." */
904 ASSERT(classes[i] < 5); /* "Out of range." */
906 if (odd(levels[i]) && (classes[i] == L || classes[i] == EN || classes[i] == AN))
907 levels[i]++;
908 else if (!odd(levels[i]) && classes[i] == R)
909 levels[i]++;
910 else if (!odd(levels[i]) && (classes[i] == EN || classes[i] == AN))
911 levels[i] += 2;
915 static inline BOOL is_rule_L1_reset_class(UINT8 class)
917 switch (class) {
918 case WS:
919 case FSI:
920 case LRI:
921 case RLI:
922 case PDI:
923 case LRE:
924 case RLE:
925 case LRO:
926 case RLO:
927 case PDF:
928 case BN:
929 return TRUE;
930 default:
931 return FALSE;
935 static void bidi_resolve_resolved(UINT8 baselevel, const UINT8 *classes, UINT8 *levels, int sos, int eos)
937 int i;
939 /* L1 */
940 for (i = sos; i <= eos; i++) {
941 if (classes[i] == B || classes[i] == S) {
942 int j = i - 1;
943 while (i > sos && j >= sos && is_rule_L1_reset_class(classes[j]))
944 levels[j--] = baselevel;
945 levels[i] = baselevel;
947 if (i == eos && is_rule_L1_reset_class(classes[i])) {
948 int j = i;
949 while (j >= sos && is_rule_L1_reset_class(classes[j]))
950 levels[j--] = baselevel;
955 static HRESULT bidi_compute_isolating_runs_set(UINT8 baselevel, UINT8 *classes, UINT8 *levels, const WCHAR *string, UINT32 count, struct list *set)
957 int run_start, run_end, i;
958 int run_count = 0;
959 HRESULT hr = S_OK;
960 Run *runs;
962 runs = heap_alloc(count * sizeof(Run));
963 if (!runs)
964 return E_OUTOFMEMORY;
966 list_init(set);
968 /* Build Runs */
969 run_start = 0;
970 while (run_start < count) {
971 run_end = get_next_valid_char_index(classes, run_start, count);
972 while (run_end < count && levels[run_end] == levels[run_start])
973 run_end = get_next_valid_char_index(classes, run_end, count);
974 run_end--;
975 runs[run_count].start = run_start;
976 runs[run_count].end = run_end;
977 runs[run_count].e = levels[run_start];
978 run_start = get_next_valid_char_index(classes, run_end, count);
979 run_count++;
982 /* Build Isolating Runs */
983 i = 0;
984 while (i < run_count) {
985 int k = i;
986 if (runs[k].start >= 0) {
987 IsolatedRun *current_isolated;
988 int type_fence, real_end;
989 int j;
991 current_isolated = heap_alloc(sizeof(IsolatedRun) + sizeof(RunChar)*count);
992 if (!current_isolated) {
993 hr = E_OUTOFMEMORY;
994 break;
997 run_start = runs[k].start;
998 current_isolated->e = runs[k].e;
999 current_isolated->length = (runs[k].end - runs[k].start)+1;
1001 for (j = 0; j < current_isolated->length; j++) {
1002 current_isolated->item[j].class = &classes[runs[k].start+j];
1003 current_isolated->item[j].ch = string[runs[k].start+j];
1006 run_end = runs[k].end;
1008 TRACE("{ [%i -- %i]",run_start, run_end);
1010 if (classes[run_end] == BN)
1011 run_end = get_prev_valid_char_index(classes, run_end, runs[k].start);
1013 while (run_end < count && (classes[run_end] == RLI || classes[run_end] == LRI || classes[run_end] == FSI)) {
1014 j = k+1;
1015 search:
1016 while (j < run_count && classes[runs[j].start] != PDI) j++;
1017 if (j < run_count && runs[i].e != runs[j].e) {
1018 j++;
1019 goto search;
1022 if (j != run_count) {
1023 int l = current_isolated->length;
1024 int m;
1026 current_isolated->length += (runs[j].end - runs[j].start)+1;
1027 for (m = 0; l < current_isolated->length; l++, m++) {
1028 current_isolated->item[l].class = &classes[runs[j].start+m];
1029 current_isolated->item[l].ch = string[runs[j].start+m];
1032 TRACE("[%i -- %i]", runs[j].start, runs[j].end);
1034 run_end = runs[j].end;
1035 if (classes[run_end] == BN)
1036 run_end = get_prev_valid_char_index(classes, run_end, runs[i].start);
1037 runs[j].start = -1;
1038 k = j;
1040 else {
1041 run_end = count;
1042 break;
1046 type_fence = get_prev_valid_char_index(classes, run_start, -1);
1048 if (type_fence == -1)
1049 current_isolated->sos = (baselevel > levels[run_start]) ? baselevel : levels[run_start];
1050 else
1051 current_isolated->sos = (levels[type_fence] > levels[run_start]) ? levels[type_fence] : levels[run_start];
1053 current_isolated->sos = get_embedding_direction(current_isolated->sos);
1055 if (run_end == count)
1056 current_isolated->eos = current_isolated->sos;
1057 else {
1058 /* eos could be an BN */
1059 if (classes[run_end] == BN) {
1060 real_end = get_prev_valid_char_index(classes, run_end, run_start-1);
1061 if (real_end < run_start)
1062 real_end = run_start;
1064 else
1065 real_end = run_end;
1067 type_fence = get_next_valid_char_index(classes, run_end, count);
1068 if (type_fence == count)
1069 current_isolated->eos = (baselevel > levels[real_end]) ? baselevel : levels[real_end];
1070 else
1071 current_isolated->eos = (levels[type_fence] > levels[real_end]) ? levels[type_fence] : levels[real_end];
1073 current_isolated->eos = get_embedding_direction(current_isolated->eos);
1076 list_add_tail(set, &current_isolated->entry);
1077 TRACE(" } level %i {%s <--> %s}\n", current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1079 i++;
1082 heap_free(runs);
1083 return hr;
1086 HRESULT bidi_computelevels(const WCHAR *string, UINT32 count, UINT8 baselevel, UINT8 *explicit, UINT8 *levels)
1088 IsolatedRun *iso_run, *next;
1089 struct list IsolatingRuns;
1090 UINT8 *chartype;
1091 HRESULT hr;
1093 TRACE("%s, %u\n", debugstr_wn(string, count), count);
1095 chartype = heap_alloc(count*sizeof(*chartype));
1096 if (!chartype)
1097 return E_OUTOFMEMORY;
1099 bidi_classify(string, chartype, count);
1100 if (TRACE_ON(bidi)) bidi_dump_types("start ", chartype, 0, count);
1102 bidi_resolve_explicit(baselevel, chartype, levels, count);
1103 memcpy(explicit, levels, count*sizeof(*explicit));
1105 if (TRACE_ON(bidi)) bidi_dump_types("after explicit", chartype, 0, count);
1107 /* X10/BD13: Compute Isolating runs */
1108 hr = bidi_compute_isolating_runs_set(baselevel, chartype, levels, string, count, &IsolatingRuns);
1109 if (FAILED(hr))
1110 goto done;
1112 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1114 if (TRACE_ON(bidi)) iso_dump_types("run", iso_run);
1116 bidi_resolve_weak(iso_run);
1117 if (TRACE_ON(bidi)) iso_dump_types("after weak", iso_run);
1119 bidi_resolve_neutrals(iso_run);
1120 if (TRACE_ON(bidi)) iso_dump_types("after neutrals", iso_run);
1122 list_remove(&iso_run->entry);
1123 heap_free(iso_run);
1126 if (TRACE_ON(bidi)) bidi_dump_types("before implicit", chartype, 0, count);
1127 bidi_resolve_implicit(chartype, levels, 0, count-1);
1129 bidi_classify(string, chartype, count);
1130 bidi_resolve_resolved(baselevel, chartype, levels, 0, count-1);
1132 done:
1133 heap_free(chartype);
1134 return hr;