dwrite: Use E_NOT_SUFFICIENT_BUFFER definition.
[wine.git] / dlls / dwrite / bidi.c
blob8cd56ea7bb991b72694e1724ac0c4d657e01d2fa
1 /*
2 * Unicode Bidirectional Algorithm implementation
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include <stdarg.h>
46 #include "windef.h"
47 #include "winbase.h"
48 #include "wine/debug.h"
49 #include "wine/list.h"
51 #include "dwrite.h"
52 #include "dwrite_private.h"
54 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
56 extern const unsigned short bidi_bracket_table[];
58 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
59 #define MAX_DEPTH 125
61 #define odd(x) ((x) & 1)
63 /*------------------------------------------------------------------------
64 Bidirectional Character Types
66 as defined by the Unicode Bidirectional Algorithm Table 3-7.
68 Note:
70 The list of bidirectional character types here is not grouped the
71 same way as the table 3-7, since the numberic values for the types
72 are chosen to keep the state and action tables compact.
73 ------------------------------------------------------------------------*/
74 enum directions
76 /* input types */
77 /* ON MUST be zero, code relies on ON = NI = 0 */
78 ON = 0, /* Other Neutral */
79 L, /* Left Letter */
80 R, /* Right Letter */
81 AN, /* Arabic Number */
82 EN, /* European Number */
83 AL, /* Arabic Letter (Right-to-left) */
84 NSM, /* Non-spacing Mark */
85 CS, /* Common Separator */
86 ES, /* European Separator */
87 ET, /* European Terminator (post/prefix e.g. $ and %) */
89 /* resolved types */
90 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
92 /* input types, */
93 S, /* Segment Separator (TAB) // used only in L1 */
94 WS, /* White space // used only in L1 */
95 B, /* Paragraph Separator (aka as PS) */
97 /* types for explicit controls */
98 RLO, /* these are used only in X1-X9 */
99 RLE,
100 LRO,
101 LRE,
102 PDF,
104 LRI, /* Isolate formatting characters new with 6.3 */
105 RLI,
106 FSI,
107 PDI,
109 /* resolved types, also resolved directions */
110 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
113 static const char debug_type[][4] =
115 "ON", /* Other Neutral */
116 "L", /* Left Letter */
117 "R", /* Right Letter */
118 "AN", /* Arabic Number */
119 "EN", /* European Number */
120 "AL", /* Arabic Letter (Right-to-left) */
121 "NSM", /* Non-spacing Mark */
122 "CS", /* Common Separator */
123 "ES", /* European Separator */
124 "ET", /* European Terminator (post/prefix e.g. $ and %) */
125 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
126 "S", /* Segment Separator (TAB) used only in L1 */
127 "WS", /* White space used only in L1 */
128 "B", /* Paragraph Separator (aka as PS) */
129 "RLO", /* these are used only in X1-X9 */
130 "RLE",
131 "LRO",
132 "LRE",
133 "PDF",
134 "LRI", /* Isolate formatting characters new with 6.3 */
135 "RLI",
136 "FSI",
137 "PDI",
140 static inline void bidi_dump_types(const char* header, const UINT8 *types, UINT32 start, UINT32 end)
142 int i, len = 0;
143 TRACE("%s:", header);
144 for (i = start; i < end && len < 200; i++) {
145 TRACE(" %s", debug_type[types[i]]);
146 len += strlen(debug_type[types[i]])+1;
148 if (i != end)
149 TRACE("...");
150 TRACE("\n");
153 /* Convert the libwine information to the direction enum */
154 static void bidi_classify(const WCHAR *string, UINT8 *chartype, UINT32 count)
156 static const enum directions dir_map[16] =
158 L, /* unassigned defaults to L */
171 NSM,
173 PDF /* also LRE, LRO, RLE, RLO */
176 UINT32 i;
178 for (i = 0; i < count; ++i) {
179 chartype[i] = dir_map[get_char_typeW(string[i]) >> 12];
181 switch (chartype[i]) {
182 case ES:
183 break;
184 case PDF:
185 switch (string[i]) {
186 case 0x202a: chartype[i] = LRE; break;
187 case 0x202b: chartype[i] = RLE; break;
188 case 0x202c: chartype[i] = PDF; break;
189 case 0x202d: chartype[i] = LRO; break;
190 case 0x202e: chartype[i] = RLO; break;
191 case 0x2066: chartype[i] = LRI; break;
192 case 0x2067: chartype[i] = RLI; break;
193 case 0x2068: chartype[i] = FSI; break;
194 case 0x2069: chartype[i] = PDI; break;
196 break;
201 WCHAR bidi_get_mirrored_char(WCHAR ch)
203 extern const WCHAR wine_mirror_map[];
204 return ch + wine_mirror_map[wine_mirror_map[ch >> 8] + (ch & 0xff)];
207 /* RESOLVE EXPLICIT */
209 static inline UINT8 get_greater_even_level(UINT8 level)
211 return odd(level) ? level + 1 : level + 2;
214 static inline UINT8 get_greater_odd_level(UINT8 level)
216 return odd(level) ? level + 2 : level + 1;
219 static inline UINT8 get_embedding_direction(UINT8 level)
221 return odd(level) ? R : L;
224 /*------------------------------------------------------------------------
225 Function: bidi_resolve_explicit
227 Recursively resolves explicit embedding levels and overrides.
228 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
230 Input: Base embedding level and direction
231 Character count
233 Output: Array of embedding levels
235 In/Out: Array of direction classes
238 Note: The function uses two simple counters to keep track of
239 matching explicit codes and PDF. Use the default argument for
240 the outermost call. The nesting counter counts the recursion
241 depth and not the embedding level.
242 ------------------------------------------------------------------------*/
243 typedef struct tagStackItem
245 UINT8 level;
246 UINT8 override;
247 BOOL isolate;
248 } StackItem;
250 #define push_stack(l,o,i) \
251 do { stack_top--; \
252 stack[stack_top].level = l; \
253 stack[stack_top].override = o; \
254 stack[stack_top].isolate = i;} while(0)
256 #define pop_stack() do { stack_top++; } while(0)
258 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
260 static void bidi_resolve_explicit(UINT8 baselevel, UINT8 *classes, UINT8 *levels, UINT32 count)
262 /* X1 */
263 int overflow_isolate_count = 0;
264 int overflow_embedding_count = 0;
265 int valid_isolate_count = 0;
266 UINT32 i;
268 StackItem stack[MAX_DEPTH+2];
269 int stack_top = MAX_DEPTH+1;
271 stack[stack_top].level = baselevel;
272 stack[stack_top].override = NI;
273 stack[stack_top].isolate = FALSE;
275 for (i = 0; i < count; i++) {
276 UINT8 least_odd, least_even;
278 switch (classes[i]) {
280 /* X2 */
281 case RLE:
282 least_odd = get_greater_odd_level(stack[stack_top].level);
283 levels[i] = stack[stack_top].level;
284 if (valid_level(least_odd))
285 push_stack(least_odd, NI, FALSE);
286 else if (overflow_isolate_count == 0)
287 overflow_embedding_count++;
288 break;
290 /* X3 */
291 case LRE:
292 least_even = get_greater_even_level(stack[stack_top].level);
293 levels[i] = stack[stack_top].level;
294 if (valid_level(least_even))
295 push_stack(least_even, NI, FALSE);
296 else if (overflow_isolate_count == 0)
297 overflow_embedding_count++;
298 break;
300 /* X4 */
301 case RLO:
302 least_odd = get_greater_odd_level(stack[stack_top].level);
303 levels[i] = stack[stack_top].level;
304 if (valid_level(least_odd))
305 push_stack(least_odd, R, FALSE);
306 else if (overflow_isolate_count == 0)
307 overflow_embedding_count++;
308 break;
310 /* X5 */
311 case LRO:
312 least_even = get_greater_even_level(stack[stack_top].level);
313 levels[i] = stack[stack_top].level;
314 if (valid_level(least_even))
315 push_stack(least_even, L, FALSE);
316 else if (overflow_isolate_count == 0)
317 overflow_embedding_count++;
318 break;
320 /* X5a */
321 case RLI:
322 least_odd = get_greater_odd_level(stack[stack_top].level);
323 levels[i] = stack[stack_top].level;
324 if (valid_level(least_odd))
326 valid_isolate_count++;
327 push_stack(least_odd, NI, TRUE);
329 else
330 overflow_isolate_count++;
331 break;
333 /* X5b */
334 case LRI:
335 least_even = get_greater_even_level(stack[stack_top].level);
336 levels[i] = stack[stack_top].level;
337 if (valid_level(least_even))
339 valid_isolate_count++;
340 push_stack(least_even, NI, TRUE);
342 else
343 overflow_isolate_count++;
344 break;
346 /* X5c */
347 case FSI:
349 UINT8 new_level = 0;
350 int skipping = 0;
351 int j;
353 levels[i] = stack[stack_top].level;
354 for (j = i+1; j < count; j++)
356 if (classes[j] == LRI || classes[j] == RLI || classes[j] == FSI)
358 skipping++;
359 continue;
361 else if (classes[j] == PDI)
363 if (skipping)
364 skipping --;
365 else
366 break;
367 continue;
370 if (skipping) continue;
372 if (classes[j] == L)
374 new_level = 0;
375 break;
377 else if (classes[j] == R || classes[j] == AL)
379 new_level = 1;
380 break;
383 if (odd(new_level))
385 least_odd = get_greater_odd_level(stack[stack_top].level);
386 if (valid_level(least_odd))
388 valid_isolate_count++;
389 push_stack(least_odd, NI, TRUE);
391 else
392 overflow_isolate_count++;
394 else
396 least_even = get_greater_even_level(stack[stack_top].level);
397 if (valid_level(least_even))
399 valid_isolate_count++;
400 push_stack(least_even, NI, TRUE);
402 else
403 overflow_isolate_count++;
405 break;
408 /* X6 */
409 case ON:
410 case L:
411 case R:
412 case AN:
413 case EN:
414 case AL:
415 case NSM:
416 case CS:
417 case ES:
418 case ET:
419 case S:
420 case WS:
421 levels[i] = stack[stack_top].level;
422 if (stack[stack_top].override != NI)
423 classes[i] = stack[stack_top].override;
424 break;
426 /* X6a */
427 case PDI:
428 if (overflow_isolate_count) overflow_isolate_count--;
429 else if (!valid_isolate_count) {/* do nothing */}
430 else
432 overflow_embedding_count = 0;
433 while (!stack[stack_top].isolate) pop_stack();
434 pop_stack();
435 valid_isolate_count--;
437 levels[i] = stack[stack_top].level;
438 break;
440 /* X7 */
441 case PDF:
442 levels[i] = stack[stack_top].level;
443 if (overflow_isolate_count) {/* do nothing */}
444 else if (overflow_embedding_count) overflow_embedding_count--;
445 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
446 pop_stack();
447 break;
449 /* X8: Nothing */
450 default:
451 break;
454 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
455 for (i = 0; i < count ; i++)
456 if (classes[i] == RLE || classes[i] == LRE || classes[i] == RLO || classes[i] == LRO || classes[i] == PDF)
457 classes[i] = BN;
460 static inline int get_prev_valid_char_index(const UINT8 *classes, int index, int back_fence)
462 if (index == -1 || index == back_fence) return index;
463 index--;
464 while (index > back_fence && classes[index] == BN) index--;
465 return index;
468 static inline int get_next_valid_char_index(const UINT8 *classes, int index, int front_fence)
470 if (index == front_fence) return index;
471 index++;
472 while (index < front_fence && classes[index] == BN) index++;
473 return index;
476 typedef struct tagRun
478 int start;
479 int end;
480 UINT8 e;
481 } Run;
483 typedef struct tagRunChar
485 WCHAR ch;
486 UINT8 *class;
487 } RunChar;
489 typedef struct tagIsolatedRun
491 struct list entry;
492 int length;
493 UINT8 sos;
494 UINT8 eos;
495 UINT8 e;
497 RunChar item[1];
498 } IsolatedRun;
500 static inline int get_next_valid_char_from_run(IsolatedRun *run, int index)
502 if (index >= (run->length-1)) return -1;
503 index++;
504 while (index < run->length && *run->item[index].class == BN) index++;
505 if (index == run->length) return -1;
506 return index;
509 static inline int get_prev_valid_char_from_run(IsolatedRun *run, int index)
511 if (index <= 0) return -1;
512 index--;
513 while (index > -1 && *run->item[index].class == BN) index--;
514 return index;
517 static inline int iso_previousChar(IsolatedRun *run, int index)
519 if (index <= 0) return -1;
520 return index--;
523 static inline void iso_dump_types(const char* header, IsolatedRun *run)
525 int i, len = 0;
526 TRACE("%s:",header);
527 TRACE("[ ");
528 for (i = 0; i < run->length && len < 200; i++) {
529 TRACE(" %s", debug_type[*run->item[i].class]);
530 len += strlen(debug_type[*run->item[i].class])+1;
532 if (i != run->length)
533 TRACE("...");
534 TRACE(" ]\n");
537 /*------------------------------------------------------------------------
538 Function: bidi_resolve_weak
540 Resolves the directionality of numeric and other weak character types
542 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
544 Input: Array of embedding levels
545 Character count
547 In/Out: Array of directional classes
549 Note: On input only these directional classes are expected
550 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
551 ------------------------------------------------------------------------*/
552 static void bidi_resolve_weak(IsolatedRun *iso_run)
554 int i;
556 /* W1 */
557 for (i=0; i < iso_run->length; i++) {
558 if (*iso_run->item[i].class == NSM) {
559 int j = get_prev_valid_char_from_run(iso_run, i);
560 if (j == -1)
561 *iso_run->item[i].class = iso_run->sos;
562 else if (*iso_run->item[j].class >= LRI)
563 *iso_run->item[i].class = ON;
564 else
565 *iso_run->item[i].class = *iso_run->item[j].class;
569 /* W2 */
570 for (i = 0; i < iso_run->length; i++) {
571 if (*iso_run->item[i].class == EN) {
572 int j = get_prev_valid_char_from_run(iso_run, i);
573 while (j > -1) {
574 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L || *iso_run->item[j].class == AL) {
575 if (*iso_run->item[j].class == AL)
576 *iso_run->item[i].class = AN;
577 break;
579 j = get_prev_valid_char_from_run(iso_run, j);
584 /* W3 */
585 for (i = 0; i < iso_run->length; i++) {
586 if (*iso_run->item[i].class == AL)
587 *iso_run->item[i].class = R;
590 /* W4 */
591 for (i = 0; i < iso_run->length; i++) {
592 if (*iso_run->item[i].class == ES) {
593 int b = get_prev_valid_char_from_run(iso_run, i);
594 int f = get_next_valid_char_from_run(iso_run, i);
596 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
597 *iso_run->item[i].class = EN;
599 else if (*iso_run->item[i].class == CS) {
600 int b = get_prev_valid_char_from_run(iso_run, i);
601 int f = get_next_valid_char_from_run(iso_run, i);
603 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
604 *iso_run->item[i].class = EN;
605 else if (b > -1 && f > -1 && *iso_run->item[b].class == AN && *iso_run->item[f].class == AN)
606 *iso_run->item[i].class = AN;
610 /* W5 */
611 for (i = 0; i < iso_run->length; i++) {
612 if (*iso_run->item[i].class == ET) {
613 int j;
614 for (j = i-1 ; j > -1; j--) {
615 if (*iso_run->item[j].class == BN) continue;
616 if (*iso_run->item[j].class == ET) continue;
617 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
618 else break;
620 if (*iso_run->item[i].class == ET) {
621 for (j = i+1; j < iso_run->length; j++) {
622 if (*iso_run->item[j].class == BN) continue;
623 if (*iso_run->item[j].class == ET) continue;
624 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
625 else break;
631 /* W6 */
632 for (i = 0; i < iso_run->length; i++) {
633 if (*iso_run->item[i].class == ET || *iso_run->item[i].class == ES || *iso_run->item[i].class == CS || *iso_run->item[i].class == ON)
635 int b = i-1;
636 int f = i+1;
637 if (b > -1 && *iso_run->item[b].class == BN)
638 *iso_run->item[b].class = ON;
639 if (f < iso_run->length && *iso_run->item[f].class == BN)
640 *iso_run->item[f].class = ON;
642 *iso_run->item[i].class = ON;
646 /* W7 */
647 for (i = 0; i < iso_run->length; i++) {
648 if (*iso_run->item[i].class == EN) {
649 int j;
650 for (j = get_prev_valid_char_from_run(iso_run, i); j > -1; j = get_prev_valid_char_from_run(iso_run, j))
651 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L) {
652 if (*iso_run->item[j].class == L)
653 *iso_run->item[i].class = L;
654 break;
656 if (iso_run->sos == L && j == -1)
657 *iso_run->item[i].class = L;
662 typedef struct tagBracketPair
664 int start;
665 int end;
666 } BracketPair;
668 static int bracketpair_compr(const void *a, const void* b)
670 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
673 static BracketPair *bidi_compute_bracket_pairs(IsolatedRun *iso_run)
675 WCHAR *open_stack;
676 int *stack_index;
677 int stack_top = iso_run->length;
678 BracketPair *out = NULL;
679 int pair_count = 0;
680 int i;
682 open_stack = heap_alloc(sizeof(WCHAR) * iso_run->length);
683 stack_index = heap_alloc(sizeof(int) * iso_run->length);
685 for (i = 0; i < iso_run->length; i++) {
686 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
687 if (ubv) {
688 if (!out) {
689 out = heap_alloc(sizeof(BracketPair));
690 out[0].start = -1;
693 if ((ubv >> 8) == 0) {
694 stack_top--;
695 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
696 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
697 if (open_stack[stack_top] == 0x232A)
698 open_stack[stack_top] = 0x3009;
699 stack_index[stack_top] = i;
701 else if ((ubv >> 8) == 1) {
702 int j;
704 if (stack_top == iso_run->length) continue;
705 for (j = stack_top; j < iso_run->length; j++) {
706 WCHAR c = iso_run->item[i].ch;
707 if (c == 0x232A) c = 0x3009;
708 if (c == open_stack[j]) {
709 out[pair_count].start = stack_index[j];
710 out[pair_count].end = i;
711 pair_count++;
712 out = heap_realloc(out, sizeof(BracketPair) * (pair_count+1));
713 out[pair_count].start = -1;
714 stack_top = j+1;
715 break;
721 if (pair_count == 0) {
722 heap_free(out);
723 out = NULL;
725 else if (pair_count > 1)
726 qsort(out, pair_count, sizeof(BracketPair), bracketpair_compr);
728 heap_free(open_stack);
729 heap_free(stack_index);
730 return out;
733 static inline UINT8 get_rule_N0_class(UINT8 class)
735 return (class == AN || class == EN) ? R : class;
738 /*------------------------------------------------------------------------
739 Function: bidi_resolve_neutrals
741 Resolves the directionality of neutral character types.
743 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
745 Input: Array of embedding levels
746 Character count
747 Baselevel
749 In/Out: Array of directional classes
751 Note: On input only these directional classes are expected
752 R, L, NI, AN, EN and BN
754 W8 resolves a number of ENs to L
755 ------------------------------------------------------------------------*/
756 static void bidi_resolve_neutrals(IsolatedRun *run)
758 BracketPair *pairs;
759 int i;
761 /* Translate isolates into NI */
762 for (i = 0; i < run->length; i++) {
763 if (*run->item[i].class >= LRI)
764 *run->item[i].class = NI;
766 switch (*run->item[i].class) {
767 case B:
768 case S:
769 case WS: *run->item[i].class = NI;
772 ASSERT(*run->item[i].class < 5 || *run->item[i].class == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
775 /* N0: Skipping bracketed pairs for now */
776 pairs = bidi_compute_bracket_pairs(run);
777 if (pairs) {
778 BracketPair *p = pairs;
779 int i = 0;
780 while (p->start >= 0) {
781 UINT8 e = get_embedding_direction(run->e);
782 UINT8 o = get_embedding_direction(run->e + 1);
783 BOOL flag_o = FALSE;
784 int j;
786 TRACE("Bracket Pair [%i - %i]\n", p->start, p->end);
788 /* N0.b */
789 for (j = p->start+1; j < p->end; j++) {
790 if (get_rule_N0_class(*run->item[j].class) == e) {
791 *run->item[p->start].class = e;
792 *run->item[p->end].class = e;
793 break;
795 else if (get_rule_N0_class(*run->item[j].class) == o)
796 flag_o = TRUE;
798 /* N0.c */
799 if (j == p->end && flag_o) {
800 for (j = p->start; j >= 0; j--) {
801 if (get_rule_N0_class(*run->item[j].class) == o) {
802 *run->item[p->start].class = o;
803 *run->item[p->end].class = o;
804 break;
806 else if (get_rule_N0_class(*run->item[j].class) == e) {
807 *run->item[p->start].class = e;
808 *run->item[p->end].class = e;
809 break;
812 if (j < 0) {
813 *run->item[p->start].class = run->sos;
814 *run->item[p->end].class = run->sos;
818 i++;
819 p = &pairs[i];
821 heap_free(pairs);
824 /* N1 */
825 for (i = 0; i < run->length; i++) {
826 UINT8 l, r;
828 if (*run->item[i].class == NI) {
829 int b = get_prev_valid_char_from_run(run, i);
830 int j;
832 if (b == -1) {
833 l = run->sos;
834 b = 0;
836 else {
837 if (*run->item[b].class == R || *run->item[b].class == AN || *run->item[b].class == EN)
838 l = R;
839 else if (*run->item[b].class == L)
840 l = L;
841 else /* No string type */
842 continue;
844 j = get_next_valid_char_from_run(run, i);
845 while (j > -1 && *run->item[j].class == NI) j = get_next_valid_char_from_run(run, j);
846 if (j == -1) {
847 r = run->eos;
848 j = run->length;
850 else if (*run->item[j].class == R || *run->item[j].class == AN || *run->item[j].class == EN)
851 r = R;
852 else if (*run->item[j].class == L)
853 r = L;
854 else /* No string type */
855 continue;
857 if (r == l) {
858 for (b = i; b < j && b < run->length; b++)
859 *run->item[b].class = r;
864 /* N2 */
865 for (i = 0; i < run->length; i++) {
866 if (*run->item[i].class == NI) {
867 int b = i-1;
868 int f = i+1;
870 *run->item[i].class = get_embedding_direction(run->e);
871 if (b > -1 && *run->item[b].class == BN)
872 *run->item[b].class = get_embedding_direction(run->e);
873 if (f < run->length && *run->item[f].class == BN)
874 *run->item[f].class = get_embedding_direction(run->e);
879 /*------------------------------------------------------------------------
880 Function: bidi_resolve_implicit
882 Recursively resolves implicit embedding levels.
883 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
885 Input: Array of direction classes
886 Character count
887 Base level
889 In/Out: Array of embedding levels
891 Note: levels may exceed 15 on output.
892 Accepted subset of direction classes
893 R, L, AN, EN
894 ------------------------------------------------------------------------*/
895 static void bidi_resolve_implicit(const UINT8 *classes, UINT8 *levels, int sos, int eos)
897 int i;
899 /* I1/2 */
900 for (i = sos; i <= eos; i++) {
901 if (classes[i] == BN)
902 continue;
904 ASSERT(classes[i] > 0); /* "No Neutrals allowed to survive here." */
905 ASSERT(classes[i] < 5); /* "Out of range." */
907 if (odd(levels[i]) && (classes[i] == L || classes[i] == EN || classes[i] == AN))
908 levels[i]++;
909 else if (!odd(levels[i]) && classes[i] == R)
910 levels[i]++;
911 else if (!odd(levels[i]) && (classes[i] == EN || classes[i] == AN))
912 levels[i] += 2;
916 static inline BOOL is_rule_L1_reset_class(UINT8 class)
918 switch (class) {
919 case WS:
920 case FSI:
921 case LRI:
922 case RLI:
923 case PDI:
924 case LRE:
925 case RLE:
926 case LRO:
927 case RLO:
928 case PDF:
929 case BN:
930 return TRUE;
931 default:
932 return FALSE;
936 static void bidi_resolve_resolved(UINT8 baselevel, const UINT8 *classes, UINT8 *levels, int sos, int eos)
938 int i;
940 /* L1 */
941 for (i = sos; i <= eos; i++) {
942 if (classes[i] == B || classes[i] == S) {
943 int j = i - 1;
944 while (i > sos && j >= sos && is_rule_L1_reset_class(classes[j]))
945 levels[j--] = baselevel;
946 levels[i] = baselevel;
948 if (i == eos && is_rule_L1_reset_class(classes[i])) {
949 int j = i;
950 while (j >= sos && is_rule_L1_reset_class(classes[j]))
951 levels[j--] = baselevel;
956 static HRESULT bidi_compute_isolating_runs_set(UINT8 baselevel, UINT8 *classes, UINT8 *levels, const WCHAR *string, UINT32 count, struct list *set)
958 int run_start, run_end, i;
959 int run_count = 0;
960 HRESULT hr = S_OK;
961 Run *runs;
963 runs = heap_alloc(count * sizeof(Run));
964 if (!runs)
965 return E_OUTOFMEMORY;
967 list_init(set);
969 /* Build Runs */
970 run_start = 0;
971 while (run_start < count) {
972 run_end = get_next_valid_char_index(classes, run_start, count);
973 while (run_end < count && levels[run_end] == levels[run_start])
974 run_end = get_next_valid_char_index(classes, run_end, count);
975 run_end--;
976 runs[run_count].start = run_start;
977 runs[run_count].end = run_end;
978 runs[run_count].e = levels[run_start];
979 run_start = get_next_valid_char_index(classes, run_end, count);
980 run_count++;
983 /* Build Isolating Runs */
984 i = 0;
985 while (i < run_count) {
986 int k = i;
987 if (runs[k].start >= 0) {
988 IsolatedRun *current_isolated;
989 int type_fence, real_end;
990 int j;
992 current_isolated = heap_alloc(sizeof(IsolatedRun) + sizeof(RunChar)*count);
993 if (!current_isolated) {
994 hr = E_OUTOFMEMORY;
995 break;
998 run_start = runs[k].start;
999 current_isolated->e = runs[k].e;
1000 current_isolated->length = (runs[k].end - runs[k].start)+1;
1002 for (j = 0; j < current_isolated->length; j++) {
1003 current_isolated->item[j].class = &classes[runs[k].start+j];
1004 current_isolated->item[j].ch = string[runs[k].start+j];
1007 run_end = runs[k].end;
1009 TRACE("{ [%i -- %i]",run_start, run_end);
1011 if (classes[run_end] == BN)
1012 run_end = get_prev_valid_char_index(classes, run_end, runs[k].start);
1014 while (run_end < count && (classes[run_end] == RLI || classes[run_end] == LRI || classes[run_end] == FSI)) {
1015 j = k+1;
1016 search:
1017 while (j < run_count && classes[runs[j].start] != PDI) j++;
1018 if (j < run_count && runs[i].e != runs[j].e) {
1019 j++;
1020 goto search;
1023 if (j != run_count) {
1024 int l = current_isolated->length;
1025 int m;
1027 current_isolated->length += (runs[j].end - runs[j].start)+1;
1028 for (m = 0; l < current_isolated->length; l++, m++) {
1029 current_isolated->item[l].class = &classes[runs[j].start+m];
1030 current_isolated->item[l].ch = string[runs[j].start+m];
1033 TRACE("[%i -- %i]", runs[j].start, runs[j].end);
1035 run_end = runs[j].end;
1036 if (classes[run_end] == BN)
1037 run_end = get_prev_valid_char_index(classes, run_end, runs[i].start);
1038 runs[j].start = -1;
1039 k = j;
1041 else {
1042 run_end = count;
1043 break;
1047 type_fence = get_prev_valid_char_index(classes, run_start, -1);
1049 if (type_fence == -1)
1050 current_isolated->sos = (baselevel > levels[run_start]) ? baselevel : levels[run_start];
1051 else
1052 current_isolated->sos = (levels[type_fence] > levels[run_start]) ? levels[type_fence] : levels[run_start];
1054 current_isolated->sos = get_embedding_direction(current_isolated->sos);
1056 if (run_end == count)
1057 current_isolated->eos = current_isolated->sos;
1058 else {
1059 /* eos could be an BN */
1060 if (classes[run_end] == BN) {
1061 real_end = get_prev_valid_char_index(classes, run_end, run_start-1);
1062 if (real_end < run_start)
1063 real_end = run_start;
1065 else
1066 real_end = run_end;
1068 type_fence = get_next_valid_char_index(classes, run_end, count);
1069 if (type_fence == count)
1070 current_isolated->eos = (baselevel > levels[real_end]) ? baselevel : levels[real_end];
1071 else
1072 current_isolated->eos = (levels[type_fence] > levels[real_end]) ? levels[type_fence] : levels[real_end];
1074 current_isolated->eos = get_embedding_direction(current_isolated->eos);
1077 list_add_tail(set, &current_isolated->entry);
1078 TRACE(" } level %i {%s <--> %s}\n", current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1080 i++;
1083 heap_free(runs);
1084 return hr;
1087 HRESULT bidi_computelevels(const WCHAR *string, UINT32 count, UINT8 baselevel, UINT8 *explicit, UINT8 *levels)
1089 IsolatedRun *iso_run, *next;
1090 struct list IsolatingRuns;
1091 UINT8 *chartype;
1092 HRESULT hr;
1094 TRACE("%s, %u\n", debugstr_wn(string, count), count);
1096 chartype = heap_alloc(count*sizeof(*chartype));
1097 if (!chartype)
1098 return E_OUTOFMEMORY;
1100 bidi_classify(string, chartype, count);
1101 if (TRACE_ON(bidi)) bidi_dump_types("start ", chartype, 0, count);
1103 bidi_resolve_explicit(baselevel, chartype, levels, count);
1104 memcpy(explicit, levels, count*sizeof(*explicit));
1106 if (TRACE_ON(bidi)) bidi_dump_types("after explicit", chartype, 0, count);
1108 /* X10/BD13: Compute Isolating runs */
1109 hr = bidi_compute_isolating_runs_set(baselevel, chartype, levels, string, count, &IsolatingRuns);
1110 if (FAILED(hr))
1111 goto done;
1113 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1115 if (TRACE_ON(bidi)) iso_dump_types("run", iso_run);
1117 bidi_resolve_weak(iso_run);
1118 if (TRACE_ON(bidi)) iso_dump_types("after weak", iso_run);
1120 bidi_resolve_neutrals(iso_run);
1121 if (TRACE_ON(bidi)) iso_dump_types("after neutrals", iso_run);
1123 list_remove(&iso_run->entry);
1124 heap_free(iso_run);
1127 if (TRACE_ON(bidi)) bidi_dump_types("before implicit", chartype, 0, count);
1128 bidi_resolve_implicit(chartype, levels, 0, count-1);
1130 bidi_classify(string, chartype, count);
1131 bidi_resolve_resolved(baselevel, chartype, levels, 0, count-1);
1133 done:
1134 heap_free(chartype);
1135 return hr;