winex11: Avoid deadlock when setting cursor.
[wine.git] / dlls / usp10 / bidi.c
blob0fb03ef061ae7fc800b63b3362cb139b28ea9302
1 /*
2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include "config.h"
46 #include <stdarg.h>
47 #include <stdlib.h>
48 #include "windef.h"
49 #include "winbase.h"
50 #include "wingdi.h"
51 #include "winnls.h"
52 #include "usp10.h"
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/heap.h"
56 #include "wine/list.h"
58 #include "usp10_internal.h"
60 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
62 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
64 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
65 #define MAX_DEPTH 125
67 /* HELPER FUNCTIONS AND DECLARATIONS */
69 /*------------------------------------------------------------------------
70 Bidirectional Character Types
72 as defined by the Unicode Bidirectional Algorithm Table 3-7.
74 Note:
76 The list of bidirectional character types here is not grouped the
77 same way as the table 3-7, since the numberic values for the types
78 are chosen to keep the state and action tables compact.
79 ------------------------------------------------------------------------*/
80 enum directions
82 /* input types */
83 /* ON MUST be zero, code relies on ON = NI = 0 */
84 ON = 0, /* Other Neutral */
85 L, /* Left Letter */
86 R, /* Right Letter */
87 AN, /* Arabic Number */
88 EN, /* European Number */
89 AL, /* Arabic Letter (Right-to-left) */
90 NSM, /* Non-spacing Mark */
91 CS, /* Common Separator */
92 ES, /* European Separator */
93 ET, /* European Terminator (post/prefix e.g. $ and %) */
95 /* resolved types */
96 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
98 /* input types, */
99 S, /* Segment Separator (TAB) // used only in L1 */
100 WS, /* White space // used only in L1 */
101 B, /* Paragraph Separator (aka as PS) */
103 /* types for explicit controls */
104 RLO, /* these are used only in X1-X9 */
105 RLE,
106 LRO,
107 LRE,
108 PDF,
110 LRI, /* Isolate formatting characters new with 6.3 */
111 RLI,
112 FSI,
113 PDI,
115 /* resolved types, also resolved directions */
116 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
119 static const char debug_type[][4] =
121 "ON", /* Other Neutral */
122 "L", /* Left Letter */
123 "R", /* Right Letter */
124 "AN", /* Arabic Number */
125 "EN", /* European Number */
126 "AL", /* Arabic Letter (Right-to-left) */
127 "NSM", /* Non-spacing Mark */
128 "CS", /* Common Separator */
129 "ES", /* European Separator */
130 "ET", /* European Terminator (post/prefix e.g. $ and %) */
131 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
132 "S", /* Segment Separator (TAB) // used only in L1 */
133 "WS", /* White space // used only in L1 */
134 "B", /* Paragraph Separator (aka as PS) */
135 "RLO", /* these are used only in X1-X9 */
136 "RLE",
137 "LRO",
138 "LRE",
139 "PDF",
140 "LRI", /* Isolate formatting characters new with 6.3 */
141 "RLI",
142 "FSI",
143 "PDI",
146 /* HELPER FUNCTIONS */
148 static inline void dump_types(const char* header, WORD *types, int start, int end)
150 int i, len = 0;
151 TRACE("%s:",header);
152 for (i = start; i < end && len < 200; i++)
154 TRACE(" %s",debug_type[types[i]]);
155 len += strlen(debug_type[types[i]])+1;
157 if (i != end)
158 TRACE("...");
159 TRACE("\n");
162 /* Convert the libwine information to the direction enum */
163 static void classify(const WCHAR *string, WORD *chartype, DWORD count, const SCRIPT_CONTROL *c)
165 static const enum directions dir_map[16] =
167 L, /* unassigned defaults to L */
180 NSM,
182 PDF /* also LRE, LRO, RLE, RLO */
185 unsigned i;
187 for (i = 0; i < count; ++i)
189 chartype[i] = dir_map[get_char_typeW(string[i]) >> 12];
190 switch (chartype[i])
192 case ES:
193 if (!c->fLegacyBidiClass) break;
194 switch (string[i])
196 case '-':
197 case '+': chartype[i] = NI; break;
198 case '/': chartype[i] = CS; break;
200 break;
201 case PDF:
202 switch (string[i])
204 case 0x202A: chartype[i] = LRE; break;
205 case 0x202B: chartype[i] = RLE; break;
206 case 0x202C: chartype[i] = PDF; break;
207 case 0x202D: chartype[i] = LRO; break;
208 case 0x202E: chartype[i] = RLO; break;
209 case 0x2066: chartype[i] = LRI; break;
210 case 0x2067: chartype[i] = RLI; break;
211 case 0x2068: chartype[i] = FSI; break;
212 case 0x2069: chartype[i] = PDI; break;
214 break;
219 /* RESOLVE EXPLICIT */
221 static WORD GreaterEven(int i)
223 return odd(i) ? i + 1 : i + 2;
226 static WORD GreaterOdd(int i)
228 return odd(i) ? i + 2 : i + 1;
231 static WORD EmbeddingDirection(int level)
233 return odd(level) ? R : L;
236 /*------------------------------------------------------------------------
237 Function: resolveExplicit
239 Recursively resolves explicit embedding levels and overrides.
240 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
242 Input: Base embedding level and direction
243 Character count
245 Output: Array of embedding levels
247 In/Out: Array of direction classes
250 Note: The function uses two simple counters to keep track of
251 matching explicit codes and PDF. Use the default argument for
252 the outermost call. The nesting counter counts the recursion
253 depth and not the embedding level.
254 ------------------------------------------------------------------------*/
255 typedef struct tagStackItem {
256 int level;
257 int override;
258 BOOL isolate;
259 } StackItem;
261 #define push_stack(l,o,i) \
262 do { stack_top--; \
263 stack[stack_top].level = l; \
264 stack[stack_top].override = o; \
265 stack[stack_top].isolate = i;} while(0)
267 #define pop_stack() do { stack_top++; } while(0)
269 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
271 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride)
273 /* X1 */
274 int overflow_isolate_count = 0;
275 int overflow_embedding_count = 0;
276 int valid_isolate_count = 0;
277 int i;
279 StackItem stack[MAX_DEPTH+2];
280 int stack_top = MAX_DEPTH+1;
282 stack[stack_top].level = level;
283 stack[stack_top].override = NI;
284 stack[stack_top].isolate = FALSE;
286 if (initialOverride)
288 if (odd(level))
289 push_stack(level, R, FALSE);
290 else
291 push_stack(level, L, FALSE);
294 for (i = 0; i < count; i++)
296 poutOverrides[i] = stack[stack_top].override;
298 /* X2 */
299 if (pclass[i] == RLE)
301 int least_odd = GreaterOdd(stack[stack_top].level);
302 poutLevel[i] = stack[stack_top].level;
303 if (valid_level(least_odd))
304 push_stack(least_odd, NI, FALSE);
305 else if (overflow_isolate_count == 0)
306 overflow_embedding_count++;
308 /* X3 */
309 else if (pclass[i] == LRE)
311 int least_even = GreaterEven(stack[stack_top].level);
312 poutLevel[i] = stack[stack_top].level;
313 if (valid_level(least_even))
314 push_stack(least_even, NI, FALSE);
315 else if (overflow_isolate_count == 0)
316 overflow_embedding_count++;
318 /* X4 */
319 else if (pclass[i] == RLO)
321 int least_odd = GreaterOdd(stack[stack_top].level);
322 poutLevel[i] = stack[stack_top].level;
323 if (valid_level(least_odd))
324 push_stack(least_odd, R, FALSE);
325 else if (overflow_isolate_count == 0)
326 overflow_embedding_count++;
328 /* X5 */
329 else if (pclass[i] == LRO)
331 int least_even = GreaterEven(stack[stack_top].level);
332 poutLevel[i] = stack[stack_top].level;
333 if (valid_level(least_even))
334 push_stack(least_even, L, FALSE);
335 else if (overflow_isolate_count == 0)
336 overflow_embedding_count++;
338 /* X5a */
339 else if (pclass[i] == RLI)
341 int least_odd = GreaterOdd(stack[stack_top].level);
342 poutLevel[i] = stack[stack_top].level;
343 if (valid_level(least_odd))
345 valid_isolate_count++;
346 push_stack(least_odd, NI, TRUE);
348 else
349 overflow_isolate_count++;
351 /* X5b */
352 else if (pclass[i] == LRI)
354 int least_even = GreaterEven(stack[stack_top].level);
355 poutLevel[i] = stack[stack_top].level;
356 if (valid_level(least_even))
358 valid_isolate_count++;
359 push_stack(least_even, NI, TRUE);
361 else
362 overflow_isolate_count++;
364 /* X5c */
365 else if (pclass[i] == FSI)
367 int j;
368 int new_level = 0;
369 int skipping = 0;
370 poutLevel[i] = stack[stack_top].level;
371 for (j = i+1; j < count; j++)
373 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
375 skipping++;
376 continue;
378 else if (pclass[j] == PDI)
380 if (skipping)
381 skipping --;
382 else
383 break;
384 continue;
387 if (skipping) continue;
389 if (pclass[j] == L)
391 new_level = 0;
392 break;
394 else if (pclass[j] == R || pclass[j] == AL)
396 new_level = 1;
397 break;
400 if (odd(new_level))
402 int least_odd = GreaterOdd(stack[stack_top].level);
403 if (valid_level(least_odd))
405 valid_isolate_count++;
406 push_stack(least_odd, NI, TRUE);
408 else
409 overflow_isolate_count++;
411 else
413 int least_even = GreaterEven(stack[stack_top].level);
414 if (valid_level(least_even))
416 valid_isolate_count++;
417 push_stack(least_even, NI, TRUE);
419 else
420 overflow_isolate_count++;
423 /* X6 */
424 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
426 poutLevel[i] = stack[stack_top].level;
427 if (stack[stack_top].override != NI)
428 pclass[i] = stack[stack_top].override;
430 /* X6a */
431 else if (pclass[i] == PDI)
433 if (overflow_isolate_count) overflow_isolate_count--;
434 else if (!valid_isolate_count) {/* do nothing */}
435 else
437 overflow_embedding_count = 0;
438 while (!stack[stack_top].isolate) pop_stack();
439 pop_stack();
440 valid_isolate_count --;
442 poutLevel[i] = stack[stack_top].level;
444 /* X7 */
445 else if (pclass[i] == PDF)
447 poutLevel[i] = stack[stack_top].level;
448 if (overflow_isolate_count) {/* do nothing */}
449 else if (overflow_embedding_count) overflow_embedding_count--;
450 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
451 pop_stack();
453 /* X8: Nothing */
455 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
456 for (i = 0; i < count ; i++)
457 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
458 pclass[i] = BN;
461 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
463 if (index == -1 || index == back_fence) return index;
464 index --;
465 while (index > back_fence && pcls[index] == BN) index --;
466 return index;
469 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
471 if (index == front_fence) return index;
472 index ++;
473 while (index < front_fence && pcls[index] == BN) index ++;
474 return index;
477 typedef struct tagRun
479 int start;
480 int end;
481 WORD e;
482 } Run;
484 typedef struct tagRunChar
486 WCHAR ch;
487 WORD *pcls;
488 } RunChar;
490 typedef struct tagIsolatedRun
492 struct list entry;
493 int length;
494 WORD sos;
495 WORD eos;
496 WORD e;
498 RunChar item[1];
499 } IsolatedRun;
501 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
503 if (index >= (iso_run->length-1)) return -1;
504 index ++;
505 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
506 if (index == iso_run->length) return -1;
507 return index;
510 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
513 if (index <= 0) return -1;
514 index --;
515 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
516 return index;
519 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
521 int i, len = 0;
522 TRACE("%s:",header);
523 TRACE("[ ");
524 for (i = 0; i < iso_run->length && len < 200; i++)
526 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
527 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
529 if (i != iso_run->length)
530 TRACE("...");
531 TRACE(" ]\n");
534 /*------------------------------------------------------------------------
535 Function: resolveWeak
537 Resolves the directionality of numeric and other weak character types
539 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
541 Input: Array of embedding levels
542 Character count
544 In/Out: Array of directional classes
546 Note: On input only these directional classes are expected
547 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
548 ------------------------------------------------------------------------*/
550 static void resolveWeak(IsolatedRun * iso_run)
552 int i;
554 /* W1 */
555 for (i=0; i < iso_run->length; i++)
557 if (*iso_run->item[i].pcls == NSM)
559 int j = iso_previousValidChar(iso_run, i);
560 if (j == -1)
561 *iso_run->item[i].pcls = iso_run->sos;
562 else if (*iso_run->item[j].pcls >= LRI)
563 *iso_run->item[i].pcls = ON;
564 else
565 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
569 /* W2 */
570 for (i = 0; i < iso_run->length; i++)
572 if (*iso_run->item[i].pcls == EN)
574 int j = iso_previousValidChar(iso_run, i);
575 while (j > -1)
577 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
579 if (*iso_run->item[j].pcls == AL)
580 *iso_run->item[i].pcls = AN;
581 break;
583 j = iso_previousValidChar(iso_run, j);
588 /* W3 */
589 for (i = 0; i < iso_run->length; i++)
591 if (*iso_run->item[i].pcls == AL)
592 *iso_run->item[i].pcls = R;
595 /* W4 */
596 for (i = 0; i < iso_run->length; i++)
598 if (*iso_run->item[i].pcls == ES)
600 int b = iso_previousValidChar(iso_run, i);
601 int f = iso_nextValidChar(iso_run, i);
603 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
604 *iso_run->item[i].pcls = EN;
606 else if (*iso_run->item[i].pcls == CS)
608 int b = iso_previousValidChar(iso_run, i);
609 int f = iso_nextValidChar(iso_run, i);
611 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
612 *iso_run->item[i].pcls = EN;
613 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
614 *iso_run->item[i].pcls = AN;
618 /* W5 */
619 for (i = 0; i < iso_run->length; i++)
621 if (*iso_run->item[i].pcls == ET)
623 int j;
624 for (j = i-1 ; j > -1; j--)
626 if (*iso_run->item[j].pcls == BN) continue;
627 if (*iso_run->item[j].pcls == ET) continue;
628 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
629 else break;
631 if (*iso_run->item[i].pcls == ET)
633 for (j = i+1; j < iso_run->length; j++)
635 if (*iso_run->item[j].pcls == BN) continue;
636 if (*iso_run->item[j].pcls == ET) continue;
637 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
638 else break;
644 /* W6 */
645 for (i = 0; i < iso_run->length; i++)
647 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
649 int b = i-1;
650 int f = i+1;
651 if (b > -1 && *iso_run->item[b].pcls == BN)
652 *iso_run->item[b].pcls = ON;
653 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
654 *iso_run->item[f].pcls = ON;
656 *iso_run->item[i].pcls = ON;
660 /* W7 */
661 for (i = 0; i < iso_run->length; i++)
663 if (*iso_run->item[i].pcls == EN)
665 int j;
666 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
667 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
669 if (*iso_run->item[j].pcls == L)
670 *iso_run->item[i].pcls = L;
671 break;
673 if (iso_run->sos == L && j == -1)
674 *iso_run->item[i].pcls = L;
679 typedef struct tagBracketPair
681 int start;
682 int end;
683 } BracketPair;
685 static int compr(const void *a, const void* b)
687 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
690 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
692 WCHAR *open_stack;
693 int *stack_index;
694 int stack_top = iso_run->length;
695 unsigned int pair_count = 0;
696 BracketPair *out = NULL;
697 SIZE_T out_size = 0;
698 int i;
700 open_stack = heap_alloc(iso_run->length * sizeof(*open_stack));
701 stack_index = heap_alloc(iso_run->length * sizeof(*stack_index));
703 for (i = 0; i < iso_run->length; i++)
705 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
707 if (!ubv)
708 continue;
710 if ((ubv >> 8) == 0)
712 --stack_top;
713 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
714 /* Deal with canonical equivalent U+2329/232A and U+3008/3009. */
715 if (open_stack[stack_top] == 0x232a)
716 open_stack[stack_top] = 0x3009;
717 stack_index[stack_top] = i;
719 else if ((ubv >> 8) == 1)
721 unsigned int j;
723 for (j = stack_top; j < iso_run->length; ++j)
725 WCHAR c = iso_run->item[i].ch;
727 if (c == 0x232a)
728 c = 0x3009;
730 if (c != open_stack[j])
731 continue;
733 if (!(usp10_array_reserve((void **)&out, &out_size, pair_count + 2, sizeof(*out))))
734 ERR("Failed to grow output array.\n");
736 out[pair_count].start = stack_index[j];
737 out[pair_count].end = i;
738 ++pair_count;
740 out[pair_count].start = -1;
741 stack_top = j + 1;
742 break;
747 heap_free(open_stack);
748 heap_free(stack_index);
750 if (!pair_count)
751 return NULL;
753 qsort(out, pair_count, sizeof(*out), compr);
755 return out;
758 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
760 /*------------------------------------------------------------------------
761 Function: resolveNeutrals
763 Resolves the directionality of neutral character types.
765 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
767 Input: Array of embedding levels
768 Character count
769 Baselevel
771 In/Out: Array of directional classes
773 Note: On input only these directional classes are expected
774 R, L, NI, AN, EN and BN
776 W8 resolves a number of ENs to L
777 ------------------------------------------------------------------------*/
778 static void resolveNeutrals(IsolatedRun *iso_run)
780 int i;
781 BracketPair *pairs = NULL;
783 /* Translate isolates into NI */
784 for (i = 0; i < iso_run->length; i++)
786 if (*iso_run->item[i].pcls >= LRI)
787 *iso_run->item[i].pcls = NI;
789 switch(*iso_run->item[i].pcls)
791 case B:
792 case S:
793 case WS: *iso_run->item[i].pcls = NI;
796 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
799 /* N0: Skipping bracketed pairs for now */
800 pairs = computeBracketPairs(iso_run);
801 if (pairs)
803 BracketPair *p = &pairs[0];
804 int i = 0;
805 while (p->start >= 0)
807 int j;
808 int e = EmbeddingDirection(iso_run->e);
809 int o = EmbeddingDirection(iso_run->e+1);
810 BOOL flag_o = FALSE;
811 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
813 /* N0.b */
814 for (j = p->start+1; j < p->end; j++)
816 if (N0_TYPE(*iso_run->item[j].pcls) == e)
818 *iso_run->item[p->start].pcls = e;
819 *iso_run->item[p->end].pcls = e;
820 break;
822 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
823 flag_o = TRUE;
825 /* N0.c */
826 if (j == p->end && flag_o)
828 for (j = p->start; j >= 0; j--)
830 if (N0_TYPE(*iso_run->item[j].pcls) == o)
832 *iso_run->item[p->start].pcls = o;
833 *iso_run->item[p->end].pcls = o;
834 break;
836 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
838 *iso_run->item[p->start].pcls = e;
839 *iso_run->item[p->end].pcls = e;
840 break;
843 if ( j < 0 )
845 *iso_run->item[p->start].pcls = iso_run->sos;
846 *iso_run->item[p->end].pcls = iso_run->sos;
850 i++;
851 p = &pairs[i];
853 heap_free(pairs);
856 /* N1 */
857 for (i = 0; i < iso_run->length; i++)
859 WORD l,r;
861 if (*iso_run->item[i].pcls == NI)
863 int j;
864 int b = iso_previousValidChar(iso_run, i);
866 if (b == -1)
868 l = iso_run->sos;
869 b = 0;
871 else
873 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
874 l = R;
875 else if (*iso_run->item[b].pcls == L)
876 l = L;
877 else /* No string type */
878 continue;
880 j = iso_nextValidChar(iso_run, i);
881 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
883 if (j == -1)
885 r = iso_run->eos;
886 j = iso_run->length;
888 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
889 r = R;
890 else if (*iso_run->item[j].pcls == L)
891 r = L;
892 else /* No string type */
893 continue;
895 if (r == l)
897 for (b = i; b < j && b < iso_run->length; b++)
898 *iso_run->item[b].pcls = r;
903 /* N2 */
904 for (i = 0; i < iso_run->length; i++)
906 if (*iso_run->item[i].pcls == NI)
908 int b = i-1;
909 int f = i+1;
910 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
911 if (b > -1 && *iso_run->item[b].pcls == BN)
912 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
913 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
914 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
919 /*------------------------------------------------------------------------
920 Function: resolveImplicit
922 Recursively resolves implicit embedding levels.
923 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
925 Input: Array of direction classes
926 Character count
927 Base level
929 In/Out: Array of embedding levels
931 Note: levels may exceed 15 on output.
932 Accepted subset of direction classes
933 R, L, AN, EN
934 ------------------------------------------------------------------------*/
935 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
937 int i;
939 /* I1/2 */
940 for (i = sos; i <= eos; i++)
942 if (pcls[i] == BN)
943 continue;
945 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
946 ASSERT(pcls[i] < 5); /* "Out of range." */
948 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
949 plevel[i]++;
950 else if (!odd(plevel[i]) && pcls[i] == R)
951 plevel[i]++;
952 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
953 plevel[i]+=2;
957 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
959 int i;
961 /* L1 */
962 for (i = sos; i <= eos; i++)
964 if (pcls[i] == B || pcls[i] == S)
966 int j = i -1;
967 while (i > sos && j >= sos &&
968 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
969 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
970 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
971 plevel[j--] = baselevel;
972 plevel[i] = baselevel;
974 else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO ||
975 pcls[i] == PDF || pcls[i] == BN)
977 plevel[i] = i ? plevel[i - 1] : baselevel;
979 if (i == eos &&
980 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
981 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
982 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
984 int j = i;
985 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
986 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
987 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
988 plevel[j--] = baselevel;
993 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, const WORD *pLevel,
994 const WCHAR *string, unsigned int uCount, struct list *set)
996 int run_start, run_end, i;
997 int run_count = 0;
998 Run *runs;
999 IsolatedRun *current_isolated;
1001 if (!(runs = heap_calloc(uCount, sizeof(*runs))))
1002 return;
1004 list_init(set);
1006 /* Build Runs */
1007 run_start = 0;
1008 while (run_start < uCount)
1010 run_end = nextValidChar(pcls, run_start, uCount);
1011 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
1012 run_end --;
1013 runs[run_count].start = run_start;
1014 runs[run_count].end = run_end;
1015 runs[run_count].e = pLevel[run_start];
1016 run_start = nextValidChar(pcls, run_end, uCount);
1017 run_count++;
1020 /* Build Isolating Runs */
1021 i = 0;
1022 while (i < run_count)
1024 int k = i;
1025 if (runs[k].start >= 0)
1027 int type_fence, real_end;
1028 int j;
1030 if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount]))))
1031 break;
1033 run_start = runs[k].start;
1034 current_isolated->e = runs[k].e;
1035 current_isolated->length = (runs[k].end - runs[k].start)+1;
1037 for (j = 0; j < current_isolated->length; j++)
1039 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1040 current_isolated->item[j].ch = string[runs[k].start + j];
1043 run_end = runs[k].end;
1045 TRACE("{ [%i -- %i]",run_start, run_end);
1047 if (pcls[run_end] == BN)
1048 run_end = previousValidChar(pcls, run_end, runs[k].start);
1050 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1052 j = k+1;
1053 search:
1054 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1055 if (j < run_count && runs[i].e != runs[j].e)
1057 j++;
1058 goto search;
1061 if (j != run_count)
1063 int m;
1064 int l = current_isolated->length;
1066 current_isolated->length += (runs[j].end - runs[j].start)+1;
1067 for (m = 0; l < current_isolated->length; l++, m++)
1069 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1070 current_isolated->item[l].ch = string[runs[j].start + m];
1073 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1075 run_end = runs[j].end;
1076 if (pcls[run_end] == BN)
1077 run_end = previousValidChar(pcls, run_end, runs[i].start);
1078 runs[j].start = -1;
1079 k = j;
1081 else
1083 run_end = uCount;
1084 break;
1088 type_fence = previousValidChar(pcls, run_start, -1);
1090 if (type_fence == -1)
1091 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1092 else
1093 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1095 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1097 if (run_end == uCount)
1098 current_isolated->eos = current_isolated->sos;
1099 else
1101 /* eos could be an BN */
1102 if ( pcls[run_end] == BN )
1104 real_end = previousValidChar(pcls, run_end, run_start-1);
1105 if (real_end < run_start)
1106 real_end = run_start;
1108 else
1109 real_end = run_end;
1111 type_fence = nextValidChar(pcls, run_end, uCount);
1112 if (type_fence == uCount)
1113 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1114 else
1115 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1117 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1120 list_add_tail(set, &current_isolated->entry);
1121 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1123 i++;
1126 heap_free(runs);
1129 /*************************************************************
1130 * BIDI_DeterminLevels
1132 BOOL BIDI_DetermineLevels(
1133 const WCHAR *lpString, /* [in] The string for which information is to be returned */
1134 unsigned int uCount, /* [in] Number of WCHARs in string. */
1135 const SCRIPT_STATE *s,
1136 const SCRIPT_CONTROL *c,
1137 WORD *lpOutLevels, /* [out] final string levels */
1138 WORD *lpOutOverrides /* [out] final string overrides */
1141 WORD *chartype;
1142 unsigned baselevel = 0;
1143 struct list IsolatingRuns;
1144 IsolatedRun *iso_run, *next;
1146 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1148 if (!(chartype = heap_alloc(uCount * sizeof(*chartype))))
1150 WARN("Out of memory\n");
1151 return FALSE;
1154 baselevel = s->uBidiLevel;
1156 classify(lpString, chartype, uCount, c);
1157 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1159 memset(lpOutOverrides, 0, sizeof(WORD) * uCount);
1161 /* resolve explicit */
1162 resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection);
1163 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1165 /* X10/BD13: Computer Isolating runs */
1166 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1168 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1170 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1172 /* resolve weak */
1173 resolveWeak(iso_run);
1174 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1176 /* resolve neutrals */
1177 resolveNeutrals(iso_run);
1178 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1180 list_remove(&iso_run->entry);
1181 heap_free(iso_run);
1184 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1185 /* resolveImplicit */
1186 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1188 /* resolveResolvedLevels*/
1189 classify(lpString, chartype, uCount, c);
1190 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1192 heap_free(chartype);
1193 return TRUE;
1196 /* reverse cch indexes */
1197 static void reverse(int *pidx, int cch)
1199 int temp;
1200 int ich = 0;
1201 for (; ich < --cch; ich++)
1203 temp = pidx[ich];
1204 pidx[ich] = pidx[cch];
1205 pidx[cch] = temp;
1210 /*------------------------------------------------------------------------
1211 Functions: reorder/reorderLevel
1213 Recursively reorders the display string
1214 "From the highest level down, reverse all characters at that level and
1215 higher, down to the lowest odd level"
1217 Implements rule L2 of the Unicode bidi Algorithm.
1219 Input: Array of embedding levels
1220 Character count
1221 Flag enabling reversal (set to false by initial caller)
1223 In/Out: Text to reorder
1225 Note: levels may exceed 15 resp. 61 on input.
1227 Rule L3 - reorder combining marks is not implemented here
1228 Rule L4 - glyph mirroring is implemented as a display option below
1230 Note: this should be applied a line at a time
1231 -------------------------------------------------------------------------*/
1232 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1234 int ich = 0;
1236 /* true as soon as first odd level encountered */
1237 fReverse = fReverse || odd(level);
1239 for (; ich < cch; ich++)
1241 if (plevel[ich] < level)
1243 break;
1245 else if (plevel[ich] > level)
1247 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1248 cch - ich, fReverse) - 1;
1251 if (fReverse)
1253 reverse(pIndexs, ich);
1255 return ich;
1258 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1259 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1261 int ich = 0;
1262 int newlevel = -1;
1264 /* true as soon as first odd level encountered */
1265 fReverse = fReverse || odd(level);
1267 for (; ich < cch; ich++)
1269 if (plevel[ich] < level)
1270 break;
1271 else if (plevel[ich] > level)
1272 newlevel = ich;
1274 if (fReverse)
1276 reverse(pIndexs, ich);
1279 if (newlevel >= 0)
1281 ich = 0;
1282 for (; ich < cch; ich++)
1283 if (plevel[ich] < level)
1284 break;
1285 else if (plevel[ich] > level)
1286 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1287 cch - ich, fReverse) - 1;
1290 return ich;
1293 BOOL BIDI_GetStrengths(const WCHAR *string, unsigned int count, const SCRIPT_CONTROL *c, WORD *strength)
1295 unsigned int i;
1297 classify(string, strength, count, c);
1298 for (i = 0; i < count; i++)
1300 switch (strength[i])
1302 case L:
1303 case LRE:
1304 case LRO:
1305 case R:
1306 case AL:
1307 case RLE:
1308 case RLO:
1309 strength[i] = BIDI_STRONG;
1310 break;
1311 case PDF:
1312 case EN:
1313 case ES:
1314 case ET:
1315 case AN:
1316 case CS:
1317 case BN:
1318 strength[i] = BIDI_WEAK;
1319 break;
1320 case B:
1321 case S:
1322 case WS:
1323 case ON:
1324 default: /* Neutrals and NSM */
1325 strength[i] = BIDI_NEUTRAL;
1328 return TRUE;