gdiplus: Support GdipSetClipRegion in metafiles.
[wine.git] / dlls / usp10 / bidi.c
blob5941ef81ca9d8bd7d5df96a5ae9f741c782bd133
1 /*
2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include "config.h"
46 #include <stdarg.h>
47 #include <stdlib.h>
48 #include "windef.h"
49 #include "winbase.h"
50 #include "wingdi.h"
51 #include "winnls.h"
52 #include "usp10.h"
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/list.h"
57 #include "usp10_internal.h"
59 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
61 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
63 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
64 #define MAX_DEPTH 125
66 /* HELPER FUNCTIONS AND DECLARATIONS */
68 /*------------------------------------------------------------------------
69 Bidirectional Character Types
71 as defined by the Unicode Bidirectional Algorithm Table 3-7.
73 Note:
75 The list of bidirectional character types here is not grouped the
76 same way as the table 3-7, since the numberic values for the types
77 are chosen to keep the state and action tables compact.
78 ------------------------------------------------------------------------*/
79 enum directions
81 /* input types */
82 /* ON MUST be zero, code relies on ON = NI = 0 */
83 ON = 0, /* Other Neutral */
84 L, /* Left Letter */
85 R, /* Right Letter */
86 AN, /* Arabic Number */
87 EN, /* European Number */
88 AL, /* Arabic Letter (Right-to-left) */
89 NSM, /* Non-spacing Mark */
90 CS, /* Common Separator */
91 ES, /* European Separator */
92 ET, /* European Terminator (post/prefix e.g. $ and %) */
94 /* resolved types */
95 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
97 /* input types, */
98 S, /* Segment Separator (TAB) // used only in L1 */
99 WS, /* White space // used only in L1 */
100 B, /* Paragraph Separator (aka as PS) */
102 /* types for explicit controls */
103 RLO, /* these are used only in X1-X9 */
104 RLE,
105 LRO,
106 LRE,
107 PDF,
109 LRI, /* Isolate formatting characters new with 6.3 */
110 RLI,
111 FSI,
112 PDI,
114 /* resolved types, also resolved directions */
115 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
118 static const char debug_type[][4] =
120 "ON", /* Other Neutral */
121 "L", /* Left Letter */
122 "R", /* Right Letter */
123 "AN", /* Arabic Number */
124 "EN", /* European Number */
125 "AL", /* Arabic Letter (Right-to-left) */
126 "NSM", /* Non-spacing Mark */
127 "CS", /* Common Separator */
128 "ES", /* European Separator */
129 "ET", /* European Terminator (post/prefix e.g. $ and %) */
130 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
131 "S", /* Segment Separator (TAB) // used only in L1 */
132 "WS", /* White space // used only in L1 */
133 "B", /* Paragraph Separator (aka as PS) */
134 "RLO", /* these are used only in X1-X9 */
135 "RLE",
136 "LRO",
137 "LRE",
138 "PDF",
139 "LRI", /* Isolate formatting characters new with 6.3 */
140 "RLI",
141 "FSI",
142 "PDI",
145 /* HELPER FUNCTIONS */
147 static inline void dump_types(const char* header, WORD *types, int start, int end)
149 int i, len = 0;
150 TRACE("%s:",header);
151 for (i = start; i < end && len < 200; i++)
153 TRACE(" %s",debug_type[types[i]]);
154 len += strlen(debug_type[types[i]])+1;
156 if (i != end)
157 TRACE("...");
158 TRACE("\n");
161 /* Convert the libwine information to the direction enum */
162 static void classify(const WCHAR *string, WORD *chartype, DWORD count, const SCRIPT_CONTROL *c)
164 static const enum directions dir_map[16] =
166 L, /* unassigned defaults to L */
179 NSM,
181 PDF /* also LRE, LRO, RLE, RLO */
184 unsigned i;
186 for (i = 0; i < count; ++i)
188 chartype[i] = dir_map[get_char_typeW(string[i]) >> 12];
189 switch (chartype[i])
191 case ES:
192 if (!c->fLegacyBidiClass) break;
193 switch (string[i])
195 case '-':
196 case '+': chartype[i] = NI; break;
197 case '/': chartype[i] = CS; break;
199 break;
200 case PDF:
201 switch (string[i])
203 case 0x202A: chartype[i] = LRE; break;
204 case 0x202B: chartype[i] = RLE; break;
205 case 0x202C: chartype[i] = PDF; break;
206 case 0x202D: chartype[i] = LRO; break;
207 case 0x202E: chartype[i] = RLO; break;
208 case 0x2066: chartype[i] = LRI; break;
209 case 0x2067: chartype[i] = RLI; break;
210 case 0x2068: chartype[i] = FSI; break;
211 case 0x2069: chartype[i] = PDI; break;
213 break;
218 /* RESOLVE EXPLICIT */
220 static WORD GreaterEven(int i)
222 return odd(i) ? i + 1 : i + 2;
225 static WORD GreaterOdd(int i)
227 return odd(i) ? i + 2 : i + 1;
230 static WORD EmbeddingDirection(int level)
232 return odd(level) ? R : L;
235 /*------------------------------------------------------------------------
236 Function: resolveExplicit
238 Recursively resolves explicit embedding levels and overrides.
239 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
241 Input: Base embedding level and direction
242 Character count
244 Output: Array of embedding levels
246 In/Out: Array of direction classes
249 Note: The function uses two simple counters to keep track of
250 matching explicit codes and PDF. Use the default argument for
251 the outermost call. The nesting counter counts the recursion
252 depth and not the embedding level.
253 ------------------------------------------------------------------------*/
254 typedef struct tagStackItem {
255 int level;
256 int override;
257 BOOL isolate;
258 } StackItem;
260 #define push_stack(l,o,i) \
261 do { stack_top--; \
262 stack[stack_top].level = l; \
263 stack[stack_top].override = o; \
264 stack[stack_top].isolate = i;} while(0)
266 #define pop_stack() do { stack_top++; } while(0)
268 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
270 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride)
272 /* X1 */
273 int overflow_isolate_count = 0;
274 int overflow_embedding_count = 0;
275 int valid_isolate_count = 0;
276 int i;
278 StackItem stack[MAX_DEPTH+2];
279 int stack_top = MAX_DEPTH+1;
281 stack[stack_top].level = level;
282 stack[stack_top].override = NI;
283 stack[stack_top].isolate = FALSE;
285 if (initialOverride)
287 if (odd(level))
288 push_stack(level, R, FALSE);
289 else
290 push_stack(level, L, FALSE);
293 for (i = 0; i < count; i++)
295 poutOverrides[i] = stack[stack_top].override;
297 /* X2 */
298 if (pclass[i] == RLE)
300 int least_odd = GreaterOdd(stack[stack_top].level);
301 poutLevel[i] = stack[stack_top].level;
302 if (valid_level(least_odd))
303 push_stack(least_odd, NI, FALSE);
304 else if (overflow_isolate_count == 0)
305 overflow_embedding_count++;
307 /* X3 */
308 else if (pclass[i] == LRE)
310 int least_even = GreaterEven(stack[stack_top].level);
311 poutLevel[i] = stack[stack_top].level;
312 if (valid_level(least_even))
313 push_stack(least_even, NI, FALSE);
314 else if (overflow_isolate_count == 0)
315 overflow_embedding_count++;
317 /* X4 */
318 else if (pclass[i] == RLO)
320 int least_odd = GreaterOdd(stack[stack_top].level);
321 poutLevel[i] = stack[stack_top].level;
322 if (valid_level(least_odd))
323 push_stack(least_odd, R, FALSE);
324 else if (overflow_isolate_count == 0)
325 overflow_embedding_count++;
327 /* X5 */
328 else if (pclass[i] == LRO)
330 int least_even = GreaterEven(stack[stack_top].level);
331 poutLevel[i] = stack[stack_top].level;
332 if (valid_level(least_even))
333 push_stack(least_even, L, FALSE);
334 else if (overflow_isolate_count == 0)
335 overflow_embedding_count++;
337 /* X5a */
338 else if (pclass[i] == RLI)
340 int least_odd = GreaterOdd(stack[stack_top].level);
341 poutLevel[i] = stack[stack_top].level;
342 if (valid_level(least_odd))
344 valid_isolate_count++;
345 push_stack(least_odd, NI, TRUE);
347 else
348 overflow_isolate_count++;
350 /* X5b */
351 else if (pclass[i] == LRI)
353 int least_even = GreaterEven(stack[stack_top].level);
354 poutLevel[i] = stack[stack_top].level;
355 if (valid_level(least_even))
357 valid_isolate_count++;
358 push_stack(least_even, NI, TRUE);
360 else
361 overflow_isolate_count++;
363 /* X5c */
364 else if (pclass[i] == FSI)
366 int j;
367 int new_level = 0;
368 int skipping = 0;
369 poutLevel[i] = stack[stack_top].level;
370 for (j = i+1; j < count; j++)
372 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
374 skipping++;
375 continue;
377 else if (pclass[j] == PDI)
379 if (skipping)
380 skipping --;
381 else
382 break;
383 continue;
386 if (skipping) continue;
388 if (pclass[j] == L)
390 new_level = 0;
391 break;
393 else if (pclass[j] == R || pclass[j] == AL)
395 new_level = 1;
396 break;
399 if (odd(new_level))
401 int least_odd = GreaterOdd(stack[stack_top].level);
402 if (valid_level(least_odd))
404 valid_isolate_count++;
405 push_stack(least_odd, NI, TRUE);
407 else
408 overflow_isolate_count++;
410 else
412 int least_even = GreaterEven(stack[stack_top].level);
413 if (valid_level(least_even))
415 valid_isolate_count++;
416 push_stack(least_even, NI, TRUE);
418 else
419 overflow_isolate_count++;
422 /* X6 */
423 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
425 poutLevel[i] = stack[stack_top].level;
426 if (stack[stack_top].override != NI)
427 pclass[i] = stack[stack_top].override;
429 /* X6a */
430 else if (pclass[i] == PDI)
432 if (overflow_isolate_count) overflow_isolate_count--;
433 else if (!valid_isolate_count) {/* do nothing */}
434 else
436 overflow_embedding_count = 0;
437 while (!stack[stack_top].isolate) pop_stack();
438 pop_stack();
439 valid_isolate_count --;
441 poutLevel[i] = stack[stack_top].level;
443 /* X7 */
444 else if (pclass[i] == PDF)
446 poutLevel[i] = stack[stack_top].level;
447 if (overflow_isolate_count) {/* do nothing */}
448 else if (overflow_embedding_count) overflow_embedding_count--;
449 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
450 pop_stack();
452 /* X8: Nothing */
454 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
455 for (i = 0; i < count ; i++)
456 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
457 pclass[i] = BN;
460 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
462 if (index == -1 || index == back_fence) return index;
463 index --;
464 while (index > back_fence && pcls[index] == BN) index --;
465 return index;
468 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
470 if (index == front_fence) return index;
471 index ++;
472 while (index < front_fence && pcls[index] == BN) index ++;
473 return index;
476 typedef struct tagRun
478 int start;
479 int end;
480 WORD e;
481 } Run;
483 typedef struct tagRunChar
485 WCHAR ch;
486 WORD *pcls;
487 } RunChar;
489 typedef struct tagIsolatedRun
491 struct list entry;
492 int length;
493 WORD sos;
494 WORD eos;
495 WORD e;
497 RunChar item[1];
498 } IsolatedRun;
500 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
502 if (index >= (iso_run->length-1)) return -1;
503 index ++;
504 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
505 if (index == iso_run->length) return -1;
506 return index;
509 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
512 if (index <= 0) return -1;
513 index --;
514 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
515 return index;
518 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
520 int i, len = 0;
521 TRACE("%s:",header);
522 TRACE("[ ");
523 for (i = 0; i < iso_run->length && len < 200; i++)
525 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
526 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
528 if (i != iso_run->length)
529 TRACE("...");
530 TRACE(" ]\n");
533 /*------------------------------------------------------------------------
534 Function: resolveWeak
536 Resolves the directionality of numeric and other weak character types
538 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
540 Input: Array of embedding levels
541 Character count
543 In/Out: Array of directional classes
545 Note: On input only these directional classes are expected
546 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
547 ------------------------------------------------------------------------*/
549 static void resolveWeak(IsolatedRun * iso_run)
551 int i;
553 /* W1 */
554 for (i=0; i < iso_run->length; i++)
556 if (*iso_run->item[i].pcls == NSM)
558 int j = iso_previousValidChar(iso_run, i);
559 if (j == -1)
560 *iso_run->item[i].pcls = iso_run->sos;
561 else if (*iso_run->item[j].pcls >= LRI)
562 *iso_run->item[i].pcls = ON;
563 else
564 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
568 /* W2 */
569 for (i = 0; i < iso_run->length; i++)
571 if (*iso_run->item[i].pcls == EN)
573 int j = iso_previousValidChar(iso_run, i);
574 while (j > -1)
576 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
578 if (*iso_run->item[j].pcls == AL)
579 *iso_run->item[i].pcls = AN;
580 break;
582 j = iso_previousValidChar(iso_run, j);
587 /* W3 */
588 for (i = 0; i < iso_run->length; i++)
590 if (*iso_run->item[i].pcls == AL)
591 *iso_run->item[i].pcls = R;
594 /* W4 */
595 for (i = 0; i < iso_run->length; i++)
597 if (*iso_run->item[i].pcls == ES)
599 int b = iso_previousValidChar(iso_run, i);
600 int f = iso_nextValidChar(iso_run, i);
602 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
603 *iso_run->item[i].pcls = EN;
605 else if (*iso_run->item[i].pcls == CS)
607 int b = iso_previousValidChar(iso_run, i);
608 int f = iso_nextValidChar(iso_run, i);
610 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
611 *iso_run->item[i].pcls = EN;
612 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
613 *iso_run->item[i].pcls = AN;
617 /* W5 */
618 for (i = 0; i < iso_run->length; i++)
620 if (*iso_run->item[i].pcls == ET)
622 int j;
623 for (j = i-1 ; j > -1; j--)
625 if (*iso_run->item[j].pcls == BN) continue;
626 if (*iso_run->item[j].pcls == ET) continue;
627 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
628 else break;
630 if (*iso_run->item[i].pcls == ET)
632 for (j = i+1; j < iso_run->length; j++)
634 if (*iso_run->item[j].pcls == BN) continue;
635 if (*iso_run->item[j].pcls == ET) continue;
636 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
637 else break;
643 /* W6 */
644 for (i = 0; i < iso_run->length; i++)
646 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
648 int b = i-1;
649 int f = i+1;
650 if (b > -1 && *iso_run->item[b].pcls == BN)
651 *iso_run->item[b].pcls = ON;
652 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
653 *iso_run->item[f].pcls = ON;
655 *iso_run->item[i].pcls = ON;
659 /* W7 */
660 for (i = 0; i < iso_run->length; i++)
662 if (*iso_run->item[i].pcls == EN)
664 int j;
665 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
666 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
668 if (*iso_run->item[j].pcls == L)
669 *iso_run->item[i].pcls = L;
670 break;
672 if (iso_run->sos == L && j == -1)
673 *iso_run->item[i].pcls = L;
678 typedef struct tagBracketPair
680 int start;
681 int end;
682 } BracketPair;
684 static int compr(const void *a, const void* b)
686 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
689 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
691 WCHAR *open_stack;
692 int *stack_index;
693 int stack_top = iso_run->length;
694 BracketPair *out = NULL;
695 int pair_count = 0;
696 int i;
698 open_stack = heap_alloc(iso_run->length * sizeof(*open_stack));
699 stack_index = heap_alloc(iso_run->length * sizeof(*stack_index));
701 for (i = 0; i < iso_run->length; i++)
703 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
704 if (ubv)
706 if (!out)
708 out = heap_alloc(sizeof(*out));
709 out[0].start = -1;
712 if ((ubv >> 8) == 0)
714 stack_top --;
715 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
716 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
717 if (open_stack[stack_top] == 0x232A)
718 open_stack[stack_top] = 0x3009;
719 stack_index[stack_top] = i;
721 else if ((ubv >> 8) == 1)
723 int j;
724 if (stack_top == iso_run->length) continue;
725 for (j = stack_top; j < iso_run->length; j++)
727 WCHAR c = iso_run->item[i].ch;
728 if (c == 0x232A) c = 0x3009;
729 if (c == open_stack[j])
731 out[pair_count].start = stack_index[j];
732 out[pair_count].end = i;
733 pair_count++;
734 out = HeapReAlloc(GetProcessHeap(), 0, out, sizeof(BracketPair) * (pair_count+1));
735 out[pair_count].start = -1;
736 stack_top = j+1;
737 break;
743 if (pair_count == 0)
745 heap_free(out);
746 out = NULL;
748 else if (pair_count > 1)
749 qsort(out, pair_count, sizeof(BracketPair), compr);
751 heap_free(open_stack);
752 heap_free(stack_index);
753 return out;
756 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
758 /*------------------------------------------------------------------------
759 Function: resolveNeutrals
761 Resolves the directionality of neutral character types.
763 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
765 Input: Array of embedding levels
766 Character count
767 Baselevel
769 In/Out: Array of directional classes
771 Note: On input only these directional classes are expected
772 R, L, NI, AN, EN and BN
774 W8 resolves a number of ENs to L
775 ------------------------------------------------------------------------*/
776 static void resolveNeutrals(IsolatedRun *iso_run)
778 int i;
779 BracketPair *pairs = NULL;
781 /* Translate isolates into NI */
782 for (i = 0; i < iso_run->length; i++)
784 if (*iso_run->item[i].pcls >= LRI)
785 *iso_run->item[i].pcls = NI;
787 switch(*iso_run->item[i].pcls)
789 case B:
790 case S:
791 case WS: *iso_run->item[i].pcls = NI;
794 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
797 /* N0: Skipping bracketed pairs for now */
798 pairs = computeBracketPairs(iso_run);
799 if (pairs)
801 BracketPair *p = &pairs[0];
802 int i = 0;
803 while (p->start >= 0)
805 int j;
806 int e = EmbeddingDirection(iso_run->e);
807 int o = EmbeddingDirection(iso_run->e+1);
808 BOOL flag_o = FALSE;
809 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
811 /* N0.b */
812 for (j = p->start+1; j < p->end; j++)
814 if (N0_TYPE(*iso_run->item[j].pcls) == e)
816 *iso_run->item[p->start].pcls = e;
817 *iso_run->item[p->end].pcls = e;
818 break;
820 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
821 flag_o = TRUE;
823 /* N0.c */
824 if (j == p->end && flag_o)
826 for (j = p->start; j >= 0; j--)
828 if (N0_TYPE(*iso_run->item[j].pcls) == o)
830 *iso_run->item[p->start].pcls = o;
831 *iso_run->item[p->end].pcls = o;
832 break;
834 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
836 *iso_run->item[p->start].pcls = e;
837 *iso_run->item[p->end].pcls = e;
838 break;
841 if ( j < 0 )
843 *iso_run->item[p->start].pcls = iso_run->sos;
844 *iso_run->item[p->end].pcls = iso_run->sos;
848 i++;
849 p = &pairs[i];
851 heap_free(pairs);
854 /* N1 */
855 for (i = 0; i < iso_run->length; i++)
857 WORD l,r;
859 if (*iso_run->item[i].pcls == NI)
861 int j;
862 int b = iso_previousValidChar(iso_run, i);
864 if (b == -1)
866 l = iso_run->sos;
867 b = 0;
869 else
871 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
872 l = R;
873 else if (*iso_run->item[b].pcls == L)
874 l = L;
875 else /* No string type */
876 continue;
878 j = iso_nextValidChar(iso_run, i);
879 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
881 if (j == -1)
883 r = iso_run->eos;
884 j = iso_run->length;
886 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
887 r = R;
888 else if (*iso_run->item[j].pcls == L)
889 r = L;
890 else /* No string type */
891 continue;
893 if (r == l)
895 for (b = i; b < j && b < iso_run->length; b++)
896 *iso_run->item[b].pcls = r;
901 /* N2 */
902 for (i = 0; i < iso_run->length; i++)
904 if (*iso_run->item[i].pcls == NI)
906 int b = i-1;
907 int f = i+1;
908 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
909 if (b > -1 && *iso_run->item[b].pcls == BN)
910 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
911 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
912 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
917 /*------------------------------------------------------------------------
918 Function: resolveImplicit
920 Recursively resolves implicit embedding levels.
921 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
923 Input: Array of direction classes
924 Character count
925 Base level
927 In/Out: Array of embedding levels
929 Note: levels may exceed 15 on output.
930 Accepted subset of direction classes
931 R, L, AN, EN
932 ------------------------------------------------------------------------*/
933 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
935 int i;
937 /* I1/2 */
938 for (i = sos; i <= eos; i++)
940 if (pcls[i] == BN)
941 continue;
943 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
944 ASSERT(pcls[i] < 5); /* "Out of range." */
946 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
947 plevel[i]++;
948 else if (!odd(plevel[i]) && pcls[i] == R)
949 plevel[i]++;
950 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
951 plevel[i]+=2;
955 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
957 int i;
959 /* L1 */
960 for (i = sos; i <= eos; i++)
962 if (pcls[i] == B || pcls[i] == S)
964 int j = i -1;
965 while (i > sos && j >= sos &&
966 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
967 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
968 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
969 plevel[j--] = baselevel;
970 plevel[i] = baselevel;
972 else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO ||
973 pcls[i] == PDF || pcls[i] == BN)
975 plevel[i] = i ? plevel[i - 1] : baselevel;
977 if (i == eos &&
978 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
979 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
980 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
982 int j = i;
983 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
984 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
985 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
986 plevel[j--] = baselevel;
991 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, const WORD *pLevel,
992 const WCHAR *string, unsigned int uCount, struct list *set)
994 int run_start, run_end, i;
995 int run_count = 0;
996 Run *runs;
997 IsolatedRun *current_isolated;
999 if (!(runs = heap_alloc(uCount * sizeof(*runs))))
1000 return;
1002 list_init(set);
1004 /* Build Runs */
1005 run_start = 0;
1006 while (run_start < uCount)
1008 run_end = nextValidChar(pcls, run_start, uCount);
1009 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
1010 run_end --;
1011 runs[run_count].start = run_start;
1012 runs[run_count].end = run_end;
1013 runs[run_count].e = pLevel[run_start];
1014 run_start = nextValidChar(pcls, run_end, uCount);
1015 run_count++;
1018 /* Build Isolating Runs */
1019 i = 0;
1020 while (i < run_count)
1022 int k = i;
1023 if (runs[k].start >= 0)
1025 int type_fence, real_end;
1026 int j;
1028 if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount]))))
1029 break;
1031 run_start = runs[k].start;
1032 current_isolated->e = runs[k].e;
1033 current_isolated->length = (runs[k].end - runs[k].start)+1;
1035 for (j = 0; j < current_isolated->length; j++)
1037 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1038 current_isolated->item[j].ch = string[runs[k].start + j];
1041 run_end = runs[k].end;
1043 TRACE("{ [%i -- %i]",run_start, run_end);
1045 if (pcls[run_end] == BN)
1046 run_end = previousValidChar(pcls, run_end, runs[k].start);
1048 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1050 j = k+1;
1051 search:
1052 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1053 if (j < run_count && runs[i].e != runs[j].e)
1055 j++;
1056 goto search;
1059 if (j != run_count)
1061 int m;
1062 int l = current_isolated->length;
1064 current_isolated->length += (runs[j].end - runs[j].start)+1;
1065 for (m = 0; l < current_isolated->length; l++, m++)
1067 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1068 current_isolated->item[l].ch = string[runs[j].start + m];
1071 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1073 run_end = runs[j].end;
1074 if (pcls[run_end] == BN)
1075 run_end = previousValidChar(pcls, run_end, runs[i].start);
1076 runs[j].start = -1;
1077 k = j;
1079 else
1081 run_end = uCount;
1082 break;
1086 type_fence = previousValidChar(pcls, run_start, -1);
1088 if (type_fence == -1)
1089 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1090 else
1091 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1093 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1095 if (run_end == uCount)
1096 current_isolated->eos = current_isolated->sos;
1097 else
1099 /* eos could be an BN */
1100 if ( pcls[run_end] == BN )
1102 real_end = previousValidChar(pcls, run_end, run_start-1);
1103 if (real_end < run_start)
1104 real_end = run_start;
1106 else
1107 real_end = run_end;
1109 type_fence = nextValidChar(pcls, run_end, uCount);
1110 if (type_fence == uCount)
1111 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1112 else
1113 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1115 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1118 list_add_tail(set, &current_isolated->entry);
1119 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1121 i++;
1124 heap_free(runs);
1127 /*************************************************************
1128 * BIDI_DeterminLevels
1130 BOOL BIDI_DetermineLevels(
1131 const WCHAR *lpString, /* [in] The string for which information is to be returned */
1132 unsigned int uCount, /* [in] Number of WCHARs in string. */
1133 const SCRIPT_STATE *s,
1134 const SCRIPT_CONTROL *c,
1135 WORD *lpOutLevels, /* [out] final string levels */
1136 WORD *lpOutOverrides /* [out] final string overrides */
1139 WORD *chartype;
1140 unsigned baselevel = 0;
1141 struct list IsolatingRuns;
1142 IsolatedRun *iso_run, *next;
1144 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1146 if (!(chartype = heap_alloc(uCount * sizeof(*chartype))))
1148 WARN("Out of memory\n");
1149 return FALSE;
1152 baselevel = s->uBidiLevel;
1154 classify(lpString, chartype, uCount, c);
1155 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1157 memset(lpOutOverrides, 0, sizeof(WORD) * uCount);
1159 /* resolve explicit */
1160 resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection);
1161 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1163 /* X10/BD13: Computer Isolating runs */
1164 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1166 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1168 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1170 /* resolve weak */
1171 resolveWeak(iso_run);
1172 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1174 /* resolve neutrals */
1175 resolveNeutrals(iso_run);
1176 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1178 list_remove(&iso_run->entry);
1179 heap_free(iso_run);
1182 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1183 /* resolveImplicit */
1184 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1186 /* resolveResolvedLevels*/
1187 classify(lpString, chartype, uCount, c);
1188 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1190 heap_free(chartype);
1191 return TRUE;
1194 /* reverse cch indexes */
1195 static void reverse(int *pidx, int cch)
1197 int temp;
1198 int ich = 0;
1199 for (; ich < --cch; ich++)
1201 temp = pidx[ich];
1202 pidx[ich] = pidx[cch];
1203 pidx[cch] = temp;
1208 /*------------------------------------------------------------------------
1209 Functions: reorder/reorderLevel
1211 Recursively reorders the display string
1212 "From the highest level down, reverse all characters at that level and
1213 higher, down to the lowest odd level"
1215 Implements rule L2 of the Unicode bidi Algorithm.
1217 Input: Array of embedding levels
1218 Character count
1219 Flag enabling reversal (set to false by initial caller)
1221 In/Out: Text to reorder
1223 Note: levels may exceed 15 resp. 61 on input.
1225 Rule L3 - reorder combining marks is not implemented here
1226 Rule L4 - glyph mirroring is implemented as a display option below
1228 Note: this should be applied a line at a time
1229 -------------------------------------------------------------------------*/
1230 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1232 int ich = 0;
1234 /* true as soon as first odd level encountered */
1235 fReverse = fReverse || odd(level);
1237 for (; ich < cch; ich++)
1239 if (plevel[ich] < level)
1241 break;
1243 else if (plevel[ich] > level)
1245 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1246 cch - ich, fReverse) - 1;
1249 if (fReverse)
1251 reverse(pIndexs, ich);
1253 return ich;
1256 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1257 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1259 int ich = 0;
1260 int newlevel = -1;
1262 /* true as soon as first odd level encountered */
1263 fReverse = fReverse || odd(level);
1265 for (; ich < cch; ich++)
1267 if (plevel[ich] < level)
1268 break;
1269 else if (plevel[ich] > level)
1270 newlevel = ich;
1272 if (fReverse)
1274 reverse(pIndexs, ich);
1277 if (newlevel >= 0)
1279 ich = 0;
1280 for (; ich < cch; ich++)
1281 if (plevel[ich] < level)
1282 break;
1283 else if (plevel[ich] > level)
1284 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1285 cch - ich, fReverse) - 1;
1288 return ich;
1291 BOOL BIDI_GetStrengths(const WCHAR *string, unsigned int count, const SCRIPT_CONTROL *c, WORD *strength)
1293 unsigned int i;
1295 classify(string, strength, count, c);
1296 for (i = 0; i < count; i++)
1298 switch (strength[i])
1300 case L:
1301 case LRE:
1302 case LRO:
1303 case R:
1304 case AL:
1305 case RLE:
1306 case RLO:
1307 strength[i] = BIDI_STRONG;
1308 break;
1309 case PDF:
1310 case EN:
1311 case ES:
1312 case ET:
1313 case AN:
1314 case CS:
1315 case BN:
1316 strength[i] = BIDI_WEAK;
1317 break;
1318 case B:
1319 case S:
1320 case WS:
1321 case ON:
1322 default: /* Neutrals and NSM */
1323 strength[i] = BIDI_NEUTRAL;
1326 return TRUE;