usp10: Implement N0: bracketing pairs.
[wine/multimedia.git] / dlls / usp10 / bidi.c
blob63ffa6a6225af7a0d7b88c7b2416e42df54f2de8
1 /*
2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include "config.h"
46 #include <stdarg.h>
47 #include <stdlib.h>
48 #include "windef.h"
49 #include "winbase.h"
50 #include "wingdi.h"
51 #include "winnls.h"
52 #include "usp10.h"
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/list.h"
57 #include "usp10_internal.h"
59 extern const unsigned short bidi_bracket_table[];
61 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
63 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
64 #define MAX_DEPTH 125
66 /* HELPER FUNCTIONS AND DECLARATIONS */
68 /*------------------------------------------------------------------------
69 Bidirectional Character Types
71 as defined by the Unicode Bidirectional Algorithm Table 3-7.
73 Note:
75 The list of bidirectional character types here is not grouped the
76 same way as the table 3-7, since the numberic values for the types
77 are chosen to keep the state and action tables compact.
78 ------------------------------------------------------------------------*/
79 enum directions
81 /* input types */
82 /* ON MUST be zero, code relies on ON = NI = 0 */
83 ON = 0, /* Other Neutral */
84 L, /* Left Letter */
85 R, /* Right Letter */
86 AN, /* Arabic Number */
87 EN, /* European Number */
88 AL, /* Arabic Letter (Right-to-left) */
89 NSM, /* Non-spacing Mark */
90 CS, /* Common Separator */
91 ES, /* European Separator */
92 ET, /* European Terminator (post/prefix e.g. $ and %) */
94 /* resolved types */
95 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
97 /* input types, */
98 S, /* Segment Separator (TAB) // used only in L1 */
99 WS, /* White space // used only in L1 */
100 B, /* Paragraph Separator (aka as PS) */
102 /* types for explicit controls */
103 RLO, /* these are used only in X1-X9 */
104 RLE,
105 LRO,
106 LRE,
107 PDF,
109 LRI, /* Isolate formatting characters new with 6.3 */
110 RLI,
111 FSI,
112 PDI,
114 /* resolved types, also resolved directions */
115 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
118 static const char debug_type[][4] =
120 "ON", /* Other Neutral */
121 "L", /* Left Letter */
122 "R", /* Right Letter */
123 "AN", /* Arabic Number */
124 "EN", /* European Number */
125 "AL", /* Arabic Letter (Right-to-left) */
126 "NSM", /* Non-spacing Mark */
127 "CS", /* Common Separator */
128 "ES", /* European Separator */
129 "ET", /* European Terminator (post/prefix e.g. $ and %) */
130 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
131 "S", /* Segment Separator (TAB) // used only in L1 */
132 "WS", /* White space // used only in L1 */
133 "B", /* Paragraph Separator (aka as PS) */
134 "RLO", /* these are used only in X1-X9 */
135 "RLE",
136 "LRO",
137 "LRE",
138 "PDF",
139 "LRI", /* Isolate formatting characters new with 6.3 */
140 "RLI",
141 "FSI",
142 "PDI",
145 /* HELPER FUNCTIONS */
147 static inline void dump_types(const char* header, WORD *types, int start, int end)
149 int i;
150 TRACE("%s:",header);
151 for (i = start; i< end; i++)
152 TRACE(" %s",debug_type[types[i]]);
153 TRACE("\n");
156 /* Convert the libwine information to the direction enum */
157 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
159 static const enum directions dir_map[16] =
161 L, /* unassigned defaults to L */
174 NSM,
176 PDF /* also LRE, LRO, RLE, RLO */
179 unsigned i;
181 for (i = 0; i < uCount; ++i)
183 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
184 switch (chartype[i])
186 case ES:
187 if (!c->fLegacyBidiClass) break;
188 switch (lpString[i])
190 case '-':
191 case '+': chartype[i] = NI; break;
192 case '/': chartype[i] = CS; break;
194 break;
195 case PDF:
196 switch (lpString[i])
198 case 0x202A: chartype[i] = LRE; break;
199 case 0x202B: chartype[i] = RLE; break;
200 case 0x202C: chartype[i] = PDF; break;
201 case 0x202D: chartype[i] = LRO; break;
202 case 0x202E: chartype[i] = RLO; break;
203 case 0x2066: chartype[i] = LRI; break;
204 case 0x2067: chartype[i] = RLI; break;
205 case 0x2068: chartype[i] = FSI; break;
206 case 0x2069: chartype[i] = PDI; break;
208 break;
213 /* RESOLVE EXPLICIT */
215 static WORD GreaterEven(int i)
217 return odd(i) ? i + 1 : i + 2;
220 static WORD GreaterOdd(int i)
222 return odd(i) ? i + 2 : i + 1;
225 static WORD EmbeddingDirection(int level)
227 return odd(level) ? R : L;
230 /*------------------------------------------------------------------------
231 Function: resolveExplicit
233 Recursively resolves explicit embedding levels and overrides.
234 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
236 Input: Base embedding level and direction
237 Character count
239 Output: Array of embedding levels
241 In/Out: Array of direction classes
244 Note: The function uses two simple counters to keep track of
245 matching explicit codes and PDF. Use the default argument for
246 the outermost call. The nesting counter counts the recursion
247 depth and not the embedding level.
248 ------------------------------------------------------------------------*/
249 typedef struct tagStackItem {
250 int level;
251 int override;
252 BOOL isolate;
253 } StackItem;
255 #define push_stack(l,o,i) \
256 do { stack_top--; \
257 stack[stack_top].level = l; \
258 stack[stack_top].override = o; \
259 stack[stack_top].isolate = i;} while(0)
261 #define pop_stack() do { stack_top++; } while(0)
263 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
265 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, int count)
267 /* X1 */
268 int overflow_isolate_count = 0;
269 int overflow_embedding_count = 0;
270 int valid_isolate_count = 0;
271 int i;
273 StackItem stack[MAX_DEPTH+2];
274 int stack_top = MAX_DEPTH+1;
276 stack[stack_top].level = level;
277 stack[stack_top].override = NI;
278 stack[stack_top].isolate = FALSE;
280 for (i = 0; i < count; i++)
282 /* X2 */
283 if (pclass[i] == RLE)
285 int least_odd = GreaterOdd(stack[stack_top].level);
286 poutLevel[i] = stack[stack_top].level;
287 if (valid_level(least_odd))
288 push_stack(least_odd, NI, FALSE);
289 else if (overflow_isolate_count == 0)
290 overflow_embedding_count++;
292 /* X3 */
293 else if (pclass[i] == LRE)
295 int least_even = GreaterEven(stack[stack_top].level);
296 poutLevel[i] = stack[stack_top].level;
297 if (valid_level(least_even))
298 push_stack(least_even, NI, FALSE);
299 else if (overflow_isolate_count == 0)
300 overflow_embedding_count++;
302 /* X4 */
303 else if (pclass[i] == RLO)
305 int least_odd = GreaterOdd(stack[stack_top].level);
306 poutLevel[i] = stack[stack_top].level;
307 if (valid_level(least_odd))
308 push_stack(least_odd, R, FALSE);
309 else if (overflow_isolate_count == 0)
310 overflow_embedding_count++;
312 /* X5 */
313 else if (pclass[i] == LRO)
315 int least_even = GreaterEven(stack[stack_top].level);
316 poutLevel[i] = stack[stack_top].level;
317 if (valid_level(least_even))
318 push_stack(least_even, L, FALSE);
319 else if (overflow_isolate_count == 0)
320 overflow_embedding_count++;
322 /* X5a */
323 else if (pclass[i] == RLI)
325 int least_odd = GreaterOdd(stack[stack_top].level);
326 poutLevel[i] = stack[stack_top].level;
327 if (valid_level(least_odd))
329 valid_isolate_count++;
330 push_stack(least_odd, NI, TRUE);
332 else
333 overflow_isolate_count++;
335 /* X5b */
336 else if (pclass[i] == LRI)
338 int least_even = GreaterEven(stack[stack_top].level);
339 poutLevel[i] = stack[stack_top].level;
340 if (valid_level(least_even))
342 valid_isolate_count++;
343 push_stack(least_even, NI, TRUE);
345 else
346 overflow_isolate_count++;
348 /* X5c */
349 else if (pclass[i] == FSI)
351 int j;
352 int new_level = 0;
353 int skipping = 0;
354 poutLevel[i] = stack[stack_top].level;
355 for (j = i+1; j < count; j++)
357 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
359 skipping++;
360 continue;
362 else if (pclass[j] == PDI)
364 if (skipping)
365 skipping --;
366 else
367 break;
368 continue;
371 if (skipping) continue;
373 if (pclass[j] == L)
375 new_level = 0;
376 break;
378 else if (pclass[j] == R || pclass[j] == AL)
380 new_level = 1;
381 break;
384 if (odd(new_level))
386 int least_odd = GreaterOdd(stack[stack_top].level);
387 if (valid_level(least_odd))
389 valid_isolate_count++;
390 push_stack(least_odd, NI, TRUE);
392 else
393 overflow_isolate_count++;
395 else
397 int least_even = GreaterEven(stack[stack_top].level);
398 if (valid_level(least_even))
400 valid_isolate_count++;
401 push_stack(least_even, NI, TRUE);
403 else
404 overflow_isolate_count++;
407 /* X6 */
408 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
410 poutLevel[i] = stack[stack_top].level;
411 if (stack[stack_top].override != NI)
412 pclass[i] = stack[stack_top].override;
414 /* X6a */
415 else if (pclass[i] == PDI)
417 if (overflow_isolate_count) overflow_isolate_count--;
418 else if (!valid_isolate_count) {/* do nothing */}
419 else
421 overflow_embedding_count = 0;
422 while (!stack[stack_top].isolate) pop_stack();
423 pop_stack();
424 valid_isolate_count --;
426 poutLevel[i] = stack[stack_top].level;
428 /* X7 */
429 else if (pclass[i] == PDF)
431 poutLevel[i] = stack[stack_top].level;
432 if (overflow_isolate_count) {/* do nothing */}
433 else if (overflow_embedding_count) overflow_embedding_count--;
434 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
435 pop_stack();
437 /* X8: Nothing */
439 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
440 for (i = 0; i < count ; i++)
441 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
442 pclass[i] = BN;
445 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
447 if (index == -1 || index == back_fence) return index;
448 index --;
449 while (index > back_fence && pcls[index] == BN) index --;
450 return index;
453 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
455 if (index == front_fence) return index;
456 index ++;
457 while (index < front_fence && pcls[index] == BN) index ++;
458 return index;
461 typedef struct tagRun
463 int start;
464 int end;
465 WORD e;
466 } Run;
468 typedef struct tagRunChar
470 WCHAR ch;
471 WORD *pcls;
472 } RunChar;
474 typedef struct tagIsolatedRun
476 struct list entry;
477 int length;
478 WORD sos;
479 WORD eos;
480 WORD e;
482 RunChar item[1];
483 } IsolatedRun;
485 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
487 if (index >= (iso_run->length-1)) return -1;
488 index ++;
489 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
490 if (index == iso_run->length) return -1;
491 return index;
494 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
497 if (index <= 0) return -1;
498 index --;
499 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
500 return index;
503 static inline int iso_previousChar(IsolatedRun *iso_run, int index)
505 if (index <= 0) return -1;
506 return index --;
509 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
511 int i;
512 TRACE("%s:",header);
513 TRACE("[ ");
514 for (i = 0; i < iso_run->length; i++)
515 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
516 TRACE(" ]\n");
519 /*------------------------------------------------------------------------
520 Function: resolveWeak
522 Resolves the directionality of numeric and other weak character types
524 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
526 Input: Array of embedding levels
527 Character count
529 In/Out: Array of directional classes
531 Note: On input only these directional classes are expected
532 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
533 ------------------------------------------------------------------------*/
535 static void resolveWeak(IsolatedRun * iso_run)
537 int i;
539 /* W1 */
540 for (i=0; i < iso_run->length; i++)
542 if (*iso_run->item[i].pcls == NSM)
544 int j = iso_previousValidChar(iso_run, i);
545 if (j == -1)
546 *iso_run->item[i].pcls = iso_run->sos;
547 else if (*iso_run->item[j].pcls >= LRI)
548 *iso_run->item[i].pcls = ON;
549 else
550 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
554 /* W2 */
555 for (i = 0; i < iso_run->length; i++)
557 if (*iso_run->item[i].pcls == EN)
559 int j = iso_previousValidChar(iso_run, i);
560 while (j > -1)
562 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
564 if (*iso_run->item[j].pcls == AL)
565 *iso_run->item[i].pcls = AN;
566 break;
568 j = iso_previousValidChar(iso_run, j);
573 /* W3 */
574 for (i = 0; i < iso_run->length; i++)
576 if (*iso_run->item[i].pcls == AL)
577 *iso_run->item[i].pcls = R;
580 /* W4 */
581 for (i = 0; i < iso_run->length; i++)
583 if (*iso_run->item[i].pcls == ES)
585 int b = iso_previousValidChar(iso_run, i);
586 int f = iso_nextValidChar(iso_run, i);
588 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
589 *iso_run->item[i].pcls = EN;
591 else if (*iso_run->item[i].pcls == CS)
593 int b = iso_previousValidChar(iso_run, i);
594 int f = iso_nextValidChar(iso_run, i);
596 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
597 *iso_run->item[i].pcls = EN;
598 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
599 *iso_run->item[i].pcls = AN;
603 /* W5 */
604 for (i = 0; i < iso_run->length; i++)
606 if (*iso_run->item[i].pcls == ET)
608 int j;
609 for (j = i-1 ; j > -1; j--)
611 if (*iso_run->item[j].pcls == BN) continue;
612 if (*iso_run->item[j].pcls == ET) continue;
613 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
614 else break;
616 if (*iso_run->item[i].pcls == ET)
618 for (j = i+1; j < iso_run->length; j++)
620 if (*iso_run->item[j].pcls == BN) continue;
621 if (*iso_run->item[j].pcls == ET) continue;
622 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
623 else break;
629 /* W6 */
630 for (i = 0; i < iso_run->length; i++)
632 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
634 int b = i-1;
635 int f = i+1;
636 if (b > -1 && *iso_run->item[b].pcls == BN)
637 *iso_run->item[b].pcls = ON;
638 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
639 *iso_run->item[f].pcls = ON;
641 *iso_run->item[i].pcls = ON;
645 /* W7 */
646 for (i = 0; i < iso_run->length; i++)
648 if (*iso_run->item[i].pcls == EN)
650 int j;
651 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
652 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
654 if (*iso_run->item[j].pcls == L)
655 *iso_run->item[i].pcls = L;
656 break;
658 if (iso_run->sos == L && j == -1)
659 *iso_run->item[i].pcls = L;
664 typedef struct tagBracketPair
666 int start;
667 int end;
668 } BracketPair;
670 static int compr(const void *a, const void* b)
672 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
675 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
677 WCHAR *open_stack;
678 int *stack_index;
679 int stack_top = iso_run->length;
680 BracketPair *out = NULL;
681 int pair_count = 0;
682 int i;
684 open_stack = HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR) * iso_run->length);
685 stack_index = HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run->length);
687 for (i = 0; i < iso_run->length; i++)
689 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
690 if (ubv)
692 if (!out)
694 out = HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair));
695 out[0].start = -1;
698 if ((ubv >> 8) == 0)
700 stack_top --;
701 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
702 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
703 if (open_stack[stack_top] == 0x232A)
704 open_stack[stack_top] = 0x3009;
705 stack_index[stack_top] = i;
707 else if ((ubv >> 8) == 1)
709 int j;
710 if (stack_top == iso_run->length) continue;
711 for (j = stack_top; j < iso_run->length; j++)
713 WCHAR c = iso_run->item[i].ch;
714 if (c == 0x232A) c = 0x3009;
715 if (c == open_stack[j])
717 out[pair_count].start = stack_index[j];
718 out[pair_count].end = i;
719 pair_count++;
720 out = HeapReAlloc(GetProcessHeap(), 0, out, sizeof(BracketPair) * (pair_count+1));
721 out[pair_count].start = -1;
722 stack_top = j+1;
723 break;
729 if (pair_count == 0)
731 HeapFree(GetProcessHeap(),0,out);
732 out = NULL;
734 else if (pair_count > 1)
735 qsort(out, pair_count, sizeof(BracketPair), compr);
737 HeapFree(GetProcessHeap(), 0, open_stack);
738 HeapFree(GetProcessHeap(), 0, stack_index);
739 return out;
742 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
744 /*------------------------------------------------------------------------
745 Function: resolveNeutrals
747 Resolves the directionality of neutral character types.
749 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
751 Input: Array of embedding levels
752 Character count
753 Baselevel
755 In/Out: Array of directional classes
757 Note: On input only these directional classes are expected
758 R, L, NI, AN, EN and BN
760 W8 resolves a number of ENs to L
761 ------------------------------------------------------------------------*/
762 static void resolveNeutrals(IsolatedRun *iso_run)
764 int i;
765 BracketPair *pairs = NULL;
767 /* Translate isolates into NI */
768 for (i = 0; i < iso_run->length; i++)
770 if (*iso_run->item[i].pcls >= LRI)
771 *iso_run->item[i].pcls = NI;
773 switch(*iso_run->item[i].pcls)
775 case B:
776 case S:
777 case WS: *iso_run->item[i].pcls = NI;
780 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
783 /* N0: Skipping bracketed pairs for now */
784 pairs = computeBracketPairs(iso_run);
785 if (pairs)
787 BracketPair *p = &pairs[0];
788 int i = 0;
789 while (p->start >= 0)
791 int j;
792 int e = EmbeddingDirection(iso_run->e);
793 int o = EmbeddingDirection(iso_run->e+1);
794 BOOL flag_o = FALSE;
795 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
797 /* N0.b */
798 for (j = p->start+1; j < p->end; j++)
800 if (N0_TYPE(*iso_run->item[j].pcls) == e)
802 *iso_run->item[p->start].pcls = e;
803 *iso_run->item[p->end].pcls = e;
804 break;
806 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
807 flag_o = TRUE;
809 /* N0.c */
810 if (j == p->end && flag_o)
812 for (j = p->start; j >= 0; j--)
814 if (N0_TYPE(*iso_run->item[j].pcls) == o)
816 *iso_run->item[p->start].pcls = o;
817 *iso_run->item[p->end].pcls = o;
818 break;
820 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
822 *iso_run->item[p->start].pcls = e;
823 *iso_run->item[p->end].pcls = e;
824 break;
827 if ( j < 0 )
829 *iso_run->item[p->start].pcls = iso_run->sos;
830 *iso_run->item[p->end].pcls = iso_run->sos;
834 i++;
835 p = &pairs[i];
837 HeapFree(GetProcessHeap(),0,pairs);
840 /* N1 */
841 for (i = 0; i < iso_run->length; i++)
843 WORD l,r;
845 if (*iso_run->item[i].pcls == NI)
847 int j;
848 int b = iso_previousValidChar(iso_run, i);
850 if (b == -1)
852 l = iso_run->sos;
853 b = 0;
855 else
857 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
858 l = R;
859 else if (*iso_run->item[b].pcls == L)
860 l = L;
861 else /* No string type */
862 continue;
864 j = iso_nextValidChar(iso_run, i);
865 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
867 if (j == -1)
869 r = iso_run->eos;
870 j = iso_run->length;
872 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
873 r = R;
874 else if (*iso_run->item[j].pcls == L)
875 r = L;
876 else /* No string type */
877 continue;
879 if (r == l)
881 for (b = i; b < j && b < iso_run->length; b++)
882 *iso_run->item[b].pcls = r;
887 /* N2 */
888 for (i = 0; i < iso_run->length; i++)
890 if (*iso_run->item[i].pcls == NI)
892 int b = i-1;
893 int f = i+1;
894 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
895 if (b > -1 && *iso_run->item[b].pcls == BN)
896 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
897 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
898 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
903 /*------------------------------------------------------------------------
904 Function: resolveImplicit
906 Recursively resolves implicit embedding levels.
907 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
909 Input: Array of direction classes
910 Character count
911 Base level
913 In/Out: Array of embedding levels
915 Note: levels may exceed 15 on output.
916 Accepted subset of direction classes
917 R, L, AN, EN
918 ------------------------------------------------------------------------*/
919 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
921 int i;
923 /* I1/2 */
924 for (i = sos; i <= eos; i++)
926 if (pcls[i] == BN)
927 continue;
929 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
930 ASSERT(pcls[i] < 5); /* "Out of range." */
932 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
933 plevel[i]++;
934 else if (!odd(plevel[i]) && pcls[i] == R)
935 plevel[i]++;
936 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
937 plevel[i]+=2;
941 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
943 int i;
945 /* L1 */
946 for (i = sos; i <= eos; i++)
948 if (pcls[i] == B || pcls[i] == S)
950 int j = i -1;
951 while (i > sos && j >= sos &&
952 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
953 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
954 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
955 plevel[j--] = baselevel;
956 plevel[i] = baselevel;
958 if (i == eos &&
959 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
960 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
961 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
963 int j = i;
964 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
965 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
966 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
967 plevel[j--] = baselevel;
972 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, WORD *pLevel, LPCWSTR lpString, int uCount, struct list *set)
974 int run_start, run_end, i;
975 int run_count = 0;
976 Run *runs;
977 IsolatedRun *current_isolated;
979 runs = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(Run));
980 if (!runs) return;
982 list_init(set);
984 /* Build Runs */
985 run_start = 0;
986 while (run_start < uCount)
988 run_end = nextValidChar(pcls, run_start, uCount);
989 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
990 run_end --;
991 runs[run_count].start = run_start;
992 runs[run_count].end = run_end;
993 runs[run_count].e = pLevel[run_start];
994 run_start = nextValidChar(pcls, run_end, uCount);
995 run_count++;
998 /* Build Isolating Runs */
999 i = 0;
1000 while (i < run_count)
1002 int k = i;
1003 if (runs[k].start >= 0)
1005 int type_fence, real_end;
1006 int j;
1007 current_isolated = HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun) + sizeof(RunChar)*uCount);
1008 if (!current_isolated) break;
1010 run_start = runs[k].start;
1011 current_isolated->e = runs[k].e;
1012 current_isolated->length = (runs[k].end - runs[k].start)+1;
1014 for (j = 0; j < current_isolated->length; j++)
1016 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1017 current_isolated->item[j].ch = lpString[runs[k].start+j];
1020 run_end = runs[k].end;
1022 TRACE("{ [%i -- %i]",run_start, run_end);
1024 if (pcls[run_end] == BN)
1025 run_end = previousValidChar(pcls, run_end, runs[k].start);
1027 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1029 j = k+1;
1030 search:
1031 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1032 if (j < run_count && runs[i].e != runs[j].e)
1034 j++;
1035 goto search;
1038 if (j != run_count)
1040 int m;
1041 int l = current_isolated->length;
1043 current_isolated->length += (runs[j].end - runs[j].start)+1;
1044 for (m = 0; l < current_isolated->length; l++, m++)
1046 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1047 current_isolated->item[l].ch = lpString[runs[j].start+m];
1050 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1052 run_end = runs[j].end;
1053 if (pcls[run_end] == BN)
1054 run_end = previousValidChar(pcls, run_end, runs[i].start);
1055 runs[j].start = -1;
1056 k = j;
1058 else
1060 run_end = uCount;
1061 break;
1065 type_fence = previousValidChar(pcls, run_start, -1);
1067 if (type_fence == -1)
1068 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1069 else
1070 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1072 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1074 if (run_end == uCount)
1075 current_isolated->eos = current_isolated->sos;
1076 else
1078 /* eos could be an BN */
1079 if ( pcls[run_end] == BN )
1081 real_end = previousValidChar(pcls, run_end, run_start-1);
1082 if (real_end < run_start)
1083 real_end = run_start;
1085 else
1086 real_end = run_end;
1088 type_fence = nextValidChar(pcls, run_end, uCount);
1089 if (type_fence == uCount)
1090 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1091 else
1092 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1094 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1097 list_add_tail(set, &current_isolated->entry);
1098 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1100 i++;
1103 HeapFree(GetProcessHeap(), 0, runs);
1106 /*************************************************************
1107 * BIDI_DeterminLevels
1109 BOOL BIDI_DetermineLevels(
1110 LPCWSTR lpString, /* [in] The string for which information is to be returned */
1111 INT uCount, /* [in] Number of WCHARs in string. */
1112 const SCRIPT_STATE *s,
1113 const SCRIPT_CONTROL *c,
1114 WORD *lpOutLevels /* [out] final string levels */
1117 WORD *chartype;
1118 unsigned baselevel = 0;
1119 struct list IsolatingRuns;
1120 IsolatedRun *iso_run, *next;
1122 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1124 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
1125 if (!chartype)
1127 WARN("Out of memory\n");
1128 return FALSE;
1131 baselevel = s->uBidiLevel;
1133 classify(lpString, chartype, uCount, c);
1134 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1136 /* resolve explicit */
1137 resolveExplicit(baselevel, chartype, lpOutLevels, uCount);
1138 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1140 /* X10/BD13: Computer Isolating runs */
1141 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1143 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1145 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1147 /* resolve weak */
1148 resolveWeak(iso_run);
1149 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1151 /* resolve neutrals */
1152 resolveNeutrals(iso_run);
1153 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1155 list_remove(&iso_run->entry);
1156 HeapFree(GetProcessHeap(),0,iso_run);
1159 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1160 /* resolveImplicit */
1161 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1163 /* resolveResolvedLevels*/
1164 classify(lpString, chartype, uCount, c);
1165 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1167 HeapFree(GetProcessHeap(), 0, chartype);
1168 return TRUE;
1171 /* reverse cch indexes */
1172 static void reverse(int *pidx, int cch)
1174 int temp;
1175 int ich = 0;
1176 for (; ich < --cch; ich++)
1178 temp = pidx[ich];
1179 pidx[ich] = pidx[cch];
1180 pidx[cch] = temp;
1185 /*------------------------------------------------------------------------
1186 Functions: reorder/reorderLevel
1188 Recursively reorders the display string
1189 "From the highest level down, reverse all characters at that level and
1190 higher, down to the lowest odd level"
1192 Implements rule L2 of the Unicode bidi Algorithm.
1194 Input: Array of embedding levels
1195 Character count
1196 Flag enabling reversal (set to false by initial caller)
1198 In/Out: Text to reorder
1200 Note: levels may exceed 15 resp. 61 on input.
1202 Rule L3 - reorder combining marks is not implemented here
1203 Rule L4 - glyph mirroring is implemented as a display option below
1205 Note: this should be applied a line at a time
1206 -------------------------------------------------------------------------*/
1207 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1209 int ich = 0;
1211 /* true as soon as first odd level encountered */
1212 fReverse = fReverse || odd(level);
1214 for (; ich < cch; ich++)
1216 if (plevel[ich] < level)
1218 break;
1220 else if (plevel[ich] > level)
1222 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1223 cch - ich, fReverse) - 1;
1226 if (fReverse)
1228 reverse(pIndexs, ich);
1230 return ich;
1233 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1234 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1236 int ich = 0;
1237 int newlevel = -1;
1239 /* true as soon as first odd level encountered */
1240 fReverse = fReverse || odd(level);
1242 for (; ich < cch; ich++)
1244 if (plevel[ich] < level)
1245 break;
1246 else if (plevel[ich] > level)
1247 newlevel = ich;
1249 if (fReverse)
1251 reverse(pIndexs, ich);
1254 if (newlevel >= 0)
1256 ich = 0;
1257 for (; ich < cch; ich++)
1258 if (plevel[ich] < level)
1259 break;
1260 else if (plevel[ich] > level)
1261 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1262 cch - ich, fReverse) - 1;
1265 return ich;
1268 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
1269 WORD* lpStrength)
1271 int i;
1272 classify(lpString, lpStrength, uCount, c);
1274 for ( i = 0; i < uCount; i++)
1276 switch(lpStrength[i])
1278 case L:
1279 case LRE:
1280 case LRO:
1281 case R:
1282 case AL:
1283 case RLE:
1284 case RLO:
1285 lpStrength[i] = BIDI_STRONG;
1286 break;
1287 case PDF:
1288 case EN:
1289 case ES:
1290 case ET:
1291 case AN:
1292 case CS:
1293 case BN:
1294 lpStrength[i] = BIDI_WEAK;
1295 break;
1296 case B:
1297 case S:
1298 case WS:
1299 case ON:
1300 default: /* Neutrals and NSM */
1301 lpStrength[i] = BIDI_NEUTRAL;
1304 return TRUE;