ntdll: Remove unused macros.
[wine/multimedia.git] / dlls / usp10 / bidi.c
blob28252b6e499b69498402219bb5f3fa905e542a4f
1 /*
2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include "config.h"
46 #include <stdarg.h>
47 #include <stdlib.h>
48 #include "windef.h"
49 #include "winbase.h"
50 #include "wingdi.h"
51 #include "winnls.h"
52 #include "usp10.h"
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/list.h"
57 #include "usp10_internal.h"
59 extern const unsigned short bidi_bracket_table[];
61 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
63 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
64 #define MAX_DEPTH 125
66 /* HELPER FUNCTIONS AND DECLARATIONS */
68 /*------------------------------------------------------------------------
69 Bidirectional Character Types
71 as defined by the Unicode Bidirectional Algorithm Table 3-7.
73 Note:
75 The list of bidirectional character types here is not grouped the
76 same way as the table 3-7, since the numberic values for the types
77 are chosen to keep the state and action tables compact.
78 ------------------------------------------------------------------------*/
79 enum directions
81 /* input types */
82 /* ON MUST be zero, code relies on ON = NI = 0 */
83 ON = 0, /* Other Neutral */
84 L, /* Left Letter */
85 R, /* Right Letter */
86 AN, /* Arabic Number */
87 EN, /* European Number */
88 AL, /* Arabic Letter (Right-to-left) */
89 NSM, /* Non-spacing Mark */
90 CS, /* Common Separator */
91 ES, /* European Separator */
92 ET, /* European Terminator (post/prefix e.g. $ and %) */
94 /* resolved types */
95 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
97 /* input types, */
98 S, /* Segment Separator (TAB) // used only in L1 */
99 WS, /* White space // used only in L1 */
100 B, /* Paragraph Separator (aka as PS) */
102 /* types for explicit controls */
103 RLO, /* these are used only in X1-X9 */
104 RLE,
105 LRO,
106 LRE,
107 PDF,
109 LRI, /* Isolate formatting characters new with 6.3 */
110 RLI,
111 FSI,
112 PDI,
114 /* resolved types, also resolved directions */
115 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
118 static const char debug_type[][4] =
120 "ON", /* Other Neutral */
121 "L", /* Left Letter */
122 "R", /* Right Letter */
123 "AN", /* Arabic Number */
124 "EN", /* European Number */
125 "AL", /* Arabic Letter (Right-to-left) */
126 "NSM", /* Non-spacing Mark */
127 "CS", /* Common Separator */
128 "ES", /* European Separator */
129 "ET", /* European Terminator (post/prefix e.g. $ and %) */
130 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
131 "S", /* Segment Separator (TAB) // used only in L1 */
132 "WS", /* White space // used only in L1 */
133 "B", /* Paragraph Separator (aka as PS) */
134 "RLO", /* these are used only in X1-X9 */
135 "RLE",
136 "LRO",
137 "LRE",
138 "PDF",
139 "LRI", /* Isolate formatting characters new with 6.3 */
140 "RLI",
141 "FSI",
142 "PDI",
145 /* HELPER FUNCTIONS */
147 static inline void dump_types(const char* header, WORD *types, int start, int end)
149 int i, len = 0;
150 TRACE("%s:",header);
151 for (i = start; i < end && len < 200; i++)
153 TRACE(" %s",debug_type[types[i]]);
154 len += strlen(debug_type[types[i]])+1;
156 if (i != end)
157 TRACE("...");
158 TRACE("\n");
161 /* Convert the libwine information to the direction enum */
162 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
164 static const enum directions dir_map[16] =
166 L, /* unassigned defaults to L */
179 NSM,
181 PDF /* also LRE, LRO, RLE, RLO */
184 unsigned i;
186 for (i = 0; i < uCount; ++i)
188 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
189 switch (chartype[i])
191 case ES:
192 if (!c->fLegacyBidiClass) break;
193 switch (lpString[i])
195 case '-':
196 case '+': chartype[i] = NI; break;
197 case '/': chartype[i] = CS; break;
199 break;
200 case PDF:
201 switch (lpString[i])
203 case 0x202A: chartype[i] = LRE; break;
204 case 0x202B: chartype[i] = RLE; break;
205 case 0x202C: chartype[i] = PDF; break;
206 case 0x202D: chartype[i] = LRO; break;
207 case 0x202E: chartype[i] = RLO; break;
208 case 0x2066: chartype[i] = LRI; break;
209 case 0x2067: chartype[i] = RLI; break;
210 case 0x2068: chartype[i] = FSI; break;
211 case 0x2069: chartype[i] = PDI; break;
213 break;
218 /* RESOLVE EXPLICIT */
220 static WORD GreaterEven(int i)
222 return odd(i) ? i + 1 : i + 2;
225 static WORD GreaterOdd(int i)
227 return odd(i) ? i + 2 : i + 1;
230 static WORD EmbeddingDirection(int level)
232 return odd(level) ? R : L;
235 /*------------------------------------------------------------------------
236 Function: resolveExplicit
238 Recursively resolves explicit embedding levels and overrides.
239 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
241 Input: Base embedding level and direction
242 Character count
244 Output: Array of embedding levels
246 In/Out: Array of direction classes
249 Note: The function uses two simple counters to keep track of
250 matching explicit codes and PDF. Use the default argument for
251 the outermost call. The nesting counter counts the recursion
252 depth and not the embedding level.
253 ------------------------------------------------------------------------*/
254 typedef struct tagStackItem {
255 int level;
256 int override;
257 BOOL isolate;
258 } StackItem;
260 #define push_stack(l,o,i) \
261 do { stack_top--; \
262 stack[stack_top].level = l; \
263 stack[stack_top].override = o; \
264 stack[stack_top].isolate = i;} while(0)
266 #define pop_stack() do { stack_top++; } while(0)
268 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
270 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, int count)
272 /* X1 */
273 int overflow_isolate_count = 0;
274 int overflow_embedding_count = 0;
275 int valid_isolate_count = 0;
276 int i;
278 StackItem stack[MAX_DEPTH+2];
279 int stack_top = MAX_DEPTH+1;
281 stack[stack_top].level = level;
282 stack[stack_top].override = NI;
283 stack[stack_top].isolate = FALSE;
285 for (i = 0; i < count; i++)
287 /* X2 */
288 if (pclass[i] == RLE)
290 int least_odd = GreaterOdd(stack[stack_top].level);
291 poutLevel[i] = stack[stack_top].level;
292 if (valid_level(least_odd))
293 push_stack(least_odd, NI, FALSE);
294 else if (overflow_isolate_count == 0)
295 overflow_embedding_count++;
297 /* X3 */
298 else if (pclass[i] == LRE)
300 int least_even = GreaterEven(stack[stack_top].level);
301 poutLevel[i] = stack[stack_top].level;
302 if (valid_level(least_even))
303 push_stack(least_even, NI, FALSE);
304 else if (overflow_isolate_count == 0)
305 overflow_embedding_count++;
307 /* X4 */
308 else if (pclass[i] == RLO)
310 int least_odd = GreaterOdd(stack[stack_top].level);
311 poutLevel[i] = stack[stack_top].level;
312 if (valid_level(least_odd))
313 push_stack(least_odd, R, FALSE);
314 else if (overflow_isolate_count == 0)
315 overflow_embedding_count++;
317 /* X5 */
318 else if (pclass[i] == LRO)
320 int least_even = GreaterEven(stack[stack_top].level);
321 poutLevel[i] = stack[stack_top].level;
322 if (valid_level(least_even))
323 push_stack(least_even, L, FALSE);
324 else if (overflow_isolate_count == 0)
325 overflow_embedding_count++;
327 /* X5a */
328 else if (pclass[i] == RLI)
330 int least_odd = GreaterOdd(stack[stack_top].level);
331 poutLevel[i] = stack[stack_top].level;
332 if (valid_level(least_odd))
334 valid_isolate_count++;
335 push_stack(least_odd, NI, TRUE);
337 else
338 overflow_isolate_count++;
340 /* X5b */
341 else if (pclass[i] == LRI)
343 int least_even = GreaterEven(stack[stack_top].level);
344 poutLevel[i] = stack[stack_top].level;
345 if (valid_level(least_even))
347 valid_isolate_count++;
348 push_stack(least_even, NI, TRUE);
350 else
351 overflow_isolate_count++;
353 /* X5c */
354 else if (pclass[i] == FSI)
356 int j;
357 int new_level = 0;
358 int skipping = 0;
359 poutLevel[i] = stack[stack_top].level;
360 for (j = i+1; j < count; j++)
362 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
364 skipping++;
365 continue;
367 else if (pclass[j] == PDI)
369 if (skipping)
370 skipping --;
371 else
372 break;
373 continue;
376 if (skipping) continue;
378 if (pclass[j] == L)
380 new_level = 0;
381 break;
383 else if (pclass[j] == R || pclass[j] == AL)
385 new_level = 1;
386 break;
389 if (odd(new_level))
391 int least_odd = GreaterOdd(stack[stack_top].level);
392 if (valid_level(least_odd))
394 valid_isolate_count++;
395 push_stack(least_odd, NI, TRUE);
397 else
398 overflow_isolate_count++;
400 else
402 int least_even = GreaterEven(stack[stack_top].level);
403 if (valid_level(least_even))
405 valid_isolate_count++;
406 push_stack(least_even, NI, TRUE);
408 else
409 overflow_isolate_count++;
412 /* X6 */
413 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
415 poutLevel[i] = stack[stack_top].level;
416 if (stack[stack_top].override != NI)
417 pclass[i] = stack[stack_top].override;
419 /* X6a */
420 else if (pclass[i] == PDI)
422 if (overflow_isolate_count) overflow_isolate_count--;
423 else if (!valid_isolate_count) {/* do nothing */}
424 else
426 overflow_embedding_count = 0;
427 while (!stack[stack_top].isolate) pop_stack();
428 pop_stack();
429 valid_isolate_count --;
431 poutLevel[i] = stack[stack_top].level;
433 /* X7 */
434 else if (pclass[i] == PDF)
436 poutLevel[i] = stack[stack_top].level;
437 if (overflow_isolate_count) {/* do nothing */}
438 else if (overflow_embedding_count) overflow_embedding_count--;
439 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
440 pop_stack();
442 /* X8: Nothing */
444 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
445 for (i = 0; i < count ; i++)
446 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
447 pclass[i] = BN;
450 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
452 if (index == -1 || index == back_fence) return index;
453 index --;
454 while (index > back_fence && pcls[index] == BN) index --;
455 return index;
458 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
460 if (index == front_fence) return index;
461 index ++;
462 while (index < front_fence && pcls[index] == BN) index ++;
463 return index;
466 typedef struct tagRun
468 int start;
469 int end;
470 WORD e;
471 } Run;
473 typedef struct tagRunChar
475 WCHAR ch;
476 WORD *pcls;
477 } RunChar;
479 typedef struct tagIsolatedRun
481 struct list entry;
482 int length;
483 WORD sos;
484 WORD eos;
485 WORD e;
487 RunChar item[1];
488 } IsolatedRun;
490 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
492 if (index >= (iso_run->length-1)) return -1;
493 index ++;
494 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
495 if (index == iso_run->length) return -1;
496 return index;
499 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
502 if (index <= 0) return -1;
503 index --;
504 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
505 return index;
508 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
510 int i, len = 0;
511 TRACE("%s:",header);
512 TRACE("[ ");
513 for (i = 0; i < iso_run->length && len < 200; i++)
515 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
516 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
518 if (i != iso_run->length)
519 TRACE("...");
520 TRACE(" ]\n");
523 /*------------------------------------------------------------------------
524 Function: resolveWeak
526 Resolves the directionality of numeric and other weak character types
528 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
530 Input: Array of embedding levels
531 Character count
533 In/Out: Array of directional classes
535 Note: On input only these directional classes are expected
536 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
537 ------------------------------------------------------------------------*/
539 static void resolveWeak(IsolatedRun * iso_run)
541 int i;
543 /* W1 */
544 for (i=0; i < iso_run->length; i++)
546 if (*iso_run->item[i].pcls == NSM)
548 int j = iso_previousValidChar(iso_run, i);
549 if (j == -1)
550 *iso_run->item[i].pcls = iso_run->sos;
551 else if (*iso_run->item[j].pcls >= LRI)
552 *iso_run->item[i].pcls = ON;
553 else
554 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
558 /* W2 */
559 for (i = 0; i < iso_run->length; i++)
561 if (*iso_run->item[i].pcls == EN)
563 int j = iso_previousValidChar(iso_run, i);
564 while (j > -1)
566 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
568 if (*iso_run->item[j].pcls == AL)
569 *iso_run->item[i].pcls = AN;
570 break;
572 j = iso_previousValidChar(iso_run, j);
577 /* W3 */
578 for (i = 0; i < iso_run->length; i++)
580 if (*iso_run->item[i].pcls == AL)
581 *iso_run->item[i].pcls = R;
584 /* W4 */
585 for (i = 0; i < iso_run->length; i++)
587 if (*iso_run->item[i].pcls == ES)
589 int b = iso_previousValidChar(iso_run, i);
590 int f = iso_nextValidChar(iso_run, i);
592 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
593 *iso_run->item[i].pcls = EN;
595 else if (*iso_run->item[i].pcls == CS)
597 int b = iso_previousValidChar(iso_run, i);
598 int f = iso_nextValidChar(iso_run, i);
600 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
601 *iso_run->item[i].pcls = EN;
602 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
603 *iso_run->item[i].pcls = AN;
607 /* W5 */
608 for (i = 0; i < iso_run->length; i++)
610 if (*iso_run->item[i].pcls == ET)
612 int j;
613 for (j = i-1 ; j > -1; j--)
615 if (*iso_run->item[j].pcls == BN) continue;
616 if (*iso_run->item[j].pcls == ET) continue;
617 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
618 else break;
620 if (*iso_run->item[i].pcls == ET)
622 for (j = i+1; j < iso_run->length; j++)
624 if (*iso_run->item[j].pcls == BN) continue;
625 if (*iso_run->item[j].pcls == ET) continue;
626 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
627 else break;
633 /* W6 */
634 for (i = 0; i < iso_run->length; i++)
636 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
638 int b = i-1;
639 int f = i+1;
640 if (b > -1 && *iso_run->item[b].pcls == BN)
641 *iso_run->item[b].pcls = ON;
642 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
643 *iso_run->item[f].pcls = ON;
645 *iso_run->item[i].pcls = ON;
649 /* W7 */
650 for (i = 0; i < iso_run->length; i++)
652 if (*iso_run->item[i].pcls == EN)
654 int j;
655 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
656 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
658 if (*iso_run->item[j].pcls == L)
659 *iso_run->item[i].pcls = L;
660 break;
662 if (iso_run->sos == L && j == -1)
663 *iso_run->item[i].pcls = L;
668 typedef struct tagBracketPair
670 int start;
671 int end;
672 } BracketPair;
674 static int compr(const void *a, const void* b)
676 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
679 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
681 WCHAR *open_stack;
682 int *stack_index;
683 int stack_top = iso_run->length;
684 BracketPair *out = NULL;
685 int pair_count = 0;
686 int i;
688 open_stack = HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR) * iso_run->length);
689 stack_index = HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run->length);
691 for (i = 0; i < iso_run->length; i++)
693 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
694 if (ubv)
696 if (!out)
698 out = HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair));
699 out[0].start = -1;
702 if ((ubv >> 8) == 0)
704 stack_top --;
705 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
706 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
707 if (open_stack[stack_top] == 0x232A)
708 open_stack[stack_top] = 0x3009;
709 stack_index[stack_top] = i;
711 else if ((ubv >> 8) == 1)
713 int j;
714 if (stack_top == iso_run->length) continue;
715 for (j = stack_top; j < iso_run->length; j++)
717 WCHAR c = iso_run->item[i].ch;
718 if (c == 0x232A) c = 0x3009;
719 if (c == open_stack[j])
721 out[pair_count].start = stack_index[j];
722 out[pair_count].end = i;
723 pair_count++;
724 out = HeapReAlloc(GetProcessHeap(), 0, out, sizeof(BracketPair) * (pair_count+1));
725 out[pair_count].start = -1;
726 stack_top = j+1;
727 break;
733 if (pair_count == 0)
735 HeapFree(GetProcessHeap(),0,out);
736 out = NULL;
738 else if (pair_count > 1)
739 qsort(out, pair_count, sizeof(BracketPair), compr);
741 HeapFree(GetProcessHeap(), 0, open_stack);
742 HeapFree(GetProcessHeap(), 0, stack_index);
743 return out;
746 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
748 /*------------------------------------------------------------------------
749 Function: resolveNeutrals
751 Resolves the directionality of neutral character types.
753 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
755 Input: Array of embedding levels
756 Character count
757 Baselevel
759 In/Out: Array of directional classes
761 Note: On input only these directional classes are expected
762 R, L, NI, AN, EN and BN
764 W8 resolves a number of ENs to L
765 ------------------------------------------------------------------------*/
766 static void resolveNeutrals(IsolatedRun *iso_run)
768 int i;
769 BracketPair *pairs = NULL;
771 /* Translate isolates into NI */
772 for (i = 0; i < iso_run->length; i++)
774 if (*iso_run->item[i].pcls >= LRI)
775 *iso_run->item[i].pcls = NI;
777 switch(*iso_run->item[i].pcls)
779 case B:
780 case S:
781 case WS: *iso_run->item[i].pcls = NI;
784 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
787 /* N0: Skipping bracketed pairs for now */
788 pairs = computeBracketPairs(iso_run);
789 if (pairs)
791 BracketPair *p = &pairs[0];
792 int i = 0;
793 while (p->start >= 0)
795 int j;
796 int e = EmbeddingDirection(iso_run->e);
797 int o = EmbeddingDirection(iso_run->e+1);
798 BOOL flag_o = FALSE;
799 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
801 /* N0.b */
802 for (j = p->start+1; j < p->end; j++)
804 if (N0_TYPE(*iso_run->item[j].pcls) == e)
806 *iso_run->item[p->start].pcls = e;
807 *iso_run->item[p->end].pcls = e;
808 break;
810 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
811 flag_o = TRUE;
813 /* N0.c */
814 if (j == p->end && flag_o)
816 for (j = p->start; j >= 0; j--)
818 if (N0_TYPE(*iso_run->item[j].pcls) == o)
820 *iso_run->item[p->start].pcls = o;
821 *iso_run->item[p->end].pcls = o;
822 break;
824 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
826 *iso_run->item[p->start].pcls = e;
827 *iso_run->item[p->end].pcls = e;
828 break;
831 if ( j < 0 )
833 *iso_run->item[p->start].pcls = iso_run->sos;
834 *iso_run->item[p->end].pcls = iso_run->sos;
838 i++;
839 p = &pairs[i];
841 HeapFree(GetProcessHeap(),0,pairs);
844 /* N1 */
845 for (i = 0; i < iso_run->length; i++)
847 WORD l,r;
849 if (*iso_run->item[i].pcls == NI)
851 int j;
852 int b = iso_previousValidChar(iso_run, i);
854 if (b == -1)
856 l = iso_run->sos;
857 b = 0;
859 else
861 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
862 l = R;
863 else if (*iso_run->item[b].pcls == L)
864 l = L;
865 else /* No string type */
866 continue;
868 j = iso_nextValidChar(iso_run, i);
869 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
871 if (j == -1)
873 r = iso_run->eos;
874 j = iso_run->length;
876 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
877 r = R;
878 else if (*iso_run->item[j].pcls == L)
879 r = L;
880 else /* No string type */
881 continue;
883 if (r == l)
885 for (b = i; b < j && b < iso_run->length; b++)
886 *iso_run->item[b].pcls = r;
891 /* N2 */
892 for (i = 0; i < iso_run->length; i++)
894 if (*iso_run->item[i].pcls == NI)
896 int b = i-1;
897 int f = i+1;
898 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
899 if (b > -1 && *iso_run->item[b].pcls == BN)
900 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
901 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
902 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
907 /*------------------------------------------------------------------------
908 Function: resolveImplicit
910 Recursively resolves implicit embedding levels.
911 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
913 Input: Array of direction classes
914 Character count
915 Base level
917 In/Out: Array of embedding levels
919 Note: levels may exceed 15 on output.
920 Accepted subset of direction classes
921 R, L, AN, EN
922 ------------------------------------------------------------------------*/
923 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
925 int i;
927 /* I1/2 */
928 for (i = sos; i <= eos; i++)
930 if (pcls[i] == BN)
931 continue;
933 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
934 ASSERT(pcls[i] < 5); /* "Out of range." */
936 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
937 plevel[i]++;
938 else if (!odd(plevel[i]) && pcls[i] == R)
939 plevel[i]++;
940 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
941 plevel[i]+=2;
945 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
947 int i;
949 /* L1 */
950 for (i = sos; i <= eos; i++)
952 if (pcls[i] == B || pcls[i] == S)
954 int j = i -1;
955 while (i > sos && j >= sos &&
956 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
957 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
958 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
959 plevel[j--] = baselevel;
960 plevel[i] = baselevel;
962 if (i == eos &&
963 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
964 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
965 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
967 int j = i;
968 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
969 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
970 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
971 plevel[j--] = baselevel;
976 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, WORD *pLevel, LPCWSTR lpString, int uCount, struct list *set)
978 int run_start, run_end, i;
979 int run_count = 0;
980 Run *runs;
981 IsolatedRun *current_isolated;
983 runs = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(Run));
984 if (!runs) return;
986 list_init(set);
988 /* Build Runs */
989 run_start = 0;
990 while (run_start < uCount)
992 run_end = nextValidChar(pcls, run_start, uCount);
993 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
994 run_end --;
995 runs[run_count].start = run_start;
996 runs[run_count].end = run_end;
997 runs[run_count].e = pLevel[run_start];
998 run_start = nextValidChar(pcls, run_end, uCount);
999 run_count++;
1002 /* Build Isolating Runs */
1003 i = 0;
1004 while (i < run_count)
1006 int k = i;
1007 if (runs[k].start >= 0)
1009 int type_fence, real_end;
1010 int j;
1011 current_isolated = HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun) + sizeof(RunChar)*uCount);
1012 if (!current_isolated) break;
1014 run_start = runs[k].start;
1015 current_isolated->e = runs[k].e;
1016 current_isolated->length = (runs[k].end - runs[k].start)+1;
1018 for (j = 0; j < current_isolated->length; j++)
1020 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1021 current_isolated->item[j].ch = lpString[runs[k].start+j];
1024 run_end = runs[k].end;
1026 TRACE("{ [%i -- %i]",run_start, run_end);
1028 if (pcls[run_end] == BN)
1029 run_end = previousValidChar(pcls, run_end, runs[k].start);
1031 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1033 j = k+1;
1034 search:
1035 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1036 if (j < run_count && runs[i].e != runs[j].e)
1038 j++;
1039 goto search;
1042 if (j != run_count)
1044 int m;
1045 int l = current_isolated->length;
1047 current_isolated->length += (runs[j].end - runs[j].start)+1;
1048 for (m = 0; l < current_isolated->length; l++, m++)
1050 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1051 current_isolated->item[l].ch = lpString[runs[j].start+m];
1054 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1056 run_end = runs[j].end;
1057 if (pcls[run_end] == BN)
1058 run_end = previousValidChar(pcls, run_end, runs[i].start);
1059 runs[j].start = -1;
1060 k = j;
1062 else
1064 run_end = uCount;
1065 break;
1069 type_fence = previousValidChar(pcls, run_start, -1);
1071 if (type_fence == -1)
1072 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1073 else
1074 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1076 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1078 if (run_end == uCount)
1079 current_isolated->eos = current_isolated->sos;
1080 else
1082 /* eos could be an BN */
1083 if ( pcls[run_end] == BN )
1085 real_end = previousValidChar(pcls, run_end, run_start-1);
1086 if (real_end < run_start)
1087 real_end = run_start;
1089 else
1090 real_end = run_end;
1092 type_fence = nextValidChar(pcls, run_end, uCount);
1093 if (type_fence == uCount)
1094 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1095 else
1096 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1098 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1101 list_add_tail(set, &current_isolated->entry);
1102 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1104 i++;
1107 HeapFree(GetProcessHeap(), 0, runs);
1110 /*************************************************************
1111 * BIDI_DeterminLevels
1113 BOOL BIDI_DetermineLevels(
1114 LPCWSTR lpString, /* [in] The string for which information is to be returned */
1115 INT uCount, /* [in] Number of WCHARs in string. */
1116 const SCRIPT_STATE *s,
1117 const SCRIPT_CONTROL *c,
1118 WORD *lpOutLevels /* [out] final string levels */
1121 WORD *chartype;
1122 unsigned baselevel = 0;
1123 struct list IsolatingRuns;
1124 IsolatedRun *iso_run, *next;
1126 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1128 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
1129 if (!chartype)
1131 WARN("Out of memory\n");
1132 return FALSE;
1135 baselevel = s->uBidiLevel;
1137 classify(lpString, chartype, uCount, c);
1138 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1140 /* resolve explicit */
1141 resolveExplicit(baselevel, chartype, lpOutLevels, uCount);
1142 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1144 /* X10/BD13: Computer Isolating runs */
1145 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1147 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1149 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1151 /* resolve weak */
1152 resolveWeak(iso_run);
1153 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1155 /* resolve neutrals */
1156 resolveNeutrals(iso_run);
1157 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1159 list_remove(&iso_run->entry);
1160 HeapFree(GetProcessHeap(),0,iso_run);
1163 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1164 /* resolveImplicit */
1165 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1167 /* resolveResolvedLevels*/
1168 classify(lpString, chartype, uCount, c);
1169 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1171 HeapFree(GetProcessHeap(), 0, chartype);
1172 return TRUE;
1175 /* reverse cch indexes */
1176 static void reverse(int *pidx, int cch)
1178 int temp;
1179 int ich = 0;
1180 for (; ich < --cch; ich++)
1182 temp = pidx[ich];
1183 pidx[ich] = pidx[cch];
1184 pidx[cch] = temp;
1189 /*------------------------------------------------------------------------
1190 Functions: reorder/reorderLevel
1192 Recursively reorders the display string
1193 "From the highest level down, reverse all characters at that level and
1194 higher, down to the lowest odd level"
1196 Implements rule L2 of the Unicode bidi Algorithm.
1198 Input: Array of embedding levels
1199 Character count
1200 Flag enabling reversal (set to false by initial caller)
1202 In/Out: Text to reorder
1204 Note: levels may exceed 15 resp. 61 on input.
1206 Rule L3 - reorder combining marks is not implemented here
1207 Rule L4 - glyph mirroring is implemented as a display option below
1209 Note: this should be applied a line at a time
1210 -------------------------------------------------------------------------*/
1211 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1213 int ich = 0;
1215 /* true as soon as first odd level encountered */
1216 fReverse = fReverse || odd(level);
1218 for (; ich < cch; ich++)
1220 if (plevel[ich] < level)
1222 break;
1224 else if (plevel[ich] > level)
1226 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1227 cch - ich, fReverse) - 1;
1230 if (fReverse)
1232 reverse(pIndexs, ich);
1234 return ich;
1237 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1238 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1240 int ich = 0;
1241 int newlevel = -1;
1243 /* true as soon as first odd level encountered */
1244 fReverse = fReverse || odd(level);
1246 for (; ich < cch; ich++)
1248 if (plevel[ich] < level)
1249 break;
1250 else if (plevel[ich] > level)
1251 newlevel = ich;
1253 if (fReverse)
1255 reverse(pIndexs, ich);
1258 if (newlevel >= 0)
1260 ich = 0;
1261 for (; ich < cch; ich++)
1262 if (plevel[ich] < level)
1263 break;
1264 else if (plevel[ich] > level)
1265 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1266 cch - ich, fReverse) - 1;
1269 return ich;
1272 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
1273 WORD* lpStrength)
1275 int i;
1276 classify(lpString, lpStrength, uCount, c);
1278 for ( i = 0; i < uCount; i++)
1280 switch(lpStrength[i])
1282 case L:
1283 case LRE:
1284 case LRO:
1285 case R:
1286 case AL:
1287 case RLE:
1288 case RLO:
1289 lpStrength[i] = BIDI_STRONG;
1290 break;
1291 case PDF:
1292 case EN:
1293 case ES:
1294 case ET:
1295 case AN:
1296 case CS:
1297 case BN:
1298 lpStrength[i] = BIDI_WEAK;
1299 break;
1300 case B:
1301 case S:
1302 case WS:
1303 case ON:
1304 default: /* Neutrals and NSM */
1305 lpStrength[i] = BIDI_NEUTRAL;
1308 return TRUE;