devenum: Implement IMoniker::GetClassID().
[wine.git] / dlls / usp10 / bidi.c
blob39839e0ee3771b7933bc4def67802d0d0c6f3286
1 /*
2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include "config.h"
46 #include <stdarg.h>
47 #include <stdlib.h>
48 #include "windef.h"
49 #include "winbase.h"
50 #include "wingdi.h"
51 #include "winnls.h"
52 #include "usp10.h"
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/list.h"
57 #include "usp10_internal.h"
59 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
61 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
63 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
64 #define MAX_DEPTH 125
66 /* HELPER FUNCTIONS AND DECLARATIONS */
68 /*------------------------------------------------------------------------
69 Bidirectional Character Types
71 as defined by the Unicode Bidirectional Algorithm Table 3-7.
73 Note:
75 The list of bidirectional character types here is not grouped the
76 same way as the table 3-7, since the numberic values for the types
77 are chosen to keep the state and action tables compact.
78 ------------------------------------------------------------------------*/
79 enum directions
81 /* input types */
82 /* ON MUST be zero, code relies on ON = NI = 0 */
83 ON = 0, /* Other Neutral */
84 L, /* Left Letter */
85 R, /* Right Letter */
86 AN, /* Arabic Number */
87 EN, /* European Number */
88 AL, /* Arabic Letter (Right-to-left) */
89 NSM, /* Non-spacing Mark */
90 CS, /* Common Separator */
91 ES, /* European Separator */
92 ET, /* European Terminator (post/prefix e.g. $ and %) */
94 /* resolved types */
95 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
97 /* input types, */
98 S, /* Segment Separator (TAB) // used only in L1 */
99 WS, /* White space // used only in L1 */
100 B, /* Paragraph Separator (aka as PS) */
102 /* types for explicit controls */
103 RLO, /* these are used only in X1-X9 */
104 RLE,
105 LRO,
106 LRE,
107 PDF,
109 LRI, /* Isolate formatting characters new with 6.3 */
110 RLI,
111 FSI,
112 PDI,
114 /* resolved types, also resolved directions */
115 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
118 static const char debug_type[][4] =
120 "ON", /* Other Neutral */
121 "L", /* Left Letter */
122 "R", /* Right Letter */
123 "AN", /* Arabic Number */
124 "EN", /* European Number */
125 "AL", /* Arabic Letter (Right-to-left) */
126 "NSM", /* Non-spacing Mark */
127 "CS", /* Common Separator */
128 "ES", /* European Separator */
129 "ET", /* European Terminator (post/prefix e.g. $ and %) */
130 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
131 "S", /* Segment Separator (TAB) // used only in L1 */
132 "WS", /* White space // used only in L1 */
133 "B", /* Paragraph Separator (aka as PS) */
134 "RLO", /* these are used only in X1-X9 */
135 "RLE",
136 "LRO",
137 "LRE",
138 "PDF",
139 "LRI", /* Isolate formatting characters new with 6.3 */
140 "RLI",
141 "FSI",
142 "PDI",
145 /* HELPER FUNCTIONS */
147 static inline void dump_types(const char* header, WORD *types, int start, int end)
149 int i, len = 0;
150 TRACE("%s:",header);
151 for (i = start; i < end && len < 200; i++)
153 TRACE(" %s",debug_type[types[i]]);
154 len += strlen(debug_type[types[i]])+1;
156 if (i != end)
157 TRACE("...");
158 TRACE("\n");
161 /* Convert the libwine information to the direction enum */
162 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
164 static const enum directions dir_map[16] =
166 L, /* unassigned defaults to L */
179 NSM,
181 PDF /* also LRE, LRO, RLE, RLO */
184 unsigned i;
186 for (i = 0; i < uCount; ++i)
188 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
189 switch (chartype[i])
191 case ES:
192 if (!c->fLegacyBidiClass) break;
193 switch (lpString[i])
195 case '-':
196 case '+': chartype[i] = NI; break;
197 case '/': chartype[i] = CS; break;
199 break;
200 case PDF:
201 switch (lpString[i])
203 case 0x202A: chartype[i] = LRE; break;
204 case 0x202B: chartype[i] = RLE; break;
205 case 0x202C: chartype[i] = PDF; break;
206 case 0x202D: chartype[i] = LRO; break;
207 case 0x202E: chartype[i] = RLO; break;
208 case 0x2066: chartype[i] = LRI; break;
209 case 0x2067: chartype[i] = RLI; break;
210 case 0x2068: chartype[i] = FSI; break;
211 case 0x2069: chartype[i] = PDI; break;
213 break;
218 /* RESOLVE EXPLICIT */
220 static WORD GreaterEven(int i)
222 return odd(i) ? i + 1 : i + 2;
225 static WORD GreaterOdd(int i)
227 return odd(i) ? i + 2 : i + 1;
230 static WORD EmbeddingDirection(int level)
232 return odd(level) ? R : L;
235 /*------------------------------------------------------------------------
236 Function: resolveExplicit
238 Recursively resolves explicit embedding levels and overrides.
239 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
241 Input: Base embedding level and direction
242 Character count
244 Output: Array of embedding levels
246 In/Out: Array of direction classes
249 Note: The function uses two simple counters to keep track of
250 matching explicit codes and PDF. Use the default argument for
251 the outermost call. The nesting counter counts the recursion
252 depth and not the embedding level.
253 ------------------------------------------------------------------------*/
254 typedef struct tagStackItem {
255 int level;
256 int override;
257 BOOL isolate;
258 } StackItem;
260 #define push_stack(l,o,i) \
261 do { stack_top--; \
262 stack[stack_top].level = l; \
263 stack[stack_top].override = o; \
264 stack[stack_top].isolate = i;} while(0)
266 #define pop_stack() do { stack_top++; } while(0)
268 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
270 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride)
272 /* X1 */
273 int overflow_isolate_count = 0;
274 int overflow_embedding_count = 0;
275 int valid_isolate_count = 0;
276 int i;
278 StackItem stack[MAX_DEPTH+2];
279 int stack_top = MAX_DEPTH+1;
281 stack[stack_top].level = level;
282 stack[stack_top].override = NI;
283 stack[stack_top].isolate = FALSE;
285 if (initialOverride)
287 if (odd(level))
288 push_stack(level, R, FALSE);
289 else
290 push_stack(level, L, FALSE);
293 for (i = 0; i < count; i++)
295 poutOverrides[i] = stack[stack_top].override;
297 /* X2 */
298 if (pclass[i] == RLE)
300 int least_odd = GreaterOdd(stack[stack_top].level);
301 poutLevel[i] = stack[stack_top].level;
302 if (valid_level(least_odd))
303 push_stack(least_odd, NI, FALSE);
304 else if (overflow_isolate_count == 0)
305 overflow_embedding_count++;
307 /* X3 */
308 else if (pclass[i] == LRE)
310 int least_even = GreaterEven(stack[stack_top].level);
311 poutLevel[i] = stack[stack_top].level;
312 if (valid_level(least_even))
313 push_stack(least_even, NI, FALSE);
314 else if (overflow_isolate_count == 0)
315 overflow_embedding_count++;
317 /* X4 */
318 else if (pclass[i] == RLO)
320 int least_odd = GreaterOdd(stack[stack_top].level);
321 poutLevel[i] = stack[stack_top].level;
322 if (valid_level(least_odd))
323 push_stack(least_odd, R, FALSE);
324 else if (overflow_isolate_count == 0)
325 overflow_embedding_count++;
327 /* X5 */
328 else if (pclass[i] == LRO)
330 int least_even = GreaterEven(stack[stack_top].level);
331 poutLevel[i] = stack[stack_top].level;
332 if (valid_level(least_even))
333 push_stack(least_even, L, FALSE);
334 else if (overflow_isolate_count == 0)
335 overflow_embedding_count++;
337 /* X5a */
338 else if (pclass[i] == RLI)
340 int least_odd = GreaterOdd(stack[stack_top].level);
341 poutLevel[i] = stack[stack_top].level;
342 if (valid_level(least_odd))
344 valid_isolate_count++;
345 push_stack(least_odd, NI, TRUE);
347 else
348 overflow_isolate_count++;
350 /* X5b */
351 else if (pclass[i] == LRI)
353 int least_even = GreaterEven(stack[stack_top].level);
354 poutLevel[i] = stack[stack_top].level;
355 if (valid_level(least_even))
357 valid_isolate_count++;
358 push_stack(least_even, NI, TRUE);
360 else
361 overflow_isolate_count++;
363 /* X5c */
364 else if (pclass[i] == FSI)
366 int j;
367 int new_level = 0;
368 int skipping = 0;
369 poutLevel[i] = stack[stack_top].level;
370 for (j = i+1; j < count; j++)
372 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
374 skipping++;
375 continue;
377 else if (pclass[j] == PDI)
379 if (skipping)
380 skipping --;
381 else
382 break;
383 continue;
386 if (skipping) continue;
388 if (pclass[j] == L)
390 new_level = 0;
391 break;
393 else if (pclass[j] == R || pclass[j] == AL)
395 new_level = 1;
396 break;
399 if (odd(new_level))
401 int least_odd = GreaterOdd(stack[stack_top].level);
402 if (valid_level(least_odd))
404 valid_isolate_count++;
405 push_stack(least_odd, NI, TRUE);
407 else
408 overflow_isolate_count++;
410 else
412 int least_even = GreaterEven(stack[stack_top].level);
413 if (valid_level(least_even))
415 valid_isolate_count++;
416 push_stack(least_even, NI, TRUE);
418 else
419 overflow_isolate_count++;
422 /* X6 */
423 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
425 poutLevel[i] = stack[stack_top].level;
426 if (stack[stack_top].override != NI)
427 pclass[i] = stack[stack_top].override;
429 /* X6a */
430 else if (pclass[i] == PDI)
432 if (overflow_isolate_count) overflow_isolate_count--;
433 else if (!valid_isolate_count) {/* do nothing */}
434 else
436 overflow_embedding_count = 0;
437 while (!stack[stack_top].isolate) pop_stack();
438 pop_stack();
439 valid_isolate_count --;
441 poutLevel[i] = stack[stack_top].level;
443 /* X7 */
444 else if (pclass[i] == PDF)
446 poutLevel[i] = stack[stack_top].level;
447 if (overflow_isolate_count) {/* do nothing */}
448 else if (overflow_embedding_count) overflow_embedding_count--;
449 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
450 pop_stack();
452 /* X8: Nothing */
454 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
455 for (i = 0; i < count ; i++)
456 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
457 pclass[i] = BN;
460 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
462 if (index == -1 || index == back_fence) return index;
463 index --;
464 while (index > back_fence && pcls[index] == BN) index --;
465 return index;
468 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
470 if (index == front_fence) return index;
471 index ++;
472 while (index < front_fence && pcls[index] == BN) index ++;
473 return index;
476 typedef struct tagRun
478 int start;
479 int end;
480 WORD e;
481 } Run;
483 typedef struct tagRunChar
485 WCHAR ch;
486 WORD *pcls;
487 } RunChar;
489 typedef struct tagIsolatedRun
491 struct list entry;
492 int length;
493 WORD sos;
494 WORD eos;
495 WORD e;
497 RunChar item[1];
498 } IsolatedRun;
500 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
502 if (index >= (iso_run->length-1)) return -1;
503 index ++;
504 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
505 if (index == iso_run->length) return -1;
506 return index;
509 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
512 if (index <= 0) return -1;
513 index --;
514 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
515 return index;
518 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
520 int i, len = 0;
521 TRACE("%s:",header);
522 TRACE("[ ");
523 for (i = 0; i < iso_run->length && len < 200; i++)
525 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
526 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
528 if (i != iso_run->length)
529 TRACE("...");
530 TRACE(" ]\n");
533 /*------------------------------------------------------------------------
534 Function: resolveWeak
536 Resolves the directionality of numeric and other weak character types
538 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
540 Input: Array of embedding levels
541 Character count
543 In/Out: Array of directional classes
545 Note: On input only these directional classes are expected
546 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
547 ------------------------------------------------------------------------*/
549 static void resolveWeak(IsolatedRun * iso_run)
551 int i;
553 /* W1 */
554 for (i=0; i < iso_run->length; i++)
556 if (*iso_run->item[i].pcls == NSM)
558 int j = iso_previousValidChar(iso_run, i);
559 if (j == -1)
560 *iso_run->item[i].pcls = iso_run->sos;
561 else if (*iso_run->item[j].pcls >= LRI)
562 *iso_run->item[i].pcls = ON;
563 else
564 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
568 /* W2 */
569 for (i = 0; i < iso_run->length; i++)
571 if (*iso_run->item[i].pcls == EN)
573 int j = iso_previousValidChar(iso_run, i);
574 while (j > -1)
576 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
578 if (*iso_run->item[j].pcls == AL)
579 *iso_run->item[i].pcls = AN;
580 break;
582 j = iso_previousValidChar(iso_run, j);
587 /* W3 */
588 for (i = 0; i < iso_run->length; i++)
590 if (*iso_run->item[i].pcls == AL)
591 *iso_run->item[i].pcls = R;
594 /* W4 */
595 for (i = 0; i < iso_run->length; i++)
597 if (*iso_run->item[i].pcls == ES)
599 int b = iso_previousValidChar(iso_run, i);
600 int f = iso_nextValidChar(iso_run, i);
602 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
603 *iso_run->item[i].pcls = EN;
605 else if (*iso_run->item[i].pcls == CS)
607 int b = iso_previousValidChar(iso_run, i);
608 int f = iso_nextValidChar(iso_run, i);
610 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
611 *iso_run->item[i].pcls = EN;
612 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
613 *iso_run->item[i].pcls = AN;
617 /* W5 */
618 for (i = 0; i < iso_run->length; i++)
620 if (*iso_run->item[i].pcls == ET)
622 int j;
623 for (j = i-1 ; j > -1; j--)
625 if (*iso_run->item[j].pcls == BN) continue;
626 if (*iso_run->item[j].pcls == ET) continue;
627 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
628 else break;
630 if (*iso_run->item[i].pcls == ET)
632 for (j = i+1; j < iso_run->length; j++)
634 if (*iso_run->item[j].pcls == BN) continue;
635 if (*iso_run->item[j].pcls == ET) continue;
636 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
637 else break;
643 /* W6 */
644 for (i = 0; i < iso_run->length; i++)
646 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
648 int b = i-1;
649 int f = i+1;
650 if (b > -1 && *iso_run->item[b].pcls == BN)
651 *iso_run->item[b].pcls = ON;
652 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
653 *iso_run->item[f].pcls = ON;
655 *iso_run->item[i].pcls = ON;
659 /* W7 */
660 for (i = 0; i < iso_run->length; i++)
662 if (*iso_run->item[i].pcls == EN)
664 int j;
665 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
666 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
668 if (*iso_run->item[j].pcls == L)
669 *iso_run->item[i].pcls = L;
670 break;
672 if (iso_run->sos == L && j == -1)
673 *iso_run->item[i].pcls = L;
678 typedef struct tagBracketPair
680 int start;
681 int end;
682 } BracketPair;
684 static int compr(const void *a, const void* b)
686 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
689 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
691 WCHAR *open_stack;
692 int *stack_index;
693 int stack_top = iso_run->length;
694 BracketPair *out = NULL;
695 int pair_count = 0;
696 int i;
698 open_stack = heap_alloc(iso_run->length * sizeof(*open_stack));
699 stack_index = heap_alloc(iso_run->length * sizeof(*stack_index));
701 for (i = 0; i < iso_run->length; i++)
703 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
704 if (ubv)
706 if (!out)
708 out = heap_alloc(sizeof(*out));
709 out[0].start = -1;
712 if ((ubv >> 8) == 0)
714 stack_top --;
715 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
716 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
717 if (open_stack[stack_top] == 0x232A)
718 open_stack[stack_top] = 0x3009;
719 stack_index[stack_top] = i;
721 else if ((ubv >> 8) == 1)
723 int j;
724 if (stack_top == iso_run->length) continue;
725 for (j = stack_top; j < iso_run->length; j++)
727 WCHAR c = iso_run->item[i].ch;
728 if (c == 0x232A) c = 0x3009;
729 if (c == open_stack[j])
731 out[pair_count].start = stack_index[j];
732 out[pair_count].end = i;
733 pair_count++;
734 out = HeapReAlloc(GetProcessHeap(), 0, out, sizeof(BracketPair) * (pair_count+1));
735 out[pair_count].start = -1;
736 stack_top = j+1;
737 break;
743 if (pair_count == 0)
745 heap_free(out);
746 out = NULL;
748 else if (pair_count > 1)
749 qsort(out, pair_count, sizeof(BracketPair), compr);
751 heap_free(open_stack);
752 heap_free(stack_index);
753 return out;
756 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
758 /*------------------------------------------------------------------------
759 Function: resolveNeutrals
761 Resolves the directionality of neutral character types.
763 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
765 Input: Array of embedding levels
766 Character count
767 Baselevel
769 In/Out: Array of directional classes
771 Note: On input only these directional classes are expected
772 R, L, NI, AN, EN and BN
774 W8 resolves a number of ENs to L
775 ------------------------------------------------------------------------*/
776 static void resolveNeutrals(IsolatedRun *iso_run)
778 int i;
779 BracketPair *pairs = NULL;
781 /* Translate isolates into NI */
782 for (i = 0; i < iso_run->length; i++)
784 if (*iso_run->item[i].pcls >= LRI)
785 *iso_run->item[i].pcls = NI;
787 switch(*iso_run->item[i].pcls)
789 case B:
790 case S:
791 case WS: *iso_run->item[i].pcls = NI;
794 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
797 /* N0: Skipping bracketed pairs for now */
798 pairs = computeBracketPairs(iso_run);
799 if (pairs)
801 BracketPair *p = &pairs[0];
802 int i = 0;
803 while (p->start >= 0)
805 int j;
806 int e = EmbeddingDirection(iso_run->e);
807 int o = EmbeddingDirection(iso_run->e+1);
808 BOOL flag_o = FALSE;
809 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
811 /* N0.b */
812 for (j = p->start+1; j < p->end; j++)
814 if (N0_TYPE(*iso_run->item[j].pcls) == e)
816 *iso_run->item[p->start].pcls = e;
817 *iso_run->item[p->end].pcls = e;
818 break;
820 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
821 flag_o = TRUE;
823 /* N0.c */
824 if (j == p->end && flag_o)
826 for (j = p->start; j >= 0; j--)
828 if (N0_TYPE(*iso_run->item[j].pcls) == o)
830 *iso_run->item[p->start].pcls = o;
831 *iso_run->item[p->end].pcls = o;
832 break;
834 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
836 *iso_run->item[p->start].pcls = e;
837 *iso_run->item[p->end].pcls = e;
838 break;
841 if ( j < 0 )
843 *iso_run->item[p->start].pcls = iso_run->sos;
844 *iso_run->item[p->end].pcls = iso_run->sos;
848 i++;
849 p = &pairs[i];
851 heap_free(pairs);
854 /* N1 */
855 for (i = 0; i < iso_run->length; i++)
857 WORD l,r;
859 if (*iso_run->item[i].pcls == NI)
861 int j;
862 int b = iso_previousValidChar(iso_run, i);
864 if (b == -1)
866 l = iso_run->sos;
867 b = 0;
869 else
871 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
872 l = R;
873 else if (*iso_run->item[b].pcls == L)
874 l = L;
875 else /* No string type */
876 continue;
878 j = iso_nextValidChar(iso_run, i);
879 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
881 if (j == -1)
883 r = iso_run->eos;
884 j = iso_run->length;
886 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
887 r = R;
888 else if (*iso_run->item[j].pcls == L)
889 r = L;
890 else /* No string type */
891 continue;
893 if (r == l)
895 for (b = i; b < j && b < iso_run->length; b++)
896 *iso_run->item[b].pcls = r;
901 /* N2 */
902 for (i = 0; i < iso_run->length; i++)
904 if (*iso_run->item[i].pcls == NI)
906 int b = i-1;
907 int f = i+1;
908 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
909 if (b > -1 && *iso_run->item[b].pcls == BN)
910 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
911 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
912 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
917 /*------------------------------------------------------------------------
918 Function: resolveImplicit
920 Recursively resolves implicit embedding levels.
921 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
923 Input: Array of direction classes
924 Character count
925 Base level
927 In/Out: Array of embedding levels
929 Note: levels may exceed 15 on output.
930 Accepted subset of direction classes
931 R, L, AN, EN
932 ------------------------------------------------------------------------*/
933 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
935 int i;
937 /* I1/2 */
938 for (i = sos; i <= eos; i++)
940 if (pcls[i] == BN)
941 continue;
943 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
944 ASSERT(pcls[i] < 5); /* "Out of range." */
946 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
947 plevel[i]++;
948 else if (!odd(plevel[i]) && pcls[i] == R)
949 plevel[i]++;
950 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
951 plevel[i]+=2;
955 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
957 int i;
959 /* L1 */
960 for (i = sos; i <= eos; i++)
962 if (pcls[i] == B || pcls[i] == S)
964 int j = i -1;
965 while (i > sos && j >= sos &&
966 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
967 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
968 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
969 plevel[j--] = baselevel;
970 plevel[i] = baselevel;
972 else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO ||
973 pcls[i] == PDF || pcls[i] == BN)
975 plevel[i] = i ? plevel[i - 1] : baselevel;
977 if (i == eos &&
978 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
979 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
980 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
982 int j = i;
983 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
984 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
985 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
986 plevel[j--] = baselevel;
991 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, WORD *pLevel, LPCWSTR lpString, int uCount, struct list *set)
993 int run_start, run_end, i;
994 int run_count = 0;
995 Run *runs;
996 IsolatedRun *current_isolated;
998 if (!(runs = heap_alloc(uCount * sizeof(*runs))))
999 return;
1001 list_init(set);
1003 /* Build Runs */
1004 run_start = 0;
1005 while (run_start < uCount)
1007 run_end = nextValidChar(pcls, run_start, uCount);
1008 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
1009 run_end --;
1010 runs[run_count].start = run_start;
1011 runs[run_count].end = run_end;
1012 runs[run_count].e = pLevel[run_start];
1013 run_start = nextValidChar(pcls, run_end, uCount);
1014 run_count++;
1017 /* Build Isolating Runs */
1018 i = 0;
1019 while (i < run_count)
1021 int k = i;
1022 if (runs[k].start >= 0)
1024 int type_fence, real_end;
1025 int j;
1027 if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount]))))
1028 break;
1030 run_start = runs[k].start;
1031 current_isolated->e = runs[k].e;
1032 current_isolated->length = (runs[k].end - runs[k].start)+1;
1034 for (j = 0; j < current_isolated->length; j++)
1036 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1037 current_isolated->item[j].ch = lpString[runs[k].start+j];
1040 run_end = runs[k].end;
1042 TRACE("{ [%i -- %i]",run_start, run_end);
1044 if (pcls[run_end] == BN)
1045 run_end = previousValidChar(pcls, run_end, runs[k].start);
1047 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1049 j = k+1;
1050 search:
1051 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1052 if (j < run_count && runs[i].e != runs[j].e)
1054 j++;
1055 goto search;
1058 if (j != run_count)
1060 int m;
1061 int l = current_isolated->length;
1063 current_isolated->length += (runs[j].end - runs[j].start)+1;
1064 for (m = 0; l < current_isolated->length; l++, m++)
1066 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1067 current_isolated->item[l].ch = lpString[runs[j].start+m];
1070 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1072 run_end = runs[j].end;
1073 if (pcls[run_end] == BN)
1074 run_end = previousValidChar(pcls, run_end, runs[i].start);
1075 runs[j].start = -1;
1076 k = j;
1078 else
1080 run_end = uCount;
1081 break;
1085 type_fence = previousValidChar(pcls, run_start, -1);
1087 if (type_fence == -1)
1088 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1089 else
1090 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1092 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1094 if (run_end == uCount)
1095 current_isolated->eos = current_isolated->sos;
1096 else
1098 /* eos could be an BN */
1099 if ( pcls[run_end] == BN )
1101 real_end = previousValidChar(pcls, run_end, run_start-1);
1102 if (real_end < run_start)
1103 real_end = run_start;
1105 else
1106 real_end = run_end;
1108 type_fence = nextValidChar(pcls, run_end, uCount);
1109 if (type_fence == uCount)
1110 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1111 else
1112 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1114 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1117 list_add_tail(set, &current_isolated->entry);
1118 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1120 i++;
1123 heap_free(runs);
1126 /*************************************************************
1127 * BIDI_DeterminLevels
1129 BOOL BIDI_DetermineLevels(
1130 LPCWSTR lpString, /* [in] The string for which information is to be returned */
1131 INT uCount, /* [in] Number of WCHARs in string. */
1132 const SCRIPT_STATE *s,
1133 const SCRIPT_CONTROL *c,
1134 WORD *lpOutLevels, /* [out] final string levels */
1135 WORD *lpOutOverrides /* [out] final string overrides */
1138 WORD *chartype;
1139 unsigned baselevel = 0;
1140 struct list IsolatingRuns;
1141 IsolatedRun *iso_run, *next;
1143 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1145 if (!(chartype = heap_alloc(uCount * sizeof(*chartype))))
1147 WARN("Out of memory\n");
1148 return FALSE;
1151 baselevel = s->uBidiLevel;
1153 classify(lpString, chartype, uCount, c);
1154 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1156 memset(lpOutOverrides, 0, sizeof(WORD) * uCount);
1158 /* resolve explicit */
1159 resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection);
1160 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1162 /* X10/BD13: Computer Isolating runs */
1163 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1165 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1167 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1169 /* resolve weak */
1170 resolveWeak(iso_run);
1171 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1173 /* resolve neutrals */
1174 resolveNeutrals(iso_run);
1175 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1177 list_remove(&iso_run->entry);
1178 heap_free(iso_run);
1181 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1182 /* resolveImplicit */
1183 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1185 /* resolveResolvedLevels*/
1186 classify(lpString, chartype, uCount, c);
1187 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1189 heap_free(chartype);
1190 return TRUE;
1193 /* reverse cch indexes */
1194 static void reverse(int *pidx, int cch)
1196 int temp;
1197 int ich = 0;
1198 for (; ich < --cch; ich++)
1200 temp = pidx[ich];
1201 pidx[ich] = pidx[cch];
1202 pidx[cch] = temp;
1207 /*------------------------------------------------------------------------
1208 Functions: reorder/reorderLevel
1210 Recursively reorders the display string
1211 "From the highest level down, reverse all characters at that level and
1212 higher, down to the lowest odd level"
1214 Implements rule L2 of the Unicode bidi Algorithm.
1216 Input: Array of embedding levels
1217 Character count
1218 Flag enabling reversal (set to false by initial caller)
1220 In/Out: Text to reorder
1222 Note: levels may exceed 15 resp. 61 on input.
1224 Rule L3 - reorder combining marks is not implemented here
1225 Rule L4 - glyph mirroring is implemented as a display option below
1227 Note: this should be applied a line at a time
1228 -------------------------------------------------------------------------*/
1229 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1231 int ich = 0;
1233 /* true as soon as first odd level encountered */
1234 fReverse = fReverse || odd(level);
1236 for (; ich < cch; ich++)
1238 if (plevel[ich] < level)
1240 break;
1242 else if (plevel[ich] > level)
1244 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1245 cch - ich, fReverse) - 1;
1248 if (fReverse)
1250 reverse(pIndexs, ich);
1252 return ich;
1255 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1256 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1258 int ich = 0;
1259 int newlevel = -1;
1261 /* true as soon as first odd level encountered */
1262 fReverse = fReverse || odd(level);
1264 for (; ich < cch; ich++)
1266 if (plevel[ich] < level)
1267 break;
1268 else if (plevel[ich] > level)
1269 newlevel = ich;
1271 if (fReverse)
1273 reverse(pIndexs, ich);
1276 if (newlevel >= 0)
1278 ich = 0;
1279 for (; ich < cch; ich++)
1280 if (plevel[ich] < level)
1281 break;
1282 else if (plevel[ich] > level)
1283 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1284 cch - ich, fReverse) - 1;
1287 return ich;
1290 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
1291 WORD* lpStrength)
1293 int i;
1294 classify(lpString, lpStrength, uCount, c);
1296 for ( i = 0; i < uCount; i++)
1298 switch(lpStrength[i])
1300 case L:
1301 case LRE:
1302 case LRO:
1303 case R:
1304 case AL:
1305 case RLE:
1306 case RLO:
1307 lpStrength[i] = BIDI_STRONG;
1308 break;
1309 case PDF:
1310 case EN:
1311 case ES:
1312 case ET:
1313 case AN:
1314 case CS:
1315 case BN:
1316 lpStrength[i] = BIDI_WEAK;
1317 break;
1318 case B:
1319 case S:
1320 case WS:
1321 case ON:
1322 default: /* Neutrals and NSM */
1323 lpStrength[i] = BIDI_NEUTRAL;
1326 return TRUE;