kernel32/tests: Ensure that pipe tests read the full pipe content.
[wine/wine-gecko.git] / dlls / usp10 / bidi.c
blob5b227d81f6b9722273b0f3eef5076e06782aa16d
1 /*
2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include "config.h"
46 #include <stdarg.h>
47 #include <stdlib.h>
48 #include "windef.h"
49 #include "winbase.h"
50 #include "wingdi.h"
51 #include "winnls.h"
52 #include "usp10.h"
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/list.h"
57 #include "usp10_internal.h"
59 extern const unsigned short bidi_bracket_table[];
61 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
63 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
64 #define MAX_DEPTH 125
66 /* HELPER FUNCTIONS AND DECLARATIONS */
68 /*------------------------------------------------------------------------
69 Bidirectional Character Types
71 as defined by the Unicode Bidirectional Algorithm Table 3-7.
73 Note:
75 The list of bidirectional character types here is not grouped the
76 same way as the table 3-7, since the numberic values for the types
77 are chosen to keep the state and action tables compact.
78 ------------------------------------------------------------------------*/
79 enum directions
81 /* input types */
82 /* ON MUST be zero, code relies on ON = NI = 0 */
83 ON = 0, /* Other Neutral */
84 L, /* Left Letter */
85 R, /* Right Letter */
86 AN, /* Arabic Number */
87 EN, /* European Number */
88 AL, /* Arabic Letter (Right-to-left) */
89 NSM, /* Non-spacing Mark */
90 CS, /* Common Separator */
91 ES, /* European Separator */
92 ET, /* European Terminator (post/prefix e.g. $ and %) */
94 /* resolved types */
95 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
97 /* input types, */
98 S, /* Segment Separator (TAB) // used only in L1 */
99 WS, /* White space // used only in L1 */
100 B, /* Paragraph Separator (aka as PS) */
102 /* types for explicit controls */
103 RLO, /* these are used only in X1-X9 */
104 RLE,
105 LRO,
106 LRE,
107 PDF,
109 LRI, /* Isolate formatting characters new with 6.3 */
110 RLI,
111 FSI,
112 PDI,
114 /* resolved types, also resolved directions */
115 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
118 static const char debug_type[][4] =
120 "ON", /* Other Neutral */
121 "L", /* Left Letter */
122 "R", /* Right Letter */
123 "AN", /* Arabic Number */
124 "EN", /* European Number */
125 "AL", /* Arabic Letter (Right-to-left) */
126 "NSM", /* Non-spacing Mark */
127 "CS", /* Common Separator */
128 "ES", /* European Separator */
129 "ET", /* European Terminator (post/prefix e.g. $ and %) */
130 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
131 "S", /* Segment Separator (TAB) // used only in L1 */
132 "WS", /* White space // used only in L1 */
133 "B", /* Paragraph Separator (aka as PS) */
134 "RLO", /* these are used only in X1-X9 */
135 "RLE",
136 "LRO",
137 "LRE",
138 "PDF",
139 "LRI", /* Isolate formatting characters new with 6.3 */
140 "RLI",
141 "FSI",
142 "PDI",
145 /* HELPER FUNCTIONS */
147 static inline void dump_types(const char* header, WORD *types, int start, int end)
149 int i, len = 0;
150 TRACE("%s:",header);
151 for (i = start; i < end && len < 200; i++)
153 TRACE(" %s",debug_type[types[i]]);
154 len += strlen(debug_type[types[i]])+1;
156 if (i != end)
157 TRACE("...");
158 TRACE("\n");
161 /* Convert the libwine information to the direction enum */
162 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
164 static const enum directions dir_map[16] =
166 L, /* unassigned defaults to L */
179 NSM,
181 PDF /* also LRE, LRO, RLE, RLO */
184 unsigned i;
186 for (i = 0; i < uCount; ++i)
188 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
189 switch (chartype[i])
191 case ES:
192 if (!c->fLegacyBidiClass) break;
193 switch (lpString[i])
195 case '-':
196 case '+': chartype[i] = NI; break;
197 case '/': chartype[i] = CS; break;
199 break;
200 case PDF:
201 switch (lpString[i])
203 case 0x202A: chartype[i] = LRE; break;
204 case 0x202B: chartype[i] = RLE; break;
205 case 0x202C: chartype[i] = PDF; break;
206 case 0x202D: chartype[i] = LRO; break;
207 case 0x202E: chartype[i] = RLO; break;
208 case 0x2066: chartype[i] = LRI; break;
209 case 0x2067: chartype[i] = RLI; break;
210 case 0x2068: chartype[i] = FSI; break;
211 case 0x2069: chartype[i] = PDI; break;
213 break;
218 /* RESOLVE EXPLICIT */
220 static WORD GreaterEven(int i)
222 return odd(i) ? i + 1 : i + 2;
225 static WORD GreaterOdd(int i)
227 return odd(i) ? i + 2 : i + 1;
230 static WORD EmbeddingDirection(int level)
232 return odd(level) ? R : L;
235 /*------------------------------------------------------------------------
236 Function: resolveExplicit
238 Recursively resolves explicit embedding levels and overrides.
239 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
241 Input: Base embedding level and direction
242 Character count
244 Output: Array of embedding levels
246 In/Out: Array of direction classes
249 Note: The function uses two simple counters to keep track of
250 matching explicit codes and PDF. Use the default argument for
251 the outermost call. The nesting counter counts the recursion
252 depth and not the embedding level.
253 ------------------------------------------------------------------------*/
254 typedef struct tagStackItem {
255 int level;
256 int override;
257 BOOL isolate;
258 } StackItem;
260 #define push_stack(l,o,i) \
261 do { stack_top--; \
262 stack[stack_top].level = l; \
263 stack[stack_top].override = o; \
264 stack[stack_top].isolate = i;} while(0)
266 #define pop_stack() do { stack_top++; } while(0)
268 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
270 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, int count)
272 /* X1 */
273 int overflow_isolate_count = 0;
274 int overflow_embedding_count = 0;
275 int valid_isolate_count = 0;
276 int i;
278 StackItem stack[MAX_DEPTH+2];
279 int stack_top = MAX_DEPTH+1;
281 stack[stack_top].level = level;
282 stack[stack_top].override = NI;
283 stack[stack_top].isolate = FALSE;
285 for (i = 0; i < count; i++)
287 /* X2 */
288 if (pclass[i] == RLE)
290 int least_odd = GreaterOdd(stack[stack_top].level);
291 poutLevel[i] = stack[stack_top].level;
292 if (valid_level(least_odd))
293 push_stack(least_odd, NI, FALSE);
294 else if (overflow_isolate_count == 0)
295 overflow_embedding_count++;
297 /* X3 */
298 else if (pclass[i] == LRE)
300 int least_even = GreaterEven(stack[stack_top].level);
301 poutLevel[i] = stack[stack_top].level;
302 if (valid_level(least_even))
303 push_stack(least_even, NI, FALSE);
304 else if (overflow_isolate_count == 0)
305 overflow_embedding_count++;
307 /* X4 */
308 else if (pclass[i] == RLO)
310 int least_odd = GreaterOdd(stack[stack_top].level);
311 poutLevel[i] = stack[stack_top].level;
312 if (valid_level(least_odd))
313 push_stack(least_odd, R, FALSE);
314 else if (overflow_isolate_count == 0)
315 overflow_embedding_count++;
317 /* X5 */
318 else if (pclass[i] == LRO)
320 int least_even = GreaterEven(stack[stack_top].level);
321 poutLevel[i] = stack[stack_top].level;
322 if (valid_level(least_even))
323 push_stack(least_even, L, FALSE);
324 else if (overflow_isolate_count == 0)
325 overflow_embedding_count++;
327 /* X5a */
328 else if (pclass[i] == RLI)
330 int least_odd = GreaterOdd(stack[stack_top].level);
331 poutLevel[i] = stack[stack_top].level;
332 if (valid_level(least_odd))
334 valid_isolate_count++;
335 push_stack(least_odd, NI, TRUE);
337 else
338 overflow_isolate_count++;
340 /* X5b */
341 else if (pclass[i] == LRI)
343 int least_even = GreaterEven(stack[stack_top].level);
344 poutLevel[i] = stack[stack_top].level;
345 if (valid_level(least_even))
347 valid_isolate_count++;
348 push_stack(least_even, NI, TRUE);
350 else
351 overflow_isolate_count++;
353 /* X5c */
354 else if (pclass[i] == FSI)
356 int j;
357 int new_level = 0;
358 int skipping = 0;
359 poutLevel[i] = stack[stack_top].level;
360 for (j = i+1; j < count; j++)
362 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
364 skipping++;
365 continue;
367 else if (pclass[j] == PDI)
369 if (skipping)
370 skipping --;
371 else
372 break;
373 continue;
376 if (skipping) continue;
378 if (pclass[j] == L)
380 new_level = 0;
381 break;
383 else if (pclass[j] == R || pclass[j] == AL)
385 new_level = 1;
386 break;
389 if (odd(new_level))
391 int least_odd = GreaterOdd(stack[stack_top].level);
392 if (valid_level(least_odd))
394 valid_isolate_count++;
395 push_stack(least_odd, NI, TRUE);
397 else
398 overflow_isolate_count++;
400 else
402 int least_even = GreaterEven(stack[stack_top].level);
403 if (valid_level(least_even))
405 valid_isolate_count++;
406 push_stack(least_even, NI, TRUE);
408 else
409 overflow_isolate_count++;
412 /* X6 */
413 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
415 poutLevel[i] = stack[stack_top].level;
416 if (stack[stack_top].override != NI)
417 pclass[i] = stack[stack_top].override;
419 /* X6a */
420 else if (pclass[i] == PDI)
422 if (overflow_isolate_count) overflow_isolate_count--;
423 else if (!valid_isolate_count) {/* do nothing */}
424 else
426 overflow_embedding_count = 0;
427 while (!stack[stack_top].isolate) pop_stack();
428 pop_stack();
429 valid_isolate_count --;
431 poutLevel[i] = stack[stack_top].level;
433 /* X7 */
434 else if (pclass[i] == PDF)
436 poutLevel[i] = stack[stack_top].level;
437 if (overflow_isolate_count) {/* do nothing */}
438 else if (overflow_embedding_count) overflow_embedding_count--;
439 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
440 pop_stack();
442 /* X8: Nothing */
444 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
445 for (i = 0; i < count ; i++)
446 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
447 pclass[i] = BN;
450 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
452 if (index == -1 || index == back_fence) return index;
453 index --;
454 while (index > back_fence && pcls[index] == BN) index --;
455 return index;
458 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
460 if (index == front_fence) return index;
461 index ++;
462 while (index < front_fence && pcls[index] == BN) index ++;
463 return index;
466 typedef struct tagRun
468 int start;
469 int end;
470 WORD e;
471 } Run;
473 typedef struct tagRunChar
475 WCHAR ch;
476 WORD *pcls;
477 } RunChar;
479 typedef struct tagIsolatedRun
481 struct list entry;
482 int length;
483 WORD sos;
484 WORD eos;
485 WORD e;
487 RunChar item[1];
488 } IsolatedRun;
490 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
492 if (index >= (iso_run->length-1)) return -1;
493 index ++;
494 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
495 if (index == iso_run->length) return -1;
496 return index;
499 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
502 if (index <= 0) return -1;
503 index --;
504 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
505 return index;
508 static inline int iso_previousChar(IsolatedRun *iso_run, int index)
510 if (index <= 0) return -1;
511 return index --;
514 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
516 int i, len = 0;
517 TRACE("%s:",header);
518 TRACE("[ ");
519 for (i = 0; i < iso_run->length && len < 200; i++)
521 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
522 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
524 if (i != iso_run->length)
525 TRACE("...");
526 TRACE(" ]\n");
529 /*------------------------------------------------------------------------
530 Function: resolveWeak
532 Resolves the directionality of numeric and other weak character types
534 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
536 Input: Array of embedding levels
537 Character count
539 In/Out: Array of directional classes
541 Note: On input only these directional classes are expected
542 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
543 ------------------------------------------------------------------------*/
545 static void resolveWeak(IsolatedRun * iso_run)
547 int i;
549 /* W1 */
550 for (i=0; i < iso_run->length; i++)
552 if (*iso_run->item[i].pcls == NSM)
554 int j = iso_previousValidChar(iso_run, i);
555 if (j == -1)
556 *iso_run->item[i].pcls = iso_run->sos;
557 else if (*iso_run->item[j].pcls >= LRI)
558 *iso_run->item[i].pcls = ON;
559 else
560 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
564 /* W2 */
565 for (i = 0; i < iso_run->length; i++)
567 if (*iso_run->item[i].pcls == EN)
569 int j = iso_previousValidChar(iso_run, i);
570 while (j > -1)
572 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
574 if (*iso_run->item[j].pcls == AL)
575 *iso_run->item[i].pcls = AN;
576 break;
578 j = iso_previousValidChar(iso_run, j);
583 /* W3 */
584 for (i = 0; i < iso_run->length; i++)
586 if (*iso_run->item[i].pcls == AL)
587 *iso_run->item[i].pcls = R;
590 /* W4 */
591 for (i = 0; i < iso_run->length; i++)
593 if (*iso_run->item[i].pcls == ES)
595 int b = iso_previousValidChar(iso_run, i);
596 int f = iso_nextValidChar(iso_run, i);
598 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
599 *iso_run->item[i].pcls = EN;
601 else if (*iso_run->item[i].pcls == CS)
603 int b = iso_previousValidChar(iso_run, i);
604 int f = iso_nextValidChar(iso_run, i);
606 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
607 *iso_run->item[i].pcls = EN;
608 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
609 *iso_run->item[i].pcls = AN;
613 /* W5 */
614 for (i = 0; i < iso_run->length; i++)
616 if (*iso_run->item[i].pcls == ET)
618 int j;
619 for (j = i-1 ; j > -1; j--)
621 if (*iso_run->item[j].pcls == BN) continue;
622 if (*iso_run->item[j].pcls == ET) continue;
623 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
624 else break;
626 if (*iso_run->item[i].pcls == ET)
628 for (j = i+1; j < iso_run->length; j++)
630 if (*iso_run->item[j].pcls == BN) continue;
631 if (*iso_run->item[j].pcls == ET) continue;
632 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
633 else break;
639 /* W6 */
640 for (i = 0; i < iso_run->length; i++)
642 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
644 int b = i-1;
645 int f = i+1;
646 if (b > -1 && *iso_run->item[b].pcls == BN)
647 *iso_run->item[b].pcls = ON;
648 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
649 *iso_run->item[f].pcls = ON;
651 *iso_run->item[i].pcls = ON;
655 /* W7 */
656 for (i = 0; i < iso_run->length; i++)
658 if (*iso_run->item[i].pcls == EN)
660 int j;
661 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
662 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
664 if (*iso_run->item[j].pcls == L)
665 *iso_run->item[i].pcls = L;
666 break;
668 if (iso_run->sos == L && j == -1)
669 *iso_run->item[i].pcls = L;
674 typedef struct tagBracketPair
676 int start;
677 int end;
678 } BracketPair;
680 static int compr(const void *a, const void* b)
682 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
685 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
687 WCHAR *open_stack;
688 int *stack_index;
689 int stack_top = iso_run->length;
690 BracketPair *out = NULL;
691 int pair_count = 0;
692 int i;
694 open_stack = HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR) * iso_run->length);
695 stack_index = HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run->length);
697 for (i = 0; i < iso_run->length; i++)
699 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
700 if (ubv)
702 if (!out)
704 out = HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair));
705 out[0].start = -1;
708 if ((ubv >> 8) == 0)
710 stack_top --;
711 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
712 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
713 if (open_stack[stack_top] == 0x232A)
714 open_stack[stack_top] = 0x3009;
715 stack_index[stack_top] = i;
717 else if ((ubv >> 8) == 1)
719 int j;
720 if (stack_top == iso_run->length) continue;
721 for (j = stack_top; j < iso_run->length; j++)
723 WCHAR c = iso_run->item[i].ch;
724 if (c == 0x232A) c = 0x3009;
725 if (c == open_stack[j])
727 out[pair_count].start = stack_index[j];
728 out[pair_count].end = i;
729 pair_count++;
730 out = HeapReAlloc(GetProcessHeap(), 0, out, sizeof(BracketPair) * (pair_count+1));
731 out[pair_count].start = -1;
732 stack_top = j+1;
733 break;
739 if (pair_count == 0)
741 HeapFree(GetProcessHeap(),0,out);
742 out = NULL;
744 else if (pair_count > 1)
745 qsort(out, pair_count, sizeof(BracketPair), compr);
747 HeapFree(GetProcessHeap(), 0, open_stack);
748 HeapFree(GetProcessHeap(), 0, stack_index);
749 return out;
752 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
754 /*------------------------------------------------------------------------
755 Function: resolveNeutrals
757 Resolves the directionality of neutral character types.
759 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
761 Input: Array of embedding levels
762 Character count
763 Baselevel
765 In/Out: Array of directional classes
767 Note: On input only these directional classes are expected
768 R, L, NI, AN, EN and BN
770 W8 resolves a number of ENs to L
771 ------------------------------------------------------------------------*/
772 static void resolveNeutrals(IsolatedRun *iso_run)
774 int i;
775 BracketPair *pairs = NULL;
777 /* Translate isolates into NI */
778 for (i = 0; i < iso_run->length; i++)
780 if (*iso_run->item[i].pcls >= LRI)
781 *iso_run->item[i].pcls = NI;
783 switch(*iso_run->item[i].pcls)
785 case B:
786 case S:
787 case WS: *iso_run->item[i].pcls = NI;
790 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
793 /* N0: Skipping bracketed pairs for now */
794 pairs = computeBracketPairs(iso_run);
795 if (pairs)
797 BracketPair *p = &pairs[0];
798 int i = 0;
799 while (p->start >= 0)
801 int j;
802 int e = EmbeddingDirection(iso_run->e);
803 int o = EmbeddingDirection(iso_run->e+1);
804 BOOL flag_o = FALSE;
805 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
807 /* N0.b */
808 for (j = p->start+1; j < p->end; j++)
810 if (N0_TYPE(*iso_run->item[j].pcls) == e)
812 *iso_run->item[p->start].pcls = e;
813 *iso_run->item[p->end].pcls = e;
814 break;
816 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
817 flag_o = TRUE;
819 /* N0.c */
820 if (j == p->end && flag_o)
822 for (j = p->start; j >= 0; j--)
824 if (N0_TYPE(*iso_run->item[j].pcls) == o)
826 *iso_run->item[p->start].pcls = o;
827 *iso_run->item[p->end].pcls = o;
828 break;
830 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
832 *iso_run->item[p->start].pcls = e;
833 *iso_run->item[p->end].pcls = e;
834 break;
837 if ( j < 0 )
839 *iso_run->item[p->start].pcls = iso_run->sos;
840 *iso_run->item[p->end].pcls = iso_run->sos;
844 i++;
845 p = &pairs[i];
847 HeapFree(GetProcessHeap(),0,pairs);
850 /* N1 */
851 for (i = 0; i < iso_run->length; i++)
853 WORD l,r;
855 if (*iso_run->item[i].pcls == NI)
857 int j;
858 int b = iso_previousValidChar(iso_run, i);
860 if (b == -1)
862 l = iso_run->sos;
863 b = 0;
865 else
867 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
868 l = R;
869 else if (*iso_run->item[b].pcls == L)
870 l = L;
871 else /* No string type */
872 continue;
874 j = iso_nextValidChar(iso_run, i);
875 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
877 if (j == -1)
879 r = iso_run->eos;
880 j = iso_run->length;
882 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
883 r = R;
884 else if (*iso_run->item[j].pcls == L)
885 r = L;
886 else /* No string type */
887 continue;
889 if (r == l)
891 for (b = i; b < j && b < iso_run->length; b++)
892 *iso_run->item[b].pcls = r;
897 /* N2 */
898 for (i = 0; i < iso_run->length; i++)
900 if (*iso_run->item[i].pcls == NI)
902 int b = i-1;
903 int f = i+1;
904 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
905 if (b > -1 && *iso_run->item[b].pcls == BN)
906 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
907 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
908 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
913 /*------------------------------------------------------------------------
914 Function: resolveImplicit
916 Recursively resolves implicit embedding levels.
917 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
919 Input: Array of direction classes
920 Character count
921 Base level
923 In/Out: Array of embedding levels
925 Note: levels may exceed 15 on output.
926 Accepted subset of direction classes
927 R, L, AN, EN
928 ------------------------------------------------------------------------*/
929 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
931 int i;
933 /* I1/2 */
934 for (i = sos; i <= eos; i++)
936 if (pcls[i] == BN)
937 continue;
939 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
940 ASSERT(pcls[i] < 5); /* "Out of range." */
942 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
943 plevel[i]++;
944 else if (!odd(plevel[i]) && pcls[i] == R)
945 plevel[i]++;
946 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
947 plevel[i]+=2;
951 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
953 int i;
955 /* L1 */
956 for (i = sos; i <= eos; i++)
958 if (pcls[i] == B || pcls[i] == S)
960 int j = i -1;
961 while (i > sos && j >= sos &&
962 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
963 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
964 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
965 plevel[j--] = baselevel;
966 plevel[i] = baselevel;
968 if (i == eos &&
969 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
970 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
971 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
973 int j = i;
974 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
975 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
976 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
977 plevel[j--] = baselevel;
982 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, WORD *pLevel, LPCWSTR lpString, int uCount, struct list *set)
984 int run_start, run_end, i;
985 int run_count = 0;
986 Run *runs;
987 IsolatedRun *current_isolated;
989 runs = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(Run));
990 if (!runs) return;
992 list_init(set);
994 /* Build Runs */
995 run_start = 0;
996 while (run_start < uCount)
998 run_end = nextValidChar(pcls, run_start, uCount);
999 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
1000 run_end --;
1001 runs[run_count].start = run_start;
1002 runs[run_count].end = run_end;
1003 runs[run_count].e = pLevel[run_start];
1004 run_start = nextValidChar(pcls, run_end, uCount);
1005 run_count++;
1008 /* Build Isolating Runs */
1009 i = 0;
1010 while (i < run_count)
1012 int k = i;
1013 if (runs[k].start >= 0)
1015 int type_fence, real_end;
1016 int j;
1017 current_isolated = HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun) + sizeof(RunChar)*uCount);
1018 if (!current_isolated) break;
1020 run_start = runs[k].start;
1021 current_isolated->e = runs[k].e;
1022 current_isolated->length = (runs[k].end - runs[k].start)+1;
1024 for (j = 0; j < current_isolated->length; j++)
1026 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1027 current_isolated->item[j].ch = lpString[runs[k].start+j];
1030 run_end = runs[k].end;
1032 TRACE("{ [%i -- %i]",run_start, run_end);
1034 if (pcls[run_end] == BN)
1035 run_end = previousValidChar(pcls, run_end, runs[k].start);
1037 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1039 j = k+1;
1040 search:
1041 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1042 if (j < run_count && runs[i].e != runs[j].e)
1044 j++;
1045 goto search;
1048 if (j != run_count)
1050 int m;
1051 int l = current_isolated->length;
1053 current_isolated->length += (runs[j].end - runs[j].start)+1;
1054 for (m = 0; l < current_isolated->length; l++, m++)
1056 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1057 current_isolated->item[l].ch = lpString[runs[j].start+m];
1060 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1062 run_end = runs[j].end;
1063 if (pcls[run_end] == BN)
1064 run_end = previousValidChar(pcls, run_end, runs[i].start);
1065 runs[j].start = -1;
1066 k = j;
1068 else
1070 run_end = uCount;
1071 break;
1075 type_fence = previousValidChar(pcls, run_start, -1);
1077 if (type_fence == -1)
1078 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1079 else
1080 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1082 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1084 if (run_end == uCount)
1085 current_isolated->eos = current_isolated->sos;
1086 else
1088 /* eos could be an BN */
1089 if ( pcls[run_end] == BN )
1091 real_end = previousValidChar(pcls, run_end, run_start-1);
1092 if (real_end < run_start)
1093 real_end = run_start;
1095 else
1096 real_end = run_end;
1098 type_fence = nextValidChar(pcls, run_end, uCount);
1099 if (type_fence == uCount)
1100 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1101 else
1102 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1104 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1107 list_add_tail(set, &current_isolated->entry);
1108 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1110 i++;
1113 HeapFree(GetProcessHeap(), 0, runs);
1116 /*************************************************************
1117 * BIDI_DeterminLevels
1119 BOOL BIDI_DetermineLevels(
1120 LPCWSTR lpString, /* [in] The string for which information is to be returned */
1121 INT uCount, /* [in] Number of WCHARs in string. */
1122 const SCRIPT_STATE *s,
1123 const SCRIPT_CONTROL *c,
1124 WORD *lpOutLevels /* [out] final string levels */
1127 WORD *chartype;
1128 unsigned baselevel = 0;
1129 struct list IsolatingRuns;
1130 IsolatedRun *iso_run, *next;
1132 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1134 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
1135 if (!chartype)
1137 WARN("Out of memory\n");
1138 return FALSE;
1141 baselevel = s->uBidiLevel;
1143 classify(lpString, chartype, uCount, c);
1144 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1146 /* resolve explicit */
1147 resolveExplicit(baselevel, chartype, lpOutLevels, uCount);
1148 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1150 /* X10/BD13: Computer Isolating runs */
1151 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1153 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1155 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1157 /* resolve weak */
1158 resolveWeak(iso_run);
1159 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1161 /* resolve neutrals */
1162 resolveNeutrals(iso_run);
1163 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1165 list_remove(&iso_run->entry);
1166 HeapFree(GetProcessHeap(),0,iso_run);
1169 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1170 /* resolveImplicit */
1171 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1173 /* resolveResolvedLevels*/
1174 classify(lpString, chartype, uCount, c);
1175 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1177 HeapFree(GetProcessHeap(), 0, chartype);
1178 return TRUE;
1181 /* reverse cch indexes */
1182 static void reverse(int *pidx, int cch)
1184 int temp;
1185 int ich = 0;
1186 for (; ich < --cch; ich++)
1188 temp = pidx[ich];
1189 pidx[ich] = pidx[cch];
1190 pidx[cch] = temp;
1195 /*------------------------------------------------------------------------
1196 Functions: reorder/reorderLevel
1198 Recursively reorders the display string
1199 "From the highest level down, reverse all characters at that level and
1200 higher, down to the lowest odd level"
1202 Implements rule L2 of the Unicode bidi Algorithm.
1204 Input: Array of embedding levels
1205 Character count
1206 Flag enabling reversal (set to false by initial caller)
1208 In/Out: Text to reorder
1210 Note: levels may exceed 15 resp. 61 on input.
1212 Rule L3 - reorder combining marks is not implemented here
1213 Rule L4 - glyph mirroring is implemented as a display option below
1215 Note: this should be applied a line at a time
1216 -------------------------------------------------------------------------*/
1217 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1219 int ich = 0;
1221 /* true as soon as first odd level encountered */
1222 fReverse = fReverse || odd(level);
1224 for (; ich < cch; ich++)
1226 if (plevel[ich] < level)
1228 break;
1230 else if (plevel[ich] > level)
1232 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1233 cch - ich, fReverse) - 1;
1236 if (fReverse)
1238 reverse(pIndexs, ich);
1240 return ich;
1243 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1244 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1246 int ich = 0;
1247 int newlevel = -1;
1249 /* true as soon as first odd level encountered */
1250 fReverse = fReverse || odd(level);
1252 for (; ich < cch; ich++)
1254 if (plevel[ich] < level)
1255 break;
1256 else if (plevel[ich] > level)
1257 newlevel = ich;
1259 if (fReverse)
1261 reverse(pIndexs, ich);
1264 if (newlevel >= 0)
1266 ich = 0;
1267 for (; ich < cch; ich++)
1268 if (plevel[ich] < level)
1269 break;
1270 else if (plevel[ich] > level)
1271 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1272 cch - ich, fReverse) - 1;
1275 return ich;
1278 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
1279 WORD* lpStrength)
1281 int i;
1282 classify(lpString, lpStrength, uCount, c);
1284 for ( i = 0; i < uCount; i++)
1286 switch(lpStrength[i])
1288 case L:
1289 case LRE:
1290 case LRO:
1291 case R:
1292 case AL:
1293 case RLE:
1294 case RLO:
1295 lpStrength[i] = BIDI_STRONG;
1296 break;
1297 case PDF:
1298 case EN:
1299 case ES:
1300 case ET:
1301 case AN:
1302 case CS:
1303 case BN:
1304 lpStrength[i] = BIDI_WEAK;
1305 break;
1306 case B:
1307 case S:
1308 case WS:
1309 case ON:
1310 default: /* Neutrals and NSM */
1311 lpStrength[i] = BIDI_NEUTRAL;
1314 return TRUE;