user32: Move *RegisterDeviceNotification*() to input.c.
[wine.git] / dlls / usp10 / bidi.c
blobe1d0a11697d87618c1ff64362198e4eff7eba8ed
1 /*
2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include <stdarg.h>
45 #include <stdlib.h>
46 #include "windef.h"
47 #include "winbase.h"
48 #include "wingdi.h"
49 #include "winnls.h"
50 #include "usp10.h"
51 #include "wine/debug.h"
52 #include "wine/heap.h"
53 #include "wine/list.h"
55 #include "usp10_internal.h"
57 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
58 extern const unsigned short bidi_direction_table[] DECLSPEC_HIDDEN;
60 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
62 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
63 #define MAX_DEPTH 125
65 /* HELPER FUNCTIONS AND DECLARATIONS */
67 /*------------------------------------------------------------------------
68 Bidirectional Character Types
70 as defined by the Unicode Bidirectional Algorithm Table 3-7.
72 Note:
74 The list of bidirectional character types here is not grouped the
75 same way as the table 3-7, since the numeric values for the types
76 are chosen to keep the state and action tables compact.
77 ------------------------------------------------------------------------*/
78 enum directions
80 /* input types */
81 /* ON MUST be zero, code relies on ON = NI = 0 */
82 ON = 0, /* Other Neutral */
83 L, /* Left Letter */
84 R, /* Right Letter */
85 AN, /* Arabic Number */
86 EN, /* European Number */
87 AL, /* Arabic Letter (Right-to-left) */
88 NSM, /* Non-spacing Mark */
89 CS, /* Common Separator */
90 ES, /* European Separator */
91 ET, /* European Terminator (post/prefix e.g. $ and %) */
93 /* resolved types */
94 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
96 /* input types, */
97 S, /* Segment Separator (TAB) // used only in L1 */
98 WS, /* White space // used only in L1 */
99 B, /* Paragraph Separator (aka as PS) */
101 /* types for explicit controls */
102 RLO, /* these are used only in X1-X9 */
103 RLE,
104 LRO,
105 LRE,
106 PDF,
108 LRI, /* Isolate formatting characters new with 6.3 */
109 RLI,
110 FSI,
111 PDI,
113 /* resolved types, also resolved directions */
114 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
117 static const char debug_type[][4] =
119 "ON", /* Other Neutral */
120 "L", /* Left Letter */
121 "R", /* Right Letter */
122 "AN", /* Arabic Number */
123 "EN", /* European Number */
124 "AL", /* Arabic Letter (Right-to-left) */
125 "NSM", /* Non-spacing Mark */
126 "CS", /* Common Separator */
127 "ES", /* European Separator */
128 "ET", /* European Terminator (post/prefix e.g. $ and %) */
129 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
130 "S", /* Segment Separator (TAB) // used only in L1 */
131 "WS", /* White space // used only in L1 */
132 "B", /* Paragraph Separator (aka as PS) */
133 "RLO", /* these are used only in X1-X9 */
134 "RLE",
135 "LRO",
136 "LRE",
137 "PDF",
138 "LRI", /* Isolate formatting characters new with 6.3 */
139 "RLI",
140 "FSI",
141 "PDI",
144 /* HELPER FUNCTIONS */
146 static inline void dump_types(const char* header, WORD *types, int start, int end)
148 int i, len = 0;
149 TRACE("%s:",header);
150 for (i = start; i < end && len < 200; i++)
152 TRACE(" %s",debug_type[types[i]]);
153 len += strlen(debug_type[types[i]])+1;
155 if (i != end)
156 TRACE("...");
157 TRACE("\n");
160 /* Convert the libwine information to the direction enum */
161 static void classify(const WCHAR *string, WORD *chartype, DWORD count, const SCRIPT_CONTROL *c)
163 unsigned i;
165 for (i = 0; i < count; ++i)
167 chartype[i] = get_table_entry( bidi_direction_table, string[i] );
168 if (c->fLegacyBidiClass && chartype[i] == ES)
170 if (string[i] == '+' || string[i] == '-') chartype[i] = NI;
175 /* RESOLVE EXPLICIT */
177 static WORD GreaterEven(int i)
179 return odd(i) ? i + 1 : i + 2;
182 static WORD GreaterOdd(int i)
184 return odd(i) ? i + 2 : i + 1;
187 static WORD EmbeddingDirection(int level)
189 return odd(level) ? R : L;
192 /*------------------------------------------------------------------------
193 Function: resolveExplicit
195 Recursively resolves explicit embedding levels and overrides.
196 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
198 Input: Base embedding level and direction
199 Character count
201 Output: Array of embedding levels
203 In/Out: Array of direction classes
206 Note: The function uses two simple counters to keep track of
207 matching explicit codes and PDF. Use the default argument for
208 the outermost call. The nesting counter counts the recursion
209 depth and not the embedding level.
210 ------------------------------------------------------------------------*/
211 typedef struct tagStackItem {
212 int level;
213 int override;
214 BOOL isolate;
215 } StackItem;
217 #define push_stack(l,o,i) \
218 do { stack_top--; \
219 stack[stack_top].level = l; \
220 stack[stack_top].override = o; \
221 stack[stack_top].isolate = i;} while(0)
223 #define pop_stack() do { stack_top++; } while(0)
225 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
227 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride)
229 /* X1 */
230 int overflow_isolate_count = 0;
231 int overflow_embedding_count = 0;
232 int valid_isolate_count = 0;
233 int i;
235 StackItem stack[MAX_DEPTH+2];
236 int stack_top = MAX_DEPTH+1;
238 stack[stack_top].level = level;
239 stack[stack_top].override = NI;
240 stack[stack_top].isolate = FALSE;
242 if (initialOverride)
244 if (odd(level))
245 push_stack(level, R, FALSE);
246 else
247 push_stack(level, L, FALSE);
250 for (i = 0; i < count; i++)
252 poutOverrides[i] = stack[stack_top].override;
254 /* X2 */
255 if (pclass[i] == RLE)
257 int least_odd = GreaterOdd(stack[stack_top].level);
258 poutLevel[i] = stack[stack_top].level;
259 if (valid_level(least_odd))
260 push_stack(least_odd, NI, FALSE);
261 else if (overflow_isolate_count == 0)
262 overflow_embedding_count++;
264 /* X3 */
265 else if (pclass[i] == LRE)
267 int least_even = GreaterEven(stack[stack_top].level);
268 poutLevel[i] = stack[stack_top].level;
269 if (valid_level(least_even))
270 push_stack(least_even, NI, FALSE);
271 else if (overflow_isolate_count == 0)
272 overflow_embedding_count++;
274 /* X4 */
275 else if (pclass[i] == RLO)
277 int least_odd = GreaterOdd(stack[stack_top].level);
278 poutLevel[i] = stack[stack_top].level;
279 if (valid_level(least_odd))
280 push_stack(least_odd, R, FALSE);
281 else if (overflow_isolate_count == 0)
282 overflow_embedding_count++;
284 /* X5 */
285 else if (pclass[i] == LRO)
287 int least_even = GreaterEven(stack[stack_top].level);
288 poutLevel[i] = stack[stack_top].level;
289 if (valid_level(least_even))
290 push_stack(least_even, L, FALSE);
291 else if (overflow_isolate_count == 0)
292 overflow_embedding_count++;
294 /* X5a */
295 else if (pclass[i] == RLI)
297 int least_odd = GreaterOdd(stack[stack_top].level);
298 poutLevel[i] = stack[stack_top].level;
299 if (valid_level(least_odd))
301 valid_isolate_count++;
302 push_stack(least_odd, NI, TRUE);
304 else
305 overflow_isolate_count++;
307 /* X5b */
308 else if (pclass[i] == LRI)
310 int least_even = GreaterEven(stack[stack_top].level);
311 poutLevel[i] = stack[stack_top].level;
312 if (valid_level(least_even))
314 valid_isolate_count++;
315 push_stack(least_even, NI, TRUE);
317 else
318 overflow_isolate_count++;
320 /* X5c */
321 else if (pclass[i] == FSI)
323 int j;
324 int new_level = 0;
325 int skipping = 0;
326 poutLevel[i] = stack[stack_top].level;
327 for (j = i+1; j < count; j++)
329 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
331 skipping++;
332 continue;
334 else if (pclass[j] == PDI)
336 if (skipping)
337 skipping --;
338 else
339 break;
340 continue;
343 if (skipping) continue;
345 if (pclass[j] == L)
347 new_level = 0;
348 break;
350 else if (pclass[j] == R || pclass[j] == AL)
352 new_level = 1;
353 break;
356 if (odd(new_level))
358 int least_odd = GreaterOdd(stack[stack_top].level);
359 if (valid_level(least_odd))
361 valid_isolate_count++;
362 push_stack(least_odd, NI, TRUE);
364 else
365 overflow_isolate_count++;
367 else
369 int least_even = GreaterEven(stack[stack_top].level);
370 if (valid_level(least_even))
372 valid_isolate_count++;
373 push_stack(least_even, NI, TRUE);
375 else
376 overflow_isolate_count++;
379 /* X6 */
380 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
382 poutLevel[i] = stack[stack_top].level;
383 if (stack[stack_top].override != NI)
384 pclass[i] = stack[stack_top].override;
386 /* X6a */
387 else if (pclass[i] == PDI)
389 if (overflow_isolate_count) overflow_isolate_count--;
390 else if (!valid_isolate_count) {/* do nothing */}
391 else
393 overflow_embedding_count = 0;
394 while (!stack[stack_top].isolate) pop_stack();
395 pop_stack();
396 valid_isolate_count --;
398 poutLevel[i] = stack[stack_top].level;
400 /* X7 */
401 else if (pclass[i] == PDF)
403 poutLevel[i] = stack[stack_top].level;
404 if (overflow_isolate_count) {/* do nothing */}
405 else if (overflow_embedding_count) overflow_embedding_count--;
406 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
407 pop_stack();
409 /* X8: Nothing */
411 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
412 for (i = 0; i < count ; i++)
413 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
414 pclass[i] = BN;
417 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
419 if (index == -1 || index == back_fence) return index;
420 index --;
421 while (index > back_fence && pcls[index] == BN) index --;
422 return index;
425 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
427 if (index == front_fence) return index;
428 index ++;
429 while (index < front_fence && pcls[index] == BN) index ++;
430 return index;
433 typedef struct tagRun
435 int start;
436 int end;
437 WORD e;
438 } Run;
440 typedef struct tagRunChar
442 WCHAR ch;
443 WORD *pcls;
444 } RunChar;
446 typedef struct tagIsolatedRun
448 struct list entry;
449 int length;
450 WORD sos;
451 WORD eos;
452 WORD e;
454 RunChar item[1];
455 } IsolatedRun;
457 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
459 if (index >= (iso_run->length-1)) return -1;
460 index ++;
461 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
462 if (index == iso_run->length) return -1;
463 return index;
466 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
469 if (index <= 0) return -1;
470 index --;
471 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
472 return index;
475 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
477 int i, len = 0;
478 TRACE("%s:",header);
479 TRACE("[ ");
480 for (i = 0; i < iso_run->length && len < 200; i++)
482 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
483 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
485 if (i != iso_run->length)
486 TRACE("...");
487 TRACE(" ]\n");
490 /*------------------------------------------------------------------------
491 Function: resolveWeak
493 Resolves the directionality of numeric and other weak character types
495 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
497 Input: Array of embedding levels
498 Character count
500 In/Out: Array of directional classes
502 Note: On input only these directional classes are expected
503 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
504 ------------------------------------------------------------------------*/
506 static void resolveWeak(IsolatedRun * iso_run)
508 int i;
510 /* W1 */
511 for (i=0; i < iso_run->length; i++)
513 if (*iso_run->item[i].pcls == NSM)
515 int j = iso_previousValidChar(iso_run, i);
516 if (j == -1)
517 *iso_run->item[i].pcls = iso_run->sos;
518 else if (*iso_run->item[j].pcls >= LRI)
519 *iso_run->item[i].pcls = ON;
520 else
521 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
525 /* W2 */
526 for (i = 0; i < iso_run->length; i++)
528 if (*iso_run->item[i].pcls == EN)
530 int j = iso_previousValidChar(iso_run, i);
531 while (j > -1)
533 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
535 if (*iso_run->item[j].pcls == AL)
536 *iso_run->item[i].pcls = AN;
537 break;
539 j = iso_previousValidChar(iso_run, j);
544 /* W3 */
545 for (i = 0; i < iso_run->length; i++)
547 if (*iso_run->item[i].pcls == AL)
548 *iso_run->item[i].pcls = R;
551 /* W4 */
552 for (i = 0; i < iso_run->length; i++)
554 if (*iso_run->item[i].pcls == ES)
556 int b = iso_previousValidChar(iso_run, i);
557 int f = iso_nextValidChar(iso_run, i);
559 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
560 *iso_run->item[i].pcls = EN;
562 else if (*iso_run->item[i].pcls == CS)
564 int b = iso_previousValidChar(iso_run, i);
565 int f = iso_nextValidChar(iso_run, i);
567 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
568 *iso_run->item[i].pcls = EN;
569 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
570 *iso_run->item[i].pcls = AN;
574 /* W5 */
575 for (i = 0; i < iso_run->length; i++)
577 if (*iso_run->item[i].pcls == ET)
579 int j;
580 for (j = i-1 ; j > -1; j--)
582 if (*iso_run->item[j].pcls == BN) continue;
583 if (*iso_run->item[j].pcls == ET) continue;
584 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
585 else break;
587 if (*iso_run->item[i].pcls == ET)
589 for (j = i+1; j < iso_run->length; j++)
591 if (*iso_run->item[j].pcls == BN) continue;
592 if (*iso_run->item[j].pcls == ET) continue;
593 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
594 else break;
600 /* W6 */
601 for (i = 0; i < iso_run->length; i++)
603 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
605 int b = i-1;
606 int f = i+1;
607 if (b > -1 && *iso_run->item[b].pcls == BN)
608 *iso_run->item[b].pcls = ON;
609 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
610 *iso_run->item[f].pcls = ON;
612 *iso_run->item[i].pcls = ON;
616 /* W7 */
617 for (i = 0; i < iso_run->length; i++)
619 if (*iso_run->item[i].pcls == EN)
621 int j;
622 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
623 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
625 if (*iso_run->item[j].pcls == L)
626 *iso_run->item[i].pcls = L;
627 break;
629 if (iso_run->sos == L && j == -1)
630 *iso_run->item[i].pcls = L;
635 typedef struct tagBracketPair
637 int start;
638 int end;
639 } BracketPair;
641 static int __cdecl compr(const void *a, const void* b)
643 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
646 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
648 WCHAR *open_stack;
649 int *stack_index;
650 int stack_top = iso_run->length;
651 unsigned int pair_count = 0;
652 BracketPair *out = NULL;
653 SIZE_T out_size = 0;
654 int i;
656 open_stack = heap_alloc(iso_run->length * sizeof(*open_stack));
657 stack_index = heap_alloc(iso_run->length * sizeof(*stack_index));
659 for (i = 0; i < iso_run->length; i++)
661 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
663 if (!ubv)
664 continue;
666 if ((ubv >> 8) == 0)
668 --stack_top;
669 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
670 /* Deal with canonical equivalent U+2329/232A and U+3008/3009. */
671 if (open_stack[stack_top] == 0x232a)
672 open_stack[stack_top] = 0x3009;
673 stack_index[stack_top] = i;
675 else if ((ubv >> 8) == 1)
677 unsigned int j;
679 for (j = stack_top; j < iso_run->length; ++j)
681 WCHAR c = iso_run->item[i].ch;
683 if (c == 0x232a)
684 c = 0x3009;
686 if (c != open_stack[j])
687 continue;
689 if (!(usp10_array_reserve((void **)&out, &out_size, pair_count + 2, sizeof(*out))))
690 ERR("Failed to grow output array.\n");
692 out[pair_count].start = stack_index[j];
693 out[pair_count].end = i;
694 ++pair_count;
696 out[pair_count].start = -1;
697 stack_top = j + 1;
698 break;
703 heap_free(open_stack);
704 heap_free(stack_index);
706 if (!pair_count)
707 return NULL;
709 qsort(out, pair_count, sizeof(*out), compr);
711 return out;
714 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
716 /*------------------------------------------------------------------------
717 Function: resolveNeutrals
719 Resolves the directionality of neutral character types.
721 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
723 Input: Array of embedding levels
724 Character count
725 Baselevel
727 In/Out: Array of directional classes
729 Note: On input only these directional classes are expected
730 R, L, NI, AN, EN and BN
732 W8 resolves a number of ENs to L
733 ------------------------------------------------------------------------*/
734 static void resolveNeutrals(IsolatedRun *iso_run)
736 int i;
737 BracketPair *pairs = NULL;
739 /* Translate isolates into NI */
740 for (i = 0; i < iso_run->length; i++)
742 if (*iso_run->item[i].pcls >= LRI)
743 *iso_run->item[i].pcls = NI;
745 switch(*iso_run->item[i].pcls)
747 case B:
748 case S:
749 case WS: *iso_run->item[i].pcls = NI;
752 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
755 /* N0: Skipping bracketed pairs for now */
756 pairs = computeBracketPairs(iso_run);
757 if (pairs)
759 BracketPair *p = &pairs[0];
760 int i = 0;
761 while (p->start >= 0)
763 int j;
764 int e = EmbeddingDirection(iso_run->e);
765 int o = EmbeddingDirection(iso_run->e+1);
766 BOOL flag_o = FALSE;
767 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
769 /* N0.b */
770 for (j = p->start+1; j < p->end; j++)
772 if (N0_TYPE(*iso_run->item[j].pcls) == e)
774 *iso_run->item[p->start].pcls = e;
775 *iso_run->item[p->end].pcls = e;
776 break;
778 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
779 flag_o = TRUE;
781 /* N0.c */
782 if (j == p->end && flag_o)
784 for (j = p->start; j >= 0; j--)
786 if (N0_TYPE(*iso_run->item[j].pcls) == o)
788 *iso_run->item[p->start].pcls = o;
789 *iso_run->item[p->end].pcls = o;
790 break;
792 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
794 *iso_run->item[p->start].pcls = e;
795 *iso_run->item[p->end].pcls = e;
796 break;
799 if ( j < 0 )
801 *iso_run->item[p->start].pcls = iso_run->sos;
802 *iso_run->item[p->end].pcls = iso_run->sos;
806 i++;
807 p = &pairs[i];
809 heap_free(pairs);
812 /* N1 */
813 for (i = 0; i < iso_run->length; i++)
815 WORD l,r;
817 if (*iso_run->item[i].pcls == NI)
819 int j;
820 int b = iso_previousValidChar(iso_run, i);
822 if (b == -1)
824 l = iso_run->sos;
825 b = 0;
827 else
829 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
830 l = R;
831 else if (*iso_run->item[b].pcls == L)
832 l = L;
833 else /* No string type */
834 continue;
836 j = iso_nextValidChar(iso_run, i);
837 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
839 if (j == -1)
841 r = iso_run->eos;
842 j = iso_run->length;
844 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
845 r = R;
846 else if (*iso_run->item[j].pcls == L)
847 r = L;
848 else /* No string type */
849 continue;
851 if (r == l)
853 for (b = i; b < j && b < iso_run->length; b++)
854 *iso_run->item[b].pcls = r;
859 /* N2 */
860 for (i = 0; i < iso_run->length; i++)
862 if (*iso_run->item[i].pcls == NI)
864 int b = i-1;
865 int f = i+1;
866 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
867 if (b > -1 && *iso_run->item[b].pcls == BN)
868 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
869 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
870 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
875 /*------------------------------------------------------------------------
876 Function: resolveImplicit
878 Recursively resolves implicit embedding levels.
879 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
881 Input: Array of direction classes
882 Character count
883 Base level
885 In/Out: Array of embedding levels
887 Note: levels may exceed 15 on output.
888 Accepted subset of direction classes
889 R, L, AN, EN
890 ------------------------------------------------------------------------*/
891 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
893 int i;
895 /* I1/2 */
896 for (i = sos; i <= eos; i++)
898 if (pcls[i] == BN)
899 continue;
901 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
902 ASSERT(pcls[i] < 5); /* "Out of range." */
904 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
905 plevel[i]++;
906 else if (!odd(plevel[i]) && pcls[i] == R)
907 plevel[i]++;
908 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
909 plevel[i]+=2;
913 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
915 int i;
917 /* L1 */
918 for (i = sos; i <= eos; i++)
920 if (pcls[i] == B || pcls[i] == S)
922 int j = i -1;
923 while (i > sos && j >= sos &&
924 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
925 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
926 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
927 plevel[j--] = baselevel;
928 plevel[i] = baselevel;
930 else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO ||
931 pcls[i] == PDF || pcls[i] == BN)
933 plevel[i] = i ? plevel[i - 1] : baselevel;
935 if (i == eos &&
936 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
937 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
938 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
940 int j = i;
941 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
942 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
943 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
944 plevel[j--] = baselevel;
949 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, const WORD *pLevel,
950 const WCHAR *string, unsigned int uCount, struct list *set)
952 int run_start, run_end, i;
953 int run_count = 0;
954 Run *runs;
955 IsolatedRun *current_isolated;
957 if (!(runs = heap_calloc(uCount, sizeof(*runs))))
958 return;
960 list_init(set);
962 /* Build Runs */
963 run_start = 0;
964 while (run_start < uCount)
966 run_end = nextValidChar(pcls, run_start, uCount);
967 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
968 run_end --;
969 runs[run_count].start = run_start;
970 runs[run_count].end = run_end;
971 runs[run_count].e = pLevel[run_start];
972 run_start = nextValidChar(pcls, run_end, uCount);
973 run_count++;
976 /* Build Isolating Runs */
977 i = 0;
978 while (i < run_count)
980 int k = i;
981 if (runs[k].start >= 0)
983 int type_fence, real_end;
984 int j;
986 if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount]))))
987 break;
989 run_start = runs[k].start;
990 current_isolated->e = runs[k].e;
991 current_isolated->length = (runs[k].end - runs[k].start)+1;
993 for (j = 0; j < current_isolated->length; j++)
995 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
996 current_isolated->item[j].ch = string[runs[k].start + j];
999 run_end = runs[k].end;
1001 TRACE("{ [%i -- %i]",run_start, run_end);
1003 if (pcls[run_end] == BN)
1004 run_end = previousValidChar(pcls, run_end, runs[k].start);
1006 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1008 j = k+1;
1009 search:
1010 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1011 if (j < run_count && runs[i].e != runs[j].e)
1013 j++;
1014 goto search;
1017 if (j != run_count)
1019 int m;
1020 int l = current_isolated->length;
1022 current_isolated->length += (runs[j].end - runs[j].start)+1;
1023 for (m = 0; l < current_isolated->length; l++, m++)
1025 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1026 current_isolated->item[l].ch = string[runs[j].start + m];
1029 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1031 run_end = runs[j].end;
1032 if (pcls[run_end] == BN)
1033 run_end = previousValidChar(pcls, run_end, runs[i].start);
1034 runs[j].start = -1;
1035 k = j;
1037 else
1039 run_end = uCount;
1040 break;
1044 type_fence = previousValidChar(pcls, run_start, -1);
1046 if (type_fence == -1)
1047 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1048 else
1049 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1051 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1053 if (run_end == uCount)
1054 current_isolated->eos = current_isolated->sos;
1055 else
1057 /* eos could be an BN */
1058 if ( pcls[run_end] == BN )
1060 real_end = previousValidChar(pcls, run_end, run_start-1);
1061 if (real_end < run_start)
1062 real_end = run_start;
1064 else
1065 real_end = run_end;
1067 type_fence = nextValidChar(pcls, run_end, uCount);
1068 if (type_fence == uCount)
1069 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1070 else
1071 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1073 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1076 list_add_tail(set, &current_isolated->entry);
1077 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1079 i++;
1082 heap_free(runs);
1085 /*************************************************************
1086 * BIDI_DetermineLevels
1088 BOOL BIDI_DetermineLevels(
1089 const WCHAR *lpString, /* [in] The string for which information is to be returned */
1090 unsigned int uCount, /* [in] Number of WCHARs in string. */
1091 const SCRIPT_STATE *s,
1092 const SCRIPT_CONTROL *c,
1093 WORD *lpOutLevels, /* [out] final string levels */
1094 WORD *lpOutOverrides /* [out] final string overrides */
1097 WORD *chartype;
1098 unsigned baselevel = 0;
1099 struct list IsolatingRuns;
1100 IsolatedRun *iso_run, *next;
1102 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1104 if (!(chartype = heap_alloc(uCount * sizeof(*chartype))))
1106 WARN("Out of memory\n");
1107 return FALSE;
1110 baselevel = s->uBidiLevel;
1112 classify(lpString, chartype, uCount, c);
1113 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1115 memset(lpOutOverrides, 0, sizeof(WORD) * uCount);
1117 /* resolve explicit */
1118 resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection);
1119 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1121 /* X10/BD13: Computer Isolating runs */
1122 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1124 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1126 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1128 /* resolve weak */
1129 resolveWeak(iso_run);
1130 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1132 /* resolve neutrals */
1133 resolveNeutrals(iso_run);
1134 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1136 list_remove(&iso_run->entry);
1137 heap_free(iso_run);
1140 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1141 /* resolveImplicit */
1142 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1144 /* resolveResolvedLevels*/
1145 classify(lpString, chartype, uCount, c);
1146 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1148 heap_free(chartype);
1149 return TRUE;
1152 /* reverse cch indices */
1153 static void reverse(int *pidx, int cch)
1155 int temp;
1156 int ich = 0;
1157 for (; ich < --cch; ich++)
1159 temp = pidx[ich];
1160 pidx[ich] = pidx[cch];
1161 pidx[cch] = temp;
1166 /*------------------------------------------------------------------------
1167 Functions: reorder/reorderLevel
1169 Recursively reorders the display string
1170 "From the highest level down, reverse all characters at that level and
1171 higher, down to the lowest odd level"
1173 Implements rule L2 of the Unicode bidi Algorithm.
1175 Input: Array of embedding levels
1176 Character count
1177 Flag enabling reversal (set to false by initial caller)
1179 In/Out: Text to reorder
1181 Note: levels may exceed 15 resp. 61 on input.
1183 Rule L3 - reorder combining marks is not implemented here
1184 Rule L4 - glyph mirroring is implemented as a display option below
1186 Note: this should be applied a line at a time
1187 -------------------------------------------------------------------------*/
1188 int BIDI_ReorderV2lLevel(int level, int *pIndices, const BYTE* plevel, int cch, BOOL fReverse)
1190 int ich = 0;
1192 /* true as soon as first odd level encountered */
1193 fReverse = fReverse || odd(level);
1195 for (; ich < cch; ich++)
1197 if (plevel[ich] < level)
1199 break;
1201 else if (plevel[ich] > level)
1203 ich += BIDI_ReorderV2lLevel(level + 1, pIndices + ich, plevel + ich,
1204 cch - ich, fReverse) - 1;
1207 if (fReverse)
1209 reverse(pIndices, ich);
1211 return ich;
1214 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1215 int BIDI_ReorderL2vLevel(int level, int *pIndices, const BYTE* plevel, int cch, BOOL fReverse)
1217 int ich = 0;
1218 int newlevel = -1;
1220 /* true as soon as first odd level encountered */
1221 fReverse = fReverse || odd(level);
1223 for (; ich < cch; ich++)
1225 if (plevel[ich] < level)
1226 break;
1227 else if (plevel[ich] > level)
1228 newlevel = ich;
1230 if (fReverse)
1232 reverse(pIndices, ich);
1235 if (newlevel >= 0)
1237 ich = 0;
1238 for (; ich < cch; ich++)
1239 if (plevel[ich] < level)
1240 break;
1241 else if (plevel[ich] > level)
1242 ich += BIDI_ReorderL2vLevel(level + 1, pIndices + ich, plevel + ich,
1243 cch - ich, fReverse) - 1;
1246 return ich;
1249 BOOL BIDI_GetStrengths(const WCHAR *string, unsigned int count, const SCRIPT_CONTROL *c, WORD *strength)
1251 unsigned int i;
1253 classify(string, strength, count, c);
1254 for (i = 0; i < count; i++)
1256 switch (strength[i])
1258 case L:
1259 case LRE:
1260 case LRO:
1261 case R:
1262 case AL:
1263 case RLE:
1264 case RLO:
1265 strength[i] = BIDI_STRONG;
1266 break;
1267 case PDF:
1268 case EN:
1269 case ES:
1270 case ET:
1271 case AN:
1272 case CS:
1273 case BN:
1274 strength[i] = BIDI_WEAK;
1275 break;
1276 case B:
1277 case S:
1278 case WS:
1279 case ON:
1280 default: /* Neutrals and NSM */
1281 strength[i] = BIDI_NEUTRAL;
1284 return TRUE;