usp10: Rewrite resolveExplicit for Unicode 6.3.
[wine/multimedia.git] / dlls / usp10 / bidi.c
blob6996f492fb4afb56491fd8a91fcde55a25f6492b
1 /*
2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include "config.h"
46 #include <stdarg.h>
47 #include "windef.h"
48 #include "winbase.h"
49 #include "wingdi.h"
50 #include "winnls.h"
51 #include "usp10.h"
52 #include "wine/unicode.h"
53 #include "wine/debug.h"
55 #include "usp10_internal.h"
57 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
59 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
60 #define MAX_DEPTH 125
62 /* HELPER FUNCTIONS AND DECLARATIONS */
64 /*------------------------------------------------------------------------
65 Bidirectional Character Types
67 as defined by the Unicode Bidirectional Algorithm Table 3-7.
69 Note:
71 The list of bidirectional character types here is not grouped the
72 same way as the table 3-7, since the numberic values for the types
73 are chosen to keep the state and action tables compact.
74 ------------------------------------------------------------------------*/
75 enum directions
77 /* input types */
78 /* ON MUST be zero, code relies on ON = NI = 0 */
79 ON = 0, /* Other Neutral */
80 L, /* Left Letter */
81 R, /* Right Letter */
82 AN, /* Arabic Number */
83 EN, /* European Number */
84 AL, /* Arabic Letter (Right-to-left) */
85 NSM, /* Non-spacing Mark */
86 CS, /* Common Separator */
87 ES, /* European Separator */
88 ET, /* European Terminator (post/prefix e.g. $ and %) */
90 /* resolved types */
91 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
93 /* input types, */
94 S, /* Segment Separator (TAB) // used only in L1 */
95 WS, /* White space // used only in L1 */
96 B, /* Paragraph Separator (aka as PS) */
98 /* types for explicit controls */
99 RLO, /* these are used only in X1-X9 */
100 RLE,
101 LRO,
102 LRE,
103 PDF,
105 LRI, /* Isolate formatting characters new with 6.3 */
106 RLI,
107 FSI,
108 PDI,
110 /* resolved types, also resolved directions */
111 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
114 static const char debug_type[][4] =
116 "ON", /* Other Neutral */
117 "L", /* Left Letter */
118 "R", /* Right Letter */
119 "AN", /* Arabic Number */
120 "EN", /* European Number */
121 "AL", /* Arabic Letter (Right-to-left) */
122 "NSM", /* Non-spacing Mark */
123 "CS", /* Common Separator */
124 "ES", /* European Separator */
125 "ET", /* European Terminator (post/prefix e.g. $ and %) */
126 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
127 "S", /* Segment Separator (TAB) // used only in L1 */
128 "WS", /* White space // used only in L1 */
129 "B", /* Paragraph Separator (aka as PS) */
130 "RLO", /* these are used only in X1-X9 */
131 "RLE",
132 "LRO",
133 "LRE",
134 "PDF",
135 "LRI", /* Isolate formatting characters new with 6.3 */
136 "RLI",
137 "FSI",
138 "PDI",
141 /* HELPER FUNCTIONS */
143 static inline void dump_types(const char* header, WORD *types, int start, int end)
145 int i;
146 TRACE("%s:",header);
147 for (i = start; i< end; i++)
148 TRACE(" %s",debug_type[types[i]]);
149 TRACE("\n");
152 /* Convert the libwine information to the direction enum */
153 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
155 static const enum directions dir_map[16] =
157 L, /* unassigned defaults to L */
170 NSM,
172 PDF /* also LRE, LRO, RLE, RLO */
175 unsigned i;
177 for (i = 0; i < uCount; ++i)
179 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
180 switch (chartype[i])
182 case ES:
183 if (!c->fLegacyBidiClass) break;
184 switch (lpString[i])
186 case '-':
187 case '+': chartype[i] = NI; break;
188 case '/': chartype[i] = CS; break;
190 break;
191 case PDF:
192 switch (lpString[i])
194 case 0x202A: chartype[i] = LRE; break;
195 case 0x202B: chartype[i] = RLE; break;
196 case 0x202C: chartype[i] = PDF; break;
197 case 0x202D: chartype[i] = LRO; break;
198 case 0x202E: chartype[i] = RLO; break;
199 case 0x2066: chartype[i] = LRI; break;
200 case 0x2067: chartype[i] = RLI; break;
201 case 0x2068: chartype[i] = FSI; break;
202 case 0x2069: chartype[i] = PDI; break;
204 break;
209 /* Set a run of cval values at locations all prior to, but not including */
210 /* iStart, to the new value nval. */
211 static void SetDeferredRun(WORD *pval, int cval, int iStart, int nval)
213 int i = iStart - 1;
214 for (; i >= iStart - cval; i--)
216 pval[i] = nval;
220 /* RESOLVE EXPLICIT */
222 static WORD GreaterEven(int i)
224 return odd(i) ? i + 1 : i + 2;
227 static WORD GreaterOdd(int i)
229 return odd(i) ? i + 2 : i + 1;
232 static WORD EmbeddingDirection(int level)
234 return odd(level) ? R : L;
237 /*------------------------------------------------------------------------
238 Function: resolveExplicit
240 Recursively resolves explicit embedding levels and overrides.
241 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
243 Input: Base embedding level and direction
244 Character count
246 Output: Array of embedding levels
248 In/Out: Array of direction classes
251 Note: The function uses two simple counters to keep track of
252 matching explicit codes and PDF. Use the default argument for
253 the outermost call. The nesting counter counts the recursion
254 depth and not the embedding level.
255 ------------------------------------------------------------------------*/
256 typedef struct tagStackItem {
257 int level;
258 int override;
259 BOOL isolate;
260 } StackItem;
262 #define push_stack(l,o,i) \
263 do { stack_top--; \
264 stack[stack_top].level = l; \
265 stack[stack_top].override = o; \
266 stack[stack_top].isolate = i;} while(0)
268 #define pop_stack() do { stack_top++; } while(0)
270 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
272 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, int count)
274 /* X1 */
275 int overflow_isolate_count = 0;
276 int overflow_embedding_count = 0;
277 int valid_isolate_count = 0;
278 int i;
280 StackItem stack[MAX_DEPTH+2];
281 int stack_top = MAX_DEPTH+1;
283 stack[stack_top].level = level;
284 stack[stack_top].override = NI;
285 stack[stack_top].isolate = FALSE;
287 for (i = 0; i < count; i++)
289 /* X2 */
290 if (pclass[i] == RLE)
292 int least_odd = GreaterOdd(stack[stack_top].level);
293 poutLevel[i] = stack[stack_top].level;
294 if (valid_level(least_odd))
295 push_stack(least_odd, NI, FALSE);
296 else if (overflow_isolate_count == 0)
297 overflow_embedding_count++;
299 /* X3 */
300 else if (pclass[i] == LRE)
302 int least_even = GreaterEven(stack[stack_top].level);
303 poutLevel[i] = stack[stack_top].level;
304 if (valid_level(least_even))
305 push_stack(least_even, NI, FALSE);
306 else if (overflow_isolate_count == 0)
307 overflow_embedding_count++;
309 /* X4 */
310 else if (pclass[i] == RLO)
312 int least_odd = GreaterOdd(stack[stack_top].level);
313 poutLevel[i] = stack[stack_top].level;
314 if (valid_level(least_odd))
315 push_stack(least_odd, R, FALSE);
316 else if (overflow_isolate_count == 0)
317 overflow_embedding_count++;
319 /* X5 */
320 else if (pclass[i] == LRO)
322 int least_even = GreaterEven(stack[stack_top].level);
323 poutLevel[i] = stack[stack_top].level;
324 if (valid_level(least_even))
325 push_stack(least_even, L, FALSE);
326 else if (overflow_isolate_count == 0)
327 overflow_embedding_count++;
329 /* X5a */
330 else if (pclass[i] == RLI)
332 int least_odd = GreaterOdd(stack[stack_top].level);
333 poutLevel[i] = stack[stack_top].level;
334 if (valid_level(least_odd))
336 valid_isolate_count++;
337 push_stack(least_odd, NI, TRUE);
339 else
340 overflow_isolate_count++;
341 pclass[i] = NI;
343 /* X5b */
344 else if (pclass[i] == LRI)
346 int least_even = GreaterEven(stack[stack_top].level);
347 poutLevel[i] = stack[stack_top].level;
348 if (valid_level(least_even))
350 valid_isolate_count++;
351 push_stack(least_even, NI, TRUE);
353 else
354 overflow_isolate_count++;
355 pclass[i] = NI;
357 /* X5c */
358 else if (pclass[i] == FSI)
360 int j;
361 int new_level = 0;
362 int skipping = 0;
363 poutLevel[i] = stack[stack_top].level;
364 for (j = i+1; j < count; j++)
366 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
368 skipping++;
369 continue;
371 else if (pclass[j] == PDI)
373 if (skipping)
374 skipping --;
375 else
376 break;
377 continue;
380 if (skipping) continue;
382 if (pclass[j] == L)
384 new_level = 0;
385 break;
387 else if (pclass[j] == R || pclass[j] == AL)
389 new_level = 1;
390 break;
393 if (odd(new_level))
395 int least_odd = GreaterOdd(stack[stack_top].level);
396 if (valid_level(least_odd))
398 valid_isolate_count++;
399 push_stack(least_odd, NI, TRUE);
401 else
402 overflow_isolate_count++;
404 else
406 int least_even = GreaterEven(stack[stack_top].level);
407 if (valid_level(least_even))
409 valid_isolate_count++;
410 push_stack(least_even, NI, TRUE);
412 else
413 overflow_isolate_count++;
415 pclass[i] = NI;
417 /* X6 */
418 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
420 poutLevel[i] = stack[stack_top].level;
421 if (stack[stack_top].override != NI)
422 pclass[i] = stack[stack_top].override;
424 /* X6a */
425 else if (pclass[i] == PDI)
427 if (overflow_isolate_count) overflow_isolate_count--;
428 else if (!valid_isolate_count) {/* do nothing */}
429 else
431 overflow_embedding_count = 0;
432 while (!stack[stack_top].isolate) pop_stack();
433 pop_stack();
434 valid_isolate_count --;
436 poutLevel[i] = stack[stack_top].level;
437 pclass[i] = NI;
439 /* X7 */
440 else if (pclass[i] == PDF)
442 poutLevel[i] = stack[stack_top].level;
443 if (overflow_isolate_count) {/* do nothing */}
444 else if (overflow_embedding_count) overflow_embedding_count--;
445 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
446 pop_stack();
448 /* X8: Nothing */
450 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
451 for (i = 0; i < count ; i++)
452 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
453 pclass[i] = BN;
456 /* RESOLVE WEAK TYPES */
458 enum states /* possible states */
460 xa, /* Arabic letter */
461 xr, /* right letter */
462 xl, /* left letter */
464 ao, /* Arabic lett. foll by ON */
465 ro, /* right lett. foll by ON */
466 lo, /* left lett. foll by ON */
468 rt, /* ET following R */
469 lt, /* ET following L */
471 cn, /* EN, AN following AL */
472 ra, /* Arabic number foll R */
473 re, /* European number foll R */
474 la, /* Arabic number foll L */
475 le, /* European number foll L */
477 ac, /* CS following cn */
478 rc, /* CS following ra */
479 rs, /* CS,ES following re */
480 lc, /* CS following la */
481 ls, /* CS,ES following le */
483 ret, /* ET following re */
484 let, /* ET following le */
487 static const int stateWeak[][10] =
489 /* NI, L, R, AN, EN, AL,NSM, CS, ES, ET */
490 /*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* Arabic letter */
491 /*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */
492 /*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */
494 /*ao*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* Arabic lett. foll by ON*/
495 /*ro*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* right lett. foll by ON */
496 /*lo*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* left lett. foll by ON */
498 /*rt*/ { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R */
499 /*lt*/ { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L */
501 /*cn*/ { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL */
502 /*ra*/ { ro, xl, xr, ra, re, xa, ra, rc, ro, rt }, /* Arabic number foll R */
503 /*re*/ { ro, xl, xr, ra, re, xa, re, rs, rs,ret }, /* European number foll R */
504 /*la*/ { lo, xl, xr, la, le, xa, la, lc, lo, lt }, /* Arabic number foll L */
505 /*le*/ { lo, xl, xr, la, le, xa, le, ls, ls,let }, /* European number foll L */
507 /*ac*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn */
508 /*rc*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra */
509 /*rs*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re */
510 /*lc*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la */
511 /*ls*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le */
513 /*ret*/{ ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re */
514 /*let*/{ lo, xl, xr, la, le, xa,let, lo, lo,let }, /* ET following le */
517 enum actions /* possible actions */
519 /* primitives */
520 IX = 0x100, /* increment */
521 XX = 0xF, /* no-op */
523 /* actions */
524 xxx = (XX << 4) + XX, /* no-op */
525 xIx = IX + xxx, /* increment run */
526 xxN = (XX << 4) + ON, /* set current to NI */
527 xxE = (XX << 4) + EN, /* set current to EN */
528 xxA = (XX << 4) + AN, /* set current to AN */
529 xxR = (XX << 4) + R, /* set current to R */
530 xxL = (XX << 4) + L, /* set current to L */
531 Nxx = (ON << 4) + 0xF, /* set run to neutral */
532 Axx = (AN << 4) + 0xF, /* set run to AN */
533 ExE = (EN << 4) + EN, /* set run to EN, set current to EN */
534 NIx = (ON << 4) + 0xF + IX, /* set run to NI, increment */
535 NxN = (ON << 4) + ON, /* set run to NI, set current to NI */
536 NxR = (ON << 4) + R, /* set run to NI, set current to R */
537 NxE = (ON << 4) + EN, /* set run to NI, set current to EN */
539 AxA = (AN << 4) + AN, /* set run to AN, set current to AN */
540 NxL = (ON << 4) + L, /* set run to NI, set current to L */
541 LxL = (L << 4) + L, /* set run to L, set current to L */
544 static const int actionWeak[][10] =
546 /* NI, L, R, AN, EN, AL, NSM, CS, ES, ET */
547 /*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* Arabic letter */
548 /*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter */
549 /*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */
551 /*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* Arabic lett. foll by ON */
552 /*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON */
553 /*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON */
555 /*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R */
556 /*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L */
558 /*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following AL */
559 /*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* Arabic number foll R */
560 /*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* European number foll R */
561 /*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* Arabic number foll L */
562 /*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* European number foll L */
564 /*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn */
565 /*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra */
566 /*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re */
567 /*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS following la */
568 /*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le */
570 /*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re */
571 /*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL }, /* ET following le */
574 static int GetDeferredType(int action)
576 return (action >> 4) & 0xF;
579 static int GetResolvedType(int action)
581 return action & 0xF;
584 /* Note on action table:
586 States can be of two kinds:
587 - Immediate Resolution State, where each input token
588 is resolved as soon as it is seen. These states have
589 only single action codes (xxN) or the no-op (xxx)
590 for static input tokens.
591 - Deferred Resolution State, where input tokens either
592 either extend the run (xIx) or resolve its Type (e.g. Nxx).
594 Input classes are of three kinds
595 - Static Input Token, where the class of the token remains
596 unchanged on output (AN, L, NI, R)
597 - Replaced Input Token, where the class of the token is
598 always replaced on output (AL, BN, NSM, CS, ES, ET)
599 - Conditional Input Token, where the class of the token is
600 changed on output in some, but not all, cases (EN)
602 Where tokens are subject to change, a double action
603 (e.g. NxA, or NxN) is _required_ after deferred states,
604 resolving both the deferred state and changing the current token.
607 /*------------------------------------------------------------------------
608 Function: resolveWeak
610 Resolves the directionality of numeric and other weak character types
612 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
614 Input: Array of embedding levels
615 Character count
617 In/Out: Array of directional classes
619 Note: On input only these directional classes are expected
620 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
621 ------------------------------------------------------------------------*/
622 static void resolveWeak(int baselevel, WORD *pcls, WORD *plevel, int cch)
624 int state = odd(baselevel) ? xr : xl;
625 int cls;
627 int level = baselevel;
628 int action, clsRun, clsNew;
629 int cchRun = 0;
630 int ich = 0;
632 for (; ich < cch; ich++)
634 /* ignore boundary neutrals */
635 if (pcls[ich] == BN)
637 /* must flatten levels unless at a level change; */
638 plevel[ich] = level;
640 /* lookahead for level changes */
641 if (ich + 1 == cch && level != baselevel)
643 /* have to fixup last BN before end of the loop, since
644 * its fix-upped value will be needed below the assert */
645 pcls[ich] = EmbeddingDirection(level);
647 else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BN)
649 /* fixup LAST BN in front / after a level run to make
650 * it act like the SOR/EOR in rule X10 */
651 int newlevel = plevel[ich+1];
652 if (level > newlevel) {
653 newlevel = level;
655 plevel[ich] = newlevel;
657 /* must match assigned level */
658 pcls[ich] = EmbeddingDirection(newlevel);
659 level = plevel[ich+1];
661 else
663 /* don't interrupt runs */
664 if (cchRun)
666 cchRun++;
668 continue;
672 ASSERT(pcls[ich] <= BN);
673 cls = pcls[ich];
675 action = actionWeak[state][cls];
677 /* resolve the directionality for deferred runs */
678 clsRun = GetDeferredType(action);
679 if (clsRun != XX)
681 SetDeferredRun(pcls, cchRun, ich, clsRun);
682 cchRun = 0;
685 /* resolve the directionality class at the current location */
686 clsNew = GetResolvedType(action);
687 if (clsNew != XX)
688 pcls[ich] = clsNew;
690 /* increment a deferred run */
691 if (IX & action)
692 cchRun++;
694 state = stateWeak[state][cls];
697 /* resolve any deferred runs
698 * use the direction of the current level to emulate PDF */
699 cls = EmbeddingDirection(level);
701 /* resolve the directionality for deferred runs */
702 clsRun = GetDeferredType(actionWeak[state][cls]);
703 if (clsRun != XX)
704 SetDeferredRun(pcls, cchRun, ich, clsRun);
707 /* RESOLVE NEUTRAL TYPES */
709 /* action values */
710 enum neutralactions
712 /* action to resolve previous input */
713 nL = L, /* resolve EN to L */
714 En = 3 << 4, /* resolve neutrals run to embedding level direction */
715 Rn = R << 4, /* resolve neutrals run to strong right */
716 Ln = L << 4, /* resolved neutrals run to strong left */
717 In = (1<<8), /* increment count of deferred neutrals */
718 LnL = (1<<4)+L, /* set run and EN to L */
721 static int GetDeferredNeutrals(int action, int level)
723 action = (action >> 4) & 0xF;
724 if (action == (En >> 4))
725 return EmbeddingDirection(level);
726 else
727 return action;
730 static int GetResolvedNeutrals(int action)
732 action = action & 0xF;
733 if (action == In)
734 return 0;
735 else
736 return action;
739 /* state values */
740 enum resolvestates
742 /* new temporary class */
743 r, /* R and characters resolved to R */
744 l, /* L and characters resolved to L */
745 rn, /* NI preceded by right */
746 ln, /* NI preceded by left */
747 a, /* AN preceded by left (the abbreviation 'la' is used up above) */
748 na, /* NI preceded by a */
752 /*------------------------------------------------------------------------
753 Notes:
755 By rule W7, whenever a EN is 'dominated' by an L (including start of
756 run with embedding direction = L) it is resolved to, and further treated
757 as L.
759 This leads to the need for 'a' and 'na' states.
760 ------------------------------------------------------------------------*/
762 static const int actionNeutrals[][5] =
764 /* NI, L, R, AN, EN = cls */
765 { In, 0, 0, 0, 0 }, /* r right */
766 { In, 0, 0, 0, L }, /* l left */
768 { In, En, Rn, Rn, Rn }, /* rn NI preceded by right */
769 { In, Ln, En, En, LnL}, /* ln NI preceded by left */
771 { In, 0, 0, 0, L }, /* a AN preceded by left */
772 { In, En, Rn, Rn, En }, /* na NI preceded by a */
775 static const int stateNeutrals[][5] =
777 /* NI, L, R, AN, EN */
778 { rn, l, r, r, r }, /* r right */
779 { ln, l, r, a, l }, /* l left */
781 { rn, l, r, r, r }, /* rn NI preceded by right */
782 { ln, l, r, a, l }, /* ln NI preceded by left */
784 { na, l, r, a, l }, /* a AN preceded by left */
785 { na, l, r, a, l }, /* na NI preceded by la */
788 /*------------------------------------------------------------------------
789 Function: resolveNeutrals
791 Resolves the directionality of neutral character types.
793 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
795 Input: Array of embedding levels
796 Character count
797 Baselevel
799 In/Out: Array of directional classes
801 Note: On input only these directional classes are expected
802 R, L, NI, AN, EN and BN
804 W8 resolves a number of ENs to L
805 ------------------------------------------------------------------------*/
806 static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int cch)
808 /* the state at the start of text depends on the base level */
809 int state = odd(baselevel) ? r : l;
810 int cls;
812 int cchRun = 0;
813 int level = baselevel;
815 int action, clsRun, clsNew;
816 int ich = 0;
817 for (; ich < cch; ich++)
819 /* ignore boundary neutrals */
820 if (pcls[ich] == BN)
822 /* include in the count for a deferred run */
823 if (cchRun)
824 cchRun++;
826 /* skip any further processing */
827 continue;
830 ASSERT(pcls[ich] < 5); /* "Only NI, L, R, AN, EN are allowed" */
831 cls = pcls[ich];
833 action = actionNeutrals[state][cls];
835 /* resolve the directionality for deferred runs */
836 clsRun = GetDeferredNeutrals(action, level);
837 if (clsRun != NI)
839 SetDeferredRun(pcls, cchRun, ich, clsRun);
840 cchRun = 0;
843 /* resolve the directionality class at the current location */
844 clsNew = GetResolvedNeutrals(action);
845 if (clsNew != NI)
846 pcls[ich] = clsNew;
848 if (In & action)
849 cchRun++;
851 state = stateNeutrals[state][cls];
852 level = plevel[ich];
855 /* resolve any deferred runs */
856 cls = EmbeddingDirection(level); /* eor has type of current level */
858 /* resolve the directionality for deferred runs */
859 clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level);
860 if (clsRun != NI)
861 SetDeferredRun(pcls, cchRun, ich, clsRun);
864 /* RESOLVE IMPLICIT */
866 /*------------------------------------------------------------------------
867 Function: resolveImplicit
869 Recursively resolves implicit embedding levels.
870 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
872 Input: Array of direction classes
873 Character count
874 Base level
876 In/Out: Array of embedding levels
878 Note: levels may exceed 15 on output.
879 Accepted subset of direction classes
880 R, L, AN, EN
881 ------------------------------------------------------------------------*/
882 static const WORD addLevel[][4] =
884 /* L, R, AN, EN */
885 /* even */ { 0, 1, 2, 2, },
886 /* odd */ { 1, 0, 1, 1, }
890 static void resolveImplicit(const WORD * pcls, WORD *plevel, int cch)
892 int ich = 0;
893 for (; ich < cch; ich++)
895 /* cannot resolve bn here, since some bn were resolved to strong
896 * types in resolveWeak. To remove these we need the original
897 * types, which are available again in resolveWhiteSpace */
898 if (pcls[ich] == BN)
900 continue;
902 ASSERT(pcls[ich] > 0); /* "No Neutrals allowed to survive here." */
903 ASSERT(pcls[ich] < 5); /* "Out of range." */
904 plevel[ich] += addLevel[odd(plevel[ich])][pcls[ich] - 1];
908 /*************************************************************
909 * BIDI_DeterminLevels
911 BOOL BIDI_DetermineLevels(
912 LPCWSTR lpString, /* [in] The string for which information is to be returned */
913 INT uCount, /* [in] Number of WCHARs in string. */
914 const SCRIPT_STATE *s,
915 const SCRIPT_CONTROL *c,
916 WORD *lpOutLevels /* [out] final string levels */
919 WORD *chartype;
920 unsigned baselevel = 0;
921 INT j;
922 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
924 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
925 if (!chartype)
927 WARN("Out of memory\n");
928 return FALSE;
931 baselevel = s->uBidiLevel;
933 classify(lpString, chartype, uCount, c);
934 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
936 for (j = 0; j < uCount; ++j)
937 switch(chartype[j])
939 case B:
940 case S:
941 case WS:
942 case ON: chartype[j] = NI;
943 default: continue;
946 /* resolve explicit */
947 resolveExplicit(baselevel, chartype, lpOutLevels, uCount);
948 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
950 /* resolve weak */
951 resolveWeak(baselevel, chartype, lpOutLevels, uCount);
953 /* resolve neutrals */
954 resolveNeutrals(baselevel, chartype, lpOutLevels, uCount);
956 /* resolveImplicit */
957 resolveImplicit(chartype, lpOutLevels, uCount);
959 HeapFree(GetProcessHeap(), 0, chartype);
960 return TRUE;
963 /* reverse cch indexes */
964 static void reverse(int *pidx, int cch)
966 int temp;
967 int ich = 0;
968 for (; ich < --cch; ich++)
970 temp = pidx[ich];
971 pidx[ich] = pidx[cch];
972 pidx[cch] = temp;
977 /*------------------------------------------------------------------------
978 Functions: reorder/reorderLevel
980 Recursively reorders the display string
981 "From the highest level down, reverse all characters at that level and
982 higher, down to the lowest odd level"
984 Implements rule L2 of the Unicode bidi Algorithm.
986 Input: Array of embedding levels
987 Character count
988 Flag enabling reversal (set to false by initial caller)
990 In/Out: Text to reorder
992 Note: levels may exceed 15 resp. 61 on input.
994 Rule L3 - reorder combining marks is not implemented here
995 Rule L4 - glyph mirroring is implemented as a display option below
997 Note: this should be applied a line at a time
998 -------------------------------------------------------------------------*/
999 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1001 int ich = 0;
1003 /* true as soon as first odd level encountered */
1004 fReverse = fReverse || odd(level);
1006 for (; ich < cch; ich++)
1008 if (plevel[ich] < level)
1010 break;
1012 else if (plevel[ich] > level)
1014 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1015 cch - ich, fReverse) - 1;
1018 if (fReverse)
1020 reverse(pIndexs, ich);
1022 return ich;
1025 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1026 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1028 int ich = 0;
1029 int newlevel = -1;
1031 /* true as soon as first odd level encountered */
1032 fReverse = fReverse || odd(level);
1034 for (; ich < cch; ich++)
1036 if (plevel[ich] < level)
1037 break;
1038 else if (plevel[ich] > level)
1039 newlevel = ich;
1041 if (fReverse)
1043 reverse(pIndexs, ich);
1046 if (newlevel >= 0)
1048 ich = 0;
1049 for (; ich < cch; ich++)
1050 if (plevel[ich] < level)
1051 break;
1052 else if (plevel[ich] > level)
1053 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1054 cch - ich, fReverse) - 1;
1057 return ich;
1060 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
1061 WORD* lpStrength)
1063 int i;
1064 classify(lpString, lpStrength, uCount, c);
1066 for ( i = 0; i < uCount; i++)
1068 switch(lpStrength[i])
1070 case L:
1071 case LRE:
1072 case LRO:
1073 case R:
1074 case AL:
1075 case RLE:
1076 case RLO:
1077 lpStrength[i] = BIDI_STRONG;
1078 break;
1079 case PDF:
1080 case EN:
1081 case ES:
1082 case ET:
1083 case AN:
1084 case CS:
1085 case BN:
1086 lpStrength[i] = BIDI_WEAK;
1087 break;
1088 case B:
1089 case S:
1090 case WS:
1091 case ON:
1092 default: /* Neutrals and NSM */
1093 lpStrength[i] = BIDI_NEUTRAL;
1096 return TRUE;