ups10: Reimplement ScriptLayout to properly handle mixed runs.
[wine.git] / dlls / usp10 / bidi.c
blob2763453276c2f8750357ed9e19f8872bba0dc945
1 /*
2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include "config.h"
46 #include <stdarg.h>
47 #include "windef.h"
48 #include "winbase.h"
49 #include "wingdi.h"
50 #include "winnls.h"
51 #include "usp10.h"
52 #include "wine/debug.h"
54 #include "usp10_internal.h"
56 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
58 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
59 #define MAX_LEVEL 61
61 /* HELPER FUNCTIONS AND DECLARATIONS */
63 /*------------------------------------------------------------------------
64 Bidirectional Character Types
66 as defined by the Unicode Bidirectional Algorithm Table 3-7.
68 Note:
70 The list of bidirectional character types here is not grouped the
71 same way as the table 3-7, since the numberic values for the types
72 are chosen to keep the state and action tables compact.
73 ------------------------------------------------------------------------*/
74 enum directions
76 /* input types */
77 /* ON MUST be zero, code relies on ON = N = 0 */
78 ON = 0, /* Other Neutral */
79 L, /* Left Letter */
80 R, /* Right Letter */
81 AN, /* Arabic Number */
82 EN, /* European Number */
83 AL, /* Arabic Letter (Right-to-left) */
84 NSM, /* Non-spacing Mark */
85 CS, /* Common Separator */
86 ES, /* European Separator */
87 ET, /* European Terminator (post/prefix e.g. $ and %) */
89 /* resolved types */
90 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
92 /* input types, */
93 S, /* Segment Separator (TAB) // used only in L1 */
94 WS, /* White space // used only in L1 */
95 B, /* Paragraph Separator (aka as PS) */
97 /* types for explicit controls */
98 RLO, /* these are used only in X1-X9 */
99 RLE,
100 LRO,
101 LRE,
102 PDF,
104 /* resolved types, also resolved directions */
105 N = ON, /* alias, where ON, WS and S are treated the same */
108 /* HELPER FUNCTIONS */
110 /* grep -r ';BN;' data.txt | grep -v [0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F] | sed -e s@\;.*@@ -e s/^..../0x\&,\ / | xargs echo */
111 static const WCHAR BNs[] = {
112 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
113 0x000E, 0x000F, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016,
114 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x007F, 0x0080, 0x0081, 0x0082,
115 0x0083, 0x0084, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C,
116 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
117 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E,
118 0x009F, 0x00AD, 0x070F, 0x200B, 0x200C, 0x200D, 0x2060, 0x2061, 0x2062,
119 0x2063, 0x206A, 0x206B, 0x206C, 0x206D, 0x206E, 0x206F, 0xFEFF
122 /* Idem, but with ';R;' instead of ';BN;' */
123 static const WCHAR Rs[] = {
124 0x05BE, 0x05C0, 0x05C3, 0x05C6, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4,
125 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD,
126 0x05DE, 0x05DF, 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6,
127 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4,
128 0x07C0, 0x07C1, 0x07C2, 0x07C3, 0x07C4, 0x07C5, 0x07C6, 0x07C7, 0x07C8,
129 0x07C9, 0x07CA, 0x07CB, 0x07CC, 0x07CD, 0x07CE, 0x07CF, 0x07D0, 0x07D1,
130 0x07D2, 0x07D3, 0x07D4, 0x07D5, 0x07D6, 0x07D7, 0x07D8, 0x07D9, 0x07DA,
131 0x07DB, 0x07DC, 0x07DD, 0x07DE, 0x07DF, 0x07E0, 0x07E1, 0x07E2, 0x07E3,
132 0x07E4, 0x07E5, 0x07E6, 0x07E7, 0x07E8, 0x07E9, 0x07EA, 0x07F4, 0x07F5,
133 0x07FA, 0x200F, 0xFB1D, 0xFB1F, 0xFB20, 0xFB21, 0xFB22, 0xFB23, 0xFB24,
134 0xFB25, 0xFB26, 0xFB27, 0xFB28, 0xFB2A, 0xFB2B, 0xFB2C, 0xFB2D, 0xFB2E,
135 0xFB2F, 0xFB30, 0xFB31, 0xFB32, 0xFB33, 0xFB34, 0xFB35, 0xFB36, 0xFB38,
136 0xFB39, 0xFB3A, 0xFB3B, 0xFB3C, 0xFB3E, 0xFB40, 0xFB41, 0xFB43, 0xFB44,
137 0xFB46, 0xFB47, 0xFB48, 0xFB49, 0xFB4A, 0xFB4B, 0xFB4C, 0xFB4D, 0xFB4E,
138 0xFB4F
141 /* Convert the incomplete win32 table to some slightly more useful data */
142 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
144 unsigned i, j;
145 GetStringTypeW(CT_CTYPE2, lpString, uCount, chartype);
146 for (i = 0; i < uCount; ++i)
147 switch (chartype[i])
149 case C2_LEFTTORIGHT: chartype[i] = L; break;
150 case C2_RIGHTTOLEFT:
151 chartype[i] = AL;
152 for (j = 0; j < sizeof(Rs)/sizeof(WCHAR); ++j)
153 if (Rs[j] == lpString[i])
155 chartype[i] = R;
156 break;
158 break;
160 case C2_EUROPENUMBER: chartype[i] = EN; break;
161 case C2_EUROPESEPARATOR:
162 if (c->fLegacyBidiClass && (lpString[i] == '-' || lpString[i] =='+'))
163 chartype[i] = N;
164 else if (c->fLegacyBidiClass && lpString[i] == '/')
165 chartype[i] = CS;
166 else
167 chartype[i] = ES; break;
168 case C2_EUROPETERMINATOR: chartype[i] = ET; break;
169 case C2_ARABICNUMBER: chartype[i] = AN; break;
170 case C2_COMMONSEPARATOR: chartype[i] = CS; break;
171 case C2_BLOCKSEPARATOR: chartype[i] = B; break;
172 case C2_SEGMENTSEPARATOR: chartype[i] = S; break;
173 case C2_WHITESPACE: chartype[i] = WS; break;
174 case C2_OTHERNEUTRAL:
175 switch (lpString[i])
177 case 0x202A: chartype[i] = LRE; break;
178 case 0x202B: chartype[i] = RLE; break;
179 case 0x202C: chartype[i] = PDF; break;
180 case 0x202D: chartype[i] = LRO; break;
181 case 0x202E: chartype[i] = RLO; break;
182 default: chartype[i] = ON; break;
184 break;
185 case C2_NOTAPPLICABLE:
186 chartype[i] = NSM;
187 for (j = 0; j < sizeof(BNs)/sizeof(WCHAR); ++j)
188 if (BNs[j] == lpString[i])
190 chartype[i] = BN;
191 break;
193 break;
195 default:
196 /* According to BiDi spec, unassigned characters default to L */
197 FIXME("Unhandled character type: %04x\n", chartype[i]);
198 chartype[i] = L;
199 break;
203 /* Set a run of cval values at locations all prior to, but not including */
204 /* iStart, to the new value nval. */
205 static void SetDeferredRun(WORD *pval, int cval, int iStart, int nval)
207 int i = iStart - 1;
208 for (; i >= iStart - cval; i--)
210 pval[i] = nval;
214 /* RESOLVE EXPLICIT */
216 static WORD GreaterEven(int i)
218 return odd(i) ? i + 1 : i + 2;
221 static WORD GreaterOdd(int i)
223 return odd(i) ? i + 2 : i + 1;
226 static WORD EmbeddingDirection(int level)
228 return odd(level) ? R : L;
231 /*------------------------------------------------------------------------
232 Function: resolveExplicit
234 Recursively resolves explicit embedding levels and overrides.
235 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
237 Input: Base embedding level and direction
238 Character count
240 Output: Array of embedding levels
242 In/Out: Array of direction classes
245 Note: The function uses two simple counters to keep track of
246 matching explicit codes and PDF. Use the default argument for
247 the outermost call. The nesting counter counts the recursion
248 depth and not the embedding level.
249 ------------------------------------------------------------------------*/
251 static int resolveExplicit(int level, int dir, WORD *pcls, WORD *plevel, int cch, int nNest)
253 /* always called with a valid nesting level
254 nesting levels are != embedding levels */
255 int nLastValid = nNest;
256 int ich = 0;
258 /* check input values */
259 ASSERT(nNest >= 0 && level >= 0 && level <= MAX_LEVEL);
261 /* process the text */
262 for (; ich < cch; ich++)
264 WORD cls = pcls[ich];
265 switch (cls)
267 case LRO:
268 case LRE:
269 nNest++;
270 if (GreaterEven(level) <= MAX_LEVEL - (cls == LRO ? 2 : 0))
272 plevel[ich] = GreaterEven(level);
273 pcls[ich] = BN;
274 ich += resolveExplicit(plevel[ich], (cls == LRE ? N : L),
275 &pcls[ich+1], &plevel[ich+1],
276 cch - (ich+1), nNest);
277 nNest--;
278 continue;
280 cls = pcls[ich] = BN;
281 break;
283 case RLO:
284 case RLE:
285 nNest++;
286 if (GreaterOdd(level) <= MAX_LEVEL - (cls == RLO ? 2 : 0))
288 plevel[ich] = GreaterOdd(level);
289 pcls[ich] = BN;
290 ich += resolveExplicit(plevel[ich], (cls == RLE ? N : R),
291 &pcls[ich+1], &plevel[ich+1],
292 cch - (ich+1), nNest);
293 nNest--;
294 continue;
296 cls = pcls[ich] = BN;
297 break;
299 case PDF:
300 cls = pcls[ich] = BN;
301 if (nNest)
303 if (nLastValid < nNest)
305 nNest--;
307 else
309 cch = ich; /* break the loop, but complete body */
314 /* Apply the override */
315 if (dir != N)
317 cls = dir;
319 plevel[ich] = level;
320 if (pcls[ich] != BN)
321 pcls[ich] = cls;
324 return ich;
327 /* RESOLVE WEAK TYPES */
329 enum states /* possible states */
331 xa, /* arabic letter */
332 xr, /* right letter */
333 xl, /* left letter */
335 ao, /* arabic lett. foll by ON */
336 ro, /* right lett. foll by ON */
337 lo, /* left lett. foll by ON */
339 rt, /* ET following R */
340 lt, /* ET following L */
342 cn, /* EN, AN following AL */
343 ra, /* arabic number foll R */
344 re, /* european number foll R */
345 la, /* arabic number foll L */
346 le, /* european number foll L */
348 ac, /* CS following cn */
349 rc, /* CS following ra */
350 rs, /* CS,ES following re */
351 lc, /* CS following la */
352 ls, /* CS,ES following le */
354 ret, /* ET following re */
355 let, /* ET following le */
358 static const int stateWeak[][10] =
360 /* N, L, R, AN, EN, AL,NSM, CS, ES, ET */
361 /*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* arabic letter */
362 /*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */
363 /*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */
365 /*ao*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* arabic lett. foll by ON*/
366 /*ro*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* right lett. foll by ON */
367 /*lo*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* left lett. foll by ON */
369 /*rt*/ { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R */
370 /*lt*/ { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L */
372 /*cn*/ { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL */
373 /*ra*/ { ro, xl, xr, ra, re, xa, ra, rc, ro, rt }, /* arabic number foll R */
374 /*re*/ { ro, xl, xr, ra, re, xa, re, rs, rs,ret }, /* european number foll R */
375 /*la*/ { lo, xl, xr, la, le, xa, la, lc, lo, lt }, /* arabic number foll L */
376 /*le*/ { lo, xl, xr, la, le, xa, le, ls, ls,let }, /* european number foll L */
378 /*ac*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn */
379 /*rc*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra */
380 /*rs*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re */
381 /*lc*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la */
382 /*ls*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le */
384 /*ret*/{ ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re */
385 /*let*/{ lo, xl, xr, la, le, xa,let, lo, lo,let }, /* ET following le */
388 enum actions /* possible actions */
390 /* primitives */
391 IX = 0x100, /* increment */
392 XX = 0xF, /* no-op */
394 /* actions */
395 xxx = (XX << 4) + XX, /* no-op */
396 xIx = IX + xxx, /* increment run */
397 xxN = (XX << 4) + ON, /* set current to N */
398 xxE = (XX << 4) + EN, /* set current to EN */
399 xxA = (XX << 4) + AN, /* set current to AN */
400 xxR = (XX << 4) + R, /* set current to R */
401 xxL = (XX << 4) + L, /* set current to L */
402 Nxx = (ON << 4) + 0xF, /* set run to neutral */
403 Axx = (AN << 4) + 0xF, /* set run to AN */
404 ExE = (EN << 4) + EN, /* set run to EN, set current to EN */
405 NIx = (ON << 4) + 0xF + IX, /* set run to N, increment */
406 NxN = (ON << 4) + ON, /* set run to N, set current to N */
407 NxR = (ON << 4) + R, /* set run to N, set current to R */
408 NxE = (ON << 4) + EN, /* set run to N, set current to EN */
410 AxA = (AN << 4) + AN, /* set run to AN, set current to AN */
411 NxL = (ON << 4) + L, /* set run to N, set current to L */
412 LxL = (L << 4) + L, /* set run to L, set current to L */
415 static const int actionWeak[][10] =
417 /* N, L, R, AN, EN, AL, NSM, CS, ES, ET */
418 /*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* arabic letter */
419 /*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter */
420 /*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */
422 /*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* arabic lett. foll by ON */
423 /*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON */
424 /*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON */
426 /*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R */
427 /*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L */
429 /*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following AL */
430 /*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll R */
431 /*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* european number foll R */
432 /*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll L */
433 /*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* european number foll L */
435 /*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn */
436 /*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra */
437 /*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re */
438 /*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS following la */
439 /*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le */
441 /*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re */
442 /*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL }, /* ET following le */
445 static int GetDeferredType(int action)
447 return (action >> 4) & 0xF;
450 static int GetResolvedType(int action)
452 return action & 0xF;
455 /* Note on action table:
457 States can be of two kinds:
458 - Immediate Resolution State, where each input token
459 is resolved as soon as it is seen. These states have
460 only single action codes (xxN) or the no-op (xxx)
461 for static input tokens.
462 - Deferred Resolution State, where input tokens either
463 either extend the run (xIx) or resolve its Type (e.g. Nxx).
465 Input classes are of three kinds
466 - Static Input Token, where the class of the token remains
467 unchanged on output (AN, L, N, R)
468 - Replaced Input Token, where the class of the token is
469 always replaced on output (AL, BN, NSM, CS, ES, ET)
470 - Conditional Input Token, where the class of the token is
471 changed on output in some, but not all, cases (EN)
473 Where tokens are subject to change, a double action
474 (e.g. NxA, or NxN) is _required_ after deferred states,
475 resolving both the deferred state and changing the current token.
478 /*------------------------------------------------------------------------
479 Function: resolveWeak
481 Resolves the directionality of numeric and other weak character types
483 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
485 Input: Array of embedding levels
486 Character count
488 In/Out: Array of directional classes
490 Note: On input only these directional classes are expected
491 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
492 ------------------------------------------------------------------------*/
493 static void resolveWeak(int baselevel, WORD *pcls, WORD *plevel, int cch)
495 int state = odd(baselevel) ? xr : xl;
496 int cls;
498 int level = baselevel;
499 int action, clsRun, clsNew;
500 int cchRun = 0;
501 int ich = 0;
503 for (; ich < cch; ich++)
505 /* ignore boundary neutrals */
506 if (pcls[ich] == BN)
508 /* must flatten levels unless at a level change; */
509 plevel[ich] = level;
511 /* lookahead for level changes */
512 if (ich + 1 == cch && level != baselevel)
514 /* have to fixup last BN before end of the loop, since
515 * its fix-upped value will be needed below the assert */
516 pcls[ich] = EmbeddingDirection(level);
518 else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BN)
520 /* fixup LAST BN in front / after a level run to make
521 * it act like the SOR/EOR in rule X10 */
522 int newlevel = plevel[ich+1];
523 if (level > newlevel) {
524 newlevel = level;
526 plevel[ich] = newlevel;
528 /* must match assigned level */
529 pcls[ich] = EmbeddingDirection(newlevel);
530 level = plevel[ich+1];
532 else
534 /* don't interrupt runs */
535 if (cchRun)
537 cchRun++;
539 continue;
543 ASSERT(pcls[ich] <= BN);
544 cls = pcls[ich];
546 action = actionWeak[state][cls];
548 /* resolve the directionality for deferred runs */
549 clsRun = GetDeferredType(action);
550 if (clsRun != XX)
552 SetDeferredRun(pcls, cchRun, ich, clsRun);
553 cchRun = 0;
556 /* resolve the directionality class at the current location */
557 clsNew = GetResolvedType(action);
558 if (clsNew != XX)
559 pcls[ich] = clsNew;
561 /* increment a deferred run */
562 if (IX & action)
563 cchRun++;
565 state = stateWeak[state][cls];
568 /* resolve any deferred runs
569 * use the direction of the current level to emulate PDF */
570 cls = EmbeddingDirection(level);
572 /* resolve the directionality for deferred runs */
573 clsRun = GetDeferredType(actionWeak[state][cls]);
574 if (clsRun != XX)
575 SetDeferredRun(pcls, cchRun, ich, clsRun);
578 /* RESOLVE NEUTRAL TYPES */
580 /* action values */
581 enum neutralactions
583 /* action to resolve previous input */
584 nL = L, /* resolve EN to L */
585 En = 3 << 4, /* resolve neutrals run to embedding level direction */
586 Rn = R << 4, /* resolve neutrals run to strong right */
587 Ln = L << 4, /* resolved neutrals run to strong left */
588 In = (1<<8), /* increment count of deferred neutrals */
589 LnL = (1<<4)+L, /* set run and EN to L */
592 static int GetDeferredNeutrals(int action, int level)
594 action = (action >> 4) & 0xF;
595 if (action == (En >> 4))
596 return EmbeddingDirection(level);
597 else
598 return action;
601 static int GetResolvedNeutrals(int action)
603 action = action & 0xF;
604 if (action == In)
605 return 0;
606 else
607 return action;
610 /* state values */
611 enum resolvestates
613 /* new temporary class */
614 r, /* R and characters resolved to R */
615 l, /* L and characters resolved to L */
616 rn, /* N preceded by right */
617 ln, /* N preceded by left */
618 a, /* AN preceded by left (the abbreviation 'la' is used up above) */
619 na, /* N preceded by a */
623 /*------------------------------------------------------------------------
624 Notes:
626 By rule W7, whenever a EN is 'dominated' by an L (including start of
627 run with embedding direction = L) it is resolved to, and further treated
628 as L.
630 This leads to the need for 'a' and 'na' states.
631 ------------------------------------------------------------------------*/
633 static const int actionNeutrals[][5] =
635 /* N, L, R, AN, EN = cls */
636 { In, 0, 0, 0, 0 }, /* r right */
637 { In, 0, 0, 0, L }, /* l left */
639 { In, En, Rn, Rn, Rn }, /* rn N preceded by right */
640 { In, Ln, En, En, LnL}, /* ln N preceded by left */
642 { In, 0, 0, 0, L }, /* a AN preceded by left */
643 { In, En, Rn, Rn, En }, /* na N preceded by a */
646 static const int stateNeutrals[][5] =
648 /* N, L, R, AN, EN */
649 { rn, l, r, r, r }, /* r right */
650 { ln, l, r, a, l }, /* l left */
652 { rn, l, r, r, r }, /* rn N preceded by right */
653 { ln, l, r, a, l }, /* ln N preceded by left */
655 { na, l, r, a, l }, /* a AN preceded by left */
656 { na, l, r, a, l }, /* na N preceded by la */
659 /*------------------------------------------------------------------------
660 Function: resolveNeutrals
662 Resolves the directionality of neutral character types.
664 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
666 Input: Array of embedding levels
667 Character count
668 Baselevel
670 In/Out: Array of directional classes
672 Note: On input only these directional classes are expected
673 R, L, N, AN, EN and BN
675 W8 resolves a number of ENs to L
676 ------------------------------------------------------------------------*/
677 static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int cch)
679 /* the state at the start of text depends on the base level */
680 int state = odd(baselevel) ? r : l;
681 int cls;
683 int cchRun = 0;
684 int level = baselevel;
686 int action, clsRun, clsNew;
687 int ich = 0;
688 for (; ich < cch; ich++)
690 /* ignore boundary neutrals */
691 if (pcls[ich] == BN)
693 /* include in the count for a deferred run */
694 if (cchRun)
695 cchRun++;
697 /* skip any further processing */
698 continue;
701 ASSERT(pcls[ich] < 5); /* "Only N, L, R, AN, EN are allowed" */
702 cls = pcls[ich];
704 action = actionNeutrals[state][cls];
706 /* resolve the directionality for deferred runs */
707 clsRun = GetDeferredNeutrals(action, level);
708 if (clsRun != N)
710 SetDeferredRun(pcls, cchRun, ich, clsRun);
711 cchRun = 0;
714 /* resolve the directionality class at the current location */
715 clsNew = GetResolvedNeutrals(action);
716 if (clsNew != N)
717 pcls[ich] = clsNew;
719 if (In & action)
720 cchRun++;
722 state = stateNeutrals[state][cls];
723 level = plevel[ich];
726 /* resolve any deferred runs */
727 cls = EmbeddingDirection(level); /* eor has type of current level */
729 /* resolve the directionality for deferred runs */
730 clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level);
731 if (clsRun != N)
732 SetDeferredRun(pcls, cchRun, ich, clsRun);
735 /* RESOLVE IMPLICIT */
737 /*------------------------------------------------------------------------
738 Function: resolveImplicit
740 Recursively resolves implicit embedding levels.
741 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
743 Input: Array of direction classes
744 Character count
745 Base level
747 In/Out: Array of embedding levels
749 Note: levels may exceed 15 on output.
750 Accepted subset of direction classes
751 R, L, AN, EN
752 ------------------------------------------------------------------------*/
753 static const WORD addLevel[][4] =
755 /* L, R, AN, EN */
756 /* even */ { 0, 1, 2, 2, },
757 /* odd */ { 1, 0, 1, 1, }
761 static void resolveImplicit(const WORD * pcls, WORD *plevel, int cch)
763 int ich = 0;
764 for (; ich < cch; ich++)
766 /* cannot resolve bn here, since some bn were resolved to strong
767 * types in resolveWeak. To remove these we need the original
768 * types, which are available again in resolveWhiteSpace */
769 if (pcls[ich] == BN)
771 continue;
773 ASSERT(pcls[ich] > 0); /* "No Neutrals allowed to survive here." */
774 ASSERT(pcls[ich] < 5); /* "Out of range." */
775 plevel[ich] += addLevel[odd(plevel[ich])][pcls[ich] - 1];
779 /*************************************************************
780 * BIDI_DeterminLevels
782 BOOL BIDI_DetermineLevels(
783 LPCWSTR lpString, /* [in] The string for which information is to be returned */
784 INT uCount, /* [in] Number of WCHARs in string. */
785 const SCRIPT_STATE *s,
786 const SCRIPT_CONTROL *c,
787 WORD *lpOutLevels /* [out] final string levels */
790 WORD *chartype;
791 unsigned baselevel = 0,j;
792 TRACE("%s, %d", debugstr_wn(lpString, uCount), uCount);
794 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
795 if (!chartype)
797 WARN("Out of memory\n");
798 return FALSE;
801 baselevel = s->uBidiLevel;
803 classify(lpString, chartype, uCount, c);
805 for (j = 0; j < uCount; ++j)
806 switch(chartype[j])
808 case B:
809 case S:
810 case WS:
811 case ON: chartype[j] = N;
812 default: continue;
815 /* resolve explicit */
816 resolveExplicit(baselevel, N, chartype, lpOutLevels, uCount, 0);
818 /* resolve weak */
819 resolveWeak(baselevel, chartype, lpOutLevels, uCount);
821 /* resolve neutrals */
822 resolveNeutrals(baselevel, chartype, lpOutLevels, uCount);
824 /* resolveImplicit */
825 resolveImplicit(chartype, lpOutLevels, uCount);
827 HeapFree(GetProcessHeap(), 0, chartype);
828 return TRUE;
831 /* reverse cch indexes */
832 static void reverse(int *pidx, int cch)
834 int temp;
835 int ich = 0;
836 for (; ich < --cch; ich++)
838 temp = pidx[ich];
839 pidx[ich] = pidx[cch];
840 pidx[cch] = temp;
845 /*------------------------------------------------------------------------
846 Functions: reorder/reorderLevel
848 Recursively reorders the display string
849 "From the highest level down, reverse all characters at that level and
850 higher, down to the lowest odd level"
852 Implements rule L2 of the Unicode bidi Algorithm.
854 Input: Array of embedding levels
855 Character count
856 Flag enabling reversal (set to false by initial caller)
858 In/Out: Text to reorder
860 Note: levels may exceed 15 resp. 61 on input.
862 Rule L3 - reorder combining marks is not implemented here
863 Rule L4 - glyph mirroring is implemented as a display option below
865 Note: this should be applied a line at a time
866 -------------------------------------------------------------------------*/
867 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
869 int ich = 0;
871 /* true as soon as first odd level encountered */
872 fReverse = fReverse || odd(level);
874 for (; ich < cch; ich++)
876 if (plevel[ich] < level)
878 break;
880 else if (plevel[ich] > level)
882 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
883 cch - ich, fReverse) - 1;
886 if (fReverse)
888 reverse(pIndexs, ich);
890 return ich;
893 /* Applies the reorder in reverse. Taking an already reordered string and returing the original */
894 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
896 int ich = 0;
897 int newlevel = -1;
899 /* true as soon as first odd level encountered */
900 fReverse = fReverse || odd(level);
902 for (; ich < cch; ich++)
904 if (plevel[ich] < level)
905 break;
906 else if (plevel[ich] > level)
907 newlevel = ich;
909 if (fReverse)
911 reverse(pIndexs, ich);
914 if (newlevel > 1)
916 ich = 0;
917 for (; ich < cch; ich++)
918 if (plevel[ich] > level)
919 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
920 cch - ich, fReverse) - 1;
923 return ich;