wined3d: Recognize the SM4 lt opcode.
[wine/multimedia.git] / dlls / gdi32 / bidi.c
blob7d7f6bc5c4434a644387ac9897d89792023e71a6
1 /*
2 * GDI BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 * Code derived from the modified reference implementation
22 * that was found in revision 17 of http://unicode.org/reports/tr9/
23 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 * Permission is hereby granted, free of charge, to any person obtaining a
28 * copy of the Unicode data files and any associated documentation (the
29 * "Data Files") or Unicode software and any associated documentation (the
30 * "Software") to deal in the Data Files or Software without restriction,
31 * including without limitation the rights to use, copy, modify, merge,
32 * publish, distribute, and/or sell copies of the Data Files or Software,
33 * and to permit persons to whom the Data Files or Software are furnished
34 * to do so, provided that (a) the above copyright notice(s) and this
35 * permission notice appear with all copies of the Data Files or Software,
36 * (b) both the above copyright notice(s) and this permission notice appear
37 * in associated documentation, and (c) there is clear notice in each
38 * modified Data File or in the Software as well as in the documentation
39 * associated with the Data File(s) or Software that the data or software
40 * has been modified.
43 #include "config.h"
45 #include <stdarg.h>
46 #include "windef.h"
47 #include "winbase.h"
48 #include "wingdi.h"
49 #include "winnls.h"
50 #include "wine/debug.h"
51 #include "gdi_private.h"
53 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
55 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
56 #define MAX_LEVEL 61
58 /* HELPER FUNCTIONS AND DECLARATIONS */
60 #define odd(x) ((x) & 1)
62 /*------------------------------------------------------------------------
63 Bidirectional Character Types
65 as defined by the Unicode Bidirectional Algorithm Table 3-7.
67 Note:
69 The list of bidirectional character types here is not grouped the
70 same way as the table 3-7, since the numberic values for the types
71 are chosen to keep the state and action tables compact.
72 ------------------------------------------------------------------------*/
73 enum directions
75 /* input types */
76 /* ON MUST be zero, code relies on ON = N = 0 */
77 ON = 0, /* Other Neutral */
78 L, /* Left Letter */
79 R, /* Right Letter */
80 AN, /* Arabic Number */
81 EN, /* European Number */
82 AL, /* Arabic Letter (Right-to-left) */
83 NSM, /* Non-spacing Mark */
84 CS, /* Common Separator */
85 ES, /* European Separator */
86 ET, /* European Terminator (post/prefix e.g. $ and %) */
88 /* resolved types */
89 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
91 /* input types, */
92 S, /* Segment Separator (TAB) // used only in L1 */
93 WS, /* White space // used only in L1 */
94 B, /* Paragraph Separator (aka as PS) */
96 /* types for explicit controls */
97 RLO, /* these are used only in X1-X9 */
98 RLE,
99 LRO,
100 LRE,
101 PDF,
103 /* resolved types, also resolved directions */
104 N = ON, /* alias, where ON, WS and S are treated the same */
107 /* HELPER FUNCTIONS */
109 /* grep -r ';BN;' data.txt | grep -v [0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F] | sed -e s@\;.*@@ -e s/^..../0x\&,\ / | xargs echo */
110 static const WCHAR BNs[] = {
111 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
112 0x000E, 0x000F, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016,
113 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x007F, 0x0080, 0x0081, 0x0082,
114 0x0083, 0x0084, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C,
115 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
116 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E,
117 0x009F, 0x00AD, 0x070F, 0x200B, 0x200C, 0x200D, 0x2060, 0x2061, 0x2062,
118 0x2063, 0x206A, 0x206B, 0x206C, 0x206D, 0x206E, 0x206F, 0xFEFF
121 /* Idem, but with ';R;' instead of ';BN;' */
122 static const WCHAR Rs[] = {
123 0x05BE, 0x05C0, 0x05C3, 0x05C6, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4,
124 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD,
125 0x05DE, 0x05DF, 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6,
126 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4,
127 0x07C0, 0x07C1, 0x07C2, 0x07C3, 0x07C4, 0x07C5, 0x07C6, 0x07C7, 0x07C8,
128 0x07C9, 0x07CA, 0x07CB, 0x07CC, 0x07CD, 0x07CE, 0x07CF, 0x07D0, 0x07D1,
129 0x07D2, 0x07D3, 0x07D4, 0x07D5, 0x07D6, 0x07D7, 0x07D8, 0x07D9, 0x07DA,
130 0x07DB, 0x07DC, 0x07DD, 0x07DE, 0x07DF, 0x07E0, 0x07E1, 0x07E2, 0x07E3,
131 0x07E4, 0x07E5, 0x07E6, 0x07E7, 0x07E8, 0x07E9, 0x07EA, 0x07F4, 0x07F5,
132 0x07FA, 0x200F, 0xFB1D, 0xFB1F, 0xFB20, 0xFB21, 0xFB22, 0xFB23, 0xFB24,
133 0xFB25, 0xFB26, 0xFB27, 0xFB28, 0xFB2A, 0xFB2B, 0xFB2C, 0xFB2D, 0xFB2E,
134 0xFB2F, 0xFB30, 0xFB31, 0xFB32, 0xFB33, 0xFB34, 0xFB35, 0xFB36, 0xFB38,
135 0xFB39, 0xFB3A, 0xFB3B, 0xFB3C, 0xFB3E, 0xFB40, 0xFB41, 0xFB43, 0xFB44,
136 0xFB46, 0xFB47, 0xFB48, 0xFB49, 0xFB4A, 0xFB4B, 0xFB4C, 0xFB4D, 0xFB4E,
137 0xFB4F
140 /* Convert the incomplete win32 table to some slightly more useful data */
141 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount)
143 unsigned i, j;
144 GetStringTypeW(CT_CTYPE2, lpString, uCount, chartype);
145 for (i = 0; i < uCount; ++i)
146 switch (chartype[i])
148 case C2_LEFTTORIGHT: chartype[i] = L; break;
149 case C2_RIGHTTOLEFT:
150 chartype[i] = AL;
151 for (j = 0; j < sizeof(Rs)/sizeof(WCHAR); ++j)
152 if (Rs[j] == lpString[i])
154 chartype[i] = R;
155 break;
157 break;
159 case C2_EUROPENUMBER: chartype[i] = EN; break;
160 case C2_EUROPESEPARATOR: chartype[i] = ES; break;
161 case C2_EUROPETERMINATOR: chartype[i] = ET; break;
162 case C2_ARABICNUMBER: chartype[i] = AN; break;
163 case C2_COMMONSEPARATOR: chartype[i] = CS; break;
164 case C2_BLOCKSEPARATOR: chartype[i] = B; break;
165 case C2_SEGMENTSEPARATOR: chartype[i] = S; break;
166 case C2_WHITESPACE: chartype[i] = WS; break;
167 case C2_OTHERNEUTRAL:
168 switch (lpString[i])
170 case 0x202A: chartype[i] = LRE; break;
171 case 0x202B: chartype[i] = RLE; break;
172 case 0x202C: chartype[i] = PDF; break;
173 case 0x202D: chartype[i] = LRO; break;
174 case 0x202E: chartype[i] = RLO; break;
175 default: chartype[i] = ON; break;
177 break;
178 case C2_NOTAPPLICABLE:
179 chartype[i] = NSM;
180 for (j = 0; j < sizeof(BNs)/sizeof(WCHAR); ++j)
181 if (BNs[j] == lpString[i])
183 chartype[i] = BN;
184 break;
186 break;
188 default:
189 /* According to BiDi spec, unassigned characters default to L */
190 FIXME("Unhandled character type: %04x\n", chartype[i]);
191 chartype[i] = L;
192 break;
196 /* reverse cch characters */
197 static void reverse(LPWSTR psz, int cch)
199 WCHAR chTemp;
200 int ich = 0;
201 for (; ich < --cch; ich++)
203 chTemp = psz[ich];
204 psz[ich] = psz[cch];
205 psz[cch] = chTemp;
209 /* Set a run of cval values at locations all prior to, but not including */
210 /* iStart, to the new value nval. */
211 static void SetDeferredRun(WORD *pval, int cval, int iStart, int nval)
213 int i = iStart - 1;
214 for (; i >= iStart - cval; i--)
216 pval[i] = nval;
220 /* THE PARAGRAPH LEVEL */
222 /*------------------------------------------------------------------------
223 Function: resolveParagraphs
225 Resolves the input strings into blocks over which the algorithm
226 is then applied.
228 Implements Rule P1 of the Unicode Bidi Algorithm
230 Input: Text string
231 Character count
233 Output: revised character count
235 Note: This is a very simplistic function. In effect it restricts
236 the action of the algorithm to the first paragraph in the input
237 where a paragraph ends at the end of the first block separator
238 or at the end of the input text.
240 ------------------------------------------------------------------------*/
242 static int resolveParagraphs(WORD *types, int cch)
244 /* skip characters not of type B */
245 int ich = 0;
246 for(; ich < cch && types[ich] != B; ich++);
247 /* stop after first B, make it a BN for use in the next steps */
248 if (ich < cch && types[ich] == B)
249 types[ich++] = BN;
250 return ich;
253 /* RESOLVE EXPLICIT */
255 static WORD GreaterEven(int i)
257 return odd(i) ? i + 1 : i + 2;
260 static WORD GreaterOdd(int i)
262 return odd(i) ? i + 2 : i + 1;
265 static WORD EmbeddingDirection(int level)
267 return odd(level) ? R : L;
270 /*------------------------------------------------------------------------
271 Function: resolveExplicit
273 Recursively resolves explicit embedding levels and overrides.
274 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
276 Input: Base embedding level and direction
277 Character count
279 Output: Array of embedding levels
281 In/Out: Array of direction classes
284 Note: The function uses two simple counters to keep track of
285 matching explicit codes and PDF. Use the default argument for
286 the outermost call. The nesting counter counts the recursion
287 depth and not the embedding level.
288 ------------------------------------------------------------------------*/
290 static int resolveExplicit(int level, int dir, WORD *pcls, WORD *plevel, int cch, int nNest)
292 /* always called with a valid nesting level
293 nesting levels are != embedding levels */
294 int nLastValid = nNest;
295 int ich = 0;
297 /* check input values */
298 ASSERT(nNest >= 0 && level >= 0 && level <= MAX_LEVEL);
300 /* process the text */
301 for (; ich < cch; ich++)
303 WORD cls = pcls[ich];
304 switch (cls)
306 case LRO:
307 case LRE:
308 nNest++;
309 if (GreaterEven(level) <= MAX_LEVEL - (cls == LRO ? 2 : 0))
311 plevel[ich] = GreaterEven(level);
312 pcls[ich] = BN;
313 ich += resolveExplicit(plevel[ich], (cls == LRE ? N : L),
314 &pcls[ich+1], &plevel[ich+1],
315 cch - (ich+1), nNest);
316 nNest--;
317 continue;
319 cls = pcls[ich] = BN;
320 break;
322 case RLO:
323 case RLE:
324 nNest++;
325 if (GreaterOdd(level) <= MAX_LEVEL - (cls == RLO ? 2 : 0))
327 plevel[ich] = GreaterOdd(level);
328 pcls[ich] = BN;
329 ich += resolveExplicit(plevel[ich], (cls == RLE ? N : R),
330 &pcls[ich+1], &plevel[ich+1],
331 cch - (ich+1), nNest);
332 nNest--;
333 continue;
335 cls = pcls[ich] = BN;
336 break;
338 case PDF:
339 cls = pcls[ich] = BN;
340 if (nNest)
342 if (nLastValid < nNest)
344 nNest--;
346 else
348 cch = ich; /* break the loop, but complete body */
353 /* Apply the override */
354 if (dir != N)
356 cls = dir;
358 plevel[ich] = level;
359 if (pcls[ich] != BN)
360 pcls[ich] = cls;
363 return ich;
366 /* RESOLVE WEAK TYPES */
368 enum states /* possible states */
370 xa, /* arabic letter */
371 xr, /* right letter */
372 xl, /* left letter */
374 ao, /* arabic lett. foll by ON */
375 ro, /* right lett. foll by ON */
376 lo, /* left lett. foll by ON */
378 rt, /* ET following R */
379 lt, /* ET following L */
381 cn, /* EN, AN following AL */
382 ra, /* arabic number foll R */
383 re, /* european number foll R */
384 la, /* arabic number foll L */
385 le, /* european number foll L */
387 ac, /* CS following cn */
388 rc, /* CS following ra */
389 rs, /* CS,ES following re */
390 lc, /* CS following la */
391 ls, /* CS,ES following le */
393 ret, /* ET following re */
394 let, /* ET following le */
397 static const int stateWeak[][10] =
399 /* N, L, R, AN, EN, AL,NSM, CS, ES, ET */
400 /*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* arabic letter */
401 /*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */
402 /*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */
404 /*ao*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* arabic lett. foll by ON*/
405 /*ro*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* right lett. foll by ON */
406 /*lo*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* left lett. foll by ON */
408 /*rt*/ { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R */
409 /*lt*/ { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L */
411 /*cn*/ { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL */
412 /*ra*/ { ro, xl, xr, ra, re, xa, ra, rc, ro, rt }, /* arabic number foll R */
413 /*re*/ { ro, xl, xr, ra, re, xa, re, rs, rs,ret }, /* european number foll R */
414 /*la*/ { lo, xl, xr, la, le, xa, la, lc, lo, lt }, /* arabic number foll L */
415 /*le*/ { lo, xl, xr, la, le, xa, le, ls, ls,let }, /* european number foll L */
417 /*ac*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn */
418 /*rc*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra */
419 /*rs*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re */
420 /*lc*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la */
421 /*ls*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le */
423 /*ret*/{ ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re */
424 /*let*/{ lo, xl, xr, la, le, xa,let, lo, lo,let }, /* ET following le */
427 enum actions /* possible actions */
429 /* primitives */
430 IX = 0x100, /* increment */
431 XX = 0xF, /* no-op */
433 /* actions */
434 xxx = (XX << 4) + XX, /* no-op */
435 xIx = IX + xxx, /* increment run */
436 xxN = (XX << 4) + ON, /* set current to N */
437 xxE = (XX << 4) + EN, /* set current to EN */
438 xxA = (XX << 4) + AN, /* set current to AN */
439 xxR = (XX << 4) + R, /* set current to R */
440 xxL = (XX << 4) + L, /* set current to L */
441 Nxx = (ON << 4) + 0xF, /* set run to neutral */
442 Axx = (AN << 4) + 0xF, /* set run to AN */
443 ExE = (EN << 4) + EN, /* set run to EN, set current to EN */
444 NIx = (ON << 4) + 0xF + IX, /* set run to N, increment */
445 NxN = (ON << 4) + ON, /* set run to N, set current to N */
446 NxR = (ON << 4) + R, /* set run to N, set current to R */
447 NxE = (ON << 4) + EN, /* set run to N, set current to EN */
449 AxA = (AN << 4) + AN, /* set run to AN, set current to AN */
450 NxL = (ON << 4) + L, /* set run to N, set current to L */
451 LxL = (L << 4) + L, /* set run to L, set current to L */
454 static const int actionWeak[][10] =
456 /* N, L, R, AN, EN, AL, NSM, CS, ES, ET */
457 /*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* arabic letter */
458 /*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter */
459 /*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */
461 /*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* arabic lett. foll by ON */
462 /*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON */
463 /*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON */
465 /*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R */
466 /*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L */
468 /*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following AL */
469 /*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll R */
470 /*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* european number foll R */
471 /*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll L */
472 /*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* european number foll L */
474 /*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn */
475 /*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra */
476 /*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re */
477 /*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS following la */
478 /*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le */
480 /*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re */
481 /*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL }, /* ET following le */
484 static int GetDeferredType(int action)
486 return (action >> 4) & 0xF;
489 static int GetResolvedType(int action)
491 return action & 0xF;
494 /* Note on action table:
496 States can be of two kinds:
497 - Immediate Resolution State, where each input token
498 is resolved as soon as it is seen. These states have
499 only single action codes (xxN) or the no-op (xxx)
500 for static input tokens.
501 - Deferred Resolution State, where input tokens either
502 either extend the run (xIx) or resolve its Type (e.g. Nxx).
504 Input classes are of three kinds
505 - Static Input Token, where the class of the token remains
506 unchanged on output (AN, L, N, R)
507 - Replaced Input Token, where the class of the token is
508 always replaced on output (AL, BN, NSM, CS, ES, ET)
509 - Conditional Input Token, where the class of the token is
510 changed on output in some, but not all, cases (EN)
512 Where tokens are subject to change, a double action
513 (e.g. NxA, or NxN) is _required_ after deferred states,
514 resolving both the deferred state and changing the current token.
517 /*------------------------------------------------------------------------
518 Function: resolveWeak
520 Resolves the directionality of numeric and other weak character types
522 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
524 Input: Array of embedding levels
525 Character count
527 In/Out: Array of directional classes
529 Note: On input only these directional classes are expected
530 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
531 ------------------------------------------------------------------------*/
532 static void resolveWeak(int baselevel, WORD *pcls, WORD *plevel, int cch)
534 int state = odd(baselevel) ? xr : xl;
535 int cls;
537 int level = baselevel;
538 int action, clsRun, clsNew;
539 int cchRun = 0;
540 int ich = 0;
542 for (; ich < cch; ich++)
544 /* ignore boundary neutrals */
545 if (pcls[ich] == BN)
547 /* must flatten levels unless at a level change; */
548 plevel[ich] = level;
550 /* lookahead for level changes */
551 if (ich + 1 == cch && level != baselevel)
553 /* have to fixup last BN before end of the loop, since
554 * its fix-upped value will be needed below the assert */
555 pcls[ich] = EmbeddingDirection(level);
557 else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BN)
559 /* fixup LAST BN in front / after a level run to make
560 * it act like the SOR/EOR in rule X10 */
561 int newlevel = plevel[ich+1];
562 if (level > newlevel) {
563 newlevel = level;
565 plevel[ich] = newlevel;
567 /* must match assigned level */
568 pcls[ich] = EmbeddingDirection(newlevel);
569 level = plevel[ich+1];
571 else
573 /* don't interrupt runs */
574 if (cchRun)
576 cchRun++;
578 continue;
582 ASSERT(pcls[ich] <= BN);
583 cls = pcls[ich];
585 action = actionWeak[state][cls];
587 /* resolve the directionality for deferred runs */
588 clsRun = GetDeferredType(action);
589 if (clsRun != XX)
591 SetDeferredRun(pcls, cchRun, ich, clsRun);
592 cchRun = 0;
595 /* resolve the directionality class at the current location */
596 clsNew = GetResolvedType(action);
597 if (clsNew != XX)
598 pcls[ich] = clsNew;
600 /* increment a deferred run */
601 if (IX & action)
602 cchRun++;
604 state = stateWeak[state][cls];
607 /* resolve any deferred runs
608 * use the direction of the current level to emulate PDF */
609 cls = EmbeddingDirection(level);
611 /* resolve the directionality for deferred runs */
612 clsRun = GetDeferredType(actionWeak[state][cls]);
613 if (clsRun != XX)
614 SetDeferredRun(pcls, cchRun, ich, clsRun);
617 /* RESOLVE NEUTRAL TYPES */
619 /* action values */
620 enum neutralactions
622 /* action to resolve previous input */
623 nL = L, /* resolve EN to L */
624 En = 3 << 4, /* resolve neutrals run to embedding level direction */
625 Rn = R << 4, /* resolve neutrals run to strong right */
626 Ln = L << 4, /* resolved neutrals run to strong left */
627 In = (1<<8), /* increment count of deferred neutrals */
628 LnL = (1<<4)+L, /* set run and EN to L */
631 static int GetDeferredNeutrals(int action, int level)
633 action = (action >> 4) & 0xF;
634 if (action == (En >> 4))
635 return EmbeddingDirection(level);
636 else
637 return action;
640 static int GetResolvedNeutrals(int action)
642 action = action & 0xF;
643 if (action == In)
644 return 0;
645 else
646 return action;
649 /* state values */
650 enum resolvestates
652 /* new temporary class */
653 r, /* R and characters resolved to R */
654 l, /* L and characters resolved to L */
655 rn, /* N preceded by right */
656 ln, /* N preceded by left */
657 a, /* AN preceded by left (the abbreviation 'la' is used up above) */
658 na, /* N preceded by a */
662 /*------------------------------------------------------------------------
663 Notes:
665 By rule W7, whenever a EN is 'dominated' by an L (including start of
666 run with embedding direction = L) it is resolved to, and further treated
667 as L.
669 This leads to the need for 'a' and 'na' states.
670 ------------------------------------------------------------------------*/
672 static const int actionNeutrals[][5] =
674 /* N, L, R, AN, EN = cls */
675 { In, 0, 0, 0, 0 }, /* r right */
676 { In, 0, 0, 0, L }, /* l left */
678 { In, En, Rn, Rn, Rn }, /* rn N preceded by right */
679 { In, Ln, En, En, LnL}, /* ln N preceded by left */
681 { In, 0, 0, 0, L }, /* a AN preceded by left */
682 { In, En, Rn, Rn, En }, /* na N preceded by a */
685 static const int stateNeutrals[][5] =
687 /* N, L, R, AN, EN */
688 { rn, l, r, r, r }, /* r right */
689 { ln, l, r, a, l }, /* l left */
691 { rn, l, r, r, r }, /* rn N preceded by right */
692 { ln, l, r, a, l }, /* ln N preceded by left */
694 { na, l, r, a, l }, /* a AN preceded by left */
695 { na, l, r, a, l }, /* na N preceded by la */
698 /*------------------------------------------------------------------------
699 Function: resolveNeutrals
701 Resolves the directionality of neutral character types.
703 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
705 Input: Array of embedding levels
706 Character count
707 Baselevel
709 In/Out: Array of directional classes
711 Note: On input only these directional classes are expected
712 R, L, N, AN, EN and BN
714 W8 resolves a number of ENs to L
715 ------------------------------------------------------------------------*/
716 static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int cch)
718 /* the state at the start of text depends on the base level */
719 int state = odd(baselevel) ? r : l;
720 int cls;
722 int cchRun = 0;
723 int level = baselevel;
725 int action, clsRun, clsNew;
726 int ich = 0;
727 for (; ich < cch; ich++)
729 /* ignore boundary neutrals */
730 if (pcls[ich] == BN)
732 /* include in the count for a deferred run */
733 if (cchRun)
734 cchRun++;
736 /* skip any further processing */
737 continue;
740 ASSERT(pcls[ich] < 5); /* "Only N, L, R, AN, EN are allowed" */
741 cls = pcls[ich];
743 action = actionNeutrals[state][cls];
745 /* resolve the directionality for deferred runs */
746 clsRun = GetDeferredNeutrals(action, level);
747 if (clsRun != N)
749 SetDeferredRun(pcls, cchRun, ich, clsRun);
750 cchRun = 0;
753 /* resolve the directionality class at the current location */
754 clsNew = GetResolvedNeutrals(action);
755 if (clsNew != N)
756 pcls[ich] = clsNew;
758 if (In & action)
759 cchRun++;
761 state = stateNeutrals[state][cls];
762 level = plevel[ich];
765 /* resolve any deferred runs */
766 cls = EmbeddingDirection(level); /* eor has type of current level */
768 /* resolve the directionality for deferred runs */
769 clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level);
770 if (clsRun != N)
771 SetDeferredRun(pcls, cchRun, ich, clsRun);
774 /* RESOLVE IMPLICIT */
776 /*------------------------------------------------------------------------
777 Function: resolveImplicit
779 Recursively resolves implicit embedding levels.
780 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
782 Input: Array of direction classes
783 Character count
784 Base level
786 In/Out: Array of embedding levels
788 Note: levels may exceed 15 on output.
789 Accepted subset of direction classes
790 R, L, AN, EN
791 ------------------------------------------------------------------------*/
792 static const WORD addLevel[][4] =
794 /* L, R, AN, EN */
795 /* even */ { 0, 1, 2, 2, },
796 /* odd */ { 1, 0, 1, 1, }
800 static void resolveImplicit(const WORD * pcls, WORD *plevel, int cch)
802 int ich = 0;
803 for (; ich < cch; ich++)
805 /* cannot resolve bn here, since some bn were resolved to strong
806 * types in resolveWeak. To remove these we need the original
807 * types, which are available again in resolveWhiteSpace */
808 if (pcls[ich] == BN)
810 continue;
812 ASSERT(pcls[ich] > 0); /* "No Neutrals allowed to survive here." */
813 ASSERT(pcls[ich] < 5); /* "Out of range." */
814 plevel[ich] += addLevel[odd(plevel[ich])][pcls[ich] - 1];
818 /* REORDER */
819 /*------------------------------------------------------------------------
820 Function: resolveLines
822 Breaks a paragraph into lines
824 Input: Character count
825 In/Out: Array of characters
826 Array of line break flags
828 Returns the count of characters on the first line
830 Note: This function only breaks lines at hard line breaks. Other
831 line breaks can be passed in. If pbrk[n] is TRUE, then a break
832 occurs after the character in pszInput[n]. Breaks before the first
833 character are not allowed.
834 ------------------------------------------------------------------------*/
835 static int resolveLines(LPCWSTR pszInput, BOOL * pbrk, int cch)
837 /* skip characters not of type LS */
838 int ich = 0;
839 for(; ich < cch; ich++)
841 if (pszInput[ich] == (WCHAR)'\n' || (pbrk && pbrk[ich]))
843 ich++;
844 break;
848 return ich;
851 /*------------------------------------------------------------------------
852 Function: resolveWhiteSpace
854 Resolves levels for WS and S
855 Implements rule L1 of the Unicode bidi Algorithm.
857 Input: Base embedding level
858 Character count
859 Array of direction classes (for one line of text)
861 In/Out: Array of embedding levels (for one line of text)
863 Note: this should be applied a line at a time. The default driver
864 code supplied in this file assumes a single line of text; for
865 a real implementation, cch and the initial pointer values
866 would have to be adjusted.
867 ------------------------------------------------------------------------*/
868 static void resolveWhitespace(int baselevel, const WORD *pcls, WORD *plevel, int cch)
870 int cchrun = 0;
871 int oldlevel = baselevel;
873 int ich = 0;
874 for (; ich < cch; ich++)
876 switch(pcls[ich])
878 default:
879 cchrun = 0; /* any other character breaks the run */
880 break;
881 case WS:
882 cchrun++;
883 break;
885 case RLE:
886 case LRE:
887 case LRO:
888 case RLO:
889 case PDF:
890 case BN:
891 plevel[ich] = oldlevel;
892 cchrun++;
893 break;
895 case S:
896 case B:
897 /* reset levels for WS before eot */
898 SetDeferredRun(plevel, cchrun, ich, baselevel);
899 cchrun = 0;
900 plevel[ich] = baselevel;
901 break;
903 oldlevel = plevel[ich];
905 /* reset level before eot */
906 SetDeferredRun(plevel, cchrun, ich, baselevel);
910 /*------------------------------------------------------------------------
911 Functions: reorder/reorderLevel
913 Recursively reorders the display string
914 "From the highest level down, reverse all characters at that level and
915 higher, down to the lowest odd level"
917 Implements rule L2 of the Unicode bidi Algorithm.
919 Input: Array of embedding levels
920 Character count
921 Flag enabling reversal (set to false by initial caller)
923 In/Out: Text to reorder
925 Note: levels may exceed 15 resp. 61 on input.
927 Rule L3 - reorder combining marks is not implemented here
928 Rule L4 - glyph mirroring is implemented as a display option below
930 Note: this should be applied a line at a time
931 -------------------------------------------------------------------------*/
932 static int reorderLevel(int level, LPWSTR pszText, const WORD* plevel, int cch, BOOL fReverse)
934 int ich = 0;
936 /* true as soon as first odd level encountered */
937 fReverse = fReverse || odd(level);
939 for (; ich < cch; ich++)
941 if (plevel[ich] < level)
943 break;
945 else if (plevel[ich] > level)
947 ich += reorderLevel(level + 1, pszText + ich, plevel + ich,
948 cch - ich, fReverse) - 1;
951 if (fReverse)
953 reverse(pszText, ich);
955 return ich;
958 static int reorder(int baselevel, LPWSTR pszText, const WORD* plevel, int cch)
960 int ich = 0;
962 while (ich < cch)
964 ich += reorderLevel(baselevel, pszText + ich, plevel + ich,
965 cch - ich, FALSE);
967 return ich;
970 /* DISPLAY OPTIONS */
971 /*-----------------------------------------------------------------------
972 Function: mirror
974 Crudely implements rule L4 of the Unicode Bidirectional Algorithm
975 Demonstrate mirrored brackets, braces and parens
978 Input: Array of levels
979 Count of characters
981 In/Out: Array of characters (should be array of glyph ids)
983 Note;
984 A full implementation would need to substitute mirrored glyphs even
985 for characters that are not paired (e.g. integral sign).
986 -----------------------------------------------------------------------*/
987 static void mirror(LPWSTR pszInput, const WORD* plevel, int cch)
989 static int warn_once;
990 int i;
992 for (i = 0; i < cch; ++i)
994 if (!odd(plevel[i]))
995 continue;
996 /* This needs the data from http://www.unicode.org/Public/UNIDATA/BidiMirroring.txt */
997 if (!warn_once++)
998 FIXME("stub: mirroring of characters not yet implemented\n");
999 break;
1003 /*------------------------------------------------------------------------
1004 Function: BidiLines
1006 Implements the Line-by-Line phases of the Unicode Bidi Algorithm
1008 Input: Count of characters
1009 flag whether to mirror
1011 Inp/Out: Input text
1012 Array of character directions
1013 Array of levels
1015 ------------------------------------------------------------------------*/
1016 static void BidiLines(int baselevel, LPWSTR pszOutLine, LPCWSTR pszLine, WORD * pclsLine,
1017 WORD * plevelLine, int cchPara, int fMirror, BOOL * pbrk)
1019 int cchLine = 0;
1023 /* break lines at LS */
1024 cchLine = resolveLines(pszLine, pbrk, cchPara);
1026 /* resolve whitespace */
1027 resolveWhitespace(baselevel, pclsLine, plevelLine, cchLine);
1029 if (pszOutLine)
1031 if (fMirror)
1032 mirror(pszOutLine, plevelLine, cchLine);
1034 /* reorder each line in place */
1035 reorder(baselevel, pszOutLine, plevelLine, cchLine);
1038 pszLine += cchLine;
1039 plevelLine += cchLine;
1040 pbrk += pbrk ? cchLine : 0;
1041 pclsLine += cchLine;
1042 cchPara -= cchLine;
1044 } while (cchPara);
1047 /*************************************************************
1048 * BIDI_Reorder
1050 BOOL BIDI_Reorder(
1051 LPCWSTR lpString, /* [in] The string for which information is to be returned */
1052 INT uCount, /* [in] Number of WCHARs in string. */
1053 DWORD dwFlags, /* [in] GetCharacterPlacement compatible flags specifying how to process the string */
1054 DWORD dwWineGCP_Flags, /* [in] Wine internal flags - Force paragraph direction */
1055 LPWSTR lpOutString, /* [out] Reordered string */
1056 INT uCountOut, /* [in] Size of output buffer */
1057 UINT *lpOrder /* [out] Logical -> Visual order map */
1060 WORD *levels;
1061 WORD *chartype;
1062 unsigned i, baselevel = 0, done;
1063 TRACE("%s, %d, 0x%08x lpOutString=%p, lpOrder=%p\n",
1064 debugstr_wn(lpString, uCount), uCount, dwFlags,
1065 lpOutString, lpOrder);
1067 if (!(dwFlags & GCP_REORDER))
1069 FIXME("Asked to reorder without reorder flag set\n");
1070 return FALSE;
1073 if (uCountOut < uCount)
1075 FIXME("lpOutString too small\n");
1076 return FALSE;
1079 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * 2 * sizeof(WORD));
1080 levels = chartype + uCount;
1081 if (!chartype)
1083 WARN("Out of memory\n");
1084 return FALSE;
1087 if (lpOutString)
1088 memcpy(lpOutString, lpString, uCount * sizeof(WCHAR));
1090 if (WINE_GCPW_FORCE_RTL == (dwWineGCP_Flags&WINE_GCPW_DIR_MASK))
1091 baselevel = 1;
1093 done = 0;
1094 while (done < uCount)
1096 unsigned j;
1097 classify(lpString + done, chartype, uCount - done);
1098 /* limit text to first block */
1099 i = resolveParagraphs(chartype, uCount - done);
1100 for (j = 0; j < i; ++j)
1101 switch(chartype[j])
1103 case B:
1104 case S:
1105 case WS:
1106 case ON: chartype[j] = N;
1107 default: continue;
1110 if ((dwWineGCP_Flags&WINE_GCPW_DIR_MASK) == WINE_GCPW_LOOSE_RTL)
1111 baselevel = 1;
1112 else if ((dwWineGCP_Flags&WINE_GCPW_DIR_MASK) == WINE_GCPW_LOOSE_LTR)
1113 baselevel = 0;
1115 if (dwWineGCP_Flags & WINE_GCPW_LOOSE_MASK)
1117 for (j = 0; j < i; ++j)
1118 if (chartype[j] == L)
1120 baselevel = 0;
1121 break;
1123 else if (chartype[j] == R || chartype[j] == AL)
1125 baselevel = 1;
1126 break;
1130 /* resolve explicit */
1131 resolveExplicit(baselevel, N, chartype, levels, i, 0);
1133 /* resolve weak */
1134 resolveWeak(baselevel, chartype, levels, i);
1136 /* resolve neutrals */
1137 resolveNeutrals(baselevel, chartype, levels, i);
1139 /* resolveImplicit */
1140 resolveImplicit(chartype, levels, i);
1142 /* assign directional types again, but for WS, S this time */
1143 classify(lpString + done, chartype, i);
1145 BidiLines(baselevel, lpOutString ? lpOutString + done : NULL, lpString + done,
1146 chartype, levels, i, !(dwFlags & GCP_SYMSWAPOFF), 0);
1148 if (lpOrder)
1150 int k, lastgood;
1151 for (j = lastgood = 0; j < i; ++j)
1152 if (levels[j] != levels[lastgood])
1154 --j;
1155 if (odd(levels[lastgood]))
1156 for (k = j; k >= lastgood; --k)
1157 lpOrder[done + k] = done + j - k;
1158 else
1159 for (k = lastgood; k <= j; ++k)
1160 lpOrder[done + k] = done + k;
1161 lastgood = ++j;
1163 if (odd(levels[lastgood]))
1164 for (k = j - 1; k >= lastgood; --k)
1165 lpOrder[done + k] = done + j - 1 - k;
1166 else
1167 for (k = lastgood; k < j; ++k)
1168 lpOrder[done + k] = done + k;
1170 done += i;
1173 HeapFree(GetProcessHeap(), 0, chartype);
1174 return TRUE;