2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
52 #include "wine/debug.h"
54 #include "usp10_internal.h"
56 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
58 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
61 /* HELPER FUNCTIONS AND DECLARATIONS */
63 /*------------------------------------------------------------------------
64 Bidirectional Character Types
66 as defined by the Unicode Bidirectional Algorithm Table 3-7.
70 The list of bidirectional character types here is not grouped the
71 same way as the table 3-7, since the numberic values for the types
72 are chosen to keep the state and action tables compact.
73 ------------------------------------------------------------------------*/
77 /* ON MUST be zero, code relies on ON = N = 0 */
78 ON
= 0, /* Other Neutral */
81 AN
, /* Arabic Number */
82 EN
, /* European Number */
83 AL
, /* Arabic Letter (Right-to-left) */
84 NSM
, /* Non-spacing Mark */
85 CS
, /* Common Separator */
86 ES
, /* European Separator */
87 ET
, /* European Terminator (post/prefix e.g. $ and %) */
90 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
93 S
, /* Segment Separator (TAB) // used only in L1 */
94 WS
, /* White space // used only in L1 */
95 B
, /* Paragraph Separator (aka as PS) */
97 /* types for explicit controls */
98 RLO
, /* these are used only in X1-X9 */
104 /* resolved types, also resolved directions */
105 N
= ON
, /* alias, where ON, WS and S are treated the same */
108 /* HELPER FUNCTIONS */
110 /* grep -r ';BN;' data.txt | grep -v [0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F] | sed -e s@\;.*@@ -e s/^..../0x\&,\ / | xargs echo */
111 static const WCHAR BNs
[] = {
112 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
113 0x000E, 0x000F, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016,
114 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x007F, 0x0080, 0x0081, 0x0082,
115 0x0083, 0x0084, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C,
116 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
117 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E,
118 0x009F, 0x00AD, 0x070F, 0x200B, 0x200C, 0x200D, 0x2060, 0x2061, 0x2062,
119 0x2063, 0x206A, 0x206B, 0x206C, 0x206D, 0x206E, 0x206F, 0xFEFF
122 /* Idem, but with ';R;' instead of ';BN;' */
123 static const WCHAR Rs
[] = {
124 0x05BE, 0x05C0, 0x05C3, 0x05C6, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4,
125 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD,
126 0x05DE, 0x05DF, 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6,
127 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4,
128 0x07C0, 0x07C1, 0x07C2, 0x07C3, 0x07C4, 0x07C5, 0x07C6, 0x07C7, 0x07C8,
129 0x07C9, 0x07CA, 0x07CB, 0x07CC, 0x07CD, 0x07CE, 0x07CF, 0x07D0, 0x07D1,
130 0x07D2, 0x07D3, 0x07D4, 0x07D5, 0x07D6, 0x07D7, 0x07D8, 0x07D9, 0x07DA,
131 0x07DB, 0x07DC, 0x07DD, 0x07DE, 0x07DF, 0x07E0, 0x07E1, 0x07E2, 0x07E3,
132 0x07E4, 0x07E5, 0x07E6, 0x07E7, 0x07E8, 0x07E9, 0x07EA, 0x07F4, 0x07F5,
133 0x07FA, 0x200F, 0xFB1D, 0xFB1F, 0xFB20, 0xFB21, 0xFB22, 0xFB23, 0xFB24,
134 0xFB25, 0xFB26, 0xFB27, 0xFB28, 0xFB2A, 0xFB2B, 0xFB2C, 0xFB2D, 0xFB2E,
135 0xFB2F, 0xFB30, 0xFB31, 0xFB32, 0xFB33, 0xFB34, 0xFB35, 0xFB36, 0xFB38,
136 0xFB39, 0xFB3A, 0xFB3B, 0xFB3C, 0xFB3E, 0xFB40, 0xFB41, 0xFB43, 0xFB44,
137 0xFB46, 0xFB47, 0xFB48, 0xFB49, 0xFB4A, 0xFB4B, 0xFB4C, 0xFB4D, 0xFB4E,
141 /* Convert the incomplete win32 table to some slightly more useful data */
142 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
145 GetStringTypeW(CT_CTYPE2
, lpString
, uCount
, chartype
);
146 for (i
= 0; i
< uCount
; ++i
)
149 case C2_LEFTTORIGHT
: chartype
[i
] = L
; break;
152 for (j
= 0; j
< sizeof(Rs
)/sizeof(WCHAR
); ++j
)
153 if (Rs
[j
] == lpString
[i
])
160 case C2_EUROPENUMBER
: chartype
[i
] = EN
; break;
161 case C2_EUROPESEPARATOR
:
162 if (c
->fLegacyBidiClass
&& (lpString
[i
] == '-' || lpString
[i
] =='+'))
164 else if (c
->fLegacyBidiClass
&& lpString
[i
] == '/')
167 chartype
[i
] = ES
; break;
168 case C2_EUROPETERMINATOR
: chartype
[i
] = ET
; break;
169 case C2_ARABICNUMBER
: chartype
[i
] = AN
; break;
170 case C2_COMMONSEPARATOR
: chartype
[i
] = CS
; break;
171 case C2_BLOCKSEPARATOR
: chartype
[i
] = B
; break;
172 case C2_SEGMENTSEPARATOR
: chartype
[i
] = S
; break;
173 case C2_WHITESPACE
: chartype
[i
] = WS
; break;
174 case C2_OTHERNEUTRAL
:
177 case 0x202A: chartype
[i
] = LRE
; break;
178 case 0x202B: chartype
[i
] = RLE
; break;
179 case 0x202C: chartype
[i
] = PDF
; break;
180 case 0x202D: chartype
[i
] = LRO
; break;
181 case 0x202E: chartype
[i
] = RLO
; break;
182 default: chartype
[i
] = ON
; break;
185 case C2_NOTAPPLICABLE
:
187 for (j
= 0; j
< sizeof(BNs
)/sizeof(WCHAR
); ++j
)
188 if (BNs
[j
] == lpString
[i
])
196 /* According to BiDi spec, unassigned characters default to L */
197 FIXME("Unhandled character type: %04x\n", chartype
[i
]);
203 /* Set a run of cval values at locations all prior to, but not including */
204 /* iStart, to the new value nval. */
205 static void SetDeferredRun(WORD
*pval
, int cval
, int iStart
, int nval
)
208 for (; i
>= iStart
- cval
; i
--)
214 /* RESOLVE EXPLICIT */
216 static WORD
GreaterEven(int i
)
218 return odd(i
) ? i
+ 1 : i
+ 2;
221 static WORD
GreaterOdd(int i
)
223 return odd(i
) ? i
+ 2 : i
+ 1;
226 static WORD
EmbeddingDirection(int level
)
228 return odd(level
) ? R
: L
;
231 /*------------------------------------------------------------------------
232 Function: resolveExplicit
234 Recursively resolves explicit embedding levels and overrides.
235 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
237 Input: Base embedding level and direction
240 Output: Array of embedding levels
242 In/Out: Array of direction classes
245 Note: The function uses two simple counters to keep track of
246 matching explicit codes and PDF. Use the default argument for
247 the outermost call. The nesting counter counts the recursion
248 depth and not the embedding level.
249 ------------------------------------------------------------------------*/
251 static int resolveExplicit(int level
, int dir
, WORD
*pcls
, WORD
*plevel
, int cch
, int nNest
)
253 /* always called with a valid nesting level
254 nesting levels are != embedding levels */
255 int nLastValid
= nNest
;
258 /* check input values */
259 ASSERT(nNest
>= 0 && level
>= 0 && level
<= MAX_LEVEL
);
261 /* process the text */
262 for (; ich
< cch
; ich
++)
264 WORD cls
= pcls
[ich
];
270 if (GreaterEven(level
) <= MAX_LEVEL
- (cls
== LRO
? 2 : 0))
272 plevel
[ich
] = GreaterEven(level
);
274 ich
+= resolveExplicit(plevel
[ich
], (cls
== LRE
? N
: L
),
275 &pcls
[ich
+1], &plevel
[ich
+1],
276 cch
- (ich
+1), nNest
);
280 cls
= pcls
[ich
] = BN
;
286 if (GreaterOdd(level
) <= MAX_LEVEL
- (cls
== RLO
? 2 : 0))
288 plevel
[ich
] = GreaterOdd(level
);
290 ich
+= resolveExplicit(plevel
[ich
], (cls
== RLE
? N
: R
),
291 &pcls
[ich
+1], &plevel
[ich
+1],
292 cch
- (ich
+1), nNest
);
296 cls
= pcls
[ich
] = BN
;
300 cls
= pcls
[ich
] = BN
;
303 if (nLastValid
< nNest
)
309 cch
= ich
; /* break the loop, but complete body */
314 /* Apply the override */
327 /* RESOLVE WEAK TYPES */
329 enum states
/* possible states */
331 xa
, /* arabic letter */
332 xr
, /* right letter */
333 xl
, /* left letter */
335 ao
, /* arabic lett. foll by ON */
336 ro
, /* right lett. foll by ON */
337 lo
, /* left lett. foll by ON */
339 rt
, /* ET following R */
340 lt
, /* ET following L */
342 cn
, /* EN, AN following AL */
343 ra
, /* arabic number foll R */
344 re
, /* european number foll R */
345 la
, /* arabic number foll L */
346 le
, /* european number foll L */
348 ac
, /* CS following cn */
349 rc
, /* CS following ra */
350 rs
, /* CS,ES following re */
351 lc
, /* CS following la */
352 ls
, /* CS,ES following le */
354 ret
, /* ET following re */
355 let
, /* ET following le */
358 static const int stateWeak
[][10] =
360 /* N, L, R, AN, EN, AL,NSM, CS, ES, ET */
361 /*xa*/ { ao
, xl
, xr
, cn
, cn
, xa
, xa
, ao
, ao
, ao
}, /* arabic letter */
362 /*xr*/ { ro
, xl
, xr
, ra
, re
, xa
, xr
, ro
, ro
, rt
}, /* right letter */
363 /*xl*/ { lo
, xl
, xr
, la
, le
, xa
, xl
, lo
, lo
, lt
}, /* left letter */
365 /*ao*/ { ao
, xl
, xr
, cn
, cn
, xa
, ao
, ao
, ao
, ao
}, /* arabic lett. foll by ON*/
366 /*ro*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* right lett. foll by ON */
367 /*lo*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* left lett. foll by ON */
369 /*rt*/ { ro
, xl
, xr
, ra
, re
, xa
, rt
, ro
, ro
, rt
}, /* ET following R */
370 /*lt*/ { lo
, xl
, xr
, la
, le
, xa
, lt
, lo
, lo
, lt
}, /* ET following L */
372 /*cn*/ { ao
, xl
, xr
, cn
, cn
, xa
, cn
, ac
, ao
, ao
}, /* EN, AN following AL */
373 /*ra*/ { ro
, xl
, xr
, ra
, re
, xa
, ra
, rc
, ro
, rt
}, /* arabic number foll R */
374 /*re*/ { ro
, xl
, xr
, ra
, re
, xa
, re
, rs
, rs
,ret
}, /* european number foll R */
375 /*la*/ { lo
, xl
, xr
, la
, le
, xa
, la
, lc
, lo
, lt
}, /* arabic number foll L */
376 /*le*/ { lo
, xl
, xr
, la
, le
, xa
, le
, ls
, ls
,let
}, /* european number foll L */
378 /*ac*/ { ao
, xl
, xr
, cn
, cn
, xa
, ao
, ao
, ao
, ao
}, /* CS following cn */
379 /*rc*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* CS following ra */
380 /*rs*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* CS,ES following re */
381 /*lc*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* CS following la */
382 /*ls*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* CS,ES following le */
384 /*ret*/{ ro
, xl
, xr
, ra
, re
, xa
,ret
, ro
, ro
,ret
}, /* ET following re */
385 /*let*/{ lo
, xl
, xr
, la
, le
, xa
,let
, lo
, lo
,let
}, /* ET following le */
388 enum actions
/* possible actions */
391 IX
= 0x100, /* increment */
392 XX
= 0xF, /* no-op */
395 xxx
= (XX
<< 4) + XX
, /* no-op */
396 xIx
= IX
+ xxx
, /* increment run */
397 xxN
= (XX
<< 4) + ON
, /* set current to N */
398 xxE
= (XX
<< 4) + EN
, /* set current to EN */
399 xxA
= (XX
<< 4) + AN
, /* set current to AN */
400 xxR
= (XX
<< 4) + R
, /* set current to R */
401 xxL
= (XX
<< 4) + L
, /* set current to L */
402 Nxx
= (ON
<< 4) + 0xF, /* set run to neutral */
403 Axx
= (AN
<< 4) + 0xF, /* set run to AN */
404 ExE
= (EN
<< 4) + EN
, /* set run to EN, set current to EN */
405 NIx
= (ON
<< 4) + 0xF + IX
, /* set run to N, increment */
406 NxN
= (ON
<< 4) + ON
, /* set run to N, set current to N */
407 NxR
= (ON
<< 4) + R
, /* set run to N, set current to R */
408 NxE
= (ON
<< 4) + EN
, /* set run to N, set current to EN */
410 AxA
= (AN
<< 4) + AN
, /* set run to AN, set current to AN */
411 NxL
= (ON
<< 4) + L
, /* set run to N, set current to L */
412 LxL
= (L
<< 4) + L
, /* set run to L, set current to L */
415 static const int actionWeak
[][10] =
417 /* N, L, R, AN, EN, AL, NSM, CS, ES, ET */
418 /*xa*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxR
, xxN
, xxN
, xxN
}, /* arabic letter */
419 /*xr*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxR
, xxN
, xxN
, xIx
}, /* right letter */
420 /*xl*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xxN
, xxN
, xIx
}, /* left letter */
422 /*ao*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxN
, xxN
, xxN
, xxN
}, /* arabic lett. foll by ON */
423 /*ro*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxN
, xxN
, xxN
, xIx
}, /* right lett. foll by ON */
424 /*lo*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxN
, xxN
, xxN
, xIx
}, /* left lett. foll by ON */
426 /*rt*/ { Nxx
, Nxx
, Nxx
, Nxx
, ExE
, NxR
, xIx
, NxN
, NxN
, xIx
}, /* ET following R */
427 /*lt*/ { Nxx
, Nxx
, Nxx
, Nxx
, LxL
, NxR
, xIx
, NxN
, NxN
, xIx
}, /* ET following L */
429 /*cn*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxA
, xIx
, xxN
, xxN
}, /* EN, AN following AL */
430 /*ra*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxA
, xIx
, xxN
, xIx
}, /* arabic number foll R */
431 /*re*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxE
, xIx
, xIx
, xxE
}, /* european number foll R */
432 /*la*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxA
, xIx
, xxN
, xIx
}, /* arabic number foll L */
433 /*le*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xIx
, xIx
, xxL
}, /* european number foll L */
435 /*ac*/ { Nxx
, Nxx
, Nxx
, Axx
, AxA
, NxR
, NxN
, NxN
, NxN
, NxN
}, /* CS following cn */
436 /*rc*/ { Nxx
, Nxx
, Nxx
, Axx
, NxE
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS following ra */
437 /*rs*/ { Nxx
, Nxx
, Nxx
, Nxx
, ExE
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS,ES following re */
438 /*lc*/ { Nxx
, Nxx
, Nxx
, Axx
, NxL
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS following la */
439 /*ls*/ { Nxx
, Nxx
, Nxx
, Nxx
, LxL
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS,ES following le */
441 /*ret*/{ xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxE
, xxN
, xxN
, xxE
}, /* ET following re */
442 /*let*/{ xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xxN
, xxN
, xxL
}, /* ET following le */
445 static int GetDeferredType(int action
)
447 return (action
>> 4) & 0xF;
450 static int GetResolvedType(int action
)
455 /* Note on action table:
457 States can be of two kinds:
458 - Immediate Resolution State, where each input token
459 is resolved as soon as it is seen. These states have
460 only single action codes (xxN) or the no-op (xxx)
461 for static input tokens.
462 - Deferred Resolution State, where input tokens either
463 either extend the run (xIx) or resolve its Type (e.g. Nxx).
465 Input classes are of three kinds
466 - Static Input Token, where the class of the token remains
467 unchanged on output (AN, L, N, R)
468 - Replaced Input Token, where the class of the token is
469 always replaced on output (AL, BN, NSM, CS, ES, ET)
470 - Conditional Input Token, where the class of the token is
471 changed on output in some, but not all, cases (EN)
473 Where tokens are subject to change, a double action
474 (e.g. NxA, or NxN) is _required_ after deferred states,
475 resolving both the deferred state and changing the current token.
478 /*------------------------------------------------------------------------
479 Function: resolveWeak
481 Resolves the directionality of numeric and other weak character types
483 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
485 Input: Array of embedding levels
488 In/Out: Array of directional classes
490 Note: On input only these directional classes are expected
491 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
492 ------------------------------------------------------------------------*/
493 static void resolveWeak(int baselevel
, WORD
*pcls
, WORD
*plevel
, int cch
)
495 int state
= odd(baselevel
) ? xr
: xl
;
498 int level
= baselevel
;
499 int action
, clsRun
, clsNew
;
503 for (; ich
< cch
; ich
++)
505 /* ignore boundary neutrals */
508 /* must flatten levels unless at a level change; */
511 /* lookahead for level changes */
512 if (ich
+ 1 == cch
&& level
!= baselevel
)
514 /* have to fixup last BN before end of the loop, since
515 * its fix-upped value will be needed below the assert */
516 pcls
[ich
] = EmbeddingDirection(level
);
518 else if (ich
+ 1 < cch
&& level
!= plevel
[ich
+1] && pcls
[ich
+1] != BN
)
520 /* fixup LAST BN in front / after a level run to make
521 * it act like the SOR/EOR in rule X10 */
522 int newlevel
= plevel
[ich
+1];
523 if (level
> newlevel
) {
526 plevel
[ich
] = newlevel
;
528 /* must match assigned level */
529 pcls
[ich
] = EmbeddingDirection(newlevel
);
530 level
= plevel
[ich
+1];
534 /* don't interrupt runs */
543 ASSERT(pcls
[ich
] <= BN
);
546 action
= actionWeak
[state
][cls
];
548 /* resolve the directionality for deferred runs */
549 clsRun
= GetDeferredType(action
);
552 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
556 /* resolve the directionality class at the current location */
557 clsNew
= GetResolvedType(action
);
561 /* increment a deferred run */
565 state
= stateWeak
[state
][cls
];
568 /* resolve any deferred runs
569 * use the direction of the current level to emulate PDF */
570 cls
= EmbeddingDirection(level
);
572 /* resolve the directionality for deferred runs */
573 clsRun
= GetDeferredType(actionWeak
[state
][cls
]);
575 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
578 /* RESOLVE NEUTRAL TYPES */
583 /* action to resolve previous input */
584 nL
= L
, /* resolve EN to L */
585 En
= 3 << 4, /* resolve neutrals run to embedding level direction */
586 Rn
= R
<< 4, /* resolve neutrals run to strong right */
587 Ln
= L
<< 4, /* resolved neutrals run to strong left */
588 In
= (1<<8), /* increment count of deferred neutrals */
589 LnL
= (1<<4)+L
, /* set run and EN to L */
592 static int GetDeferredNeutrals(int action
, int level
)
594 action
= (action
>> 4) & 0xF;
595 if (action
== (En
>> 4))
596 return EmbeddingDirection(level
);
601 static int GetResolvedNeutrals(int action
)
603 action
= action
& 0xF;
613 /* new temporary class */
614 r
, /* R and characters resolved to R */
615 l
, /* L and characters resolved to L */
616 rn
, /* N preceded by right */
617 ln
, /* N preceded by left */
618 a
, /* AN preceded by left (the abbreviation 'la' is used up above) */
619 na
, /* N preceded by a */
623 /*------------------------------------------------------------------------
626 By rule W7, whenever a EN is 'dominated' by an L (including start of
627 run with embedding direction = L) it is resolved to, and further treated
630 This leads to the need for 'a' and 'na' states.
631 ------------------------------------------------------------------------*/
633 static const int actionNeutrals
[][5] =
635 /* N, L, R, AN, EN = cls */
636 { In
, 0, 0, 0, 0 }, /* r right */
637 { In
, 0, 0, 0, L
}, /* l left */
639 { In
, En
, Rn
, Rn
, Rn
}, /* rn N preceded by right */
640 { In
, Ln
, En
, En
, LnL
}, /* ln N preceded by left */
642 { In
, 0, 0, 0, L
}, /* a AN preceded by left */
643 { In
, En
, Rn
, Rn
, En
}, /* na N preceded by a */
646 static const int stateNeutrals
[][5] =
648 /* N, L, R, AN, EN */
649 { rn
, l
, r
, r
, r
}, /* r right */
650 { ln
, l
, r
, a
, l
}, /* l left */
652 { rn
, l
, r
, r
, r
}, /* rn N preceded by right */
653 { ln
, l
, r
, a
, l
}, /* ln N preceded by left */
655 { na
, l
, r
, a
, l
}, /* a AN preceded by left */
656 { na
, l
, r
, a
, l
}, /* na N preceded by la */
659 /*------------------------------------------------------------------------
660 Function: resolveNeutrals
662 Resolves the directionality of neutral character types.
664 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
666 Input: Array of embedding levels
670 In/Out: Array of directional classes
672 Note: On input only these directional classes are expected
673 R, L, N, AN, EN and BN
675 W8 resolves a number of ENs to L
676 ------------------------------------------------------------------------*/
677 static void resolveNeutrals(int baselevel
, WORD
*pcls
, const WORD
*plevel
, int cch
)
679 /* the state at the start of text depends on the base level */
680 int state
= odd(baselevel
) ? r
: l
;
684 int level
= baselevel
;
686 int action
, clsRun
, clsNew
;
688 for (; ich
< cch
; ich
++)
690 /* ignore boundary neutrals */
693 /* include in the count for a deferred run */
697 /* skip any further processing */
701 ASSERT(pcls
[ich
] < 5); /* "Only N, L, R, AN, EN are allowed" */
704 action
= actionNeutrals
[state
][cls
];
706 /* resolve the directionality for deferred runs */
707 clsRun
= GetDeferredNeutrals(action
, level
);
710 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
714 /* resolve the directionality class at the current location */
715 clsNew
= GetResolvedNeutrals(action
);
722 state
= stateNeutrals
[state
][cls
];
726 /* resolve any deferred runs */
727 cls
= EmbeddingDirection(level
); /* eor has type of current level */
729 /* resolve the directionality for deferred runs */
730 clsRun
= GetDeferredNeutrals(actionNeutrals
[state
][cls
], level
);
732 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
735 /* RESOLVE IMPLICIT */
737 /*------------------------------------------------------------------------
738 Function: resolveImplicit
740 Recursively resolves implicit embedding levels.
741 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
743 Input: Array of direction classes
747 In/Out: Array of embedding levels
749 Note: levels may exceed 15 on output.
750 Accepted subset of direction classes
752 ------------------------------------------------------------------------*/
753 static const WORD addLevel
[][4] =
756 /* even */ { 0, 1, 2, 2, },
757 /* odd */ { 1, 0, 1, 1, }
761 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int cch
)
764 for (; ich
< cch
; ich
++)
766 /* cannot resolve bn here, since some bn were resolved to strong
767 * types in resolveWeak. To remove these we need the original
768 * types, which are available again in resolveWhiteSpace */
773 ASSERT(pcls
[ich
] > 0); /* "No Neutrals allowed to survive here." */
774 ASSERT(pcls
[ich
] < 5); /* "Out of range." */
775 plevel
[ich
] += addLevel
[odd(plevel
[ich
])][pcls
[ich
] - 1];
779 /*************************************************************
780 * BIDI_DeterminLevels
782 BOOL
BIDI_DetermineLevels(
783 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
784 INT uCount
, /* [in] Number of WCHARs in string. */
785 const SCRIPT_STATE
*s
,
786 const SCRIPT_CONTROL
*c
,
787 WORD
*lpOutLevels
/* [out] final string levels */
791 unsigned baselevel
= 0,j
;
792 TRACE("%s, %d", debugstr_wn(lpString
, uCount
), uCount
);
794 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
797 WARN("Out of memory\n");
801 baselevel
= s
->uBidiLevel
;
803 classify(lpString
, chartype
, uCount
, c
);
805 for (j
= 0; j
< uCount
; ++j
)
811 case ON
: chartype
[j
] = N
;
815 /* resolve explicit */
816 resolveExplicit(baselevel
, N
, chartype
, lpOutLevels
, uCount
, 0);
819 resolveWeak(baselevel
, chartype
, lpOutLevels
, uCount
);
821 /* resolve neutrals */
822 resolveNeutrals(baselevel
, chartype
, lpOutLevels
, uCount
);
824 /* resolveImplicit */
825 resolveImplicit(chartype
, lpOutLevels
, uCount
);
827 HeapFree(GetProcessHeap(), 0, chartype
);
831 /* reverse cch indexes */
832 static void reverse(int *pidx
, int cch
)
836 for (; ich
< --cch
; ich
++)
839 pidx
[ich
] = pidx
[cch
];
845 /*------------------------------------------------------------------------
846 Functions: reorder/reorderLevel
848 Recursively reorders the display string
849 "From the highest level down, reverse all characters at that level and
850 higher, down to the lowest odd level"
852 Implements rule L2 of the Unicode bidi Algorithm.
854 Input: Array of embedding levels
856 Flag enabling reversal (set to false by initial caller)
858 In/Out: Text to reorder
860 Note: levels may exceed 15 resp. 61 on input.
862 Rule L3 - reorder combining marks is not implemented here
863 Rule L4 - glyph mirroring is implemented as a display option below
865 Note: this should be applied a line at a time
866 -------------------------------------------------------------------------*/
867 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
871 /* true as soon as first odd level encountered */
872 fReverse
= fReverse
|| odd(level
);
874 for (; ich
< cch
; ich
++)
876 if (plevel
[ich
] < level
)
880 else if (plevel
[ich
] > level
)
882 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
883 cch
- ich
, fReverse
) - 1;
888 reverse(pIndexs
, ich
);
893 /* Applies the reorder in reverse. Taking an already reordered string and returing the original */
894 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
899 /* true as soon as first odd level encountered */
900 fReverse
= fReverse
|| odd(level
);
902 for (; ich
< cch
; ich
++)
904 if (plevel
[ich
] < level
)
906 else if (plevel
[ich
] > level
)
911 reverse(pIndexs
, ich
);
917 for (; ich
< cch
; ich
++)
918 if (plevel
[ich
] > level
)
919 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
920 cch
- ich
, fReverse
) - 1;