2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
52 #include "wine/unicode.h"
53 #include "wine/debug.h"
55 #include "usp10_internal.h"
57 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
59 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
62 /* HELPER FUNCTIONS AND DECLARATIONS */
64 /*------------------------------------------------------------------------
65 Bidirectional Character Types
67 as defined by the Unicode Bidirectional Algorithm Table 3-7.
71 The list of bidirectional character types here is not grouped the
72 same way as the table 3-7, since the numberic values for the types
73 are chosen to keep the state and action tables compact.
74 ------------------------------------------------------------------------*/
78 /* ON MUST be zero, code relies on ON = NI = 0 */
79 ON
= 0, /* Other Neutral */
82 AN
, /* Arabic Number */
83 EN
, /* European Number */
84 AL
, /* Arabic Letter (Right-to-left) */
85 NSM
, /* Non-spacing Mark */
86 CS
, /* Common Separator */
87 ES
, /* European Separator */
88 ET
, /* European Terminator (post/prefix e.g. $ and %) */
91 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
94 S
, /* Segment Separator (TAB) // used only in L1 */
95 WS
, /* White space // used only in L1 */
96 B
, /* Paragraph Separator (aka as PS) */
98 /* types for explicit controls */
99 RLO
, /* these are used only in X1-X9 */
105 LRI
, /* Isolate formatting characters new with 6.3 */
110 /* resolved types, also resolved directions */
111 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
114 static const char debug_type
[][4] =
116 "ON", /* Other Neutral */
117 "L", /* Left Letter */
118 "R", /* Right Letter */
119 "AN", /* Arabic Number */
120 "EN", /* European Number */
121 "AL", /* Arabic Letter (Right-to-left) */
122 "NSM", /* Non-spacing Mark */
123 "CS", /* Common Separator */
124 "ES", /* European Separator */
125 "ET", /* European Terminator (post/prefix e.g. $ and %) */
126 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
127 "S", /* Segment Separator (TAB) // used only in L1 */
128 "WS", /* White space // used only in L1 */
129 "B", /* Paragraph Separator (aka as PS) */
130 "RLO", /* these are used only in X1-X9 */
135 "LRI", /* Isolate formatting characters new with 6.3 */
141 /* HELPER FUNCTIONS */
143 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
147 for (i
= start
; i
< end
; i
++)
148 TRACE(" %s",debug_type
[types
[i
]]);
152 /* Convert the libwine information to the direction enum */
153 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
155 static const enum directions dir_map
[16] =
157 L
, /* unassigned defaults to L */
172 PDF
/* also LRE, LRO, RLE, RLO */
177 for (i
= 0; i
< uCount
; ++i
)
179 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
183 if (!c
->fLegacyBidiClass
) break;
187 case '+': chartype
[i
] = NI
; break;
188 case '/': chartype
[i
] = CS
; break;
194 case 0x202A: chartype
[i
] = LRE
; break;
195 case 0x202B: chartype
[i
] = RLE
; break;
196 case 0x202C: chartype
[i
] = PDF
; break;
197 case 0x202D: chartype
[i
] = LRO
; break;
198 case 0x202E: chartype
[i
] = RLO
; break;
199 case 0x2066: chartype
[i
] = LRI
; break;
200 case 0x2067: chartype
[i
] = RLI
; break;
201 case 0x2068: chartype
[i
] = FSI
; break;
202 case 0x2069: chartype
[i
] = PDI
; break;
209 /* Set a run of cval values at locations all prior to, but not including */
210 /* iStart, to the new value nval. */
211 static void SetDeferredRun(WORD
*pval
, int cval
, int iStart
, int nval
)
214 for (; i
>= iStart
- cval
; i
--)
220 /* RESOLVE EXPLICIT */
222 static WORD
GreaterEven(int i
)
224 return odd(i
) ? i
+ 1 : i
+ 2;
227 static WORD
GreaterOdd(int i
)
229 return odd(i
) ? i
+ 2 : i
+ 1;
232 static WORD
EmbeddingDirection(int level
)
234 return odd(level
) ? R
: L
;
237 /*------------------------------------------------------------------------
238 Function: resolveExplicit
240 Recursively resolves explicit embedding levels and overrides.
241 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
243 Input: Base embedding level and direction
246 Output: Array of embedding levels
248 In/Out: Array of direction classes
251 Note: The function uses two simple counters to keep track of
252 matching explicit codes and PDF. Use the default argument for
253 the outermost call. The nesting counter counts the recursion
254 depth and not the embedding level.
255 ------------------------------------------------------------------------*/
256 typedef struct tagStackItem
{
262 #define push_stack(l,o,i) \
264 stack[stack_top].level = l; \
265 stack[stack_top].override = o; \
266 stack[stack_top].isolate = i;} while(0)
268 #define pop_stack() do { stack_top++; } while(0)
270 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
272 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, int count
)
275 int overflow_isolate_count
= 0;
276 int overflow_embedding_count
= 0;
277 int valid_isolate_count
= 0;
280 StackItem stack
[MAX_DEPTH
+2];
281 int stack_top
= MAX_DEPTH
+1;
283 stack
[stack_top
].level
= level
;
284 stack
[stack_top
].override
= NI
;
285 stack
[stack_top
].isolate
= FALSE
;
287 for (i
= 0; i
< count
; i
++)
290 if (pclass
[i
] == RLE
)
292 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
293 poutLevel
[i
] = stack
[stack_top
].level
;
294 if (valid_level(least_odd
))
295 push_stack(least_odd
, NI
, FALSE
);
296 else if (overflow_isolate_count
== 0)
297 overflow_embedding_count
++;
300 else if (pclass
[i
] == LRE
)
302 int least_even
= GreaterEven(stack
[stack_top
].level
);
303 poutLevel
[i
] = stack
[stack_top
].level
;
304 if (valid_level(least_even
))
305 push_stack(least_even
, NI
, FALSE
);
306 else if (overflow_isolate_count
== 0)
307 overflow_embedding_count
++;
310 else if (pclass
[i
] == RLO
)
312 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
313 poutLevel
[i
] = stack
[stack_top
].level
;
314 if (valid_level(least_odd
))
315 push_stack(least_odd
, R
, FALSE
);
316 else if (overflow_isolate_count
== 0)
317 overflow_embedding_count
++;
320 else if (pclass
[i
] == LRO
)
322 int least_even
= GreaterEven(stack
[stack_top
].level
);
323 poutLevel
[i
] = stack
[stack_top
].level
;
324 if (valid_level(least_even
))
325 push_stack(least_even
, L
, FALSE
);
326 else if (overflow_isolate_count
== 0)
327 overflow_embedding_count
++;
330 else if (pclass
[i
] == RLI
)
332 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
333 poutLevel
[i
] = stack
[stack_top
].level
;
334 if (valid_level(least_odd
))
336 valid_isolate_count
++;
337 push_stack(least_odd
, NI
, TRUE
);
340 overflow_isolate_count
++;
344 else if (pclass
[i
] == LRI
)
346 int least_even
= GreaterEven(stack
[stack_top
].level
);
347 poutLevel
[i
] = stack
[stack_top
].level
;
348 if (valid_level(least_even
))
350 valid_isolate_count
++;
351 push_stack(least_even
, NI
, TRUE
);
354 overflow_isolate_count
++;
358 else if (pclass
[i
] == FSI
)
363 poutLevel
[i
] = stack
[stack_top
].level
;
364 for (j
= i
+1; j
< count
; j
++)
366 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
371 else if (pclass
[j
] == PDI
)
380 if (skipping
) continue;
387 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
395 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
396 if (valid_level(least_odd
))
398 valid_isolate_count
++;
399 push_stack(least_odd
, NI
, TRUE
);
402 overflow_isolate_count
++;
406 int least_even
= GreaterEven(stack
[stack_top
].level
);
407 if (valid_level(least_even
))
409 valid_isolate_count
++;
410 push_stack(least_even
, NI
, TRUE
);
413 overflow_isolate_count
++;
418 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
420 poutLevel
[i
] = stack
[stack_top
].level
;
421 if (stack
[stack_top
].override
!= NI
)
422 pclass
[i
] = stack
[stack_top
].override
;
425 else if (pclass
[i
] == PDI
)
427 if (overflow_isolate_count
) overflow_isolate_count
--;
428 else if (!valid_isolate_count
) {/* do nothing */}
431 overflow_embedding_count
= 0;
432 while (!stack
[stack_top
].isolate
) pop_stack();
434 valid_isolate_count
--;
436 poutLevel
[i
] = stack
[stack_top
].level
;
440 else if (pclass
[i
] == PDF
)
442 poutLevel
[i
] = stack
[stack_top
].level
;
443 if (overflow_isolate_count
) {/* do nothing */}
444 else if (overflow_embedding_count
) overflow_embedding_count
--;
445 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
450 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
451 for (i
= 0; i
< count
; i
++)
452 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
456 /* RESOLVE WEAK TYPES */
458 enum states
/* possible states */
460 xa
, /* Arabic letter */
461 xr
, /* right letter */
462 xl
, /* left letter */
464 ao
, /* Arabic lett. foll by ON */
465 ro
, /* right lett. foll by ON */
466 lo
, /* left lett. foll by ON */
468 rt
, /* ET following R */
469 lt
, /* ET following L */
471 cn
, /* EN, AN following AL */
472 ra
, /* Arabic number foll R */
473 re
, /* European number foll R */
474 la
, /* Arabic number foll L */
475 le
, /* European number foll L */
477 ac
, /* CS following cn */
478 rc
, /* CS following ra */
479 rs
, /* CS,ES following re */
480 lc
, /* CS following la */
481 ls
, /* CS,ES following le */
483 ret
, /* ET following re */
484 let
, /* ET following le */
487 static const int stateWeak
[][10] =
489 /* NI, L, R, AN, EN, AL,NSM, CS, ES, ET */
490 /*xa*/ { ao
, xl
, xr
, cn
, cn
, xa
, xa
, ao
, ao
, ao
}, /* Arabic letter */
491 /*xr*/ { ro
, xl
, xr
, ra
, re
, xa
, xr
, ro
, ro
, rt
}, /* right letter */
492 /*xl*/ { lo
, xl
, xr
, la
, le
, xa
, xl
, lo
, lo
, lt
}, /* left letter */
494 /*ao*/ { ao
, xl
, xr
, cn
, cn
, xa
, ao
, ao
, ao
, ao
}, /* Arabic lett. foll by ON*/
495 /*ro*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* right lett. foll by ON */
496 /*lo*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* left lett. foll by ON */
498 /*rt*/ { ro
, xl
, xr
, ra
, re
, xa
, rt
, ro
, ro
, rt
}, /* ET following R */
499 /*lt*/ { lo
, xl
, xr
, la
, le
, xa
, lt
, lo
, lo
, lt
}, /* ET following L */
501 /*cn*/ { ao
, xl
, xr
, cn
, cn
, xa
, cn
, ac
, ao
, ao
}, /* EN, AN following AL */
502 /*ra*/ { ro
, xl
, xr
, ra
, re
, xa
, ra
, rc
, ro
, rt
}, /* Arabic number foll R */
503 /*re*/ { ro
, xl
, xr
, ra
, re
, xa
, re
, rs
, rs
,ret
}, /* European number foll R */
504 /*la*/ { lo
, xl
, xr
, la
, le
, xa
, la
, lc
, lo
, lt
}, /* Arabic number foll L */
505 /*le*/ { lo
, xl
, xr
, la
, le
, xa
, le
, ls
, ls
,let
}, /* European number foll L */
507 /*ac*/ { ao
, xl
, xr
, cn
, cn
, xa
, ao
, ao
, ao
, ao
}, /* CS following cn */
508 /*rc*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* CS following ra */
509 /*rs*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* CS,ES following re */
510 /*lc*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* CS following la */
511 /*ls*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* CS,ES following le */
513 /*ret*/{ ro
, xl
, xr
, ra
, re
, xa
,ret
, ro
, ro
,ret
}, /* ET following re */
514 /*let*/{ lo
, xl
, xr
, la
, le
, xa
,let
, lo
, lo
,let
}, /* ET following le */
517 enum actions
/* possible actions */
520 IX
= 0x100, /* increment */
521 XX
= 0xF, /* no-op */
524 xxx
= (XX
<< 4) + XX
, /* no-op */
525 xIx
= IX
+ xxx
, /* increment run */
526 xxN
= (XX
<< 4) + ON
, /* set current to NI */
527 xxE
= (XX
<< 4) + EN
, /* set current to EN */
528 xxA
= (XX
<< 4) + AN
, /* set current to AN */
529 xxR
= (XX
<< 4) + R
, /* set current to R */
530 xxL
= (XX
<< 4) + L
, /* set current to L */
531 Nxx
= (ON
<< 4) + 0xF, /* set run to neutral */
532 Axx
= (AN
<< 4) + 0xF, /* set run to AN */
533 ExE
= (EN
<< 4) + EN
, /* set run to EN, set current to EN */
534 NIx
= (ON
<< 4) + 0xF + IX
, /* set run to NI, increment */
535 NxN
= (ON
<< 4) + ON
, /* set run to NI, set current to NI */
536 NxR
= (ON
<< 4) + R
, /* set run to NI, set current to R */
537 NxE
= (ON
<< 4) + EN
, /* set run to NI, set current to EN */
539 AxA
= (AN
<< 4) + AN
, /* set run to AN, set current to AN */
540 NxL
= (ON
<< 4) + L
, /* set run to NI, set current to L */
541 LxL
= (L
<< 4) + L
, /* set run to L, set current to L */
544 static const int actionWeak
[][10] =
546 /* NI, L, R, AN, EN, AL, NSM, CS, ES, ET */
547 /*xa*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxR
, xxN
, xxN
, xxN
}, /* Arabic letter */
548 /*xr*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxR
, xxN
, xxN
, xIx
}, /* right letter */
549 /*xl*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xxN
, xxN
, xIx
}, /* left letter */
551 /*ao*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxN
, xxN
, xxN
, xxN
}, /* Arabic lett. foll by ON */
552 /*ro*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxN
, xxN
, xxN
, xIx
}, /* right lett. foll by ON */
553 /*lo*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxN
, xxN
, xxN
, xIx
}, /* left lett. foll by ON */
555 /*rt*/ { Nxx
, Nxx
, Nxx
, Nxx
, ExE
, NxR
, xIx
, NxN
, NxN
, xIx
}, /* ET following R */
556 /*lt*/ { Nxx
, Nxx
, Nxx
, Nxx
, LxL
, NxR
, xIx
, NxN
, NxN
, xIx
}, /* ET following L */
558 /*cn*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxA
, xIx
, xxN
, xxN
}, /* EN, AN following AL */
559 /*ra*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxA
, xIx
, xxN
, xIx
}, /* Arabic number foll R */
560 /*re*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxE
, xIx
, xIx
, xxE
}, /* European number foll R */
561 /*la*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxA
, xIx
, xxN
, xIx
}, /* Arabic number foll L */
562 /*le*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xIx
, xIx
, xxL
}, /* European number foll L */
564 /*ac*/ { Nxx
, Nxx
, Nxx
, Axx
, AxA
, NxR
, NxN
, NxN
, NxN
, NxN
}, /* CS following cn */
565 /*rc*/ { Nxx
, Nxx
, Nxx
, Axx
, NxE
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS following ra */
566 /*rs*/ { Nxx
, Nxx
, Nxx
, Nxx
, ExE
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS,ES following re */
567 /*lc*/ { Nxx
, Nxx
, Nxx
, Axx
, NxL
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS following la */
568 /*ls*/ { Nxx
, Nxx
, Nxx
, Nxx
, LxL
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS,ES following le */
570 /*ret*/{ xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxE
, xxN
, xxN
, xxE
}, /* ET following re */
571 /*let*/{ xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xxN
, xxN
, xxL
}, /* ET following le */
574 static int GetDeferredType(int action
)
576 return (action
>> 4) & 0xF;
579 static int GetResolvedType(int action
)
584 /* Note on action table:
586 States can be of two kinds:
587 - Immediate Resolution State, where each input token
588 is resolved as soon as it is seen. These states have
589 only single action codes (xxN) or the no-op (xxx)
590 for static input tokens.
591 - Deferred Resolution State, where input tokens either
592 either extend the run (xIx) or resolve its Type (e.g. Nxx).
594 Input classes are of three kinds
595 - Static Input Token, where the class of the token remains
596 unchanged on output (AN, L, NI, R)
597 - Replaced Input Token, where the class of the token is
598 always replaced on output (AL, BN, NSM, CS, ES, ET)
599 - Conditional Input Token, where the class of the token is
600 changed on output in some, but not all, cases (EN)
602 Where tokens are subject to change, a double action
603 (e.g. NxA, or NxN) is _required_ after deferred states,
604 resolving both the deferred state and changing the current token.
607 /*------------------------------------------------------------------------
608 Function: resolveWeak
610 Resolves the directionality of numeric and other weak character types
612 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
614 Input: Array of embedding levels
617 In/Out: Array of directional classes
619 Note: On input only these directional classes are expected
620 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
621 ------------------------------------------------------------------------*/
622 static void resolveWeak(int baselevel
, WORD
*pcls
, WORD
*plevel
, int cch
)
624 int state
= odd(baselevel
) ? xr
: xl
;
627 int level
= baselevel
;
628 int action
, clsRun
, clsNew
;
632 for (; ich
< cch
; ich
++)
634 /* ignore boundary neutrals */
637 /* must flatten levels unless at a level change; */
640 /* lookahead for level changes */
641 if (ich
+ 1 == cch
&& level
!= baselevel
)
643 /* have to fixup last BN before end of the loop, since
644 * its fix-upped value will be needed below the assert */
645 pcls
[ich
] = EmbeddingDirection(level
);
647 else if (ich
+ 1 < cch
&& level
!= plevel
[ich
+1] && pcls
[ich
+1] != BN
)
649 /* fixup LAST BN in front / after a level run to make
650 * it act like the SOR/EOR in rule X10 */
651 int newlevel
= plevel
[ich
+1];
652 if (level
> newlevel
) {
655 plevel
[ich
] = newlevel
;
657 /* must match assigned level */
658 pcls
[ich
] = EmbeddingDirection(newlevel
);
659 level
= plevel
[ich
+1];
663 /* don't interrupt runs */
672 ASSERT(pcls
[ich
] <= BN
);
675 action
= actionWeak
[state
][cls
];
677 /* resolve the directionality for deferred runs */
678 clsRun
= GetDeferredType(action
);
681 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
685 /* resolve the directionality class at the current location */
686 clsNew
= GetResolvedType(action
);
690 /* increment a deferred run */
694 state
= stateWeak
[state
][cls
];
697 /* resolve any deferred runs
698 * use the direction of the current level to emulate PDF */
699 cls
= EmbeddingDirection(level
);
701 /* resolve the directionality for deferred runs */
702 clsRun
= GetDeferredType(actionWeak
[state
][cls
]);
704 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
707 /* RESOLVE NEUTRAL TYPES */
712 /* action to resolve previous input */
713 nL
= L
, /* resolve EN to L */
714 En
= 3 << 4, /* resolve neutrals run to embedding level direction */
715 Rn
= R
<< 4, /* resolve neutrals run to strong right */
716 Ln
= L
<< 4, /* resolved neutrals run to strong left */
717 In
= (1<<8), /* increment count of deferred neutrals */
718 LnL
= (1<<4)+L
, /* set run and EN to L */
721 static int GetDeferredNeutrals(int action
, int level
)
723 action
= (action
>> 4) & 0xF;
724 if (action
== (En
>> 4))
725 return EmbeddingDirection(level
);
730 static int GetResolvedNeutrals(int action
)
732 action
= action
& 0xF;
742 /* new temporary class */
743 r
, /* R and characters resolved to R */
744 l
, /* L and characters resolved to L */
745 rn
, /* NI preceded by right */
746 ln
, /* NI preceded by left */
747 a
, /* AN preceded by left (the abbreviation 'la' is used up above) */
748 na
, /* NI preceded by a */
752 /*------------------------------------------------------------------------
755 By rule W7, whenever a EN is 'dominated' by an L (including start of
756 run with embedding direction = L) it is resolved to, and further treated
759 This leads to the need for 'a' and 'na' states.
760 ------------------------------------------------------------------------*/
762 static const int actionNeutrals
[][5] =
764 /* NI, L, R, AN, EN = cls */
765 { In
, 0, 0, 0, 0 }, /* r right */
766 { In
, 0, 0, 0, L
}, /* l left */
768 { In
, En
, Rn
, Rn
, Rn
}, /* rn NI preceded by right */
769 { In
, Ln
, En
, En
, LnL
}, /* ln NI preceded by left */
771 { In
, 0, 0, 0, L
}, /* a AN preceded by left */
772 { In
, En
, Rn
, Rn
, En
}, /* na NI preceded by a */
775 static const int stateNeutrals
[][5] =
777 /* NI, L, R, AN, EN */
778 { rn
, l
, r
, r
, r
}, /* r right */
779 { ln
, l
, r
, a
, l
}, /* l left */
781 { rn
, l
, r
, r
, r
}, /* rn NI preceded by right */
782 { ln
, l
, r
, a
, l
}, /* ln NI preceded by left */
784 { na
, l
, r
, a
, l
}, /* a AN preceded by left */
785 { na
, l
, r
, a
, l
}, /* na NI preceded by la */
788 /*------------------------------------------------------------------------
789 Function: resolveNeutrals
791 Resolves the directionality of neutral character types.
793 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
795 Input: Array of embedding levels
799 In/Out: Array of directional classes
801 Note: On input only these directional classes are expected
802 R, L, NI, AN, EN and BN
804 W8 resolves a number of ENs to L
805 ------------------------------------------------------------------------*/
806 static void resolveNeutrals(int baselevel
, WORD
*pcls
, const WORD
*plevel
, int cch
)
808 /* the state at the start of text depends on the base level */
809 int state
= odd(baselevel
) ? r
: l
;
813 int level
= baselevel
;
815 int action
, clsRun
, clsNew
;
817 for (; ich
< cch
; ich
++)
819 /* ignore boundary neutrals */
822 /* include in the count for a deferred run */
826 /* skip any further processing */
830 ASSERT(pcls
[ich
] < 5); /* "Only NI, L, R, AN, EN are allowed" */
833 action
= actionNeutrals
[state
][cls
];
835 /* resolve the directionality for deferred runs */
836 clsRun
= GetDeferredNeutrals(action
, level
);
839 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
843 /* resolve the directionality class at the current location */
844 clsNew
= GetResolvedNeutrals(action
);
851 state
= stateNeutrals
[state
][cls
];
855 /* resolve any deferred runs */
856 cls
= EmbeddingDirection(level
); /* eor has type of current level */
858 /* resolve the directionality for deferred runs */
859 clsRun
= GetDeferredNeutrals(actionNeutrals
[state
][cls
], level
);
861 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
864 /* RESOLVE IMPLICIT */
866 /*------------------------------------------------------------------------
867 Function: resolveImplicit
869 Recursively resolves implicit embedding levels.
870 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
872 Input: Array of direction classes
876 In/Out: Array of embedding levels
878 Note: levels may exceed 15 on output.
879 Accepted subset of direction classes
881 ------------------------------------------------------------------------*/
882 static const WORD addLevel
[][4] =
885 /* even */ { 0, 1, 2, 2, },
886 /* odd */ { 1, 0, 1, 1, }
890 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int cch
)
893 for (; ich
< cch
; ich
++)
895 /* cannot resolve bn here, since some bn were resolved to strong
896 * types in resolveWeak. To remove these we need the original
897 * types, which are available again in resolveWhiteSpace */
902 ASSERT(pcls
[ich
] > 0); /* "No Neutrals allowed to survive here." */
903 ASSERT(pcls
[ich
] < 5); /* "Out of range." */
904 plevel
[ich
] += addLevel
[odd(plevel
[ich
])][pcls
[ich
] - 1];
908 /*************************************************************
909 * BIDI_DeterminLevels
911 BOOL
BIDI_DetermineLevels(
912 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
913 INT uCount
, /* [in] Number of WCHARs in string. */
914 const SCRIPT_STATE
*s
,
915 const SCRIPT_CONTROL
*c
,
916 WORD
*lpOutLevels
/* [out] final string levels */
920 unsigned baselevel
= 0;
922 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
924 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
927 WARN("Out of memory\n");
931 baselevel
= s
->uBidiLevel
;
933 classify(lpString
, chartype
, uCount
, c
);
934 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
936 for (j
= 0; j
< uCount
; ++j
)
942 case ON
: chartype
[j
] = NI
;
946 /* resolve explicit */
947 resolveExplicit(baselevel
, chartype
, lpOutLevels
, uCount
);
948 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
951 resolveWeak(baselevel
, chartype
, lpOutLevels
, uCount
);
953 /* resolve neutrals */
954 resolveNeutrals(baselevel
, chartype
, lpOutLevels
, uCount
);
956 /* resolveImplicit */
957 resolveImplicit(chartype
, lpOutLevels
, uCount
);
959 HeapFree(GetProcessHeap(), 0, chartype
);
963 /* reverse cch indexes */
964 static void reverse(int *pidx
, int cch
)
968 for (; ich
< --cch
; ich
++)
971 pidx
[ich
] = pidx
[cch
];
977 /*------------------------------------------------------------------------
978 Functions: reorder/reorderLevel
980 Recursively reorders the display string
981 "From the highest level down, reverse all characters at that level and
982 higher, down to the lowest odd level"
984 Implements rule L2 of the Unicode bidi Algorithm.
986 Input: Array of embedding levels
988 Flag enabling reversal (set to false by initial caller)
990 In/Out: Text to reorder
992 Note: levels may exceed 15 resp. 61 on input.
994 Rule L3 - reorder combining marks is not implemented here
995 Rule L4 - glyph mirroring is implemented as a display option below
997 Note: this should be applied a line at a time
998 -------------------------------------------------------------------------*/
999 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1003 /* true as soon as first odd level encountered */
1004 fReverse
= fReverse
|| odd(level
);
1006 for (; ich
< cch
; ich
++)
1008 if (plevel
[ich
] < level
)
1012 else if (plevel
[ich
] > level
)
1014 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1015 cch
- ich
, fReverse
) - 1;
1020 reverse(pIndexs
, ich
);
1025 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1026 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1031 /* true as soon as first odd level encountered */
1032 fReverse
= fReverse
|| odd(level
);
1034 for (; ich
< cch
; ich
++)
1036 if (plevel
[ich
] < level
)
1038 else if (plevel
[ich
] > level
)
1043 reverse(pIndexs
, ich
);
1049 for (; ich
< cch
; ich
++)
1050 if (plevel
[ich
] < level
)
1052 else if (plevel
[ich
] > level
)
1053 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1054 cch
- ich
, fReverse
) - 1;
1060 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
1064 classify(lpString
, lpStrength
, uCount
, c
);
1066 for ( i
= 0; i
< uCount
; i
++)
1068 switch(lpStrength
[i
])
1077 lpStrength
[i
] = BIDI_STRONG
;
1086 lpStrength
[i
] = BIDI_WEAK
;
1092 default: /* Neutrals and NSM */
1093 lpStrength
[i
] = BIDI_NEUTRAL
;