2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
52 #include "wine/unicode.h"
53 #include "wine/debug.h"
55 #include "usp10_internal.h"
57 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
59 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
62 /* HELPER FUNCTIONS AND DECLARATIONS */
64 /*------------------------------------------------------------------------
65 Bidirectional Character Types
67 as defined by the Unicode Bidirectional Algorithm Table 3-7.
71 The list of bidirectional character types here is not grouped the
72 same way as the table 3-7, since the numberic values for the types
73 are chosen to keep the state and action tables compact.
74 ------------------------------------------------------------------------*/
78 /* ON MUST be zero, code relies on ON = N = 0 */
79 ON
= 0, /* Other Neutral */
82 AN
, /* Arabic Number */
83 EN
, /* European Number */
84 AL
, /* Arabic Letter (Right-to-left) */
85 NSM
, /* Non-spacing Mark */
86 CS
, /* Common Separator */
87 ES
, /* European Separator */
88 ET
, /* European Terminator (post/prefix e.g. $ and %) */
91 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
94 S
, /* Segment Separator (TAB) // used only in L1 */
95 WS
, /* White space // used only in L1 */
96 B
, /* Paragraph Separator (aka as PS) */
98 /* types for explicit controls */
99 RLO
, /* these are used only in X1-X9 */
105 /* resolved types, also resolved directions */
106 N
= ON
, /* alias, where ON, WS and S are treated the same */
109 /* HELPER FUNCTIONS */
111 /* Convert the libwine information to the direction enum */
112 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
114 static const enum directions dir_map
[16] =
116 L
, /* unassigned defaults to L */
131 PDF
/* also LRE, LRO, RLE, RLO */
136 for (i
= 0; i
< uCount
; ++i
)
138 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
142 if (!c
->fLegacyBidiClass
) break;
146 case '+': chartype
[i
] = N
; break;
147 case '/': chartype
[i
] = CS
; break;
153 case 0x202A: chartype
[i
] = LRE
; break;
154 case 0x202B: chartype
[i
] = RLE
; break;
155 case 0x202C: chartype
[i
] = PDF
; break;
156 case 0x202D: chartype
[i
] = LRO
; break;
157 case 0x202E: chartype
[i
] = RLO
; break;
164 /* Set a run of cval values at locations all prior to, but not including */
165 /* iStart, to the new value nval. */
166 static void SetDeferredRun(WORD
*pval
, int cval
, int iStart
, int nval
)
169 for (; i
>= iStart
- cval
; i
--)
175 /* RESOLVE EXPLICIT */
177 static WORD
GreaterEven(int i
)
179 return odd(i
) ? i
+ 1 : i
+ 2;
182 static WORD
GreaterOdd(int i
)
184 return odd(i
) ? i
+ 2 : i
+ 1;
187 static WORD
EmbeddingDirection(int level
)
189 return odd(level
) ? R
: L
;
192 /*------------------------------------------------------------------------
193 Function: resolveExplicit
195 Recursively resolves explicit embedding levels and overrides.
196 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
198 Input: Base embedding level and direction
201 Output: Array of embedding levels
203 In/Out: Array of direction classes
206 Note: The function uses two simple counters to keep track of
207 matching explicit codes and PDF. Use the default argument for
208 the outermost call. The nesting counter counts the recursion
209 depth and not the embedding level.
210 ------------------------------------------------------------------------*/
212 static int resolveExplicit(int level
, int dir
, WORD
*pcls
, WORD
*plevel
, int cch
, int nNest
)
214 /* always called with a valid nesting level
215 nesting levels are != embedding levels */
216 int nLastValid
= nNest
;
219 /* check input values */
220 ASSERT(nNest
>= 0 && level
>= 0 && level
<= MAX_LEVEL
);
222 /* process the text */
223 for (; ich
< cch
; ich
++)
225 WORD cls
= pcls
[ich
];
231 if (GreaterEven(level
) <= MAX_LEVEL
- (cls
== LRO
? 2 : 0))
233 plevel
[ich
] = GreaterEven(level
);
235 ich
+= resolveExplicit(plevel
[ich
], (cls
== LRE
? N
: L
),
236 &pcls
[ich
+1], &plevel
[ich
+1],
237 cch
- (ich
+1), nNest
);
241 cls
= pcls
[ich
] = BN
;
247 if (GreaterOdd(level
) <= MAX_LEVEL
- (cls
== RLO
? 2 : 0))
249 plevel
[ich
] = GreaterOdd(level
);
251 ich
+= resolveExplicit(plevel
[ich
], (cls
== RLE
? N
: R
),
252 &pcls
[ich
+1], &plevel
[ich
+1],
253 cch
- (ich
+1), nNest
);
257 cls
= pcls
[ich
] = BN
;
261 cls
= pcls
[ich
] = BN
;
264 if (nLastValid
< nNest
)
270 cch
= ich
; /* break the loop, but complete body */
275 /* Apply the override */
288 /* RESOLVE WEAK TYPES */
290 enum states
/* possible states */
292 xa
, /* Arabic letter */
293 xr
, /* right letter */
294 xl
, /* left letter */
296 ao
, /* Arabic lett. foll by ON */
297 ro
, /* right lett. foll by ON */
298 lo
, /* left lett. foll by ON */
300 rt
, /* ET following R */
301 lt
, /* ET following L */
303 cn
, /* EN, AN following AL */
304 ra
, /* Arabic number foll R */
305 re
, /* European number foll R */
306 la
, /* Arabic number foll L */
307 le
, /* European number foll L */
309 ac
, /* CS following cn */
310 rc
, /* CS following ra */
311 rs
, /* CS,ES following re */
312 lc
, /* CS following la */
313 ls
, /* CS,ES following le */
315 ret
, /* ET following re */
316 let
, /* ET following le */
319 static const int stateWeak
[][10] =
321 /* N, L, R, AN, EN, AL,NSM, CS, ES, ET */
322 /*xa*/ { ao
, xl
, xr
, cn
, cn
, xa
, xa
, ao
, ao
, ao
}, /* Arabic letter */
323 /*xr*/ { ro
, xl
, xr
, ra
, re
, xa
, xr
, ro
, ro
, rt
}, /* right letter */
324 /*xl*/ { lo
, xl
, xr
, la
, le
, xa
, xl
, lo
, lo
, lt
}, /* left letter */
326 /*ao*/ { ao
, xl
, xr
, cn
, cn
, xa
, ao
, ao
, ao
, ao
}, /* Arabic lett. foll by ON*/
327 /*ro*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* right lett. foll by ON */
328 /*lo*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* left lett. foll by ON */
330 /*rt*/ { ro
, xl
, xr
, ra
, re
, xa
, rt
, ro
, ro
, rt
}, /* ET following R */
331 /*lt*/ { lo
, xl
, xr
, la
, le
, xa
, lt
, lo
, lo
, lt
}, /* ET following L */
333 /*cn*/ { ao
, xl
, xr
, cn
, cn
, xa
, cn
, ac
, ao
, ao
}, /* EN, AN following AL */
334 /*ra*/ { ro
, xl
, xr
, ra
, re
, xa
, ra
, rc
, ro
, rt
}, /* Arabic number foll R */
335 /*re*/ { ro
, xl
, xr
, ra
, re
, xa
, re
, rs
, rs
,ret
}, /* European number foll R */
336 /*la*/ { lo
, xl
, xr
, la
, le
, xa
, la
, lc
, lo
, lt
}, /* Arabic number foll L */
337 /*le*/ { lo
, xl
, xr
, la
, le
, xa
, le
, ls
, ls
,let
}, /* European number foll L */
339 /*ac*/ { ao
, xl
, xr
, cn
, cn
, xa
, ao
, ao
, ao
, ao
}, /* CS following cn */
340 /*rc*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* CS following ra */
341 /*rs*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* CS,ES following re */
342 /*lc*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* CS following la */
343 /*ls*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* CS,ES following le */
345 /*ret*/{ ro
, xl
, xr
, ra
, re
, xa
,ret
, ro
, ro
,ret
}, /* ET following re */
346 /*let*/{ lo
, xl
, xr
, la
, le
, xa
,let
, lo
, lo
,let
}, /* ET following le */
349 enum actions
/* possible actions */
352 IX
= 0x100, /* increment */
353 XX
= 0xF, /* no-op */
356 xxx
= (XX
<< 4) + XX
, /* no-op */
357 xIx
= IX
+ xxx
, /* increment run */
358 xxN
= (XX
<< 4) + ON
, /* set current to N */
359 xxE
= (XX
<< 4) + EN
, /* set current to EN */
360 xxA
= (XX
<< 4) + AN
, /* set current to AN */
361 xxR
= (XX
<< 4) + R
, /* set current to R */
362 xxL
= (XX
<< 4) + L
, /* set current to L */
363 Nxx
= (ON
<< 4) + 0xF, /* set run to neutral */
364 Axx
= (AN
<< 4) + 0xF, /* set run to AN */
365 ExE
= (EN
<< 4) + EN
, /* set run to EN, set current to EN */
366 NIx
= (ON
<< 4) + 0xF + IX
, /* set run to N, increment */
367 NxN
= (ON
<< 4) + ON
, /* set run to N, set current to N */
368 NxR
= (ON
<< 4) + R
, /* set run to N, set current to R */
369 NxE
= (ON
<< 4) + EN
, /* set run to N, set current to EN */
371 AxA
= (AN
<< 4) + AN
, /* set run to AN, set current to AN */
372 NxL
= (ON
<< 4) + L
, /* set run to N, set current to L */
373 LxL
= (L
<< 4) + L
, /* set run to L, set current to L */
376 static const int actionWeak
[][10] =
378 /* N, L, R, AN, EN, AL, NSM, CS, ES, ET */
379 /*xa*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxR
, xxN
, xxN
, xxN
}, /* Arabic letter */
380 /*xr*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxR
, xxN
, xxN
, xIx
}, /* right letter */
381 /*xl*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xxN
, xxN
, xIx
}, /* left letter */
383 /*ao*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxN
, xxN
, xxN
, xxN
}, /* Arabic lett. foll by ON */
384 /*ro*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxN
, xxN
, xxN
, xIx
}, /* right lett. foll by ON */
385 /*lo*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxN
, xxN
, xxN
, xIx
}, /* left lett. foll by ON */
387 /*rt*/ { Nxx
, Nxx
, Nxx
, Nxx
, ExE
, NxR
, xIx
, NxN
, NxN
, xIx
}, /* ET following R */
388 /*lt*/ { Nxx
, Nxx
, Nxx
, Nxx
, LxL
, NxR
, xIx
, NxN
, NxN
, xIx
}, /* ET following L */
390 /*cn*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxA
, xIx
, xxN
, xxN
}, /* EN, AN following AL */
391 /*ra*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxA
, xIx
, xxN
, xIx
}, /* Arabic number foll R */
392 /*re*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxE
, xIx
, xIx
, xxE
}, /* European number foll R */
393 /*la*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxA
, xIx
, xxN
, xIx
}, /* Arabic number foll L */
394 /*le*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xIx
, xIx
, xxL
}, /* European number foll L */
396 /*ac*/ { Nxx
, Nxx
, Nxx
, Axx
, AxA
, NxR
, NxN
, NxN
, NxN
, NxN
}, /* CS following cn */
397 /*rc*/ { Nxx
, Nxx
, Nxx
, Axx
, NxE
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS following ra */
398 /*rs*/ { Nxx
, Nxx
, Nxx
, Nxx
, ExE
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS,ES following re */
399 /*lc*/ { Nxx
, Nxx
, Nxx
, Axx
, NxL
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS following la */
400 /*ls*/ { Nxx
, Nxx
, Nxx
, Nxx
, LxL
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS,ES following le */
402 /*ret*/{ xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxE
, xxN
, xxN
, xxE
}, /* ET following re */
403 /*let*/{ xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xxN
, xxN
, xxL
}, /* ET following le */
406 static int GetDeferredType(int action
)
408 return (action
>> 4) & 0xF;
411 static int GetResolvedType(int action
)
416 /* Note on action table:
418 States can be of two kinds:
419 - Immediate Resolution State, where each input token
420 is resolved as soon as it is seen. These states have
421 only single action codes (xxN) or the no-op (xxx)
422 for static input tokens.
423 - Deferred Resolution State, where input tokens either
424 either extend the run (xIx) or resolve its Type (e.g. Nxx).
426 Input classes are of three kinds
427 - Static Input Token, where the class of the token remains
428 unchanged on output (AN, L, N, R)
429 - Replaced Input Token, where the class of the token is
430 always replaced on output (AL, BN, NSM, CS, ES, ET)
431 - Conditional Input Token, where the class of the token is
432 changed on output in some, but not all, cases (EN)
434 Where tokens are subject to change, a double action
435 (e.g. NxA, or NxN) is _required_ after deferred states,
436 resolving both the deferred state and changing the current token.
439 /*------------------------------------------------------------------------
440 Function: resolveWeak
442 Resolves the directionality of numeric and other weak character types
444 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
446 Input: Array of embedding levels
449 In/Out: Array of directional classes
451 Note: On input only these directional classes are expected
452 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
453 ------------------------------------------------------------------------*/
454 static void resolveWeak(int baselevel
, WORD
*pcls
, WORD
*plevel
, int cch
)
456 int state
= odd(baselevel
) ? xr
: xl
;
459 int level
= baselevel
;
460 int action
, clsRun
, clsNew
;
464 for (; ich
< cch
; ich
++)
466 /* ignore boundary neutrals */
469 /* must flatten levels unless at a level change; */
472 /* lookahead for level changes */
473 if (ich
+ 1 == cch
&& level
!= baselevel
)
475 /* have to fixup last BN before end of the loop, since
476 * its fix-upped value will be needed below the assert */
477 pcls
[ich
] = EmbeddingDirection(level
);
479 else if (ich
+ 1 < cch
&& level
!= plevel
[ich
+1] && pcls
[ich
+1] != BN
)
481 /* fixup LAST BN in front / after a level run to make
482 * it act like the SOR/EOR in rule X10 */
483 int newlevel
= plevel
[ich
+1];
484 if (level
> newlevel
) {
487 plevel
[ich
] = newlevel
;
489 /* must match assigned level */
490 pcls
[ich
] = EmbeddingDirection(newlevel
);
491 level
= plevel
[ich
+1];
495 /* don't interrupt runs */
504 ASSERT(pcls
[ich
] <= BN
);
507 action
= actionWeak
[state
][cls
];
509 /* resolve the directionality for deferred runs */
510 clsRun
= GetDeferredType(action
);
513 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
517 /* resolve the directionality class at the current location */
518 clsNew
= GetResolvedType(action
);
522 /* increment a deferred run */
526 state
= stateWeak
[state
][cls
];
529 /* resolve any deferred runs
530 * use the direction of the current level to emulate PDF */
531 cls
= EmbeddingDirection(level
);
533 /* resolve the directionality for deferred runs */
534 clsRun
= GetDeferredType(actionWeak
[state
][cls
]);
536 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
539 /* RESOLVE NEUTRAL TYPES */
544 /* action to resolve previous input */
545 nL
= L
, /* resolve EN to L */
546 En
= 3 << 4, /* resolve neutrals run to embedding level direction */
547 Rn
= R
<< 4, /* resolve neutrals run to strong right */
548 Ln
= L
<< 4, /* resolved neutrals run to strong left */
549 In
= (1<<8), /* increment count of deferred neutrals */
550 LnL
= (1<<4)+L
, /* set run and EN to L */
553 static int GetDeferredNeutrals(int action
, int level
)
555 action
= (action
>> 4) & 0xF;
556 if (action
== (En
>> 4))
557 return EmbeddingDirection(level
);
562 static int GetResolvedNeutrals(int action
)
564 action
= action
& 0xF;
574 /* new temporary class */
575 r
, /* R and characters resolved to R */
576 l
, /* L and characters resolved to L */
577 rn
, /* N preceded by right */
578 ln
, /* N preceded by left */
579 a
, /* AN preceded by left (the abbreviation 'la' is used up above) */
580 na
, /* N preceded by a */
584 /*------------------------------------------------------------------------
587 By rule W7, whenever a EN is 'dominated' by an L (including start of
588 run with embedding direction = L) it is resolved to, and further treated
591 This leads to the need for 'a' and 'na' states.
592 ------------------------------------------------------------------------*/
594 static const int actionNeutrals
[][5] =
596 /* N, L, R, AN, EN = cls */
597 { In
, 0, 0, 0, 0 }, /* r right */
598 { In
, 0, 0, 0, L
}, /* l left */
600 { In
, En
, Rn
, Rn
, Rn
}, /* rn N preceded by right */
601 { In
, Ln
, En
, En
, LnL
}, /* ln N preceded by left */
603 { In
, 0, 0, 0, L
}, /* a AN preceded by left */
604 { In
, En
, Rn
, Rn
, En
}, /* na N preceded by a */
607 static const int stateNeutrals
[][5] =
609 /* N, L, R, AN, EN */
610 { rn
, l
, r
, r
, r
}, /* r right */
611 { ln
, l
, r
, a
, l
}, /* l left */
613 { rn
, l
, r
, r
, r
}, /* rn N preceded by right */
614 { ln
, l
, r
, a
, l
}, /* ln N preceded by left */
616 { na
, l
, r
, a
, l
}, /* a AN preceded by left */
617 { na
, l
, r
, a
, l
}, /* na N preceded by la */
620 /*------------------------------------------------------------------------
621 Function: resolveNeutrals
623 Resolves the directionality of neutral character types.
625 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
627 Input: Array of embedding levels
631 In/Out: Array of directional classes
633 Note: On input only these directional classes are expected
634 R, L, N, AN, EN and BN
636 W8 resolves a number of ENs to L
637 ------------------------------------------------------------------------*/
638 static void resolveNeutrals(int baselevel
, WORD
*pcls
, const WORD
*plevel
, int cch
)
640 /* the state at the start of text depends on the base level */
641 int state
= odd(baselevel
) ? r
: l
;
645 int level
= baselevel
;
647 int action
, clsRun
, clsNew
;
649 for (; ich
< cch
; ich
++)
651 /* ignore boundary neutrals */
654 /* include in the count for a deferred run */
658 /* skip any further processing */
662 ASSERT(pcls
[ich
] < 5); /* "Only N, L, R, AN, EN are allowed" */
665 action
= actionNeutrals
[state
][cls
];
667 /* resolve the directionality for deferred runs */
668 clsRun
= GetDeferredNeutrals(action
, level
);
671 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
675 /* resolve the directionality class at the current location */
676 clsNew
= GetResolvedNeutrals(action
);
683 state
= stateNeutrals
[state
][cls
];
687 /* resolve any deferred runs */
688 cls
= EmbeddingDirection(level
); /* eor has type of current level */
690 /* resolve the directionality for deferred runs */
691 clsRun
= GetDeferredNeutrals(actionNeutrals
[state
][cls
], level
);
693 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
696 /* RESOLVE IMPLICIT */
698 /*------------------------------------------------------------------------
699 Function: resolveImplicit
701 Recursively resolves implicit embedding levels.
702 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
704 Input: Array of direction classes
708 In/Out: Array of embedding levels
710 Note: levels may exceed 15 on output.
711 Accepted subset of direction classes
713 ------------------------------------------------------------------------*/
714 static const WORD addLevel
[][4] =
717 /* even */ { 0, 1, 2, 2, },
718 /* odd */ { 1, 0, 1, 1, }
722 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int cch
)
725 for (; ich
< cch
; ich
++)
727 /* cannot resolve bn here, since some bn were resolved to strong
728 * types in resolveWeak. To remove these we need the original
729 * types, which are available again in resolveWhiteSpace */
734 ASSERT(pcls
[ich
] > 0); /* "No Neutrals allowed to survive here." */
735 ASSERT(pcls
[ich
] < 5); /* "Out of range." */
736 plevel
[ich
] += addLevel
[odd(plevel
[ich
])][pcls
[ich
] - 1];
740 /*************************************************************
741 * BIDI_DeterminLevels
743 BOOL
BIDI_DetermineLevels(
744 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
745 INT uCount
, /* [in] Number of WCHARs in string. */
746 const SCRIPT_STATE
*s
,
747 const SCRIPT_CONTROL
*c
,
748 WORD
*lpOutLevels
/* [out] final string levels */
752 unsigned baselevel
= 0,j
;
753 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
755 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
758 WARN("Out of memory\n");
762 baselevel
= s
->uBidiLevel
;
764 classify(lpString
, chartype
, uCount
, c
);
766 for (j
= 0; j
< uCount
; ++j
)
772 case ON
: chartype
[j
] = N
;
776 /* resolve explicit */
777 resolveExplicit(baselevel
, N
, chartype
, lpOutLevels
, uCount
, 0);
780 resolveWeak(baselevel
, chartype
, lpOutLevels
, uCount
);
782 /* resolve neutrals */
783 resolveNeutrals(baselevel
, chartype
, lpOutLevels
, uCount
);
785 /* resolveImplicit */
786 resolveImplicit(chartype
, lpOutLevels
, uCount
);
788 HeapFree(GetProcessHeap(), 0, chartype
);
792 /* reverse cch indexes */
793 static void reverse(int *pidx
, int cch
)
797 for (; ich
< --cch
; ich
++)
800 pidx
[ich
] = pidx
[cch
];
806 /*------------------------------------------------------------------------
807 Functions: reorder/reorderLevel
809 Recursively reorders the display string
810 "From the highest level down, reverse all characters at that level and
811 higher, down to the lowest odd level"
813 Implements rule L2 of the Unicode bidi Algorithm.
815 Input: Array of embedding levels
817 Flag enabling reversal (set to false by initial caller)
819 In/Out: Text to reorder
821 Note: levels may exceed 15 resp. 61 on input.
823 Rule L3 - reorder combining marks is not implemented here
824 Rule L4 - glyph mirroring is implemented as a display option below
826 Note: this should be applied a line at a time
827 -------------------------------------------------------------------------*/
828 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
832 /* true as soon as first odd level encountered */
833 fReverse
= fReverse
|| odd(level
);
835 for (; ich
< cch
; ich
++)
837 if (plevel
[ich
] < level
)
841 else if (plevel
[ich
] > level
)
843 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
844 cch
- ich
, fReverse
) - 1;
849 reverse(pIndexs
, ich
);
854 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
855 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
860 /* true as soon as first odd level encountered */
861 fReverse
= fReverse
|| odd(level
);
863 for (; ich
< cch
; ich
++)
865 if (plevel
[ich
] < level
)
867 else if (plevel
[ich
] > level
)
872 reverse(pIndexs
, ich
);
878 for (; ich
< cch
; ich
++)
879 if (plevel
[ich
] > level
)
880 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
881 cch
- ich
, fReverse
) - 1;
887 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
891 classify(lpString
, lpStrength
, uCount
, c
);
893 for ( i
= 0; i
< uCount
; i
++)
895 switch(lpStrength
[i
])
904 lpStrength
[i
] = BIDI_STRONG
;
913 lpStrength
[i
] = BIDI_WEAK
;
919 default: /* Neutrals and NSM */
920 lpStrength
[i
] = BIDI_NEUTRAL
;