2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
52 #include "wine/unicode.h"
53 #include "wine/debug.h"
54 #include "wine/list.h"
56 #include "usp10_internal.h"
58 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
60 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
63 /* HELPER FUNCTIONS AND DECLARATIONS */
65 /*------------------------------------------------------------------------
66 Bidirectional Character Types
68 as defined by the Unicode Bidirectional Algorithm Table 3-7.
72 The list of bidirectional character types here is not grouped the
73 same way as the table 3-7, since the numberic values for the types
74 are chosen to keep the state and action tables compact.
75 ------------------------------------------------------------------------*/
79 /* ON MUST be zero, code relies on ON = NI = 0 */
80 ON
= 0, /* Other Neutral */
83 AN
, /* Arabic Number */
84 EN
, /* European Number */
85 AL
, /* Arabic Letter (Right-to-left) */
86 NSM
, /* Non-spacing Mark */
87 CS
, /* Common Separator */
88 ES
, /* European Separator */
89 ET
, /* European Terminator (post/prefix e.g. $ and %) */
92 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
95 S
, /* Segment Separator (TAB) // used only in L1 */
96 WS
, /* White space // used only in L1 */
97 B
, /* Paragraph Separator (aka as PS) */
99 /* types for explicit controls */
100 RLO
, /* these are used only in X1-X9 */
106 LRI
, /* Isolate formatting characters new with 6.3 */
111 /* resolved types, also resolved directions */
112 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
115 static const char debug_type
[][4] =
117 "ON", /* Other Neutral */
118 "L", /* Left Letter */
119 "R", /* Right Letter */
120 "AN", /* Arabic Number */
121 "EN", /* European Number */
122 "AL", /* Arabic Letter (Right-to-left) */
123 "NSM", /* Non-spacing Mark */
124 "CS", /* Common Separator */
125 "ES", /* European Separator */
126 "ET", /* European Terminator (post/prefix e.g. $ and %) */
127 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
128 "S", /* Segment Separator (TAB) // used only in L1 */
129 "WS", /* White space // used only in L1 */
130 "B", /* Paragraph Separator (aka as PS) */
131 "RLO", /* these are used only in X1-X9 */
136 "LRI", /* Isolate formatting characters new with 6.3 */
142 /* HELPER FUNCTIONS */
144 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
148 for (i
= start
; i
< end
; i
++)
149 TRACE(" %s",debug_type
[types
[i
]]);
153 /* Convert the libwine information to the direction enum */
154 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
156 static const enum directions dir_map
[16] =
158 L
, /* unassigned defaults to L */
173 PDF
/* also LRE, LRO, RLE, RLO */
178 for (i
= 0; i
< uCount
; ++i
)
180 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
184 if (!c
->fLegacyBidiClass
) break;
188 case '+': chartype
[i
] = NI
; break;
189 case '/': chartype
[i
] = CS
; break;
195 case 0x202A: chartype
[i
] = LRE
; break;
196 case 0x202B: chartype
[i
] = RLE
; break;
197 case 0x202C: chartype
[i
] = PDF
; break;
198 case 0x202D: chartype
[i
] = LRO
; break;
199 case 0x202E: chartype
[i
] = RLO
; break;
200 case 0x2066: chartype
[i
] = LRI
; break;
201 case 0x2067: chartype
[i
] = RLI
; break;
202 case 0x2068: chartype
[i
] = FSI
; break;
203 case 0x2069: chartype
[i
] = PDI
; break;
210 /* RESOLVE EXPLICIT */
212 static WORD
GreaterEven(int i
)
214 return odd(i
) ? i
+ 1 : i
+ 2;
217 static WORD
GreaterOdd(int i
)
219 return odd(i
) ? i
+ 2 : i
+ 1;
222 static WORD
EmbeddingDirection(int level
)
224 return odd(level
) ? R
: L
;
227 /*------------------------------------------------------------------------
228 Function: resolveExplicit
230 Recursively resolves explicit embedding levels and overrides.
231 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
233 Input: Base embedding level and direction
236 Output: Array of embedding levels
238 In/Out: Array of direction classes
241 Note: The function uses two simple counters to keep track of
242 matching explicit codes and PDF. Use the default argument for
243 the outermost call. The nesting counter counts the recursion
244 depth and not the embedding level.
245 ------------------------------------------------------------------------*/
246 typedef struct tagStackItem
{
252 #define push_stack(l,o,i) \
254 stack[stack_top].level = l; \
255 stack[stack_top].override = o; \
256 stack[stack_top].isolate = i;} while(0)
258 #define pop_stack() do { stack_top++; } while(0)
260 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
262 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, int count
)
265 int overflow_isolate_count
= 0;
266 int overflow_embedding_count
= 0;
267 int valid_isolate_count
= 0;
270 StackItem stack
[MAX_DEPTH
+2];
271 int stack_top
= MAX_DEPTH
+1;
273 stack
[stack_top
].level
= level
;
274 stack
[stack_top
].override
= NI
;
275 stack
[stack_top
].isolate
= FALSE
;
277 for (i
= 0; i
< count
; i
++)
280 if (pclass
[i
] == RLE
)
282 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
283 poutLevel
[i
] = stack
[stack_top
].level
;
284 if (valid_level(least_odd
))
285 push_stack(least_odd
, NI
, FALSE
);
286 else if (overflow_isolate_count
== 0)
287 overflow_embedding_count
++;
290 else if (pclass
[i
] == LRE
)
292 int least_even
= GreaterEven(stack
[stack_top
].level
);
293 poutLevel
[i
] = stack
[stack_top
].level
;
294 if (valid_level(least_even
))
295 push_stack(least_even
, NI
, FALSE
);
296 else if (overflow_isolate_count
== 0)
297 overflow_embedding_count
++;
300 else if (pclass
[i
] == RLO
)
302 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
303 poutLevel
[i
] = stack
[stack_top
].level
;
304 if (valid_level(least_odd
))
305 push_stack(least_odd
, R
, FALSE
);
306 else if (overflow_isolate_count
== 0)
307 overflow_embedding_count
++;
310 else if (pclass
[i
] == LRO
)
312 int least_even
= GreaterEven(stack
[stack_top
].level
);
313 poutLevel
[i
] = stack
[stack_top
].level
;
314 if (valid_level(least_even
))
315 push_stack(least_even
, L
, FALSE
);
316 else if (overflow_isolate_count
== 0)
317 overflow_embedding_count
++;
320 else if (pclass
[i
] == RLI
)
322 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
323 poutLevel
[i
] = stack
[stack_top
].level
;
324 if (valid_level(least_odd
))
326 valid_isolate_count
++;
327 push_stack(least_odd
, NI
, TRUE
);
330 overflow_isolate_count
++;
333 else if (pclass
[i
] == LRI
)
335 int least_even
= GreaterEven(stack
[stack_top
].level
);
336 poutLevel
[i
] = stack
[stack_top
].level
;
337 if (valid_level(least_even
))
339 valid_isolate_count
++;
340 push_stack(least_even
, NI
, TRUE
);
343 overflow_isolate_count
++;
346 else if (pclass
[i
] == FSI
)
351 poutLevel
[i
] = stack
[stack_top
].level
;
352 for (j
= i
+1; j
< count
; j
++)
354 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
359 else if (pclass
[j
] == PDI
)
368 if (skipping
) continue;
375 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
383 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
384 if (valid_level(least_odd
))
386 valid_isolate_count
++;
387 push_stack(least_odd
, NI
, TRUE
);
390 overflow_isolate_count
++;
394 int least_even
= GreaterEven(stack
[stack_top
].level
);
395 if (valid_level(least_even
))
397 valid_isolate_count
++;
398 push_stack(least_even
, NI
, TRUE
);
401 overflow_isolate_count
++;
405 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
407 poutLevel
[i
] = stack
[stack_top
].level
;
408 if (stack
[stack_top
].override
!= NI
)
409 pclass
[i
] = stack
[stack_top
].override
;
412 else if (pclass
[i
] == PDI
)
414 if (overflow_isolate_count
) overflow_isolate_count
--;
415 else if (!valid_isolate_count
) {/* do nothing */}
418 overflow_embedding_count
= 0;
419 while (!stack
[stack_top
].isolate
) pop_stack();
421 valid_isolate_count
--;
423 poutLevel
[i
] = stack
[stack_top
].level
;
426 else if (pclass
[i
] == PDF
)
428 poutLevel
[i
] = stack
[stack_top
].level
;
429 if (overflow_isolate_count
) {/* do nothing */}
430 else if (overflow_embedding_count
) overflow_embedding_count
--;
431 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
436 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
437 for (i
= 0; i
< count
; i
++)
438 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
442 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
444 if (index
== -1 || index
== back_fence
) return index
;
446 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
450 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
452 if (index
== front_fence
) return index
;
454 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
458 typedef struct tagRun
465 typedef struct tagIsolatedRun
476 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
478 if (index
>= (iso_run
->length
-1)) return -1;
480 while (index
< iso_run
->length
&& *iso_run
->ppcls
[index
] == BN
) index
++;
481 if (index
== iso_run
->length
) return -1;
485 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
488 if (index
<= 0) return -1;
490 while (index
> -1 && *iso_run
->ppcls
[index
] == BN
) index
--;
494 static inline int iso_previousChar(IsolatedRun
*iso_run
, int index
)
496 if (index
<= 0) return -1;
500 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
505 for (i
= 0; i
< iso_run
->length
; i
++)
506 TRACE(" %s",debug_type
[*iso_run
->ppcls
[i
]]);
510 /*------------------------------------------------------------------------
511 Function: resolveWeak
513 Resolves the directionality of numeric and other weak character types
515 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
517 Input: Array of embedding levels
520 In/Out: Array of directional classes
522 Note: On input only these directional classes are expected
523 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
524 ------------------------------------------------------------------------*/
526 static void resolveWeak(IsolatedRun
* iso_run
)
531 for (i
=0; i
< iso_run
->length
; i
++)
533 if (*iso_run
->ppcls
[i
] == NSM
)
535 int j
= iso_previousValidChar(iso_run
, i
);
537 *iso_run
->ppcls
[i
] = iso_run
->sos
;
538 else if (*iso_run
->ppcls
[j
] >= LRI
)
539 *iso_run
->ppcls
[i
] = ON
;
541 *iso_run
->ppcls
[i
] = *iso_run
->ppcls
[j
];
546 for (i
= 0; i
< iso_run
->length
; i
++)
548 if (*iso_run
->ppcls
[i
] == EN
)
550 int j
= iso_previousValidChar(iso_run
, i
);
553 if (*iso_run
->ppcls
[j
] == R
|| *iso_run
->ppcls
[j
] == L
|| *iso_run
->ppcls
[j
] == AL
)
555 if (*iso_run
->ppcls
[j
] == AL
)
556 *iso_run
->ppcls
[i
] = AN
;
559 j
= iso_previousValidChar(iso_run
, j
);
565 for (i
= 0; i
< iso_run
->length
; i
++)
567 if (*iso_run
->ppcls
[i
] == AL
)
568 *iso_run
->ppcls
[i
] = R
;
572 for (i
= 0; i
< iso_run
->length
; i
++)
574 if (*iso_run
->ppcls
[i
] == ES
)
576 int b
= iso_previousValidChar(iso_run
, i
);
577 int f
= iso_nextValidChar(iso_run
, i
);
579 if (b
> -1 && f
> -1 && *iso_run
->ppcls
[b
] == EN
&& *iso_run
->ppcls
[f
] == EN
)
580 *iso_run
->ppcls
[i
] = EN
;
582 else if (*iso_run
->ppcls
[i
] == CS
)
584 int b
= iso_previousValidChar(iso_run
, i
);
585 int f
= iso_nextValidChar(iso_run
, i
);
587 if (b
> -1 && f
> -1 && *iso_run
->ppcls
[b
] == EN
&& *iso_run
->ppcls
[f
] == EN
)
588 *iso_run
->ppcls
[i
] = EN
;
589 else if (b
> -1 && f
> -1 && *iso_run
->ppcls
[b
] == AN
&& *iso_run
->ppcls
[f
] == AN
)
590 *iso_run
->ppcls
[i
] = AN
;
595 for (i
= 0; i
< iso_run
->length
; i
++)
597 if (*iso_run
->ppcls
[i
] == ET
)
600 for (j
= i
-1 ; j
> -1; j
--)
602 if (*iso_run
->ppcls
[j
] == BN
) continue;
603 if (*iso_run
->ppcls
[j
] == ET
) continue;
604 else if (*iso_run
->ppcls
[j
] == EN
) *iso_run
->ppcls
[i
] = EN
;
607 if (*iso_run
->ppcls
[i
] == ET
)
609 for (j
= i
+1; j
< iso_run
->length
; j
++)
611 if (*iso_run
->ppcls
[j
] == BN
) continue;
612 if (*iso_run
->ppcls
[j
] == ET
) continue;
613 else if (*iso_run
->ppcls
[j
] == EN
) *iso_run
->ppcls
[i
] = EN
;
621 for (i
= 0; i
< iso_run
->length
; i
++)
623 if (*iso_run
->ppcls
[i
] == ET
|| *iso_run
->ppcls
[i
] == ES
|| *iso_run
->ppcls
[i
] == CS
|| *iso_run
->ppcls
[i
] == ON
)
627 if (b
> -1 && *iso_run
->ppcls
[b
] == BN
)
628 *iso_run
->ppcls
[b
] = ON
;
629 if (f
< iso_run
->length
&& *iso_run
->ppcls
[f
] == BN
)
630 *iso_run
->ppcls
[f
] = ON
;
632 *iso_run
->ppcls
[i
] = ON
;
637 for (i
= 0; i
< iso_run
->length
; i
++)
639 if (*iso_run
->ppcls
[i
] == EN
)
642 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
643 if (*iso_run
->ppcls
[j
] == R
|| *iso_run
->ppcls
[j
] == L
)
645 if (*iso_run
->ppcls
[j
] == L
)
646 *iso_run
->ppcls
[i
] = L
;
649 if (iso_run
->sos
== L
&& j
== -1)
650 *iso_run
->ppcls
[i
] = L
;
655 /*------------------------------------------------------------------------
656 Function: resolveNeutrals
658 Resolves the directionality of neutral character types.
660 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
662 Input: Array of embedding levels
666 In/Out: Array of directional classes
668 Note: On input only these directional classes are expected
669 R, L, NI, AN, EN and BN
671 W8 resolves a number of ENs to L
672 ------------------------------------------------------------------------*/
673 static void resolveNeutrals(IsolatedRun
*iso_run
)
677 /* Translate isolates into NI */
678 for (i
= 0; i
< iso_run
->length
; i
++)
680 if (*iso_run
->ppcls
[i
] >= LRI
)
681 *iso_run
->ppcls
[i
] = NI
;
683 switch(*iso_run
->ppcls
[i
])
687 case WS
: *iso_run
->ppcls
[i
] = NI
;
690 ASSERT(*iso_run
->ppcls
[i
] < 5 || *iso_run
->ppcls
[i
] == BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
693 /* N0: Skipping bracketed pairs for now */
696 for (i
= 0; i
< iso_run
->length
; i
++)
700 if (*iso_run
->ppcls
[i
] == NI
)
703 int b
= iso_previousValidChar(iso_run
, i
);
712 if (*iso_run
->ppcls
[b
] == R
|| *iso_run
->ppcls
[b
] == AN
|| *iso_run
->ppcls
[b
] == EN
)
714 else if (*iso_run
->ppcls
[b
] == L
)
716 else /* No string type */
719 j
= iso_nextValidChar(iso_run
, i
);
720 while (j
> -1 && *iso_run
->ppcls
[j
] == NI
) j
= iso_nextValidChar(iso_run
, j
);
727 else if (*iso_run
->ppcls
[j
] == R
|| *iso_run
->ppcls
[j
] == AN
|| *iso_run
->ppcls
[j
] == EN
)
729 else if (*iso_run
->ppcls
[j
] == L
)
731 else /* No string type */
736 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
737 *iso_run
->ppcls
[b
] = r
;
743 for (i
= 0; i
< iso_run
->length
; i
++)
745 if (*iso_run
->ppcls
[i
] == NI
)
749 *iso_run
->ppcls
[i
] = EmbeddingDirection(iso_run
->e
);
750 if (b
> -1 && *iso_run
->ppcls
[b
] == BN
)
751 *iso_run
->ppcls
[b
] = EmbeddingDirection(iso_run
->e
);
752 if (f
< iso_run
->length
&& *iso_run
->ppcls
[f
] == BN
)
753 *iso_run
->ppcls
[f
] = EmbeddingDirection(iso_run
->e
);
758 /*------------------------------------------------------------------------
759 Function: resolveImplicit
761 Recursively resolves implicit embedding levels.
762 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
764 Input: Array of direction classes
768 In/Out: Array of embedding levels
770 Note: levels may exceed 15 on output.
771 Accepted subset of direction classes
773 ------------------------------------------------------------------------*/
774 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
779 for (i
= sos
; i
<= eos
; i
++)
784 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
785 ASSERT(pcls
[i
] < 5); /* "Out of range." */
787 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
789 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
791 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
796 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
801 for (i
= sos
; i
<= eos
; i
++)
803 if (pcls
[i
] == B
|| pcls
[i
] == S
)
806 while (i
> sos
&& j
>= sos
&&
807 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
808 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
809 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
810 plevel
[j
--] = baselevel
;
811 plevel
[i
] = baselevel
;
814 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
815 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
816 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
819 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
820 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
821 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
822 plevel
[j
--] = baselevel
;
827 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, WORD
*pLevel
, int uCount
, struct list
*set
)
829 int run_start
, run_end
, i
;
832 IsolatedRun
*current_isolated
;
838 while (run_start
< uCount
)
840 run_end
= nextValidChar(pcls
, run_start
, uCount
);
841 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
843 runs
[run_count
].start
= run_start
;
844 runs
[run_count
].end
= run_end
;
845 runs
[run_count
].e
= pLevel
[run_start
];
846 run_start
= nextValidChar(pcls
, run_end
, uCount
);
850 /* Build Isolating Runs */
852 while (i
< run_count
)
855 if (runs
[k
].start
>= 0)
857 int type_fence
, real_end
;
859 current_isolated
= HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun
) + sizeof(WORD
*)*uCount
);
861 run_start
= runs
[k
].start
;
862 current_isolated
->e
= runs
[k
].e
;
863 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
865 for (j
= 0; j
< current_isolated
->length
; j
++)
866 current_isolated
->ppcls
[j
] = &pcls
[runs
[k
].start
+j
];
868 run_end
= runs
[k
].end
;
870 TRACE("{ [%i -- %i]",run_start
, run_end
);
872 if (pcls
[run_end
] == BN
)
873 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
875 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
879 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
880 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
889 int l
= current_isolated
->length
;
891 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
892 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
893 current_isolated
->ppcls
[l
] = &pcls
[runs
[j
].start
+m
];
895 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
897 run_end
= runs
[j
].end
;
898 if (pcls
[run_end
] == BN
)
899 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
910 type_fence
= previousValidChar(pcls
, run_start
, -1);
912 if (type_fence
== -1)
913 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
915 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
917 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
919 if (run_end
== uCount
)
920 current_isolated
->eos
= current_isolated
->sos
;
923 /* eos could be an BN */
924 if ( pcls
[run_end
] == BN
)
926 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
927 if (real_end
< run_start
)
928 real_end
= run_start
;
933 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
934 if (type_fence
== uCount
)
935 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
937 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
939 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
942 list_add_tail(set
, ¤t_isolated
->entry
);
943 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
949 /*************************************************************
950 * BIDI_DeterminLevels
952 BOOL
BIDI_DetermineLevels(
953 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
954 INT uCount
, /* [in] Number of WCHARs in string. */
955 const SCRIPT_STATE
*s
,
956 const SCRIPT_CONTROL
*c
,
957 WORD
*lpOutLevels
/* [out] final string levels */
961 unsigned baselevel
= 0;
962 struct list IsolatingRuns
;
963 IsolatedRun
*iso_run
, *next
;
965 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
967 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
970 WARN("Out of memory\n");
974 baselevel
= s
->uBidiLevel
;
976 classify(lpString
, chartype
, uCount
, c
);
977 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
979 /* resolve explicit */
980 resolveExplicit(baselevel
, chartype
, lpOutLevels
, uCount
);
981 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
983 /* X10/BD13: Computer Isolating runs */
984 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, uCount
, &IsolatingRuns
);
986 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
988 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
991 resolveWeak(iso_run
);
992 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
994 /* resolve neutrals */
995 resolveNeutrals(iso_run
);
996 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
998 list_remove(&iso_run
->entry
);
999 HeapFree(GetProcessHeap(),0,iso_run
);
1002 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1003 /* resolveImplicit */
1004 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1006 /* resolveResolvedLevels*/
1007 classify(lpString
, chartype
, uCount
, c
);
1008 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1010 HeapFree(GetProcessHeap(), 0, chartype
);
1014 /* reverse cch indexes */
1015 static void reverse(int *pidx
, int cch
)
1019 for (; ich
< --cch
; ich
++)
1022 pidx
[ich
] = pidx
[cch
];
1028 /*------------------------------------------------------------------------
1029 Functions: reorder/reorderLevel
1031 Recursively reorders the display string
1032 "From the highest level down, reverse all characters at that level and
1033 higher, down to the lowest odd level"
1035 Implements rule L2 of the Unicode bidi Algorithm.
1037 Input: Array of embedding levels
1039 Flag enabling reversal (set to false by initial caller)
1041 In/Out: Text to reorder
1043 Note: levels may exceed 15 resp. 61 on input.
1045 Rule L3 - reorder combining marks is not implemented here
1046 Rule L4 - glyph mirroring is implemented as a display option below
1048 Note: this should be applied a line at a time
1049 -------------------------------------------------------------------------*/
1050 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1054 /* true as soon as first odd level encountered */
1055 fReverse
= fReverse
|| odd(level
);
1057 for (; ich
< cch
; ich
++)
1059 if (plevel
[ich
] < level
)
1063 else if (plevel
[ich
] > level
)
1065 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1066 cch
- ich
, fReverse
) - 1;
1071 reverse(pIndexs
, ich
);
1076 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1077 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1082 /* true as soon as first odd level encountered */
1083 fReverse
= fReverse
|| odd(level
);
1085 for (; ich
< cch
; ich
++)
1087 if (plevel
[ich
] < level
)
1089 else if (plevel
[ich
] > level
)
1094 reverse(pIndexs
, ich
);
1100 for (; ich
< cch
; ich
++)
1101 if (plevel
[ich
] < level
)
1103 else if (plevel
[ich
] > level
)
1104 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1105 cch
- ich
, fReverse
) - 1;
1111 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
1115 classify(lpString
, lpStrength
, uCount
, c
);
1117 for ( i
= 0; i
< uCount
; i
++)
1119 switch(lpStrength
[i
])
1128 lpStrength
[i
] = BIDI_STRONG
;
1137 lpStrength
[i
] = BIDI_WEAK
;
1143 default: /* Neutrals and NSM */
1144 lpStrength
[i
] = BIDI_NEUTRAL
;