2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/list.h"
57 #include "usp10_internal.h"
59 extern const unsigned short bidi_bracket_table
[];
61 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
63 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
66 /* HELPER FUNCTIONS AND DECLARATIONS */
68 /*------------------------------------------------------------------------
69 Bidirectional Character Types
71 as defined by the Unicode Bidirectional Algorithm Table 3-7.
75 The list of bidirectional character types here is not grouped the
76 same way as the table 3-7, since the numberic values for the types
77 are chosen to keep the state and action tables compact.
78 ------------------------------------------------------------------------*/
82 /* ON MUST be zero, code relies on ON = NI = 0 */
83 ON
= 0, /* Other Neutral */
86 AN
, /* Arabic Number */
87 EN
, /* European Number */
88 AL
, /* Arabic Letter (Right-to-left) */
89 NSM
, /* Non-spacing Mark */
90 CS
, /* Common Separator */
91 ES
, /* European Separator */
92 ET
, /* European Terminator (post/prefix e.g. $ and %) */
95 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
98 S
, /* Segment Separator (TAB) // used only in L1 */
99 WS
, /* White space // used only in L1 */
100 B
, /* Paragraph Separator (aka as PS) */
102 /* types for explicit controls */
103 RLO
, /* these are used only in X1-X9 */
109 LRI
, /* Isolate formatting characters new with 6.3 */
114 /* resolved types, also resolved directions */
115 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
118 static const char debug_type
[][4] =
120 "ON", /* Other Neutral */
121 "L", /* Left Letter */
122 "R", /* Right Letter */
123 "AN", /* Arabic Number */
124 "EN", /* European Number */
125 "AL", /* Arabic Letter (Right-to-left) */
126 "NSM", /* Non-spacing Mark */
127 "CS", /* Common Separator */
128 "ES", /* European Separator */
129 "ET", /* European Terminator (post/prefix e.g. $ and %) */
130 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
131 "S", /* Segment Separator (TAB) // used only in L1 */
132 "WS", /* White space // used only in L1 */
133 "B", /* Paragraph Separator (aka as PS) */
134 "RLO", /* these are used only in X1-X9 */
139 "LRI", /* Isolate formatting characters new with 6.3 */
145 /* HELPER FUNCTIONS */
147 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
151 for (i
= start
; i
< end
&& len
< 200; i
++)
153 TRACE(" %s",debug_type
[types
[i
]]);
154 len
+= strlen(debug_type
[types
[i
]])+1;
161 /* Convert the libwine information to the direction enum */
162 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
164 static const enum directions dir_map
[16] =
166 L
, /* unassigned defaults to L */
181 PDF
/* also LRE, LRO, RLE, RLO */
186 for (i
= 0; i
< uCount
; ++i
)
188 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
192 if (!c
->fLegacyBidiClass
) break;
196 case '+': chartype
[i
] = NI
; break;
197 case '/': chartype
[i
] = CS
; break;
203 case 0x202A: chartype
[i
] = LRE
; break;
204 case 0x202B: chartype
[i
] = RLE
; break;
205 case 0x202C: chartype
[i
] = PDF
; break;
206 case 0x202D: chartype
[i
] = LRO
; break;
207 case 0x202E: chartype
[i
] = RLO
; break;
208 case 0x2066: chartype
[i
] = LRI
; break;
209 case 0x2067: chartype
[i
] = RLI
; break;
210 case 0x2068: chartype
[i
] = FSI
; break;
211 case 0x2069: chartype
[i
] = PDI
; break;
218 /* RESOLVE EXPLICIT */
220 static WORD
GreaterEven(int i
)
222 return odd(i
) ? i
+ 1 : i
+ 2;
225 static WORD
GreaterOdd(int i
)
227 return odd(i
) ? i
+ 2 : i
+ 1;
230 static WORD
EmbeddingDirection(int level
)
232 return odd(level
) ? R
: L
;
235 /*------------------------------------------------------------------------
236 Function: resolveExplicit
238 Recursively resolves explicit embedding levels and overrides.
239 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
241 Input: Base embedding level and direction
244 Output: Array of embedding levels
246 In/Out: Array of direction classes
249 Note: The function uses two simple counters to keep track of
250 matching explicit codes and PDF. Use the default argument for
251 the outermost call. The nesting counter counts the recursion
252 depth and not the embedding level.
253 ------------------------------------------------------------------------*/
254 typedef struct tagStackItem
{
260 #define push_stack(l,o,i) \
262 stack[stack_top].level = l; \
263 stack[stack_top].override = o; \
264 stack[stack_top].isolate = i;} while(0)
266 #define pop_stack() do { stack_top++; } while(0)
268 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
270 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, int count
)
273 int overflow_isolate_count
= 0;
274 int overflow_embedding_count
= 0;
275 int valid_isolate_count
= 0;
278 StackItem stack
[MAX_DEPTH
+2];
279 int stack_top
= MAX_DEPTH
+1;
281 stack
[stack_top
].level
= level
;
282 stack
[stack_top
].override
= NI
;
283 stack
[stack_top
].isolate
= FALSE
;
285 for (i
= 0; i
< count
; i
++)
288 if (pclass
[i
] == RLE
)
290 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
291 poutLevel
[i
] = stack
[stack_top
].level
;
292 if (valid_level(least_odd
))
293 push_stack(least_odd
, NI
, FALSE
);
294 else if (overflow_isolate_count
== 0)
295 overflow_embedding_count
++;
298 else if (pclass
[i
] == LRE
)
300 int least_even
= GreaterEven(stack
[stack_top
].level
);
301 poutLevel
[i
] = stack
[stack_top
].level
;
302 if (valid_level(least_even
))
303 push_stack(least_even
, NI
, FALSE
);
304 else if (overflow_isolate_count
== 0)
305 overflow_embedding_count
++;
308 else if (pclass
[i
] == RLO
)
310 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
311 poutLevel
[i
] = stack
[stack_top
].level
;
312 if (valid_level(least_odd
))
313 push_stack(least_odd
, R
, FALSE
);
314 else if (overflow_isolate_count
== 0)
315 overflow_embedding_count
++;
318 else if (pclass
[i
] == LRO
)
320 int least_even
= GreaterEven(stack
[stack_top
].level
);
321 poutLevel
[i
] = stack
[stack_top
].level
;
322 if (valid_level(least_even
))
323 push_stack(least_even
, L
, FALSE
);
324 else if (overflow_isolate_count
== 0)
325 overflow_embedding_count
++;
328 else if (pclass
[i
] == RLI
)
330 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
331 poutLevel
[i
] = stack
[stack_top
].level
;
332 if (valid_level(least_odd
))
334 valid_isolate_count
++;
335 push_stack(least_odd
, NI
, TRUE
);
338 overflow_isolate_count
++;
341 else if (pclass
[i
] == LRI
)
343 int least_even
= GreaterEven(stack
[stack_top
].level
);
344 poutLevel
[i
] = stack
[stack_top
].level
;
345 if (valid_level(least_even
))
347 valid_isolate_count
++;
348 push_stack(least_even
, NI
, TRUE
);
351 overflow_isolate_count
++;
354 else if (pclass
[i
] == FSI
)
359 poutLevel
[i
] = stack
[stack_top
].level
;
360 for (j
= i
+1; j
< count
; j
++)
362 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
367 else if (pclass
[j
] == PDI
)
376 if (skipping
) continue;
383 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
391 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
392 if (valid_level(least_odd
))
394 valid_isolate_count
++;
395 push_stack(least_odd
, NI
, TRUE
);
398 overflow_isolate_count
++;
402 int least_even
= GreaterEven(stack
[stack_top
].level
);
403 if (valid_level(least_even
))
405 valid_isolate_count
++;
406 push_stack(least_even
, NI
, TRUE
);
409 overflow_isolate_count
++;
413 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
415 poutLevel
[i
] = stack
[stack_top
].level
;
416 if (stack
[stack_top
].override
!= NI
)
417 pclass
[i
] = stack
[stack_top
].override
;
420 else if (pclass
[i
] == PDI
)
422 if (overflow_isolate_count
) overflow_isolate_count
--;
423 else if (!valid_isolate_count
) {/* do nothing */}
426 overflow_embedding_count
= 0;
427 while (!stack
[stack_top
].isolate
) pop_stack();
429 valid_isolate_count
--;
431 poutLevel
[i
] = stack
[stack_top
].level
;
434 else if (pclass
[i
] == PDF
)
436 poutLevel
[i
] = stack
[stack_top
].level
;
437 if (overflow_isolate_count
) {/* do nothing */}
438 else if (overflow_embedding_count
) overflow_embedding_count
--;
439 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
444 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
445 for (i
= 0; i
< count
; i
++)
446 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
450 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
452 if (index
== -1 || index
== back_fence
) return index
;
454 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
458 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
460 if (index
== front_fence
) return index
;
462 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
466 typedef struct tagRun
473 typedef struct tagRunChar
479 typedef struct tagIsolatedRun
490 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
492 if (index
>= (iso_run
->length
-1)) return -1;
494 while (index
< iso_run
->length
&& *iso_run
->item
[index
].pcls
== BN
) index
++;
495 if (index
== iso_run
->length
) return -1;
499 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
502 if (index
<= 0) return -1;
504 while (index
> -1 && *iso_run
->item
[index
].pcls
== BN
) index
--;
508 static inline int iso_previousChar(IsolatedRun
*iso_run
, int index
)
510 if (index
<= 0) return -1;
514 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
519 for (i
= 0; i
< iso_run
->length
&& len
< 200; i
++)
521 TRACE(" %s",debug_type
[*iso_run
->item
[i
].pcls
]);
522 len
+= strlen(debug_type
[*iso_run
->item
[i
].pcls
])+1;
524 if (i
!= iso_run
->length
)
529 /*------------------------------------------------------------------------
530 Function: resolveWeak
532 Resolves the directionality of numeric and other weak character types
534 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
536 Input: Array of embedding levels
539 In/Out: Array of directional classes
541 Note: On input only these directional classes are expected
542 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
543 ------------------------------------------------------------------------*/
545 static void resolveWeak(IsolatedRun
* iso_run
)
550 for (i
=0; i
< iso_run
->length
; i
++)
552 if (*iso_run
->item
[i
].pcls
== NSM
)
554 int j
= iso_previousValidChar(iso_run
, i
);
556 *iso_run
->item
[i
].pcls
= iso_run
->sos
;
557 else if (*iso_run
->item
[j
].pcls
>= LRI
)
558 *iso_run
->item
[i
].pcls
= ON
;
560 *iso_run
->item
[i
].pcls
= *iso_run
->item
[j
].pcls
;
565 for (i
= 0; i
< iso_run
->length
; i
++)
567 if (*iso_run
->item
[i
].pcls
== EN
)
569 int j
= iso_previousValidChar(iso_run
, i
);
572 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
|| *iso_run
->item
[j
].pcls
== AL
)
574 if (*iso_run
->item
[j
].pcls
== AL
)
575 *iso_run
->item
[i
].pcls
= AN
;
578 j
= iso_previousValidChar(iso_run
, j
);
584 for (i
= 0; i
< iso_run
->length
; i
++)
586 if (*iso_run
->item
[i
].pcls
== AL
)
587 *iso_run
->item
[i
].pcls
= R
;
591 for (i
= 0; i
< iso_run
->length
; i
++)
593 if (*iso_run
->item
[i
].pcls
== ES
)
595 int b
= iso_previousValidChar(iso_run
, i
);
596 int f
= iso_nextValidChar(iso_run
, i
);
598 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
599 *iso_run
->item
[i
].pcls
= EN
;
601 else if (*iso_run
->item
[i
].pcls
== CS
)
603 int b
= iso_previousValidChar(iso_run
, i
);
604 int f
= iso_nextValidChar(iso_run
, i
);
606 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
607 *iso_run
->item
[i
].pcls
= EN
;
608 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== AN
&& *iso_run
->item
[f
].pcls
== AN
)
609 *iso_run
->item
[i
].pcls
= AN
;
614 for (i
= 0; i
< iso_run
->length
; i
++)
616 if (*iso_run
->item
[i
].pcls
== ET
)
619 for (j
= i
-1 ; j
> -1; j
--)
621 if (*iso_run
->item
[j
].pcls
== BN
) continue;
622 if (*iso_run
->item
[j
].pcls
== ET
) continue;
623 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
626 if (*iso_run
->item
[i
].pcls
== ET
)
628 for (j
= i
+1; j
< iso_run
->length
; j
++)
630 if (*iso_run
->item
[j
].pcls
== BN
) continue;
631 if (*iso_run
->item
[j
].pcls
== ET
) continue;
632 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
640 for (i
= 0; i
< iso_run
->length
; i
++)
642 if (*iso_run
->item
[i
].pcls
== ET
|| *iso_run
->item
[i
].pcls
== ES
|| *iso_run
->item
[i
].pcls
== CS
|| *iso_run
->item
[i
].pcls
== ON
)
646 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
647 *iso_run
->item
[b
].pcls
= ON
;
648 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
649 *iso_run
->item
[f
].pcls
= ON
;
651 *iso_run
->item
[i
].pcls
= ON
;
656 for (i
= 0; i
< iso_run
->length
; i
++)
658 if (*iso_run
->item
[i
].pcls
== EN
)
661 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
662 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
)
664 if (*iso_run
->item
[j
].pcls
== L
)
665 *iso_run
->item
[i
].pcls
= L
;
668 if (iso_run
->sos
== L
&& j
== -1)
669 *iso_run
->item
[i
].pcls
= L
;
674 typedef struct tagBracketPair
680 static int compr(const void *a
, const void* b
)
682 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
685 static BracketPair
*computeBracketPairs(IsolatedRun
*iso_run
)
689 int stack_top
= iso_run
->length
;
690 BracketPair
*out
= NULL
;
694 open_stack
= HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR
) * iso_run
->length
);
695 stack_index
= HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run
->length
);
697 for (i
= 0; i
< iso_run
->length
; i
++)
699 unsigned short ubv
= get_table_entry(bidi_bracket_table
, iso_run
->item
[i
].ch
);
704 out
= HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair
));
711 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
712 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
713 if (open_stack
[stack_top
] == 0x232A)
714 open_stack
[stack_top
] = 0x3009;
715 stack_index
[stack_top
] = i
;
717 else if ((ubv
>> 8) == 1)
720 if (stack_top
== iso_run
->length
) continue;
721 for (j
= stack_top
; j
< iso_run
->length
; j
++)
723 WCHAR c
= iso_run
->item
[i
].ch
;
724 if (c
== 0x232A) c
= 0x3009;
725 if (c
== open_stack
[j
])
727 out
[pair_count
].start
= stack_index
[j
];
728 out
[pair_count
].end
= i
;
730 out
= HeapReAlloc(GetProcessHeap(), 0, out
, sizeof(BracketPair
) * (pair_count
+1));
731 out
[pair_count
].start
= -1;
741 HeapFree(GetProcessHeap(),0,out
);
744 else if (pair_count
> 1)
745 qsort(out
, pair_count
, sizeof(BracketPair
), compr
);
747 HeapFree(GetProcessHeap(), 0, open_stack
);
748 HeapFree(GetProcessHeap(), 0, stack_index
);
752 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
754 /*------------------------------------------------------------------------
755 Function: resolveNeutrals
757 Resolves the directionality of neutral character types.
759 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
761 Input: Array of embedding levels
765 In/Out: Array of directional classes
767 Note: On input only these directional classes are expected
768 R, L, NI, AN, EN and BN
770 W8 resolves a number of ENs to L
771 ------------------------------------------------------------------------*/
772 static void resolveNeutrals(IsolatedRun
*iso_run
)
775 BracketPair
*pairs
= NULL
;
777 /* Translate isolates into NI */
778 for (i
= 0; i
< iso_run
->length
; i
++)
780 if (*iso_run
->item
[i
].pcls
>= LRI
)
781 *iso_run
->item
[i
].pcls
= NI
;
783 switch(*iso_run
->item
[i
].pcls
)
787 case WS
: *iso_run
->item
[i
].pcls
= NI
;
790 ASSERT(*iso_run
->item
[i
].pcls
< 5 || *iso_run
->item
[i
].pcls
== BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
793 /* N0: Skipping bracketed pairs for now */
794 pairs
= computeBracketPairs(iso_run
);
797 BracketPair
*p
= &pairs
[0];
799 while (p
->start
>= 0)
802 int e
= EmbeddingDirection(iso_run
->e
);
803 int o
= EmbeddingDirection(iso_run
->e
+1);
805 TRACE("Bracket Pair [%i - %i]\n",p
->start
, p
->end
);
808 for (j
= p
->start
+1; j
< p
->end
; j
++)
810 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
812 *iso_run
->item
[p
->start
].pcls
= e
;
813 *iso_run
->item
[p
->end
].pcls
= e
;
816 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
820 if (j
== p
->end
&& flag_o
)
822 for (j
= p
->start
; j
>= 0; j
--)
824 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
826 *iso_run
->item
[p
->start
].pcls
= o
;
827 *iso_run
->item
[p
->end
].pcls
= o
;
830 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
832 *iso_run
->item
[p
->start
].pcls
= e
;
833 *iso_run
->item
[p
->end
].pcls
= e
;
839 *iso_run
->item
[p
->start
].pcls
= iso_run
->sos
;
840 *iso_run
->item
[p
->end
].pcls
= iso_run
->sos
;
847 HeapFree(GetProcessHeap(),0,pairs
);
851 for (i
= 0; i
< iso_run
->length
; i
++)
855 if (*iso_run
->item
[i
].pcls
== NI
)
858 int b
= iso_previousValidChar(iso_run
, i
);
867 if (*iso_run
->item
[b
].pcls
== R
|| *iso_run
->item
[b
].pcls
== AN
|| *iso_run
->item
[b
].pcls
== EN
)
869 else if (*iso_run
->item
[b
].pcls
== L
)
871 else /* No string type */
874 j
= iso_nextValidChar(iso_run
, i
);
875 while (j
> -1 && *iso_run
->item
[j
].pcls
== NI
) j
= iso_nextValidChar(iso_run
, j
);
882 else if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== AN
|| *iso_run
->item
[j
].pcls
== EN
)
884 else if (*iso_run
->item
[j
].pcls
== L
)
886 else /* No string type */
891 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
892 *iso_run
->item
[b
].pcls
= r
;
898 for (i
= 0; i
< iso_run
->length
; i
++)
900 if (*iso_run
->item
[i
].pcls
== NI
)
904 *iso_run
->item
[i
].pcls
= EmbeddingDirection(iso_run
->e
);
905 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
906 *iso_run
->item
[b
].pcls
= EmbeddingDirection(iso_run
->e
);
907 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
908 *iso_run
->item
[f
].pcls
= EmbeddingDirection(iso_run
->e
);
913 /*------------------------------------------------------------------------
914 Function: resolveImplicit
916 Recursively resolves implicit embedding levels.
917 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
919 Input: Array of direction classes
923 In/Out: Array of embedding levels
925 Note: levels may exceed 15 on output.
926 Accepted subset of direction classes
928 ------------------------------------------------------------------------*/
929 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
934 for (i
= sos
; i
<= eos
; i
++)
939 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
940 ASSERT(pcls
[i
] < 5); /* "Out of range." */
942 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
944 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
946 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
951 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
956 for (i
= sos
; i
<= eos
; i
++)
958 if (pcls
[i
] == B
|| pcls
[i
] == S
)
961 while (i
> sos
&& j
>= sos
&&
962 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
963 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
964 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
965 plevel
[j
--] = baselevel
;
966 plevel
[i
] = baselevel
;
969 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
970 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
971 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
974 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
975 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
976 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
977 plevel
[j
--] = baselevel
;
982 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, WORD
*pLevel
, LPCWSTR lpString
, int uCount
, struct list
*set
)
984 int run_start
, run_end
, i
;
987 IsolatedRun
*current_isolated
;
989 runs
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(Run
));
996 while (run_start
< uCount
)
998 run_end
= nextValidChar(pcls
, run_start
, uCount
);
999 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
1001 runs
[run_count
].start
= run_start
;
1002 runs
[run_count
].end
= run_end
;
1003 runs
[run_count
].e
= pLevel
[run_start
];
1004 run_start
= nextValidChar(pcls
, run_end
, uCount
);
1008 /* Build Isolating Runs */
1010 while (i
< run_count
)
1013 if (runs
[k
].start
>= 0)
1015 int type_fence
, real_end
;
1017 current_isolated
= HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun
) + sizeof(RunChar
)*uCount
);
1018 if (!current_isolated
) break;
1020 run_start
= runs
[k
].start
;
1021 current_isolated
->e
= runs
[k
].e
;
1022 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
1024 for (j
= 0; j
< current_isolated
->length
; j
++)
1026 current_isolated
->item
[j
].pcls
= &pcls
[runs
[k
].start
+j
];
1027 current_isolated
->item
[j
].ch
= lpString
[runs
[k
].start
+j
];
1030 run_end
= runs
[k
].end
;
1032 TRACE("{ [%i -- %i]",run_start
, run_end
);
1034 if (pcls
[run_end
] == BN
)
1035 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
1037 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
1041 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
1042 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
1051 int l
= current_isolated
->length
;
1053 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1054 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
1056 current_isolated
->item
[l
].pcls
= &pcls
[runs
[j
].start
+m
];
1057 current_isolated
->item
[l
].ch
= lpString
[runs
[j
].start
+m
];
1060 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
1062 run_end
= runs
[j
].end
;
1063 if (pcls
[run_end
] == BN
)
1064 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
1075 type_fence
= previousValidChar(pcls
, run_start
, -1);
1077 if (type_fence
== -1)
1078 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
1080 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
1082 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
1084 if (run_end
== uCount
)
1085 current_isolated
->eos
= current_isolated
->sos
;
1088 /* eos could be an BN */
1089 if ( pcls
[run_end
] == BN
)
1091 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
1092 if (real_end
< run_start
)
1093 real_end
= run_start
;
1098 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
1099 if (type_fence
== uCount
)
1100 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
1102 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
1104 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
1107 list_add_tail(set
, ¤t_isolated
->entry
);
1108 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1113 HeapFree(GetProcessHeap(), 0, runs
);
1116 /*************************************************************
1117 * BIDI_DeterminLevels
1119 BOOL
BIDI_DetermineLevels(
1120 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
1121 INT uCount
, /* [in] Number of WCHARs in string. */
1122 const SCRIPT_STATE
*s
,
1123 const SCRIPT_CONTROL
*c
,
1124 WORD
*lpOutLevels
/* [out] final string levels */
1128 unsigned baselevel
= 0;
1129 struct list IsolatingRuns
;
1130 IsolatedRun
*iso_run
, *next
;
1132 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
1134 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
1137 WARN("Out of memory\n");
1141 baselevel
= s
->uBidiLevel
;
1143 classify(lpString
, chartype
, uCount
, c
);
1144 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
1146 /* resolve explicit */
1147 resolveExplicit(baselevel
, chartype
, lpOutLevels
, uCount
);
1148 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
1150 /* X10/BD13: Computer Isolating runs */
1151 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, lpString
, uCount
, &IsolatingRuns
);
1153 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1155 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
1158 resolveWeak(iso_run
);
1159 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
1161 /* resolve neutrals */
1162 resolveNeutrals(iso_run
);
1163 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
1165 list_remove(&iso_run
->entry
);
1166 HeapFree(GetProcessHeap(),0,iso_run
);
1169 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1170 /* resolveImplicit */
1171 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1173 /* resolveResolvedLevels*/
1174 classify(lpString
, chartype
, uCount
, c
);
1175 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1177 HeapFree(GetProcessHeap(), 0, chartype
);
1181 /* reverse cch indexes */
1182 static void reverse(int *pidx
, int cch
)
1186 for (; ich
< --cch
; ich
++)
1189 pidx
[ich
] = pidx
[cch
];
1195 /*------------------------------------------------------------------------
1196 Functions: reorder/reorderLevel
1198 Recursively reorders the display string
1199 "From the highest level down, reverse all characters at that level and
1200 higher, down to the lowest odd level"
1202 Implements rule L2 of the Unicode bidi Algorithm.
1204 Input: Array of embedding levels
1206 Flag enabling reversal (set to false by initial caller)
1208 In/Out: Text to reorder
1210 Note: levels may exceed 15 resp. 61 on input.
1212 Rule L3 - reorder combining marks is not implemented here
1213 Rule L4 - glyph mirroring is implemented as a display option below
1215 Note: this should be applied a line at a time
1216 -------------------------------------------------------------------------*/
1217 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1221 /* true as soon as first odd level encountered */
1222 fReverse
= fReverse
|| odd(level
);
1224 for (; ich
< cch
; ich
++)
1226 if (plevel
[ich
] < level
)
1230 else if (plevel
[ich
] > level
)
1232 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1233 cch
- ich
, fReverse
) - 1;
1238 reverse(pIndexs
, ich
);
1243 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1244 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1249 /* true as soon as first odd level encountered */
1250 fReverse
= fReverse
|| odd(level
);
1252 for (; ich
< cch
; ich
++)
1254 if (plevel
[ich
] < level
)
1256 else if (plevel
[ich
] > level
)
1261 reverse(pIndexs
, ich
);
1267 for (; ich
< cch
; ich
++)
1268 if (plevel
[ich
] < level
)
1270 else if (plevel
[ich
] > level
)
1271 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1272 cch
- ich
, fReverse
) - 1;
1278 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
1282 classify(lpString
, lpStrength
, uCount
, c
);
1284 for ( i
= 0; i
< uCount
; i
++)
1286 switch(lpStrength
[i
])
1295 lpStrength
[i
] = BIDI_STRONG
;
1304 lpStrength
[i
] = BIDI_WEAK
;
1310 default: /* Neutrals and NSM */
1311 lpStrength
[i
] = BIDI_NEUTRAL
;