2 * Unicode Bidirectional Algorithm implementation
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
48 #include "wine/debug.h"
49 #include "wine/list.h"
52 #include "dwrite_private.h"
54 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
56 extern const unsigned short bidi_bracket_table
[];
58 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
61 #define odd(x) ((x) & 1)
63 /*------------------------------------------------------------------------
64 Bidirectional Character Types
66 as defined by the Unicode Bidirectional Algorithm Table 3-7.
70 The list of bidirectional character types here is not grouped the
71 same way as the table 3-7, since the numberic values for the types
72 are chosen to keep the state and action tables compact.
73 ------------------------------------------------------------------------*/
77 /* ON MUST be zero, code relies on ON = NI = 0 */
78 ON
= 0, /* Other Neutral */
81 AN
, /* Arabic Number */
82 EN
, /* European Number */
83 AL
, /* Arabic Letter (Right-to-left) */
84 NSM
, /* Non-spacing Mark */
85 CS
, /* Common Separator */
86 ES
, /* European Separator */
87 ET
, /* European Terminator (post/prefix e.g. $ and %) */
90 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
93 S
, /* Segment Separator (TAB) // used only in L1 */
94 WS
, /* White space // used only in L1 */
95 B
, /* Paragraph Separator (aka as PS) */
97 /* types for explicit controls */
98 RLO
, /* these are used only in X1-X9 */
104 LRI
, /* Isolate formatting characters new with 6.3 */
109 /* resolved types, also resolved directions */
110 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
113 static const char debug_type
[][4] =
115 "ON", /* Other Neutral */
116 "L", /* Left Letter */
117 "R", /* Right Letter */
118 "AN", /* Arabic Number */
119 "EN", /* European Number */
120 "AL", /* Arabic Letter (Right-to-left) */
121 "NSM", /* Non-spacing Mark */
122 "CS", /* Common Separator */
123 "ES", /* European Separator */
124 "ET", /* European Terminator (post/prefix e.g. $ and %) */
125 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
126 "S", /* Segment Separator (TAB) used only in L1 */
127 "WS", /* White space used only in L1 */
128 "B", /* Paragraph Separator (aka as PS) */
129 "RLO", /* these are used only in X1-X9 */
134 "LRI", /* Isolate formatting characters new with 6.3 */
140 static inline void bidi_dump_types(const char* header
, const UINT8
*types
, UINT32 start
, UINT32 end
)
143 TRACE("%s:", header
);
144 for (i
= start
; i
< end
&& len
< 200; i
++) {
145 TRACE(" %s", debug_type
[types
[i
]]);
146 len
+= strlen(debug_type
[types
[i
]])+1;
153 /* Convert the libwine information to the direction enum */
154 static void bidi_classify(const WCHAR
*string
, UINT8
*chartype
, UINT32 count
)
156 static const enum directions dir_map
[16] =
158 L
, /* unassigned defaults to L */
173 PDF
/* also LRE, LRO, RLE, RLO */
178 for (i
= 0; i
< count
; ++i
) {
179 chartype
[i
] = dir_map
[get_char_typeW(string
[i
]) >> 12];
181 switch (chartype
[i
]) {
186 case 0x202a: chartype
[i
] = LRE
; break;
187 case 0x202b: chartype
[i
] = RLE
; break;
188 case 0x202c: chartype
[i
] = PDF
; break;
189 case 0x202d: chartype
[i
] = LRO
; break;
190 case 0x202e: chartype
[i
] = RLO
; break;
191 case 0x2066: chartype
[i
] = LRI
; break;
192 case 0x2067: chartype
[i
] = RLI
; break;
193 case 0x2068: chartype
[i
] = FSI
; break;
194 case 0x2069: chartype
[i
] = PDI
; break;
201 WCHAR
bidi_get_mirrored_char(WCHAR ch
)
203 extern const WCHAR wine_mirror_map
[];
204 return ch
+ wine_mirror_map
[wine_mirror_map
[ch
>> 8] + (ch
& 0xff)];
207 /* RESOLVE EXPLICIT */
209 static inline UINT8
get_greater_even_level(UINT8 level
)
211 return odd(level
) ? level
+ 1 : level
+ 2;
214 static inline UINT8
get_greater_odd_level(UINT8 level
)
216 return odd(level
) ? level
+ 2 : level
+ 1;
219 static inline UINT8
get_embedding_direction(UINT8 level
)
221 return odd(level
) ? R
: L
;
224 /*------------------------------------------------------------------------
225 Function: bidi_resolve_explicit
227 Recursively resolves explicit embedding levels and overrides.
228 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
230 Input: Base embedding level and direction
233 Output: Array of embedding levels
235 In/Out: Array of direction classes
238 Note: The function uses two simple counters to keep track of
239 matching explicit codes and PDF. Use the default argument for
240 the outermost call. The nesting counter counts the recursion
241 depth and not the embedding level.
242 ------------------------------------------------------------------------*/
243 typedef struct tagStackItem
250 #define push_stack(l,o,i) \
252 stack[stack_top].level = l; \
253 stack[stack_top].override = o; \
254 stack[stack_top].isolate = i;} while(0)
256 #define pop_stack() do { stack_top++; } while(0)
258 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
260 static void bidi_resolve_explicit(UINT8 baselevel
, UINT8
*classes
, UINT8
*levels
, UINT32 count
)
263 int overflow_isolate_count
= 0;
264 int overflow_embedding_count
= 0;
265 int valid_isolate_count
= 0;
268 StackItem stack
[MAX_DEPTH
+2];
269 int stack_top
= MAX_DEPTH
+1;
271 stack
[stack_top
].level
= baselevel
;
272 stack
[stack_top
].override
= NI
;
273 stack
[stack_top
].isolate
= FALSE
;
275 for (i
= 0; i
< count
; i
++) {
276 UINT8 least_odd
, least_even
;
278 switch (classes
[i
]) {
282 least_odd
= get_greater_odd_level(stack
[stack_top
].level
);
283 levels
[i
] = stack
[stack_top
].level
;
284 if (valid_level(least_odd
))
285 push_stack(least_odd
, NI
, FALSE
);
286 else if (overflow_isolate_count
== 0)
287 overflow_embedding_count
++;
292 least_even
= get_greater_even_level(stack
[stack_top
].level
);
293 levels
[i
] = stack
[stack_top
].level
;
294 if (valid_level(least_even
))
295 push_stack(least_even
, NI
, FALSE
);
296 else if (overflow_isolate_count
== 0)
297 overflow_embedding_count
++;
302 least_odd
= get_greater_odd_level(stack
[stack_top
].level
);
303 levels
[i
] = stack
[stack_top
].level
;
304 if (valid_level(least_odd
))
305 push_stack(least_odd
, R
, FALSE
);
306 else if (overflow_isolate_count
== 0)
307 overflow_embedding_count
++;
312 least_even
= get_greater_even_level(stack
[stack_top
].level
);
313 levels
[i
] = stack
[stack_top
].level
;
314 if (valid_level(least_even
))
315 push_stack(least_even
, L
, FALSE
);
316 else if (overflow_isolate_count
== 0)
317 overflow_embedding_count
++;
322 least_odd
= get_greater_odd_level(stack
[stack_top
].level
);
323 levels
[i
] = stack
[stack_top
].level
;
324 if (valid_level(least_odd
))
326 valid_isolate_count
++;
327 push_stack(least_odd
, NI
, TRUE
);
330 overflow_isolate_count
++;
335 least_even
= get_greater_even_level(stack
[stack_top
].level
);
336 levels
[i
] = stack
[stack_top
].level
;
337 if (valid_level(least_even
))
339 valid_isolate_count
++;
340 push_stack(least_even
, NI
, TRUE
);
343 overflow_isolate_count
++;
353 levels
[i
] = stack
[stack_top
].level
;
354 for (j
= i
+1; j
< count
; j
++)
356 if (classes
[j
] == LRI
|| classes
[j
] == RLI
|| classes
[j
] == FSI
)
361 else if (classes
[j
] == PDI
)
370 if (skipping
) continue;
377 else if (classes
[j
] == R
|| classes
[j
] == AL
)
385 least_odd
= get_greater_odd_level(stack
[stack_top
].level
);
386 if (valid_level(least_odd
))
388 valid_isolate_count
++;
389 push_stack(least_odd
, NI
, TRUE
);
392 overflow_isolate_count
++;
396 least_even
= get_greater_even_level(stack
[stack_top
].level
);
397 if (valid_level(least_even
))
399 valid_isolate_count
++;
400 push_stack(least_even
, NI
, TRUE
);
403 overflow_isolate_count
++;
421 levels
[i
] = stack
[stack_top
].level
;
422 if (stack
[stack_top
].override
!= NI
)
423 classes
[i
] = stack
[stack_top
].override
;
428 if (overflow_isolate_count
) overflow_isolate_count
--;
429 else if (!valid_isolate_count
) {/* do nothing */}
432 overflow_embedding_count
= 0;
433 while (!stack
[stack_top
].isolate
) pop_stack();
435 valid_isolate_count
--;
437 levels
[i
] = stack
[stack_top
].level
;
442 levels
[i
] = stack
[stack_top
].level
;
443 if (overflow_isolate_count
) {/* do nothing */}
444 else if (overflow_embedding_count
) overflow_embedding_count
--;
445 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
454 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
455 for (i
= 0; i
< count
; i
++)
456 if (classes
[i
] == RLE
|| classes
[i
] == LRE
|| classes
[i
] == RLO
|| classes
[i
] == LRO
|| classes
[i
] == PDF
)
460 static inline int get_prev_valid_char_index(const UINT8
*classes
, int index
, int back_fence
)
462 if (index
== -1 || index
== back_fence
) return index
;
464 while (index
> back_fence
&& classes
[index
] == BN
) index
--;
468 static inline int get_next_valid_char_index(const UINT8
*classes
, int index
, int front_fence
)
470 if (index
== front_fence
) return index
;
472 while (index
< front_fence
&& classes
[index
] == BN
) index
++;
476 typedef struct tagRun
483 typedef struct tagRunChar
489 typedef struct tagIsolatedRun
500 static inline int get_next_valid_char_from_run(IsolatedRun
*run
, int index
)
502 if (index
>= (run
->length
-1)) return -1;
504 while (index
< run
->length
&& *run
->item
[index
].class == BN
) index
++;
505 if (index
== run
->length
) return -1;
509 static inline int get_prev_valid_char_from_run(IsolatedRun
*run
, int index
)
511 if (index
<= 0) return -1;
513 while (index
> -1 && *run
->item
[index
].class == BN
) index
--;
517 static inline int iso_previousChar(IsolatedRun
*run
, int index
)
519 if (index
<= 0) return -1;
523 static inline void iso_dump_types(const char* header
, IsolatedRun
*run
)
528 for (i
= 0; i
< run
->length
&& len
< 200; i
++) {
529 TRACE(" %s", debug_type
[*run
->item
[i
].class]);
530 len
+= strlen(debug_type
[*run
->item
[i
].class])+1;
532 if (i
!= run
->length
)
537 /*------------------------------------------------------------------------
538 Function: bidi_resolve_weak
540 Resolves the directionality of numeric and other weak character types
542 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
544 Input: Array of embedding levels
547 In/Out: Array of directional classes
549 Note: On input only these directional classes are expected
550 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
551 ------------------------------------------------------------------------*/
552 static void bidi_resolve_weak(IsolatedRun
*iso_run
)
557 for (i
=0; i
< iso_run
->length
; i
++) {
558 if (*iso_run
->item
[i
].class == NSM
) {
559 int j
= get_prev_valid_char_from_run(iso_run
, i
);
561 *iso_run
->item
[i
].class = iso_run
->sos
;
562 else if (*iso_run
->item
[j
].class >= LRI
)
563 *iso_run
->item
[i
].class = ON
;
565 *iso_run
->item
[i
].class = *iso_run
->item
[j
].class;
570 for (i
= 0; i
< iso_run
->length
; i
++) {
571 if (*iso_run
->item
[i
].class == EN
) {
572 int j
= get_prev_valid_char_from_run(iso_run
, i
);
574 if (*iso_run
->item
[j
].class == R
|| *iso_run
->item
[j
].class == L
|| *iso_run
->item
[j
].class == AL
) {
575 if (*iso_run
->item
[j
].class == AL
)
576 *iso_run
->item
[i
].class = AN
;
579 j
= get_prev_valid_char_from_run(iso_run
, j
);
585 for (i
= 0; i
< iso_run
->length
; i
++) {
586 if (*iso_run
->item
[i
].class == AL
)
587 *iso_run
->item
[i
].class = R
;
591 for (i
= 0; i
< iso_run
->length
; i
++) {
592 if (*iso_run
->item
[i
].class == ES
) {
593 int b
= get_prev_valid_char_from_run(iso_run
, i
);
594 int f
= get_next_valid_char_from_run(iso_run
, i
);
596 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].class == EN
&& *iso_run
->item
[f
].class == EN
)
597 *iso_run
->item
[i
].class = EN
;
599 else if (*iso_run
->item
[i
].class == CS
) {
600 int b
= get_prev_valid_char_from_run(iso_run
, i
);
601 int f
= get_next_valid_char_from_run(iso_run
, i
);
603 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].class == EN
&& *iso_run
->item
[f
].class == EN
)
604 *iso_run
->item
[i
].class = EN
;
605 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].class == AN
&& *iso_run
->item
[f
].class == AN
)
606 *iso_run
->item
[i
].class = AN
;
611 for (i
= 0; i
< iso_run
->length
; i
++) {
612 if (*iso_run
->item
[i
].class == ET
) {
614 for (j
= i
-1 ; j
> -1; j
--) {
615 if (*iso_run
->item
[j
].class == BN
) continue;
616 if (*iso_run
->item
[j
].class == ET
) continue;
617 else if (*iso_run
->item
[j
].class == EN
) *iso_run
->item
[i
].class = EN
;
620 if (*iso_run
->item
[i
].class == ET
) {
621 for (j
= i
+1; j
< iso_run
->length
; j
++) {
622 if (*iso_run
->item
[j
].class == BN
) continue;
623 if (*iso_run
->item
[j
].class == ET
) continue;
624 else if (*iso_run
->item
[j
].class == EN
) *iso_run
->item
[i
].class = EN
;
632 for (i
= 0; i
< iso_run
->length
; i
++) {
633 if (*iso_run
->item
[i
].class == ET
|| *iso_run
->item
[i
].class == ES
|| *iso_run
->item
[i
].class == CS
|| *iso_run
->item
[i
].class == ON
)
637 if (b
> -1 && *iso_run
->item
[b
].class == BN
)
638 *iso_run
->item
[b
].class = ON
;
639 if (f
< iso_run
->length
&& *iso_run
->item
[f
].class == BN
)
640 *iso_run
->item
[f
].class = ON
;
642 *iso_run
->item
[i
].class = ON
;
647 for (i
= 0; i
< iso_run
->length
; i
++) {
648 if (*iso_run
->item
[i
].class == EN
) {
650 for (j
= get_prev_valid_char_from_run(iso_run
, i
); j
> -1; j
= get_prev_valid_char_from_run(iso_run
, j
))
651 if (*iso_run
->item
[j
].class == R
|| *iso_run
->item
[j
].class == L
) {
652 if (*iso_run
->item
[j
].class == L
)
653 *iso_run
->item
[i
].class = L
;
656 if (iso_run
->sos
== L
&& j
== -1)
657 *iso_run
->item
[i
].class = L
;
662 typedef struct tagBracketPair
668 static int bracketpair_compr(const void *a
, const void* b
)
670 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
673 static BracketPair
*bidi_compute_bracket_pairs(IsolatedRun
*iso_run
)
677 int stack_top
= iso_run
->length
;
678 BracketPair
*out
= NULL
;
682 open_stack
= heap_alloc(sizeof(WCHAR
) * iso_run
->length
);
683 stack_index
= heap_alloc(sizeof(int) * iso_run
->length
);
685 for (i
= 0; i
< iso_run
->length
; i
++) {
686 unsigned short ubv
= get_table_entry(bidi_bracket_table
, iso_run
->item
[i
].ch
);
689 out
= heap_alloc(sizeof(BracketPair
));
693 if ((ubv
>> 8) == 0) {
695 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
696 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
697 if (open_stack
[stack_top
] == 0x232A)
698 open_stack
[stack_top
] = 0x3009;
699 stack_index
[stack_top
] = i
;
701 else if ((ubv
>> 8) == 1) {
704 if (stack_top
== iso_run
->length
) continue;
705 for (j
= stack_top
; j
< iso_run
->length
; j
++) {
706 WCHAR c
= iso_run
->item
[i
].ch
;
707 if (c
== 0x232A) c
= 0x3009;
708 if (c
== open_stack
[j
]) {
709 out
[pair_count
].start
= stack_index
[j
];
710 out
[pair_count
].end
= i
;
712 out
= heap_realloc(out
, sizeof(BracketPair
) * (pair_count
+1));
713 out
[pair_count
].start
= -1;
721 if (pair_count
== 0) {
725 else if (pair_count
> 1)
726 qsort(out
, pair_count
, sizeof(BracketPair
), bracketpair_compr
);
728 heap_free(open_stack
);
729 heap_free(stack_index
);
733 static inline UINT8
get_rule_N0_class(UINT8
class)
735 return (class == AN
|| class == EN
) ? R
: class;
738 /*------------------------------------------------------------------------
739 Function: bidi_resolve_neutrals
741 Resolves the directionality of neutral character types.
743 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
745 Input: Array of embedding levels
749 In/Out: Array of directional classes
751 Note: On input only these directional classes are expected
752 R, L, NI, AN, EN and BN
754 W8 resolves a number of ENs to L
755 ------------------------------------------------------------------------*/
756 static void bidi_resolve_neutrals(IsolatedRun
*run
)
761 /* Translate isolates into NI */
762 for (i
= 0; i
< run
->length
; i
++) {
763 if (*run
->item
[i
].class >= LRI
)
764 *run
->item
[i
].class = NI
;
766 switch (*run
->item
[i
].class) {
769 case WS
: *run
->item
[i
].class = NI
;
772 ASSERT(*run
->item
[i
].class < 5 || *run
->item
[i
].class == BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
775 /* N0: Skipping bracketed pairs for now */
776 pairs
= bidi_compute_bracket_pairs(run
);
778 BracketPair
*p
= pairs
;
780 while (p
->start
>= 0) {
781 UINT8 e
= get_embedding_direction(run
->e
);
782 UINT8 o
= get_embedding_direction(run
->e
+ 1);
786 TRACE("Bracket Pair [%i - %i]\n", p
->start
, p
->end
);
789 for (j
= p
->start
+1; j
< p
->end
; j
++) {
790 if (get_rule_N0_class(*run
->item
[j
].class) == e
) {
791 *run
->item
[p
->start
].class = e
;
792 *run
->item
[p
->end
].class = e
;
795 else if (get_rule_N0_class(*run
->item
[j
].class) == o
)
799 if (j
== p
->end
&& flag_o
) {
800 for (j
= p
->start
; j
>= 0; j
--) {
801 if (get_rule_N0_class(*run
->item
[j
].class) == o
) {
802 *run
->item
[p
->start
].class = o
;
803 *run
->item
[p
->end
].class = o
;
806 else if (get_rule_N0_class(*run
->item
[j
].class) == e
) {
807 *run
->item
[p
->start
].class = e
;
808 *run
->item
[p
->end
].class = e
;
813 *run
->item
[p
->start
].class = run
->sos
;
814 *run
->item
[p
->end
].class = run
->sos
;
825 for (i
= 0; i
< run
->length
; i
++) {
828 if (*run
->item
[i
].class == NI
) {
829 int b
= get_prev_valid_char_from_run(run
, i
);
837 if (*run
->item
[b
].class == R
|| *run
->item
[b
].class == AN
|| *run
->item
[b
].class == EN
)
839 else if (*run
->item
[b
].class == L
)
841 else /* No string type */
844 j
= get_next_valid_char_from_run(run
, i
);
845 while (j
> -1 && *run
->item
[j
].class == NI
) j
= get_next_valid_char_from_run(run
, j
);
850 else if (*run
->item
[j
].class == R
|| *run
->item
[j
].class == AN
|| *run
->item
[j
].class == EN
)
852 else if (*run
->item
[j
].class == L
)
854 else /* No string type */
858 for (b
= i
; b
< j
&& b
< run
->length
; b
++)
859 *run
->item
[b
].class = r
;
865 for (i
= 0; i
< run
->length
; i
++) {
866 if (*run
->item
[i
].class == NI
) {
870 *run
->item
[i
].class = get_embedding_direction(run
->e
);
871 if (b
> -1 && *run
->item
[b
].class == BN
)
872 *run
->item
[b
].class = get_embedding_direction(run
->e
);
873 if (f
< run
->length
&& *run
->item
[f
].class == BN
)
874 *run
->item
[f
].class = get_embedding_direction(run
->e
);
879 /*------------------------------------------------------------------------
880 Function: bidi_resolve_implicit
882 Recursively resolves implicit embedding levels.
883 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
885 Input: Array of direction classes
889 In/Out: Array of embedding levels
891 Note: levels may exceed 15 on output.
892 Accepted subset of direction classes
894 ------------------------------------------------------------------------*/
895 static void bidi_resolve_implicit(const UINT8
*classes
, UINT8
*levels
, int sos
, int eos
)
900 for (i
= sos
; i
<= eos
; i
++) {
901 if (classes
[i
] == BN
)
904 ASSERT(classes
[i
] > 0); /* "No Neutrals allowed to survive here." */
905 ASSERT(classes
[i
] < 5); /* "Out of range." */
907 if (odd(levels
[i
]) && (classes
[i
] == L
|| classes
[i
] == EN
|| classes
[i
] == AN
))
909 else if (!odd(levels
[i
]) && classes
[i
] == R
)
911 else if (!odd(levels
[i
]) && (classes
[i
] == EN
|| classes
[i
] == AN
))
916 static inline BOOL
is_rule_L1_reset_class(UINT8
class)
936 static void bidi_resolve_resolved(UINT8 baselevel
, const UINT8
*classes
, UINT8
*levels
, int sos
, int eos
)
941 for (i
= sos
; i
<= eos
; i
++) {
942 if (classes
[i
] == B
|| classes
[i
] == S
) {
944 while (i
> sos
&& j
>= sos
&& is_rule_L1_reset_class(classes
[j
]))
945 levels
[j
--] = baselevel
;
946 levels
[i
] = baselevel
;
948 if (i
== eos
&& is_rule_L1_reset_class(classes
[i
])) {
950 while (j
>= sos
&& is_rule_L1_reset_class(classes
[j
]))
951 levels
[j
--] = baselevel
;
956 static HRESULT
bidi_compute_isolating_runs_set(UINT8 baselevel
, UINT8
*classes
, UINT8
*levels
, const WCHAR
*string
, UINT32 count
, struct list
*set
)
958 int run_start
, run_end
, i
;
963 runs
= heap_alloc(count
* sizeof(Run
));
965 return E_OUTOFMEMORY
;
971 while (run_start
< count
) {
972 run_end
= get_next_valid_char_index(classes
, run_start
, count
);
973 while (run_end
< count
&& levels
[run_end
] == levels
[run_start
])
974 run_end
= get_next_valid_char_index(classes
, run_end
, count
);
976 runs
[run_count
].start
= run_start
;
977 runs
[run_count
].end
= run_end
;
978 runs
[run_count
].e
= levels
[run_start
];
979 run_start
= get_next_valid_char_index(classes
, run_end
, count
);
983 /* Build Isolating Runs */
985 while (i
< run_count
) {
987 if (runs
[k
].start
>= 0) {
988 IsolatedRun
*current_isolated
;
989 int type_fence
, real_end
;
992 current_isolated
= heap_alloc(sizeof(IsolatedRun
) + sizeof(RunChar
)*count
);
993 if (!current_isolated
) {
998 run_start
= runs
[k
].start
;
999 current_isolated
->e
= runs
[k
].e
;
1000 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
1002 for (j
= 0; j
< current_isolated
->length
; j
++) {
1003 current_isolated
->item
[j
].class = &classes
[runs
[k
].start
+j
];
1004 current_isolated
->item
[j
].ch
= string
[runs
[k
].start
+j
];
1007 run_end
= runs
[k
].end
;
1009 TRACE("{ [%i -- %i]",run_start
, run_end
);
1011 if (classes
[run_end
] == BN
)
1012 run_end
= get_prev_valid_char_index(classes
, run_end
, runs
[k
].start
);
1014 while (run_end
< count
&& (classes
[run_end
] == RLI
|| classes
[run_end
] == LRI
|| classes
[run_end
] == FSI
)) {
1017 while (j
< run_count
&& classes
[runs
[j
].start
] != PDI
) j
++;
1018 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
) {
1023 if (j
!= run_count
) {
1024 int l
= current_isolated
->length
;
1027 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1028 for (m
= 0; l
< current_isolated
->length
; l
++, m
++) {
1029 current_isolated
->item
[l
].class = &classes
[runs
[j
].start
+m
];
1030 current_isolated
->item
[l
].ch
= string
[runs
[j
].start
+m
];
1033 TRACE("[%i -- %i]", runs
[j
].start
, runs
[j
].end
);
1035 run_end
= runs
[j
].end
;
1036 if (classes
[run_end
] == BN
)
1037 run_end
= get_prev_valid_char_index(classes
, run_end
, runs
[i
].start
);
1047 type_fence
= get_prev_valid_char_index(classes
, run_start
, -1);
1049 if (type_fence
== -1)
1050 current_isolated
->sos
= (baselevel
> levels
[run_start
]) ? baselevel
: levels
[run_start
];
1052 current_isolated
->sos
= (levels
[type_fence
] > levels
[run_start
]) ? levels
[type_fence
] : levels
[run_start
];
1054 current_isolated
->sos
= get_embedding_direction(current_isolated
->sos
);
1056 if (run_end
== count
)
1057 current_isolated
->eos
= current_isolated
->sos
;
1059 /* eos could be an BN */
1060 if (classes
[run_end
] == BN
) {
1061 real_end
= get_prev_valid_char_index(classes
, run_end
, run_start
-1);
1062 if (real_end
< run_start
)
1063 real_end
= run_start
;
1068 type_fence
= get_next_valid_char_index(classes
, run_end
, count
);
1069 if (type_fence
== count
)
1070 current_isolated
->eos
= (baselevel
> levels
[real_end
]) ? baselevel
: levels
[real_end
];
1072 current_isolated
->eos
= (levels
[type_fence
] > levels
[real_end
]) ? levels
[type_fence
] : levels
[real_end
];
1074 current_isolated
->eos
= get_embedding_direction(current_isolated
->eos
);
1077 list_add_tail(set
, ¤t_isolated
->entry
);
1078 TRACE(" } level %i {%s <--> %s}\n", current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1087 HRESULT
bidi_computelevels(const WCHAR
*string
, UINT32 count
, UINT8 baselevel
, UINT8
*explicit, UINT8
*levels
)
1089 IsolatedRun
*iso_run
, *next
;
1090 struct list IsolatingRuns
;
1094 TRACE("%s, %u\n", debugstr_wn(string
, count
), count
);
1096 chartype
= heap_alloc(count
*sizeof(*chartype
));
1098 return E_OUTOFMEMORY
;
1100 bidi_classify(string
, chartype
, count
);
1101 if (TRACE_ON(bidi
)) bidi_dump_types("start ", chartype
, 0, count
);
1103 bidi_resolve_explicit(baselevel
, chartype
, levels
, count
);
1104 memcpy(explicit, levels
, count
*sizeof(*explicit));
1106 if (TRACE_ON(bidi
)) bidi_dump_types("after explicit", chartype
, 0, count
);
1108 /* X10/BD13: Compute Isolating runs */
1109 hr
= bidi_compute_isolating_runs_set(baselevel
, chartype
, levels
, string
, count
, &IsolatingRuns
);
1113 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1115 if (TRACE_ON(bidi
)) iso_dump_types("run", iso_run
);
1117 bidi_resolve_weak(iso_run
);
1118 if (TRACE_ON(bidi
)) iso_dump_types("after weak", iso_run
);
1120 bidi_resolve_neutrals(iso_run
);
1121 if (TRACE_ON(bidi
)) iso_dump_types("after neutrals", iso_run
);
1123 list_remove(&iso_run
->entry
);
1127 if (TRACE_ON(bidi
)) bidi_dump_types("before implicit", chartype
, 0, count
);
1128 bidi_resolve_implicit(chartype
, levels
, 0, count
-1);
1130 bidi_classify(string
, chartype
, count
);
1131 bidi_resolve_resolved(baselevel
, chartype
, levels
, 0, count
-1);
1134 heap_free(chartype
);