2 * Unicode Bidirectional Algorithm implementation
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
48 #include "wine/debug.h"
50 #include "dwrite_private.h"
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
54 extern const unsigned short bidi_bracket_table
[] DECLSPEC_HIDDEN
;
56 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
59 #define odd(x) ((x) & 1)
61 /*------------------------------------------------------------------------
62 Bidirectional Character Types
64 as defined by the Unicode Bidirectional Algorithm Table 3-7.
68 The list of bidirectional character types here is not grouped the
69 same way as the table 3-7, since the numberic values for the types
70 are chosen to keep the state and action tables compact.
71 ------------------------------------------------------------------------*/
75 /* ON MUST be zero, code relies on ON = NI = 0 */
76 ON
= 0, /* Other Neutral */
79 AN
, /* Arabic Number */
80 EN
, /* European Number */
81 AL
, /* Arabic Letter (Right-to-left) */
82 NSM
, /* Non-spacing Mark */
83 CS
, /* Common Separator */
84 ES
, /* European Separator */
85 ET
, /* European Terminator (post/prefix e.g. $ and %) */
88 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
91 S
, /* Segment Separator (TAB) // used only in L1 */
92 WS
, /* White space // used only in L1 */
93 B
, /* Paragraph Separator (aka as PS) */
95 /* types for explicit controls */
96 RLO
, /* these are used only in X1-X9 */
102 LRI
, /* Isolate formatting characters new with 6.3 */
107 /* resolved types, also resolved directions */
108 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
111 static const char debug_type
[][4] =
113 "ON", /* Other Neutral */
114 "L", /* Left Letter */
115 "R", /* Right Letter */
116 "AN", /* Arabic Number */
117 "EN", /* European Number */
118 "AL", /* Arabic Letter (Right-to-left) */
119 "NSM", /* Non-spacing Mark */
120 "CS", /* Common Separator */
121 "ES", /* European Separator */
122 "ET", /* European Terminator (post/prefix e.g. $ and %) */
123 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
124 "S", /* Segment Separator (TAB) used only in L1 */
125 "WS", /* White space used only in L1 */
126 "B", /* Paragraph Separator (aka as PS) */
127 "RLO", /* these are used only in X1-X9 */
132 "LRI", /* Isolate formatting characters new with 6.3 */
138 static inline void bidi_dump_types(const char* header
, const UINT8
*types
, UINT32 start
, UINT32 end
)
141 TRACE("%s:", header
);
142 for (i
= start
; i
< end
&& len
< 200; i
++) {
143 TRACE(" %s", debug_type
[types
[i
]]);
144 len
+= strlen(debug_type
[types
[i
]])+1;
151 /* Convert the libwine information to the direction enum */
152 static void bidi_classify(const WCHAR
*string
, UINT8
*chartype
, UINT32 count
)
154 static const enum directions dir_map
[16] =
156 L
, /* unassigned defaults to L */
171 PDF
/* also LRE, LRO, RLE, RLO */
176 for (i
= 0; i
< count
; ++i
) {
177 chartype
[i
] = dir_map
[get_char_typeW(string
[i
]) >> 12];
179 switch (chartype
[i
]) {
184 case 0x202a: chartype
[i
] = LRE
; break;
185 case 0x202b: chartype
[i
] = RLE
; break;
186 case 0x202c: chartype
[i
] = PDF
; break;
187 case 0x202d: chartype
[i
] = LRO
; break;
188 case 0x202e: chartype
[i
] = RLO
; break;
189 case 0x2066: chartype
[i
] = LRI
; break;
190 case 0x2067: chartype
[i
] = RLI
; break;
191 case 0x2068: chartype
[i
] = FSI
; break;
192 case 0x2069: chartype
[i
] = PDI
; break;
199 WCHAR
bidi_get_mirrored_char(WCHAR ch
)
201 extern const WCHAR wine_mirror_map
[] DECLSPEC_HIDDEN
;
202 return ch
+ wine_mirror_map
[wine_mirror_map
[ch
>> 8] + (ch
& 0xff)];
205 /* RESOLVE EXPLICIT */
207 static inline UINT8
get_greater_even_level(UINT8 level
)
209 return odd(level
) ? level
+ 1 : level
+ 2;
212 static inline UINT8
get_greater_odd_level(UINT8 level
)
214 return odd(level
) ? level
+ 2 : level
+ 1;
217 static inline UINT8
get_embedding_direction(UINT8 level
)
219 return odd(level
) ? R
: L
;
222 /*------------------------------------------------------------------------
223 Function: bidi_resolve_explicit
225 Recursively resolves explicit embedding levels and overrides.
226 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
228 Input: Base embedding level and direction
231 Output: Array of embedding levels
233 In/Out: Array of direction classes
236 Note: The function uses two simple counters to keep track of
237 matching explicit codes and PDF. Use the default argument for
238 the outermost call. The nesting counter counts the recursion
239 depth and not the embedding level.
240 ------------------------------------------------------------------------*/
241 typedef struct tagStackItem
248 #define push_stack(l,o,i) \
250 stack[stack_top].level = l; \
251 stack[stack_top].override = o; \
252 stack[stack_top].isolate = i;} while(0)
254 #define pop_stack() do { stack_top++; } while(0)
256 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
258 static void bidi_resolve_explicit(UINT8 baselevel
, UINT8
*classes
, UINT8
*levels
, UINT32 count
)
261 int overflow_isolate_count
= 0;
262 int overflow_embedding_count
= 0;
263 int valid_isolate_count
= 0;
266 StackItem stack
[MAX_DEPTH
+2];
267 int stack_top
= MAX_DEPTH
+1;
269 stack
[stack_top
].level
= baselevel
;
270 stack
[stack_top
].override
= NI
;
271 stack
[stack_top
].isolate
= FALSE
;
273 for (i
= 0; i
< count
; i
++) {
274 UINT8 least_odd
, least_even
;
276 switch (classes
[i
]) {
280 least_odd
= get_greater_odd_level(stack
[stack_top
].level
);
281 levels
[i
] = valid_level(least_odd
) ? least_odd
: stack
[stack_top
].level
;
282 if (valid_level(least_odd
))
283 push_stack(least_odd
, NI
, FALSE
);
284 else if (overflow_isolate_count
== 0)
285 overflow_embedding_count
++;
290 least_even
= get_greater_even_level(stack
[stack_top
].level
);
291 levels
[i
] = valid_level(least_even
) ? least_even
: stack
[stack_top
].level
;
292 if (valid_level(least_even
))
293 push_stack(least_even
, NI
, FALSE
);
294 else if (overflow_isolate_count
== 0)
295 overflow_embedding_count
++;
300 least_odd
= get_greater_odd_level(stack
[stack_top
].level
);
301 levels
[i
] = stack
[stack_top
].level
;
302 if (valid_level(least_odd
))
303 push_stack(least_odd
, R
, FALSE
);
304 else if (overflow_isolate_count
== 0)
305 overflow_embedding_count
++;
310 least_even
= get_greater_even_level(stack
[stack_top
].level
);
311 levels
[i
] = stack
[stack_top
].level
;
312 if (valid_level(least_even
))
313 push_stack(least_even
, L
, FALSE
);
314 else if (overflow_isolate_count
== 0)
315 overflow_embedding_count
++;
320 least_odd
= get_greater_odd_level(stack
[stack_top
].level
);
321 levels
[i
] = stack
[stack_top
].level
;
322 if (valid_level(least_odd
))
324 valid_isolate_count
++;
325 push_stack(least_odd
, NI
, TRUE
);
328 overflow_isolate_count
++;
333 least_even
= get_greater_even_level(stack
[stack_top
].level
);
334 levels
[i
] = stack
[stack_top
].level
;
335 if (valid_level(least_even
))
337 valid_isolate_count
++;
338 push_stack(least_even
, NI
, TRUE
);
341 overflow_isolate_count
++;
351 levels
[i
] = stack
[stack_top
].level
;
352 for (j
= i
+1; j
< count
; j
++)
354 if (classes
[j
] == LRI
|| classes
[j
] == RLI
|| classes
[j
] == FSI
)
359 else if (classes
[j
] == PDI
)
368 if (skipping
) continue;
375 else if (classes
[j
] == R
|| classes
[j
] == AL
)
383 least_odd
= get_greater_odd_level(stack
[stack_top
].level
);
384 if (valid_level(least_odd
))
386 valid_isolate_count
++;
387 push_stack(least_odd
, NI
, TRUE
);
390 overflow_isolate_count
++;
394 least_even
= get_greater_even_level(stack
[stack_top
].level
);
395 if (valid_level(least_even
))
397 valid_isolate_count
++;
398 push_stack(least_even
, NI
, TRUE
);
401 overflow_isolate_count
++;
419 levels
[i
] = stack
[stack_top
].level
;
420 if (stack
[stack_top
].override
!= NI
)
421 classes
[i
] = stack
[stack_top
].override
;
426 if (overflow_isolate_count
) overflow_isolate_count
--;
427 else if (!valid_isolate_count
) {/* do nothing */}
430 overflow_embedding_count
= 0;
431 while (!stack
[stack_top
].isolate
) pop_stack();
433 valid_isolate_count
--;
435 levels
[i
] = stack
[stack_top
].level
;
440 levels
[i
] = stack
[stack_top
].level
;
441 if (overflow_isolate_count
) {/* do nothing */}
442 else if (overflow_embedding_count
) overflow_embedding_count
--;
443 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
449 levels
[i
] = baselevel
;
453 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
454 for (i
= 0; i
< count
; i
++)
455 if (classes
[i
] == RLE
|| classes
[i
] == LRE
|| classes
[i
] == RLO
|| classes
[i
] == LRO
|| classes
[i
] == PDF
)
459 static inline int get_prev_valid_char_index(const UINT8
*classes
, int index
, int back_fence
)
461 if (index
== -1 || index
== back_fence
) return index
;
463 while (index
> back_fence
&& classes
[index
] == BN
) index
--;
467 static inline int get_next_valid_char_index(const UINT8
*classes
, int index
, int front_fence
)
469 if (index
== front_fence
) return index
;
471 while (index
< front_fence
&& classes
[index
] == BN
) index
++;
475 typedef struct tagRun
482 typedef struct tagRunChar
488 typedef struct tagIsolatedRun
499 static inline int get_next_valid_char_from_run(IsolatedRun
*run
, int index
)
501 if (index
>= (run
->length
-1)) return -1;
503 while (index
< run
->length
&& *run
->item
[index
].class == BN
) index
++;
504 if (index
== run
->length
) return -1;
508 static inline int get_prev_valid_char_from_run(IsolatedRun
*run
, int index
)
510 if (index
<= 0) return -1;
512 while (index
> -1 && *run
->item
[index
].class == BN
) index
--;
516 static inline void iso_dump_types(const char* header
, IsolatedRun
*run
)
521 for (i
= 0; i
< run
->length
&& len
< 200; i
++) {
522 TRACE(" %s", debug_type
[*run
->item
[i
].class]);
523 len
+= strlen(debug_type
[*run
->item
[i
].class])+1;
525 if (i
!= run
->length
)
530 /*------------------------------------------------------------------------
531 Function: bidi_resolve_weak
533 Resolves the directionality of numeric and other weak character types
535 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
537 Input: Array of embedding levels
540 In/Out: Array of directional classes
542 Note: On input only these directional classes are expected
543 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
544 ------------------------------------------------------------------------*/
545 static BOOL
bidi_is_isolate(UINT8
class)
547 return class == LRI
|| class == RLI
|| class == FSI
|| class == PDI
;
550 static void bidi_resolve_weak(IsolatedRun
*iso_run
)
555 for (i
=0; i
< iso_run
->length
; i
++) {
556 if (*iso_run
->item
[i
].class == NSM
) {
557 int j
= get_prev_valid_char_from_run(iso_run
, i
);
559 *iso_run
->item
[i
].class = iso_run
->sos
;
560 else if (bidi_is_isolate(*iso_run
->item
[j
].class))
561 *iso_run
->item
[i
].class = ON
;
563 *iso_run
->item
[i
].class = *iso_run
->item
[j
].class;
568 for (i
= 0; i
< iso_run
->length
; i
++) {
569 if (*iso_run
->item
[i
].class == EN
) {
570 int j
= get_prev_valid_char_from_run(iso_run
, i
);
572 if (*iso_run
->item
[j
].class == R
|| *iso_run
->item
[j
].class == L
|| *iso_run
->item
[j
].class == AL
) {
573 if (*iso_run
->item
[j
].class == AL
)
574 *iso_run
->item
[i
].class = AN
;
577 j
= get_prev_valid_char_from_run(iso_run
, j
);
583 for (i
= 0; i
< iso_run
->length
; i
++) {
584 if (*iso_run
->item
[i
].class == AL
)
585 *iso_run
->item
[i
].class = R
;
589 for (i
= 0; i
< iso_run
->length
; i
++) {
590 if (*iso_run
->item
[i
].class == ES
) {
591 int b
= get_prev_valid_char_from_run(iso_run
, i
);
592 int f
= get_next_valid_char_from_run(iso_run
, i
);
594 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].class == EN
&& *iso_run
->item
[f
].class == EN
)
595 *iso_run
->item
[i
].class = EN
;
597 else if (*iso_run
->item
[i
].class == CS
) {
598 int b
= get_prev_valid_char_from_run(iso_run
, i
);
599 int f
= get_next_valid_char_from_run(iso_run
, i
);
601 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].class == EN
&& *iso_run
->item
[f
].class == EN
)
602 *iso_run
->item
[i
].class = EN
;
603 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].class == AN
&& *iso_run
->item
[f
].class == AN
)
604 *iso_run
->item
[i
].class = AN
;
609 for (i
= 0; i
< iso_run
->length
; i
++) {
610 if (*iso_run
->item
[i
].class == ET
) {
612 for (j
= i
-1 ; j
> -1; j
--) {
613 if (*iso_run
->item
[j
].class == BN
) continue;
614 if (*iso_run
->item
[j
].class == ET
) continue;
615 else if (*iso_run
->item
[j
].class == EN
) *iso_run
->item
[i
].class = EN
;
618 if (*iso_run
->item
[i
].class == ET
) {
619 for (j
= i
+1; j
< iso_run
->length
; j
++) {
620 if (*iso_run
->item
[j
].class == BN
) continue;
621 if (*iso_run
->item
[j
].class == ET
) continue;
622 else if (*iso_run
->item
[j
].class == EN
) *iso_run
->item
[i
].class = EN
;
630 for (i
= 0; i
< iso_run
->length
; i
++) {
631 if (*iso_run
->item
[i
].class == ET
|| *iso_run
->item
[i
].class == ES
|| *iso_run
->item
[i
].class == CS
|| *iso_run
->item
[i
].class == ON
)
635 if (b
> -1 && *iso_run
->item
[b
].class == BN
)
636 *iso_run
->item
[b
].class = ON
;
637 if (f
< iso_run
->length
&& *iso_run
->item
[f
].class == BN
)
638 *iso_run
->item
[f
].class = ON
;
640 *iso_run
->item
[i
].class = ON
;
645 for (i
= 0; i
< iso_run
->length
; i
++) {
646 if (*iso_run
->item
[i
].class == EN
) {
648 for (j
= get_prev_valid_char_from_run(iso_run
, i
); j
> -1; j
= get_prev_valid_char_from_run(iso_run
, j
))
649 if (*iso_run
->item
[j
].class == R
|| *iso_run
->item
[j
].class == L
) {
650 if (*iso_run
->item
[j
].class == L
)
651 *iso_run
->item
[i
].class = L
;
654 if (iso_run
->sos
== L
&& j
== -1)
655 *iso_run
->item
[i
].class = L
;
660 typedef struct tagBracketPair
666 static int bracketpair_compr(const void *a
, const void* b
)
668 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
671 static BracketPair
*bidi_compute_bracket_pairs(IsolatedRun
*iso_run
)
675 int stack_top
= iso_run
->length
;
676 BracketPair
*out
= NULL
;
680 open_stack
= heap_alloc(sizeof(WCHAR
) * iso_run
->length
);
681 stack_index
= heap_alloc(sizeof(int) * iso_run
->length
);
683 for (i
= 0; i
< iso_run
->length
; i
++) {
684 unsigned short ubv
= get_table_entry(bidi_bracket_table
, iso_run
->item
[i
].ch
);
687 out
= heap_alloc(sizeof(BracketPair
));
691 if ((ubv
>> 8) == 0) {
693 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
694 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
695 if (open_stack
[stack_top
] == 0x232A)
696 open_stack
[stack_top
] = 0x3009;
697 stack_index
[stack_top
] = i
;
699 else if ((ubv
>> 8) == 1) {
702 if (stack_top
== iso_run
->length
) continue;
703 for (j
= stack_top
; j
< iso_run
->length
; j
++) {
704 WCHAR c
= iso_run
->item
[i
].ch
;
705 if (c
== 0x232A) c
= 0x3009;
706 if (c
== open_stack
[j
]) {
707 out
[pair_count
].start
= stack_index
[j
];
708 out
[pair_count
].end
= i
;
710 out
= heap_realloc(out
, sizeof(BracketPair
) * (pair_count
+1));
711 out
[pair_count
].start
= -1;
719 if (pair_count
== 0) {
723 else if (pair_count
> 1)
724 qsort(out
, pair_count
, sizeof(BracketPair
), bracketpair_compr
);
726 heap_free(open_stack
);
727 heap_free(stack_index
);
731 static inline UINT8
get_rule_N0_class(UINT8
class)
733 return (class == AN
|| class == EN
) ? R
: class;
736 /*------------------------------------------------------------------------
737 Function: bidi_resolve_neutrals
739 Resolves the directionality of neutral character types.
741 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
743 Input: Array of embedding levels
747 In/Out: Array of directional classes
749 Note: On input only these directional classes are expected
750 R, L, NI, AN, EN and BN
752 W8 resolves a number of ENs to L
753 ------------------------------------------------------------------------*/
754 static void bidi_resolve_neutrals(IsolatedRun
*run
)
759 /* Translate isolates into NI */
760 for (i
= 0; i
< run
->length
; i
++) {
761 switch (*run
->item
[i
].class) {
768 case PDI
: *run
->item
[i
].class = NI
;
771 /* "Only NI, L, R, AN, EN and BN are allowed" */
772 ASSERT(*run
->item
[i
].class <= EN
|| *run
->item
[i
].class == BN
);
775 /* N0: Skipping bracketed pairs for now */
776 pairs
= bidi_compute_bracket_pairs(run
);
778 BracketPair
*p
= pairs
;
780 while (p
->start
>= 0) {
781 UINT8 e
= get_embedding_direction(run
->e
);
782 UINT8 o
= get_embedding_direction(run
->e
+ 1);
786 TRACE("Bracket Pair [%i - %i]\n", p
->start
, p
->end
);
789 for (j
= p
->start
+1; j
< p
->end
; j
++) {
790 if (get_rule_N0_class(*run
->item
[j
].class) == e
) {
791 *run
->item
[p
->start
].class = e
;
792 *run
->item
[p
->end
].class = e
;
795 else if (get_rule_N0_class(*run
->item
[j
].class) == o
)
799 if (j
== p
->end
&& flag_o
) {
800 for (j
= p
->start
; j
>= 0; j
--) {
801 if (get_rule_N0_class(*run
->item
[j
].class) == o
) {
802 *run
->item
[p
->start
].class = o
;
803 *run
->item
[p
->end
].class = o
;
806 else if (get_rule_N0_class(*run
->item
[j
].class) == e
) {
807 *run
->item
[p
->start
].class = e
;
808 *run
->item
[p
->end
].class = e
;
813 *run
->item
[p
->start
].class = run
->sos
;
814 *run
->item
[p
->end
].class = run
->sos
;
825 for (i
= 0; i
< run
->length
; i
++) {
828 if (*run
->item
[i
].class == NI
) {
829 int b
= get_prev_valid_char_from_run(run
, i
);
837 if (*run
->item
[b
].class == R
|| *run
->item
[b
].class == AN
|| *run
->item
[b
].class == EN
)
839 else if (*run
->item
[b
].class == L
)
841 else /* No string type */
844 j
= get_next_valid_char_from_run(run
, i
);
845 while (j
> -1 && *run
->item
[j
].class == NI
) j
= get_next_valid_char_from_run(run
, j
);
850 else if (*run
->item
[j
].class == R
|| *run
->item
[j
].class == AN
|| *run
->item
[j
].class == EN
)
852 else if (*run
->item
[j
].class == L
)
854 else /* No string type */
858 for (b
= i
; b
< j
&& b
< run
->length
; b
++)
859 *run
->item
[b
].class = r
;
865 for (i
= 0; i
< run
->length
; i
++) {
866 if (*run
->item
[i
].class == NI
) {
870 *run
->item
[i
].class = get_embedding_direction(run
->e
);
871 if (b
> -1 && *run
->item
[b
].class == BN
)
872 *run
->item
[b
].class = get_embedding_direction(run
->e
);
873 if (f
< run
->length
&& *run
->item
[f
].class == BN
)
874 *run
->item
[f
].class = get_embedding_direction(run
->e
);
879 /*------------------------------------------------------------------------
880 Function: bidi_resolve_implicit
882 Recursively resolves implicit embedding levels.
883 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
885 Input: Array of direction classes
889 In/Out: Array of embedding levels
891 Note: levels may exceed 15 on output.
892 Accepted subset of direction classes
894 ------------------------------------------------------------------------*/
895 static void bidi_resolve_implicit(const UINT8
*classes
, UINT8
*levels
, int sos
, int eos
)
900 for (i
= sos
; i
<= eos
; i
++) {
901 if (classes
[i
] == BN
)
904 ASSERT(classes
[i
] != ON
); /* "No Neutrals allowed to survive here." */
905 ASSERT(classes
[i
] <= EN
); /* "Out of range." */
907 if (odd(levels
[i
]) && (classes
[i
] == L
|| classes
[i
] == EN
|| classes
[i
] == AN
))
909 else if (!odd(levels
[i
]) && classes
[i
] == R
)
911 else if (!odd(levels
[i
]) && (classes
[i
] == EN
|| classes
[i
] == AN
))
916 static inline BOOL
is_rule_L1_reset_class(UINT8
class)
936 static void bidi_resolve_resolved(UINT8 baselevel
, const UINT8
*classes
, UINT8
*levels
, int sos
, int eos
)
941 for (i
= sos
; i
<= eos
; i
++) {
942 if (classes
[i
] == B
|| classes
[i
] == S
) {
944 while (i
> sos
&& j
>= sos
&& is_rule_L1_reset_class(classes
[j
]))
945 levels
[j
--] = baselevel
;
946 levels
[i
] = baselevel
;
948 else if (classes
[i
] == LRE
|| classes
[i
] == RLE
|| classes
[i
] == LRO
|| classes
[i
] == RLO
||
949 classes
[i
] == PDF
|| classes
[i
] == BN
) {
950 levels
[i
] = i
? levels
[i
- 1] : baselevel
;
952 if (i
== eos
&& is_rule_L1_reset_class(classes
[i
])) {
954 while (j
>= sos
&& is_rule_L1_reset_class(classes
[j
]))
955 levels
[j
--] = baselevel
;
960 static HRESULT
bidi_compute_isolating_runs_set(UINT8 baselevel
, UINT8
*classes
, UINT8
*levels
, const WCHAR
*string
, UINT32 count
, struct list
*set
)
962 int run_start
, run_end
, i
;
967 runs
= heap_alloc(count
* sizeof(Run
));
969 return E_OUTOFMEMORY
;
975 while (run_start
< count
) {
976 run_end
= get_next_valid_char_index(classes
, run_start
, count
);
977 while (run_end
< count
&& levels
[run_end
] == levels
[run_start
])
978 run_end
= get_next_valid_char_index(classes
, run_end
, count
);
980 runs
[run_count
].start
= run_start
;
981 runs
[run_count
].end
= run_end
;
982 runs
[run_count
].e
= levels
[run_start
];
983 run_start
= get_next_valid_char_index(classes
, run_end
, count
);
987 /* Build Isolating Runs */
989 while (i
< run_count
) {
991 if (runs
[k
].start
>= 0) {
992 IsolatedRun
*current_isolated
;
993 int type_fence
, real_end
;
996 current_isolated
= heap_alloc(sizeof(IsolatedRun
) + sizeof(RunChar
)*count
);
997 if (!current_isolated
) {
1002 run_start
= runs
[k
].start
;
1003 current_isolated
->e
= runs
[k
].e
;
1004 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
1006 for (j
= 0; j
< current_isolated
->length
; j
++) {
1007 current_isolated
->item
[j
].class = &classes
[runs
[k
].start
+j
];
1008 current_isolated
->item
[j
].ch
= string
[runs
[k
].start
+j
];
1011 run_end
= runs
[k
].end
;
1013 TRACE("{ [%i -- %i]",run_start
, run_end
);
1015 if (classes
[run_end
] == BN
)
1016 run_end
= get_prev_valid_char_index(classes
, run_end
, runs
[k
].start
);
1018 while (run_end
< count
&& (classes
[run_end
] == RLI
|| classes
[run_end
] == LRI
|| classes
[run_end
] == FSI
)) {
1021 while (j
< run_count
&& classes
[runs
[j
].start
] != PDI
) j
++;
1022 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
) {
1027 if (j
!= run_count
) {
1028 int l
= current_isolated
->length
;
1031 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1032 for (m
= 0; l
< current_isolated
->length
; l
++, m
++) {
1033 current_isolated
->item
[l
].class = &classes
[runs
[j
].start
+m
];
1034 current_isolated
->item
[l
].ch
= string
[runs
[j
].start
+m
];
1037 TRACE("[%i -- %i]", runs
[j
].start
, runs
[j
].end
);
1039 run_end
= runs
[j
].end
;
1040 if (classes
[run_end
] == BN
)
1041 run_end
= get_prev_valid_char_index(classes
, run_end
, runs
[i
].start
);
1051 type_fence
= get_prev_valid_char_index(classes
, run_start
, -1);
1053 if (type_fence
== -1)
1054 current_isolated
->sos
= (baselevel
> levels
[run_start
]) ? baselevel
: levels
[run_start
];
1056 current_isolated
->sos
= (levels
[type_fence
] > levels
[run_start
]) ? levels
[type_fence
] : levels
[run_start
];
1058 current_isolated
->sos
= get_embedding_direction(current_isolated
->sos
);
1060 if (run_end
== count
)
1061 current_isolated
->eos
= current_isolated
->sos
;
1063 /* eos could be an BN */
1064 if (classes
[run_end
] == BN
) {
1065 real_end
= get_prev_valid_char_index(classes
, run_end
, run_start
-1);
1066 if (real_end
< run_start
)
1067 real_end
= run_start
;
1072 type_fence
= get_next_valid_char_index(classes
, run_end
, count
);
1073 if (type_fence
== count
)
1074 current_isolated
->eos
= (baselevel
> levels
[real_end
]) ? baselevel
: levels
[real_end
];
1076 current_isolated
->eos
= (levels
[type_fence
] > levels
[real_end
]) ? levels
[type_fence
] : levels
[real_end
];
1078 current_isolated
->eos
= get_embedding_direction(current_isolated
->eos
);
1081 list_add_tail(set
, ¤t_isolated
->entry
);
1082 TRACE(" } level %i {%s <--> %s}\n", current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1091 HRESULT
bidi_computelevels(const WCHAR
*string
, UINT32 count
, UINT8 baselevel
, UINT8
*explicit, UINT8
*levels
)
1093 IsolatedRun
*iso_run
, *next
;
1094 struct list IsolatingRuns
;
1098 TRACE("%s, %u\n", debugstr_wn(string
, count
), count
);
1100 chartype
= heap_alloc(count
*sizeof(*chartype
));
1102 return E_OUTOFMEMORY
;
1104 bidi_classify(string
, chartype
, count
);
1105 if (TRACE_ON(bidi
)) bidi_dump_types("start ", chartype
, 0, count
);
1107 bidi_resolve_explicit(baselevel
, chartype
, levels
, count
);
1108 memcpy(explicit, levels
, count
*sizeof(*explicit));
1110 if (TRACE_ON(bidi
)) bidi_dump_types("after explicit", chartype
, 0, count
);
1112 /* X10/BD13: Compute Isolating runs */
1113 hr
= bidi_compute_isolating_runs_set(baselevel
, chartype
, levels
, string
, count
, &IsolatingRuns
);
1117 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1119 if (TRACE_ON(bidi
)) iso_dump_types("run", iso_run
);
1121 bidi_resolve_weak(iso_run
);
1122 if (TRACE_ON(bidi
)) iso_dump_types("after weak", iso_run
);
1124 bidi_resolve_neutrals(iso_run
);
1125 if (TRACE_ON(bidi
)) iso_dump_types("after neutrals", iso_run
);
1127 list_remove(&iso_run
->entry
);
1131 if (TRACE_ON(bidi
)) bidi_dump_types("before implicit", chartype
, 0, count
);
1132 bidi_resolve_implicit(chartype
, levels
, 0, count
-1);
1134 bidi_classify(string
, chartype
, count
);
1135 bidi_resolve_resolved(baselevel
, chartype
, levels
, 0, count
-1);
1138 heap_free(chartype
);