2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/list.h"
57 #include "usp10_internal.h"
59 extern const unsigned short bidi_bracket_table
[] DECLSPEC_HIDDEN
;
61 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
63 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
66 /* HELPER FUNCTIONS AND DECLARATIONS */
68 /*------------------------------------------------------------------------
69 Bidirectional Character Types
71 as defined by the Unicode Bidirectional Algorithm Table 3-7.
75 The list of bidirectional character types here is not grouped the
76 same way as the table 3-7, since the numberic values for the types
77 are chosen to keep the state and action tables compact.
78 ------------------------------------------------------------------------*/
82 /* ON MUST be zero, code relies on ON = NI = 0 */
83 ON
= 0, /* Other Neutral */
86 AN
, /* Arabic Number */
87 EN
, /* European Number */
88 AL
, /* Arabic Letter (Right-to-left) */
89 NSM
, /* Non-spacing Mark */
90 CS
, /* Common Separator */
91 ES
, /* European Separator */
92 ET
, /* European Terminator (post/prefix e.g. $ and %) */
95 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
98 S
, /* Segment Separator (TAB) // used only in L1 */
99 WS
, /* White space // used only in L1 */
100 B
, /* Paragraph Separator (aka as PS) */
102 /* types for explicit controls */
103 RLO
, /* these are used only in X1-X9 */
109 LRI
, /* Isolate formatting characters new with 6.3 */
114 /* resolved types, also resolved directions */
115 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
118 static const char debug_type
[][4] =
120 "ON", /* Other Neutral */
121 "L", /* Left Letter */
122 "R", /* Right Letter */
123 "AN", /* Arabic Number */
124 "EN", /* European Number */
125 "AL", /* Arabic Letter (Right-to-left) */
126 "NSM", /* Non-spacing Mark */
127 "CS", /* Common Separator */
128 "ES", /* European Separator */
129 "ET", /* European Terminator (post/prefix e.g. $ and %) */
130 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
131 "S", /* Segment Separator (TAB) // used only in L1 */
132 "WS", /* White space // used only in L1 */
133 "B", /* Paragraph Separator (aka as PS) */
134 "RLO", /* these are used only in X1-X9 */
139 "LRI", /* Isolate formatting characters new with 6.3 */
145 /* HELPER FUNCTIONS */
147 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
151 for (i
= start
; i
< end
&& len
< 200; i
++)
153 TRACE(" %s",debug_type
[types
[i
]]);
154 len
+= strlen(debug_type
[types
[i
]])+1;
161 /* Convert the libwine information to the direction enum */
162 static void classify(const WCHAR
*string
, WORD
*chartype
, DWORD count
, const SCRIPT_CONTROL
*c
)
164 static const enum directions dir_map
[16] =
166 L
, /* unassigned defaults to L */
181 PDF
/* also LRE, LRO, RLE, RLO */
186 for (i
= 0; i
< count
; ++i
)
188 chartype
[i
] = dir_map
[get_char_typeW(string
[i
]) >> 12];
192 if (!c
->fLegacyBidiClass
) break;
196 case '+': chartype
[i
] = NI
; break;
197 case '/': chartype
[i
] = CS
; break;
203 case 0x202A: chartype
[i
] = LRE
; break;
204 case 0x202B: chartype
[i
] = RLE
; break;
205 case 0x202C: chartype
[i
] = PDF
; break;
206 case 0x202D: chartype
[i
] = LRO
; break;
207 case 0x202E: chartype
[i
] = RLO
; break;
208 case 0x2066: chartype
[i
] = LRI
; break;
209 case 0x2067: chartype
[i
] = RLI
; break;
210 case 0x2068: chartype
[i
] = FSI
; break;
211 case 0x2069: chartype
[i
] = PDI
; break;
218 /* RESOLVE EXPLICIT */
220 static WORD
GreaterEven(int i
)
222 return odd(i
) ? i
+ 1 : i
+ 2;
225 static WORD
GreaterOdd(int i
)
227 return odd(i
) ? i
+ 2 : i
+ 1;
230 static WORD
EmbeddingDirection(int level
)
232 return odd(level
) ? R
: L
;
235 /*------------------------------------------------------------------------
236 Function: resolveExplicit
238 Recursively resolves explicit embedding levels and overrides.
239 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
241 Input: Base embedding level and direction
244 Output: Array of embedding levels
246 In/Out: Array of direction classes
249 Note: The function uses two simple counters to keep track of
250 matching explicit codes and PDF. Use the default argument for
251 the outermost call. The nesting counter counts the recursion
252 depth and not the embedding level.
253 ------------------------------------------------------------------------*/
254 typedef struct tagStackItem
{
260 #define push_stack(l,o,i) \
262 stack[stack_top].level = l; \
263 stack[stack_top].override = o; \
264 stack[stack_top].isolate = i;} while(0)
266 #define pop_stack() do { stack_top++; } while(0)
268 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
270 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, WORD
*poutOverrides
, int count
, BOOL initialOverride
)
273 int overflow_isolate_count
= 0;
274 int overflow_embedding_count
= 0;
275 int valid_isolate_count
= 0;
278 StackItem stack
[MAX_DEPTH
+2];
279 int stack_top
= MAX_DEPTH
+1;
281 stack
[stack_top
].level
= level
;
282 stack
[stack_top
].override
= NI
;
283 stack
[stack_top
].isolate
= FALSE
;
288 push_stack(level
, R
, FALSE
);
290 push_stack(level
, L
, FALSE
);
293 for (i
= 0; i
< count
; i
++)
295 poutOverrides
[i
] = stack
[stack_top
].override
;
298 if (pclass
[i
] == RLE
)
300 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
301 poutLevel
[i
] = stack
[stack_top
].level
;
302 if (valid_level(least_odd
))
303 push_stack(least_odd
, NI
, FALSE
);
304 else if (overflow_isolate_count
== 0)
305 overflow_embedding_count
++;
308 else if (pclass
[i
] == LRE
)
310 int least_even
= GreaterEven(stack
[stack_top
].level
);
311 poutLevel
[i
] = stack
[stack_top
].level
;
312 if (valid_level(least_even
))
313 push_stack(least_even
, NI
, FALSE
);
314 else if (overflow_isolate_count
== 0)
315 overflow_embedding_count
++;
318 else if (pclass
[i
] == RLO
)
320 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
321 poutLevel
[i
] = stack
[stack_top
].level
;
322 if (valid_level(least_odd
))
323 push_stack(least_odd
, R
, FALSE
);
324 else if (overflow_isolate_count
== 0)
325 overflow_embedding_count
++;
328 else if (pclass
[i
] == LRO
)
330 int least_even
= GreaterEven(stack
[stack_top
].level
);
331 poutLevel
[i
] = stack
[stack_top
].level
;
332 if (valid_level(least_even
))
333 push_stack(least_even
, L
, FALSE
);
334 else if (overflow_isolate_count
== 0)
335 overflow_embedding_count
++;
338 else if (pclass
[i
] == RLI
)
340 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
341 poutLevel
[i
] = stack
[stack_top
].level
;
342 if (valid_level(least_odd
))
344 valid_isolate_count
++;
345 push_stack(least_odd
, NI
, TRUE
);
348 overflow_isolate_count
++;
351 else if (pclass
[i
] == LRI
)
353 int least_even
= GreaterEven(stack
[stack_top
].level
);
354 poutLevel
[i
] = stack
[stack_top
].level
;
355 if (valid_level(least_even
))
357 valid_isolate_count
++;
358 push_stack(least_even
, NI
, TRUE
);
361 overflow_isolate_count
++;
364 else if (pclass
[i
] == FSI
)
369 poutLevel
[i
] = stack
[stack_top
].level
;
370 for (j
= i
+1; j
< count
; j
++)
372 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
377 else if (pclass
[j
] == PDI
)
386 if (skipping
) continue;
393 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
401 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
402 if (valid_level(least_odd
))
404 valid_isolate_count
++;
405 push_stack(least_odd
, NI
, TRUE
);
408 overflow_isolate_count
++;
412 int least_even
= GreaterEven(stack
[stack_top
].level
);
413 if (valid_level(least_even
))
415 valid_isolate_count
++;
416 push_stack(least_even
, NI
, TRUE
);
419 overflow_isolate_count
++;
423 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
425 poutLevel
[i
] = stack
[stack_top
].level
;
426 if (stack
[stack_top
].override
!= NI
)
427 pclass
[i
] = stack
[stack_top
].override
;
430 else if (pclass
[i
] == PDI
)
432 if (overflow_isolate_count
) overflow_isolate_count
--;
433 else if (!valid_isolate_count
) {/* do nothing */}
436 overflow_embedding_count
= 0;
437 while (!stack
[stack_top
].isolate
) pop_stack();
439 valid_isolate_count
--;
441 poutLevel
[i
] = stack
[stack_top
].level
;
444 else if (pclass
[i
] == PDF
)
446 poutLevel
[i
] = stack
[stack_top
].level
;
447 if (overflow_isolate_count
) {/* do nothing */}
448 else if (overflow_embedding_count
) overflow_embedding_count
--;
449 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
454 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
455 for (i
= 0; i
< count
; i
++)
456 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
460 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
462 if (index
== -1 || index
== back_fence
) return index
;
464 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
468 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
470 if (index
== front_fence
) return index
;
472 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
476 typedef struct tagRun
483 typedef struct tagRunChar
489 typedef struct tagIsolatedRun
500 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
502 if (index
>= (iso_run
->length
-1)) return -1;
504 while (index
< iso_run
->length
&& *iso_run
->item
[index
].pcls
== BN
) index
++;
505 if (index
== iso_run
->length
) return -1;
509 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
512 if (index
<= 0) return -1;
514 while (index
> -1 && *iso_run
->item
[index
].pcls
== BN
) index
--;
518 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
523 for (i
= 0; i
< iso_run
->length
&& len
< 200; i
++)
525 TRACE(" %s",debug_type
[*iso_run
->item
[i
].pcls
]);
526 len
+= strlen(debug_type
[*iso_run
->item
[i
].pcls
])+1;
528 if (i
!= iso_run
->length
)
533 /*------------------------------------------------------------------------
534 Function: resolveWeak
536 Resolves the directionality of numeric and other weak character types
538 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
540 Input: Array of embedding levels
543 In/Out: Array of directional classes
545 Note: On input only these directional classes are expected
546 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
547 ------------------------------------------------------------------------*/
549 static void resolveWeak(IsolatedRun
* iso_run
)
554 for (i
=0; i
< iso_run
->length
; i
++)
556 if (*iso_run
->item
[i
].pcls
== NSM
)
558 int j
= iso_previousValidChar(iso_run
, i
);
560 *iso_run
->item
[i
].pcls
= iso_run
->sos
;
561 else if (*iso_run
->item
[j
].pcls
>= LRI
)
562 *iso_run
->item
[i
].pcls
= ON
;
564 *iso_run
->item
[i
].pcls
= *iso_run
->item
[j
].pcls
;
569 for (i
= 0; i
< iso_run
->length
; i
++)
571 if (*iso_run
->item
[i
].pcls
== EN
)
573 int j
= iso_previousValidChar(iso_run
, i
);
576 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
|| *iso_run
->item
[j
].pcls
== AL
)
578 if (*iso_run
->item
[j
].pcls
== AL
)
579 *iso_run
->item
[i
].pcls
= AN
;
582 j
= iso_previousValidChar(iso_run
, j
);
588 for (i
= 0; i
< iso_run
->length
; i
++)
590 if (*iso_run
->item
[i
].pcls
== AL
)
591 *iso_run
->item
[i
].pcls
= R
;
595 for (i
= 0; i
< iso_run
->length
; i
++)
597 if (*iso_run
->item
[i
].pcls
== ES
)
599 int b
= iso_previousValidChar(iso_run
, i
);
600 int f
= iso_nextValidChar(iso_run
, i
);
602 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
603 *iso_run
->item
[i
].pcls
= EN
;
605 else if (*iso_run
->item
[i
].pcls
== CS
)
607 int b
= iso_previousValidChar(iso_run
, i
);
608 int f
= iso_nextValidChar(iso_run
, i
);
610 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
611 *iso_run
->item
[i
].pcls
= EN
;
612 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== AN
&& *iso_run
->item
[f
].pcls
== AN
)
613 *iso_run
->item
[i
].pcls
= AN
;
618 for (i
= 0; i
< iso_run
->length
; i
++)
620 if (*iso_run
->item
[i
].pcls
== ET
)
623 for (j
= i
-1 ; j
> -1; j
--)
625 if (*iso_run
->item
[j
].pcls
== BN
) continue;
626 if (*iso_run
->item
[j
].pcls
== ET
) continue;
627 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
630 if (*iso_run
->item
[i
].pcls
== ET
)
632 for (j
= i
+1; j
< iso_run
->length
; j
++)
634 if (*iso_run
->item
[j
].pcls
== BN
) continue;
635 if (*iso_run
->item
[j
].pcls
== ET
) continue;
636 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
644 for (i
= 0; i
< iso_run
->length
; i
++)
646 if (*iso_run
->item
[i
].pcls
== ET
|| *iso_run
->item
[i
].pcls
== ES
|| *iso_run
->item
[i
].pcls
== CS
|| *iso_run
->item
[i
].pcls
== ON
)
650 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
651 *iso_run
->item
[b
].pcls
= ON
;
652 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
653 *iso_run
->item
[f
].pcls
= ON
;
655 *iso_run
->item
[i
].pcls
= ON
;
660 for (i
= 0; i
< iso_run
->length
; i
++)
662 if (*iso_run
->item
[i
].pcls
== EN
)
665 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
666 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
)
668 if (*iso_run
->item
[j
].pcls
== L
)
669 *iso_run
->item
[i
].pcls
= L
;
672 if (iso_run
->sos
== L
&& j
== -1)
673 *iso_run
->item
[i
].pcls
= L
;
678 typedef struct tagBracketPair
684 static int compr(const void *a
, const void* b
)
686 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
689 static BracketPair
*computeBracketPairs(IsolatedRun
*iso_run
)
693 int stack_top
= iso_run
->length
;
694 BracketPair
*out
= NULL
;
698 open_stack
= heap_alloc(iso_run
->length
* sizeof(*open_stack
));
699 stack_index
= heap_alloc(iso_run
->length
* sizeof(*stack_index
));
701 for (i
= 0; i
< iso_run
->length
; i
++)
703 unsigned short ubv
= get_table_entry(bidi_bracket_table
, iso_run
->item
[i
].ch
);
708 out
= heap_alloc(sizeof(*out
));
715 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
716 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
717 if (open_stack
[stack_top
] == 0x232A)
718 open_stack
[stack_top
] = 0x3009;
719 stack_index
[stack_top
] = i
;
721 else if ((ubv
>> 8) == 1)
724 if (stack_top
== iso_run
->length
) continue;
725 for (j
= stack_top
; j
< iso_run
->length
; j
++)
727 WCHAR c
= iso_run
->item
[i
].ch
;
728 if (c
== 0x232A) c
= 0x3009;
729 if (c
== open_stack
[j
])
731 out
[pair_count
].start
= stack_index
[j
];
732 out
[pair_count
].end
= i
;
734 out
= HeapReAlloc(GetProcessHeap(), 0, out
, sizeof(BracketPair
) * (pair_count
+1));
735 out
[pair_count
].start
= -1;
748 else if (pair_count
> 1)
749 qsort(out
, pair_count
, sizeof(BracketPair
), compr
);
751 heap_free(open_stack
);
752 heap_free(stack_index
);
756 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
758 /*------------------------------------------------------------------------
759 Function: resolveNeutrals
761 Resolves the directionality of neutral character types.
763 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
765 Input: Array of embedding levels
769 In/Out: Array of directional classes
771 Note: On input only these directional classes are expected
772 R, L, NI, AN, EN and BN
774 W8 resolves a number of ENs to L
775 ------------------------------------------------------------------------*/
776 static void resolveNeutrals(IsolatedRun
*iso_run
)
779 BracketPair
*pairs
= NULL
;
781 /* Translate isolates into NI */
782 for (i
= 0; i
< iso_run
->length
; i
++)
784 if (*iso_run
->item
[i
].pcls
>= LRI
)
785 *iso_run
->item
[i
].pcls
= NI
;
787 switch(*iso_run
->item
[i
].pcls
)
791 case WS
: *iso_run
->item
[i
].pcls
= NI
;
794 ASSERT(*iso_run
->item
[i
].pcls
< 5 || *iso_run
->item
[i
].pcls
== BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
797 /* N0: Skipping bracketed pairs for now */
798 pairs
= computeBracketPairs(iso_run
);
801 BracketPair
*p
= &pairs
[0];
803 while (p
->start
>= 0)
806 int e
= EmbeddingDirection(iso_run
->e
);
807 int o
= EmbeddingDirection(iso_run
->e
+1);
809 TRACE("Bracket Pair [%i - %i]\n",p
->start
, p
->end
);
812 for (j
= p
->start
+1; j
< p
->end
; j
++)
814 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
816 *iso_run
->item
[p
->start
].pcls
= e
;
817 *iso_run
->item
[p
->end
].pcls
= e
;
820 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
824 if (j
== p
->end
&& flag_o
)
826 for (j
= p
->start
; j
>= 0; j
--)
828 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
830 *iso_run
->item
[p
->start
].pcls
= o
;
831 *iso_run
->item
[p
->end
].pcls
= o
;
834 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
836 *iso_run
->item
[p
->start
].pcls
= e
;
837 *iso_run
->item
[p
->end
].pcls
= e
;
843 *iso_run
->item
[p
->start
].pcls
= iso_run
->sos
;
844 *iso_run
->item
[p
->end
].pcls
= iso_run
->sos
;
855 for (i
= 0; i
< iso_run
->length
; i
++)
859 if (*iso_run
->item
[i
].pcls
== NI
)
862 int b
= iso_previousValidChar(iso_run
, i
);
871 if (*iso_run
->item
[b
].pcls
== R
|| *iso_run
->item
[b
].pcls
== AN
|| *iso_run
->item
[b
].pcls
== EN
)
873 else if (*iso_run
->item
[b
].pcls
== L
)
875 else /* No string type */
878 j
= iso_nextValidChar(iso_run
, i
);
879 while (j
> -1 && *iso_run
->item
[j
].pcls
== NI
) j
= iso_nextValidChar(iso_run
, j
);
886 else if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== AN
|| *iso_run
->item
[j
].pcls
== EN
)
888 else if (*iso_run
->item
[j
].pcls
== L
)
890 else /* No string type */
895 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
896 *iso_run
->item
[b
].pcls
= r
;
902 for (i
= 0; i
< iso_run
->length
; i
++)
904 if (*iso_run
->item
[i
].pcls
== NI
)
908 *iso_run
->item
[i
].pcls
= EmbeddingDirection(iso_run
->e
);
909 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
910 *iso_run
->item
[b
].pcls
= EmbeddingDirection(iso_run
->e
);
911 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
912 *iso_run
->item
[f
].pcls
= EmbeddingDirection(iso_run
->e
);
917 /*------------------------------------------------------------------------
918 Function: resolveImplicit
920 Recursively resolves implicit embedding levels.
921 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
923 Input: Array of direction classes
927 In/Out: Array of embedding levels
929 Note: levels may exceed 15 on output.
930 Accepted subset of direction classes
932 ------------------------------------------------------------------------*/
933 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
938 for (i
= sos
; i
<= eos
; i
++)
943 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
944 ASSERT(pcls
[i
] < 5); /* "Out of range." */
946 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
948 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
950 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
955 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
960 for (i
= sos
; i
<= eos
; i
++)
962 if (pcls
[i
] == B
|| pcls
[i
] == S
)
965 while (i
> sos
&& j
>= sos
&&
966 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
967 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
968 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
969 plevel
[j
--] = baselevel
;
970 plevel
[i
] = baselevel
;
972 else if (pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
|| pcls
[i
] == RLO
||
973 pcls
[i
] == PDF
|| pcls
[i
] == BN
)
975 plevel
[i
] = i
? plevel
[i
- 1] : baselevel
;
978 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
979 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
980 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
983 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
984 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
985 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
986 plevel
[j
--] = baselevel
;
991 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, const WORD
*pLevel
,
992 const WCHAR
*string
, unsigned int uCount
, struct list
*set
)
994 int run_start
, run_end
, i
;
997 IsolatedRun
*current_isolated
;
999 if (!(runs
= heap_alloc(uCount
* sizeof(*runs
))))
1006 while (run_start
< uCount
)
1008 run_end
= nextValidChar(pcls
, run_start
, uCount
);
1009 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
1011 runs
[run_count
].start
= run_start
;
1012 runs
[run_count
].end
= run_end
;
1013 runs
[run_count
].e
= pLevel
[run_start
];
1014 run_start
= nextValidChar(pcls
, run_end
, uCount
);
1018 /* Build Isolating Runs */
1020 while (i
< run_count
)
1023 if (runs
[k
].start
>= 0)
1025 int type_fence
, real_end
;
1028 if (!(current_isolated
= heap_alloc(FIELD_OFFSET(IsolatedRun
, item
[uCount
]))))
1031 run_start
= runs
[k
].start
;
1032 current_isolated
->e
= runs
[k
].e
;
1033 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
1035 for (j
= 0; j
< current_isolated
->length
; j
++)
1037 current_isolated
->item
[j
].pcls
= &pcls
[runs
[k
].start
+j
];
1038 current_isolated
->item
[j
].ch
= string
[runs
[k
].start
+ j
];
1041 run_end
= runs
[k
].end
;
1043 TRACE("{ [%i -- %i]",run_start
, run_end
);
1045 if (pcls
[run_end
] == BN
)
1046 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
1048 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
1052 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
1053 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
1062 int l
= current_isolated
->length
;
1064 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1065 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
1067 current_isolated
->item
[l
].pcls
= &pcls
[runs
[j
].start
+m
];
1068 current_isolated
->item
[l
].ch
= string
[runs
[j
].start
+ m
];
1071 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
1073 run_end
= runs
[j
].end
;
1074 if (pcls
[run_end
] == BN
)
1075 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
1086 type_fence
= previousValidChar(pcls
, run_start
, -1);
1088 if (type_fence
== -1)
1089 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
1091 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
1093 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
1095 if (run_end
== uCount
)
1096 current_isolated
->eos
= current_isolated
->sos
;
1099 /* eos could be an BN */
1100 if ( pcls
[run_end
] == BN
)
1102 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
1103 if (real_end
< run_start
)
1104 real_end
= run_start
;
1109 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
1110 if (type_fence
== uCount
)
1111 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
1113 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
1115 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
1118 list_add_tail(set
, ¤t_isolated
->entry
);
1119 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1127 /*************************************************************
1128 * BIDI_DeterminLevels
1130 BOOL
BIDI_DetermineLevels(
1131 const WCHAR
*lpString
, /* [in] The string for which information is to be returned */
1132 unsigned int uCount
, /* [in] Number of WCHARs in string. */
1133 const SCRIPT_STATE
*s
,
1134 const SCRIPT_CONTROL
*c
,
1135 WORD
*lpOutLevels
, /* [out] final string levels */
1136 WORD
*lpOutOverrides
/* [out] final string overrides */
1140 unsigned baselevel
= 0;
1141 struct list IsolatingRuns
;
1142 IsolatedRun
*iso_run
, *next
;
1144 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
1146 if (!(chartype
= heap_alloc(uCount
* sizeof(*chartype
))))
1148 WARN("Out of memory\n");
1152 baselevel
= s
->uBidiLevel
;
1154 classify(lpString
, chartype
, uCount
, c
);
1155 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
1157 memset(lpOutOverrides
, 0, sizeof(WORD
) * uCount
);
1159 /* resolve explicit */
1160 resolveExplicit(baselevel
, chartype
, lpOutLevels
, lpOutOverrides
, uCount
, s
->fOverrideDirection
);
1161 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
1163 /* X10/BD13: Computer Isolating runs */
1164 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, lpString
, uCount
, &IsolatingRuns
);
1166 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1168 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
1171 resolveWeak(iso_run
);
1172 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
1174 /* resolve neutrals */
1175 resolveNeutrals(iso_run
);
1176 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
1178 list_remove(&iso_run
->entry
);
1182 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1183 /* resolveImplicit */
1184 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1186 /* resolveResolvedLevels*/
1187 classify(lpString
, chartype
, uCount
, c
);
1188 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1190 heap_free(chartype
);
1194 /* reverse cch indexes */
1195 static void reverse(int *pidx
, int cch
)
1199 for (; ich
< --cch
; ich
++)
1202 pidx
[ich
] = pidx
[cch
];
1208 /*------------------------------------------------------------------------
1209 Functions: reorder/reorderLevel
1211 Recursively reorders the display string
1212 "From the highest level down, reverse all characters at that level and
1213 higher, down to the lowest odd level"
1215 Implements rule L2 of the Unicode bidi Algorithm.
1217 Input: Array of embedding levels
1219 Flag enabling reversal (set to false by initial caller)
1221 In/Out: Text to reorder
1223 Note: levels may exceed 15 resp. 61 on input.
1225 Rule L3 - reorder combining marks is not implemented here
1226 Rule L4 - glyph mirroring is implemented as a display option below
1228 Note: this should be applied a line at a time
1229 -------------------------------------------------------------------------*/
1230 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1234 /* true as soon as first odd level encountered */
1235 fReverse
= fReverse
|| odd(level
);
1237 for (; ich
< cch
; ich
++)
1239 if (plevel
[ich
] < level
)
1243 else if (plevel
[ich
] > level
)
1245 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1246 cch
- ich
, fReverse
) - 1;
1251 reverse(pIndexs
, ich
);
1256 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1257 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1262 /* true as soon as first odd level encountered */
1263 fReverse
= fReverse
|| odd(level
);
1265 for (; ich
< cch
; ich
++)
1267 if (plevel
[ich
] < level
)
1269 else if (plevel
[ich
] > level
)
1274 reverse(pIndexs
, ich
);
1280 for (; ich
< cch
; ich
++)
1281 if (plevel
[ich
] < level
)
1283 else if (plevel
[ich
] > level
)
1284 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1285 cch
- ich
, fReverse
) - 1;
1291 BOOL
BIDI_GetStrengths(const WCHAR
*string
, unsigned int count
, const SCRIPT_CONTROL
*c
, WORD
*strength
)
1295 classify(string
, strength
, count
, c
);
1296 for (i
= 0; i
< count
; i
++)
1298 switch (strength
[i
])
1307 strength
[i
] = BIDI_STRONG
;
1316 strength
[i
] = BIDI_WEAK
;
1322 default: /* Neutrals and NSM */
1323 strength
[i
] = BIDI_NEUTRAL
;