2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/list.h"
57 #include "usp10_internal.h"
59 extern const unsigned short bidi_bracket_table
[] DECLSPEC_HIDDEN
;
61 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
63 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
66 /* HELPER FUNCTIONS AND DECLARATIONS */
68 /*------------------------------------------------------------------------
69 Bidirectional Character Types
71 as defined by the Unicode Bidirectional Algorithm Table 3-7.
75 The list of bidirectional character types here is not grouped the
76 same way as the table 3-7, since the numberic values for the types
77 are chosen to keep the state and action tables compact.
78 ------------------------------------------------------------------------*/
82 /* ON MUST be zero, code relies on ON = NI = 0 */
83 ON
= 0, /* Other Neutral */
86 AN
, /* Arabic Number */
87 EN
, /* European Number */
88 AL
, /* Arabic Letter (Right-to-left) */
89 NSM
, /* Non-spacing Mark */
90 CS
, /* Common Separator */
91 ES
, /* European Separator */
92 ET
, /* European Terminator (post/prefix e.g. $ and %) */
95 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
98 S
, /* Segment Separator (TAB) // used only in L1 */
99 WS
, /* White space // used only in L1 */
100 B
, /* Paragraph Separator (aka as PS) */
102 /* types for explicit controls */
103 RLO
, /* these are used only in X1-X9 */
109 LRI
, /* Isolate formatting characters new with 6.3 */
114 /* resolved types, also resolved directions */
115 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
118 static const char debug_type
[][4] =
120 "ON", /* Other Neutral */
121 "L", /* Left Letter */
122 "R", /* Right Letter */
123 "AN", /* Arabic Number */
124 "EN", /* European Number */
125 "AL", /* Arabic Letter (Right-to-left) */
126 "NSM", /* Non-spacing Mark */
127 "CS", /* Common Separator */
128 "ES", /* European Separator */
129 "ET", /* European Terminator (post/prefix e.g. $ and %) */
130 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
131 "S", /* Segment Separator (TAB) // used only in L1 */
132 "WS", /* White space // used only in L1 */
133 "B", /* Paragraph Separator (aka as PS) */
134 "RLO", /* these are used only in X1-X9 */
139 "LRI", /* Isolate formatting characters new with 6.3 */
145 /* HELPER FUNCTIONS */
147 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
151 for (i
= start
; i
< end
&& len
< 200; i
++)
153 TRACE(" %s",debug_type
[types
[i
]]);
154 len
+= strlen(debug_type
[types
[i
]])+1;
161 /* Convert the libwine information to the direction enum */
162 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
164 static const enum directions dir_map
[16] =
166 L
, /* unassigned defaults to L */
181 PDF
/* also LRE, LRO, RLE, RLO */
186 for (i
= 0; i
< uCount
; ++i
)
188 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
192 if (!c
->fLegacyBidiClass
) break;
196 case '+': chartype
[i
] = NI
; break;
197 case '/': chartype
[i
] = CS
; break;
203 case 0x202A: chartype
[i
] = LRE
; break;
204 case 0x202B: chartype
[i
] = RLE
; break;
205 case 0x202C: chartype
[i
] = PDF
; break;
206 case 0x202D: chartype
[i
] = LRO
; break;
207 case 0x202E: chartype
[i
] = RLO
; break;
208 case 0x2066: chartype
[i
] = LRI
; break;
209 case 0x2067: chartype
[i
] = RLI
; break;
210 case 0x2068: chartype
[i
] = FSI
; break;
211 case 0x2069: chartype
[i
] = PDI
; break;
218 /* RESOLVE EXPLICIT */
220 static WORD
GreaterEven(int i
)
222 return odd(i
) ? i
+ 1 : i
+ 2;
225 static WORD
GreaterOdd(int i
)
227 return odd(i
) ? i
+ 2 : i
+ 1;
230 static WORD
EmbeddingDirection(int level
)
232 return odd(level
) ? R
: L
;
235 /*------------------------------------------------------------------------
236 Function: resolveExplicit
238 Recursively resolves explicit embedding levels and overrides.
239 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
241 Input: Base embedding level and direction
244 Output: Array of embedding levels
246 In/Out: Array of direction classes
249 Note: The function uses two simple counters to keep track of
250 matching explicit codes and PDF. Use the default argument for
251 the outermost call. The nesting counter counts the recursion
252 depth and not the embedding level.
253 ------------------------------------------------------------------------*/
254 typedef struct tagStackItem
{
260 #define push_stack(l,o,i) \
262 stack[stack_top].level = l; \
263 stack[stack_top].override = o; \
264 stack[stack_top].isolate = i;} while(0)
266 #define pop_stack() do { stack_top++; } while(0)
268 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
270 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, WORD
*poutOverrides
, int count
, BOOL initialOverride
)
273 int overflow_isolate_count
= 0;
274 int overflow_embedding_count
= 0;
275 int valid_isolate_count
= 0;
278 StackItem stack
[MAX_DEPTH
+2];
279 int stack_top
= MAX_DEPTH
+1;
281 stack
[stack_top
].level
= level
;
282 stack
[stack_top
].override
= NI
;
283 stack
[stack_top
].isolate
= FALSE
;
288 push_stack(level
, R
, FALSE
);
290 push_stack(level
, L
, FALSE
);
293 for (i
= 0; i
< count
; i
++)
295 poutOverrides
[i
] = stack
[stack_top
].override
;
298 if (pclass
[i
] == RLE
)
300 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
301 poutLevel
[i
] = stack
[stack_top
].level
;
302 if (valid_level(least_odd
))
303 push_stack(least_odd
, NI
, FALSE
);
304 else if (overflow_isolate_count
== 0)
305 overflow_embedding_count
++;
308 else if (pclass
[i
] == LRE
)
310 int least_even
= GreaterEven(stack
[stack_top
].level
);
311 poutLevel
[i
] = stack
[stack_top
].level
;
312 if (valid_level(least_even
))
313 push_stack(least_even
, NI
, FALSE
);
314 else if (overflow_isolate_count
== 0)
315 overflow_embedding_count
++;
318 else if (pclass
[i
] == RLO
)
320 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
321 poutLevel
[i
] = stack
[stack_top
].level
;
322 if (valid_level(least_odd
))
323 push_stack(least_odd
, R
, FALSE
);
324 else if (overflow_isolate_count
== 0)
325 overflow_embedding_count
++;
328 else if (pclass
[i
] == LRO
)
330 int least_even
= GreaterEven(stack
[stack_top
].level
);
331 poutLevel
[i
] = stack
[stack_top
].level
;
332 if (valid_level(least_even
))
333 push_stack(least_even
, L
, FALSE
);
334 else if (overflow_isolate_count
== 0)
335 overflow_embedding_count
++;
338 else if (pclass
[i
] == RLI
)
340 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
341 poutLevel
[i
] = stack
[stack_top
].level
;
342 if (valid_level(least_odd
))
344 valid_isolate_count
++;
345 push_stack(least_odd
, NI
, TRUE
);
348 overflow_isolate_count
++;
351 else if (pclass
[i
] == LRI
)
353 int least_even
= GreaterEven(stack
[stack_top
].level
);
354 poutLevel
[i
] = stack
[stack_top
].level
;
355 if (valid_level(least_even
))
357 valid_isolate_count
++;
358 push_stack(least_even
, NI
, TRUE
);
361 overflow_isolate_count
++;
364 else if (pclass
[i
] == FSI
)
369 poutLevel
[i
] = stack
[stack_top
].level
;
370 for (j
= i
+1; j
< count
; j
++)
372 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
377 else if (pclass
[j
] == PDI
)
386 if (skipping
) continue;
393 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
401 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
402 if (valid_level(least_odd
))
404 valid_isolate_count
++;
405 push_stack(least_odd
, NI
, TRUE
);
408 overflow_isolate_count
++;
412 int least_even
= GreaterEven(stack
[stack_top
].level
);
413 if (valid_level(least_even
))
415 valid_isolate_count
++;
416 push_stack(least_even
, NI
, TRUE
);
419 overflow_isolate_count
++;
423 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
425 poutLevel
[i
] = stack
[stack_top
].level
;
426 if (stack
[stack_top
].override
!= NI
)
427 pclass
[i
] = stack
[stack_top
].override
;
430 else if (pclass
[i
] == PDI
)
432 if (overflow_isolate_count
) overflow_isolate_count
--;
433 else if (!valid_isolate_count
) {/* do nothing */}
436 overflow_embedding_count
= 0;
437 while (!stack
[stack_top
].isolate
) pop_stack();
439 valid_isolate_count
--;
441 poutLevel
[i
] = stack
[stack_top
].level
;
444 else if (pclass
[i
] == PDF
)
446 poutLevel
[i
] = stack
[stack_top
].level
;
447 if (overflow_isolate_count
) {/* do nothing */}
448 else if (overflow_embedding_count
) overflow_embedding_count
--;
449 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
454 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
455 for (i
= 0; i
< count
; i
++)
456 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
460 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
462 if (index
== -1 || index
== back_fence
) return index
;
464 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
468 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
470 if (index
== front_fence
) return index
;
472 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
476 typedef struct tagRun
483 typedef struct tagRunChar
489 typedef struct tagIsolatedRun
500 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
502 if (index
>= (iso_run
->length
-1)) return -1;
504 while (index
< iso_run
->length
&& *iso_run
->item
[index
].pcls
== BN
) index
++;
505 if (index
== iso_run
->length
) return -1;
509 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
512 if (index
<= 0) return -1;
514 while (index
> -1 && *iso_run
->item
[index
].pcls
== BN
) index
--;
518 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
523 for (i
= 0; i
< iso_run
->length
&& len
< 200; i
++)
525 TRACE(" %s",debug_type
[*iso_run
->item
[i
].pcls
]);
526 len
+= strlen(debug_type
[*iso_run
->item
[i
].pcls
])+1;
528 if (i
!= iso_run
->length
)
533 /*------------------------------------------------------------------------
534 Function: resolveWeak
536 Resolves the directionality of numeric and other weak character types
538 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
540 Input: Array of embedding levels
543 In/Out: Array of directional classes
545 Note: On input only these directional classes are expected
546 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
547 ------------------------------------------------------------------------*/
549 static void resolveWeak(IsolatedRun
* iso_run
)
554 for (i
=0; i
< iso_run
->length
; i
++)
556 if (*iso_run
->item
[i
].pcls
== NSM
)
558 int j
= iso_previousValidChar(iso_run
, i
);
560 *iso_run
->item
[i
].pcls
= iso_run
->sos
;
561 else if (*iso_run
->item
[j
].pcls
>= LRI
)
562 *iso_run
->item
[i
].pcls
= ON
;
564 *iso_run
->item
[i
].pcls
= *iso_run
->item
[j
].pcls
;
569 for (i
= 0; i
< iso_run
->length
; i
++)
571 if (*iso_run
->item
[i
].pcls
== EN
)
573 int j
= iso_previousValidChar(iso_run
, i
);
576 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
|| *iso_run
->item
[j
].pcls
== AL
)
578 if (*iso_run
->item
[j
].pcls
== AL
)
579 *iso_run
->item
[i
].pcls
= AN
;
582 j
= iso_previousValidChar(iso_run
, j
);
588 for (i
= 0; i
< iso_run
->length
; i
++)
590 if (*iso_run
->item
[i
].pcls
== AL
)
591 *iso_run
->item
[i
].pcls
= R
;
595 for (i
= 0; i
< iso_run
->length
; i
++)
597 if (*iso_run
->item
[i
].pcls
== ES
)
599 int b
= iso_previousValidChar(iso_run
, i
);
600 int f
= iso_nextValidChar(iso_run
, i
);
602 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
603 *iso_run
->item
[i
].pcls
= EN
;
605 else if (*iso_run
->item
[i
].pcls
== CS
)
607 int b
= iso_previousValidChar(iso_run
, i
);
608 int f
= iso_nextValidChar(iso_run
, i
);
610 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
611 *iso_run
->item
[i
].pcls
= EN
;
612 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== AN
&& *iso_run
->item
[f
].pcls
== AN
)
613 *iso_run
->item
[i
].pcls
= AN
;
618 for (i
= 0; i
< iso_run
->length
; i
++)
620 if (*iso_run
->item
[i
].pcls
== ET
)
623 for (j
= i
-1 ; j
> -1; j
--)
625 if (*iso_run
->item
[j
].pcls
== BN
) continue;
626 if (*iso_run
->item
[j
].pcls
== ET
) continue;
627 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
630 if (*iso_run
->item
[i
].pcls
== ET
)
632 for (j
= i
+1; j
< iso_run
->length
; j
++)
634 if (*iso_run
->item
[j
].pcls
== BN
) continue;
635 if (*iso_run
->item
[j
].pcls
== ET
) continue;
636 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
644 for (i
= 0; i
< iso_run
->length
; i
++)
646 if (*iso_run
->item
[i
].pcls
== ET
|| *iso_run
->item
[i
].pcls
== ES
|| *iso_run
->item
[i
].pcls
== CS
|| *iso_run
->item
[i
].pcls
== ON
)
650 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
651 *iso_run
->item
[b
].pcls
= ON
;
652 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
653 *iso_run
->item
[f
].pcls
= ON
;
655 *iso_run
->item
[i
].pcls
= ON
;
660 for (i
= 0; i
< iso_run
->length
; i
++)
662 if (*iso_run
->item
[i
].pcls
== EN
)
665 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
666 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
)
668 if (*iso_run
->item
[j
].pcls
== L
)
669 *iso_run
->item
[i
].pcls
= L
;
672 if (iso_run
->sos
== L
&& j
== -1)
673 *iso_run
->item
[i
].pcls
= L
;
678 typedef struct tagBracketPair
684 static int compr(const void *a
, const void* b
)
686 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
689 static BracketPair
*computeBracketPairs(IsolatedRun
*iso_run
)
693 int stack_top
= iso_run
->length
;
694 BracketPair
*out
= NULL
;
698 open_stack
= heap_alloc(iso_run
->length
* sizeof(*open_stack
));
699 stack_index
= heap_alloc(iso_run
->length
* sizeof(*stack_index
));
701 for (i
= 0; i
< iso_run
->length
; i
++)
703 unsigned short ubv
= get_table_entry(bidi_bracket_table
, iso_run
->item
[i
].ch
);
708 out
= heap_alloc(sizeof(*out
));
715 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
716 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
717 if (open_stack
[stack_top
] == 0x232A)
718 open_stack
[stack_top
] = 0x3009;
719 stack_index
[stack_top
] = i
;
721 else if ((ubv
>> 8) == 1)
724 if (stack_top
== iso_run
->length
) continue;
725 for (j
= stack_top
; j
< iso_run
->length
; j
++)
727 WCHAR c
= iso_run
->item
[i
].ch
;
728 if (c
== 0x232A) c
= 0x3009;
729 if (c
== open_stack
[j
])
731 out
[pair_count
].start
= stack_index
[j
];
732 out
[pair_count
].end
= i
;
734 out
= HeapReAlloc(GetProcessHeap(), 0, out
, sizeof(BracketPair
) * (pair_count
+1));
735 out
[pair_count
].start
= -1;
748 else if (pair_count
> 1)
749 qsort(out
, pair_count
, sizeof(BracketPair
), compr
);
751 heap_free(open_stack
);
752 heap_free(stack_index
);
756 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
758 /*------------------------------------------------------------------------
759 Function: resolveNeutrals
761 Resolves the directionality of neutral character types.
763 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
765 Input: Array of embedding levels
769 In/Out: Array of directional classes
771 Note: On input only these directional classes are expected
772 R, L, NI, AN, EN and BN
774 W8 resolves a number of ENs to L
775 ------------------------------------------------------------------------*/
776 static void resolveNeutrals(IsolatedRun
*iso_run
)
779 BracketPair
*pairs
= NULL
;
781 /* Translate isolates into NI */
782 for (i
= 0; i
< iso_run
->length
; i
++)
784 if (*iso_run
->item
[i
].pcls
>= LRI
)
785 *iso_run
->item
[i
].pcls
= NI
;
787 switch(*iso_run
->item
[i
].pcls
)
791 case WS
: *iso_run
->item
[i
].pcls
= NI
;
794 ASSERT(*iso_run
->item
[i
].pcls
< 5 || *iso_run
->item
[i
].pcls
== BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
797 /* N0: Skipping bracketed pairs for now */
798 pairs
= computeBracketPairs(iso_run
);
801 BracketPair
*p
= &pairs
[0];
803 while (p
->start
>= 0)
806 int e
= EmbeddingDirection(iso_run
->e
);
807 int o
= EmbeddingDirection(iso_run
->e
+1);
809 TRACE("Bracket Pair [%i - %i]\n",p
->start
, p
->end
);
812 for (j
= p
->start
+1; j
< p
->end
; j
++)
814 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
816 *iso_run
->item
[p
->start
].pcls
= e
;
817 *iso_run
->item
[p
->end
].pcls
= e
;
820 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
824 if (j
== p
->end
&& flag_o
)
826 for (j
= p
->start
; j
>= 0; j
--)
828 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
830 *iso_run
->item
[p
->start
].pcls
= o
;
831 *iso_run
->item
[p
->end
].pcls
= o
;
834 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
836 *iso_run
->item
[p
->start
].pcls
= e
;
837 *iso_run
->item
[p
->end
].pcls
= e
;
843 *iso_run
->item
[p
->start
].pcls
= iso_run
->sos
;
844 *iso_run
->item
[p
->end
].pcls
= iso_run
->sos
;
855 for (i
= 0; i
< iso_run
->length
; i
++)
859 if (*iso_run
->item
[i
].pcls
== NI
)
862 int b
= iso_previousValidChar(iso_run
, i
);
871 if (*iso_run
->item
[b
].pcls
== R
|| *iso_run
->item
[b
].pcls
== AN
|| *iso_run
->item
[b
].pcls
== EN
)
873 else if (*iso_run
->item
[b
].pcls
== L
)
875 else /* No string type */
878 j
= iso_nextValidChar(iso_run
, i
);
879 while (j
> -1 && *iso_run
->item
[j
].pcls
== NI
) j
= iso_nextValidChar(iso_run
, j
);
886 else if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== AN
|| *iso_run
->item
[j
].pcls
== EN
)
888 else if (*iso_run
->item
[j
].pcls
== L
)
890 else /* No string type */
895 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
896 *iso_run
->item
[b
].pcls
= r
;
902 for (i
= 0; i
< iso_run
->length
; i
++)
904 if (*iso_run
->item
[i
].pcls
== NI
)
908 *iso_run
->item
[i
].pcls
= EmbeddingDirection(iso_run
->e
);
909 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
910 *iso_run
->item
[b
].pcls
= EmbeddingDirection(iso_run
->e
);
911 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
912 *iso_run
->item
[f
].pcls
= EmbeddingDirection(iso_run
->e
);
917 /*------------------------------------------------------------------------
918 Function: resolveImplicit
920 Recursively resolves implicit embedding levels.
921 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
923 Input: Array of direction classes
927 In/Out: Array of embedding levels
929 Note: levels may exceed 15 on output.
930 Accepted subset of direction classes
932 ------------------------------------------------------------------------*/
933 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
938 for (i
= sos
; i
<= eos
; i
++)
943 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
944 ASSERT(pcls
[i
] < 5); /* "Out of range." */
946 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
948 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
950 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
955 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
960 for (i
= sos
; i
<= eos
; i
++)
962 if (pcls
[i
] == B
|| pcls
[i
] == S
)
965 while (i
> sos
&& j
>= sos
&&
966 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
967 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
968 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
969 plevel
[j
--] = baselevel
;
970 plevel
[i
] = baselevel
;
972 else if (pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
|| pcls
[i
] == RLO
||
973 pcls
[i
] == PDF
|| pcls
[i
] == BN
)
975 plevel
[i
] = i
? plevel
[i
- 1] : baselevel
;
978 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
979 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
980 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
983 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
984 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
985 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
986 plevel
[j
--] = baselevel
;
991 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, WORD
*pLevel
, LPCWSTR lpString
, int uCount
, struct list
*set
)
993 int run_start
, run_end
, i
;
996 IsolatedRun
*current_isolated
;
998 if (!(runs
= heap_alloc(uCount
* sizeof(*runs
))))
1005 while (run_start
< uCount
)
1007 run_end
= nextValidChar(pcls
, run_start
, uCount
);
1008 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
1010 runs
[run_count
].start
= run_start
;
1011 runs
[run_count
].end
= run_end
;
1012 runs
[run_count
].e
= pLevel
[run_start
];
1013 run_start
= nextValidChar(pcls
, run_end
, uCount
);
1017 /* Build Isolating Runs */
1019 while (i
< run_count
)
1022 if (runs
[k
].start
>= 0)
1024 int type_fence
, real_end
;
1027 if (!(current_isolated
= heap_alloc(FIELD_OFFSET(IsolatedRun
, item
[uCount
]))))
1030 run_start
= runs
[k
].start
;
1031 current_isolated
->e
= runs
[k
].e
;
1032 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
1034 for (j
= 0; j
< current_isolated
->length
; j
++)
1036 current_isolated
->item
[j
].pcls
= &pcls
[runs
[k
].start
+j
];
1037 current_isolated
->item
[j
].ch
= lpString
[runs
[k
].start
+j
];
1040 run_end
= runs
[k
].end
;
1042 TRACE("{ [%i -- %i]",run_start
, run_end
);
1044 if (pcls
[run_end
] == BN
)
1045 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
1047 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
1051 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
1052 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
1061 int l
= current_isolated
->length
;
1063 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1064 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
1066 current_isolated
->item
[l
].pcls
= &pcls
[runs
[j
].start
+m
];
1067 current_isolated
->item
[l
].ch
= lpString
[runs
[j
].start
+m
];
1070 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
1072 run_end
= runs
[j
].end
;
1073 if (pcls
[run_end
] == BN
)
1074 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
1085 type_fence
= previousValidChar(pcls
, run_start
, -1);
1087 if (type_fence
== -1)
1088 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
1090 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
1092 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
1094 if (run_end
== uCount
)
1095 current_isolated
->eos
= current_isolated
->sos
;
1098 /* eos could be an BN */
1099 if ( pcls
[run_end
] == BN
)
1101 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
1102 if (real_end
< run_start
)
1103 real_end
= run_start
;
1108 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
1109 if (type_fence
== uCount
)
1110 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
1112 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
1114 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
1117 list_add_tail(set
, ¤t_isolated
->entry
);
1118 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1126 /*************************************************************
1127 * BIDI_DeterminLevels
1129 BOOL
BIDI_DetermineLevels(
1130 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
1131 INT uCount
, /* [in] Number of WCHARs in string. */
1132 const SCRIPT_STATE
*s
,
1133 const SCRIPT_CONTROL
*c
,
1134 WORD
*lpOutLevels
, /* [out] final string levels */
1135 WORD
*lpOutOverrides
/* [out] final string overrides */
1139 unsigned baselevel
= 0;
1140 struct list IsolatingRuns
;
1141 IsolatedRun
*iso_run
, *next
;
1143 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
1145 if (!(chartype
= heap_alloc(uCount
* sizeof(*chartype
))))
1147 WARN("Out of memory\n");
1151 baselevel
= s
->uBidiLevel
;
1153 classify(lpString
, chartype
, uCount
, c
);
1154 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
1156 memset(lpOutOverrides
, 0, sizeof(WORD
) * uCount
);
1158 /* resolve explicit */
1159 resolveExplicit(baselevel
, chartype
, lpOutLevels
, lpOutOverrides
, uCount
, s
->fOverrideDirection
);
1160 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
1162 /* X10/BD13: Computer Isolating runs */
1163 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, lpString
, uCount
, &IsolatingRuns
);
1165 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1167 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
1170 resolveWeak(iso_run
);
1171 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
1173 /* resolve neutrals */
1174 resolveNeutrals(iso_run
);
1175 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
1177 list_remove(&iso_run
->entry
);
1181 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1182 /* resolveImplicit */
1183 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1185 /* resolveResolvedLevels*/
1186 classify(lpString
, chartype
, uCount
, c
);
1187 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1189 heap_free(chartype
);
1193 /* reverse cch indexes */
1194 static void reverse(int *pidx
, int cch
)
1198 for (; ich
< --cch
; ich
++)
1201 pidx
[ich
] = pidx
[cch
];
1207 /*------------------------------------------------------------------------
1208 Functions: reorder/reorderLevel
1210 Recursively reorders the display string
1211 "From the highest level down, reverse all characters at that level and
1212 higher, down to the lowest odd level"
1214 Implements rule L2 of the Unicode bidi Algorithm.
1216 Input: Array of embedding levels
1218 Flag enabling reversal (set to false by initial caller)
1220 In/Out: Text to reorder
1222 Note: levels may exceed 15 resp. 61 on input.
1224 Rule L3 - reorder combining marks is not implemented here
1225 Rule L4 - glyph mirroring is implemented as a display option below
1227 Note: this should be applied a line at a time
1228 -------------------------------------------------------------------------*/
1229 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1233 /* true as soon as first odd level encountered */
1234 fReverse
= fReverse
|| odd(level
);
1236 for (; ich
< cch
; ich
++)
1238 if (plevel
[ich
] < level
)
1242 else if (plevel
[ich
] > level
)
1244 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1245 cch
- ich
, fReverse
) - 1;
1250 reverse(pIndexs
, ich
);
1255 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1256 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1261 /* true as soon as first odd level encountered */
1262 fReverse
= fReverse
|| odd(level
);
1264 for (; ich
< cch
; ich
++)
1266 if (plevel
[ich
] < level
)
1268 else if (plevel
[ich
] > level
)
1273 reverse(pIndexs
, ich
);
1279 for (; ich
< cch
; ich
++)
1280 if (plevel
[ich
] < level
)
1282 else if (plevel
[ich
] > level
)
1283 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1284 cch
- ich
, fReverse
) - 1;
1290 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
1294 classify(lpString
, lpStrength
, uCount
, c
);
1296 for ( i
= 0; i
< uCount
; i
++)
1298 switch(lpStrength
[i
])
1307 lpStrength
[i
] = BIDI_STRONG
;
1316 lpStrength
[i
] = BIDI_WEAK
;
1322 default: /* Neutrals and NSM */
1323 lpStrength
[i
] = BIDI_NEUTRAL
;