2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/list.h"
57 #include "usp10_internal.h"
59 extern const unsigned short bidi_bracket_table
[];
61 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
63 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
66 /* HELPER FUNCTIONS AND DECLARATIONS */
68 /*------------------------------------------------------------------------
69 Bidirectional Character Types
71 as defined by the Unicode Bidirectional Algorithm Table 3-7.
75 The list of bidirectional character types here is not grouped the
76 same way as the table 3-7, since the numberic values for the types
77 are chosen to keep the state and action tables compact.
78 ------------------------------------------------------------------------*/
82 /* ON MUST be zero, code relies on ON = NI = 0 */
83 ON
= 0, /* Other Neutral */
86 AN
, /* Arabic Number */
87 EN
, /* European Number */
88 AL
, /* Arabic Letter (Right-to-left) */
89 NSM
, /* Non-spacing Mark */
90 CS
, /* Common Separator */
91 ES
, /* European Separator */
92 ET
, /* European Terminator (post/prefix e.g. $ and %) */
95 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
98 S
, /* Segment Separator (TAB) // used only in L1 */
99 WS
, /* White space // used only in L1 */
100 B
, /* Paragraph Separator (aka as PS) */
102 /* types for explicit controls */
103 RLO
, /* these are used only in X1-X9 */
109 LRI
, /* Isolate formatting characters new with 6.3 */
114 /* resolved types, also resolved directions */
115 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
118 static const char debug_type
[][4] =
120 "ON", /* Other Neutral */
121 "L", /* Left Letter */
122 "R", /* Right Letter */
123 "AN", /* Arabic Number */
124 "EN", /* European Number */
125 "AL", /* Arabic Letter (Right-to-left) */
126 "NSM", /* Non-spacing Mark */
127 "CS", /* Common Separator */
128 "ES", /* European Separator */
129 "ET", /* European Terminator (post/prefix e.g. $ and %) */
130 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
131 "S", /* Segment Separator (TAB) // used only in L1 */
132 "WS", /* White space // used only in L1 */
133 "B", /* Paragraph Separator (aka as PS) */
134 "RLO", /* these are used only in X1-X9 */
139 "LRI", /* Isolate formatting characters new with 6.3 */
145 /* HELPER FUNCTIONS */
147 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
151 for (i
= start
; i
< end
; i
++)
152 TRACE(" %s",debug_type
[types
[i
]]);
156 /* Convert the libwine information to the direction enum */
157 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
159 static const enum directions dir_map
[16] =
161 L
, /* unassigned defaults to L */
176 PDF
/* also LRE, LRO, RLE, RLO */
181 for (i
= 0; i
< uCount
; ++i
)
183 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
187 if (!c
->fLegacyBidiClass
) break;
191 case '+': chartype
[i
] = NI
; break;
192 case '/': chartype
[i
] = CS
; break;
198 case 0x202A: chartype
[i
] = LRE
; break;
199 case 0x202B: chartype
[i
] = RLE
; break;
200 case 0x202C: chartype
[i
] = PDF
; break;
201 case 0x202D: chartype
[i
] = LRO
; break;
202 case 0x202E: chartype
[i
] = RLO
; break;
203 case 0x2066: chartype
[i
] = LRI
; break;
204 case 0x2067: chartype
[i
] = RLI
; break;
205 case 0x2068: chartype
[i
] = FSI
; break;
206 case 0x2069: chartype
[i
] = PDI
; break;
213 /* RESOLVE EXPLICIT */
215 static WORD
GreaterEven(int i
)
217 return odd(i
) ? i
+ 1 : i
+ 2;
220 static WORD
GreaterOdd(int i
)
222 return odd(i
) ? i
+ 2 : i
+ 1;
225 static WORD
EmbeddingDirection(int level
)
227 return odd(level
) ? R
: L
;
230 /*------------------------------------------------------------------------
231 Function: resolveExplicit
233 Recursively resolves explicit embedding levels and overrides.
234 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
236 Input: Base embedding level and direction
239 Output: Array of embedding levels
241 In/Out: Array of direction classes
244 Note: The function uses two simple counters to keep track of
245 matching explicit codes and PDF. Use the default argument for
246 the outermost call. The nesting counter counts the recursion
247 depth and not the embedding level.
248 ------------------------------------------------------------------------*/
249 typedef struct tagStackItem
{
255 #define push_stack(l,o,i) \
257 stack[stack_top].level = l; \
258 stack[stack_top].override = o; \
259 stack[stack_top].isolate = i;} while(0)
261 #define pop_stack() do { stack_top++; } while(0)
263 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
265 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, int count
)
268 int overflow_isolate_count
= 0;
269 int overflow_embedding_count
= 0;
270 int valid_isolate_count
= 0;
273 StackItem stack
[MAX_DEPTH
+2];
274 int stack_top
= MAX_DEPTH
+1;
276 stack
[stack_top
].level
= level
;
277 stack
[stack_top
].override
= NI
;
278 stack
[stack_top
].isolate
= FALSE
;
280 for (i
= 0; i
< count
; i
++)
283 if (pclass
[i
] == RLE
)
285 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
286 poutLevel
[i
] = stack
[stack_top
].level
;
287 if (valid_level(least_odd
))
288 push_stack(least_odd
, NI
, FALSE
);
289 else if (overflow_isolate_count
== 0)
290 overflow_embedding_count
++;
293 else if (pclass
[i
] == LRE
)
295 int least_even
= GreaterEven(stack
[stack_top
].level
);
296 poutLevel
[i
] = stack
[stack_top
].level
;
297 if (valid_level(least_even
))
298 push_stack(least_even
, NI
, FALSE
);
299 else if (overflow_isolate_count
== 0)
300 overflow_embedding_count
++;
303 else if (pclass
[i
] == RLO
)
305 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
306 poutLevel
[i
] = stack
[stack_top
].level
;
307 if (valid_level(least_odd
))
308 push_stack(least_odd
, R
, FALSE
);
309 else if (overflow_isolate_count
== 0)
310 overflow_embedding_count
++;
313 else if (pclass
[i
] == LRO
)
315 int least_even
= GreaterEven(stack
[stack_top
].level
);
316 poutLevel
[i
] = stack
[stack_top
].level
;
317 if (valid_level(least_even
))
318 push_stack(least_even
, L
, FALSE
);
319 else if (overflow_isolate_count
== 0)
320 overflow_embedding_count
++;
323 else if (pclass
[i
] == RLI
)
325 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
326 poutLevel
[i
] = stack
[stack_top
].level
;
327 if (valid_level(least_odd
))
329 valid_isolate_count
++;
330 push_stack(least_odd
, NI
, TRUE
);
333 overflow_isolate_count
++;
336 else if (pclass
[i
] == LRI
)
338 int least_even
= GreaterEven(stack
[stack_top
].level
);
339 poutLevel
[i
] = stack
[stack_top
].level
;
340 if (valid_level(least_even
))
342 valid_isolate_count
++;
343 push_stack(least_even
, NI
, TRUE
);
346 overflow_isolate_count
++;
349 else if (pclass
[i
] == FSI
)
354 poutLevel
[i
] = stack
[stack_top
].level
;
355 for (j
= i
+1; j
< count
; j
++)
357 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
362 else if (pclass
[j
] == PDI
)
371 if (skipping
) continue;
378 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
386 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
387 if (valid_level(least_odd
))
389 valid_isolate_count
++;
390 push_stack(least_odd
, NI
, TRUE
);
393 overflow_isolate_count
++;
397 int least_even
= GreaterEven(stack
[stack_top
].level
);
398 if (valid_level(least_even
))
400 valid_isolate_count
++;
401 push_stack(least_even
, NI
, TRUE
);
404 overflow_isolate_count
++;
408 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
410 poutLevel
[i
] = stack
[stack_top
].level
;
411 if (stack
[stack_top
].override
!= NI
)
412 pclass
[i
] = stack
[stack_top
].override
;
415 else if (pclass
[i
] == PDI
)
417 if (overflow_isolate_count
) overflow_isolate_count
--;
418 else if (!valid_isolate_count
) {/* do nothing */}
421 overflow_embedding_count
= 0;
422 while (!stack
[stack_top
].isolate
) pop_stack();
424 valid_isolate_count
--;
426 poutLevel
[i
] = stack
[stack_top
].level
;
429 else if (pclass
[i
] == PDF
)
431 poutLevel
[i
] = stack
[stack_top
].level
;
432 if (overflow_isolate_count
) {/* do nothing */}
433 else if (overflow_embedding_count
) overflow_embedding_count
--;
434 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
439 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
440 for (i
= 0; i
< count
; i
++)
441 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
445 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
447 if (index
== -1 || index
== back_fence
) return index
;
449 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
453 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
455 if (index
== front_fence
) return index
;
457 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
461 typedef struct tagRun
468 typedef struct tagRunChar
474 typedef struct tagIsolatedRun
485 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
487 if (index
>= (iso_run
->length
-1)) return -1;
489 while (index
< iso_run
->length
&& *iso_run
->item
[index
].pcls
== BN
) index
++;
490 if (index
== iso_run
->length
) return -1;
494 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
497 if (index
<= 0) return -1;
499 while (index
> -1 && *iso_run
->item
[index
].pcls
== BN
) index
--;
503 static inline int iso_previousChar(IsolatedRun
*iso_run
, int index
)
505 if (index
<= 0) return -1;
509 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
514 for (i
= 0; i
< iso_run
->length
; i
++)
515 TRACE(" %s",debug_type
[*iso_run
->item
[i
].pcls
]);
519 /*------------------------------------------------------------------------
520 Function: resolveWeak
522 Resolves the directionality of numeric and other weak character types
524 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
526 Input: Array of embedding levels
529 In/Out: Array of directional classes
531 Note: On input only these directional classes are expected
532 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
533 ------------------------------------------------------------------------*/
535 static void resolveWeak(IsolatedRun
* iso_run
)
540 for (i
=0; i
< iso_run
->length
; i
++)
542 if (*iso_run
->item
[i
].pcls
== NSM
)
544 int j
= iso_previousValidChar(iso_run
, i
);
546 *iso_run
->item
[i
].pcls
= iso_run
->sos
;
547 else if (*iso_run
->item
[j
].pcls
>= LRI
)
548 *iso_run
->item
[i
].pcls
= ON
;
550 *iso_run
->item
[i
].pcls
= *iso_run
->item
[j
].pcls
;
555 for (i
= 0; i
< iso_run
->length
; i
++)
557 if (*iso_run
->item
[i
].pcls
== EN
)
559 int j
= iso_previousValidChar(iso_run
, i
);
562 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
|| *iso_run
->item
[j
].pcls
== AL
)
564 if (*iso_run
->item
[j
].pcls
== AL
)
565 *iso_run
->item
[i
].pcls
= AN
;
568 j
= iso_previousValidChar(iso_run
, j
);
574 for (i
= 0; i
< iso_run
->length
; i
++)
576 if (*iso_run
->item
[i
].pcls
== AL
)
577 *iso_run
->item
[i
].pcls
= R
;
581 for (i
= 0; i
< iso_run
->length
; i
++)
583 if (*iso_run
->item
[i
].pcls
== ES
)
585 int b
= iso_previousValidChar(iso_run
, i
);
586 int f
= iso_nextValidChar(iso_run
, i
);
588 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
589 *iso_run
->item
[i
].pcls
= EN
;
591 else if (*iso_run
->item
[i
].pcls
== CS
)
593 int b
= iso_previousValidChar(iso_run
, i
);
594 int f
= iso_nextValidChar(iso_run
, i
);
596 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
597 *iso_run
->item
[i
].pcls
= EN
;
598 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== AN
&& *iso_run
->item
[f
].pcls
== AN
)
599 *iso_run
->item
[i
].pcls
= AN
;
604 for (i
= 0; i
< iso_run
->length
; i
++)
606 if (*iso_run
->item
[i
].pcls
== ET
)
609 for (j
= i
-1 ; j
> -1; j
--)
611 if (*iso_run
->item
[j
].pcls
== BN
) continue;
612 if (*iso_run
->item
[j
].pcls
== ET
) continue;
613 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
616 if (*iso_run
->item
[i
].pcls
== ET
)
618 for (j
= i
+1; j
< iso_run
->length
; j
++)
620 if (*iso_run
->item
[j
].pcls
== BN
) continue;
621 if (*iso_run
->item
[j
].pcls
== ET
) continue;
622 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
630 for (i
= 0; i
< iso_run
->length
; i
++)
632 if (*iso_run
->item
[i
].pcls
== ET
|| *iso_run
->item
[i
].pcls
== ES
|| *iso_run
->item
[i
].pcls
== CS
|| *iso_run
->item
[i
].pcls
== ON
)
636 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
637 *iso_run
->item
[b
].pcls
= ON
;
638 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
639 *iso_run
->item
[f
].pcls
= ON
;
641 *iso_run
->item
[i
].pcls
= ON
;
646 for (i
= 0; i
< iso_run
->length
; i
++)
648 if (*iso_run
->item
[i
].pcls
== EN
)
651 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
652 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
)
654 if (*iso_run
->item
[j
].pcls
== L
)
655 *iso_run
->item
[i
].pcls
= L
;
658 if (iso_run
->sos
== L
&& j
== -1)
659 *iso_run
->item
[i
].pcls
= L
;
664 typedef struct tagBracketPair
670 static int compr(const void *a
, const void* b
)
672 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
675 static BracketPair
*computeBracketPairs(IsolatedRun
*iso_run
)
679 int stack_top
= iso_run
->length
;
680 BracketPair
*out
= NULL
;
684 open_stack
= HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR
) * iso_run
->length
);
685 stack_index
= HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run
->length
);
687 for (i
= 0; i
< iso_run
->length
; i
++)
689 unsigned short ubv
= get_table_entry(bidi_bracket_table
, iso_run
->item
[i
].ch
);
694 out
= HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair
));
701 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
702 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
703 if (open_stack
[stack_top
] == 0x232A)
704 open_stack
[stack_top
] = 0x3009;
705 stack_index
[stack_top
] = i
;
707 else if ((ubv
>> 8) == 1)
710 if (stack_top
== iso_run
->length
) continue;
711 for (j
= stack_top
; j
< iso_run
->length
; j
++)
713 WCHAR c
= iso_run
->item
[i
].ch
;
714 if (c
== 0x232A) c
= 0x3009;
715 if (c
== open_stack
[j
])
717 out
[pair_count
].start
= stack_index
[j
];
718 out
[pair_count
].end
= i
;
720 out
= HeapReAlloc(GetProcessHeap(), 0, out
, sizeof(BracketPair
) * (pair_count
+1));
721 out
[pair_count
].start
= -1;
731 HeapFree(GetProcessHeap(),0,out
);
734 else if (pair_count
> 1)
735 qsort(out
, pair_count
, sizeof(BracketPair
), compr
);
737 HeapFree(GetProcessHeap(), 0, open_stack
);
738 HeapFree(GetProcessHeap(), 0, stack_index
);
742 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
744 /*------------------------------------------------------------------------
745 Function: resolveNeutrals
747 Resolves the directionality of neutral character types.
749 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
751 Input: Array of embedding levels
755 In/Out: Array of directional classes
757 Note: On input only these directional classes are expected
758 R, L, NI, AN, EN and BN
760 W8 resolves a number of ENs to L
761 ------------------------------------------------------------------------*/
762 static void resolveNeutrals(IsolatedRun
*iso_run
)
765 BracketPair
*pairs
= NULL
;
767 /* Translate isolates into NI */
768 for (i
= 0; i
< iso_run
->length
; i
++)
770 if (*iso_run
->item
[i
].pcls
>= LRI
)
771 *iso_run
->item
[i
].pcls
= NI
;
773 switch(*iso_run
->item
[i
].pcls
)
777 case WS
: *iso_run
->item
[i
].pcls
= NI
;
780 ASSERT(*iso_run
->item
[i
].pcls
< 5 || *iso_run
->item
[i
].pcls
== BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
783 /* N0: Skipping bracketed pairs for now */
784 pairs
= computeBracketPairs(iso_run
);
787 BracketPair
*p
= &pairs
[0];
789 while (p
->start
>= 0)
792 int e
= EmbeddingDirection(iso_run
->e
);
793 int o
= EmbeddingDirection(iso_run
->e
+1);
795 TRACE("Bracket Pair [%i - %i]\n",p
->start
, p
->end
);
798 for (j
= p
->start
+1; j
< p
->end
; j
++)
800 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
802 *iso_run
->item
[p
->start
].pcls
= e
;
803 *iso_run
->item
[p
->end
].pcls
= e
;
806 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
810 if (j
== p
->end
&& flag_o
)
812 for (j
= p
->start
; j
>= 0; j
--)
814 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
816 *iso_run
->item
[p
->start
].pcls
= o
;
817 *iso_run
->item
[p
->end
].pcls
= o
;
820 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
822 *iso_run
->item
[p
->start
].pcls
= e
;
823 *iso_run
->item
[p
->end
].pcls
= e
;
829 *iso_run
->item
[p
->start
].pcls
= iso_run
->sos
;
830 *iso_run
->item
[p
->end
].pcls
= iso_run
->sos
;
837 HeapFree(GetProcessHeap(),0,pairs
);
841 for (i
= 0; i
< iso_run
->length
; i
++)
845 if (*iso_run
->item
[i
].pcls
== NI
)
848 int b
= iso_previousValidChar(iso_run
, i
);
857 if (*iso_run
->item
[b
].pcls
== R
|| *iso_run
->item
[b
].pcls
== AN
|| *iso_run
->item
[b
].pcls
== EN
)
859 else if (*iso_run
->item
[b
].pcls
== L
)
861 else /* No string type */
864 j
= iso_nextValidChar(iso_run
, i
);
865 while (j
> -1 && *iso_run
->item
[j
].pcls
== NI
) j
= iso_nextValidChar(iso_run
, j
);
872 else if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== AN
|| *iso_run
->item
[j
].pcls
== EN
)
874 else if (*iso_run
->item
[j
].pcls
== L
)
876 else /* No string type */
881 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
882 *iso_run
->item
[b
].pcls
= r
;
888 for (i
= 0; i
< iso_run
->length
; i
++)
890 if (*iso_run
->item
[i
].pcls
== NI
)
894 *iso_run
->item
[i
].pcls
= EmbeddingDirection(iso_run
->e
);
895 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
896 *iso_run
->item
[b
].pcls
= EmbeddingDirection(iso_run
->e
);
897 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
898 *iso_run
->item
[f
].pcls
= EmbeddingDirection(iso_run
->e
);
903 /*------------------------------------------------------------------------
904 Function: resolveImplicit
906 Recursively resolves implicit embedding levels.
907 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
909 Input: Array of direction classes
913 In/Out: Array of embedding levels
915 Note: levels may exceed 15 on output.
916 Accepted subset of direction classes
918 ------------------------------------------------------------------------*/
919 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
924 for (i
= sos
; i
<= eos
; i
++)
929 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
930 ASSERT(pcls
[i
] < 5); /* "Out of range." */
932 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
934 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
936 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
941 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
946 for (i
= sos
; i
<= eos
; i
++)
948 if (pcls
[i
] == B
|| pcls
[i
] == S
)
951 while (i
> sos
&& j
>= sos
&&
952 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
953 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
954 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
955 plevel
[j
--] = baselevel
;
956 plevel
[i
] = baselevel
;
959 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
960 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
961 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
964 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
965 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
966 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
967 plevel
[j
--] = baselevel
;
972 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, WORD
*pLevel
, LPCWSTR lpString
, int uCount
, struct list
*set
)
974 int run_start
, run_end
, i
;
977 IsolatedRun
*current_isolated
;
979 runs
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(Run
));
986 while (run_start
< uCount
)
988 run_end
= nextValidChar(pcls
, run_start
, uCount
);
989 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
991 runs
[run_count
].start
= run_start
;
992 runs
[run_count
].end
= run_end
;
993 runs
[run_count
].e
= pLevel
[run_start
];
994 run_start
= nextValidChar(pcls
, run_end
, uCount
);
998 /* Build Isolating Runs */
1000 while (i
< run_count
)
1003 if (runs
[k
].start
>= 0)
1005 int type_fence
, real_end
;
1007 current_isolated
= HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun
) + sizeof(RunChar
)*uCount
);
1008 if (!current_isolated
) break;
1010 run_start
= runs
[k
].start
;
1011 current_isolated
->e
= runs
[k
].e
;
1012 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
1014 for (j
= 0; j
< current_isolated
->length
; j
++)
1016 current_isolated
->item
[j
].pcls
= &pcls
[runs
[k
].start
+j
];
1017 current_isolated
->item
[j
].ch
= lpString
[runs
[k
].start
+j
];
1020 run_end
= runs
[k
].end
;
1022 TRACE("{ [%i -- %i]",run_start
, run_end
);
1024 if (pcls
[run_end
] == BN
)
1025 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
1027 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
1031 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
1032 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
1041 int l
= current_isolated
->length
;
1043 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1044 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
1046 current_isolated
->item
[l
].pcls
= &pcls
[runs
[j
].start
+m
];
1047 current_isolated
->item
[l
].ch
= lpString
[runs
[j
].start
+m
];
1050 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
1052 run_end
= runs
[j
].end
;
1053 if (pcls
[run_end
] == BN
)
1054 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
1065 type_fence
= previousValidChar(pcls
, run_start
, -1);
1067 if (type_fence
== -1)
1068 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
1070 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
1072 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
1074 if (run_end
== uCount
)
1075 current_isolated
->eos
= current_isolated
->sos
;
1078 /* eos could be an BN */
1079 if ( pcls
[run_end
] == BN
)
1081 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
1082 if (real_end
< run_start
)
1083 real_end
= run_start
;
1088 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
1089 if (type_fence
== uCount
)
1090 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
1092 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
1094 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
1097 list_add_tail(set
, ¤t_isolated
->entry
);
1098 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1103 HeapFree(GetProcessHeap(), 0, runs
);
1106 /*************************************************************
1107 * BIDI_DeterminLevels
1109 BOOL
BIDI_DetermineLevels(
1110 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
1111 INT uCount
, /* [in] Number of WCHARs in string. */
1112 const SCRIPT_STATE
*s
,
1113 const SCRIPT_CONTROL
*c
,
1114 WORD
*lpOutLevels
/* [out] final string levels */
1118 unsigned baselevel
= 0;
1119 struct list IsolatingRuns
;
1120 IsolatedRun
*iso_run
, *next
;
1122 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
1124 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
1127 WARN("Out of memory\n");
1131 baselevel
= s
->uBidiLevel
;
1133 classify(lpString
, chartype
, uCount
, c
);
1134 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
1136 /* resolve explicit */
1137 resolveExplicit(baselevel
, chartype
, lpOutLevels
, uCount
);
1138 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
1140 /* X10/BD13: Computer Isolating runs */
1141 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, lpString
, uCount
, &IsolatingRuns
);
1143 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1145 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
1148 resolveWeak(iso_run
);
1149 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
1151 /* resolve neutrals */
1152 resolveNeutrals(iso_run
);
1153 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
1155 list_remove(&iso_run
->entry
);
1156 HeapFree(GetProcessHeap(),0,iso_run
);
1159 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1160 /* resolveImplicit */
1161 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1163 /* resolveResolvedLevels*/
1164 classify(lpString
, chartype
, uCount
, c
);
1165 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1167 HeapFree(GetProcessHeap(), 0, chartype
);
1171 /* reverse cch indexes */
1172 static void reverse(int *pidx
, int cch
)
1176 for (; ich
< --cch
; ich
++)
1179 pidx
[ich
] = pidx
[cch
];
1185 /*------------------------------------------------------------------------
1186 Functions: reorder/reorderLevel
1188 Recursively reorders the display string
1189 "From the highest level down, reverse all characters at that level and
1190 higher, down to the lowest odd level"
1192 Implements rule L2 of the Unicode bidi Algorithm.
1194 Input: Array of embedding levels
1196 Flag enabling reversal (set to false by initial caller)
1198 In/Out: Text to reorder
1200 Note: levels may exceed 15 resp. 61 on input.
1202 Rule L3 - reorder combining marks is not implemented here
1203 Rule L4 - glyph mirroring is implemented as a display option below
1205 Note: this should be applied a line at a time
1206 -------------------------------------------------------------------------*/
1207 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1211 /* true as soon as first odd level encountered */
1212 fReverse
= fReverse
|| odd(level
);
1214 for (; ich
< cch
; ich
++)
1216 if (plevel
[ich
] < level
)
1220 else if (plevel
[ich
] > level
)
1222 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1223 cch
- ich
, fReverse
) - 1;
1228 reverse(pIndexs
, ich
);
1233 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1234 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1239 /* true as soon as first odd level encountered */
1240 fReverse
= fReverse
|| odd(level
);
1242 for (; ich
< cch
; ich
++)
1244 if (plevel
[ich
] < level
)
1246 else if (plevel
[ich
] > level
)
1251 reverse(pIndexs
, ich
);
1257 for (; ich
< cch
; ich
++)
1258 if (plevel
[ich
] < level
)
1260 else if (plevel
[ich
] > level
)
1261 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1262 cch
- ich
, fReverse
) - 1;
1268 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
1272 classify(lpString
, lpStrength
, uCount
, c
);
1274 for ( i
= 0; i
< uCount
; i
++)
1276 switch(lpStrength
[i
])
1285 lpStrength
[i
] = BIDI_STRONG
;
1294 lpStrength
[i
] = BIDI_WEAK
;
1300 default: /* Neutrals and NSM */
1301 lpStrength
[i
] = BIDI_NEUTRAL
;