2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/list.h"
57 #include "usp10_internal.h"
59 extern const unsigned short bidi_bracket_table
[];
61 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
63 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
66 /* HELPER FUNCTIONS AND DECLARATIONS */
68 /*------------------------------------------------------------------------
69 Bidirectional Character Types
71 as defined by the Unicode Bidirectional Algorithm Table 3-7.
75 The list of bidirectional character types here is not grouped the
76 same way as the table 3-7, since the numberic values for the types
77 are chosen to keep the state and action tables compact.
78 ------------------------------------------------------------------------*/
82 /* ON MUST be zero, code relies on ON = NI = 0 */
83 ON
= 0, /* Other Neutral */
86 AN
, /* Arabic Number */
87 EN
, /* European Number */
88 AL
, /* Arabic Letter (Right-to-left) */
89 NSM
, /* Non-spacing Mark */
90 CS
, /* Common Separator */
91 ES
, /* European Separator */
92 ET
, /* European Terminator (post/prefix e.g. $ and %) */
95 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
98 S
, /* Segment Separator (TAB) // used only in L1 */
99 WS
, /* White space // used only in L1 */
100 B
, /* Paragraph Separator (aka as PS) */
102 /* types for explicit controls */
103 RLO
, /* these are used only in X1-X9 */
109 LRI
, /* Isolate formatting characters new with 6.3 */
114 /* resolved types, also resolved directions */
115 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
118 static const char debug_type
[][4] =
120 "ON", /* Other Neutral */
121 "L", /* Left Letter */
122 "R", /* Right Letter */
123 "AN", /* Arabic Number */
124 "EN", /* European Number */
125 "AL", /* Arabic Letter (Right-to-left) */
126 "NSM", /* Non-spacing Mark */
127 "CS", /* Common Separator */
128 "ES", /* European Separator */
129 "ET", /* European Terminator (post/prefix e.g. $ and %) */
130 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
131 "S", /* Segment Separator (TAB) // used only in L1 */
132 "WS", /* White space // used only in L1 */
133 "B", /* Paragraph Separator (aka as PS) */
134 "RLO", /* these are used only in X1-X9 */
139 "LRI", /* Isolate formatting characters new with 6.3 */
145 /* HELPER FUNCTIONS */
147 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
151 for (i
= start
; i
< end
&& len
< 200; i
++)
153 TRACE(" %s",debug_type
[types
[i
]]);
154 len
+= strlen(debug_type
[types
[i
]])+1;
161 /* Convert the libwine information to the direction enum */
162 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
164 static const enum directions dir_map
[16] =
166 L
, /* unassigned defaults to L */
181 PDF
/* also LRE, LRO, RLE, RLO */
186 for (i
= 0; i
< uCount
; ++i
)
188 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
192 if (!c
->fLegacyBidiClass
) break;
196 case '+': chartype
[i
] = NI
; break;
197 case '/': chartype
[i
] = CS
; break;
203 case 0x202A: chartype
[i
] = LRE
; break;
204 case 0x202B: chartype
[i
] = RLE
; break;
205 case 0x202C: chartype
[i
] = PDF
; break;
206 case 0x202D: chartype
[i
] = LRO
; break;
207 case 0x202E: chartype
[i
] = RLO
; break;
208 case 0x2066: chartype
[i
] = LRI
; break;
209 case 0x2067: chartype
[i
] = RLI
; break;
210 case 0x2068: chartype
[i
] = FSI
; break;
211 case 0x2069: chartype
[i
] = PDI
; break;
218 /* RESOLVE EXPLICIT */
220 static WORD
GreaterEven(int i
)
222 return odd(i
) ? i
+ 1 : i
+ 2;
225 static WORD
GreaterOdd(int i
)
227 return odd(i
) ? i
+ 2 : i
+ 1;
230 static WORD
EmbeddingDirection(int level
)
232 return odd(level
) ? R
: L
;
235 /*------------------------------------------------------------------------
236 Function: resolveExplicit
238 Recursively resolves explicit embedding levels and overrides.
239 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
241 Input: Base embedding level and direction
244 Output: Array of embedding levels
246 In/Out: Array of direction classes
249 Note: The function uses two simple counters to keep track of
250 matching explicit codes and PDF. Use the default argument for
251 the outermost call. The nesting counter counts the recursion
252 depth and not the embedding level.
253 ------------------------------------------------------------------------*/
254 typedef struct tagStackItem
{
260 #define push_stack(l,o,i) \
262 stack[stack_top].level = l; \
263 stack[stack_top].override = o; \
264 stack[stack_top].isolate = i;} while(0)
266 #define pop_stack() do { stack_top++; } while(0)
268 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
270 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, int count
)
273 int overflow_isolate_count
= 0;
274 int overflow_embedding_count
= 0;
275 int valid_isolate_count
= 0;
278 StackItem stack
[MAX_DEPTH
+2];
279 int stack_top
= MAX_DEPTH
+1;
281 stack
[stack_top
].level
= level
;
282 stack
[stack_top
].override
= NI
;
283 stack
[stack_top
].isolate
= FALSE
;
285 for (i
= 0; i
< count
; i
++)
288 if (pclass
[i
] == RLE
)
290 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
291 poutLevel
[i
] = stack
[stack_top
].level
;
292 if (valid_level(least_odd
))
293 push_stack(least_odd
, NI
, FALSE
);
294 else if (overflow_isolate_count
== 0)
295 overflow_embedding_count
++;
298 else if (pclass
[i
] == LRE
)
300 int least_even
= GreaterEven(stack
[stack_top
].level
);
301 poutLevel
[i
] = stack
[stack_top
].level
;
302 if (valid_level(least_even
))
303 push_stack(least_even
, NI
, FALSE
);
304 else if (overflow_isolate_count
== 0)
305 overflow_embedding_count
++;
308 else if (pclass
[i
] == RLO
)
310 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
311 poutLevel
[i
] = stack
[stack_top
].level
;
312 if (valid_level(least_odd
))
313 push_stack(least_odd
, R
, FALSE
);
314 else if (overflow_isolate_count
== 0)
315 overflow_embedding_count
++;
318 else if (pclass
[i
] == LRO
)
320 int least_even
= GreaterEven(stack
[stack_top
].level
);
321 poutLevel
[i
] = stack
[stack_top
].level
;
322 if (valid_level(least_even
))
323 push_stack(least_even
, L
, FALSE
);
324 else if (overflow_isolate_count
== 0)
325 overflow_embedding_count
++;
328 else if (pclass
[i
] == RLI
)
330 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
331 poutLevel
[i
] = stack
[stack_top
].level
;
332 if (valid_level(least_odd
))
334 valid_isolate_count
++;
335 push_stack(least_odd
, NI
, TRUE
);
338 overflow_isolate_count
++;
341 else if (pclass
[i
] == LRI
)
343 int least_even
= GreaterEven(stack
[stack_top
].level
);
344 poutLevel
[i
] = stack
[stack_top
].level
;
345 if (valid_level(least_even
))
347 valid_isolate_count
++;
348 push_stack(least_even
, NI
, TRUE
);
351 overflow_isolate_count
++;
354 else if (pclass
[i
] == FSI
)
359 poutLevel
[i
] = stack
[stack_top
].level
;
360 for (j
= i
+1; j
< count
; j
++)
362 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
367 else if (pclass
[j
] == PDI
)
376 if (skipping
) continue;
383 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
391 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
392 if (valid_level(least_odd
))
394 valid_isolate_count
++;
395 push_stack(least_odd
, NI
, TRUE
);
398 overflow_isolate_count
++;
402 int least_even
= GreaterEven(stack
[stack_top
].level
);
403 if (valid_level(least_even
))
405 valid_isolate_count
++;
406 push_stack(least_even
, NI
, TRUE
);
409 overflow_isolate_count
++;
413 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
415 poutLevel
[i
] = stack
[stack_top
].level
;
416 if (stack
[stack_top
].override
!= NI
)
417 pclass
[i
] = stack
[stack_top
].override
;
420 else if (pclass
[i
] == PDI
)
422 if (overflow_isolate_count
) overflow_isolate_count
--;
423 else if (!valid_isolate_count
) {/* do nothing */}
426 overflow_embedding_count
= 0;
427 while (!stack
[stack_top
].isolate
) pop_stack();
429 valid_isolate_count
--;
431 poutLevel
[i
] = stack
[stack_top
].level
;
434 else if (pclass
[i
] == PDF
)
436 poutLevel
[i
] = stack
[stack_top
].level
;
437 if (overflow_isolate_count
) {/* do nothing */}
438 else if (overflow_embedding_count
) overflow_embedding_count
--;
439 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
444 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
445 for (i
= 0; i
< count
; i
++)
446 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
450 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
452 if (index
== -1 || index
== back_fence
) return index
;
454 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
458 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
460 if (index
== front_fence
) return index
;
462 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
466 typedef struct tagRun
473 typedef struct tagRunChar
479 typedef struct tagIsolatedRun
490 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
492 if (index
>= (iso_run
->length
-1)) return -1;
494 while (index
< iso_run
->length
&& *iso_run
->item
[index
].pcls
== BN
) index
++;
495 if (index
== iso_run
->length
) return -1;
499 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
502 if (index
<= 0) return -1;
504 while (index
> -1 && *iso_run
->item
[index
].pcls
== BN
) index
--;
508 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
513 for (i
= 0; i
< iso_run
->length
&& len
< 200; i
++)
515 TRACE(" %s",debug_type
[*iso_run
->item
[i
].pcls
]);
516 len
+= strlen(debug_type
[*iso_run
->item
[i
].pcls
])+1;
518 if (i
!= iso_run
->length
)
523 /*------------------------------------------------------------------------
524 Function: resolveWeak
526 Resolves the directionality of numeric and other weak character types
528 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
530 Input: Array of embedding levels
533 In/Out: Array of directional classes
535 Note: On input only these directional classes are expected
536 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
537 ------------------------------------------------------------------------*/
539 static void resolveWeak(IsolatedRun
* iso_run
)
544 for (i
=0; i
< iso_run
->length
; i
++)
546 if (*iso_run
->item
[i
].pcls
== NSM
)
548 int j
= iso_previousValidChar(iso_run
, i
);
550 *iso_run
->item
[i
].pcls
= iso_run
->sos
;
551 else if (*iso_run
->item
[j
].pcls
>= LRI
)
552 *iso_run
->item
[i
].pcls
= ON
;
554 *iso_run
->item
[i
].pcls
= *iso_run
->item
[j
].pcls
;
559 for (i
= 0; i
< iso_run
->length
; i
++)
561 if (*iso_run
->item
[i
].pcls
== EN
)
563 int j
= iso_previousValidChar(iso_run
, i
);
566 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
|| *iso_run
->item
[j
].pcls
== AL
)
568 if (*iso_run
->item
[j
].pcls
== AL
)
569 *iso_run
->item
[i
].pcls
= AN
;
572 j
= iso_previousValidChar(iso_run
, j
);
578 for (i
= 0; i
< iso_run
->length
; i
++)
580 if (*iso_run
->item
[i
].pcls
== AL
)
581 *iso_run
->item
[i
].pcls
= R
;
585 for (i
= 0; i
< iso_run
->length
; i
++)
587 if (*iso_run
->item
[i
].pcls
== ES
)
589 int b
= iso_previousValidChar(iso_run
, i
);
590 int f
= iso_nextValidChar(iso_run
, i
);
592 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
593 *iso_run
->item
[i
].pcls
= EN
;
595 else if (*iso_run
->item
[i
].pcls
== CS
)
597 int b
= iso_previousValidChar(iso_run
, i
);
598 int f
= iso_nextValidChar(iso_run
, i
);
600 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
601 *iso_run
->item
[i
].pcls
= EN
;
602 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== AN
&& *iso_run
->item
[f
].pcls
== AN
)
603 *iso_run
->item
[i
].pcls
= AN
;
608 for (i
= 0; i
< iso_run
->length
; i
++)
610 if (*iso_run
->item
[i
].pcls
== ET
)
613 for (j
= i
-1 ; j
> -1; j
--)
615 if (*iso_run
->item
[j
].pcls
== BN
) continue;
616 if (*iso_run
->item
[j
].pcls
== ET
) continue;
617 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
620 if (*iso_run
->item
[i
].pcls
== ET
)
622 for (j
= i
+1; j
< iso_run
->length
; j
++)
624 if (*iso_run
->item
[j
].pcls
== BN
) continue;
625 if (*iso_run
->item
[j
].pcls
== ET
) continue;
626 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
634 for (i
= 0; i
< iso_run
->length
; i
++)
636 if (*iso_run
->item
[i
].pcls
== ET
|| *iso_run
->item
[i
].pcls
== ES
|| *iso_run
->item
[i
].pcls
== CS
|| *iso_run
->item
[i
].pcls
== ON
)
640 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
641 *iso_run
->item
[b
].pcls
= ON
;
642 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
643 *iso_run
->item
[f
].pcls
= ON
;
645 *iso_run
->item
[i
].pcls
= ON
;
650 for (i
= 0; i
< iso_run
->length
; i
++)
652 if (*iso_run
->item
[i
].pcls
== EN
)
655 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
656 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
)
658 if (*iso_run
->item
[j
].pcls
== L
)
659 *iso_run
->item
[i
].pcls
= L
;
662 if (iso_run
->sos
== L
&& j
== -1)
663 *iso_run
->item
[i
].pcls
= L
;
668 typedef struct tagBracketPair
674 static int compr(const void *a
, const void* b
)
676 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
679 static BracketPair
*computeBracketPairs(IsolatedRun
*iso_run
)
683 int stack_top
= iso_run
->length
;
684 BracketPair
*out
= NULL
;
688 open_stack
= HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR
) * iso_run
->length
);
689 stack_index
= HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run
->length
);
691 for (i
= 0; i
< iso_run
->length
; i
++)
693 unsigned short ubv
= get_table_entry(bidi_bracket_table
, iso_run
->item
[i
].ch
);
698 out
= HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair
));
705 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
706 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
707 if (open_stack
[stack_top
] == 0x232A)
708 open_stack
[stack_top
] = 0x3009;
709 stack_index
[stack_top
] = i
;
711 else if ((ubv
>> 8) == 1)
714 if (stack_top
== iso_run
->length
) continue;
715 for (j
= stack_top
; j
< iso_run
->length
; j
++)
717 WCHAR c
= iso_run
->item
[i
].ch
;
718 if (c
== 0x232A) c
= 0x3009;
719 if (c
== open_stack
[j
])
721 out
[pair_count
].start
= stack_index
[j
];
722 out
[pair_count
].end
= i
;
724 out
= HeapReAlloc(GetProcessHeap(), 0, out
, sizeof(BracketPair
) * (pair_count
+1));
725 out
[pair_count
].start
= -1;
735 HeapFree(GetProcessHeap(),0,out
);
738 else if (pair_count
> 1)
739 qsort(out
, pair_count
, sizeof(BracketPair
), compr
);
741 HeapFree(GetProcessHeap(), 0, open_stack
);
742 HeapFree(GetProcessHeap(), 0, stack_index
);
746 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
748 /*------------------------------------------------------------------------
749 Function: resolveNeutrals
751 Resolves the directionality of neutral character types.
753 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
755 Input: Array of embedding levels
759 In/Out: Array of directional classes
761 Note: On input only these directional classes are expected
762 R, L, NI, AN, EN and BN
764 W8 resolves a number of ENs to L
765 ------------------------------------------------------------------------*/
766 static void resolveNeutrals(IsolatedRun
*iso_run
)
769 BracketPair
*pairs
= NULL
;
771 /* Translate isolates into NI */
772 for (i
= 0; i
< iso_run
->length
; i
++)
774 if (*iso_run
->item
[i
].pcls
>= LRI
)
775 *iso_run
->item
[i
].pcls
= NI
;
777 switch(*iso_run
->item
[i
].pcls
)
781 case WS
: *iso_run
->item
[i
].pcls
= NI
;
784 ASSERT(*iso_run
->item
[i
].pcls
< 5 || *iso_run
->item
[i
].pcls
== BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
787 /* N0: Skipping bracketed pairs for now */
788 pairs
= computeBracketPairs(iso_run
);
791 BracketPair
*p
= &pairs
[0];
793 while (p
->start
>= 0)
796 int e
= EmbeddingDirection(iso_run
->e
);
797 int o
= EmbeddingDirection(iso_run
->e
+1);
799 TRACE("Bracket Pair [%i - %i]\n",p
->start
, p
->end
);
802 for (j
= p
->start
+1; j
< p
->end
; j
++)
804 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
806 *iso_run
->item
[p
->start
].pcls
= e
;
807 *iso_run
->item
[p
->end
].pcls
= e
;
810 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
814 if (j
== p
->end
&& flag_o
)
816 for (j
= p
->start
; j
>= 0; j
--)
818 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
820 *iso_run
->item
[p
->start
].pcls
= o
;
821 *iso_run
->item
[p
->end
].pcls
= o
;
824 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
826 *iso_run
->item
[p
->start
].pcls
= e
;
827 *iso_run
->item
[p
->end
].pcls
= e
;
833 *iso_run
->item
[p
->start
].pcls
= iso_run
->sos
;
834 *iso_run
->item
[p
->end
].pcls
= iso_run
->sos
;
841 HeapFree(GetProcessHeap(),0,pairs
);
845 for (i
= 0; i
< iso_run
->length
; i
++)
849 if (*iso_run
->item
[i
].pcls
== NI
)
852 int b
= iso_previousValidChar(iso_run
, i
);
861 if (*iso_run
->item
[b
].pcls
== R
|| *iso_run
->item
[b
].pcls
== AN
|| *iso_run
->item
[b
].pcls
== EN
)
863 else if (*iso_run
->item
[b
].pcls
== L
)
865 else /* No string type */
868 j
= iso_nextValidChar(iso_run
, i
);
869 while (j
> -1 && *iso_run
->item
[j
].pcls
== NI
) j
= iso_nextValidChar(iso_run
, j
);
876 else if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== AN
|| *iso_run
->item
[j
].pcls
== EN
)
878 else if (*iso_run
->item
[j
].pcls
== L
)
880 else /* No string type */
885 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
886 *iso_run
->item
[b
].pcls
= r
;
892 for (i
= 0; i
< iso_run
->length
; i
++)
894 if (*iso_run
->item
[i
].pcls
== NI
)
898 *iso_run
->item
[i
].pcls
= EmbeddingDirection(iso_run
->e
);
899 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
900 *iso_run
->item
[b
].pcls
= EmbeddingDirection(iso_run
->e
);
901 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
902 *iso_run
->item
[f
].pcls
= EmbeddingDirection(iso_run
->e
);
907 /*------------------------------------------------------------------------
908 Function: resolveImplicit
910 Recursively resolves implicit embedding levels.
911 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
913 Input: Array of direction classes
917 In/Out: Array of embedding levels
919 Note: levels may exceed 15 on output.
920 Accepted subset of direction classes
922 ------------------------------------------------------------------------*/
923 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
928 for (i
= sos
; i
<= eos
; i
++)
933 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
934 ASSERT(pcls
[i
] < 5); /* "Out of range." */
936 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
938 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
940 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
945 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
950 for (i
= sos
; i
<= eos
; i
++)
952 if (pcls
[i
] == B
|| pcls
[i
] == S
)
955 while (i
> sos
&& j
>= sos
&&
956 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
957 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
958 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
959 plevel
[j
--] = baselevel
;
960 plevel
[i
] = baselevel
;
963 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
964 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
965 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
968 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
969 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
970 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
971 plevel
[j
--] = baselevel
;
976 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, WORD
*pLevel
, LPCWSTR lpString
, int uCount
, struct list
*set
)
978 int run_start
, run_end
, i
;
981 IsolatedRun
*current_isolated
;
983 runs
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(Run
));
990 while (run_start
< uCount
)
992 run_end
= nextValidChar(pcls
, run_start
, uCount
);
993 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
995 runs
[run_count
].start
= run_start
;
996 runs
[run_count
].end
= run_end
;
997 runs
[run_count
].e
= pLevel
[run_start
];
998 run_start
= nextValidChar(pcls
, run_end
, uCount
);
1002 /* Build Isolating Runs */
1004 while (i
< run_count
)
1007 if (runs
[k
].start
>= 0)
1009 int type_fence
, real_end
;
1011 current_isolated
= HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun
) + sizeof(RunChar
)*uCount
);
1012 if (!current_isolated
) break;
1014 run_start
= runs
[k
].start
;
1015 current_isolated
->e
= runs
[k
].e
;
1016 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
1018 for (j
= 0; j
< current_isolated
->length
; j
++)
1020 current_isolated
->item
[j
].pcls
= &pcls
[runs
[k
].start
+j
];
1021 current_isolated
->item
[j
].ch
= lpString
[runs
[k
].start
+j
];
1024 run_end
= runs
[k
].end
;
1026 TRACE("{ [%i -- %i]",run_start
, run_end
);
1028 if (pcls
[run_end
] == BN
)
1029 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
1031 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
1035 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
1036 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
1045 int l
= current_isolated
->length
;
1047 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1048 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
1050 current_isolated
->item
[l
].pcls
= &pcls
[runs
[j
].start
+m
];
1051 current_isolated
->item
[l
].ch
= lpString
[runs
[j
].start
+m
];
1054 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
1056 run_end
= runs
[j
].end
;
1057 if (pcls
[run_end
] == BN
)
1058 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
1069 type_fence
= previousValidChar(pcls
, run_start
, -1);
1071 if (type_fence
== -1)
1072 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
1074 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
1076 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
1078 if (run_end
== uCount
)
1079 current_isolated
->eos
= current_isolated
->sos
;
1082 /* eos could be an BN */
1083 if ( pcls
[run_end
] == BN
)
1085 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
1086 if (real_end
< run_start
)
1087 real_end
= run_start
;
1092 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
1093 if (type_fence
== uCount
)
1094 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
1096 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
1098 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
1101 list_add_tail(set
, ¤t_isolated
->entry
);
1102 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1107 HeapFree(GetProcessHeap(), 0, runs
);
1110 /*************************************************************
1111 * BIDI_DeterminLevels
1113 BOOL
BIDI_DetermineLevels(
1114 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
1115 INT uCount
, /* [in] Number of WCHARs in string. */
1116 const SCRIPT_STATE
*s
,
1117 const SCRIPT_CONTROL
*c
,
1118 WORD
*lpOutLevels
/* [out] final string levels */
1122 unsigned baselevel
= 0;
1123 struct list IsolatingRuns
;
1124 IsolatedRun
*iso_run
, *next
;
1126 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
1128 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
1131 WARN("Out of memory\n");
1135 baselevel
= s
->uBidiLevel
;
1137 classify(lpString
, chartype
, uCount
, c
);
1138 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
1140 /* resolve explicit */
1141 resolveExplicit(baselevel
, chartype
, lpOutLevels
, uCount
);
1142 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
1144 /* X10/BD13: Computer Isolating runs */
1145 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, lpString
, uCount
, &IsolatingRuns
);
1147 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1149 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
1152 resolveWeak(iso_run
);
1153 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
1155 /* resolve neutrals */
1156 resolveNeutrals(iso_run
);
1157 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
1159 list_remove(&iso_run
->entry
);
1160 HeapFree(GetProcessHeap(),0,iso_run
);
1163 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1164 /* resolveImplicit */
1165 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1167 /* resolveResolvedLevels*/
1168 classify(lpString
, chartype
, uCount
, c
);
1169 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1171 HeapFree(GetProcessHeap(), 0, chartype
);
1175 /* reverse cch indexes */
1176 static void reverse(int *pidx
, int cch
)
1180 for (; ich
< --cch
; ich
++)
1183 pidx
[ich
] = pidx
[cch
];
1189 /*------------------------------------------------------------------------
1190 Functions: reorder/reorderLevel
1192 Recursively reorders the display string
1193 "From the highest level down, reverse all characters at that level and
1194 higher, down to the lowest odd level"
1196 Implements rule L2 of the Unicode bidi Algorithm.
1198 Input: Array of embedding levels
1200 Flag enabling reversal (set to false by initial caller)
1202 In/Out: Text to reorder
1204 Note: levels may exceed 15 resp. 61 on input.
1206 Rule L3 - reorder combining marks is not implemented here
1207 Rule L4 - glyph mirroring is implemented as a display option below
1209 Note: this should be applied a line at a time
1210 -------------------------------------------------------------------------*/
1211 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1215 /* true as soon as first odd level encountered */
1216 fReverse
= fReverse
|| odd(level
);
1218 for (; ich
< cch
; ich
++)
1220 if (plevel
[ich
] < level
)
1224 else if (plevel
[ich
] > level
)
1226 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1227 cch
- ich
, fReverse
) - 1;
1232 reverse(pIndexs
, ich
);
1237 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1238 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1243 /* true as soon as first odd level encountered */
1244 fReverse
= fReverse
|| odd(level
);
1246 for (; ich
< cch
; ich
++)
1248 if (plevel
[ich
] < level
)
1250 else if (plevel
[ich
] > level
)
1255 reverse(pIndexs
, ich
);
1261 for (; ich
< cch
; ich
++)
1262 if (plevel
[ich
] < level
)
1264 else if (plevel
[ich
] > level
)
1265 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1266 cch
- ich
, fReverse
) - 1;
1272 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
1276 classify(lpString
, lpStrength
, uCount
, c
);
1278 for ( i
= 0; i
< uCount
; i
++)
1280 switch(lpStrength
[i
])
1289 lpStrength
[i
] = BIDI_STRONG
;
1298 lpStrength
[i
] = BIDI_WEAK
;
1304 default: /* Neutrals and NSM */
1305 lpStrength
[i
] = BIDI_NEUTRAL
;