2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/heap.h"
56 #include "wine/list.h"
58 #include "usp10_internal.h"
60 extern const unsigned short bidi_bracket_table
[] DECLSPEC_HIDDEN
;
62 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
64 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
67 /* HELPER FUNCTIONS AND DECLARATIONS */
69 /*------------------------------------------------------------------------
70 Bidirectional Character Types
72 as defined by the Unicode Bidirectional Algorithm Table 3-7.
76 The list of bidirectional character types here is not grouped the
77 same way as the table 3-7, since the numberic values for the types
78 are chosen to keep the state and action tables compact.
79 ------------------------------------------------------------------------*/
83 /* ON MUST be zero, code relies on ON = NI = 0 */
84 ON
= 0, /* Other Neutral */
87 AN
, /* Arabic Number */
88 EN
, /* European Number */
89 AL
, /* Arabic Letter (Right-to-left) */
90 NSM
, /* Non-spacing Mark */
91 CS
, /* Common Separator */
92 ES
, /* European Separator */
93 ET
, /* European Terminator (post/prefix e.g. $ and %) */
96 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
99 S
, /* Segment Separator (TAB) // used only in L1 */
100 WS
, /* White space // used only in L1 */
101 B
, /* Paragraph Separator (aka as PS) */
103 /* types for explicit controls */
104 RLO
, /* these are used only in X1-X9 */
110 LRI
, /* Isolate formatting characters new with 6.3 */
115 /* resolved types, also resolved directions */
116 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
119 static const char debug_type
[][4] =
121 "ON", /* Other Neutral */
122 "L", /* Left Letter */
123 "R", /* Right Letter */
124 "AN", /* Arabic Number */
125 "EN", /* European Number */
126 "AL", /* Arabic Letter (Right-to-left) */
127 "NSM", /* Non-spacing Mark */
128 "CS", /* Common Separator */
129 "ES", /* European Separator */
130 "ET", /* European Terminator (post/prefix e.g. $ and %) */
131 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
132 "S", /* Segment Separator (TAB) // used only in L1 */
133 "WS", /* White space // used only in L1 */
134 "B", /* Paragraph Separator (aka as PS) */
135 "RLO", /* these are used only in X1-X9 */
140 "LRI", /* Isolate formatting characters new with 6.3 */
146 /* HELPER FUNCTIONS */
148 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
152 for (i
= start
; i
< end
&& len
< 200; i
++)
154 TRACE(" %s",debug_type
[types
[i
]]);
155 len
+= strlen(debug_type
[types
[i
]])+1;
162 /* Convert the libwine information to the direction enum */
163 static void classify(const WCHAR
*string
, WORD
*chartype
, DWORD count
, const SCRIPT_CONTROL
*c
)
165 static const enum directions dir_map
[16] =
167 L
, /* unassigned defaults to L */
182 PDF
/* also LRE, LRO, RLE, RLO */
187 for (i
= 0; i
< count
; ++i
)
189 chartype
[i
] = dir_map
[get_char_typeW(string
[i
]) >> 12];
193 if (!c
->fLegacyBidiClass
) break;
197 case '+': chartype
[i
] = NI
; break;
198 case '/': chartype
[i
] = CS
; break;
204 case 0x202A: chartype
[i
] = LRE
; break;
205 case 0x202B: chartype
[i
] = RLE
; break;
206 case 0x202C: chartype
[i
] = PDF
; break;
207 case 0x202D: chartype
[i
] = LRO
; break;
208 case 0x202E: chartype
[i
] = RLO
; break;
209 case 0x2066: chartype
[i
] = LRI
; break;
210 case 0x2067: chartype
[i
] = RLI
; break;
211 case 0x2068: chartype
[i
] = FSI
; break;
212 case 0x2069: chartype
[i
] = PDI
; break;
219 /* RESOLVE EXPLICIT */
221 static WORD
GreaterEven(int i
)
223 return odd(i
) ? i
+ 1 : i
+ 2;
226 static WORD
GreaterOdd(int i
)
228 return odd(i
) ? i
+ 2 : i
+ 1;
231 static WORD
EmbeddingDirection(int level
)
233 return odd(level
) ? R
: L
;
236 /*------------------------------------------------------------------------
237 Function: resolveExplicit
239 Recursively resolves explicit embedding levels and overrides.
240 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
242 Input: Base embedding level and direction
245 Output: Array of embedding levels
247 In/Out: Array of direction classes
250 Note: The function uses two simple counters to keep track of
251 matching explicit codes and PDF. Use the default argument for
252 the outermost call. The nesting counter counts the recursion
253 depth and not the embedding level.
254 ------------------------------------------------------------------------*/
255 typedef struct tagStackItem
{
261 #define push_stack(l,o,i) \
263 stack[stack_top].level = l; \
264 stack[stack_top].override = o; \
265 stack[stack_top].isolate = i;} while(0)
267 #define pop_stack() do { stack_top++; } while(0)
269 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
271 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, WORD
*poutOverrides
, int count
, BOOL initialOverride
)
274 int overflow_isolate_count
= 0;
275 int overflow_embedding_count
= 0;
276 int valid_isolate_count
= 0;
279 StackItem stack
[MAX_DEPTH
+2];
280 int stack_top
= MAX_DEPTH
+1;
282 stack
[stack_top
].level
= level
;
283 stack
[stack_top
].override
= NI
;
284 stack
[stack_top
].isolate
= FALSE
;
289 push_stack(level
, R
, FALSE
);
291 push_stack(level
, L
, FALSE
);
294 for (i
= 0; i
< count
; i
++)
296 poutOverrides
[i
] = stack
[stack_top
].override
;
299 if (pclass
[i
] == RLE
)
301 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
302 poutLevel
[i
] = stack
[stack_top
].level
;
303 if (valid_level(least_odd
))
304 push_stack(least_odd
, NI
, FALSE
);
305 else if (overflow_isolate_count
== 0)
306 overflow_embedding_count
++;
309 else if (pclass
[i
] == LRE
)
311 int least_even
= GreaterEven(stack
[stack_top
].level
);
312 poutLevel
[i
] = stack
[stack_top
].level
;
313 if (valid_level(least_even
))
314 push_stack(least_even
, NI
, FALSE
);
315 else if (overflow_isolate_count
== 0)
316 overflow_embedding_count
++;
319 else if (pclass
[i
] == RLO
)
321 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
322 poutLevel
[i
] = stack
[stack_top
].level
;
323 if (valid_level(least_odd
))
324 push_stack(least_odd
, R
, FALSE
);
325 else if (overflow_isolate_count
== 0)
326 overflow_embedding_count
++;
329 else if (pclass
[i
] == LRO
)
331 int least_even
= GreaterEven(stack
[stack_top
].level
);
332 poutLevel
[i
] = stack
[stack_top
].level
;
333 if (valid_level(least_even
))
334 push_stack(least_even
, L
, FALSE
);
335 else if (overflow_isolate_count
== 0)
336 overflow_embedding_count
++;
339 else if (pclass
[i
] == RLI
)
341 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
342 poutLevel
[i
] = stack
[stack_top
].level
;
343 if (valid_level(least_odd
))
345 valid_isolate_count
++;
346 push_stack(least_odd
, NI
, TRUE
);
349 overflow_isolate_count
++;
352 else if (pclass
[i
] == LRI
)
354 int least_even
= GreaterEven(stack
[stack_top
].level
);
355 poutLevel
[i
] = stack
[stack_top
].level
;
356 if (valid_level(least_even
))
358 valid_isolate_count
++;
359 push_stack(least_even
, NI
, TRUE
);
362 overflow_isolate_count
++;
365 else if (pclass
[i
] == FSI
)
370 poutLevel
[i
] = stack
[stack_top
].level
;
371 for (j
= i
+1; j
< count
; j
++)
373 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
378 else if (pclass
[j
] == PDI
)
387 if (skipping
) continue;
394 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
402 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
403 if (valid_level(least_odd
))
405 valid_isolate_count
++;
406 push_stack(least_odd
, NI
, TRUE
);
409 overflow_isolate_count
++;
413 int least_even
= GreaterEven(stack
[stack_top
].level
);
414 if (valid_level(least_even
))
416 valid_isolate_count
++;
417 push_stack(least_even
, NI
, TRUE
);
420 overflow_isolate_count
++;
424 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
426 poutLevel
[i
] = stack
[stack_top
].level
;
427 if (stack
[stack_top
].override
!= NI
)
428 pclass
[i
] = stack
[stack_top
].override
;
431 else if (pclass
[i
] == PDI
)
433 if (overflow_isolate_count
) overflow_isolate_count
--;
434 else if (!valid_isolate_count
) {/* do nothing */}
437 overflow_embedding_count
= 0;
438 while (!stack
[stack_top
].isolate
) pop_stack();
440 valid_isolate_count
--;
442 poutLevel
[i
] = stack
[stack_top
].level
;
445 else if (pclass
[i
] == PDF
)
447 poutLevel
[i
] = stack
[stack_top
].level
;
448 if (overflow_isolate_count
) {/* do nothing */}
449 else if (overflow_embedding_count
) overflow_embedding_count
--;
450 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
455 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
456 for (i
= 0; i
< count
; i
++)
457 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
461 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
463 if (index
== -1 || index
== back_fence
) return index
;
465 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
469 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
471 if (index
== front_fence
) return index
;
473 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
477 typedef struct tagRun
484 typedef struct tagRunChar
490 typedef struct tagIsolatedRun
501 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
503 if (index
>= (iso_run
->length
-1)) return -1;
505 while (index
< iso_run
->length
&& *iso_run
->item
[index
].pcls
== BN
) index
++;
506 if (index
== iso_run
->length
) return -1;
510 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
513 if (index
<= 0) return -1;
515 while (index
> -1 && *iso_run
->item
[index
].pcls
== BN
) index
--;
519 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
524 for (i
= 0; i
< iso_run
->length
&& len
< 200; i
++)
526 TRACE(" %s",debug_type
[*iso_run
->item
[i
].pcls
]);
527 len
+= strlen(debug_type
[*iso_run
->item
[i
].pcls
])+1;
529 if (i
!= iso_run
->length
)
534 /*------------------------------------------------------------------------
535 Function: resolveWeak
537 Resolves the directionality of numeric and other weak character types
539 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
541 Input: Array of embedding levels
544 In/Out: Array of directional classes
546 Note: On input only these directional classes are expected
547 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
548 ------------------------------------------------------------------------*/
550 static void resolveWeak(IsolatedRun
* iso_run
)
555 for (i
=0; i
< iso_run
->length
; i
++)
557 if (*iso_run
->item
[i
].pcls
== NSM
)
559 int j
= iso_previousValidChar(iso_run
, i
);
561 *iso_run
->item
[i
].pcls
= iso_run
->sos
;
562 else if (*iso_run
->item
[j
].pcls
>= LRI
)
563 *iso_run
->item
[i
].pcls
= ON
;
565 *iso_run
->item
[i
].pcls
= *iso_run
->item
[j
].pcls
;
570 for (i
= 0; i
< iso_run
->length
; i
++)
572 if (*iso_run
->item
[i
].pcls
== EN
)
574 int j
= iso_previousValidChar(iso_run
, i
);
577 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
|| *iso_run
->item
[j
].pcls
== AL
)
579 if (*iso_run
->item
[j
].pcls
== AL
)
580 *iso_run
->item
[i
].pcls
= AN
;
583 j
= iso_previousValidChar(iso_run
, j
);
589 for (i
= 0; i
< iso_run
->length
; i
++)
591 if (*iso_run
->item
[i
].pcls
== AL
)
592 *iso_run
->item
[i
].pcls
= R
;
596 for (i
= 0; i
< iso_run
->length
; i
++)
598 if (*iso_run
->item
[i
].pcls
== ES
)
600 int b
= iso_previousValidChar(iso_run
, i
);
601 int f
= iso_nextValidChar(iso_run
, i
);
603 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
604 *iso_run
->item
[i
].pcls
= EN
;
606 else if (*iso_run
->item
[i
].pcls
== CS
)
608 int b
= iso_previousValidChar(iso_run
, i
);
609 int f
= iso_nextValidChar(iso_run
, i
);
611 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
612 *iso_run
->item
[i
].pcls
= EN
;
613 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== AN
&& *iso_run
->item
[f
].pcls
== AN
)
614 *iso_run
->item
[i
].pcls
= AN
;
619 for (i
= 0; i
< iso_run
->length
; i
++)
621 if (*iso_run
->item
[i
].pcls
== ET
)
624 for (j
= i
-1 ; j
> -1; j
--)
626 if (*iso_run
->item
[j
].pcls
== BN
) continue;
627 if (*iso_run
->item
[j
].pcls
== ET
) continue;
628 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
631 if (*iso_run
->item
[i
].pcls
== ET
)
633 for (j
= i
+1; j
< iso_run
->length
; j
++)
635 if (*iso_run
->item
[j
].pcls
== BN
) continue;
636 if (*iso_run
->item
[j
].pcls
== ET
) continue;
637 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
645 for (i
= 0; i
< iso_run
->length
; i
++)
647 if (*iso_run
->item
[i
].pcls
== ET
|| *iso_run
->item
[i
].pcls
== ES
|| *iso_run
->item
[i
].pcls
== CS
|| *iso_run
->item
[i
].pcls
== ON
)
651 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
652 *iso_run
->item
[b
].pcls
= ON
;
653 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
654 *iso_run
->item
[f
].pcls
= ON
;
656 *iso_run
->item
[i
].pcls
= ON
;
661 for (i
= 0; i
< iso_run
->length
; i
++)
663 if (*iso_run
->item
[i
].pcls
== EN
)
666 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
667 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
)
669 if (*iso_run
->item
[j
].pcls
== L
)
670 *iso_run
->item
[i
].pcls
= L
;
673 if (iso_run
->sos
== L
&& j
== -1)
674 *iso_run
->item
[i
].pcls
= L
;
679 typedef struct tagBracketPair
685 static int compr(const void *a
, const void* b
)
687 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
690 static BracketPair
*computeBracketPairs(IsolatedRun
*iso_run
)
694 int stack_top
= iso_run
->length
;
695 unsigned int pair_count
= 0;
696 BracketPair
*out
= NULL
;
700 open_stack
= heap_alloc(iso_run
->length
* sizeof(*open_stack
));
701 stack_index
= heap_alloc(iso_run
->length
* sizeof(*stack_index
));
703 for (i
= 0; i
< iso_run
->length
; i
++)
705 unsigned short ubv
= get_table_entry(bidi_bracket_table
, iso_run
->item
[i
].ch
);
713 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
714 /* Deal with canonical equivalent U+2329/232A and U+3008/3009. */
715 if (open_stack
[stack_top
] == 0x232a)
716 open_stack
[stack_top
] = 0x3009;
717 stack_index
[stack_top
] = i
;
719 else if ((ubv
>> 8) == 1)
723 for (j
= stack_top
; j
< iso_run
->length
; ++j
)
725 WCHAR c
= iso_run
->item
[i
].ch
;
730 if (c
!= open_stack
[j
])
733 if (!(usp10_array_reserve((void **)&out
, &out_size
, pair_count
+ 2, sizeof(*out
))))
734 ERR("Failed to grow output array.\n");
736 out
[pair_count
].start
= stack_index
[j
];
737 out
[pair_count
].end
= i
;
740 out
[pair_count
].start
= -1;
747 heap_free(open_stack
);
748 heap_free(stack_index
);
753 qsort(out
, pair_count
, sizeof(*out
), compr
);
758 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
760 /*------------------------------------------------------------------------
761 Function: resolveNeutrals
763 Resolves the directionality of neutral character types.
765 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
767 Input: Array of embedding levels
771 In/Out: Array of directional classes
773 Note: On input only these directional classes are expected
774 R, L, NI, AN, EN and BN
776 W8 resolves a number of ENs to L
777 ------------------------------------------------------------------------*/
778 static void resolveNeutrals(IsolatedRun
*iso_run
)
781 BracketPair
*pairs
= NULL
;
783 /* Translate isolates into NI */
784 for (i
= 0; i
< iso_run
->length
; i
++)
786 if (*iso_run
->item
[i
].pcls
>= LRI
)
787 *iso_run
->item
[i
].pcls
= NI
;
789 switch(*iso_run
->item
[i
].pcls
)
793 case WS
: *iso_run
->item
[i
].pcls
= NI
;
796 ASSERT(*iso_run
->item
[i
].pcls
< 5 || *iso_run
->item
[i
].pcls
== BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
799 /* N0: Skipping bracketed pairs for now */
800 pairs
= computeBracketPairs(iso_run
);
803 BracketPair
*p
= &pairs
[0];
805 while (p
->start
>= 0)
808 int e
= EmbeddingDirection(iso_run
->e
);
809 int o
= EmbeddingDirection(iso_run
->e
+1);
811 TRACE("Bracket Pair [%i - %i]\n",p
->start
, p
->end
);
814 for (j
= p
->start
+1; j
< p
->end
; j
++)
816 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
818 *iso_run
->item
[p
->start
].pcls
= e
;
819 *iso_run
->item
[p
->end
].pcls
= e
;
822 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
826 if (j
== p
->end
&& flag_o
)
828 for (j
= p
->start
; j
>= 0; j
--)
830 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
832 *iso_run
->item
[p
->start
].pcls
= o
;
833 *iso_run
->item
[p
->end
].pcls
= o
;
836 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
838 *iso_run
->item
[p
->start
].pcls
= e
;
839 *iso_run
->item
[p
->end
].pcls
= e
;
845 *iso_run
->item
[p
->start
].pcls
= iso_run
->sos
;
846 *iso_run
->item
[p
->end
].pcls
= iso_run
->sos
;
857 for (i
= 0; i
< iso_run
->length
; i
++)
861 if (*iso_run
->item
[i
].pcls
== NI
)
864 int b
= iso_previousValidChar(iso_run
, i
);
873 if (*iso_run
->item
[b
].pcls
== R
|| *iso_run
->item
[b
].pcls
== AN
|| *iso_run
->item
[b
].pcls
== EN
)
875 else if (*iso_run
->item
[b
].pcls
== L
)
877 else /* No string type */
880 j
= iso_nextValidChar(iso_run
, i
);
881 while (j
> -1 && *iso_run
->item
[j
].pcls
== NI
) j
= iso_nextValidChar(iso_run
, j
);
888 else if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== AN
|| *iso_run
->item
[j
].pcls
== EN
)
890 else if (*iso_run
->item
[j
].pcls
== L
)
892 else /* No string type */
897 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
898 *iso_run
->item
[b
].pcls
= r
;
904 for (i
= 0; i
< iso_run
->length
; i
++)
906 if (*iso_run
->item
[i
].pcls
== NI
)
910 *iso_run
->item
[i
].pcls
= EmbeddingDirection(iso_run
->e
);
911 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
912 *iso_run
->item
[b
].pcls
= EmbeddingDirection(iso_run
->e
);
913 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
914 *iso_run
->item
[f
].pcls
= EmbeddingDirection(iso_run
->e
);
919 /*------------------------------------------------------------------------
920 Function: resolveImplicit
922 Recursively resolves implicit embedding levels.
923 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
925 Input: Array of direction classes
929 In/Out: Array of embedding levels
931 Note: levels may exceed 15 on output.
932 Accepted subset of direction classes
934 ------------------------------------------------------------------------*/
935 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
940 for (i
= sos
; i
<= eos
; i
++)
945 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
946 ASSERT(pcls
[i
] < 5); /* "Out of range." */
948 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
950 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
952 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
957 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
962 for (i
= sos
; i
<= eos
; i
++)
964 if (pcls
[i
] == B
|| pcls
[i
] == S
)
967 while (i
> sos
&& j
>= sos
&&
968 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
969 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
970 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
971 plevel
[j
--] = baselevel
;
972 plevel
[i
] = baselevel
;
974 else if (pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
|| pcls
[i
] == RLO
||
975 pcls
[i
] == PDF
|| pcls
[i
] == BN
)
977 plevel
[i
] = i
? plevel
[i
- 1] : baselevel
;
980 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
981 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
982 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
985 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
986 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
987 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
988 plevel
[j
--] = baselevel
;
993 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, const WORD
*pLevel
,
994 const WCHAR
*string
, unsigned int uCount
, struct list
*set
)
996 int run_start
, run_end
, i
;
999 IsolatedRun
*current_isolated
;
1001 if (!(runs
= heap_calloc(uCount
, sizeof(*runs
))))
1008 while (run_start
< uCount
)
1010 run_end
= nextValidChar(pcls
, run_start
, uCount
);
1011 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
1013 runs
[run_count
].start
= run_start
;
1014 runs
[run_count
].end
= run_end
;
1015 runs
[run_count
].e
= pLevel
[run_start
];
1016 run_start
= nextValidChar(pcls
, run_end
, uCount
);
1020 /* Build Isolating Runs */
1022 while (i
< run_count
)
1025 if (runs
[k
].start
>= 0)
1027 int type_fence
, real_end
;
1030 if (!(current_isolated
= heap_alloc(FIELD_OFFSET(IsolatedRun
, item
[uCount
]))))
1033 run_start
= runs
[k
].start
;
1034 current_isolated
->e
= runs
[k
].e
;
1035 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
1037 for (j
= 0; j
< current_isolated
->length
; j
++)
1039 current_isolated
->item
[j
].pcls
= &pcls
[runs
[k
].start
+j
];
1040 current_isolated
->item
[j
].ch
= string
[runs
[k
].start
+ j
];
1043 run_end
= runs
[k
].end
;
1045 TRACE("{ [%i -- %i]",run_start
, run_end
);
1047 if (pcls
[run_end
] == BN
)
1048 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
1050 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
1054 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
1055 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
1064 int l
= current_isolated
->length
;
1066 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1067 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
1069 current_isolated
->item
[l
].pcls
= &pcls
[runs
[j
].start
+m
];
1070 current_isolated
->item
[l
].ch
= string
[runs
[j
].start
+ m
];
1073 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
1075 run_end
= runs
[j
].end
;
1076 if (pcls
[run_end
] == BN
)
1077 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
1088 type_fence
= previousValidChar(pcls
, run_start
, -1);
1090 if (type_fence
== -1)
1091 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
1093 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
1095 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
1097 if (run_end
== uCount
)
1098 current_isolated
->eos
= current_isolated
->sos
;
1101 /* eos could be an BN */
1102 if ( pcls
[run_end
] == BN
)
1104 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
1105 if (real_end
< run_start
)
1106 real_end
= run_start
;
1111 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
1112 if (type_fence
== uCount
)
1113 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
1115 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
1117 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
1120 list_add_tail(set
, ¤t_isolated
->entry
);
1121 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1129 /*************************************************************
1130 * BIDI_DeterminLevels
1132 BOOL
BIDI_DetermineLevels(
1133 const WCHAR
*lpString
, /* [in] The string for which information is to be returned */
1134 unsigned int uCount
, /* [in] Number of WCHARs in string. */
1135 const SCRIPT_STATE
*s
,
1136 const SCRIPT_CONTROL
*c
,
1137 WORD
*lpOutLevels
, /* [out] final string levels */
1138 WORD
*lpOutOverrides
/* [out] final string overrides */
1142 unsigned baselevel
= 0;
1143 struct list IsolatingRuns
;
1144 IsolatedRun
*iso_run
, *next
;
1146 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
1148 if (!(chartype
= heap_alloc(uCount
* sizeof(*chartype
))))
1150 WARN("Out of memory\n");
1154 baselevel
= s
->uBidiLevel
;
1156 classify(lpString
, chartype
, uCount
, c
);
1157 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
1159 memset(lpOutOverrides
, 0, sizeof(WORD
) * uCount
);
1161 /* resolve explicit */
1162 resolveExplicit(baselevel
, chartype
, lpOutLevels
, lpOutOverrides
, uCount
, s
->fOverrideDirection
);
1163 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
1165 /* X10/BD13: Computer Isolating runs */
1166 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, lpString
, uCount
, &IsolatingRuns
);
1168 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1170 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
1173 resolveWeak(iso_run
);
1174 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
1176 /* resolve neutrals */
1177 resolveNeutrals(iso_run
);
1178 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
1180 list_remove(&iso_run
->entry
);
1184 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1185 /* resolveImplicit */
1186 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1188 /* resolveResolvedLevels*/
1189 classify(lpString
, chartype
, uCount
, c
);
1190 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1192 heap_free(chartype
);
1196 /* reverse cch indexes */
1197 static void reverse(int *pidx
, int cch
)
1201 for (; ich
< --cch
; ich
++)
1204 pidx
[ich
] = pidx
[cch
];
1210 /*------------------------------------------------------------------------
1211 Functions: reorder/reorderLevel
1213 Recursively reorders the display string
1214 "From the highest level down, reverse all characters at that level and
1215 higher, down to the lowest odd level"
1217 Implements rule L2 of the Unicode bidi Algorithm.
1219 Input: Array of embedding levels
1221 Flag enabling reversal (set to false by initial caller)
1223 In/Out: Text to reorder
1225 Note: levels may exceed 15 resp. 61 on input.
1227 Rule L3 - reorder combining marks is not implemented here
1228 Rule L4 - glyph mirroring is implemented as a display option below
1230 Note: this should be applied a line at a time
1231 -------------------------------------------------------------------------*/
1232 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1236 /* true as soon as first odd level encountered */
1237 fReverse
= fReverse
|| odd(level
);
1239 for (; ich
< cch
; ich
++)
1241 if (plevel
[ich
] < level
)
1245 else if (plevel
[ich
] > level
)
1247 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1248 cch
- ich
, fReverse
) - 1;
1253 reverse(pIndexs
, ich
);
1258 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1259 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1264 /* true as soon as first odd level encountered */
1265 fReverse
= fReverse
|| odd(level
);
1267 for (; ich
< cch
; ich
++)
1269 if (plevel
[ich
] < level
)
1271 else if (plevel
[ich
] > level
)
1276 reverse(pIndexs
, ich
);
1282 for (; ich
< cch
; ich
++)
1283 if (plevel
[ich
] < level
)
1285 else if (plevel
[ich
] > level
)
1286 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1287 cch
- ich
, fReverse
) - 1;
1293 BOOL
BIDI_GetStrengths(const WCHAR
*string
, unsigned int count
, const SCRIPT_CONTROL
*c
, WORD
*strength
)
1297 classify(string
, strength
, count
, c
);
1298 for (i
= 0; i
< count
; i
++)
1300 switch (strength
[i
])
1309 strength
[i
] = BIDI_STRONG
;
1318 strength
[i
] = BIDI_WEAK
;
1324 default: /* Neutrals and NSM */
1325 strength
[i
] = BIDI_NEUTRAL
;