2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
51 #include "wine/debug.h"
52 #include "wine/heap.h"
53 #include "wine/list.h"
55 #include "usp10_internal.h"
57 extern const unsigned short bidi_bracket_table
[] DECLSPEC_HIDDEN
;
58 extern const unsigned short bidi_direction_table
[] DECLSPEC_HIDDEN
;
60 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
62 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
65 /* HELPER FUNCTIONS AND DECLARATIONS */
67 /*------------------------------------------------------------------------
68 Bidirectional Character Types
70 as defined by the Unicode Bidirectional Algorithm Table 3-7.
74 The list of bidirectional character types here is not grouped the
75 same way as the table 3-7, since the numeric values for the types
76 are chosen to keep the state and action tables compact.
77 ------------------------------------------------------------------------*/
81 /* ON MUST be zero, code relies on ON = NI = 0 */
82 ON
= 0, /* Other Neutral */
85 AN
, /* Arabic Number */
86 EN
, /* European Number */
87 AL
, /* Arabic Letter (Right-to-left) */
88 NSM
, /* Non-spacing Mark */
89 CS
, /* Common Separator */
90 ES
, /* European Separator */
91 ET
, /* European Terminator (post/prefix e.g. $ and %) */
94 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
97 S
, /* Segment Separator (TAB) // used only in L1 */
98 WS
, /* White space // used only in L1 */
99 B
, /* Paragraph Separator (aka as PS) */
101 /* types for explicit controls */
102 RLO
, /* these are used only in X1-X9 */
108 LRI
, /* Isolate formatting characters new with 6.3 */
113 /* resolved types, also resolved directions */
114 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
117 static const char debug_type
[][4] =
119 "ON", /* Other Neutral */
120 "L", /* Left Letter */
121 "R", /* Right Letter */
122 "AN", /* Arabic Number */
123 "EN", /* European Number */
124 "AL", /* Arabic Letter (Right-to-left) */
125 "NSM", /* Non-spacing Mark */
126 "CS", /* Common Separator */
127 "ES", /* European Separator */
128 "ET", /* European Terminator (post/prefix e.g. $ and %) */
129 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
130 "S", /* Segment Separator (TAB) // used only in L1 */
131 "WS", /* White space // used only in L1 */
132 "B", /* Paragraph Separator (aka as PS) */
133 "RLO", /* these are used only in X1-X9 */
138 "LRI", /* Isolate formatting characters new with 6.3 */
144 /* HELPER FUNCTIONS */
146 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
150 for (i
= start
; i
< end
&& len
< 200; i
++)
152 TRACE(" %s",debug_type
[types
[i
]]);
153 len
+= strlen(debug_type
[types
[i
]])+1;
160 /* Convert the libwine information to the direction enum */
161 static void classify(const WCHAR
*string
, WORD
*chartype
, DWORD count
, const SCRIPT_CONTROL
*c
)
165 for (i
= 0; i
< count
; ++i
)
167 chartype
[i
] = get_table_entry_32( bidi_direction_table
, string
[i
] );
168 if (c
->fLegacyBidiClass
&& chartype
[i
] == ES
)
170 if (string
[i
] == '+' || string
[i
] == '-') chartype
[i
] = NI
;
175 /* RESOLVE EXPLICIT */
177 static WORD
GreaterEven(int i
)
179 return odd(i
) ? i
+ 1 : i
+ 2;
182 static WORD
GreaterOdd(int i
)
184 return odd(i
) ? i
+ 2 : i
+ 1;
187 static WORD
EmbeddingDirection(int level
)
189 return odd(level
) ? R
: L
;
192 /*------------------------------------------------------------------------
193 Function: resolveExplicit
195 Recursively resolves explicit embedding levels and overrides.
196 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
198 Input: Base embedding level and direction
201 Output: Array of embedding levels
203 In/Out: Array of direction classes
206 Note: The function uses two simple counters to keep track of
207 matching explicit codes and PDF. Use the default argument for
208 the outermost call. The nesting counter counts the recursion
209 depth and not the embedding level.
210 ------------------------------------------------------------------------*/
211 typedef struct tagStackItem
{
217 #define push_stack(l,o,i) \
219 stack[stack_top].level = l; \
220 stack[stack_top].override = o; \
221 stack[stack_top].isolate = i;} while(0)
223 #define pop_stack() do { stack_top++; } while(0)
225 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
227 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, WORD
*poutOverrides
, int count
, BOOL initialOverride
)
230 int overflow_isolate_count
= 0;
231 int overflow_embedding_count
= 0;
232 int valid_isolate_count
= 0;
235 StackItem stack
[MAX_DEPTH
+2];
236 int stack_top
= MAX_DEPTH
+1;
238 stack
[stack_top
].level
= level
;
239 stack
[stack_top
].override
= NI
;
240 stack
[stack_top
].isolate
= FALSE
;
245 push_stack(level
, R
, FALSE
);
247 push_stack(level
, L
, FALSE
);
250 for (i
= 0; i
< count
; i
++)
252 poutOverrides
[i
] = stack
[stack_top
].override
;
255 if (pclass
[i
] == RLE
)
257 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
258 poutLevel
[i
] = stack
[stack_top
].level
;
259 if (valid_level(least_odd
))
260 push_stack(least_odd
, NI
, FALSE
);
261 else if (overflow_isolate_count
== 0)
262 overflow_embedding_count
++;
265 else if (pclass
[i
] == LRE
)
267 int least_even
= GreaterEven(stack
[stack_top
].level
);
268 poutLevel
[i
] = stack
[stack_top
].level
;
269 if (valid_level(least_even
))
270 push_stack(least_even
, NI
, FALSE
);
271 else if (overflow_isolate_count
== 0)
272 overflow_embedding_count
++;
275 else if (pclass
[i
] == RLO
)
277 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
278 poutLevel
[i
] = stack
[stack_top
].level
;
279 if (valid_level(least_odd
))
280 push_stack(least_odd
, R
, FALSE
);
281 else if (overflow_isolate_count
== 0)
282 overflow_embedding_count
++;
285 else if (pclass
[i
] == LRO
)
287 int least_even
= GreaterEven(stack
[stack_top
].level
);
288 poutLevel
[i
] = stack
[stack_top
].level
;
289 if (valid_level(least_even
))
290 push_stack(least_even
, L
, FALSE
);
291 else if (overflow_isolate_count
== 0)
292 overflow_embedding_count
++;
295 else if (pclass
[i
] == RLI
)
297 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
298 poutLevel
[i
] = stack
[stack_top
].level
;
299 if (valid_level(least_odd
))
301 valid_isolate_count
++;
302 push_stack(least_odd
, NI
, TRUE
);
305 overflow_isolate_count
++;
308 else if (pclass
[i
] == LRI
)
310 int least_even
= GreaterEven(stack
[stack_top
].level
);
311 poutLevel
[i
] = stack
[stack_top
].level
;
312 if (valid_level(least_even
))
314 valid_isolate_count
++;
315 push_stack(least_even
, NI
, TRUE
);
318 overflow_isolate_count
++;
321 else if (pclass
[i
] == FSI
)
326 poutLevel
[i
] = stack
[stack_top
].level
;
327 for (j
= i
+1; j
< count
; j
++)
329 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
334 else if (pclass
[j
] == PDI
)
343 if (skipping
) continue;
350 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
358 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
359 if (valid_level(least_odd
))
361 valid_isolate_count
++;
362 push_stack(least_odd
, NI
, TRUE
);
365 overflow_isolate_count
++;
369 int least_even
= GreaterEven(stack
[stack_top
].level
);
370 if (valid_level(least_even
))
372 valid_isolate_count
++;
373 push_stack(least_even
, NI
, TRUE
);
376 overflow_isolate_count
++;
380 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
382 poutLevel
[i
] = stack
[stack_top
].level
;
383 if (stack
[stack_top
].override
!= NI
)
384 pclass
[i
] = stack
[stack_top
].override
;
387 else if (pclass
[i
] == PDI
)
389 if (overflow_isolate_count
) overflow_isolate_count
--;
390 else if (!valid_isolate_count
) {/* do nothing */}
393 overflow_embedding_count
= 0;
394 while (!stack
[stack_top
].isolate
) pop_stack();
396 valid_isolate_count
--;
398 poutLevel
[i
] = stack
[stack_top
].level
;
401 else if (pclass
[i
] == PDF
)
403 poutLevel
[i
] = stack
[stack_top
].level
;
404 if (overflow_isolate_count
) {/* do nothing */}
405 else if (overflow_embedding_count
) overflow_embedding_count
--;
406 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
411 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
412 for (i
= 0; i
< count
; i
++)
413 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
417 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
419 if (index
== -1 || index
== back_fence
) return index
;
421 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
425 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
427 if (index
== front_fence
) return index
;
429 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
433 typedef struct tagRun
440 typedef struct tagRunChar
446 typedef struct tagIsolatedRun
457 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
459 if (index
>= (iso_run
->length
-1)) return -1;
461 while (index
< iso_run
->length
&& *iso_run
->item
[index
].pcls
== BN
) index
++;
462 if (index
== iso_run
->length
) return -1;
466 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
469 if (index
<= 0) return -1;
471 while (index
> -1 && *iso_run
->item
[index
].pcls
== BN
) index
--;
475 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
480 for (i
= 0; i
< iso_run
->length
&& len
< 200; i
++)
482 TRACE(" %s",debug_type
[*iso_run
->item
[i
].pcls
]);
483 len
+= strlen(debug_type
[*iso_run
->item
[i
].pcls
])+1;
485 if (i
!= iso_run
->length
)
490 /*------------------------------------------------------------------------
491 Function: resolveWeak
493 Resolves the directionality of numeric and other weak character types
495 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
497 Input: Array of embedding levels
500 In/Out: Array of directional classes
502 Note: On input only these directional classes are expected
503 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
504 ------------------------------------------------------------------------*/
506 static void resolveWeak(IsolatedRun
* iso_run
)
511 for (i
=0; i
< iso_run
->length
; i
++)
513 if (*iso_run
->item
[i
].pcls
== NSM
)
515 int j
= iso_previousValidChar(iso_run
, i
);
517 *iso_run
->item
[i
].pcls
= iso_run
->sos
;
518 else if (*iso_run
->item
[j
].pcls
>= LRI
)
519 *iso_run
->item
[i
].pcls
= ON
;
521 *iso_run
->item
[i
].pcls
= *iso_run
->item
[j
].pcls
;
526 for (i
= 0; i
< iso_run
->length
; i
++)
528 if (*iso_run
->item
[i
].pcls
== EN
)
530 int j
= iso_previousValidChar(iso_run
, i
);
533 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
|| *iso_run
->item
[j
].pcls
== AL
)
535 if (*iso_run
->item
[j
].pcls
== AL
)
536 *iso_run
->item
[i
].pcls
= AN
;
539 j
= iso_previousValidChar(iso_run
, j
);
545 for (i
= 0; i
< iso_run
->length
; i
++)
547 if (*iso_run
->item
[i
].pcls
== AL
)
548 *iso_run
->item
[i
].pcls
= R
;
552 for (i
= 0; i
< iso_run
->length
; i
++)
554 if (*iso_run
->item
[i
].pcls
== ES
)
556 int b
= iso_previousValidChar(iso_run
, i
);
557 int f
= iso_nextValidChar(iso_run
, i
);
559 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
560 *iso_run
->item
[i
].pcls
= EN
;
562 else if (*iso_run
->item
[i
].pcls
== CS
)
564 int b
= iso_previousValidChar(iso_run
, i
);
565 int f
= iso_nextValidChar(iso_run
, i
);
567 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
568 *iso_run
->item
[i
].pcls
= EN
;
569 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== AN
&& *iso_run
->item
[f
].pcls
== AN
)
570 *iso_run
->item
[i
].pcls
= AN
;
575 for (i
= 0; i
< iso_run
->length
; i
++)
577 if (*iso_run
->item
[i
].pcls
== ET
)
580 for (j
= i
-1 ; j
> -1; j
--)
582 if (*iso_run
->item
[j
].pcls
== BN
) continue;
583 if (*iso_run
->item
[j
].pcls
== ET
) continue;
584 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
587 if (*iso_run
->item
[i
].pcls
== ET
)
589 for (j
= i
+1; j
< iso_run
->length
; j
++)
591 if (*iso_run
->item
[j
].pcls
== BN
) continue;
592 if (*iso_run
->item
[j
].pcls
== ET
) continue;
593 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
601 for (i
= 0; i
< iso_run
->length
; i
++)
603 if (*iso_run
->item
[i
].pcls
== ET
|| *iso_run
->item
[i
].pcls
== ES
|| *iso_run
->item
[i
].pcls
== CS
|| *iso_run
->item
[i
].pcls
== ON
)
607 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
608 *iso_run
->item
[b
].pcls
= ON
;
609 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
610 *iso_run
->item
[f
].pcls
= ON
;
612 *iso_run
->item
[i
].pcls
= ON
;
617 for (i
= 0; i
< iso_run
->length
; i
++)
619 if (*iso_run
->item
[i
].pcls
== EN
)
622 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
623 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
)
625 if (*iso_run
->item
[j
].pcls
== L
)
626 *iso_run
->item
[i
].pcls
= L
;
629 if (iso_run
->sos
== L
&& j
== -1)
630 *iso_run
->item
[i
].pcls
= L
;
635 typedef struct tagBracketPair
641 static int __cdecl
compr(const void *a
, const void* b
)
643 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
646 static BracketPair
*computeBracketPairs(IsolatedRun
*iso_run
)
650 int stack_top
= iso_run
->length
;
651 unsigned int pair_count
= 0;
652 BracketPair
*out
= NULL
;
656 open_stack
= heap_alloc(iso_run
->length
* sizeof(*open_stack
));
657 stack_index
= heap_alloc(iso_run
->length
* sizeof(*stack_index
));
659 for (i
= 0; i
< iso_run
->length
; i
++)
661 unsigned short ubv
= get_table_entry_16(bidi_bracket_table
, iso_run
->item
[i
].ch
);
669 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
670 /* Deal with canonical equivalent U+2329/232A and U+3008/3009. */
671 if (open_stack
[stack_top
] == 0x232a)
672 open_stack
[stack_top
] = 0x3009;
673 stack_index
[stack_top
] = i
;
675 else if ((ubv
>> 8) == 1)
679 for (j
= stack_top
; j
< iso_run
->length
; ++j
)
681 WCHAR c
= iso_run
->item
[i
].ch
;
686 if (c
!= open_stack
[j
])
689 if (!(usp10_array_reserve((void **)&out
, &out_size
, pair_count
+ 2, sizeof(*out
))))
690 ERR("Failed to grow output array.\n");
692 out
[pair_count
].start
= stack_index
[j
];
693 out
[pair_count
].end
= i
;
696 out
[pair_count
].start
= -1;
703 heap_free(open_stack
);
704 heap_free(stack_index
);
709 qsort(out
, pair_count
, sizeof(*out
), compr
);
714 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
716 /*------------------------------------------------------------------------
717 Function: resolveNeutrals
719 Resolves the directionality of neutral character types.
721 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
723 Input: Array of embedding levels
727 In/Out: Array of directional classes
729 Note: On input only these directional classes are expected
730 R, L, NI, AN, EN and BN
732 W8 resolves a number of ENs to L
733 ------------------------------------------------------------------------*/
734 static void resolveNeutrals(IsolatedRun
*iso_run
)
737 BracketPair
*pairs
= NULL
;
739 /* Translate isolates into NI */
740 for (i
= 0; i
< iso_run
->length
; i
++)
742 if (*iso_run
->item
[i
].pcls
>= LRI
)
743 *iso_run
->item
[i
].pcls
= NI
;
745 switch(*iso_run
->item
[i
].pcls
)
749 case WS
: *iso_run
->item
[i
].pcls
= NI
;
752 ASSERT(*iso_run
->item
[i
].pcls
< 5 || *iso_run
->item
[i
].pcls
== BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
755 /* N0: Skipping bracketed pairs for now */
756 pairs
= computeBracketPairs(iso_run
);
759 BracketPair
*p
= &pairs
[0];
761 while (p
->start
>= 0)
764 int e
= EmbeddingDirection(iso_run
->e
);
765 int o
= EmbeddingDirection(iso_run
->e
+1);
767 TRACE("Bracket Pair [%i - %i]\n",p
->start
, p
->end
);
770 for (j
= p
->start
+1; j
< p
->end
; j
++)
772 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
774 *iso_run
->item
[p
->start
].pcls
= e
;
775 *iso_run
->item
[p
->end
].pcls
= e
;
778 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
782 if (j
== p
->end
&& flag_o
)
784 for (j
= p
->start
; j
>= 0; j
--)
786 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
788 *iso_run
->item
[p
->start
].pcls
= o
;
789 *iso_run
->item
[p
->end
].pcls
= o
;
792 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
794 *iso_run
->item
[p
->start
].pcls
= e
;
795 *iso_run
->item
[p
->end
].pcls
= e
;
801 *iso_run
->item
[p
->start
].pcls
= iso_run
->sos
;
802 *iso_run
->item
[p
->end
].pcls
= iso_run
->sos
;
813 for (i
= 0; i
< iso_run
->length
; i
++)
817 if (*iso_run
->item
[i
].pcls
== NI
)
820 int b
= iso_previousValidChar(iso_run
, i
);
829 if (*iso_run
->item
[b
].pcls
== R
|| *iso_run
->item
[b
].pcls
== AN
|| *iso_run
->item
[b
].pcls
== EN
)
831 else if (*iso_run
->item
[b
].pcls
== L
)
833 else /* No string type */
836 j
= iso_nextValidChar(iso_run
, i
);
837 while (j
> -1 && *iso_run
->item
[j
].pcls
== NI
) j
= iso_nextValidChar(iso_run
, j
);
844 else if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== AN
|| *iso_run
->item
[j
].pcls
== EN
)
846 else if (*iso_run
->item
[j
].pcls
== L
)
848 else /* No string type */
853 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
854 *iso_run
->item
[b
].pcls
= r
;
860 for (i
= 0; i
< iso_run
->length
; i
++)
862 if (*iso_run
->item
[i
].pcls
== NI
)
866 *iso_run
->item
[i
].pcls
= EmbeddingDirection(iso_run
->e
);
867 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
868 *iso_run
->item
[b
].pcls
= EmbeddingDirection(iso_run
->e
);
869 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
870 *iso_run
->item
[f
].pcls
= EmbeddingDirection(iso_run
->e
);
875 /*------------------------------------------------------------------------
876 Function: resolveImplicit
878 Recursively resolves implicit embedding levels.
879 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
881 Input: Array of direction classes
885 In/Out: Array of embedding levels
887 Note: levels may exceed 15 on output.
888 Accepted subset of direction classes
890 ------------------------------------------------------------------------*/
891 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
896 for (i
= sos
; i
<= eos
; i
++)
901 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
902 ASSERT(pcls
[i
] < 5); /* "Out of range." */
904 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
906 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
908 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
913 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
918 for (i
= sos
; i
<= eos
; i
++)
920 if (pcls
[i
] == B
|| pcls
[i
] == S
)
923 while (i
> sos
&& j
>= sos
&&
924 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
925 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
926 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
927 plevel
[j
--] = baselevel
;
928 plevel
[i
] = baselevel
;
930 else if (pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
|| pcls
[i
] == RLO
||
931 pcls
[i
] == PDF
|| pcls
[i
] == BN
)
933 plevel
[i
] = i
? plevel
[i
- 1] : baselevel
;
936 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
937 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
938 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
941 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
942 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
943 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
944 plevel
[j
--] = baselevel
;
949 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, const WORD
*pLevel
,
950 const WCHAR
*string
, unsigned int uCount
, struct list
*set
)
952 int run_start
, run_end
, i
;
955 IsolatedRun
*current_isolated
;
957 if (!(runs
= heap_calloc(uCount
, sizeof(*runs
))))
964 while (run_start
< uCount
)
966 run_end
= nextValidChar(pcls
, run_start
, uCount
);
967 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
969 runs
[run_count
].start
= run_start
;
970 runs
[run_count
].end
= run_end
;
971 runs
[run_count
].e
= pLevel
[run_start
];
972 run_start
= nextValidChar(pcls
, run_end
, uCount
);
976 /* Build Isolating Runs */
978 while (i
< run_count
)
981 if (runs
[k
].start
>= 0)
983 int type_fence
, real_end
;
986 if (!(current_isolated
= heap_alloc(FIELD_OFFSET(IsolatedRun
, item
[uCount
]))))
989 run_start
= runs
[k
].start
;
990 current_isolated
->e
= runs
[k
].e
;
991 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
993 for (j
= 0; j
< current_isolated
->length
; j
++)
995 current_isolated
->item
[j
].pcls
= &pcls
[runs
[k
].start
+j
];
996 current_isolated
->item
[j
].ch
= string
[runs
[k
].start
+ j
];
999 run_end
= runs
[k
].end
;
1001 TRACE("{ [%i -- %i]",run_start
, run_end
);
1003 if (pcls
[run_end
] == BN
)
1004 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
1006 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
1010 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
1011 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
1020 int l
= current_isolated
->length
;
1022 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1023 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
1025 current_isolated
->item
[l
].pcls
= &pcls
[runs
[j
].start
+m
];
1026 current_isolated
->item
[l
].ch
= string
[runs
[j
].start
+ m
];
1029 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
1031 run_end
= runs
[j
].end
;
1032 if (pcls
[run_end
] == BN
)
1033 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
1044 type_fence
= previousValidChar(pcls
, run_start
, -1);
1046 if (type_fence
== -1)
1047 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
1049 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
1051 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
1053 if (run_end
== uCount
)
1054 current_isolated
->eos
= current_isolated
->sos
;
1057 /* eos could be an BN */
1058 if ( pcls
[run_end
] == BN
)
1060 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
1061 if (real_end
< run_start
)
1062 real_end
= run_start
;
1067 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
1068 if (type_fence
== uCount
)
1069 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
1071 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
1073 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
1076 list_add_tail(set
, ¤t_isolated
->entry
);
1077 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1085 /*************************************************************
1086 * BIDI_DetermineLevels
1088 BOOL
BIDI_DetermineLevels(
1089 const WCHAR
*lpString
, /* [in] The string for which information is to be returned */
1090 unsigned int uCount
, /* [in] Number of WCHARs in string. */
1091 const SCRIPT_STATE
*s
,
1092 const SCRIPT_CONTROL
*c
,
1093 WORD
*lpOutLevels
, /* [out] final string levels */
1094 WORD
*lpOutOverrides
/* [out] final string overrides */
1098 unsigned baselevel
= 0;
1099 struct list IsolatingRuns
;
1100 IsolatedRun
*iso_run
, *next
;
1102 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
1104 if (!(chartype
= heap_alloc(uCount
* sizeof(*chartype
))))
1106 WARN("Out of memory\n");
1110 baselevel
= s
->uBidiLevel
;
1112 classify(lpString
, chartype
, uCount
, c
);
1113 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
1115 memset(lpOutOverrides
, 0, sizeof(WORD
) * uCount
);
1117 /* resolve explicit */
1118 resolveExplicit(baselevel
, chartype
, lpOutLevels
, lpOutOverrides
, uCount
, s
->fOverrideDirection
);
1119 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
1121 /* X10/BD13: Computer Isolating runs */
1122 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, lpString
, uCount
, &IsolatingRuns
);
1124 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1126 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
1129 resolveWeak(iso_run
);
1130 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
1132 /* resolve neutrals */
1133 resolveNeutrals(iso_run
);
1134 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
1136 list_remove(&iso_run
->entry
);
1140 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1141 /* resolveImplicit */
1142 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1144 /* resolveResolvedLevels*/
1145 classify(lpString
, chartype
, uCount
, c
);
1146 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1148 heap_free(chartype
);
1152 /* reverse cch indices */
1153 static void reverse(int *pidx
, int cch
)
1157 for (; ich
< --cch
; ich
++)
1160 pidx
[ich
] = pidx
[cch
];
1166 /*------------------------------------------------------------------------
1167 Functions: reorder/reorderLevel
1169 Recursively reorders the display string
1170 "From the highest level down, reverse all characters at that level and
1171 higher, down to the lowest odd level"
1173 Implements rule L2 of the Unicode bidi Algorithm.
1175 Input: Array of embedding levels
1177 Flag enabling reversal (set to false by initial caller)
1179 In/Out: Text to reorder
1181 Note: levels may exceed 15 resp. 61 on input.
1183 Rule L3 - reorder combining marks is not implemented here
1184 Rule L4 - glyph mirroring is implemented as a display option below
1186 Note: this should be applied a line at a time
1187 -------------------------------------------------------------------------*/
1188 int BIDI_ReorderV2lLevel(int level
, int *pIndices
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1192 /* true as soon as first odd level encountered */
1193 fReverse
= fReverse
|| odd(level
);
1195 for (; ich
< cch
; ich
++)
1197 if (plevel
[ich
] < level
)
1201 else if (plevel
[ich
] > level
)
1203 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndices
+ ich
, plevel
+ ich
,
1204 cch
- ich
, fReverse
) - 1;
1209 reverse(pIndices
, ich
);
1214 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1215 int BIDI_ReorderL2vLevel(int level
, int *pIndices
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1220 /* true as soon as first odd level encountered */
1221 fReverse
= fReverse
|| odd(level
);
1223 for (; ich
< cch
; ich
++)
1225 if (plevel
[ich
] < level
)
1227 else if (plevel
[ich
] > level
)
1232 reverse(pIndices
, ich
);
1238 for (; ich
< cch
; ich
++)
1239 if (plevel
[ich
] < level
)
1241 else if (plevel
[ich
] > level
)
1242 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndices
+ ich
, plevel
+ ich
,
1243 cch
- ich
, fReverse
) - 1;
1249 BOOL
BIDI_GetStrengths(const WCHAR
*string
, unsigned int count
, const SCRIPT_CONTROL
*c
, WORD
*strength
)
1253 classify(string
, strength
, count
, c
);
1254 for (i
= 0; i
< count
; i
++)
1256 switch (strength
[i
])
1265 strength
[i
] = BIDI_STRONG
;
1274 strength
[i
] = BIDI_WEAK
;
1280 default: /* Neutrals and NSM */
1281 strength
[i
] = BIDI_NEUTRAL
;