/*
 * Unicode normalization functions
 *
 * Copyright 2019 Huw Davies
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
21 #include "wine/unicode.h"
23 extern WCHAR
wine_compose( const WCHAR
*str
) DECLSPEC_HIDDEN
;
24 extern unsigned int wine_decompose( int flags
, WCHAR ch
, WCHAR
*dst
, unsigned int dstlen
) DECLSPEC_HIDDEN
;
25 extern const unsigned short combining_class_table
[] DECLSPEC_HIDDEN
;
27 static BYTE
get_combining_class( WCHAR c
)
29 return combining_class_table
[combining_class_table
[combining_class_table
[c
>> 8] + ((c
>> 4) & 0xf)] + (c
& 0xf)];
32 static BOOL
is_starter( WCHAR c
)
34 return !get_combining_class( c
);
37 static BOOL
reorderable_pair( WCHAR c1
, WCHAR c2
)
41 /* reorderable if ccc1 > ccc2 > 0 */
42 ccc1
= get_combining_class( c1
);
43 if (ccc1
< 2) return FALSE
;
44 ccc2
= get_combining_class( c2
);
45 return ccc2
&& (ccc1
> ccc2
);
48 static void canonical_order_substring( WCHAR
*str
, unsigned int len
)
56 for (i
= 0; i
< len
- 1; i
++)
58 if (reorderable_pair( str
[i
], str
[i
+ 1] ))
69 /****************************************************************************
70 * canonical_order_string
72 * Reorder the string into canonical order - D108/D109.
74 * Starters (chars with combining class == 0) don't move, so look for continuous
75 * substrings of non-starters and only reorder those.
77 static void canonical_order_string( WCHAR
*str
, unsigned int len
)
79 unsigned int i
, next
= 0;
81 for (i
= 1; i
<= len
; i
++)
83 if (i
== len
|| is_starter( str
[i
] ))
85 if (i
> next
+ 1) /* at least two successive non-starters */
86 canonical_order_substring( str
+ next
, i
- next
);
92 unsigned int wine_decompose_string( int flags
, const WCHAR
*src
, unsigned int src_len
,
93 WCHAR
*dst
, unsigned int dst_len
)
95 unsigned int src_pos
, dst_pos
= 0, decomp_len
;
97 for (src_pos
= 0; src_pos
< src_len
; src_pos
++)
99 if (dst_pos
== dst_len
) return 0;
100 decomp_len
= wine_decompose( flags
, src
[src_pos
], dst
+ dst_pos
, dst_len
- dst_pos
);
101 if (decomp_len
== 0) return 0;
102 dst_pos
+= decomp_len
;
105 if (flags
& WINE_DECOMPOSE_REORDER
) canonical_order_string( dst
, dst_pos
);
109 static BOOL
is_blocked( WCHAR
*starter
, WCHAR
*ptr
)
111 if (ptr
== starter
+ 1) return FALSE
;
112 /* Because the string is already canonically ordered, the chars are blocked
113 only if the previous char's combining class is equal to the test char. */
114 if (get_combining_class( *(ptr
- 1) ) == get_combining_class( *ptr
)) return TRUE
;
118 unsigned int wine_compose_string( WCHAR
*str
, unsigned int len
)
120 unsigned int i
, last_starter
= len
;
123 for (i
= 0; i
< len
; i
++)
126 if (last_starter
== len
|| is_blocked( str
+ last_starter
, str
+ i
) || !(comp
= wine_compose( pair
)))
128 if (is_starter( str
[i
] ))
135 str
[last_starter
] = pair
[0] = comp
;
137 memmove( str
+ i
, str
+ i
+ 1, (len
- i
) * sizeof(WCHAR
) );