gdi32: ntmCellHeight and ntmAvgWidth should be in font units.
[wine/multimedia.git] / libs / wine / sortkey.c
blob17b55374c510bbed4bd3b3cfa061bc0c252f6cbc
1 /*
2 * Unicode sort key generation
4 * Copyright 2003 Dmitry Timoshkov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 #include "wine/unicode.h"
22 extern int get_decomposition(WCHAR src, WCHAR *dst, unsigned int dstlen);
23 extern const unsigned int collation_table[];
26 * flags - normalization NORM_* flags
28 * FIXME: 'variable' flag not handled
30 int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen)
32 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
33 int key_len[4];
34 char *key_ptr[4];
35 const WCHAR *src_save = src;
36 int srclen_save = srclen;
38 key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;
39 for (; srclen; srclen--, src++)
41 int decomposed_len = 1;/*get_decomposition(*src, dummy, 4);*/
42 dummy[0] = *src;
43 if (decomposed_len)
45 int i;
46 for (i = 0; i < decomposed_len; i++)
48 WCHAR wch = dummy[i];
49 unsigned int ce;
51 /* tests show that win2k just ignores NORM_IGNORENONSPACE,
52 * and skips white space and punctuation characters for
53 * NORM_IGNORESYMBOLS.
55 if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE)))
56 continue;
58 if (flags & NORM_IGNORECASE) wch = tolowerW(wch);
60 ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];
61 if (ce != (unsigned int)-1)
63 if (ce >> 16) key_len[0] += 2;
64 if ((ce >> 8) & 0xff) key_len[1]++;
65 if ((ce >> 4) & 0x0f) key_len[2]++;
66 if (ce & 1)
68 if (wch >> 8) key_len[3]++;
69 key_len[3]++;
72 else
74 key_len[0] += 2;
75 if (wch >> 8) key_len[0]++;
76 if (wch & 0xff) key_len[0]++;
82 if (!dstlen) /* compute length */
83 /* 4 * '\1' + 1 * '\0' + key length */
84 return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1;
86 if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
87 return 0; /* overflow */
89 src = src_save;
90 srclen = srclen_save;
92 key_ptr[0] = dst;
93 key_ptr[1] = key_ptr[0] + key_len[0] + 1;
94 key_ptr[2] = key_ptr[1] + key_len[1] + 1;
95 key_ptr[3] = key_ptr[2] + key_len[2] + 1;
97 for (; srclen; srclen--, src++)
99 int decomposed_len = 1;/*get_decomposition(*src, dummy, 4);*/
100 dummy[0] = *src;
101 if (decomposed_len)
103 int i;
104 for (i = 0; i < decomposed_len; i++)
106 WCHAR wch = dummy[i];
107 unsigned int ce;
109 /* tests show that win2k just ignores NORM_IGNORENONSPACE,
110 * and skips white space and punctuation characters for
111 * NORM_IGNORESYMBOLS.
113 if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE)))
114 continue;
116 if (flags & NORM_IGNORECASE) wch = tolowerW(wch);
118 ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];
119 if (ce != (unsigned int)-1)
121 WCHAR key;
122 if ((key = ce >> 16))
124 *key_ptr[0]++ = key >> 8;
125 *key_ptr[0]++ = key & 0xff;
127 /* make key 1 start from 2 */
128 if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;
129 /* make key 2 start from 2 */
130 if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;
131 /* key 3 is always a character code */
132 if (ce & 1)
134 if (wch >> 8) *key_ptr[3]++ = wch >> 8;
135 if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;
138 else
140 *key_ptr[0]++ = 0xff;
141 *key_ptr[0]++ = 0xfe;
142 if (wch >> 8) *key_ptr[0]++ = wch >> 8;
143 if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;
149 *key_ptr[0] = '\1';
150 *key_ptr[1] = '\1';
151 *key_ptr[2] = '\1';
152 *key_ptr[3]++ = '\1';
153 *key_ptr[3] = 0;
155 return key_ptr[3] - dst;
158 static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1,
159 const WCHAR *str2, int len2)
161 unsigned int ce1, ce2;
162 int ret;
164 /* 32-bit collation element table format:
165 * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
166 * case weight - high 4 bit of low 8 bit.
168 while (len1 > 0 && len2 > 0)
170 if (flags & NORM_IGNORESYMBOLS)
172 int skip = 0;
173 /* FIXME: not tested */
174 if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))
176 str1++;
177 len1--;
178 skip = 1;
180 if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))
182 str2++;
183 len2--;
184 skip = 1;
186 if (skip) continue;
189 /* hyphen and apostrophe are treated differently depending on
190 * whether SORT_STRINGSORT specified or not
192 if (!(flags & SORT_STRINGSORT))
194 if (*str1 == '-' || *str1 == '\'')
196 if (*str2 != '-' && *str2 != '\'')
198 str1++;
199 len1--;
200 continue;
203 else if (*str2 == '-' || *str2 == '\'')
205 str2++;
206 len2--;
207 continue;
211 ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
212 ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
214 if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
215 ret = (ce1 >> 16) - (ce2 >> 16);
216 else
217 ret = *str1 - *str2;
219 if (ret) return ret;
221 str1++;
222 str2++;
223 len1--;
224 len2--;
226 return len1 - len2;
229 static inline int compare_diacritic_weights(int flags, const WCHAR *str1, int len1,
230 const WCHAR *str2, int len2)
232 unsigned int ce1, ce2;
233 int ret;
235 /* 32-bit collation element table format:
236 * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
237 * case weight - high 4 bit of low 8 bit.
239 while (len1 > 0 && len2 > 0)
241 if (flags & NORM_IGNORESYMBOLS)
243 int skip = 0;
244 /* FIXME: not tested */
245 if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))
247 str1++;
248 len1--;
249 skip = 1;
251 if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))
253 str2++;
254 len2--;
255 skip = 1;
257 if (skip) continue;
260 ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
261 ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
263 if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
264 ret = ((ce1 >> 8) & 0xff) - ((ce2 >> 8) & 0xff);
265 else
266 ret = *str1 - *str2;
268 if (ret) return ret;
270 str1++;
271 str2++;
272 len1--;
273 len2--;
275 return len1 - len2;
278 static inline int compare_case_weights(int flags, const WCHAR *str1, int len1,
279 const WCHAR *str2, int len2)
281 unsigned int ce1, ce2;
282 int ret;
284 /* 32-bit collation element table format:
285 * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
286 * case weight - high 4 bit of low 8 bit.
288 while (len1 > 0 && len2 > 0)
290 if (flags & NORM_IGNORESYMBOLS)
292 int skip = 0;
293 /* FIXME: not tested */
294 if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))
296 str1++;
297 len1--;
298 skip = 1;
300 if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))
302 str2++;
303 len2--;
304 skip = 1;
306 if (skip) continue;
309 ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
310 ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
312 if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
313 ret = ((ce1 >> 4) & 0x0f) - ((ce2 >> 4) & 0x0f);
314 else
315 ret = *str1 - *str2;
317 if (ret) return ret;
319 str1++;
320 str2++;
321 len1--;
322 len2--;
324 return len1 - len2;
327 static inline int real_length(const WCHAR *str, int len)
329 while (len && !str[len - 1]) len--;
330 return len;
333 int wine_compare_string(int flags, const WCHAR *str1, int len1,
334 const WCHAR *str2, int len2)
336 int ret;
338 len1 = real_length(str1, len1);
339 len2 = real_length(str2, len2);
341 ret = compare_unicode_weights(flags, str1, len1, str2, len2);
342 if (!ret)
344 if (!(flags & NORM_IGNORENONSPACE))
345 ret = compare_diacritic_weights(flags, str1, len1, str2, len2);
346 if (!ret && !(flags & NORM_IGNORECASE))
347 ret = compare_case_weights(flags, str1, len1, str2, len2);
349 return ret;