Release 8.2.
[wine.git] / dlls / ntdll / locale_private.h
blob40d6ab0004dee8b7ba649db0ecccb4b18cfe3cfa
/*
 * Ntdll locale definitions
 *
 * Copyright 2019, 2022 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
21 #ifndef __NTDLL_LOCALE_PRIVATE_H
22 #define __NTDLL_LOCALE_PRIVATE_H
24 #include "windef.h"
25 #include "winbase.h"
26 #include "winnls.h"
/* NLS codepage file format:
 *
 * header:
 *   WORD      offset to cp2uni table in words
 *   WORD      CodePage
 *   WORD      MaximumCharacterSize
 *   BYTE[2]   DefaultChar
 *   WORD      UniDefaultChar
 *   WORD      TransDefaultChar
 *   WORD      TransUniDefaultChar
 *   BYTE[12]  LeadByte
 * cp2uni table:
 *   WORD      offset to uni2cp table in words
 *   WORD[256] cp2uni table
 *   WORD      glyph table size
 *   WORD[glyph_table_size] glyph table
 *   WORD      number of lead byte ranges
 *   WORD[256] lead byte offsets in words
 *   WORD[leadbytes][256] cp2uni table for lead bytes
 * uni2cp table:
 *   WORD      0 / 4
 *   BYTE[65536] / WORD[65536]  uni2cp table
 */
/* Numeric identifiers for the kinds of NLS section declared in this header. */
enum nls_section_type
{
    NLS_SECTION_SORTKEYS  = 9,
    NLS_SECTION_CASEMAP   = 10,
    NLS_SECTION_CODEPAGE  = 11,
    NLS_SECTION_NORMALIZE = 12
};
60 /* NLS normalization file */
61 struct norm_table
63 WCHAR name[13]; /* 00 file name */
64 USHORT checksum[3]; /* 1a checksum? */
65 USHORT version[4]; /* 20 Unicode version */
66 USHORT form; /* 28 normalization form */
67 USHORT len_factor; /* 2a factor for length estimates */
68 USHORT unknown1; /* 2c */
69 USHORT decomp_size; /* 2e decomposition hash size */
70 USHORT comp_size; /* 30 composition hash size */
71 USHORT unknown2; /* 32 */
72 USHORT classes; /* 34 combining classes table offset */
73 USHORT props_level1; /* 36 char properties table level 1 offset */
74 USHORT props_level2; /* 38 char properties table level 2 offset */
75 USHORT decomp_hash; /* 3a decomposition hash table offset */
76 USHORT decomp_map; /* 3c decomposition character map table offset */
77 USHORT decomp_seq; /* 3e decomposition character sequences offset */
78 USHORT comp_hash; /* 40 composition hash table offset */
79 USHORT comp_seq; /* 42 composition character sequences offset */
80 /* BYTE[] combining class values */
81 /* BYTE[0x2200] char properties index level 1 */
82 /* BYTE[] char properties index level 2 */
83 /* WORD[] decomposition hash table */
84 /* WORD[] decomposition character map */
85 /* WORD[] decomposition character sequences */
86 /* WORD[] composition hash table */
87 /* WORD[] composition character sequences */
91 /* locale.nls file */
92 struct locale_nls_header
94 UINT ctypes;
95 UINT unknown1;
96 UINT unknown2;
97 UINT unknown3;
98 UINT locales;
99 UINT charmaps;
100 UINT geoids;
101 UINT scripts;
105 static inline WCHAR casemap_ascii( WCHAR ch )
107 if (ch >= 'a' && ch <= 'z') ch -= 'a' - 'A';
108 return ch;
112 static inline int get_utf16( const WCHAR *src, unsigned int srclen, unsigned int *ch )
114 if (IS_HIGH_SURROGATE( src[0] ))
116 if (srclen <= 1) return 0;
117 if (!IS_LOW_SURROGATE( src[1] )) return 0;
118 *ch = 0x10000 + ((src[0] & 0x3ff) << 10) + (src[1] & 0x3ff);
119 return 2;
121 if (IS_LOW_SURROGATE( src[0] )) return 0;
122 *ch = src[0];
123 return 1;
127 static inline void put_utf16( WCHAR *dst, unsigned int ch )
129 if (ch >= 0x10000)
131 ch -= 0x10000;
132 dst[0] = 0xd800 | (ch >> 10);
133 dst[1] = 0xdc00 | (ch & 0x3ff);
135 else dst[0] = ch;
/* Decode the remainder of a UTF-8 sequence whose first byte was already read.
 *
 * ch:     the first byte of the sequence (already consumed by the caller).
 * str:    in/out cursor pointing at the byte following ch; it is advanced past
 *         every continuation byte that was accepted.
 * strend: end of the input buffer.
 *
 * Returns the decoded code point, or ~0 for an invalid sequence (bad lead
 * byte, bad continuation byte, overlong form, surrogate, value >= 0x110000).
 * If the sequence would run past strend, *str is set to the position where the
 * sequence would have ended (beyond strend) and ~0 is returned.
 * A 7-bit lead byte is returned unchanged without touching *str.
 */
static inline unsigned int decode_utf8_char( unsigned char ch, const char **str, const char *strend )
{
    /* number of following bytes in sequence based on first byte value (for bytes above 0x7f) */
    static const char utf8_length[128] =
    {
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
        0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
        2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
        3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0  /* 0xf0-0xff */
    };

    /* first byte mask depending on UTF-8 sequence length */
    static const unsigned char utf8_mask[4] = { 0x7f, 0x1f, 0x0f, 0x07 };

    unsigned int len, res;
    const char *end;

    /* a 7-bit byte is a complete character and must not index the table
     * (ch - 0x80 would be negative) */
    if (ch < 0x80) return ch;

    len = utf8_length[ch - 0x80];
    res = ch & utf8_mask[len];
    end = *str + len;

    if (end > strend)
    {
        *str = end;
        return ~0u;
    }
    switch (len)
    {
    case 3:
        if ((ch = end[-3] ^ 0x80) >= 0x40) break;
        res = (res << 6) | ch;
        (*str)++;
        if (res < 0x10) break;  /* overlong 4-byte form */
        /* fall through */
    case 2:
        if ((ch = end[-2] ^ 0x80) >= 0x40) break;
        res = (res << 6) | ch;
        if (res >= 0x110000 >> 6) break;  /* above the Unicode range */
        (*str)++;
        if (res < 0x20) break;  /* overlong 3-byte form */
        if (res >= 0xd800 >> 6 && res <= 0xdfff >> 6) break;  /* surrogate */
        /* fall through */
    case 1:
        if ((ch = end[-1] ^ 0x80) >= 0x40) break;
        res = (res << 6) | ch;
        (*str)++;
        if (res < 0x80) break;  /* overlong 2-byte form */
        return res;
    }
    /* len == 0 (stray continuation or invalid lead byte) also ends up here */
    return ~0u;
}
191 static inline void init_codepage_table( USHORT *ptr, CPTABLEINFO *info )
193 USHORT hdr_size = ptr[0];
195 info->CodePage = ptr[1];
196 info->MaximumCharacterSize = ptr[2];
197 info->DefaultChar = ptr[3];
198 info->UniDefaultChar = ptr[4];
199 info->TransDefaultChar = ptr[5];
200 info->TransUniDefaultChar = ptr[6];
201 memcpy( info->LeadByte, ptr + 7, sizeof(info->LeadByte) );
202 ptr += hdr_size;
204 info->WideCharTable = ptr + ptr[0] + 1;
205 info->MultiByteTable = ++ptr;
206 ptr += 256;
207 if (*ptr++) ptr += 256; /* glyph table */
208 info->DBCSRanges = ptr;
209 if (*ptr) /* dbcs ranges */
211 info->DBCSCodePage = 1;
212 info->DBCSOffsets = ptr + 1;
214 else
216 info->DBCSCodePage = 0;
217 info->DBCSOffsets = NULL;
222 static inline int compare_locale_names( const WCHAR *n1, const WCHAR *n2 )
224 for (;;)
226 WCHAR ch1 = casemap_ascii( *n1++ );
227 WCHAR ch2 = casemap_ascii( *n2++ );
228 if (ch1 == '_') ch1 = '-';
229 if (ch2 == '_') ch2 = '-';
230 if (!ch1 || ch1 != ch2) return ch1 - ch2;
235 static inline const NLS_LOCALE_LCNAME_INDEX *find_lcname_entry( const NLS_LOCALE_HEADER *header,
236 const WCHAR *name )
238 const WCHAR *strings = (const WCHAR *)((char *)header + header->strings_offset);
239 const NLS_LOCALE_LCNAME_INDEX *index = (const NLS_LOCALE_LCNAME_INDEX *)((char *)header + header->lcnames_offset);
240 int min = 0, max = header->nb_lcnames - 1;
242 if (!name) return NULL;
243 while (min <= max)
245 int res, pos = (min + max) / 2;
246 const WCHAR *str = strings + index[pos].name;
247 res = compare_locale_names( name, str + 1 );
248 if (res < 0) max = pos - 1;
249 else if (res > 0) min = pos + 1;
250 else return &index[pos];
252 return NULL;
256 static inline const NLS_LOCALE_LCID_INDEX *find_lcid_entry( const NLS_LOCALE_HEADER *header, LCID lcid )
258 const NLS_LOCALE_LCID_INDEX *index = (const NLS_LOCALE_LCID_INDEX *)((char *)header + header->lcids_offset);
259 int min = 0, max = header->nb_lcids - 1;
261 while (min <= max)
263 int pos = (min + max) / 2;
264 if (lcid < index[pos].id) max = pos - 1;
265 else if (lcid > index[pos].id) min = pos + 1;
266 else return &index[pos];
268 return NULL;
272 static inline const NLS_LOCALE_DATA *get_locale_data( const NLS_LOCALE_HEADER *header, UINT idx )
274 ULONG offset = header->locales_offset + idx * header->locale_size;
275 return (const NLS_LOCALE_DATA *)((const char *)header + offset);
279 static inline unsigned int cp_mbstowcs_size( const CPTABLEINFO *info, const char *str, unsigned int len )
281 unsigned int res;
283 if (!info->DBCSCodePage) return len;
285 for (res = 0; len; len--, str++, res++)
287 if (info->DBCSOffsets[(unsigned char)*str] && len > 1)
289 str++;
290 len--;
293 return res;
297 static inline unsigned int cp_wcstombs_size( const CPTABLEINFO *info, const WCHAR *str, unsigned int len )
299 if (info->DBCSCodePage)
301 WCHAR *uni2cp = info->WideCharTable;
302 unsigned int res;
304 for (res = 0; len; len--, str++, res++)
305 if (uni2cp[*str] & 0xff00) res++;
306 return res;
308 else return len;
312 static inline NTSTATUS utf8_wcstombs_size( const WCHAR *src, unsigned int srclen, unsigned int *reslen )
314 unsigned int val, len;
315 NTSTATUS status = STATUS_SUCCESS;
317 for (len = 0; srclen; srclen--, src++)
319 if (*src < 0x80) len++; /* 0x00-0x7f: 1 byte */
320 else if (*src < 0x800) len += 2; /* 0x80-0x7ff: 2 bytes */
321 else
323 if (!get_utf16( src, srclen, &val ))
325 val = 0xfffd;
326 status = STATUS_SOME_NOT_MAPPED;
328 if (val < 0x10000) len += 3; /* 0x800-0xffff: 3 bytes */
329 else /* 0x10000-0x10ffff: 4 bytes */
331 len += 4;
332 src++;
333 srclen--;
337 *reslen = len;
338 return status;
342 static inline NTSTATUS utf8_mbstowcs_size( const char *src, unsigned int srclen, unsigned int *reslen )
344 unsigned int res, len;
345 NTSTATUS status = STATUS_SUCCESS;
346 const char *srcend = src + srclen;
348 for (len = 0; src < srcend; len++)
350 unsigned char ch = *src++;
351 if (ch < 0x80) continue;
352 if ((res = decode_utf8_char( ch, &src, srcend )) > 0x10ffff)
353 status = STATUS_SOME_NOT_MAPPED;
354 else
355 if (res > 0xffff) len++;
357 *reslen = len;
358 return status;
362 static inline unsigned int cp_mbstowcs( const CPTABLEINFO *info, WCHAR *dst, unsigned int dstlen,
363 const char *src, unsigned int srclen )
365 unsigned int i, ret;
367 if (info->DBCSOffsets)
369 for (i = dstlen; srclen && i; i--, srclen--, src++, dst++)
371 USHORT off = info->DBCSOffsets[(unsigned char)*src];
372 if (off && srclen > 1)
374 src++;
375 srclen--;
376 *dst = info->DBCSOffsets[off + (unsigned char)*src];
378 else *dst = info->MultiByteTable[(unsigned char)*src];
380 ret = dstlen - i;
382 else
384 ret = min( srclen, dstlen );
385 for (i = 0; i < ret; i++) dst[i] = info->MultiByteTable[(unsigned char)src[i]];
387 return ret;
391 static inline unsigned int cp_wcstombs( const CPTABLEINFO *info, char *dst, unsigned int dstlen,
392 const WCHAR *src, unsigned int srclen )
394 unsigned int i, ret;
396 if (info->DBCSCodePage)
398 const WCHAR *uni2cp = info->WideCharTable;
400 for (i = dstlen; srclen && i; i--, srclen--, src++)
402 if (uni2cp[*src] & 0xff00)
404 if (i == 1) break; /* do not output a partial char */
405 i--;
406 *dst++ = uni2cp[*src] >> 8;
408 *dst++ = (char)uni2cp[*src];
410 ret = dstlen - i;
412 else
414 const char *uni2cp = info->WideCharTable;
415 ret = min( srclen, dstlen );
416 for (i = 0; i < ret; i++) dst[i] = uni2cp[src[i]];
418 return ret;
422 static inline NTSTATUS utf8_mbstowcs( WCHAR *dst, unsigned int dstlen, unsigned int *reslen,
423 const char *src, unsigned int srclen )
425 unsigned int res;
426 NTSTATUS status = STATUS_SUCCESS;
427 const char *srcend = src + srclen;
428 WCHAR *dstend = dst + dstlen;
430 while ((dst < dstend) && (src < srcend))
432 unsigned char ch = *src++;
433 if (ch < 0x80) /* special fast case for 7-bit ASCII */
435 *dst++ = ch;
436 continue;
438 if ((res = decode_utf8_char( ch, &src, srcend )) <= 0xffff)
440 *dst++ = res;
442 else if (res <= 0x10ffff) /* we need surrogates */
444 res -= 0x10000;
445 *dst++ = 0xd800 | (res >> 10);
446 if (dst == dstend) break;
447 *dst++ = 0xdc00 | (res & 0x3ff);
449 else
451 *dst++ = 0xfffd;
452 status = STATUS_SOME_NOT_MAPPED;
455 if (src < srcend) status = STATUS_BUFFER_TOO_SMALL; /* overflow */
456 *reslen = dstlen - (dstend - dst);
457 return status;
461 static inline NTSTATUS utf8_wcstombs( char *dst, unsigned int dstlen, unsigned int *reslen,
462 const WCHAR *src, unsigned int srclen )
464 char *end;
465 unsigned int val;
466 NTSTATUS status = STATUS_SUCCESS;
468 for (end = dst + dstlen; srclen; srclen--, src++)
470 WCHAR ch = *src;
472 if (ch < 0x80) /* 0x00-0x7f: 1 byte */
474 if (dst > end - 1) break;
475 *dst++ = ch;
476 continue;
478 if (ch < 0x800) /* 0x80-0x7ff: 2 bytes */
480 if (dst > end - 2) break;
481 dst[1] = 0x80 | (ch & 0x3f);
482 ch >>= 6;
483 dst[0] = 0xc0 | ch;
484 dst += 2;
485 continue;
487 if (!get_utf16( src, srclen, &val ))
489 val = 0xfffd;
490 status = STATUS_SOME_NOT_MAPPED;
492 if (val < 0x10000) /* 0x800-0xffff: 3 bytes */
494 if (dst > end - 3) break;
495 dst[2] = 0x80 | (val & 0x3f);
496 val >>= 6;
497 dst[1] = 0x80 | (val & 0x3f);
498 val >>= 6;
499 dst[0] = 0xe0 | val;
500 dst += 3;
502 else /* 0x10000-0x10ffff: 4 bytes */
504 if (dst > end - 4) break;
505 dst[3] = 0x80 | (val & 0x3f);
506 val >>= 6;
507 dst[2] = 0x80 | (val & 0x3f);
508 val >>= 6;
509 dst[1] = 0x80 | (val & 0x3f);
510 val >>= 6;
511 dst[0] = 0xf0 | val;
512 dst += 4;
513 src++;
514 srclen--;
517 if (srclen) status = STATUS_BUFFER_TOO_SMALL;
518 *reslen = dstlen - (end - dst);
519 return status;
/* Constants for arithmetic Hangul syllable (de)composition:
 * base code points of the syllable, L, V and T ranges, and their sizes. */
#define HANGUL_SBASE   0xac00
#define HANGUL_LBASE   0x1100
#define HANGUL_VBASE   0x1161
#define HANGUL_TBASE   0x11a7
#define HANGUL_LCOUNT  19
#define HANGUL_VCOUNT  21
#define HANGUL_TCOUNT  28
#define HANGUL_NCOUNT  (HANGUL_VCOUNT * HANGUL_TCOUNT)  /* syllables per L */
#define HANGUL_SCOUNT  (HANGUL_LCOUNT * HANGUL_NCOUNT)  /* total syllables */
533 static inline const WCHAR *get_decomposition( const struct norm_table *info, unsigned int ch,
534 BYTE props, WCHAR *buffer, unsigned int *ret_len )
536 const struct pair { WCHAR src; USHORT dst; } *pairs;
537 const USHORT *hash_table = (const USHORT *)info + info->decomp_hash;
538 const WCHAR *ret;
539 unsigned int i, pos, end, len, hash;
541 /* default to no decomposition */
542 put_utf16( buffer, ch );
543 *ret_len = 1 + (ch >= 0x10000);
544 if (!props || props == 0x7f) return buffer;
546 if (props == 0xff) /* Hangul or invalid char */
548 if (ch >= HANGUL_SBASE && ch < HANGUL_SBASE + HANGUL_SCOUNT)
550 unsigned short sindex = ch - HANGUL_SBASE;
551 unsigned short tindex = sindex % HANGUL_TCOUNT;
552 buffer[0] = HANGUL_LBASE + sindex / HANGUL_NCOUNT;
553 buffer[1] = HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT;
554 if (tindex) buffer[2] = HANGUL_TBASE + tindex;
555 *ret_len = 2 + !!tindex;
556 return buffer;
558 /* ignore other chars in Hangul range */
559 if (ch >= HANGUL_LBASE && ch < HANGUL_LBASE + 0x100) return buffer;
560 if (ch >= HANGUL_SBASE && ch < HANGUL_SBASE + 0x2c00) return buffer;
561 return NULL;
564 hash = ch % info->decomp_size;
565 pos = hash_table[hash];
566 if (pos >> 13)
568 if (props != 0xbf) return buffer;
569 ret = (const USHORT *)info + info->decomp_seq + (pos & 0x1fff);
570 len = pos >> 13;
572 else
574 pairs = (const struct pair *)((const USHORT *)info + info->decomp_map);
576 /* find the end of the hash bucket */
577 for (i = hash + 1; i < info->decomp_size; i++) if (!(hash_table[i] >> 13)) break;
578 if (i < info->decomp_size) end = hash_table[i];
579 else for (end = pos; pairs[end].src; end++) ;
581 for ( ; pos < end; pos++)
583 if (pairs[pos].src != (WCHAR)ch) continue;
584 ret = (const USHORT *)info + info->decomp_seq + (pairs[pos].dst & 0x1fff);
585 len = pairs[pos].dst >> 13;
586 break;
588 if (pos >= end) return buffer;
591 if (len == 7) while (ret[len]) len++;
592 if (!ret[0]) len = 0; /* ignored char */
593 *ret_len = len;
594 return ret;
598 static inline BYTE rol( BYTE val, BYTE count )
600 return (val << count) | (val >> (8 - count));
604 static inline BYTE get_char_props( const struct norm_table *info, unsigned int ch )
606 const BYTE *level1 = (const BYTE *)((const USHORT *)info + info->props_level1);
607 const BYTE *level2 = (const BYTE *)((const USHORT *)info + info->props_level2);
608 BYTE off = level1[ch / 128];
610 if (!off || off >= 0xfb) return rol( off, 5 );
611 return level2[(off - 1) * 128 + ch % 128];
615 static inline BYTE get_combining_class( const struct norm_table *info, unsigned int c )
617 const BYTE *classes = (const BYTE *)((const USHORT *)info + info->classes);
618 BYTE class = get_char_props( info, c ) & 0x3f;
620 if (class == 0x3f) return 0;
621 return classes[class];
625 static inline BOOL reorderable_pair( const struct norm_table *info, unsigned int c1, unsigned int c2 )
627 BYTE ccc1, ccc2;
629 /* reorderable if ccc1 > ccc2 > 0 */
630 ccc1 = get_combining_class( info, c1 );
631 if (ccc1 < 2) return FALSE;
632 ccc2 = get_combining_class( info, c2 );
633 return ccc2 && (ccc1 > ccc2);
636 static inline void canonical_order_substring( const struct norm_table *info, WCHAR *str, unsigned int len )
638 unsigned int i, ch1, ch2, len1, len2;
639 BOOL swapped;
643 swapped = FALSE;
644 for (i = 0; i < len - 1; i += len1)
646 if (!(len1 = get_utf16( str + i, len - i, &ch1 ))) break;
647 if (i + len1 >= len) break;
648 if (!(len2 = get_utf16( str + i + len1, len - i - len1, &ch2 ))) break;
649 if (reorderable_pair( info, ch1, ch2 ))
651 WCHAR tmp[2];
652 memcpy( tmp, str + i, len1 * sizeof(WCHAR) );
653 memcpy( str + i, str + i + len1, len2 * sizeof(WCHAR) );
654 memcpy( str + i + len2, tmp, len1 * sizeof(WCHAR) );
655 swapped = TRUE;
656 i += len2 - len1;
659 } while (swapped);
663 /* reorder the string into canonical order - D108/D109 */
664 static inline void canonical_order_string( const struct norm_table *info, WCHAR *str, unsigned int len )
666 unsigned int ch, i, r, next = 0;
668 for (i = 0; i < len; i += r)
670 if (!(r = get_utf16( str + i, len - i, &ch ))) return;
671 if (i && !get_combining_class( info, ch ))
673 if (i > next + 1) /* at least two successive non-starters */
674 canonical_order_substring( info, str + next, i - next );
675 next = i + r;
678 if (i > next + 1) canonical_order_substring( info, str + next, i - next );
682 static inline NTSTATUS decompose_string( const struct norm_table *info, const WCHAR *src, int src_len,
683 WCHAR *dst, int *dst_len )
685 BYTE props;
686 int src_pos, dst_pos;
687 unsigned int ch, len, decomp_len;
688 WCHAR buffer[3];
689 const WCHAR *decomp;
691 for (src_pos = dst_pos = 0; src_pos < src_len; src_pos += len)
693 if (!(len = get_utf16( src + src_pos, src_len - src_pos, &ch )))
695 *dst_len = src_pos + IS_HIGH_SURROGATE( src[src_pos] );
696 return STATUS_NO_UNICODE_TRANSLATION;
698 props = get_char_props( info, ch );
699 if (!(decomp = get_decomposition( info, ch, props, buffer, &decomp_len )))
701 /* allow final null */
702 if (!ch && src_pos == src_len - 1 && dst_pos < *dst_len)
704 dst[dst_pos++] = 0;
705 break;
707 *dst_len = src_pos;
708 return STATUS_NO_UNICODE_TRANSLATION;
710 if (dst_pos + decomp_len > *dst_len)
712 *dst_len += (src_len - src_pos) * info->len_factor;
713 return STATUS_BUFFER_TOO_SMALL;
715 memcpy( dst + dst_pos, decomp, decomp_len * sizeof(WCHAR) );
716 dst_pos += decomp_len;
719 canonical_order_string( info, dst, dst_pos );
720 *dst_len = dst_pos;
721 return STATUS_SUCCESS;
725 static inline unsigned int compose_hangul( unsigned int ch1, unsigned int ch2 )
727 if (ch1 >= HANGUL_LBASE && ch1 < HANGUL_LBASE + HANGUL_LCOUNT)
729 int lindex = ch1 - HANGUL_LBASE;
730 int vindex = ch2 - HANGUL_VBASE;
731 if (vindex >= 0 && vindex < HANGUL_VCOUNT)
732 return HANGUL_SBASE + (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT;
734 if (ch1 >= HANGUL_SBASE && ch1 < HANGUL_SBASE + HANGUL_SCOUNT)
736 int sindex = ch1 - HANGUL_SBASE;
737 if (!(sindex % HANGUL_TCOUNT))
739 int tindex = ch2 - HANGUL_TBASE;
740 if (tindex > 0 && tindex < HANGUL_TCOUNT) return ch1 + tindex;
743 return 0;
747 static inline unsigned int compose_chars( const struct norm_table *info, unsigned int ch1, unsigned int ch2 )
749 const USHORT *table = (const USHORT *)info + info->comp_hash;
750 const WCHAR *chars = (const USHORT *)info + info->comp_seq;
751 unsigned int hash, start, end, i, len, ch[3];
753 hash = (ch1 + 95 * ch2) % info->comp_size;
754 start = table[hash];
755 end = table[hash + 1];
756 while (start < end)
758 for (i = 0; i < 3; i++, start += len) len = get_utf16( chars + start, end - start, ch + i );
759 if (ch[0] == ch1 && ch[1] == ch2) return ch[2];
761 return 0;
765 static inline unsigned int compose_string( const struct norm_table *info, WCHAR *str, unsigned int srclen )
767 unsigned int i, ch, comp, len, start_ch = 0, last_starter = srclen;
768 BYTE class, prev_class = 0;
770 for (i = 0; i < srclen; i += len)
772 if (!(len = get_utf16( str + i, srclen - i, &ch ))) return 0;
773 class = get_combining_class( info, ch );
774 if (last_starter == srclen || (prev_class && prev_class >= class) ||
775 (!(comp = compose_hangul( start_ch, ch )) &&
776 !(comp = compose_chars( info, start_ch, ch ))))
778 if (!class)
780 last_starter = i;
781 start_ch = ch;
783 prev_class = class;
785 else
787 int comp_len = 1 + (comp >= 0x10000);
788 int start_len = 1 + (start_ch >= 0x10000);
790 if (comp_len != start_len)
791 memmove( str + last_starter + comp_len, str + last_starter + start_len,
792 (i - (last_starter + start_len)) * sizeof(WCHAR) );
793 memmove( str + i + comp_len - start_len, str + i + len, (srclen - i - len) * sizeof(WCHAR) );
794 srclen += comp_len - start_len - len;
795 start_ch = comp;
796 i = last_starter;
797 len = comp_len;
798 prev_class = 0;
799 put_utf16( str + i, comp );
802 return srclen;
806 #endif /* __NTDLL_LOCALE_PRIVATE_H */