dbghelp/dwarf: Don't unmap the fmap of a DWZ module twice.
[wine.git] / dlls / ntdll / locale.c
blobfe185d403f29b203780317f2622fc0dcf69a08c8
1 /*
2 * Locale functions
4 * Copyright 2004, 2019 Alexandre Julliard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define NONAMELESSUNION
23 #include <stdarg.h>
24 #include <string.h>
25 #include <stdlib.h>
27 #include "ntstatus.h"
28 #define WIN32_NO_STATUS
29 #include "windef.h"
30 #include "winbase.h"
31 #include "winnls.h"
32 #include "ntdll_misc.h"
33 #include "wine/debug.h"
35 WINE_DEFAULT_DEBUG_CHANNEL(nls);
37 /* NLS codepage file format:
39 * header:
40 * WORD offset to cp2uni table in words
41 * WORD CodePage
42 * WORD MaximumCharacterSize
43 * BYTE[2] DefaultChar
44 * WORD UniDefaultChar
45 * WORD TransDefaultChar
46 * WORD TransUniDefaultChar
47 * BYTE[12] LeadByte
48 * cp2uni table:
49 * WORD offset to uni2cp table in words
50 * WORD[256] cp2uni table
51 * WORD glyph table size
52 * WORD[glyph_table_size] glyph table
53 * WORD number of lead byte ranges
54 * WORD[256] lead byte offsets in words
55 * WORD[leadbytes][256] cp2uni table for lead bytes
56 * uni2cp table:
57 * WORD 0 / 4
58 * BYTE[65536] / WORD[65536] uni2cp table
/* NLS section type identifiers; NLS_SECTION_NORMALIZE is the one handed to
 * NtGetNlsSectionPtr() when a normalization table is loaded. */
enum nls_section_type
{
    NLS_SECTION_SORTKEYS  = 9,
    NLS_SECTION_CASEMAP   = 10,
    NLS_SECTION_CODEPAGE  = 11,
    NLS_SECTION_NORMALIZE = 12
};
69 UINT NlsAnsiCodePage = 0;
70 BYTE NlsMbCodePageTag = 0;
71 BYTE NlsMbOemCodePageTag = 0;
73 /* NLS normalization file */
74 struct norm_table
76 WCHAR name[13]; /* 00 file name */
77 USHORT checksum[3]; /* 1a checksum? */
78 USHORT version[4]; /* 20 Unicode version */
79 USHORT form; /* 28 normalization form */
80 USHORT len_factor; /* 2a factor for length estimates */
81 USHORT unknown1; /* 2c */
82 USHORT decomp_size; /* 2e decomposition hash size */
83 USHORT comp_size; /* 30 composition hash size */
84 USHORT unknown2; /* 32 */
85 USHORT classes; /* 34 combining classes table offset */
86 USHORT props_level1; /* 36 char properties table level 1 offset */
87 USHORT props_level2; /* 38 char properties table level 2 offset */
88 USHORT decomp_hash; /* 3a decomposition hash table offset */
89 USHORT decomp_map; /* 3c decomposition character map table offset */
90 USHORT decomp_seq; /* 3e decomposition character sequences offset */
91 USHORT comp_hash; /* 40 composition hash table offset */
92 USHORT comp_seq; /* 42 composition character sequences offset */
93 /* BYTE[] combining class values */
94 /* BYTE[0x2200] char properties index level 1 */
95 /* BYTE[] char properties index level 2 */
96 /* WORD[] decomposition hash table */
97 /* WORD[] decomposition character map */
98 /* WORD[] decomposition character sequences */
99 /* WORD[] composition hash table */
100 /* WORD[] composition character sequences */
103 static NLSTABLEINFO nls_info;
104 static struct norm_table *norm_tables[16];
107 static NTSTATUS load_string( ULONG id, LANGID lang, WCHAR *buffer, ULONG len )
109 const IMAGE_RESOURCE_DATA_ENTRY *data;
110 LDR_RESOURCE_INFO info;
111 NTSTATUS status;
112 WCHAR *p;
113 int i;
115 info.Type = 6; /* RT_STRING */
116 info.Name = (id >> 4) + 1;
117 info.Language = lang;
118 if ((status = LdrFindResource_U( kernel32_handle, &info, 3, &data ))) return status;
119 p = (WCHAR *)((char *)kernel32_handle + data->OffsetToData);
120 for (i = 0; i < (id & 0x0f); i++) p += *p + 1;
121 if (*p >= len) return STATUS_BUFFER_TOO_SMALL;
122 memcpy( buffer, p + 1, *p * sizeof(WCHAR) );
123 buffer[*p] = 0;
124 return STATUS_SUCCESS;
128 static DWORD mbtowc_size( const CPTABLEINFO *info, LPCSTR str, UINT len )
130 DWORD res;
132 if (!info->DBCSCodePage) return len;
134 for (res = 0; len; len--, str++, res++)
136 if (info->DBCSOffsets[(unsigned char)*str] && len > 1)
138 str++;
139 len--;
142 return res;
146 static DWORD wctomb_size( const CPTABLEINFO *info, LPCWSTR str, UINT len )
148 if (info->DBCSCodePage)
150 WCHAR *uni2cp = info->WideCharTable;
151 DWORD res;
153 for (res = 0; len; len--, str++, res++)
154 if (uni2cp[*str] & 0xff00) res++;
155 return res;
157 else return len;
161 static WCHAR casemap( USHORT *table, WCHAR ch )
163 return ch + table[table[table[ch >> 8] + ((ch >> 4) & 0x0f)] + (ch & 0x0f)];
167 static WCHAR casemap_ascii( WCHAR ch )
169 if (ch >= 'a' && ch <= 'z') ch -= 'a' - 'A';
170 return ch;
174 static int get_utf16( const WCHAR *src, unsigned int srclen, unsigned int *ch )
176 if (IS_HIGH_SURROGATE( src[0] ))
178 if (srclen <= 1) return 0;
179 if (!IS_LOW_SURROGATE( src[1] )) return 0;
180 *ch = 0x10000 + ((src[0] & 0x3ff) << 10) + (src[1] & 0x3ff);
181 return 2;
183 if (IS_LOW_SURROGATE( src[0] )) return 0;
184 *ch = src[0];
185 return 1;
188 static void put_utf16( WCHAR *dst, unsigned int ch )
190 if (ch >= 0x10000)
192 ch -= 0x10000;
193 dst[0] = 0xd800 | (ch >> 10);
194 dst[1] = 0xdc00 | (ch & 0x3ff);
196 else dst[0] = ch;
200 static NTSTATUS load_norm_table( ULONG form, const struct norm_table **info )
202 unsigned int i;
203 USHORT *data, *tables;
204 SIZE_T size;
205 NTSTATUS status;
207 if (!form) return STATUS_INVALID_PARAMETER;
208 if (form >= ARRAY_SIZE(norm_tables)) return STATUS_OBJECT_NAME_NOT_FOUND;
210 if (!norm_tables[form])
212 if ((status = NtGetNlsSectionPtr( NLS_SECTION_NORMALIZE, form, NULL, (void **)&data, &size )))
213 return status;
215 /* sanity checks */
217 if (size <= 0x44) goto invalid;
218 if (data[0x14] != form) goto invalid;
219 tables = data + 0x1a;
220 for (i = 0; i < 8; i++)
222 if (tables[i] > size / sizeof(USHORT)) goto invalid;
223 if (i && tables[i] < tables[i-1]) goto invalid;
226 if (InterlockedCompareExchangePointer( (void **)&norm_tables[form], data, NULL ))
227 NtUnmapViewOfSection( GetCurrentProcess(), data );
229 *info = norm_tables[form];
230 return STATUS_SUCCESS;
232 invalid:
233 NtUnmapViewOfSection( GetCurrentProcess(), data );
234 return STATUS_INVALID_PARAMETER;
238 static BYTE rol( BYTE val, BYTE count )
240 return (val << count) | (val >> (8 - count));
244 static BYTE get_char_props( const struct norm_table *info, unsigned int ch )
246 const BYTE *level1 = (const BYTE *)((const USHORT *)info + info->props_level1);
247 const BYTE *level2 = (const BYTE *)((const USHORT *)info + info->props_level2);
248 BYTE off = level1[ch / 128];
250 if (!off || off >= 0xfb) return rol( off, 5 );
251 return level2[(off - 1) * 128 + ch % 128];
/* Constants for the arithmetic Hangul syllable (de)composition */
#define HANGUL_SBASE  0xac00  /* first precomposed syllable */
#define HANGUL_LBASE  0x1100  /* first leading consonant */
#define HANGUL_VBASE  0x1161  /* first vowel */
#define HANGUL_TBASE  0x11a7  /* one before the first trailing consonant */
#define HANGUL_LCOUNT 19
#define HANGUL_VCOUNT 21
#define HANGUL_TCOUNT 28
#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)  /* syllables per leading consonant */
#define HANGUL_SCOUNT (HANGUL_LCOUNT * HANGUL_NCOUNT)  /* total number of syllables */
265 static const WCHAR *get_decomposition( const struct norm_table *info, unsigned int ch,
266 BYTE props, WCHAR *buffer, unsigned int *ret_len )
268 const struct pair { WCHAR src; USHORT dst; } *pairs;
269 const USHORT *hash_table = (const USHORT *)info + info->decomp_hash;
270 const WCHAR *ret;
271 unsigned int i, pos, end, len, hash;
273 /* default to no decomposition */
274 put_utf16( buffer, ch );
275 *ret_len = 1 + (ch >= 0x10000);
276 if (!props || props == 0x7f) return buffer;
278 if (props == 0xff) /* Hangul or invalid char */
280 if (ch >= HANGUL_SBASE && ch < HANGUL_SBASE + HANGUL_SCOUNT)
282 unsigned short sindex = ch - HANGUL_SBASE;
283 unsigned short tindex = sindex % HANGUL_TCOUNT;
284 buffer[0] = HANGUL_LBASE + sindex / HANGUL_NCOUNT;
285 buffer[1] = HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT;
286 if (tindex) buffer[2] = HANGUL_TBASE + tindex;
287 *ret_len = 2 + !!tindex;
288 return buffer;
290 /* ignore other chars in Hangul range */
291 if (ch >= HANGUL_LBASE && ch < HANGUL_LBASE + 0x100) return buffer;
292 if (ch >= HANGUL_SBASE && ch < HANGUL_SBASE + 0x2c00) return buffer;
293 return NULL;
296 hash = ch % info->decomp_size;
297 pos = hash_table[hash];
298 if (pos >> 13)
300 if (props != 0xbf) return buffer;
301 ret = (const USHORT *)info + info->decomp_seq + (pos & 0x1fff);
302 len = pos >> 13;
304 else
306 pairs = (const struct pair *)((const USHORT *)info + info->decomp_map);
308 /* find the end of the hash bucket */
309 for (i = hash + 1; i < info->decomp_size; i++) if (!(hash_table[i] >> 13)) break;
310 if (i < info->decomp_size) end = hash_table[i];
311 else for (end = pos; pairs[end].src; end++) ;
313 for ( ; pos < end; pos++)
315 if (pairs[pos].src != (WCHAR)ch) continue;
316 ret = (const USHORT *)info + info->decomp_seq + (pairs[pos].dst & 0x1fff);
317 len = pairs[pos].dst >> 13;
318 break;
320 if (pos >= end) return buffer;
323 if (len == 7) while (ret[len]) len++;
324 if (!ret[0]) len = 0; /* ignored char */
325 *ret_len = len;
326 return ret;
330 static BYTE get_combining_class( const struct norm_table *info, unsigned int c )
332 const BYTE *classes = (const BYTE *)((const USHORT *)info + info->classes);
333 BYTE class = get_char_props( info, c ) & 0x3f;
335 if (class == 0x3f) return 0;
336 return classes[class];
340 static BOOL is_starter( const struct norm_table *info, unsigned int c )
342 return !get_combining_class( info, c );
346 static BOOL reorderable_pair( const struct norm_table *info, unsigned int c1, unsigned int c2 )
348 BYTE ccc1, ccc2;
350 /* reorderable if ccc1 > ccc2 > 0 */
351 ccc1 = get_combining_class( info, c1 );
352 if (ccc1 < 2) return FALSE;
353 ccc2 = get_combining_class( info, c2 );
354 return ccc2 && (ccc1 > ccc2);
357 static void canonical_order_substring( const struct norm_table *info, WCHAR *str, unsigned int len )
359 unsigned int i, ch1, ch2, len1, len2;
360 BOOL swapped;
364 swapped = FALSE;
365 for (i = 0; i < len - 1; i += len1)
367 if (!(len1 = get_utf16( str + i, len - i, &ch1 ))) break;
368 if (i + len1 >= len) break;
369 if (!(len2 = get_utf16( str + i + len1, len - i - len1, &ch2 ))) break;
370 if (reorderable_pair( info, ch1, ch2 ))
372 WCHAR tmp[2];
373 memcpy( tmp, str + i, len1 * sizeof(WCHAR) );
374 memcpy( str + i, str + i + len1, len2 * sizeof(WCHAR) );
375 memcpy( str + i + len2, tmp, len1 * sizeof(WCHAR) );
376 swapped = TRUE;
377 i += len2 - len1;
380 } while (swapped);
384 /****************************************************************************
385 * canonical_order_string
387 * Reorder the string into canonical order - D108/D109.
389 * Starters (chars with combining class == 0) don't move, so look for continuous
390 * substrings of non-starters and only reorder those.
392 static void canonical_order_string( const struct norm_table *info, WCHAR *str, unsigned int len )
394 unsigned int ch, i, r, next = 0;
396 for (i = 0; i < len; i += r)
398 if (!(r = get_utf16( str + i, len - i, &ch ))) return;
399 if (i && is_starter( info, ch ))
401 if (i > next + 1) /* at least two successive non-starters */
402 canonical_order_substring( info, str + next, i - next );
403 next = i + r;
406 if (i > next + 1) canonical_order_substring( info, str + next, i - next );
410 static NTSTATUS decompose_string( const struct norm_table *info, const WCHAR *src, int src_len,
411 WCHAR *dst, int *dst_len )
413 BYTE props;
414 int src_pos, dst_pos;
415 unsigned int ch, len, decomp_len;
416 WCHAR buffer[3];
417 const WCHAR *decomp;
419 for (src_pos = dst_pos = 0; src_pos < src_len; src_pos += len)
421 if (!(len = get_utf16( src + src_pos, src_len - src_pos, &ch )))
423 *dst_len = src_pos + IS_HIGH_SURROGATE( src[src_pos] );
424 return STATUS_NO_UNICODE_TRANSLATION;
426 props = get_char_props( info, ch );
427 if (!(decomp = get_decomposition( info, ch, props, buffer, &decomp_len )))
429 /* allow final null */
430 if (!ch && src_pos == src_len - 1 && dst_pos < *dst_len)
432 dst[dst_pos++] = 0;
433 break;
435 *dst_len = src_pos;
436 return STATUS_NO_UNICODE_TRANSLATION;
438 if (dst_pos + decomp_len > *dst_len)
440 *dst_len += (src_len - src_pos) * info->len_factor;
441 return STATUS_BUFFER_TOO_SMALL;
443 memcpy( dst + dst_pos, decomp, decomp_len * sizeof(WCHAR) );
444 dst_pos += decomp_len;
447 canonical_order_string( info, dst, dst_pos );
448 *dst_len = dst_pos;
449 return STATUS_SUCCESS;
453 static unsigned int compose_hangul( unsigned int ch1, unsigned int ch2 )
455 if (ch1 >= HANGUL_LBASE && ch1 < HANGUL_LBASE + HANGUL_LCOUNT)
457 int lindex = ch1 - HANGUL_LBASE;
458 int vindex = ch2 - HANGUL_VBASE;
459 if (vindex >= 0 && vindex < HANGUL_VCOUNT)
460 return HANGUL_SBASE + (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT;
462 if (ch1 >= HANGUL_SBASE && ch1 < HANGUL_SBASE + HANGUL_SCOUNT)
464 int sindex = ch1 - HANGUL_SBASE;
465 if (!(sindex % HANGUL_TCOUNT))
467 int tindex = ch2 - HANGUL_TBASE;
468 if (tindex > 0 && tindex < HANGUL_TCOUNT) return ch1 + tindex;
471 return 0;
475 static unsigned int compose_chars( const struct norm_table *info, unsigned int ch1, unsigned int ch2 )
477 const USHORT *table = (const USHORT *)info + info->comp_hash;
478 const WCHAR *chars = (const USHORT *)info + info->comp_seq;
479 unsigned int hash, start, end, i, len, ch[3];
481 hash = (ch1 + 95 * ch2) % info->comp_size;
482 start = table[hash];
483 end = table[hash + 1];
484 while (start < end)
486 for (i = 0; i < 3; i++, start += len) len = get_utf16( chars + start, end - start, ch + i );
487 if (ch[0] == ch1 && ch[1] == ch2) return ch[2];
489 return 0;
492 static unsigned int compose_string( const struct norm_table *info, WCHAR *str, unsigned int srclen )
494 unsigned int i, ch, comp, len, start_ch = 0, last_starter = srclen;
495 BYTE class, prev_class = 0;
497 for (i = 0; i < srclen; i += len)
499 if (!(len = get_utf16( str + i, srclen - i, &ch ))) return 0;
500 class = get_combining_class( info, ch );
501 if (last_starter == srclen || (prev_class && prev_class >= class) ||
502 (!(comp = compose_hangul( start_ch, ch )) &&
503 !(comp = compose_chars( info, start_ch, ch ))))
505 if (!class)
507 last_starter = i;
508 start_ch = ch;
510 prev_class = class;
512 else
514 int comp_len = 1 + (comp >= 0x10000);
515 int start_len = 1 + (start_ch >= 0x10000);
517 if (comp_len != start_len)
518 memmove( str + last_starter + comp_len, str + last_starter + start_len,
519 (i - (last_starter + start_len)) * sizeof(WCHAR) );
520 memmove( str + i + comp_len - start_len, str + i + len, (srclen - i - len) * sizeof(WCHAR) );
521 srclen += comp_len - start_len - len;
522 start_ch = comp;
523 i = last_starter;
524 len = comp_len;
525 prev_class = 0;
526 put_utf16( str + i, comp );
529 return srclen;
533 static NTSTATUS get_dummy_preferred_ui_language( DWORD flags, LANGID lang, ULONG *count,
534 WCHAR *buffer, ULONG *size )
536 WCHAR name[LOCALE_NAME_MAX_LENGTH + 2];
537 NTSTATUS status;
538 ULONG len;
540 FIXME("(0x%x %p %p %p) returning a dummy value (current locale)\n", flags, count, buffer, size);
542 status = load_string( (flags & MUI_LANGUAGE_ID) ? LOCALE_ILANGUAGE : LOCALE_SNAME,
543 lang, name, ARRAY_SIZE(name) );
544 if (status) return status;
546 len = wcslen( name ) + 2;
547 name[len - 1] = 0;
548 if (buffer)
550 if (len > *size)
552 *size = len;
553 return STATUS_BUFFER_TOO_SMALL;
555 memcpy( buffer, name, len * sizeof(WCHAR) );
557 *size = len;
558 *count = 1;
559 TRACE("returned variable content: %d, \"%s\", %d\n", *count, debugstr_w(buffer), *size);
560 return STATUS_SUCCESS;
564 /**************************************************************************
565 * RtlGetProcessPreferredUILanguages (NTDLL.@)
567 NTSTATUS WINAPI RtlGetProcessPreferredUILanguages( DWORD flags, ULONG *count, WCHAR *buffer, ULONG *size )
569 LANGID ui_language;
571 FIXME( "%08x, %p, %p %p\n", flags, count, buffer, size );
573 NtQueryDefaultUILanguage( &ui_language );
574 return get_dummy_preferred_ui_language( flags, ui_language, count, buffer, size );
578 /**************************************************************************
579 * RtlGetSystemPreferredUILanguages (NTDLL.@)
581 NTSTATUS WINAPI RtlGetSystemPreferredUILanguages( DWORD flags, ULONG unknown, ULONG *count,
582 WCHAR *buffer, ULONG *size )
584 LANGID ui_language;
586 if (flags & ~(MUI_LANGUAGE_NAME | MUI_LANGUAGE_ID | MUI_MACHINE_LANGUAGE_SETTINGS)) return STATUS_INVALID_PARAMETER;
587 if ((flags & MUI_LANGUAGE_NAME) && (flags & MUI_LANGUAGE_ID)) return STATUS_INVALID_PARAMETER;
588 if (*size && !buffer) return STATUS_INVALID_PARAMETER;
590 NtQueryInstallUILanguage( &ui_language );
591 return get_dummy_preferred_ui_language( flags, ui_language, count, buffer, size );
595 /**************************************************************************
596 * RtlGetThreadPreferredUILanguages (NTDLL.@)
598 NTSTATUS WINAPI RtlGetThreadPreferredUILanguages( DWORD flags, ULONG *count, WCHAR *buffer, ULONG *size )
600 LANGID ui_language;
602 FIXME( "%08x, %p, %p %p\n", flags, count, buffer, size );
604 NtQueryDefaultUILanguage( &ui_language );
605 return get_dummy_preferred_ui_language( flags, ui_language, count, buffer, size );
609 /**************************************************************************
610 * RtlGetUserPreferredUILanguages (NTDLL.@)
612 NTSTATUS WINAPI RtlGetUserPreferredUILanguages( DWORD flags, ULONG unknown, ULONG *count,
613 WCHAR *buffer, ULONG *size )
615 LANGID ui_language;
617 if (flags & ~(MUI_LANGUAGE_NAME | MUI_LANGUAGE_ID)) return STATUS_INVALID_PARAMETER;
618 if ((flags & MUI_LANGUAGE_NAME) && (flags & MUI_LANGUAGE_ID)) return STATUS_INVALID_PARAMETER;
619 if (*size && !buffer) return STATUS_INVALID_PARAMETER;
621 NtQueryDefaultUILanguage( &ui_language );
622 return get_dummy_preferred_ui_language( flags, ui_language, count, buffer, size );
626 /**************************************************************************
627 * RtlSetProcessPreferredUILanguages (NTDLL.@)
629 NTSTATUS WINAPI RtlSetProcessPreferredUILanguages( DWORD flags, PCZZWSTR buffer, ULONG *count )
631 FIXME( "%u, %p, %p\n", flags, buffer, count );
632 return STATUS_SUCCESS;
636 /**************************************************************************
637 * RtlSetThreadPreferredUILanguages (NTDLL.@)
639 NTSTATUS WINAPI RtlSetThreadPreferredUILanguages( DWORD flags, PCZZWSTR buffer, ULONG *count )
641 FIXME( "%u, %p, %p\n", flags, buffer, count );
642 return STATUS_SUCCESS;
646 /******************************************************************
647 * RtlInitCodePageTable (NTDLL.@)
649 void WINAPI RtlInitCodePageTable( USHORT *ptr, CPTABLEINFO *info )
651 USHORT hdr_size = ptr[0];
653 info->CodePage = ptr[1];
654 info->MaximumCharacterSize = ptr[2];
655 info->DefaultChar = ptr[3];
656 info->UniDefaultChar = ptr[4];
657 info->TransDefaultChar = ptr[5];
658 info->TransUniDefaultChar = ptr[6];
659 memcpy( info->LeadByte, ptr + 7, sizeof(info->LeadByte) );
660 ptr += hdr_size;
662 info->WideCharTable = ptr + ptr[0] + 1;
663 info->MultiByteTable = ++ptr;
664 ptr += 256;
665 if (*ptr++) ptr += 256; /* glyph table */
666 info->DBCSRanges = ptr;
667 if (*ptr) /* dbcs ranges */
669 info->DBCSCodePage = 1;
670 info->DBCSOffsets = ptr + 1;
672 else
674 info->DBCSCodePage = 0;
675 info->DBCSOffsets = NULL;
680 /**************************************************************************
681 * RtlInitNlsTables (NTDLL.@)
683 void WINAPI RtlInitNlsTables( USHORT *ansi, USHORT *oem, USHORT *casetable, NLSTABLEINFO *info )
685 RtlInitCodePageTable( ansi, &info->AnsiTableInfo );
686 RtlInitCodePageTable( oem, &info->OemTableInfo );
687 info->UpperCaseTable = casetable + 2;
688 info->LowerCaseTable = casetable + casetable[1] + 2;
692 /**************************************************************************
693 * RtlResetRtlTranslations (NTDLL.@)
695 void WINAPI RtlResetRtlTranslations( const NLSTABLEINFO *info )
697 NlsAnsiCodePage = info->AnsiTableInfo.CodePage;
698 NlsMbCodePageTag = info->AnsiTableInfo.DBCSCodePage;
699 NlsMbOemCodePageTag = info->OemTableInfo.DBCSCodePage;
700 nls_info = *info;
704 /**************************************************************************
705 * RtlAnsiCharToUnicodeChar (NTDLL.@)
707 WCHAR WINAPI RtlAnsiCharToUnicodeChar( char **ansi )
709 if (nls_info.AnsiTableInfo.DBCSOffsets)
711 USHORT off = nls_info.AnsiTableInfo.DBCSOffsets[(unsigned char)**ansi];
712 if (off)
714 (*ansi)++;
715 return nls_info.AnsiTableInfo.DBCSOffsets[off + (unsigned char)*(*ansi)++];
718 return nls_info.AnsiTableInfo.MultiByteTable[(unsigned char)*(*ansi)++];
722 /******************************************************************************
723 * RtlCompareUnicodeStrings (NTDLL.@)
725 LONG WINAPI RtlCompareUnicodeStrings( const WCHAR *s1, SIZE_T len1, const WCHAR *s2, SIZE_T len2,
726 BOOLEAN case_insensitive )
728 LONG ret = 0;
729 SIZE_T len = min( len1, len2 );
731 if (case_insensitive)
733 if (nls_info.UpperCaseTable)
735 while (!ret && len--) ret = casemap( nls_info.UpperCaseTable, *s1++ ) -
736 casemap( nls_info.UpperCaseTable, *s2++ );
738 else /* locale not setup yet */
740 while (!ret && len--) ret = casemap_ascii( *s1++ ) - casemap_ascii( *s2++ );
743 else
745 while (!ret && len--) ret = *s1++ - *s2++;
747 if (!ret) ret = len1 - len2;
748 return ret;
752 /**************************************************************************
753 * RtlPrefixUnicodeString (NTDLL.@)
755 BOOLEAN WINAPI RtlPrefixUnicodeString( const UNICODE_STRING *s1, const UNICODE_STRING *s2,
756 BOOLEAN ignore_case )
758 unsigned int i;
760 if (s1->Length > s2->Length) return FALSE;
761 if (ignore_case)
763 for (i = 0; i < s1->Length / sizeof(WCHAR); i++)
764 if (casemap( nls_info.UpperCaseTable, s1->Buffer[i] ) !=
765 casemap( nls_info.UpperCaseTable, s2->Buffer[i] )) return FALSE;
767 else
769 for (i = 0; i < s1->Length / sizeof(WCHAR); i++)
770 if (s1->Buffer[i] != s2->Buffer[i]) return FALSE;
772 return TRUE;
777 /******************************************************************************
778 * RtlHashUnicodeString (NTDLL.@)
780 NTSTATUS WINAPI RtlHashUnicodeString( const UNICODE_STRING *string, BOOLEAN case_insensitive,
781 ULONG alg, ULONG *hash )
783 unsigned int i;
785 if (!string || !hash) return STATUS_INVALID_PARAMETER;
787 switch (alg)
789 case HASH_STRING_ALGORITHM_DEFAULT:
790 case HASH_STRING_ALGORITHM_X65599:
791 break;
792 default:
793 return STATUS_INVALID_PARAMETER;
796 *hash = 0;
797 if (!case_insensitive)
798 for (i = 0; i < string->Length / sizeof(WCHAR); i++)
799 *hash = *hash * 65599 + string->Buffer[i];
800 else if (nls_info.UpperCaseTable)
801 for (i = 0; i < string->Length / sizeof(WCHAR); i++)
802 *hash = *hash * 65599 + casemap( nls_info.UpperCaseTable, string->Buffer[i] );
803 else /* locale not setup yet */
804 for (i = 0; i < string->Length / sizeof(WCHAR); i++)
805 *hash = *hash * 65599 + casemap_ascii( string->Buffer[i] );
806 return STATUS_SUCCESS;
810 /**************************************************************************
811 * RtlCustomCPToUnicodeN (NTDLL.@)
813 NTSTATUS WINAPI RtlCustomCPToUnicodeN( CPTABLEINFO *info, WCHAR *dst, DWORD dstlen, DWORD *reslen,
814 const char *src, DWORD srclen )
816 DWORD i, ret;
818 dstlen /= sizeof(WCHAR);
819 if (info->DBCSOffsets)
821 for (i = dstlen; srclen && i; i--, srclen--, src++, dst++)
823 USHORT off = info->DBCSOffsets[(unsigned char)*src];
824 if (off && srclen > 1)
826 src++;
827 srclen--;
828 *dst = info->DBCSOffsets[off + (unsigned char)*src];
830 else *dst = info->MultiByteTable[(unsigned char)*src];
832 ret = dstlen - i;
834 else
836 ret = min( srclen, dstlen );
837 for (i = 0; i < ret; i++) dst[i] = info->MultiByteTable[(unsigned char)src[i]];
839 if (reslen) *reslen = ret * sizeof(WCHAR);
840 return STATUS_SUCCESS;
844 /**************************************************************************
845 * RtlUnicodeToCustomCPN (NTDLL.@)
847 NTSTATUS WINAPI RtlUnicodeToCustomCPN( CPTABLEINFO *info, char *dst, DWORD dstlen, DWORD *reslen,
848 const WCHAR *src, DWORD srclen )
850 DWORD i, ret;
852 srclen /= sizeof(WCHAR);
853 if (info->DBCSCodePage)
855 WCHAR *uni2cp = info->WideCharTable;
857 for (i = dstlen; srclen && i; i--, srclen--, src++)
859 if (uni2cp[*src] & 0xff00)
861 if (i == 1) break; /* do not output a partial char */
862 i--;
863 *dst++ = uni2cp[*src] >> 8;
865 *dst++ = (char)uni2cp[*src];
867 ret = dstlen - i;
869 else
871 char *uni2cp = info->WideCharTable;
872 ret = min( srclen, dstlen );
873 for (i = 0; i < ret; i++) dst[i] = uni2cp[src[i]];
875 if (reslen) *reslen = ret;
876 return STATUS_SUCCESS;
880 /**************************************************************************
881 * RtlMultiByteToUnicodeN (NTDLL.@)
883 NTSTATUS WINAPI RtlMultiByteToUnicodeN( WCHAR *dst, DWORD dstlen, DWORD *reslen,
884 const char *src, DWORD srclen )
886 if (nls_info.AnsiTableInfo.WideCharTable)
887 return RtlCustomCPToUnicodeN( &nls_info.AnsiTableInfo, dst, dstlen, reslen, src, srclen );
889 /* locale not setup yet */
890 dstlen = min( srclen, dstlen / sizeof(WCHAR) );
891 if (reslen) *reslen = dstlen * sizeof(WCHAR);
892 while (dstlen--) *dst++ = *src++ & 0x7f;
893 return STATUS_SUCCESS;
897 /**************************************************************************
898 * RtlMultiByteToUnicodeSize (NTDLL.@)
900 NTSTATUS WINAPI RtlMultiByteToUnicodeSize( DWORD *size, const char *str, DWORD len )
902 *size = mbtowc_size( &nls_info.AnsiTableInfo, str, len ) * sizeof(WCHAR);
903 return STATUS_SUCCESS;
907 /**************************************************************************
908 * RtlOemToUnicodeN (NTDLL.@)
910 NTSTATUS WINAPI RtlOemToUnicodeN( WCHAR *dst, DWORD dstlen, DWORD *reslen,
911 const char *src, DWORD srclen )
913 return RtlCustomCPToUnicodeN( &nls_info.OemTableInfo, dst, dstlen, reslen, src, srclen );
917 /**************************************************************************
918 * RtlOemStringToUnicodeSize (NTDLL.@)
919 * RtlxOemStringToUnicodeSize (NTDLL.@)
921 DWORD WINAPI RtlOemStringToUnicodeSize( const STRING *str )
923 return (mbtowc_size( &nls_info.OemTableInfo, str->Buffer, str->Length ) + 1) * sizeof(WCHAR);
927 /**************************************************************************
928 * RtlUnicodeStringToOemSize (NTDLL.@)
929 * RtlxUnicodeStringToOemSize (NTDLL.@)
931 DWORD WINAPI RtlUnicodeStringToOemSize( const UNICODE_STRING *str )
933 return wctomb_size( &nls_info.OemTableInfo, str->Buffer, str->Length / sizeof(WCHAR) ) + 1;
937 /**************************************************************************
938 * RtlUnicodeToMultiByteN (NTDLL.@)
940 NTSTATUS WINAPI RtlUnicodeToMultiByteN( char *dst, DWORD dstlen, DWORD *reslen,
941 const WCHAR *src, DWORD srclen )
943 if (nls_info.AnsiTableInfo.WideCharTable)
944 return RtlUnicodeToCustomCPN( &nls_info.AnsiTableInfo, dst, dstlen, reslen, src, srclen );
946 /* locale not setup yet */
947 dstlen = min( srclen / sizeof(WCHAR), dstlen );
948 if (reslen) *reslen = dstlen;
949 while (dstlen--)
951 WCHAR ch = *src++;
952 if (ch > 0x7f) ch = '?';
953 *dst++ = ch;
955 return STATUS_SUCCESS;
959 /**************************************************************************
960 * RtlUnicodeToMultiByteSize (NTDLL.@)
962 NTSTATUS WINAPI RtlUnicodeToMultiByteSize( DWORD *size, const WCHAR *str, DWORD len )
964 *size = wctomb_size( &nls_info.AnsiTableInfo, str, len / sizeof(WCHAR) );
965 return STATUS_SUCCESS;
969 /**************************************************************************
970 * RtlUnicodeToOemN (NTDLL.@)
972 NTSTATUS WINAPI RtlUnicodeToOemN( char *dst, DWORD dstlen, DWORD *reslen,
973 const WCHAR *src, DWORD srclen )
975 return RtlUnicodeToCustomCPN( &nls_info.OemTableInfo, dst, dstlen, reslen, src, srclen );
979 /**************************************************************************
980 * RtlDowncaseUnicodeChar (NTDLL.@)
982 WCHAR WINAPI RtlDowncaseUnicodeChar( WCHAR wch )
984 if (nls_info.LowerCaseTable) return casemap( nls_info.LowerCaseTable, wch );
985 if (wch >= 'A' && wch <= 'Z') wch += 'a' - 'A';
986 return wch;
990 /**************************************************************************
991 * RtlDowncaseUnicodeString (NTDLL.@)
993 NTSTATUS WINAPI RtlDowncaseUnicodeString( UNICODE_STRING *dest, const UNICODE_STRING *src,
994 BOOLEAN alloc )
996 DWORD i, len = src->Length;
998 if (alloc)
1000 dest->MaximumLength = len;
1001 if (!(dest->Buffer = RtlAllocateHeap( GetProcessHeap(), 0, len ))) return STATUS_NO_MEMORY;
1003 else if (len > dest->MaximumLength) return STATUS_BUFFER_OVERFLOW;
1005 for (i = 0; i < len / sizeof(WCHAR); i++)
1006 dest->Buffer[i] = casemap( nls_info.LowerCaseTable, src->Buffer[i] );
1007 dest->Length = len;
1008 return STATUS_SUCCESS;
1012 /**************************************************************************
1013 * RtlUpcaseUnicodeChar (NTDLL.@)
1015 WCHAR WINAPI RtlUpcaseUnicodeChar( WCHAR wch )
1017 return casemap( nls_info.UpperCaseTable, wch );
1021 /**************************************************************************
1022 * RtlUpcaseUnicodeString (NTDLL.@)
1024 NTSTATUS WINAPI RtlUpcaseUnicodeString( UNICODE_STRING *dest, const UNICODE_STRING *src,
1025 BOOLEAN alloc )
1027 DWORD i, len = src->Length;
1029 if (alloc)
1031 dest->MaximumLength = len;
1032 if (!(dest->Buffer = RtlAllocateHeap( GetProcessHeap(), 0, len ))) return STATUS_NO_MEMORY;
1034 else if (len > dest->MaximumLength) return STATUS_BUFFER_OVERFLOW;
1036 for (i = 0; i < len / sizeof(WCHAR); i++)
1037 dest->Buffer[i] = casemap( nls_info.UpperCaseTable, src->Buffer[i] );
1038 dest->Length = len;
1039 return STATUS_SUCCESS;
1043 /**************************************************************************
1044 * RtlUpcaseUnicodeToCustomCPN (NTDLL.@)
1046 NTSTATUS WINAPI RtlUpcaseUnicodeToCustomCPN( CPTABLEINFO *info, char *dst, DWORD dstlen, DWORD *reslen,
1047 const WCHAR *src, DWORD srclen )
1049 DWORD i, ret;
1051 srclen /= sizeof(WCHAR);
1052 if (info->DBCSCodePage)
1054 WCHAR *uni2cp = info->WideCharTable;
1056 for (i = dstlen; srclen && i; i--, srclen--, src++)
1058 WCHAR ch = casemap( nls_info.UpperCaseTable, *src );
1059 if (uni2cp[ch] & 0xff00)
1061 if (i == 1) break; /* do not output a partial char */
1062 i--;
1063 *dst++ = uni2cp[ch] >> 8;
1065 *dst++ = (char)uni2cp[ch];
1067 ret = dstlen - i;
1069 else
1071 char *uni2cp = info->WideCharTable;
1072 ret = min( srclen, dstlen );
1073 for (i = 0; i < ret; i++) dst[i] = uni2cp[casemap( nls_info.UpperCaseTable, src[i] )];
1075 if (reslen) *reslen = ret;
1076 return STATUS_SUCCESS;
1080 /**************************************************************************
1081 * RtlUpcaseUnicodeToMultiByteN (NTDLL.@)
1083 NTSTATUS WINAPI RtlUpcaseUnicodeToMultiByteN( char *dst, DWORD dstlen, DWORD *reslen,
1084 const WCHAR *src, DWORD srclen )
1086 return RtlUpcaseUnicodeToCustomCPN( &nls_info.AnsiTableInfo, dst, dstlen, reslen, src, srclen );
1090 /**************************************************************************
1091 * RtlUpcaseUnicodeToOemN (NTDLL.@)
1093 NTSTATUS WINAPI RtlUpcaseUnicodeToOemN( char *dst, DWORD dstlen, DWORD *reslen,
1094 const WCHAR *src, DWORD srclen )
1096 if (nls_info.OemTableInfo.WideCharTable)
1097 return RtlUpcaseUnicodeToCustomCPN( &nls_info.OemTableInfo, dst, dstlen, reslen, src, srclen );
1099 /* locale not setup yet */
1100 dstlen = min( srclen / sizeof(WCHAR), dstlen );
1101 if (reslen) *reslen = dstlen;
1102 while (dstlen--)
1104 WCHAR ch = *src++;
1105 if (ch > 0x7f) ch = '?';
1106 else ch = casemap_ascii( ch );
1107 *dst++ = ch;
1109 return STATUS_SUCCESS;
1113 /*********************************************************************
1114 * towlower (NTDLL.@)
1116 WCHAR __cdecl towlower( WCHAR ch )
1118 if (ch >= 0x100) return ch;
1119 return casemap( nls_info.LowerCaseTable, ch );
1123 /*********************************************************************
1124 * towupper (NTDLL.@)
1126 WCHAR __cdecl towupper( WCHAR ch )
1128 if (nls_info.UpperCaseTable) return casemap( nls_info.UpperCaseTable, ch );
1129 return casemap_ascii( ch );
1133 /******************************************************************
1134 * RtlLocaleNameToLcid (NTDLL.@)
1136 NTSTATUS WINAPI RtlLocaleNameToLcid( const WCHAR *name, LCID *lcid, ULONG flags )
1138 /* locale name format is: lang[-script][-country][_modifier] */
1140 const IMAGE_RESOURCE_DIRECTORY *resdir;
1141 const IMAGE_RESOURCE_DIRECTORY_ENTRY *et;
1142 LDR_RESOURCE_INFO info;
1143 WCHAR buf[LOCALE_NAME_MAX_LENGTH];
1144 WCHAR lang[LOCALE_NAME_MAX_LENGTH]; /* language ("en") (note: buffer contains the other strings too) */
1145 WCHAR *country = NULL; /* country ("US") */
1146 WCHAR *script = NULL; /* script ("Latn") */
1147 WCHAR *p;
1148 int i;
1150 if (!name) return STATUS_INVALID_PARAMETER_1;
1152 if (!name[0])
1154 *lcid = LANG_INVARIANT;
1155 goto found;
1157 if (wcslen( name ) >= LOCALE_NAME_MAX_LENGTH) return STATUS_INVALID_PARAMETER_1;
1158 wcscpy( lang, name );
1160 if ((p = wcspbrk( lang, L"-_" )) && *p == '-')
1162 *p++ = 0;
1163 country = p;
1164 if ((p = wcspbrk( p, L"-_" )) && *p == '-')
1166 *p++ = 0;
1167 script = country;
1168 country = p;
1169 p = wcspbrk( p, L"-_" );
1171 if (p) *p = 0; /* FIXME: modifier is ignored */
1172 /* second value can be script or country, check length to resolve the ambiguity */
1173 if (!script && wcslen( country ) == 4)
1175 script = country;
1176 country = NULL;
1180 info.Type = 6; /* RT_STRING */
1181 info.Name = (LOCALE_SNAME >> 4) + 1;
1182 if (LdrFindResourceDirectory_U( kernel32_handle, &info, 2, &resdir ))
1183 return STATUS_INVALID_PARAMETER_1;
1185 et = (const IMAGE_RESOURCE_DIRECTORY_ENTRY *)(resdir + 1);
1186 for (i = 0; i < resdir->NumberOfNamedEntries + resdir->NumberOfIdEntries; i++)
1188 LANGID id = et[i].u.Id;
1190 if (PRIMARYLANGID(id) == LANG_NEUTRAL) continue;
1192 if (!load_string( LOCALE_SNAME, id, buf, ARRAY_SIZE(buf) ) && !wcsicmp( name, buf ))
1194 *lcid = MAKELCID( id, SORT_DEFAULT ); /* FIXME: handle sort order */
1195 goto found;
1198 if (load_string( LOCALE_SISO639LANGNAME, id, buf, ARRAY_SIZE(buf) ) || wcsicmp( lang, buf ))
1199 continue;
1201 if (script)
1203 unsigned int len = wcslen( script );
1204 if (load_string( LOCALE_SSCRIPTS, id, buf, ARRAY_SIZE(buf) )) continue;
1205 p = buf;
1206 while (*p)
1208 if (!wcsnicmp( p, script, len ) && (!p[len] || p[len] == ';')) break;
1209 if (!(p = wcschr( p, ';'))) break;
1210 p++;
1212 if (!p || !*p) continue;
1215 if (!country && (flags & 2))
1217 if (!script) id = MAKELANGID( PRIMARYLANGID(id), LANG_NEUTRAL );
1218 switch (id)
1220 case MAKELANGID( LANG_CHINESE, SUBLANG_NEUTRAL ):
1221 case MAKELANGID( LANG_CHINESE, SUBLANG_CHINESE_SINGAPORE ):
1222 *lcid = MAKELCID( 0x7804, SORT_DEFAULT );
1223 break;
1224 case MAKELANGID( LANG_CHINESE, SUBLANG_CHINESE_TRADITIONAL ):
1225 case MAKELANGID( LANG_CHINESE, SUBLANG_CHINESE_MACAU ):
1226 case MAKELANGID( LANG_CHINESE, SUBLANG_CHINESE_HONGKONG ):
1227 *lcid = MAKELCID( 0x7c04, SORT_DEFAULT );
1228 break;
1229 case MAKELANGID( LANG_SERBIAN, SUBLANG_NEUTRAL ):
1230 *lcid = LANG_SERBIAN_NEUTRAL;
1231 break;
1232 default:
1233 *lcid = MAKELANGID( PRIMARYLANGID(id), SUBLANG_NEUTRAL );
1234 break;
1236 goto found;
1239 return STATUS_INVALID_PARAMETER_1;
1241 found:
1242 TRACE( "%s -> %04x\n", debugstr_w(name), *lcid );
1243 return STATUS_SUCCESS;
/* helper for the various utf8 mbstowcs functions
 *
 * ch is the lead byte (must be >= 0x80) that the caller has already consumed;
 * *str points at the byte following it and strend one past the last input byte.
 * Returns the decoded code point, or ~0 for an invalid, overlong, surrogate,
 * out-of-range or truncated sequence.  *str is advanced past every trailing
 * byte that was accepted; for a sequence cut short by strend it is set to
 * where the sequence would have ended.
 */
static unsigned int decode_utf8_char( unsigned char ch, const char **str, const char *strend )
{
    /* number of following bytes in sequence based on first byte value (for bytes above 0x7f) */
    static const char utf8_length[128] =
    {
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
        0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
        2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
        3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0  /* 0xf0-0xff */
    };

    /* first byte mask depending on UTF-8 sequence length */
    static const unsigned char utf8_mask[4] = { 0x7f, 0x1f, 0x0f, 0x07 };

    const unsigned int trail = utf8_length[ch - 0x80];
    unsigned int code = ch & utf8_mask[trail];
    const char *last = *str + trail;
    unsigned char bits;

    if (last > strend)
    {
        *str = last;
        return ~0;
    }

    /* the stages below run in sequence: a 4-byte sequence goes through all three */
    if (trail == 3)
    {
        if ((bits = last[-3] ^ 0x80) >= 0x40) return ~0;
        code = (code << 6) | bits;
        (*str)++;
        if (code < 0x10) return ~0;  /* overlong 4-byte form */
    }
    if (trail >= 2)
    {
        if ((bits = last[-2] ^ 0x80) >= 0x40) return ~0;
        code = (code << 6) | bits;
        if (code >= 0x110000 >> 6) return ~0;  /* above 0x10ffff */
        (*str)++;
        if (code < 0x20) return ~0;  /* overlong 3-byte form */
        if (code >= 0xd800 >> 6 && code <= 0xdfff >> 6) return ~0;  /* surrogate range */
    }
    if (trail >= 1)
    {
        if ((bits = last[-1] ^ 0x80) >= 0x40) return ~0;
        code = (code << 6) | bits;
        (*str)++;
        if (code < 0x80) return ~0;  /* overlong 2-byte form */
        return code;
    }
    return ~0;  /* stray continuation byte or unsupported lead byte */
}
1300 /**************************************************************************
1301 * RtlUTF8ToUnicodeN (NTDLL.@)
1303 NTSTATUS WINAPI RtlUTF8ToUnicodeN( WCHAR *dst, DWORD dstlen, DWORD *reslen, const char *src, DWORD srclen )
1305 unsigned int res, len;
1306 NTSTATUS status = STATUS_SUCCESS;
1307 const char *srcend = src + srclen;
1308 WCHAR *dstend;
1310 if (!src) return STATUS_INVALID_PARAMETER_4;
1311 if (!reslen) return STATUS_INVALID_PARAMETER;
1313 dstlen /= sizeof(WCHAR);
1314 dstend = dst + dstlen;
1315 if (!dst)
1317 for (len = 0; src < srcend; len++)
1319 unsigned char ch = *src++;
1320 if (ch < 0x80) continue;
1321 if ((res = decode_utf8_char( ch, &src, srcend )) > 0x10ffff)
1322 status = STATUS_SOME_NOT_MAPPED;
1323 else
1324 if (res > 0xffff) len++;
1326 *reslen = len * sizeof(WCHAR);
1327 return status;
1330 while ((dst < dstend) && (src < srcend))
1332 unsigned char ch = *src++;
1333 if (ch < 0x80) /* special fast case for 7-bit ASCII */
1335 *dst++ = ch;
1336 continue;
1338 if ((res = decode_utf8_char( ch, &src, srcend )) <= 0xffff)
1340 *dst++ = res;
1342 else if (res <= 0x10ffff) /* we need surrogates */
1344 res -= 0x10000;
1345 *dst++ = 0xd800 | (res >> 10);
1346 if (dst == dstend) break;
1347 *dst++ = 0xdc00 | (res & 0x3ff);
1349 else
1351 *dst++ = 0xfffd;
1352 status = STATUS_SOME_NOT_MAPPED;
1355 if (src < srcend) status = STATUS_BUFFER_TOO_SMALL; /* overflow */
1356 *reslen = (dstlen - (dstend - dst)) * sizeof(WCHAR);
1357 return status;
1361 /**************************************************************************
1362 * RtlUnicodeToUTF8N (NTDLL.@)
1364 NTSTATUS WINAPI RtlUnicodeToUTF8N( char *dst, DWORD dstlen, DWORD *reslen, const WCHAR *src, DWORD srclen )
1366 char *end;
1367 unsigned int val, len;
1368 NTSTATUS status = STATUS_SUCCESS;
1370 if (!src) return STATUS_INVALID_PARAMETER_4;
1371 if (!reslen) return STATUS_INVALID_PARAMETER;
1372 if (dst && (srclen & 1)) return STATUS_INVALID_PARAMETER_5;
1374 srclen /= sizeof(WCHAR);
1376 if (!dst)
1378 for (len = 0; srclen; srclen--, src++)
1380 if (*src < 0x80) len++; /* 0x00-0x7f: 1 byte */
1381 else if (*src < 0x800) len += 2; /* 0x80-0x7ff: 2 bytes */
1382 else
1384 if (!get_utf16( src, srclen, &val ))
1386 val = 0xfffd;
1387 status = STATUS_SOME_NOT_MAPPED;
1389 if (val < 0x10000) len += 3; /* 0x800-0xffff: 3 bytes */
1390 else /* 0x10000-0x10ffff: 4 bytes */
1392 len += 4;
1393 src++;
1394 srclen--;
1398 *reslen = len;
1399 return status;
1402 for (end = dst + dstlen; srclen; srclen--, src++)
1404 WCHAR ch = *src;
1406 if (ch < 0x80) /* 0x00-0x7f: 1 byte */
1408 if (dst > end - 1) break;
1409 *dst++ = ch;
1410 continue;
1412 if (ch < 0x800) /* 0x80-0x7ff: 2 bytes */
1414 if (dst > end - 2) break;
1415 dst[1] = 0x80 | (ch & 0x3f);
1416 ch >>= 6;
1417 dst[0] = 0xc0 | ch;
1418 dst += 2;
1419 continue;
1421 if (!get_utf16( src, srclen, &val ))
1423 val = 0xfffd;
1424 status = STATUS_SOME_NOT_MAPPED;
1426 if (val < 0x10000) /* 0x800-0xffff: 3 bytes */
1428 if (dst > end - 3) break;
1429 dst[2] = 0x80 | (val & 0x3f);
1430 val >>= 6;
1431 dst[1] = 0x80 | (val & 0x3f);
1432 val >>= 6;
1433 dst[0] = 0xe0 | val;
1434 dst += 3;
1436 else /* 0x10000-0x10ffff: 4 bytes */
1438 if (dst > end - 4) break;
1439 dst[3] = 0x80 | (val & 0x3f);
1440 val >>= 6;
1441 dst[2] = 0x80 | (val & 0x3f);
1442 val >>= 6;
1443 dst[1] = 0x80 | (val & 0x3f);
1444 val >>= 6;
1445 dst[0] = 0xf0 | val;
1446 dst += 4;
1447 src++;
1448 srclen--;
1451 if (srclen) status = STATUS_BUFFER_TOO_SMALL;
1452 *reslen = dstlen - (end - dst);
1453 return status;
1457 /******************************************************************************
1458 * RtlIsNormalizedString (NTDLL.@)
1460 NTSTATUS WINAPI RtlIsNormalizedString( ULONG form, const WCHAR *str, INT len, BOOLEAN *res )
1462 const struct norm_table *info;
1463 NTSTATUS status;
1464 BYTE props, class, last_class = 0;
1465 unsigned int ch;
1466 int i, r, result = 1;
1468 if ((status = load_norm_table( form, &info ))) return status;
1470 if (len == -1) len = wcslen( str );
1472 for (i = 0; i < len && result; i += r)
1474 if (!(r = get_utf16( str + i, len - i, &ch ))) return STATUS_NO_UNICODE_TRANSLATION;
1475 if (info->comp_size)
1477 if ((ch >= HANGUL_VBASE && ch < HANGUL_VBASE + HANGUL_VCOUNT) ||
1478 (ch >= HANGUL_TBASE && ch < HANGUL_TBASE + HANGUL_TCOUNT))
1480 result = -1; /* QC=Maybe */
1481 continue;
1484 else if (ch >= HANGUL_SBASE && ch < HANGUL_SBASE + HANGUL_SCOUNT)
1486 result = 0; /* QC=No */
1487 break;
1489 props = get_char_props( info, ch );
1490 class = props & 0x3f;
1491 if (class == 0x3f)
1493 last_class = 0;
1494 if (props == 0xbf) result = 0; /* QC=No */
1495 else if (props == 0xff)
1497 /* ignore other chars in Hangul range */
1498 if (ch >= HANGUL_LBASE && ch < HANGUL_LBASE + 0x100) continue;
1499 if (ch >= HANGUL_SBASE && ch < HANGUL_SBASE + 0x2c00) continue;
1500 /* allow final null */
1501 if (!ch && i == len - 1) continue;
1502 return STATUS_NO_UNICODE_TRANSLATION;
1505 else if (props & 0x80)
1507 if ((props & 0xc0) == 0xc0) result = -1; /* QC=Maybe */
1508 if (class && class < last_class) result = 0; /* QC=No */
1509 last_class = class;
1511 else last_class = 0;
1514 if (result == -1)
1516 int dstlen = len * 4;
1517 NTSTATUS status;
1518 WCHAR *buffer = RtlAllocateHeap( GetProcessHeap(), 0, dstlen * sizeof(WCHAR) );
1519 if (!buffer) return STATUS_NO_MEMORY;
1520 status = RtlNormalizeString( form, str, len, buffer, &dstlen );
1521 result = !status && (dstlen == len) && !wcsncmp( buffer, str, len );
1522 RtlFreeHeap( GetProcessHeap(), 0, buffer );
1524 *res = result;
1525 return STATUS_SUCCESS;
1529 /******************************************************************************
1530 * RtlNormalizeString (NTDLL.@)
1532 NTSTATUS WINAPI RtlNormalizeString( ULONG form, const WCHAR *src, INT src_len, WCHAR *dst, INT *dst_len )
1534 int buf_len;
1535 WCHAR *buf = NULL;
1536 const struct norm_table *info;
1537 NTSTATUS status = STATUS_SUCCESS;
1539 TRACE( "%x %s %d %p %d\n", form, debugstr_wn(src, src_len), src_len, dst, *dst_len );
1541 if ((status = load_norm_table( form, &info ))) return status;
1543 if (src_len == -1) src_len = wcslen(src) + 1;
1545 if (!*dst_len)
1547 *dst_len = src_len * info->len_factor;
1548 if (*dst_len > 64) *dst_len = max( 64, src_len + src_len / 8 );
1549 return STATUS_SUCCESS;
1551 if (!src_len)
1553 *dst_len = 0;
1554 return STATUS_SUCCESS;
1557 if (!info->comp_size) return decompose_string( info, src, src_len, dst, dst_len );
1559 buf_len = src_len * 4;
1560 for (;;)
1562 buf = RtlAllocateHeap( GetProcessHeap(), 0, buf_len * sizeof(WCHAR) );
1563 if (!buf) return STATUS_NO_MEMORY;
1564 status = decompose_string( info, src, src_len, buf, &buf_len );
1565 if (status != STATUS_BUFFER_TOO_SMALL) break;
1566 RtlFreeHeap( GetProcessHeap(), 0, buf );
1568 if (!status)
1570 buf_len = compose_string( info, buf, buf_len );
1571 if (*dst_len >= buf_len) memcpy( dst, buf, buf_len * sizeof(WCHAR) );
1572 else status = STATUS_BUFFER_TOO_SMALL;
1574 RtlFreeHeap( GetProcessHeap(), 0, buf );
1575 *dst_len = buf_len;
1576 return status;
/* Punycode parameters */
enum
{
    BASE = 36,   /* number of distinct digits */
    TMIN = 1,    /* lower clamp of the per-digit threshold */
    TMAX = 26,   /* upper clamp of the per-digit threshold */
    SKEW = 38,   /* used when recomputing the bias */
    DAMP = 700   /* divisor applied to the first delta of a label */
};
1583 static BOOL check_invalid_chars( const struct norm_table *info, DWORD flags,
1584 const unsigned int *buffer, int len )
1586 int i;
1588 for (i = 0; i < len; i++)
1590 switch (buffer[i])
1592 case 0x200c: /* zero-width non-joiner */
1593 case 0x200d: /* zero-width joiner */
1594 if (!i || get_combining_class( info, buffer[i - 1] ) != 9) return TRUE;
1595 break;
1596 case 0x2260: /* not equal to */
1597 case 0x226e: /* not less than */
1598 case 0x226f: /* not greater than */
1599 if (flags & IDN_USE_STD3_ASCII_RULES) return TRUE;
1600 break;
1602 switch (get_char_props( info, buffer[i] ))
1604 case 0xbf:
1605 return TRUE;
1606 case 0xff:
1607 if (buffer[i] >= HANGUL_SBASE && buffer[i] < HANGUL_SBASE + 0x2c00) break;
1608 return TRUE;
1609 case 0x7f:
1610 if (!(flags & IDN_ALLOW_UNASSIGNED)) return TRUE;
1611 break;
1615 if ((flags & IDN_USE_STD3_ASCII_RULES) && len && (buffer[0] == '-' || buffer[len - 1] == '-'))
1616 return TRUE;
1618 return FALSE;
1622 /******************************************************************************
1623 * RtlIdnToAscii (NTDLL.@)
1625 NTSTATUS WINAPI RtlIdnToAscii( DWORD flags, const WCHAR *src, INT srclen, WCHAR *dst, INT *dstlen )
1627 static const WCHAR prefixW[] = {'x','n','-','-'};
1628 const struct norm_table *info;
1629 NTSTATUS status;
1630 WCHAR normstr[256], res[256];
1631 unsigned int ch, buffer[64];
1632 int i, len, start, end, out_label, out = 0, normlen = ARRAY_SIZE(normstr);
1634 TRACE( "%x %s %p %d\n", flags, debugstr_wn(src, srclen), dst, *dstlen );
1636 if ((status = load_norm_table( 13, &info ))) return status;
1638 if ((status = RtlIdnToNameprepUnicode( flags, src, srclen, normstr, &normlen ))) return status;
1640 /* implementation of Punycode based on RFC 3492 */
1642 for (start = 0; start < normlen; start = end + 1)
1644 int n = 0x80, bias = 72, delta = 0, b = 0, h, buflen = 0;
1646 out_label = out;
1647 for (i = start; i < normlen; i += len)
1649 if (!(len = get_utf16( normstr + i, normlen - i, &ch ))) break;
1650 if (!ch || ch == '.') break;
1651 if (ch < 0x80) b++;
1652 buffer[buflen++] = ch;
1654 end = i;
1656 if (b == end - start)
1658 if (end < normlen) b++;
1659 if (out + b > ARRAY_SIZE(res)) return STATUS_INVALID_IDN_NORMALIZATION;
1660 memcpy( res + out, normstr + start, b * sizeof(WCHAR) );
1661 out += b;
1662 continue;
1665 if (buflen >= 4 && buffer[2] == '-' && buffer[3] == '-') return STATUS_INVALID_IDN_NORMALIZATION;
1666 if (check_invalid_chars( info, flags, buffer, buflen )) return STATUS_INVALID_IDN_NORMALIZATION;
1668 if (out + 5 + b > ARRAY_SIZE(res)) return STATUS_INVALID_IDN_NORMALIZATION;
1669 memcpy( res + out, prefixW, sizeof(prefixW) );
1670 out += ARRAY_SIZE(prefixW);
1671 if (b)
1673 for (i = start; i < end; i++) if (normstr[i] < 0x80) res[out++] = normstr[i];
1674 res[out++] = '-';
1677 for (h = b; h < buflen; delta++, n++)
1679 int m = 0x10ffff, q, k;
1681 for (i = 0; i < buflen; i++) if (buffer[i] >= n && m > buffer[i]) m = buffer[i];
1682 delta += (m - n) * (h + 1);
1683 n = m;
1685 for (i = 0; i < buflen; i++)
1687 if (buffer[i] == n)
1689 for (q = delta, k = BASE; ; k += BASE)
1691 int t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias;
1692 int disp = q < t ? q : t + (q - t) % (BASE - t);
1693 if (out + 1 > ARRAY_SIZE(res)) return STATUS_INVALID_IDN_NORMALIZATION;
1694 res[out++] = disp <= 25 ? 'a' + disp : '0' + disp - 26;
1695 if (q < t) break;
1696 q = (q - t) / (BASE - t);
1698 delta /= (h == b ? DAMP : 2);
1699 delta += delta / (h + 1);
1700 for (k = 0; delta > ((BASE - TMIN) * TMAX) / 2; k += BASE) delta /= BASE - TMIN;
1701 bias = k + ((BASE - TMIN + 1) * delta) / (delta + SKEW);
1702 delta = 0;
1703 h++;
1705 else if (buffer[i] < n) delta++;
1709 if (out - out_label > 63) return STATUS_INVALID_IDN_NORMALIZATION;
1711 if (end < normlen)
1713 if (out + 1 > ARRAY_SIZE(res)) return STATUS_INVALID_IDN_NORMALIZATION;
1714 res[out++] = normstr[end];
1718 if (*dstlen)
1720 if (out <= *dstlen) memcpy( dst, res, out * sizeof(WCHAR) );
1721 else status = STATUS_BUFFER_TOO_SMALL;
1723 *dstlen = out;
1724 return status;
1728 /******************************************************************************
1729 * RtlIdnToNameprepUnicode (NTDLL.@)
1731 NTSTATUS WINAPI RtlIdnToNameprepUnicode( DWORD flags, const WCHAR *src, INT srclen,
1732 WCHAR *dst, INT *dstlen )
1734 const struct norm_table *info;
1735 unsigned int ch;
1736 NTSTATUS status;
1737 WCHAR buf[256];
1738 int i, start, len, buflen = ARRAY_SIZE(buf);
1740 if (flags & ~(IDN_ALLOW_UNASSIGNED | IDN_USE_STD3_ASCII_RULES)) return STATUS_INVALID_PARAMETER;
1741 if (!src || srclen < -1) return STATUS_INVALID_PARAMETER;
1743 TRACE( "%x %s %p %d\n", flags, debugstr_wn(src, srclen), dst, *dstlen );
1745 if ((status = load_norm_table( 13, &info ))) return status;
1747 if (srclen == -1) srclen = wcslen(src) + 1;
1749 for (i = 0; i < srclen; i++) if (src[i] < 0x20 || src[i] >= 0x7f) break;
1751 if (i == srclen || (i == srclen - 1 && !src[i])) /* ascii only */
1753 if (srclen > buflen) return STATUS_INVALID_IDN_NORMALIZATION;
1754 memcpy( buf, src, srclen * sizeof(WCHAR) );
1755 buflen = srclen;
1757 else if ((status = RtlNormalizeString( 13, src, srclen, buf, &buflen )))
1759 if (status == STATUS_NO_UNICODE_TRANSLATION) status = STATUS_INVALID_IDN_NORMALIZATION;
1760 return status;
1763 for (i = start = 0; i < buflen; i += len)
1765 if (!(len = get_utf16( buf + i, buflen - i, &ch ))) break;
1766 if (!ch) break;
1767 if (ch == '.')
1769 if (start == i) return STATUS_INVALID_IDN_NORMALIZATION;
1770 /* maximal label length is 63 characters */
1771 if (i - start > 63) return STATUS_INVALID_IDN_NORMALIZATION;
1772 if ((flags & IDN_USE_STD3_ASCII_RULES) && (buf[start] == '-' || buf[i-1] == '-'))
1773 return STATUS_INVALID_IDN_NORMALIZATION;
1774 start = i + 1;
1775 continue;
1777 if (flags & IDN_USE_STD3_ASCII_RULES)
1779 if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
1780 (ch >= '0' && ch <= '9') || ch == '-') continue;
1781 return STATUS_INVALID_IDN_NORMALIZATION;
1783 if (!(flags & IDN_ALLOW_UNASSIGNED))
1785 if (get_char_props( info, ch ) == 0x7f) return STATUS_INVALID_IDN_NORMALIZATION;
1788 if (!i || i - start > 63) return STATUS_INVALID_IDN_NORMALIZATION;
1789 if ((flags & IDN_USE_STD3_ASCII_RULES) && (buf[start] == '-' || buf[i-1] == '-'))
1790 return STATUS_INVALID_IDN_NORMALIZATION;
1792 if (*dstlen)
1794 if (buflen <= *dstlen) memcpy( dst, buf, buflen * sizeof(WCHAR) );
1795 else status = STATUS_BUFFER_TOO_SMALL;
1797 *dstlen = buflen;
1798 return status;
1802 /******************************************************************************
1803 * RtlIdnToUnicode (NTDLL.@)
1805 NTSTATUS WINAPI RtlIdnToUnicode( DWORD flags, const WCHAR *src, INT srclen, WCHAR *dst, INT *dstlen )
1807 const struct norm_table *info;
1808 int i, buflen, start, end, out_label, out = 0;
1809 NTSTATUS status;
1810 UINT buffer[64];
1811 WCHAR ch;
1813 if (!src || srclen < -1) return STATUS_INVALID_PARAMETER;
1814 if (srclen == -1) srclen = wcslen( src ) + 1;
1816 TRACE( "%x %s %p %d\n", flags, debugstr_wn(src, srclen), dst, *dstlen );
1818 if ((status = load_norm_table( 13, &info ))) return status;
1820 for (start = 0; start < srclen; )
1822 int n = 0x80, bias = 72, pos = 0, old_pos, w, k, t, delim = 0, digit, delta;
1824 out_label = out;
1825 for (i = start; i < srclen; i++)
1827 ch = src[i];
1828 if (ch > 0x7f || (i != srclen - 1 && !ch)) return STATUS_INVALID_IDN_NORMALIZATION;
1829 if (!ch || ch == '.') break;
1830 if (ch == '-') delim = i;
1832 if (!(flags & IDN_USE_STD3_ASCII_RULES)) continue;
1833 if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
1834 (ch >= '0' && ch <= '9') || ch == '-')
1835 continue;
1836 return STATUS_INVALID_IDN_NORMALIZATION;
1838 end = i;
1840 /* last label may be empty */
1841 if (start == end && ch) return STATUS_INVALID_IDN_NORMALIZATION;
1843 if (end - start < 4 ||
1844 (src[start] != 'x' && src[start] != 'X') ||
1845 (src[start + 1] != 'n' && src[start + 1] != 'N') ||
1846 src[start + 2] != '-' || src[start + 3] != '-')
1848 if (end - start > 63) return STATUS_INVALID_IDN_NORMALIZATION;
1850 if ((flags & IDN_USE_STD3_ASCII_RULES) && (src[start] == '-' || src[end - 1] == '-'))
1851 return STATUS_INVALID_IDN_NORMALIZATION;
1853 if (end < srclen) end++;
1854 if (*dstlen)
1856 if (out + end - start <= *dstlen)
1857 memcpy( dst + out, src + start, (end - start) * sizeof(WCHAR));
1858 else return STATUS_BUFFER_TOO_SMALL;
1860 out += end - start;
1861 start = end;
1862 continue;
1865 if (delim == start + 3) delim++;
1866 buflen = 0;
1867 for (i = start + 4; i < delim && buflen < ARRAY_SIZE(buffer); i++) buffer[buflen++] = src[i];
1868 if (buflen) i++;
1869 while (i < end)
1871 old_pos = pos;
1872 w = 1;
1873 for (k = BASE; ; k += BASE)
1875 if (i >= end) return STATUS_INVALID_IDN_NORMALIZATION;
1876 ch = src[i++];
1877 if (ch >= 'a' && ch <= 'z') digit = ch - 'a';
1878 else if (ch >= 'A' && ch <= 'Z') digit = ch - 'A';
1879 else if (ch >= '0' && ch <= '9') digit = ch - '0' + 26;
1880 else return STATUS_INVALID_IDN_NORMALIZATION;
1881 pos += digit * w;
1882 t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias;
1883 if (digit < t) break;
1884 w *= BASE - t;
1887 delta = (pos - old_pos) / (!old_pos ? DAMP : 2);
1888 delta += delta / (buflen + 1);
1889 for (k = 0; delta > ((BASE - TMIN) * TMAX) / 2; k += BASE) delta /= BASE - TMIN;
1890 bias = k + ((BASE - TMIN + 1) * delta) / (delta + SKEW);
1891 n += pos / (buflen + 1);
1892 pos %= buflen + 1;
1894 if (buflen >= ARRAY_SIZE(buffer) - 1) return STATUS_INVALID_IDN_NORMALIZATION;
1895 memmove( buffer + pos + 1, buffer + pos, (buflen - pos) * sizeof(*buffer) );
1896 buffer[pos++] = n;
1897 buflen++;
1900 if (check_invalid_chars( info, flags, buffer, buflen )) return STATUS_INVALID_IDN_NORMALIZATION;
1902 for (i = 0; i < buflen; i++)
1904 int len = 1 + (buffer[i] >= 0x10000);
1905 if (*dstlen)
1907 if (out + len <= *dstlen) put_utf16( dst + out, buffer[i] );
1908 else return STATUS_BUFFER_TOO_SMALL;
1910 out += len;
1913 if (out - out_label > 63) return STATUS_INVALID_IDN_NORMALIZATION;
1915 if (end < srclen)
1917 if (*dstlen)
1919 if (out + 1 <= *dstlen) dst[out] = src[end];
1920 else return STATUS_BUFFER_TOO_SMALL;
1922 out++;
1924 start = end + 1;
1926 *dstlen = out;
1927 return STATUS_SUCCESS;