dlls/ntdll/locale_private.h

   1 /*
   2  * Ntdll locale definitions
   3  *
   4  * Copyright 2019, 2022 Alexandre Julliard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19  */
  20
  21 #ifndef __NTDLL_LOCALE_PRIVATE_H
  22 #define __NTDLL_LOCALE_PRIVATE_H
  23
  24 #include "windef.h"
  25 #include "winbase.h"
  26 #include "winnls.h"
  27
  28 /* NLS codepage file format:
  29  *
  30  * header:
  31  *   WORD      offset to cp2uni table in words
  32  *   WORD      CodePage
  33  *   WORD      MaximumCharacterSize
  34  *   BYTE[2]   DefaultChar
  35  *   WORD      UniDefaultChar
  36  *   WORD      TransDefaultChar
  37  *   WORD      TransUniDefaultChar
  38  *   BYTE[12]  LeadByte
  39  * cp2uni table:
  40  *   WORD      offset to uni2cp table in words
  41  *   WORD[256] cp2uni table
  42  *   WORD      glyph table size
  43  *   WORD[glyph_table_size] glyph table
  44  *   WORD      number of lead byte ranges
  45  *   WORD[256] lead byte offsets in words
  46  *   WORD[leadbytes][256] cp2uni table for lead bytes
  47  * uni2cp table:
  48  *   WORD      0 / 4
  49  *   BYTE[65536] / WORD[65536]  uni2cp table
  50  */
  51
  52 enum nls_section_type
  53 {
  54     NLS_SECTION_SORTKEYS = 9,
  55     NLS_SECTION_CASEMAP = 10,
  56     NLS_SECTION_CODEPAGE = 11,
  57     NLS_SECTION_NORMALIZE = 12
  58 };
  59
/* NLS normalization file
 *
 * Header found at the start of a normalization table; the variable-size
 * tables listed at the end of the structure follow it in the same mapping.
 * The hex value at the start of each field comment is the byte offset of
 * the field inside the header.
 *
 * All "table offset" fields are used by the helpers in this file as
 * (const USHORT *)header + offset, i.e. they are counted in 16-bit words
 * from the start of the header, including for the byte-sized tables.
 */
struct norm_table
{
    WCHAR   name[13];      /* 00 file name */
    USHORT  checksum[3];   /* 1a checksum? */
    USHORT  version[4];    /* 20 Unicode version */
    USHORT  form;          /* 28 normalization form */
    USHORT  len_factor;    /* 2a factor for length estimates */
    USHORT  unknown1;      /* 2c */
    USHORT  decomp_size;   /* 2e decomposition hash size */
    USHORT  comp_size;     /* 30 composition hash size */
    USHORT  unknown2;      /* 32 */
    USHORT  classes;       /* 34 combining classes table offset */
    USHORT  props_level1;  /* 36 char properties table level 1 offset */
    USHORT  props_level2;  /* 38 char properties table level 2 offset */
    USHORT  decomp_hash;   /* 3a decomposition hash table offset */
    USHORT  decomp_map;    /* 3c decomposition character map table offset */
    USHORT  decomp_seq;    /* 3e decomposition character sequences offset */
    USHORT  comp_hash;     /* 40 composition hash table offset */
    USHORT  comp_seq;      /* 42 composition character sequences offset */
    /* BYTE[]       combining class values */
    /* BYTE[0x2200] char properties index level 1 */
    /* BYTE[]       char properties index level 2 */
    /* WORD[]       decomposition hash table */
    /* WORD[]       decomposition character map */
    /* WORD[]       decomposition character sequences */
    /* WORD[]       composition hash table */
    /* WORD[]       composition character sequences */
};
  89
  90
/* locale.nls file
 *
 * Header at the start of the file. None of the fields are referenced by
 * the helpers in this header.
 * NOTE(review): the named fields presumably locate the corresponding
 * sections inside the file - confirm against the code that loads it.
 */
struct locale_nls_header
{
    UINT ctypes;    /* character type data (presumed from the name) */
    UINT unknown1;
    UINT unknown2;
    UINT unknown3;
    UINT locales;   /* locale data (presumed from the name) */
    UINT charmaps;  /* character maps (presumed from the name) */
    UINT geoids;    /* geo ids (presumed from the name) */
    UINT scripts;   /* scripts (presumed from the name) */
};
 103
 104
 105 static inline WCHAR casemap_ascii( WCHAR ch )
 106 {
 107     if (ch >= 'a' && ch <= 'z') ch -= 'a' - 'A';
 108     return ch;
 109 }
 110
 111
 112 static inline int get_utf16( const WCHAR *src, unsigned int srclen, unsigned int *ch )
 113 {
 114     if (IS_HIGH_SURROGATE( src[0] ))
 115     {
 116         if (srclen <= 1) return 0;
 117         if (!IS_LOW_SURROGATE( src[1] )) return 0;
 118         *ch = 0x10000 + ((src[0] & 0x3ff) << 10) + (src[1] & 0x3ff);
 119         return 2;
 120     }
 121     if (IS_LOW_SURROGATE( src[0] )) return 0;
 122     *ch = src[0];
 123     return 1;
 124 }
 125
 126
 127 static inline void put_utf16( WCHAR *dst, unsigned int ch )
 128 {
 129     if (ch >= 0x10000)
 130     {
 131         ch -= 0x10000;
 132         dst[0] = 0xd800 | (ch >> 10);
 133         dst[1] = 0xdc00 | (ch & 0x3ff);
 134     }
 135     else dst[0] = ch;
 136 }
 137
 138
 139 static inline unsigned int decode_utf8_char( unsigned char ch, const char **str, const char *strend )
 140 {
 141     /* number of following bytes in sequence based on first byte value (for bytes above 0x7f) */
 142     static const char utf8_length[128] =
 143     {
 144         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
 145         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
 146         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
 147         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
 148         0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
 149         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
 150         2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
 151         3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0  /* 0xf0-0xff */
 152     };
 153
 154     /* first byte mask depending on UTF-8 sequence length */
 155     static const unsigned char utf8_mask[4] = { 0x7f, 0x1f, 0x0f, 0x07 };
 156
 157     unsigned int len = utf8_length[ch - 0x80];
 158     unsigned int res = ch & utf8_mask[len];
 159     const char *end = *str + len;
 160
 161     if (end > strend)
 162     {
 163         *str = end;
 164         return ~0;
 165     }
 166     switch (len)
 167     {
 168     case 3:
 169         if ((ch = end[-3] ^ 0x80) >= 0x40) break;
 170         res = (res << 6) | ch;
 171         (*str)++;
 172         if (res < 0x10) break;
 173     case 2:
 174         if ((ch = end[-2] ^ 0x80) >= 0x40) break;
 175         res = (res << 6) | ch;
 176         if (res >= 0x110000 >> 6) break;
 177         (*str)++;
 178         if (res < 0x20) break;
 179         if (res >= 0xd800 >> 6 && res <= 0xdfff >> 6) break;
 180     case 1:
 181         if ((ch = end[-1] ^ 0x80) >= 0x40) break;
 182         res = (res << 6) | ch;
 183         (*str)++;
 184         if (res < 0x80) break;
 185         return res;
 186     }
 187     return ~0;
 188 }
 189
 190
 191 static inline void init_codepage_table( USHORT *ptr, CPTABLEINFO *info )
 192 {
 193     USHORT hdr_size = ptr[0];
 194
 195     info->CodePage             = ptr[1];
 196     info->MaximumCharacterSize = ptr[2];
 197     info->DefaultChar          = ptr[3];
 198     info->UniDefaultChar       = ptr[4];
 199     info->TransDefaultChar     = ptr[5];
 200     info->TransUniDefaultChar  = ptr[6];
 201     memcpy( info->LeadByte, ptr + 7, sizeof(info->LeadByte) );
 202     ptr += hdr_size;
 203
 204     info->WideCharTable = ptr + ptr[0] + 1;
 205     info->MultiByteTable = ++ptr;
 206     ptr += 256;
 207     if (*ptr++) ptr += 256;  /* glyph table */
 208     info->DBCSRanges = ptr;
 209     if (*ptr)  /* dbcs ranges */
 210     {
 211         info->DBCSCodePage = 1;
 212         info->DBCSOffsets  = ptr + 1;
 213     }
 214     else
 215     {
 216         info->DBCSCodePage = 0;
 217         info->DBCSOffsets  = NULL;
 218     }
 219 }
 220
 221
 222 static inline int compare_locale_names( const WCHAR *n1, const WCHAR *n2 )
 223 {
 224     for (;;)
 225     {
 226         WCHAR ch1 = casemap_ascii( *n1++ );
 227         WCHAR ch2 = casemap_ascii( *n2++ );
 228         if (ch1 == '_') ch1 = '-';
 229         if (ch2 == '_') ch2 = '-';
 230         if (!ch1 || ch1 != ch2) return ch1 - ch2;
 231     }
 232 }
 233
 234
 235 static inline const NLS_LOCALE_LCNAME_INDEX *find_lcname_entry( const NLS_LOCALE_HEADER *header,
 236                                                                 const WCHAR *name )
 237 {
 238     const WCHAR *strings = (const WCHAR *)((char *)header + header->strings_offset);
 239     const NLS_LOCALE_LCNAME_INDEX *index = (const NLS_LOCALE_LCNAME_INDEX *)((char *)header + header->lcnames_offset);
 240     int min = 0, max = header->nb_lcnames - 1;
 241
 242     if (!name) return NULL;
 243     while (min <= max)
 244     {
 245         int res, pos = (min + max) / 2;
 246         const WCHAR *str = strings + index[pos].name;
 247         res = compare_locale_names( name, str + 1 );
 248         if (res < 0) max = pos - 1;
 249         else if (res > 0) min = pos + 1;
 250         else return &index[pos];
 251     }
 252     return NULL;
 253 }
 254
 255
 256 static inline const NLS_LOCALE_LCID_INDEX *find_lcid_entry( const NLS_LOCALE_HEADER *header, LCID lcid )
 257 {
 258     const NLS_LOCALE_LCID_INDEX *index = (const NLS_LOCALE_LCID_INDEX *)((char *)header + header->lcids_offset);
 259     int min = 0, max = header->nb_lcids - 1;
 260
 261     while (min <= max)
 262     {
 263         int pos = (min + max) / 2;
 264         if (lcid < index[pos].id) max = pos - 1;
 265         else if (lcid > index[pos].id) min = pos + 1;
 266         else return &index[pos];
 267     }
 268     return NULL;
 269 }
 270
 271
 272 static inline const NLS_LOCALE_DATA *get_locale_data( const NLS_LOCALE_HEADER *header, UINT idx )
 273 {
 274     ULONG offset = header->locales_offset + idx * header->locale_size;
 275     return (const NLS_LOCALE_DATA *)((const char *)header + offset);
 276 }
 277
 278
 279 static inline unsigned int cp_mbstowcs_size( const CPTABLEINFO *info, const char *str, unsigned int len )
 280 {
 281     unsigned int res;
 282
 283     if (!info->DBCSCodePage) return len;
 284
 285     for (res = 0; len; len--, str++, res++)
 286     {
 287         if (info->DBCSOffsets[(unsigned char)*str] && len > 1)
 288         {
 289             str++;
 290             len--;
 291         }
 292     }
 293     return res;
 294 }
 295
 296
 297 static inline unsigned int cp_wcstombs_size( const CPTABLEINFO *info, const WCHAR *str, unsigned int len )
 298 {
 299     if (info->DBCSCodePage)
 300     {
 301         WCHAR *uni2cp = info->WideCharTable;
 302         unsigned int res;
 303
 304         for (res = 0; len; len--, str++, res++)
 305             if (uni2cp[*str] & 0xff00) res++;
 306         return res;
 307     }
 308     else return len;
 309 }
 310
 311
 312 static inline NTSTATUS utf8_wcstombs_size( const WCHAR *src, unsigned int srclen, unsigned int *reslen )
 313 {
 314     unsigned int val, len;
 315     NTSTATUS status = STATUS_SUCCESS;
 316
 317     for (len = 0; srclen; srclen--, src++)
 318     {
 319         if (*src < 0x80) len++;  /* 0x00-0x7f: 1 byte */
 320         else if (*src < 0x800) len += 2;  /* 0x80-0x7ff: 2 bytes */
 321         else
 322         {
 323             if (!get_utf16( src, srclen, &val ))
 324             {
 325                 val = 0xfffd;
 326                 status = STATUS_SOME_NOT_MAPPED;
 327             }
 328             if (val < 0x10000) len += 3; /* 0x800-0xffff: 3 bytes */
 329             else   /* 0x10000-0x10ffff: 4 bytes */
 330             {
 331                 len += 4;
 332                 src++;
 333                 srclen--;
 334             }
 335         }
 336     }
 337     *reslen = len;
 338     return status;
 339 }
 340
 341
 342 static inline NTSTATUS utf8_mbstowcs_size( const char *src, unsigned int srclen, unsigned int *reslen )
 343 {
 344     unsigned int res, len;
 345     NTSTATUS status = STATUS_SUCCESS;
 346     const char *srcend = src + srclen;
 347
 348     for (len = 0; src < srcend; len++)
 349     {
 350         unsigned char ch = *src++;
 351         if (ch < 0x80) continue;
 352         if ((res = decode_utf8_char( ch, &src, srcend )) > 0x10ffff)
 353             status = STATUS_SOME_NOT_MAPPED;
 354         else
 355             if (res > 0xffff) len++;
 356     }
 357     *reslen = len;
 358     return status;
 359 }
 360
 361
 362 static inline unsigned int cp_mbstowcs( const CPTABLEINFO *info, WCHAR *dst, unsigned int dstlen,
 363                                         const char *src, unsigned int srclen )
 364 {
 365     unsigned int i, ret;
 366
 367     if (info->DBCSOffsets)
 368     {
 369         for (i = dstlen; srclen && i; i--, srclen--, src++, dst++)
 370         {
 371             USHORT off = info->DBCSOffsets[(unsigned char)*src];
 372             if (off && srclen > 1)
 373             {
 374                 src++;
 375                 srclen--;
 376                 *dst = info->DBCSOffsets[off + (unsigned char)*src];
 377             }
 378             else *dst = info->MultiByteTable[(unsigned char)*src];
 379         }
 380         ret = dstlen - i;
 381     }
 382     else
 383     {
 384         ret = min( srclen, dstlen );
 385         for (i = 0; i < ret; i++) dst[i] = info->MultiByteTable[(unsigned char)src[i]];
 386     }
 387     return ret;
 388 }
 389
 390
 391 static inline unsigned int cp_wcstombs( const CPTABLEINFO *info, char *dst, unsigned int dstlen,
 392                                         const WCHAR *src, unsigned int srclen )
 393 {
 394     unsigned int i, ret;
 395
 396     if (info->DBCSCodePage)
 397     {
 398         const WCHAR *uni2cp = info->WideCharTable;
 399
 400         for (i = dstlen; srclen && i; i--, srclen--, src++)
 401         {
 402             if (uni2cp[*src] & 0xff00)
 403             {
 404                 if (i == 1) break;  /* do not output a partial char */
 405                 i--;
 406                 *dst++ = uni2cp[*src] >> 8;
 407             }
 408             *dst++ = (char)uni2cp[*src];
 409         }
 410         ret = dstlen - i;
 411     }
 412     else
 413     {
 414         const char *uni2cp = info->WideCharTable;
 415         ret = min( srclen, dstlen );
 416         for (i = 0; i < ret; i++) dst[i] = uni2cp[src[i]];
 417     }
 418     return ret;
 419 }
 420
 421
 422 static inline NTSTATUS utf8_mbstowcs( WCHAR *dst, unsigned int dstlen, unsigned int *reslen,
 423                                       const char *src, unsigned int srclen )
 424 {
 425     unsigned int res;
 426     NTSTATUS status = STATUS_SUCCESS;
 427     const char *srcend = src + srclen;
 428     WCHAR *dstend = dst + dstlen;
 429
 430     while ((dst < dstend) && (src < srcend))
 431     {
 432         unsigned char ch = *src++;
 433         if (ch < 0x80)  /* special fast case for 7-bit ASCII */
 434         {
 435             *dst++ = ch;
 436             continue;
 437         }
 438         if ((res = decode_utf8_char( ch, &src, srcend )) <= 0xffff)
 439         {
 440             *dst++ = res;
 441         }
 442         else if (res <= 0x10ffff)  /* we need surrogates */
 443         {
 444             res -= 0x10000;
 445             *dst++ = 0xd800 | (res >> 10);
 446             if (dst == dstend) break;
 447             *dst++ = 0xdc00 | (res & 0x3ff);
 448         }
 449         else
 450         {
 451             *dst++ = 0xfffd;
 452             status = STATUS_SOME_NOT_MAPPED;
 453         }
 454     }
 455     if (src < srcend) status = STATUS_BUFFER_TOO_SMALL;  /* overflow */
 456     *reslen = dstlen - (dstend - dst);
 457     return status;
 458 }
 459
 460
 461 static inline NTSTATUS utf8_wcstombs( char *dst, unsigned int dstlen, unsigned int *reslen,
 462                                       const WCHAR *src, unsigned int srclen )
 463 {
 464     char *end;
 465     unsigned int val;
 466     NTSTATUS status = STATUS_SUCCESS;
 467
 468     for (end = dst + dstlen; srclen; srclen--, src++)
 469     {
 470         WCHAR ch = *src;
 471
 472         if (ch < 0x80)  /* 0x00-0x7f: 1 byte */
 473         {
 474             if (dst > end - 1) break;
 475             *dst++ = ch;
 476             continue;
 477         }
 478         if (ch < 0x800)  /* 0x80-0x7ff: 2 bytes */
 479         {
 480             if (dst > end - 2) break;
 481             dst[1] = 0x80 | (ch & 0x3f);
 482             ch >>= 6;
 483             dst[0] = 0xc0 | ch;
 484             dst += 2;
 485             continue;
 486         }
 487         if (!get_utf16( src, srclen, &val ))
 488         {
 489             val = 0xfffd;
 490             status = STATUS_SOME_NOT_MAPPED;
 491         }
 492         if (val < 0x10000)  /* 0x800-0xffff: 3 bytes */
 493         {
 494             if (dst > end - 3) break;
 495             dst[2] = 0x80 | (val & 0x3f);
 496             val >>= 6;
 497             dst[1] = 0x80 | (val & 0x3f);
 498             val >>= 6;
 499             dst[0] = 0xe0 | val;
 500             dst += 3;
 501         }
 502         else   /* 0x10000-0x10ffff: 4 bytes */
 503         {
 504             if (dst > end - 4) break;
 505             dst[3] = 0x80 | (val & 0x3f);
 506             val >>= 6;
 507             dst[2] = 0x80 | (val & 0x3f);
 508             val >>= 6;
 509             dst[1] = 0x80 | (val & 0x3f);
 510             val >>= 6;
 511             dst[0] = 0xf0 | val;
 512             dst += 4;
 513             src++;
 514             srclen--;
 515         }
 516     }
 517     if (srclen) status = STATUS_BUFFER_TOO_SMALL;
 518     *reslen = dstlen - (end - dst);
 519     return status;
 520 }
 521
 522
 523 #define HANGUL_SBASE  0xac00
 524 #define HANGUL_LBASE  0x1100
 525 #define HANGUL_VBASE  0x1161
 526 #define HANGUL_TBASE  0x11a7
 527 #define HANGUL_LCOUNT 19
 528 #define HANGUL_VCOUNT 21
 529 #define HANGUL_TCOUNT 28
 530 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
 531 #define HANGUL_SCOUNT (HANGUL_LCOUNT * HANGUL_NCOUNT)
 532
 533 static inline const WCHAR *get_decomposition( const struct norm_table *info, unsigned int ch,
 534                                               BYTE props, WCHAR *buffer, unsigned int *ret_len )
 535 {
 536     const struct pair { WCHAR src; USHORT dst; } *pairs;
 537     const USHORT *hash_table = (const USHORT *)info + info->decomp_hash;
 538     const WCHAR *ret;
 539     unsigned int i, pos, end, len, hash;
 540
 541     /* default to no decomposition */
 542     put_utf16( buffer, ch );
 543     *ret_len = 1 + (ch >= 0x10000);
 544     if (!props || props == 0x7f) return buffer;
 545
 546     if (props == 0xff)  /* Hangul or invalid char */
 547     {
 548         if (ch >= HANGUL_SBASE && ch < HANGUL_SBASE + HANGUL_SCOUNT)
 549         {
 550             unsigned short sindex = ch - HANGUL_SBASE;
 551             unsigned short tindex = sindex % HANGUL_TCOUNT;
 552             buffer[0] = HANGUL_LBASE + sindex / HANGUL_NCOUNT;
 553             buffer[1] = HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT;
 554             if (tindex) buffer[2] = HANGUL_TBASE + tindex;
 555             *ret_len = 2 + !!tindex;
 556             return buffer;
 557         }
 558         /* ignore other chars in Hangul range */
 559         if (ch >= HANGUL_LBASE && ch < HANGUL_LBASE + 0x100) return buffer;
 560         if (ch >= HANGUL_SBASE && ch < HANGUL_SBASE + 0x2c00) return buffer;
 561         return NULL;
 562     }
 563
 564     hash = ch % info->decomp_size;
 565     pos = hash_table[hash];
 566     if (pos >> 13)
 567     {
 568         if (props != 0xbf) return buffer;
 569         ret = (const USHORT *)info + info->decomp_seq + (pos & 0x1fff);
 570         len = pos >> 13;
 571     }
 572     else
 573     {
 574         pairs = (const struct pair *)((const USHORT *)info + info->decomp_map);
 575
 576         /* find the end of the hash bucket */
 577         for (i = hash + 1; i < info->decomp_size; i++) if (!(hash_table[i] >> 13)) break;
 578         if (i < info->decomp_size) end = hash_table[i];
 579         else for (end = pos; pairs[end].src; end++) ;
 580
 581         for ( ; pos < end; pos++)
 582         {
 583             if (pairs[pos].src != (WCHAR)ch) continue;
 584             ret = (const USHORT *)info + info->decomp_seq + (pairs[pos].dst & 0x1fff);
 585             len = pairs[pos].dst >> 13;
 586             break;
 587         }
 588         if (pos >= end) return buffer;
 589     }
 590
 591     if (len == 7) while (ret[len]) len++;
 592     if (!ret[0]) len = 0;  /* ignored char */
 593     *ret_len = len;
 594     return ret;
 595 }
 596
 597
 598 static inline BYTE rol( BYTE val, BYTE count )
 599 {
 600     return (val << count) | (val >> (8 - count));
 601 }
 602
 603
 604 static inline BYTE get_char_props( const struct norm_table *info, unsigned int ch )
 605 {
 606     const BYTE *level1 = (const BYTE *)((const USHORT *)info + info->props_level1);
 607     const BYTE *level2 = (const BYTE *)((const USHORT *)info + info->props_level2);
 608     BYTE off = level1[ch / 128];
 609
 610     if (!off || off >= 0xfb) return rol( off, 5 );
 611     return level2[(off - 1) * 128 + ch % 128];
 612 }
 613
 614
 615 static inline BYTE get_combining_class( const struct norm_table *info, unsigned int c )
 616 {
 617     const BYTE *classes = (const BYTE *)((const USHORT *)info + info->classes);
 618     BYTE class = get_char_props( info, c ) & 0x3f;
 619
 620     if (class == 0x3f) return 0;
 621     return classes[class];
 622 }
 623
 624
 625 static inline BOOL reorderable_pair( const struct norm_table *info, unsigned int c1, unsigned int c2 )
 626 {
 627     BYTE ccc1, ccc2;
 628
 629     /* reorderable if ccc1 > ccc2 > 0 */
 630     ccc1 = get_combining_class( info, c1 );
 631     if (ccc1 < 2) return FALSE;
 632     ccc2 = get_combining_class( info, c2 );
 633     return ccc2 && (ccc1 > ccc2);
 634 }
 635
 636 static inline void canonical_order_substring( const struct norm_table *info, WCHAR *str, unsigned int len )
 637 {
 638     unsigned int i, ch1, ch2, len1, len2;
 639     BOOL swapped;
 640
 641     do
 642     {
 643         swapped = FALSE;
 644         for (i = 0; i < len - 1; i += len1)
 645         {
 646             if (!(len1 = get_utf16( str + i, len - i, &ch1 ))) break;
 647             if (i + len1 >= len) break;
 648             if (!(len2 = get_utf16( str + i + len1, len - i - len1, &ch2 ))) break;
 649             if (reorderable_pair( info, ch1, ch2 ))
 650             {
 651                 WCHAR tmp[2];
 652                 memcpy( tmp, str + i, len1 * sizeof(WCHAR) );
 653                 memcpy( str + i, str + i + len1, len2 * sizeof(WCHAR) );
 654                 memcpy( str + i + len2, tmp, len1 * sizeof(WCHAR) );
 655                 swapped = TRUE;
 656                 i += len2 - len1;
 657             }
 658         }
 659     } while (swapped);
 660 }
 661
 662
 663 /* reorder the string into canonical order - D108/D109 */
 664 static inline void canonical_order_string( const struct norm_table *info, WCHAR *str, unsigned int len )
 665 {
 666     unsigned int ch, i, r, next = 0;
 667
 668     for (i = 0; i < len; i += r)
 669     {
 670         if (!(r = get_utf16( str + i, len - i, &ch ))) return;
 671         if (i && !get_combining_class( info, ch ))
 672         {
 673             if (i > next + 1) /* at least two successive non-starters */
 674                 canonical_order_substring( info, str + next, i - next );
 675             next = i + r;
 676         }
 677     }
 678     if (i > next + 1) canonical_order_substring( info, str + next, i - next );
 679 }
 680
 681
 682 static inline NTSTATUS decompose_string( const struct norm_table *info, const WCHAR *src, int src_len,
 683                                          WCHAR *dst, int *dst_len )
 684 {
 685     BYTE props;
 686     int src_pos, dst_pos;
 687     unsigned int ch, len, decomp_len;
 688     WCHAR buffer[3];
 689     const WCHAR *decomp;
 690
 691     for (src_pos = dst_pos = 0; src_pos < src_len; src_pos += len)
 692     {
 693         if (!(len = get_utf16( src + src_pos, src_len - src_pos, &ch )))
 694         {
 695             *dst_len = src_pos + IS_HIGH_SURROGATE( src[src_pos] );
 696             return STATUS_NO_UNICODE_TRANSLATION;
 697         }
 698         props = get_char_props( info, ch );
 699         if (!(decomp = get_decomposition( info, ch, props, buffer, &decomp_len )))
 700         {
 701             /* allow final null */
 702             if (!ch && src_pos == src_len - 1 && dst_pos < *dst_len)
 703             {
 704                 dst[dst_pos++] = 0;
 705                 break;
 706             }
 707             *dst_len = src_pos;
 708             return STATUS_NO_UNICODE_TRANSLATION;
 709         }
 710         if (dst_pos + decomp_len > *dst_len)
 711         {
 712             *dst_len += (src_len - src_pos) * info->len_factor;
 713             return STATUS_BUFFER_TOO_SMALL;
 714         }
 715         memcpy( dst + dst_pos, decomp, decomp_len * sizeof(WCHAR) );
 716         dst_pos += decomp_len;
 717     }
 718
 719     canonical_order_string( info, dst, dst_pos );
 720     *dst_len = dst_pos;
 721     return STATUS_SUCCESS;
 722 }
 723
 724
 725 static inline unsigned int compose_hangul( unsigned int ch1, unsigned int ch2 )
 726 {
 727     if (ch1 >= HANGUL_LBASE && ch1 < HANGUL_LBASE + HANGUL_LCOUNT)
 728     {
 729         int lindex = ch1 - HANGUL_LBASE;
 730         int vindex = ch2 - HANGUL_VBASE;
 731         if (vindex >= 0 && vindex < HANGUL_VCOUNT)
 732             return HANGUL_SBASE + (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT;
 733     }
 734     if (ch1 >= HANGUL_SBASE && ch1 < HANGUL_SBASE + HANGUL_SCOUNT)
 735     {
 736         int sindex = ch1 - HANGUL_SBASE;
 737         if (!(sindex % HANGUL_TCOUNT))
 738         {
 739             int tindex = ch2 - HANGUL_TBASE;
 740             if (tindex > 0 && tindex < HANGUL_TCOUNT) return ch1 + tindex;
 741         }
 742     }
 743     return 0;
 744 }
 745
 746
 747 static inline unsigned int compose_chars( const struct norm_table *info, unsigned int ch1, unsigned int ch2 )
 748 {
 749     const USHORT *table = (const USHORT *)info + info->comp_hash;
 750     const WCHAR *chars = (const USHORT *)info + info->comp_seq;
 751     unsigned int hash, start, end, i, len, ch[3];
 752
 753     hash = (ch1 + 95 * ch2) % info->comp_size;
 754     start = table[hash];
 755     end = table[hash + 1];
 756     while (start < end)
 757     {
 758         for (i = 0; i < 3; i++, start += len) len = get_utf16( chars + start, end - start, ch + i );
 759         if (ch[0] == ch1 && ch[1] == ch2) return ch[2];
 760     }
 761     return 0;
 762 }
 763
 764
 765 static inline unsigned int compose_string( const struct norm_table *info, WCHAR *str, unsigned int srclen )
 766 {
 767     unsigned int i, ch, comp, len, start_ch = 0, last_starter = srclen;
 768     BYTE class, prev_class = 0;
 769
 770     for (i = 0; i < srclen; i += len)
 771     {
 772         if (!(len = get_utf16( str + i, srclen - i, &ch ))) return 0;
 773         class = get_combining_class( info, ch );
 774         if (last_starter == srclen || (prev_class && prev_class >= class) ||
 775             (!(comp = compose_hangul( start_ch, ch )) &&
 776              !(comp = compose_chars( info, start_ch, ch ))))
 777         {
 778             if (!class)
 779             {
 780                 last_starter = i;
 781                 start_ch = ch;
 782             }
 783             prev_class = class;
 784         }
 785         else
 786         {
 787             int comp_len = 1 + (comp >= 0x10000);
 788             int start_len = 1 + (start_ch >= 0x10000);
 789
 790             if (comp_len != start_len)
 791                 memmove( str + last_starter + comp_len, str + last_starter + start_len,
 792                          (i - (last_starter + start_len)) * sizeof(WCHAR) );
 793             memmove( str + i + comp_len - start_len, str + i + len, (srclen - i - len) * sizeof(WCHAR) );
 794             srclen += comp_len - start_len - len;
 795             start_ch = comp;
 796             i = last_starter;
 797             len = comp_len;
 798             prev_class = 0;
 799             put_utf16( str + i, comp );
 800         }
 801     }
 802     return srclen;
 803 }
 804
 805
 806 #endif /* __NTDLL_LOCALE_PRIVATE_H */