tools/wrc/utils.c

   1 /*
   2  * Utility routines
   3  *
   4  * Copyright 1998 Bertho A. Stultiens
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19  */
  20
  21 #include "config.h"
  22 #include "wine/port.h"
  23
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  30
  31 #include "wrc.h"
  32 #include "utils.h"
  33 #include "parser.h"
  34
  35 /* #define WANT_NEAR_INDICATION */
  36
  37 #ifdef WANT_NEAR_INDICATION
  38 void make_print(char *str)
  39 {
  40         while(*str)
  41         {
  42                 if(!isprint(*str))
  43                         *str = ' ';
  44                 str++;
  45         }
  46 }
  47 #endif
  48
  49 static void generic_msg(const char *s, const char *t, const char *n, va_list ap)
  50 {
  51         fprintf(stderr, "%s:%d:%d: %s: ", input_name ? input_name : "stdin", line_number, char_number, t);
  52         vfprintf(stderr, s, ap);
  53 #ifdef WANT_NEAR_INDICATION
  54         {
  55                 char *cpy;
  56                 if(n)
  57                 {
  58                         cpy = xstrdup(n);
  59                         make_print(cpy);
  60                         fprintf(stderr, " near '%s'", cpy);
  61                         free(cpy);
  62                 }
  63         }
  64 #endif
  65 }
  66
  67
  68 int parser_error(const char *s, ...)
  69 {
  70         va_list ap;
  71         va_start(ap, s);
  72         generic_msg(s, "Error", parser_text, ap);
  73         fputc( '\n', stderr );
  74         va_end(ap);
  75         exit(1);
  76         return 1;
  77 }
  78
  79 int parser_warning(const char *s, ...)
  80 {
  81         va_list ap;
  82         va_start(ap, s);
  83         generic_msg(s, "Warning", parser_text, ap);
  84         va_end(ap);
  85         return 0;
  86 }
  87
  88 void internal_error(const char *file, int line, const char *s, ...)
  89 {
  90         va_list ap;
  91         va_start(ap, s);
  92         fprintf(stderr, "Internal error (please report) %s %d: ", file, line);
  93         vfprintf(stderr, s, ap);
  94         va_end(ap);
  95         exit(3);
  96 }
  97
  98 void fatal_perror( const char *msg, ... )
  99 {
 100         va_list valist;
 101         va_start( valist, msg );
 102         fprintf(stderr, "Error: ");
 103         vfprintf( stderr, msg, valist );
 104         perror( " " );
 105         va_end( valist );
 106         exit(2);
 107 }
 108
 109 void error(const char *s, ...)
 110 {
 111         va_list ap;
 112         va_start(ap, s);
 113         fprintf(stderr, "Error: ");
 114         vfprintf(stderr, s, ap);
 115         va_end(ap);
 116         exit(2);
 117 }
 118
 119 void warning(const char *s, ...)
 120 {
 121         va_list ap;
 122         va_start(ap, s);
 123         fprintf(stderr, "Warning: ");
 124         vfprintf(stderr, s, ap);
 125         va_end(ap);
 126 }
 127
 128 void chat(const char *s, ...)
 129 {
 130         if(debuglevel & DEBUGLEVEL_CHAT)
 131         {
 132                 va_list ap;
 133                 va_start(ap, s);
 134                 fprintf(stderr, "FYI: ");
 135                 vfprintf(stderr, s, ap);
 136                 va_end(ap);
 137         }
 138 }
 139
 140 char *dup_basename(const char *name, const char *ext)
 141 {
 142         int namelen;
 143         int extlen = strlen(ext);
 144         char *base;
 145         char *slash;
 146
 147         if(!name)
 148                 name = "wrc.tab";
 149
 150         slash = strrchr(name, '/');
 151         if (slash)
 152                 name = slash + 1;
 153
 154         namelen = strlen(name);
 155
 156         /* +4 for later extension and +1 for '\0' */
 157         base = xmalloc(namelen +4 +1);
 158         strcpy(base, name);
 159         if(!strcasecmp(name + namelen-extlen, ext))
 160         {
 161                 base[namelen - extlen] = '\0';
 162         }
 163         return base;
 164 }
 165
 166 void *xmalloc(size_t size)
 167 {
 168     void *res;
 169
 170     assert(size > 0);
 171     res = malloc(size);
 172     if(res == NULL)
 173     {
 174         error("Virtual memory exhausted.\n");
 175     }
 176     memset(res, 0x55, size);
 177     return res;
 178 }
 179
 180
 181 void *xrealloc(void *p, size_t size)
 182 {
 183     void *res;
 184
 185     assert(size > 0);
 186     res = realloc(p, size);
 187     if(res == NULL)
 188     {
 189         error("Virtual memory exhausted.\n");
 190     }
 191     return res;
 192 }
 193
 194 char *strmake( const char* fmt, ... )
 195 {
 196     int n;
 197     size_t size = 100;
 198     va_list ap;
 199
 200     for (;;)
 201     {
 202         char *p = xmalloc( size );
 203         va_start( ap, fmt );
 204         n = vsnprintf( p, size, fmt, ap );
 205         va_end( ap );
 206         if (n == -1) size *= 2;
 207         else if ((size_t)n >= size) size = n + 1;
 208         else return p;
 209         free( p );
 210     }
 211 }
 212
 213 char *xstrdup(const char *str)
 214 {
 215         char *s;
 216
 217         assert(str != NULL);
 218         s = xmalloc(strlen(str)+1);
 219         return strcpy(s, str);
 220 }
 221
 222 int strendswith( const char *str, const char *end )
 223 {
 224     int l = strlen(str);
 225     int m = strlen(end);
 226     return l >= m && !strcmp( str + l - m, end );
 227 }
 228
 229 int compare_striA( const char *str1, const char *str2 )
 230 {
 231     for (;;)
 232     {
 233         /* only the A-Z range is case-insensitive */
 234         char ch1 = (*str1 >= 'a' && *str1 <= 'z') ? *str1 + 'A' - 'a' : *str1;
 235         char ch2 = (*str2 >= 'a' && *str2 <= 'z') ? *str2 + 'A' - 'a' : *str2;
 236         if (!ch1 || ch1 != ch2) return ch1 - ch2;
 237         str1++;
 238         str2++;
 239     }
 240 }
 241
 242 int compare_striW( const WCHAR *str1, const WCHAR *str2 )
 243 {
 244     for (;;)
 245     {
 246         /* only the A-Z range is case-insensitive */
 247         WCHAR ch1 = (*str1 >= 'a' && *str1 <= 'z') ? *str1 + 'A' - 'a' : *str1;
 248         WCHAR ch2 = (*str2 >= 'a' && *str2 <= 'z') ? *str2 + 'A' - 'a' : *str2;
 249         if (!ch1 || ch1 != ch2) return ch1 - ch2;
 250         str1++;
 251         str2++;
 252     }
 253 }
 254
 255 /*
 256  *****************************************************************************
 257  * Function     : compare_name_id
 258  * Syntax       : int compare_name_id(const name_id_t *n1, const name_id_t *n2)
 259  * Input        :
 260  * Output       :
 261  * Description  :
 262  * Remarks      :
 263  *****************************************************************************
 264 */
 265 int compare_name_id(const name_id_t *n1, const name_id_t *n2)
 266 {
 267         if(n1->type == name_ord && n2->type == name_ord)
 268         {
 269                 return n1->name.i_name - n2->name.i_name;
 270         }
 271         else if(n1->type == name_str && n2->type == name_str)
 272         {
 273                 if(n1->name.s_name->type == str_char
 274                 && n2->name.s_name->type == str_char)
 275                 {
 276                         return compare_striA(n1->name.s_name->str.cstr, n2->name.s_name->str.cstr);
 277                 }
 278                 else if(n1->name.s_name->type == str_unicode
 279                 && n2->name.s_name->type == str_unicode)
 280                 {
 281                         return compare_striW(n1->name.s_name->str.wstr, n2->name.s_name->str.wstr);
 282                 }
 283                 else
 284                 {
 285                         internal_error(__FILE__, __LINE__, "Can't yet compare strings of mixed type\n");
 286                 }
 287         }
 288         else if(n1->type == name_ord && n2->type == name_str)
 289                 return 1;
 290         else if(n1->type == name_str && n2->type == name_ord)
 291                 return -1;
 292         else
 293                 internal_error(__FILE__, __LINE__, "Comparing name-ids with unknown types (%d, %d)\n",
 294                                 n1->type, n2->type);
 295
 296         return 0; /* Keep the compiler happy */
 297 }
 298
 299 #ifdef _WIN32
 300
 301 int is_valid_codepage(int id)
 302 {
 303     return IsValidCodePage( id );
 304 }
 305
 306 static WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
 307 {
 308     WCHAR *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
 309     DWORD ret = MultiByteToWideChar( codepage, MB_ERR_INVALID_CHARS, src, srclen, dst, srclen );
 310     if (!ret) return NULL;
 311     dst[ret] = 0;
 312     *dstlen = ret;
 313     return dst;
 314 }
 315
 316 #else  /* _WIN32 */
 317
/*
 * Conversion data for one codepage, filled in by init_nls_info() from the
 * contents of a loaded c_<codepage>.nls file.  The table pointers point
 * into that loaded data.
 */
struct nls_info
{
    unsigned short  codepage;      /* codepage number; 0 marks an unused cache slot */
    unsigned short  unidef;        /* WCHAR produced for characters without a mapping */
    unsigned short  trans_unidef;  /* input character that legitimately maps to unidef */
    unsigned short *cp2uni;        /* byte -> WCHAR table, indexed by unsigned char */
    unsigned short *dbcs_offsets;  /* lead byte -> offset table for double-byte
                                    * codepages, NULL for single-byte ones */
};

/* cache of the codepages loaded so far; searched linearly by get_nls_info() */
static struct nls_info nlsinfo[128];
 328
 329 static void init_nls_info( struct nls_info *info, unsigned short *ptr )
 330 {
 331     unsigned short hdr_size = ptr[0];
 332
 333     info->codepage      = ptr[1];
 334     info->unidef        = ptr[4];
 335     info->trans_unidef  = ptr[6];
 336     ptr += hdr_size;
 337     info->cp2uni = ++ptr;
 338     ptr += 256;
 339     if (*ptr++) ptr += 256;  /* glyph table */
 340     info->dbcs_offsets  = *ptr ? ptr + 1 : NULL;
 341 }
 342
 343 static const struct nls_info *get_nls_info( unsigned int codepage )
 344 {
 345     struct stat st;
 346     unsigned short *data;
 347     char *path;
 348     unsigned int i;
 349     int fd;
 350
 351     for (i = 0; i < ARRAY_SIZE(nlsinfo) && nlsinfo[i].codepage; i++)
 352         if (nlsinfo[i].codepage == codepage) return &nlsinfo[i];
 353
 354     assert( i < ARRAY_SIZE(nlsinfo) );
 355
 356     for (i = 0; nlsdirs[i]; i++)
 357     {
 358         path = strmake( "%s/c_%03u.nls", nlsdirs[i], codepage );
 359         if ((fd = open( path, O_RDONLY )) != -1) break;
 360         free( path );
 361     }
 362     if (!nlsdirs[i]) return NULL;
 363
 364     fstat( fd, &st );
 365     data = xmalloc( st.st_size );
 366     if (read( fd, data, st.st_size ) != st.st_size) error( "failed to load %s\n", path );
 367     close( fd );
 368     free( path );
 369     init_nls_info( &nlsinfo[i], data );
 370     return &nlsinfo[i];
 371 }
 372
 373 int is_valid_codepage(int cp)
 374 {
 375     return cp == CP_UTF8 || get_nls_info( cp );
 376 }
 377
 378 static WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
 379 {
 380     const struct nls_info *info = get_nls_info( codepage );
 381     unsigned int i;
 382     WCHAR dbch, *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
 383
 384     if (!info) error( "codepage %u not supported\n", codepage );
 385
 386     if (info->dbcs_offsets)
 387     {
 388         for (i = 0; srclen; i++, srclen--, src++)
 389         {
 390             unsigned short off = info->dbcs_offsets[(unsigned char)*src];
 391             if (off)
 392             {
 393                 if (srclen == 1) return NULL;
 394                 dbch = (src[0] << 8) | (unsigned char)src[1];
 395                 src++;
 396                 srclen--;
 397                 dst[i] = info->dbcs_offsets[off + (unsigned char)*src];
 398                 if (dst[i] == info->unidef && dbch != info->trans_unidef) return NULL;
 399             }
 400             else
 401             {
 402                 dst[i] = info->cp2uni[(unsigned char)*src];
 403                 if (dst[i] == info->unidef && *src != info->trans_unidef) return NULL;
 404             }
 405         }
 406     }
 407     else
 408     {
 409         for (i = 0; i < srclen; i++)
 410         {
 411             dst[i] = info->cp2uni[(unsigned char)src[i]];
 412             if (dst[i] == info->unidef && src[i] != info->trans_unidef) return NULL;
 413         }
 414     }
 415     dst[i] = 0;
 416     *dstlen = i;
 417     return dst;
 418 }
 419
 420 #endif  /* _WIN32 */
 421
/*
 * Decode srclen bytes of UTF-8 from src into a newly allocated, 0-terminated
 * WCHAR (UTF-16) string.
 *
 * Code points above 0xffff are written as a surrogate pair.  Every invalid
 * or truncated sequence is replaced by 0xfffd; the function itself never
 * fails.  *dstlen receives the number of WCHARs written (terminator
 * excluded).  The result is allocated with xmalloc and freed by the caller.
 */
static WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen )
{
    /* number of trail bytes for each lead byte 0x80-0xff; 0 = not a valid lead byte */
    static const char utf8_length[128] =
    {
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
        0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
        2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
        3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0  /* 0xf0-0xff */
    };
    /* mask selecting the payload bits of a lead byte, indexed by trail byte count */
    static const unsigned char utf8_mask[4] = { 0x7f, 0x1f, 0x0f, 0x07 };

    const char *srcend = src + srclen;
    int len, res;
    WCHAR *ret, *dst;

    /* srclen WCHARs always suffice: each output WCHAR consumes at least one
     * input byte, and a surrogate pair comes from a 4-byte sequence */
    dst = ret = xmalloc( (srclen + 1) * sizeof(WCHAR) );
    while (src < srcend)
    {
        unsigned char ch = *src++;
        if (ch < 0x80)  /* special fast case for 7-bit ASCII */
        {
            *dst++ = ch;
            continue;
        }
        len = utf8_length[ch - 0x80];
        if (len && src + len <= srcend)
        {
            res = ch & utf8_mask[len];
            /* Each case consumes one trail byte and falls through to the
             * next one.  A trail byte is valid when it is 0x80-0xbf, i.e.
             * (byte ^ 0x80) < 0x40.  Any 'break' leaves the switch and
             * emits 0xfffd below. */
            switch (len)
            {
            case 3:
                if ((ch = *src ^ 0x80) >= 0x40) break;
                res = (res << 6) | ch;
                src++;
                if (res < 0x10) break;  /* would decode to < 0x10000: overlong */
                /* fall through */
            case 2:
                if ((ch = *src ^ 0x80) >= 0x40) break;
                res = (res << 6) | ch;
                if (res >= 0x110000 >> 6) break;  /* would decode to >= 0x110000 */
                src++;
                if (res < 0x20) break;  /* would decode to < 0x800: overlong */
                if (res >= 0xd800 >> 6 && res <= 0xdfff >> 6) break;  /* surrogate range */
                /* fall through */
            case 1:
                if ((ch = *src ^ 0x80) >= 0x40) break;
                res = (res << 6) | ch;
                src++;
                if (res < 0x80) break;  /* overlong */
                if (res <= 0xffff) *dst++ = res;
                else
                {
                    /* split into high and low surrogate */
                    res -= 0x10000;
                    *dst++ = 0xd800 | (res >> 10);
                    *dst++ = 0xdc00 | (res & 0x3ff);
                }
                continue;
            }
        }
        /* invalid lead byte, truncated input or malformed sequence */
        *dst++ = 0xfffd;
    }
    *dst = 0;
    *dstlen = dst - ret;
    return ret;
}
 489
 490 static char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen )
 491 {
 492     char *ret, *dst;
 493
 494     dst = ret = xmalloc( srclen * 3 + 1 );
 495     for ( ; srclen; srclen--, src++)
 496     {
 497         unsigned int ch = *src;
 498
 499         if (ch < 0x80)  /* 0x00-0x7f: 1 byte */
 500         {
 501             *dst++ = ch;
 502             continue;
 503         }
 504         if (ch < 0x800)  /* 0x80-0x7ff: 2 bytes */
 505         {
 506             dst[1] = 0x80 | (ch & 0x3f);
 507             ch >>= 6;
 508             dst[0] = 0xc0 | ch;
 509             dst += 2;
 510             continue;
 511         }
 512         if (ch >= 0xd800 && ch <= 0xdbff && srclen > 1 && src[1] >= 0xdc00 && src[1] <= 0xdfff)
 513         {
 514             /* 0x10000-0x10ffff: 4 bytes */
 515             ch = 0x10000 + ((ch & 0x3ff) << 10) + (src[1] & 0x3ff);
 516             dst[3] = 0x80 | (ch & 0x3f);
 517             ch >>= 6;
 518             dst[2] = 0x80 | (ch & 0x3f);
 519             ch >>= 6;
 520             dst[1] = 0x80 | (ch & 0x3f);
 521             ch >>= 6;
 522             dst[0] = 0xf0 | ch;
 523             dst += 4;
 524             src++;
 525             srclen--;
 526             continue;
 527         }
 528         if (ch >= 0xd800 && ch <= 0xdfff) ch = 0xfffd;  /* invalid surrogate pair */
 529
 530         /* 0x800-0xffff: 3 bytes */
 531         dst[2] = 0x80 | (ch & 0x3f);
 532         ch >>= 6;
 533         dst[1] = 0x80 | (ch & 0x3f);
 534         ch >>= 6;
 535         dst[0] = 0xe0 | ch;
 536         dst += 3;
 537     }
 538     *dst = 0;
 539     *dstlen = dst - ret;
 540     return ret;
 541 }
 542
 543 string_t *convert_string_unicode( const string_t *str, int codepage )
 544 {
 545     string_t *ret = xmalloc(sizeof(*ret));
 546
 547     ret->type = str_unicode;
 548     ret->loc = str->loc;
 549
 550     if (str->type == str_char)
 551     {
 552         if (!codepage) parser_error( "Current language is Unicode only, cannot convert string" );
 553
 554         if (codepage == CP_UTF8)
 555             ret->str.wstr = utf8_to_unicode( str->str.cstr, str->size, &ret->size );
 556         else
 557             ret->str.wstr = codepage_to_unicode( codepage, str->str.cstr, str->size, &ret->size );
 558         if (!ret->str.wstr) parser_error( "Invalid character in string '%.*s' for codepage %u",
 559                                           str->size, str->str.cstr, codepage );
 560     }
 561     else
 562     {
 563         ret->size     = str->size;
 564         ret->str.wstr = xmalloc(sizeof(WCHAR)*(ret->size+1));
 565         memcpy( ret->str.wstr, str->str.wstr, ret->size * sizeof(WCHAR) );
 566         ret->str.wstr[ret->size] = 0;
 567     }
 568     return ret;
 569 }
 570
 571 char *convert_string_utf8( const string_t *str, int codepage )
 572 {
 573     int len;
 574     string_t *wstr = convert_string_unicode( str, codepage );
 575     char *ret = unicode_to_utf8( wstr->str.wstr, wstr->size, &len );
 576     free_string( wstr );
 577     return ret;
 578 }
 579
 580 void free_string(string_t *str)
 581 {
 582     if (str->type == str_unicode) free( str->str.wstr );
 583     else free( str->str.cstr );
 584     free( str );
 585 }
 586
 587 /* check if the string is valid utf8 despite a different codepage being in use */
 588 int check_valid_utf8( const string_t *str, int codepage )
 589 {
 590     int i, count;
 591     WCHAR *wstr;
 592
 593     if (!check_utf8) return 0;
 594     if (!codepage) return 0;
 595     if (codepage == CP_UTF8) return 0;
 596     if (!is_valid_codepage( codepage )) return 0;
 597
 598     for (i = count = 0; i < str->size; i++)
 599     {
 600         if ((unsigned char)str->str.cstr[i] >= 0xf5) goto done;
 601         if ((unsigned char)str->str.cstr[i] >= 0xc2) { count++; continue; }
 602         if ((unsigned char)str->str.cstr[i] >= 0x80) goto done;
 603     }
 604     if (!count) return 0;  /* no 8-bit chars at all */
 605
 606     wstr = utf8_to_unicode( str->str.cstr, str->size, &count );
 607     for (i = 0; i < count; i++) if (wstr[i] == 0xfffd) break;
 608     free( wstr );
 609     return (i == count);
 610
 611 done:
 612     check_utf8 = 0;  /* at least one 8-bit non-utf8 string found, stop checking */
 613     return 0;
 614 }
 615
 616
/* one entry of the language to codepage table */
struct lang2cp
{
    unsigned short lang;     /* primary language (LANG_* constant) */
    unsigned short sublang;  /* sublanguage (SUBLANG_* constant); SUBLANG_NEUTRAL
                              * marks the default entry for the language */
    unsigned int   cp;       /* codepage number for this language/sublanguage */
};
 623
/* language to codepage conversion table */
/* specific sublanguages need only be specified if their codepage */
/* differs from the default (SUBLANG_NEUTRAL) */
/* The table is searched linearly by get_language_codepage(). */
/* NOTE(review): a codepage of 0 presumably marks a Unicode-only language */
/* (convert_string_unicode() rejects codepage 0 with such a message) -- */
/* confirm against the callers. */
static const struct lang2cp lang2cps[] =
{
    { LANG_AFRIKAANS,      SUBLANG_NEUTRAL,              1252 },
    { LANG_ALBANIAN,       SUBLANG_NEUTRAL,              1250 },
    { LANG_ALSATIAN,       SUBLANG_NEUTRAL,              1252 },
    { LANG_AMHARIC,        SUBLANG_NEUTRAL,              0    },
    { LANG_ARABIC,         SUBLANG_NEUTRAL,              1256 },
    { LANG_ARMENIAN,       SUBLANG_NEUTRAL,              0    },
    { LANG_ASSAMESE,       SUBLANG_NEUTRAL,              0    },
    { LANG_ASTURIAN,       SUBLANG_NEUTRAL,              1252 },
    { LANG_AZERI,          SUBLANG_NEUTRAL,              1254 },
    { LANG_AZERI,          SUBLANG_AZERI_CYRILLIC,       1251 },
    { LANG_BASHKIR,        SUBLANG_NEUTRAL,              1251 },
    { LANG_BASQUE,         SUBLANG_NEUTRAL,              1252 },
    { LANG_BELARUSIAN,     SUBLANG_NEUTRAL,              1251 },
    { LANG_BENGALI,        SUBLANG_NEUTRAL,              0    },
    { LANG_BOSNIAN,        SUBLANG_NEUTRAL,              1250 },
    { LANG_BOSNIAN,        SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_CYRILLIC, 1251 },
    { LANG_BRETON,         SUBLANG_NEUTRAL,              1252 },
    { LANG_BULGARIAN,      SUBLANG_NEUTRAL,              1251 },
    { LANG_CATALAN,        SUBLANG_NEUTRAL,              1252 },
    { LANG_CHINESE,        SUBLANG_NEUTRAL,              950  },
    { LANG_CHINESE,        SUBLANG_CHINESE_SIMPLIFIED,   936  },
    { LANG_CHINESE,        SUBLANG_CHINESE_SINGAPORE,    936  },
#ifdef LANG_CORNISH
    { LANG_CORNISH,        SUBLANG_NEUTRAL,              1252 },
#endif /* LANG_CORNISH */
    { LANG_CORSICAN,       SUBLANG_NEUTRAL,              1252 },
    { LANG_CROATIAN,       SUBLANG_NEUTRAL,              1250 },
    { LANG_CZECH,          SUBLANG_NEUTRAL,              1250 },
    { LANG_DANISH,         SUBLANG_NEUTRAL,              1252 },
    { LANG_DARI,           SUBLANG_NEUTRAL,              1256 },
    { LANG_DIVEHI,         SUBLANG_NEUTRAL,              0    },
    { LANG_DUTCH,          SUBLANG_NEUTRAL,              1252 },
    { LANG_ENGLISH,        SUBLANG_NEUTRAL,              1252 },
#ifdef LANG_ESPERANTO
    { LANG_ESPERANTO,      SUBLANG_NEUTRAL,              1252 },
#endif /* LANG_ESPERANTO */
    { LANG_ESTONIAN,       SUBLANG_NEUTRAL,              1257 },
    { LANG_FAEROESE,       SUBLANG_NEUTRAL,              1252 },
    { LANG_FILIPINO,       SUBLANG_NEUTRAL,              1252 },
    { LANG_FINNISH,        SUBLANG_NEUTRAL,              1252 },
    { LANG_FRENCH,         SUBLANG_NEUTRAL,              1252 },
    { LANG_FRISIAN,        SUBLANG_NEUTRAL,              1252 },
#ifdef LANG_MANX_GAELIC
    { LANG_MANX_GAELIC,    SUBLANG_NEUTRAL,              1252 },
#endif /* LANG_MANX_GAELIC */
    { LANG_GALICIAN,       SUBLANG_NEUTRAL,              1252 },
    { LANG_GEORGIAN,       SUBLANG_NEUTRAL,              0    },
    { LANG_GERMAN,         SUBLANG_NEUTRAL,              1252 },
    { LANG_GREEK,          SUBLANG_NEUTRAL,              1253 },
    { LANG_GREENLANDIC,    SUBLANG_NEUTRAL,              1252 },
    { LANG_GUJARATI,       SUBLANG_NEUTRAL,              0    },
    { LANG_HAUSA,          SUBLANG_NEUTRAL,              1252 },
    { LANG_HEBREW,         SUBLANG_NEUTRAL,              1255 },
    { LANG_HINDI,          SUBLANG_NEUTRAL,              0    },
    { LANG_HUNGARIAN,      SUBLANG_NEUTRAL,              1250 },
    { LANG_ICELANDIC,      SUBLANG_NEUTRAL,              1252 },
    { LANG_IGBO,           SUBLANG_NEUTRAL,              1252 },
    { LANG_INDONESIAN,     SUBLANG_NEUTRAL,              1252 },
    { LANG_INUKTITUT,      SUBLANG_NEUTRAL,              0    },
    { LANG_INUKTITUT,      SUBLANG_INUKTITUT_CANADA_LATIN, 0  },
    { LANG_INVARIANT,      SUBLANG_NEUTRAL,              0    },
    { LANG_IRISH,          SUBLANG_NEUTRAL,              1252 },
    { LANG_ITALIAN,        SUBLANG_NEUTRAL,              1252 },
    { LANG_JAPANESE,       SUBLANG_NEUTRAL,              932  },
    { LANG_KANNADA,        SUBLANG_NEUTRAL,              0    },
    { LANG_KAZAK,          SUBLANG_NEUTRAL,              1251 },
    { LANG_KHMER,          SUBLANG_NEUTRAL,              0    },
    { LANG_KICHE,          SUBLANG_NEUTRAL,              1252 },
    { LANG_KINYARWANDA,    SUBLANG_NEUTRAL,              1252 },
    { LANG_KONKANI,        SUBLANG_NEUTRAL,              0    },
    { LANG_KOREAN,         SUBLANG_NEUTRAL,              949  },
    { LANG_KYRGYZ,         SUBLANG_NEUTRAL,              1251 },
    { LANG_LAO,            SUBLANG_NEUTRAL,              0    },
    { LANG_LATVIAN,        SUBLANG_NEUTRAL,              1257 },
    { LANG_LITHUANIAN,     SUBLANG_NEUTRAL,              1257 },
    { LANG_LOWER_SORBIAN,  SUBLANG_NEUTRAL,              1252 },
    { LANG_LUXEMBOURGISH,  SUBLANG_NEUTRAL,              1252 },
    { LANG_MACEDONIAN,     SUBLANG_NEUTRAL,              1251 },
    { LANG_MALAY,          SUBLANG_NEUTRAL,              1252 },
    { LANG_MALAYALAM,      SUBLANG_NEUTRAL,              0    },
    { LANG_MALTESE,        SUBLANG_NEUTRAL,              0    },
    { LANG_MAORI,          SUBLANG_NEUTRAL,              0    },
    { LANG_MAPUDUNGUN,     SUBLANG_NEUTRAL,              1252 },
    { LANG_MARATHI,        SUBLANG_NEUTRAL,              0    },
    { LANG_MOHAWK,         SUBLANG_NEUTRAL,              1252 },
    { LANG_MONGOLIAN,      SUBLANG_NEUTRAL,              1251 },
    { LANG_NEPALI,         SUBLANG_NEUTRAL,              0    },
    { LANG_NEUTRAL,        SUBLANG_NEUTRAL,              1252 },
    { LANG_NORWEGIAN,      SUBLANG_NEUTRAL,              1252 },
    { LANG_OCCITAN,        SUBLANG_NEUTRAL,              1252 },
    { LANG_ORIYA,          SUBLANG_NEUTRAL,              0    },
    { LANG_PASHTO,         SUBLANG_NEUTRAL,              0    },
    { LANG_PERSIAN,        SUBLANG_NEUTRAL,              1256 },
    { LANG_POLISH,         SUBLANG_NEUTRAL,              1250 },
    { LANG_PORTUGUESE,     SUBLANG_NEUTRAL,              1252 },
    { LANG_PUNJABI,        SUBLANG_NEUTRAL,              0    },
    { LANG_QUECHUA,        SUBLANG_NEUTRAL,              1252 },
    { LANG_ROMANIAN,       SUBLANG_NEUTRAL,              1250 },
    { LANG_ROMANSH,        SUBLANG_NEUTRAL,              1252 },
    { LANG_RUSSIAN,        SUBLANG_NEUTRAL,              1251 },
    { LANG_SAMI,           SUBLANG_NEUTRAL,              1252 },
    { LANG_SANSKRIT,       SUBLANG_NEUTRAL,              0    },
    { LANG_SCOTTISH_GAELIC,SUBLANG_NEUTRAL,              1252 },
    { LANG_SERBIAN,        SUBLANG_NEUTRAL,              1250 },
    { LANG_SERBIAN,        SUBLANG_SERBIAN_CYRILLIC,     1251 },
    { LANG_SINHALESE,      SUBLANG_NEUTRAL,              0    },
    { LANG_SLOVAK,         SUBLANG_NEUTRAL,              1250 },
    { LANG_SLOVENIAN,      SUBLANG_NEUTRAL,              1250 },
    { LANG_SOTHO,          SUBLANG_NEUTRAL,              1252 },
    { LANG_SPANISH,        SUBLANG_NEUTRAL,              1252 },
    { LANG_SWAHILI,        SUBLANG_NEUTRAL,              1252 },
    { LANG_SWEDISH,        SUBLANG_NEUTRAL,              1252 },
    { LANG_SYRIAC,         SUBLANG_NEUTRAL,              0    },
    { LANG_TAJIK,          SUBLANG_NEUTRAL,              1251 },
    { LANG_TAMAZIGHT,      SUBLANG_NEUTRAL,              1252 },
    { LANG_TAMIL,          SUBLANG_NEUTRAL,              0    },
    { LANG_TATAR,          SUBLANG_NEUTRAL,              1251 },
    { LANG_TELUGU,         SUBLANG_NEUTRAL,              0    },
    { LANG_THAI,           SUBLANG_NEUTRAL,              874  },
    { LANG_TIBETAN,        SUBLANG_NEUTRAL,              0    },
    { LANG_TSWANA,         SUBLANG_NEUTRAL,              1252 },
    { LANG_TURKISH,        SUBLANG_NEUTRAL,              1254 },
    { LANG_TURKMEN,        SUBLANG_NEUTRAL,              1250 },
    { LANG_UIGHUR,         SUBLANG_NEUTRAL,              1256 },
    { LANG_UKRAINIAN,      SUBLANG_NEUTRAL,              1251 },
    { LANG_UPPER_SORBIAN,  SUBLANG_NEUTRAL,              1252 },
    { LANG_URDU,           SUBLANG_NEUTRAL,              1256 },
    { LANG_UZBEK,          SUBLANG_NEUTRAL,              1254 },
    { LANG_UZBEK,          SUBLANG_UZBEK_CYRILLIC,       1251 },
    { LANG_VIETNAMESE,     SUBLANG_NEUTRAL,              1258 },
#ifdef LANG_WALON
    { LANG_WALON,          SUBLANG_NEUTRAL,              1252 },
#endif /* LANG_WALON */
    { LANG_WELSH,          SUBLANG_NEUTRAL,              1252 },
    { LANG_WOLOF,          SUBLANG_NEUTRAL,              1252 },
    { LANG_XHOSA,          SUBLANG_NEUTRAL,              1252 },
    { LANG_YAKUT,          SUBLANG_NEUTRAL,              1251 },
    { LANG_YI,             SUBLANG_NEUTRAL,              0    },
    { LANG_YORUBA,         SUBLANG_NEUTRAL,              1252 },
    { LANG_ZULU,           SUBLANG_NEUTRAL,              1252 }
};
 770
 771 int get_language_codepage( unsigned short lang, unsigned short sublang )
 772 {
 773     unsigned int i;
 774     int cp = -1, defcp = -1;
 775
 776     for (i = 0; i < ARRAY_SIZE(lang2cps); i++)
 777     {
 778         if (lang2cps[i].lang != lang) continue;
 779         if (lang2cps[i].sublang == sublang)
 780         {
 781             cp = lang2cps[i].cp;
 782             break;
 783         }
 784         if (lang2cps[i].sublang == SUBLANG_NEUTRAL) defcp = lang2cps[i].cp;
 785     }
 786
 787     if (cp == -1) cp = defcp;
 788     return cp;
 789 }