tools/wmc/mcl.c

   1 /*
   2  * Wine Message Compiler lexical scanner
   3  *
   4  * Copyright 2000 Bertho A. Stultiens (BS)
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19  */
  20
  21 #include "config.h"
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #include <ctype.h>
  26 #include <assert.h>
  27 #include <string.h>
  28
  29 #include "utils.h"
  30 #include "wmc.h"
  31 #include "lang.h"
  32
  33 #include "mcy.tab.h"
  34
  35 /*
  36  * Keywords are case insensitive. All normal input is treated as
  37  * being in codepage iso-8859-1 for ascii input files (unicode
  38  * page 0) and as equivalent unicode if unicode input is selected.
  39  * All normal input, which is not part of a message text, is
  40  * enforced to be unicode page 0. Otherwise an error will be
  41  * generated. The normal file data should only be ASCII because
  42  * that is the basic definition of the grammar.
  43  *
  44  * Byteorder or unicode input is determined automatically by
  45  * reading the first 8 bytes and checking them against unicode
  46  * page 0 byteorder (hibyte must be 0).
  47  * -- FIXME --
  48  * Alternatively, the input is checked against a special byte
  49  * sequence to identify the file.
  50  * -- FIXME --
  51  *
  52  *
  53  * Keywords:
  54  *      Codepages
  55  *      Facility
  56  *      FacilityNames
  57  *      LanguageNames
  58  *      MessageId
  59  *      MessageIdTypedef
  60  *      Severity
  61  *      SeverityNames
  62  *      SymbolicName
  63  *
  64  * Default added identifiers for classes:
  65  * SeverityNames:
  66  *      Success         = 0x0
  67  *      Informational   = 0x1
  68  *      Warning         = 0x2
  69  *      Error           = 0x3
  70  * FacilityNames:
  71  *      System          = 0x0FF
  72  *      Application     = 0xFFF
  73  *
  74  * The 'Codepages' keyword is a wmc extension.
  75  */
  76
  77 static const WCHAR ustr_application[]   = { 'A', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', 0 };
  78 static const WCHAR ustr_codepages[]     = { 'C', 'o', 'd', 'e', 'p', 'a', 'g', 'e', 's', 0 };
  79 static const WCHAR ustr_english[]       = { 'E', 'n', 'g', 'l', 'i', 's', 'h', 0 };
  80 static const WCHAR ustr_error[]         = { 'E', 'r', 'r', 'o', 'r', 0 };
  81 static const WCHAR ustr_facility[]      = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 0 };
  82 static const WCHAR ustr_facilitynames[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
  83 static const WCHAR ustr_informational[] = { 'I', 'n', 'f', 'o', 'r', 'm', 'a', 't', 'i', 'o', 'n', 'a', 'l', 0 };
  84 static const WCHAR ustr_language[]      = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 0};
  85 static const WCHAR ustr_languagenames[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 'N', 'a', 'm', 'e', 's', 0};
  86 static const WCHAR ustr_messageid[]     = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 0 };
  87 static const WCHAR ustr_messageidtypedef[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 'T', 'y', 'p', 'e', 'd', 'e', 'f', 0 };
  88 static const WCHAR ustr_dxgi[]  = { 'D', 'x', 'g', 'i', 0 };
  89 static const WCHAR ustr_null[]  = { 'N', 'u', 'l', 'l', 0 };
  90 static const WCHAR ustr_outputbase[]    = { 'O', 'u', 't', 'p', 'u', 't', 'B', 'a', 's', 'e', 0 };
  91 static const WCHAR ustr_severity[]      = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 0 };
  92 static const WCHAR ustr_severitynames[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
  93 static const WCHAR ustr_success[]       = { 'S', 'u', 'c', 'c', 'e', 's', 's', 0 };
  94 static const WCHAR ustr_symbolicname[]  = { 'S', 'y', 'm', 'b', 'o', 'l', 'i', 'c', 'N', 'a', 'm', 'e', 0 };
  95 static const WCHAR ustr_system[]        = { 'S', 'y', 's', 't', 'e', 'm', 0 };
  96 static const WCHAR ustr_warning[]       = { 'W', 'a', 'r', 'n', 'i', 'n', 'g', 0 };
  97 static const WCHAR ustr_msg00001[]      = { 'm', 's', 'g', '0', '0', '0', '0', '1', 0 };
  98 /*
  99  * This table is to beat any form of "expression building" to check for
 100  * correct filename characters. It is also used for ident checks.
 101  * FIXME: use it more consistently.
 102  */
 103
 104 #define CH_SHORTNAME    0x01
 105 #define CH_LONGNAME     0x02
 106 #define CH_IDENT        0x04
 107 #define CH_NUMBER       0x08
 108 /*#define CH_WILDCARD   0x10*/
 109 /*#define CH_DOT        0x20*/
 110 #define CH_PUNCT        0x40
 111 #define CH_INVALID      0x80
 112
 113 static const char char_table[256] = {
 114         0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
 115         0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
 116         0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
 117         0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
 118         0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
 119         0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
 120         0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
 121         0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
 122         0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
 123         0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
 124         0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
 125         0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
 126         0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
 127         0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
 128         0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
 129         0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
 130         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
 131         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
 132         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
 133         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
 134         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
 135         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
 136         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
 137         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
 138         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
 139         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
 140         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
 141         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
 142         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
 143         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
 144         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
 145         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
 146 };
 147
 148 static int isisochar(int ch)
 149 {
 150         return !(ch & (~0xff));
 151 }
 152
 153 static int codepage;
 154
 155 void set_codepage(int cp)
 156 {
 157         codepage = cp;
 158 }
 159
 160 /*
 161  * Input functions
 162  */
 163 #define INPUTBUFFER_SIZE        2048    /* Must be larger than 4 and approx. large enough to hold a line */
 164
 165 static int nungetstack = 0;
 166 static int allocungetstack = 0;
 167 static char *ungetstack = NULL;
 168 static int ninputbuffer = 0;
 169 static WCHAR inputbuffer[INPUTBUFFER_SIZE];
 170
 171 /*
 172  * Fill the input buffer with *one* line of input.
 173  * The line is '\n' terminated so that scanning
 174  * messages with translation works as expected
 175  * (otherwise we cannot pre-translate because the
 176  * language is first known one line before the
 177  * actual message).
 178  */
 179 static int fill_inputbuffer(void)
 180 {
 181     static enum input_mode { INPUT_UNKNOWN, INPUT_ASCII, INPUT_UTF8, INPUT_UNICODE } mode;
 182     static int swapped;
 183     static unsigned char utf8_bom[3] = { 0xef, 0xbb, 0xbf };
 184     WCHAR *wbuf;
 185     int i, pos = 0, len = 0;
 186     char buffer[INPUTBUFFER_SIZE];
 187
 188     if (mode == INPUT_UNKNOWN)
 189     {
 190         len = fread( buffer, 1, 8, yyin );
 191         wbuf = (WCHAR *)buffer;
 192         if (len >= 3 && !memcmp( buffer, utf8_bom, 3 ))
 193         {
 194             mode = INPUT_UTF8;
 195             memmove( buffer, buffer + 3, len - 3 );
 196             len -= 3;
 197         }
 198         else if (len == 8)
 199         {
 200             if (wbuf[0] == 0xfeff || wbuf[0] == 0xfffe)
 201             {
 202                 mode = INPUT_UNICODE;
 203                 pos = 1;
 204                 swapped = (wbuf[0] == 0xfffe);
 205             }
 206             else if (!((wbuf[0] | wbuf[1] | wbuf[2] | wbuf[3]) & 0xff00))
 207             {
 208                 mode = INPUT_UNICODE;
 209             }
 210             else if (!((wbuf[0] | wbuf[1] | wbuf[2] | wbuf[3]) & 0x00ff))
 211             {
 212                 mode = INPUT_UNICODE;
 213                 swapped = 1;
 214             }
 215         }
 216
 217         if (mode == INPUT_UNICODE)
 218         {
 219             len = 4 - pos;
 220             memcpy( inputbuffer, wbuf + pos, len * sizeof(WCHAR) );
 221         }
 222         else if (mode == INPUT_UNKNOWN) mode = unicodein ? INPUT_UTF8 : INPUT_ASCII;
 223     }
 224
 225     switch (mode)
 226     {
 227     case INPUT_ASCII:
 228         if (!fgets( buffer + len, sizeof(buffer) - len, yyin )) break;
 229         wbuf = codepage_to_unicode( codepage, buffer, strlen(buffer), &ninputbuffer );
 230         memcpy( inputbuffer, wbuf, ninputbuffer * sizeof(WCHAR) );
 231         free( wbuf );
 232         return 1;
 233     case INPUT_UTF8:
 234         if (!fgets( buffer + len, sizeof(buffer) - len, yyin )) break;
 235         wbuf = utf8_to_unicode( buffer, strlen(buffer), &ninputbuffer );
 236         memcpy( inputbuffer, wbuf, ninputbuffer * sizeof(WCHAR) );
 237         free( wbuf );
 238         return 1;
 239     case INPUT_UNICODE:
 240         len += fread( inputbuffer + len, sizeof(WCHAR), INPUTBUFFER_SIZE - len, yyin );
 241         if (!len) break;
 242         if (swapped) for (i = 0; i < len; i++) inputbuffer[i] = BYTESWAP_WORD( inputbuffer[i] );
 243         ninputbuffer = len;
 244         return 1;
 245     case INPUT_UNKNOWN:
 246         break;
 247     }
 248     if (ferror(yyin)) xyyerror( "Fatal: reading input failed\n" );
 249     return 0;
 250 }
 251
 252 static int get_unichar(void)
 253 {
 254         static WCHAR *b = NULL;
 255         char_number++;
 256
 257         if(nungetstack)
 258                 return ungetstack[--nungetstack];
 259
 260         if(!ninputbuffer)
 261         {
 262                 if(!fill_inputbuffer())
 263                         return EOF;
 264                 b = inputbuffer;
 265         }
 266
 267         ninputbuffer--;
 268         return *b++;
 269 }
 270
 271 static void unget_unichar(int ch)
 272 {
 273         if(ch == EOF)
 274                 return;
 275
 276         char_number--;
 277
 278         if(nungetstack == allocungetstack)
 279         {
 280                 allocungetstack += 32;
 281                 ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
 282         }
 283
 284         ungetstack[nungetstack++] = (WCHAR)ch;
 285 }
 286
 287
 288 /*
 289  * Normal character stack.
 290  * Used for number scanning.
 291  */
 292 static int ncharstack = 0;
 293 static int alloccharstack = 0;
 294 static char *charstack = NULL;
 295
 296 static void empty_char_stack(void)
 297 {
 298         ncharstack = 0;
 299 }
 300
 301 static void push_char(int ch)
 302 {
 303         if(ncharstack == alloccharstack)
 304         {
 305                 alloccharstack += 32;
 306                 charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
 307         }
 308         charstack[ncharstack++] = (char)ch;
 309 }
 310
 311 static int tos_char_stack(void)
 312 {
 313         if(!ncharstack)
 314                 return 0;
 315         else
 316                 return (int)(charstack[ncharstack-1] & 0xff);
 317 }
 318
 319 static char *get_char_stack(void)
 320 {
 321         return charstack;
 322 }
 323
 324 /*
 325  * Unicode character stack.
 326  * Used for general scanner.
 327  */
 328 static int nunicharstack = 0;
 329 static int allocunicharstack = 0;
 330 static WCHAR *unicharstack = NULL;
 331
 332 static void empty_unichar_stack(void)
 333 {
 334         nunicharstack = 0;
 335 }
 336
 337 static void push_unichar(int ch)
 338 {
 339         if(nunicharstack == allocunicharstack)
 340         {
 341                 allocunicharstack += 128;
 342                 unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(*unicharstack));
 343         }
 344         unicharstack[nunicharstack++] = (WCHAR)ch;
 345 }
 346
 347 #if 0
 348 static int tos_unichar_stack(void)
 349 {
 350         if(!nunicharstack)
 351                 return 0;
 352         else
 353                 return (int)(unicharstack[nunicharstack-1] & 0xffff);
 354 }
 355 #endif
 356
 357 static WCHAR *get_unichar_stack(void)
 358 {
 359         return unicharstack;
 360 }
 361
 362 /*
 363  * Number scanner
 364  *
 365  * state |      ch         | next state
 366  * ------+-----------------+--------------------------
 367  *   0   | [0]             | 1
 368  *   0   | [1-9]           | 4
 369  *   0   | .               | error (should never occur)
 370  *   1   | [xX]            | 2
 371  *   1   | [0-7]           | 3
 372  *   1   | [89a-wyzA-WYZ_] | error invalid digit
 373  *   1   | .               | return 0
 374  *   2   | [0-9a-fA-F]     | 2
 375  *   2   | [g-zG-Z_]       | error invalid hex digit
 376  *   2   | .               | return (hex-number) if TOS != [xX] else error
 377  *   3   | [0-7]           | 3
 378  *   3   | [89a-zA-Z_]     | error invalid octal digit
 379  *   3   | .               | return (octal-number)
 380  *   4   | [0-9]           | 4
 381  *   4   | [a-zA-Z_]       | error invalid decimal digit
 382  *   4   | .               | return (decimal-number)
 383  *
 384  * All non-identifier characters [^a-zA-Z_0-9] terminate the scan
 385  * and return the value. This is not entirely correct, but close
 386  * enough (should check punctuators as trailing context, but the
 387  * char_table is not adapted to that and it is questionable whether
 388  * it is worth the trouble).
 389  * All non-iso-8859-1 characters are an error.
 390  */
 391 static int scan_number(int ch)
 392 {
 393         int state = 0;
 394         int base = 10;
 395         empty_char_stack();
 396
 397         while(1)
 398         {
 399                 if(!isisochar(ch))
 400                         xyyerror("Invalid digit\n");
 401
 402                 switch(state)
 403                 {
 404                 case 0:
 405                         if(isdigit(ch))
 406                         {
 407                                 push_char(ch);
 408                                 if(ch == '0')
 409                                         state = 1;
 410                                 else
 411                                         state = 4;
 412                         }
 413                         else
 414                                 internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state\n");
 415                         break;
 416                 case 1:
 417                         if(ch == 'x' || ch == 'X')
 418                         {
 419                                 push_char(ch);
 420                                 state = 2;
 421                         }
 422                         else if(ch >= '0' && ch <= '7')
 423                         {
 424                                 push_char(ch);
 425                                 state = 3;
 426                         }
 427                         else if(isalpha(ch) || ch == '_')
 428                                 xyyerror("Invalid number digit\n");
 429                         else
 430                         {
 431                                 unget_unichar(ch);
 432                                 mcy_lval.num = 0;
 433                                 return tNUMBER;
 434                         }
 435                         break;
 436                 case 2:
 437                         if(isxdigit(ch))
 438                                 push_char(ch);
 439                         else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
 440                                 xyyerror("Invalid hex digit\n");
 441                         else
 442                         {
 443                                 base = 16;
 444                                 goto finish;
 445                         }
 446                         break;
 447                 case 3:
 448                         if(ch >= '0' && ch <= '7')
 449                                 push_char(ch);
 450                         else if(isalnum(ch) || ch == '_')
 451                                 xyyerror("Invalid octal digit\n");
 452                         else
 453                         {
 454                                 base = 8;
 455                                 goto finish;
 456                         }
 457                         break;
 458                 case 4:
 459                         if(isdigit(ch))
 460                                 push_char(ch);
 461                         else if(isalnum(ch) || ch == '_')
 462                                 xyyerror("Invalid decimal digit\n");
 463                         else
 464                         {
 465                                 base = 10;
 466                                 goto finish;
 467                         }
 468                         break;
 469                 default:
 470                         internal_error(__FILE__, __LINE__, "Invalid state in number-scanner\n");
 471                 }
 472                 ch = get_unichar();
 473         }
 474 finish:
 475         unget_unichar(ch);
 476         push_char(0);
 477         mcy_lval.num = strtoul(get_char_stack(), NULL, base);
 478         return tNUMBER;
 479 }
 480
 481 static void newline(void)
 482 {
 483         line_number++;
 484         char_number = 1;
 485 }
 486
 487 static int unisort(const void *p1, const void *p2)
 488 {
 489         return unistricmp(((const token_t *)p1)->name, ((const token_t *)p2)->name);
 490 }
 491
 492 static token_t *tokentable = NULL;
 493 static int ntokentable = 0;
 494
 495 token_t *lookup_token(const WCHAR *s)
 496 {
 497         token_t tok;
 498
 499         tok.name = s;
 500         return (token_t *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
 501 }
 502
 503 void add_token(tok_e type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
 504 {
 505         ntokentable++;
 506         tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));
 507         tokentable[ntokentable-1].type = type;
 508         tokentable[ntokentable-1].name = name;
 509         tokentable[ntokentable-1].token = tok;
 510         tokentable[ntokentable-1].codepage = cp;
 511         tokentable[ntokentable-1].alias = alias;
 512         tokentable[ntokentable-1].fixed = fix;
 513         qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
 514 }
 515
 516 void get_tokentable(token_t **tab, int *len)
 517 {
 518         assert(tab != NULL);
 519         assert(len != NULL);
 520         *tab = tokentable;
 521         *len = ntokentable;
 522 }
 523
 524 /*
 525  * The scanner
 526  *
 527  */
 528 int mcy_lex(void)
 529 {
 530         static const WCHAR ustr_dot1[] = { '.', '\n', 0 };
 531         static const WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
 532         static int isinit = 0;
 533         int ch;
 534
 535         if(!isinit)
 536         {
 537                 isinit++;
 538                 set_codepage(WMC_DEFAULT_CODEPAGE);
 539                 add_token(tok_keyword,  ustr_codepages,         tCODEPAGE,      0, NULL, 0);
 540                 add_token(tok_keyword,  ustr_facility,          tFACILITY,      0, NULL, 1);
 541                 add_token(tok_keyword,  ustr_facilitynames,     tFACNAMES,      0, NULL, 1);
 542                 add_token(tok_keyword,  ustr_language,          tLANGUAGE,      0, NULL, 1);
 543                 add_token(tok_keyword,  ustr_languagenames,     tLANNAMES,      0, NULL, 1);
 544                 add_token(tok_keyword,  ustr_messageid,         tMSGID,         0, NULL, 1);
 545                 add_token(tok_keyword,  ustr_messageidtypedef,  tTYPEDEF,       0, NULL, 1);
 546                 add_token(tok_keyword,  ustr_outputbase,        tBASE,          0, NULL, 1);
 547                 add_token(tok_keyword,  ustr_severity,          tSEVERITY,      0, NULL, 1);
 548                 add_token(tok_keyword,  ustr_severitynames,     tSEVNAMES,      0, NULL, 1);
 549                 add_token(tok_keyword,  ustr_symbolicname,      tSYMNAME,       0, NULL, 1);
 550                 add_token(tok_severity, ustr_error,             0x03,           0, NULL, 0);
 551                 add_token(tok_severity, ustr_warning,           0x02,           0, NULL, 0);
 552                 add_token(tok_severity, ustr_informational,     0x01,           0, NULL, 0);
 553                 add_token(tok_severity, ustr_success,           0x00,           0, NULL, 0);
 554                 add_token(tok_facility, ustr_application,       0xFFF,          0, NULL, 0);
 555                 add_token(tok_facility, ustr_system,            0x0FF,          0, NULL, 0);
 556                 add_token(tok_facility, ustr_dxgi,              0x87a,          0, NULL, 0);
 557                 add_token(tok_facility, ustr_null,              0x000,          0, NULL, 0);
 558                 add_token(tok_language, ustr_english,           0x409,          437, ustr_msg00001, 0);
 559         }
 560
 561         empty_unichar_stack();
 562
 563         while(1)
 564         {
 565                 if(want_line)
 566                 {
 567                         while((ch = get_unichar()) != '\n')
 568                         {
 569                                 if(ch == EOF)
 570                                         xyyerror("Unexpected EOF\n");
 571                                 push_unichar(ch);
 572                         }
 573                         newline();
 574                         push_unichar(ch);
 575                         push_unichar(0);
 576                         if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
 577                         {
 578                                 want_line = 0;
 579                                 /* Reset the codepage to our default after each message */
 580                                 set_codepage(WMC_DEFAULT_CODEPAGE);
 581                                 return tMSGEND;
 582                         }
 583                         mcy_lval.str = xunistrdup(get_unichar_stack());
 584                         return tLINE;
 585                 }
 586
 587                 ch = get_unichar();
 588
 589                 if(ch == EOF)
 590                         return EOF;
 591
 592                 if(ch == '\n')
 593                 {
 594                         newline();
 595                         if(want_nl)
 596                         {
 597                                 want_nl = 0;
 598                                 return tNL;
 599                         }
 600                         continue;
 601                 }
 602
 603                 if(isisochar(ch))
 604                 {
 605                         if(want_file)
 606                         {
 607                                 int n = 0;
 608                                 while(n < 8 && isisochar(ch))
 609                                 {
 610                                         int t = char_table[ch];
 611                                         if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
 612                                                 break;
 613
 614                                         push_unichar(ch);
 615                                         n++;
 616                                         ch = get_unichar();
 617                                 }
 618                                 unget_unichar(ch);
 619                                 push_unichar(0);
 620                                 want_file = 0;
 621                                 mcy_lval.str = xunistrdup(get_unichar_stack());
 622                                 return tFILE;
 623                         }
 624
 625                         if(char_table[ch] & CH_IDENT)
 626                         {
 627                                 token_t *tok;
 628                                 while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
 629                                 {
 630                                         push_unichar(ch);
 631                                         ch = get_unichar();
 632                                 }
 633                                 unget_unichar(ch);
 634                                 push_unichar(0);
 635                                 if(!(tok = lookup_token(get_unichar_stack())))
 636                                 {
 637                                         mcy_lval.str = xunistrdup(get_unichar_stack());
 638                                         return tIDENT;
 639                                 }
 640                                 switch(tok->type)
 641                                 {
 642                                 case tok_keyword:
 643                                         return tok->token;
 644
 645                                 case tok_language:
 646                                         codepage = tok->codepage;
 647                                         /* Fall through */
 648                                 case tok_severity:
 649                                 case tok_facility:
 650                                         mcy_lval.tok = tok;
 651                                         return tTOKEN;
 652
 653                                 default:
 654                                         internal_error(__FILE__, __LINE__, "Invalid token type encountered\n");
 655                                 }
 656                         }
 657
 658                         if(isspace(ch)) /* Ignore space */
 659                                 continue;
 660
 661                         if(isdigit(ch))
 662                                 return scan_number(ch);
 663                 }
 664
 665                 switch(ch)
 666                 {
 667                 case ':':
 668                 case '=':
 669                 case '+':
 670                 case '(':
 671                 case ')':
 672                         return ch;
 673                 case ';':
 674                         while(ch != '\n' && ch != EOF)
 675                         {
 676                                 push_unichar(ch);
 677                                 ch = get_unichar();
 678                         }
 679                         newline();
 680                         push_unichar(ch);       /* Include the newline */
 681                         push_unichar(0);
 682                         mcy_lval.str = xunistrdup(get_unichar_stack());
 683                         return tCOMMENT;
 684                 default:
 685                         xyyerror("Invalid character '%c' (0x%04x)\n", isisochar(ch) && isprint(ch) ? ch : '.', ch);
 686                 }
 687         }
 688 }