Changed a few core int10 routines to use the BIOS data segment cursor
[wine.git] / tools / wmc / mcl.c
blob55fd27be213afa0856eae47803f39a1741712f4c
/*
 * Wine Message Compiler lexical scanner
 *
 * Copyright 2000 Bertho A. Stultiens (BS)
 *
 */
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <ctype.h>
10 #include <assert.h>
12 #include "config.h"
14 #include "utils.h"
15 #include "wmc.h"
16 #include "lang.h"
18 #include "y.tab.h"
/*
 * Keywords are case insensitive. All normal input is treated as
 * being in codepage iso-8859-1 for ascii input files (unicode
 * page 0) and as equivalent unicode if unicode input is selected.
 * All normal input, which is not part of a message text, is
 * enforced to be unicode page 0. Otherwise an error will be
 * generated. The normal file data should only be ASCII because
 * that is the basic definition of the grammar.
 *
 * Byteorder of unicode input is determined automatically by
 * reading the first 8 bytes and checking them against unicode
 * page 0 byteorder (hibyte must be 0).
 * -- FIXME --
 * Alternatively, the input is checked against a special byte
 * sequence to identify the file.
 * -- FIXME --
 */
/*
 * Keywords:
 *	Codepages
 *	Facility
 *	FacilityNames
 *	LanguageNames
 *	MessageId
 *	MessageIdTypedef
 *	Severity
 *	SeverityNames
 *	SymbolicName
 *
 * Default added identifiers for classes:
 * SeverityNames:
 *	Success		= 0x0
 *	Informational	= 0x1
 *	Warning		= 0x2
 *	Error		= 0x3
 * FacilityNames:
 *	System		= 0x0FF
 *	Application	= 0xFFF
 *
 * The 'Codepages' keyword is a wmc extension.
 */
62 static WCHAR ustr_application[] = { 'A', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', 0 };
63 static WCHAR ustr_codepages[] = { 'C', 'o', 'd', 'e', 'p', 'a', 'g', 'e', 's', 0 };
64 static WCHAR ustr_english[] = { 'E', 'n', 'g', 'l', 'i', 's', 'h', 0 };
65 static WCHAR ustr_error[] = { 'E', 'r', 'r', 'o', 'r', 0 };
66 static WCHAR ustr_facility[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 0 };
67 static WCHAR ustr_facilitynames[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
68 static WCHAR ustr_informational[] = { 'I', 'n', 'f', 'o', 'r', 'm', 'a', 't', 'i', 'o', 'n', 'a', 'l', 0 };
69 static WCHAR ustr_language[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 0};
70 static WCHAR ustr_languagenames[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 'N', 'a', 'm', 'e', 's', 0};
71 static WCHAR ustr_messageid[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 0 };
72 static WCHAR ustr_messageidtypedef[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 'T', 'y', 'p', 'e', 'd', 'e', 'f', 0 };
73 static WCHAR ustr_outputbase[] = { 'O', 'u', 't', 'p', 'u', 't', 'B', 'a', 's', 'e', 0 };
74 static WCHAR ustr_severity[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 0 };
75 static WCHAR ustr_severitynames[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
76 static WCHAR ustr_success[] = { 'S', 'u', 'c', 'c', 'e', 's', 's', 0 };
77 static WCHAR ustr_symbolicname[] = { 'S', 'y', 'm', 'b', 'o', 'l', 'i', 'c', 'N', 'a', 'm', 'e', 0 };
78 static WCHAR ustr_system[] = { 'S', 'y', 's', 't', 'e', 'm', 0 };
79 static WCHAR ustr_warning[] = { 'W', 'a', 'r', 'n', 'i', 'n', 'g', 0 };
80 static WCHAR ustr_msg00001[] = { 'm', 's', 'g', '0', '0', '0', '0', '1', 0 };
/*
 * This table is to beat any form of "expression building" to check for
 * correct filename characters. It is also used for ident checks.
 * FIXME: use it more consistently.
 *
 * Each entry is a bitwise OR of the CH_* flags below and is indexed by
 * the character code (0x00 - 0xFF).
 */

#define CH_SHORTNAME	0x01
#define CH_LONGNAME	0x02
#define CH_IDENT	0x04
#define CH_NUMBER	0x08
/*#define CH_WILDCARD	0x10*/
/*#define CH_DOT	0x20*/
#define CH_PUNCT	0x40
#define CH_INVALID	0x80

/*
 * The element type is unsigned char because the flag values reach 0x80
 * and 0xc0, which do not fit in a plain char where char is signed.
 */
static const unsigned char char_table[256] = {
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
	0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
	0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
	0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
	0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
	0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
	0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
	0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
	0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
};
/*
 * Return non-zero when ch lies in unicode page 0 (iso-8859-1), i.e.
 * when no bit above the low byte is set. Negative values such as EOF
 * have high bits set and are therefore rejected.
 */
static int isisochar(int ch)
{
	return !(ch & (~0xff));
}
static int codepage;				/* Number of the currently selected codepage */
static const union cptable *codepage_def;	/* Table returned by find_codepage() for it */

/*
 * Select the codepage used to translate non-unicode input.
 *
 * Stores cp and looks up its table with find_codepage(). When the
 * lookup yields NULL the problem is reported through xyyerror().
 */
void set_codepage(int cp)
{
	codepage = cp;
	codepage_def = find_codepage(codepage);
	if(!codepage_def)
		xyyerror("Codepage %d not found; cannot process", codepage);
}
148 * Input functions
150 static int nungetstack = 0;
151 static int allocungetstack = 0;
152 static char *ungetstack = NULL;
153 static int ninputbuffer = 0;
154 static WCHAR *inputbuffer = NULL;
155 static char *xlatebuffer = NULL;
157 #define INPUTBUFFER_SIZE 2048 /* Must be larger than 4 and approx. large enough to hold a line */
160 * Fill the input buffer with *one* line of input.
161 * The line is '\n' terminated so that scanning
162 * messages with translation works as expected
163 * (otherwise we cannot pre-translate because the
164 * language is first known one line before the
165 * actual message).
167 static int fill_inputbuffer(void)
169 int n;
170 static char err_fatalread[] = "Fatal: reading input failed";
171 static int endian = -1;
173 if(!inputbuffer)
175 inputbuffer = xmalloc(INPUTBUFFER_SIZE);
176 xlatebuffer = xmalloc(INPUTBUFFER_SIZE);
179 try_again:
180 if(!unicodein)
182 char *cptr;
183 cptr = fgets(xlatebuffer, INPUTBUFFER_SIZE, yyin);
184 if(!cptr && ferror(yyin))
185 xyyerror(err_fatalread);
186 else if(!cptr)
187 return 0;
188 assert(codepage_def != NULL);
189 n = cp_mbstowcs(codepage_def, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
190 if(n < 0)
191 internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)", n);
192 if(n <= 1)
193 goto try_again; /* Should not hapen */
194 n--; /* Strip added conversion '\0' from input length */
196 * FIXME:
197 * Detect UTF-8 in the first time we read some bytes by
198 * checking the special sequence "FE..." or something like
199 * that. I need to check www.unicode.org for details.
202 else
204 if(endian == -1)
206 n = fread(inputbuffer, 1, 8, yyin);
207 if(n != 8)
209 if(!n && ferror(yyin))
210 xyyerror(err_fatalread);
211 else
212 xyyerror("Fatal: file to short to determine byteorder (should never happen)");
214 if(isisochar(inputbuffer[0]) &&
215 isisochar(inputbuffer[1]) &&
216 isisochar(inputbuffer[2]) &&
217 isisochar(inputbuffer[3]))
219 #ifdef WORDS_BIGENDIAN
220 endian = WMC_BO_BIG;
221 #else
222 endian = WMC_BO_LITTLE;
223 #endif
225 else if(isisochar(BYTESWAP_WORD(inputbuffer[0])) &&
226 isisochar(BYTESWAP_WORD(inputbuffer[1])) &&
227 isisochar(BYTESWAP_WORD(inputbuffer[2])) &&
228 isisochar(BYTESWAP_WORD(inputbuffer[3])))
230 #ifdef WORDS_BIGENDIAN
231 endian = WMC_BO_LITTLE;
232 #else
233 endian = WMC_BO_BIG;
234 #endif
236 else
237 xyyerror("Fatal: cannot determine file's byteorder");
238 /* FIXME:
239 * Determine the file-endian with the leader-bytes
240 * "FF FE..."; can't remember the exact sequence.
242 n /= 2;
243 #ifdef WORDS_BIGENDIAN
244 if(endian == WMC_BO_LITTLE)
245 #else
246 if(endian == WMC_BO_BIG)
247 #endif
249 inputbuffer[0] = BYTESWAP_WORD(inputbuffer[0]);
250 inputbuffer[1] = BYTESWAP_WORD(inputbuffer[1]);
251 inputbuffer[2] = BYTESWAP_WORD(inputbuffer[2]);
252 inputbuffer[3] = BYTESWAP_WORD(inputbuffer[3]);
256 else
258 int i;
259 n = 0;
260 for(i = 0; i < INPUTBUFFER_SIZE; i++)
262 int t;
263 t = fread(&inputbuffer[i], 2, 1, yyin);
264 if(!t && ferror(yyin))
265 xyyerror(err_fatalread);
266 else if(!t && n)
267 break;
268 n++;
269 #ifdef WORDS_BIGENDIAN
270 if(endian == WMC_BO_LITTLE)
271 #else
272 if(endian == WMC_BO_BIG)
273 #endif
275 if((inputbuffer[i] = BYTESWAP_WORD(inputbuffer[i])) == '\n')
276 break;
278 else
280 if(inputbuffer[i] == '\n')
281 break;
288 if(!n)
290 yywarning("Re-read line (input was or converted to zilch)");
291 goto try_again; /* Should not happen, but could be due to stdin reading and a signal */
294 ninputbuffer += n;
295 return 1;
298 static int get_unichar(void)
300 static WCHAR *b = NULL;
301 char_number++;
303 if(nungetstack)
304 return ungetstack[--nungetstack];
306 if(!ninputbuffer)
308 if(!fill_inputbuffer())
309 return EOF;
310 b = inputbuffer;
313 ninputbuffer--;
314 return (int)(*b++ & 0xffff);
317 static void unget_unichar(int ch)
319 if(ch == EOF)
320 return;
322 char_number--;
324 if(nungetstack == allocungetstack)
326 allocungetstack += 32;
327 ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
330 ungetstack[nungetstack++] = (WCHAR)ch;
/*
 * Normal character stack.
 * Used for number scanning.
 */
static int ncharstack = 0;	/* Characters currently on the stack */
static int alloccharstack = 0;	/* Allocated size of charstack */
static char *charstack = NULL;
342 static void empty_char_stack(void)
344 ncharstack = 0;
347 static void push_char(int ch)
349 if(ncharstack == alloccharstack)
351 alloccharstack += 32;
352 charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
354 charstack[ncharstack++] = (char)ch;
357 static int tos_char_stack(void)
359 if(!ncharstack)
360 return 0;
361 else
362 return (int)(charstack[ncharstack-1] & 0xff);
365 static char *get_char_stack(void)
367 return charstack;
371 * Unicode character stack.
372 * Used for general scanner.
374 static int nunicharstack = 0;
375 static int allocunicharstack = 0;
376 static WCHAR *unicharstack = NULL;
378 static void empty_unichar_stack(void)
380 nunicharstack = 0;
383 static void push_unichar(int ch)
385 if(nunicharstack == allocunicharstack)
387 allocunicharstack += 128;
388 unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(*unicharstack));
390 unicharstack[nunicharstack++] = (WCHAR)ch;
#if 0
/*
 * Currently compiled out. Return the character on top of the unicode
 * stack, or 0 when the stack is empty.
 */
static int tos_unichar_stack(void)
{
	if(!nunicharstack)
		return 0;
	else
		return (int)(unicharstack[nunicharstack-1] & 0xffff);
}
#endif
403 static WCHAR *get_unichar_stack(void)
405 return unicharstack;
409 * Number scanner
411 * state | ch | next state
412 * ------+-----------------+--------------------------
413 * 0 | [0] | 1
414 * 0 | [1-9] | 4
415 * 0 | . | error (should never occur)
416 * 1 | [xX] | 2
417 * 1 | [0-7] | 3
418 * 1 | [89a-wyzA-WYZ_] | error invalid digit
419 * 1 | . | return 0
420 * 2 | [0-9a-fA-F] | 2
421 * 2 | [g-zG-Z_] | error invalid hex digit
422 * 2 | . | return (hex-number) if TOS != [xX] else error
423 * 3 | [0-7] | 3
424 * 3 | [89a-zA-Z_] | error invalid octal digit
425 * 3 | . | return (octal-number)
426 * 4 | [0-9] | 4
427 * 4 | [a-zA-Z_] | error invalid decimal digit
428 * 4 | . | return (decimal-number)
430 * All non-identifier characters [^a-zA-Z_0-9] terminate the scan
431 * and return the value. This is not entirely correct, but close
432 * enough (should check punctuators as trailing context, but the
433 * char_table is not adapted to that and it is questionable whether
434 * it is worth the trouble).
435 * All non-iso-8859-1 characters are an error.
437 static int scan_number(int ch)
439 int state = 0;
440 int base = 10;
441 empty_char_stack();
443 while(1)
445 if(!isisochar(ch))
446 xyyerror("Invalid digit");
448 switch(state)
450 case 0:
451 if(isdigit(ch))
453 push_char(ch);
454 if(ch == '0')
455 state = 1;
456 else
457 state = 4;
459 else
460 internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state");
461 break;
462 case 1:
463 if(ch == 'x' || ch == 'X')
465 push_char(ch);
466 state = 2;
468 else if(ch >= '0' && ch <= '7')
470 push_char(ch);
471 state = 3;
473 else if(isalpha(ch) || ch == '_')
474 xyyerror("Invalid number digit");
475 else
477 unget_unichar(ch);
478 yylval.num = 0;
479 return tNUMBER;
481 break;
482 case 2:
483 if(isxdigit(ch))
484 push_char(ch);
485 else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
486 xyyerror("Invalid hex digit");
487 else
489 base = 16;
490 goto finish;
492 break;
493 case 3:
494 if(ch >= '0' && ch <= '7')
495 push_char(ch);
496 else if(isalnum(ch) || ch == '_')
497 xyyerror("Invalid octal digit");
498 else
500 base = 8;
501 goto finish;
503 break;
504 case 4:
505 if(isdigit(ch))
506 push_char(ch);
507 else if(isalnum(ch) || ch == '_')
508 xyyerror("Invalid decimal digit");
509 else
511 base = 10;
512 goto finish;
514 break;
515 default:
516 internal_error(__FILE__, __LINE__, "Invalid state in number-scanner");
518 ch = get_unichar();
520 finish:
521 unget_unichar(ch);
522 push_char(0);
523 yylval.num = strtoul(get_char_stack(), NULL, base);
524 return tNUMBER;
527 static void newline(void)
529 line_number++;
530 char_number = 1;
533 static int unisort(const void *p1, const void *p2)
535 return unistricmp(((token_t *)p1)->name, ((token_t *)p2)->name);
538 static token_t *tokentable = NULL;
539 static int ntokentable = 0;
541 token_t *lookup_token(const WCHAR *s)
543 token_t tok;
545 tok.name = s;
546 return (token_t *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
549 void add_token(tok_e type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
551 ntokentable++;
552 tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));
553 tokentable[ntokentable-1].type = type;
554 tokentable[ntokentable-1].name = name;
555 tokentable[ntokentable-1].token = tok;
556 tokentable[ntokentable-1].codepage = cp;
557 tokentable[ntokentable-1].alias = alias;
558 tokentable[ntokentable-1].fixed = fix;
559 qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
562 void get_tokentable(token_t **tab, int *len)
564 assert(tab != NULL);
565 assert(len != NULL);
566 *tab = tokentable;
567 *len = ntokentable;
571 * The scanner
574 int yylex(void)
576 static WCHAR ustr_dot1[] = { '.', '\n', 0 };
577 static WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
578 static int isinit = 0;
579 int ch;
581 if(!isinit)
583 isinit++;
584 set_codepage(WMC_DEFAULT_CODEPAGE);
585 add_token(tok_keyword, ustr_codepages, tCODEPAGE, 0, NULL, 0);
586 add_token(tok_keyword, ustr_facility, tFACILITY, 0, NULL, 1);
587 add_token(tok_keyword, ustr_facilitynames, tFACNAMES, 0, NULL, 1);
588 add_token(tok_keyword, ustr_language, tLANGUAGE, 0, NULL, 1);
589 add_token(tok_keyword, ustr_languagenames, tLANNAMES, 0, NULL, 1);
590 add_token(tok_keyword, ustr_messageid, tMSGID, 0, NULL, 1);
591 add_token(tok_keyword, ustr_messageidtypedef, tTYPEDEF, 0, NULL, 1);
592 add_token(tok_keyword, ustr_outputbase, tBASE, 0, NULL, 1);
593 add_token(tok_keyword, ustr_severity, tSEVERITY, 0, NULL, 1);
594 add_token(tok_keyword, ustr_severitynames, tSEVNAMES, 0, NULL, 1);
595 add_token(tok_keyword, ustr_symbolicname, tSYMNAME, 0, NULL, 1);
596 add_token(tok_severity, ustr_error, 0x03, 0, NULL, 0);
597 add_token(tok_severity, ustr_warning, 0x02, 0, NULL, 0);
598 add_token(tok_severity, ustr_informational, 0x01, 0, NULL, 0);
599 add_token(tok_severity, ustr_success, 0x00, 0, NULL, 0);
600 add_token(tok_facility, ustr_application, 0xFFF, 0, NULL, 0);
601 add_token(tok_facility, ustr_system, 0x0FF, 0, NULL, 0);
602 add_token(tok_language, ustr_english, 0x409, 437, ustr_msg00001, 0);
605 empty_unichar_stack();
607 while(1)
609 if(want_line)
611 while((ch = get_unichar()) != '\n')
613 if(ch == EOF)
614 xyyerror("Unexpected EOF");
615 push_unichar(ch);
617 newline();
618 push_unichar(ch);
619 push_unichar(0);
620 if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
622 want_line = 0;
623 /* Reset the codepage to our default after each message */
624 set_codepage(WMC_DEFAULT_CODEPAGE);
625 return tMSGEND;
627 yylval.str = xunistrdup(get_unichar_stack());
628 return tLINE;
631 ch = get_unichar();
633 if(ch == EOF)
634 return EOF;
636 if(ch == '\n')
638 newline();
639 if(want_nl)
641 want_nl = 0;
642 return tNL;
644 continue;
647 if(isisochar(ch))
649 if(want_file)
651 int n = 0;
652 while(n < 8 && isisochar(ch))
654 int t = char_table[ch];
655 if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
656 break;
658 push_unichar(ch);
659 n++;
660 ch = get_unichar();
662 unget_unichar(ch);
663 push_unichar(0);
664 want_file = 0;
665 yylval.str = xunistrdup(get_unichar_stack());
666 return tFILE;
669 if(char_table[ch] & CH_IDENT)
671 token_t *tok;
672 while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
674 push_unichar(ch);
675 ch = get_unichar();
677 unget_unichar(ch);
678 push_unichar(0);
679 if(!(tok = lookup_token(get_unichar_stack())))
681 yylval.str = xunistrdup(get_unichar_stack());
682 return tIDENT;
684 switch(tok->type)
686 case tok_keyword:
687 return tok->token;
689 case tok_language:
690 codepage = tok->codepage;
691 /* Fall through */
692 case tok_severity:
693 case tok_facility:
694 yylval.tok = tok;
695 return tTOKEN;
697 default:
698 internal_error(__FILE__, __LINE__, "Invalid token type encountered");
702 if(isspace(ch)) /* Ignore space */
703 continue;
705 if(isdigit(ch))
706 return scan_number(ch);
709 switch(ch)
711 case ':':
712 case '=':
713 case '+':
714 case '(':
715 case ')':
716 return ch;
717 case ';':
718 while(ch != '\n' && ch != EOF)
720 push_unichar(ch);
721 ch = get_unichar();
723 newline();
724 push_unichar(ch); /* Include the newline */
725 push_unichar(0);
726 yylval.str = xunistrdup(get_unichar_stack());
727 return tCOMMENT;
728 default:
729 xyyerror("Invalid character '%c' (0x%04x)", isisochar(ch) && isprint(ch) ? ch : '.', ch);