3 * Copyright 1998-2000 Bertho A. Stultiens (BS)
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 * 21-May-2000 BS - Fixed the ident requirement of resource names
21 * which can be keywords.
22 * 30-Apr-2000 BS - Reintegration into the wine-tree
23 * 11-Jan-2000 BS - Very drastic cleanup because we don't have a
24 * preprocessor in here anymore.
25 * 02-Jan-2000 BS - Removed the preprocessor code
26 * 23-Dec-1999 BS - Removed the copyright for Martin von Loewis.
27 * There is really nothing left of his code in
29 * 20-Jun-1998 BS - Changed the filename conversion. Filenames are
30 * case-sensitive under *nix, but not under dos.
31 * default behaviour is to convert to lower case.
32 * - All backslashes are converted to forward and
33 * both single and double slash is recognized as
35 * - Fixed a bug in 'yywf' case that prevented
36 * double quoted names to be scanned properly.
38 * 19-May-1998 BS - Started to build a preprocessor.
39 * - Changed keyword processing completely to
42 * 20-Apr-1998 BS - Added ';' comment stripping
44 * 17-Apr-1998 BS - Made the win32 keywords optional when compiling in
47 * 15-Apr-1998 BS - Changed string handling to include escapes
48 * - Added unicode string handling (no codepage
49 * translation though).
50 * - 'Borrowed' the main idea of string scanning from
51 * the flex manual pages.
52 * - Added conditional handling of scanning depending
53 * on the state of the parser. This was mainly required
54 * to distinguish a file to load or raw data that
55 * follows. MS's definition of filenames is rather
56 * complex... It can be unquoted or double quoted. If
57 * double quoted, then the '\\' char is not automatically
58 * escaped according to Borland's rc compiler, but it
59 * accepts both "\\path\\file.rc" and "\path\file.rc".
60 * This makes life very hard! I go for the escaped
61 * version, as this seems to be the documented way...
62 * - Single quoted strings are now parsed and converted
64 * - Added comment stripping. The implementation is
65 * 'borrowed' from the flex manpages.
66 * - Rebuild string processing so that it may contain
70 /* Exclusive string handling */
72 /* Exclusive unicode string handling */
74 /* Exclusive rcdata single quoted data handling */
76 /* Exclusive comment eating... */
78 /* Set when stripping c-junk */
80 /* Set when scanning #line style directives */
82 /* Set when scanning #pragma */
87 %option noinput nounput noyy_top_state noyywrap
88 %option 8bit never-interactive
89 %option prefix="parser_"
91 /* Some shortcut definitions */
107 #define YY_NO_UNISTD_H
109 #include "../tools.h"
113 #include "newstruc.h"
114 #include "wpp_private.h"
116 #include "parser.tab.h"
118 /* Always update the current character position within a line */
119 #define YY_USER_ACTION char_number+=yyleng; wanted_id = want_id; want_id = 0;
121 #define YY_USER_INIT current_codepage = utf8_input ? CP_UTF8 : -1;
123 static void addcchar(char c);
124 static void addwchar(WCHAR s);
125 static string_t *get_buffered_cstring(void);
126 static string_t *get_buffered_wstring(void);
127 static string_t *make_string(char *s);
129 static char *cbuffer; /* Buffers for string collection */
131 static int cbufalloc = 0;
132 static WCHAR *wbuffer;
134 static int wbufalloc = 0;
136 static int current_codepage = -1; /* use language default */
139 * This one is a bit tricky.
140 * We set 'want_id' in the parser to get the first
141 * identifier we get across in the scanner, but we
142 * also want it to be reset at nearly any token we
143 * see. Exceptions are:
148 * The scanner will automatically reset 'want_id'
149 * after *each* scanner reduction and puts its value
150 * into the var below. In this way we can see the
151 * state after the YY_RULE_SETUP (i.e. the user action;
152 * see above) and don't have to worry too much when
153 * it needs to be reset.
155 static int wanted_id = 0;
156 static int save_wanted_id; /* To save across comment reductions */
/*
 * Table of resource-script keywords recognized by the scanner.
 * Each entry is { name, parser token, flag, flag, flag }. The struct
 * definition is not visible here; judging by the members the scanner
 * reads (isextension, needcase, alwayskw) the three flags are presumably
 * in that order -- TODO confirm against struct keyword.
 * The array is deliberately not const: iskeyword() sorts it in place
 * with qsort() using kw_cmp_func before searching it with bsearch().
 */
166 static struct keyword keywords[] = {
167 { "ACCELERATORS", tACCELERATORS, 0, 0, 0},
168 { "ALT", tALT, 0, 0, 0},
169 { "ASCII", tASCII, 0, 0, 0},
170 { "AUTO3STATE", tAUTO3STATE, 1, 0, 0},
171 { "AUTOCHECKBOX", tAUTOCHECKBOX, 1, 0, 0},
172 { "AUTORADIOBUTTON", tAUTORADIOBUTTON, 1, 0, 0},
173 { "BEGIN", tBEGIN, 0, 0, 0},
174 { "BITMAP", tBITMAP, 0, 0, 0},
175 { "BLOCK", tBLOCK, 0, 0, 0},
176 { "BUTTON", tBUTTON, 1, 0, 0},
177 { "CAPTION", tCAPTION, 0, 0, 0},
178 { "CHARACTERISTICS", tCHARACTERISTICS, 1, 0, 0},
179 { "CHECKBOX", tCHECKBOX, 0, 0, 0},
180 { "CHECKED", tCHECKED, 0, 0, 0},
181 { "CLASS", tCLASS, 0, 0, 0},
182 { "COMBOBOX", tCOMBOBOX, 0, 0, 0},
183 { "CONTROL", tCONTROL, 0, 0, 0},
184 { "CTEXT", tCTEXT, 0, 0, 0},
185 { "CURSOR", tCURSOR, 0, 0, 0},
186 { "DEFPUSHBUTTON", tDEFPUSHBUTTON, 0, 0, 0},
187 { "DIALOG", tDIALOG, 0, 0, 0},
188 { "DIALOGEX", tDIALOGEX, 1, 0, 0},
189 { "DISCARDABLE", tDISCARDABLE, 0, 0, 0},
190 { "DLGINIT", tDLGINIT, 0, 0, 0},
191 { "EDITTEXT", tEDITTEXT, 0, 0, 0},
192 { "END", tEND, 0, 0, 0},
193 { "EXSTYLE", tEXSTYLE, 0, 0, 0},
194 { "FILEFLAGS", tFILEFLAGS, 0, 0, 0},
195 { "FILEFLAGSMASK", tFILEFLAGSMASK, 0, 0, 0},
196 { "FILEOS", tFILEOS, 0, 0, 0},
197 { "FILESUBTYPE", tFILESUBTYPE, 0, 0, 0},
198 { "FILETYPE", tFILETYPE, 0, 0, 0},
199 { "FILEVERSION", tFILEVERSION, 0, 0, 0},
200 { "FIXED", tFIXED, 0, 0, 0},
201 { "FONT", tFONT, 0, 0, 0},
202 { "FONTDIR", tFONTDIR, 0, 0, 0}, /* This is a Borland BRC extension */
203 { "GRAYED", tGRAYED, 0, 0, 0},
204 { "GROUPBOX", tGROUPBOX, 0, 0, 0},
205 { "HELP", tHELP, 0, 0, 0},
206 { "HTML", tHTML, 0, 0, 0},
207 { "ICON", tICON, 0, 0, 0},
208 { "IMPURE", tIMPURE, 0, 0, 0},
209 { "INACTIVE", tINACTIVE, 0, 0, 0},
210 { "LANGUAGE", tLANGUAGE, 1, 0, 1},
211 { "LISTBOX", tLISTBOX, 0, 0, 0},
212 { "LOADONCALL", tLOADONCALL, 0, 0, 0},
213 { "LTEXT", tLTEXT, 0, 0, 0},
214 { "MENU", tMENU, 0, 0, 0},
215 { "MENUBARBREAK", tMENUBARBREAK, 0, 0, 0},
216 { "MENUBREAK", tMENUBREAK, 0, 0, 0},
217 { "MENUEX", tMENUEX, 1, 0, 0},
218 { "MENUITEM", tMENUITEM, 0, 0, 0},
219 { "MESSAGETABLE", tMESSAGETABLE, 1, 0, 0},
220 { "MOVEABLE", tMOVEABLE, 0, 0, 0},
221 { "NOINVERT", tNOINVERT, 0, 0, 0},
222 { "NOT", tNOT, 0, 0, 0},
223 { "POPUP", tPOPUP, 0, 0, 0},
224 { "PRELOAD", tPRELOAD, 0, 0, 0},
225 { "PRODUCTVERSION", tPRODUCTVERSION, 0, 0, 0},
226 { "PURE", tPURE, 0, 0, 0},
227 { "PUSHBUTTON", tPUSHBUTTON, 0, 0, 0},
228 { "RADIOBUTTON", tRADIOBUTTON, 0, 0, 0},
229 { "RCDATA", tRCDATA, 0, 0, 0},
230 { "RTEXT", tRTEXT, 0, 0, 0},
231 { "SCROLLBAR", tSCROLLBAR, 0, 0, 0},
232 { "SEPARATOR", tSEPARATOR, 0, 0, 0},
233 { "SHIFT", tSHIFT, 0, 0, 0},
234 { "STATE3", tSTATE3, 1, 0, 0},
235 { "STRING", tSTRING, 0, 0, 0},
236 { "STRINGTABLE", tSTRINGTABLE, 0, 0, 1},
237 { "STYLE", tSTYLE, 0, 0, 0},
238 { "TOOLBAR", tTOOLBAR, 1, 0, 0},
239 { "VALUE", tVALUE, 0, 0, 0},
240 { "VERSION", tVERSION, 1, 0, 0},
241 { "VERSIONINFO", tVERSIONINFO, 0, 0, 0},
242 { "VIRTKEY", tVIRTKEY, 0, 0, 0}
/* Number of entries in the keywords table */
245 #define NKEYWORDS (sizeof(keywords)/sizeof(keywords[0]))
/* View an untyped (const void *) comparator argument as a keyword entry */
246 #define KWP(p) ((const struct keyword *)(p))
/*
 * Comparison callback handed to qsort() and bsearch() for keyword entries.
 * The names are first compared with compare_striA() (presumably a
 * case-insensitive compare; the helper is not shown here). When that
 * reports equality and either entry has needcase set, the result of an
 * exact strcmp() on the two names is returned instead.
 */
247 static int kw_cmp_func(const void *s1, const void *s2)
250 ret = compare_striA(KWP(s1)->keyword, KWP(s2)->keyword);
251 if(!ret && (KWP(s1)->needcase || KWP(s2)->needcase))
252 return strcmp(KWP(s1)->keyword, KWP(s2)->keyword);
259 static struct keyword *iskeyword(char *kw)
267 /* Make sure that it is sorted for bsearch */
268 static int sorted = 0;
271 qsort(keywords, NKEYWORDS, sizeof(keywords[0]), kw_cmp_func);
277 kwp = bsearch(&key, keywords, NKEYWORDS, sizeof(keywords[0]), kw_cmp_func);
281 for(i = 0; i < NKEYWORDS; i++)
283 if(!kw_cmp_func(&key, &keywords[i]))
293 if(kwp == NULL || (kwp->isextension && !extensions))
299 /* converts an integer in string form to an unsigned long and prints an error
301 static unsigned long xstrtoul(const char *nptr, char **endptr, int base)
306 l = strtoul(nptr, endptr, base);
307 if (l == ULONG_MAX && errno == ERANGE)
308 parser_error("integer constant %s is too large", nptr);
315 **************************************************************************
316 * The flexer starts here
317 **************************************************************************
321 * Catch the GCC-style line statements here and parse them.
322 * This has the advantage that you can #include at any
323 * stage in the resource file.
324 * The preprocessor generates line directives in the format:
325 * # <linenum> "filename" <codes>
327 * Codes can be a sequence of:
328 * - 1 start of new file
329 * - 2 returning to previous
331 * - 4 interpret as C-code
333 * 4 is not used and 1 mutually excludes 2
334 * Anyhow, we are not really interested in these at all
335 * because we only want to know the linenumber and
338 <INITIAL,pp_cstrip>^{ws}*\#{ws}*pragma{ws}+ yy_push_state(pp_pragma);
339 <INITIAL,pp_cstrip>^{ws}*\#{ws}* yy_push_state(pp_line);
345 lineno = (int)strtol(yytext, &cptr, 10);
347 parser_error("Malformed '#...' line-directive; invalid linenumber");
348 fname = strchr(cptr, '"');
350 parser_error("Malformed '#...' line-directive; missing filename");
352 cptr = strchr(fname, '"');
354 parser_error("Malformed '#...' line-directive; missing terminating \"");
356 line_number = lineno - 1; /* We didn't read the newline */
357 input_name = xstrdup(fname);
358 /* ignore contents of C include files */
359 len = strlen(input_name);
360 if (len > 1 && !strcasecmp( input_name + len - 2, ".h" ))
366 <pp_pragma>code_page[^\n]* yyless(9); yy_pop_state(); yy_push_state(pp_code_page);
367 <pp_pragma>[^\n]* yy_pop_state(); if (pedantic) parser_warning("Unrecognized #pragma directive '%s'\n",yytext);
369 <pp_code_page>\({ws}*default{ws}*\)[^\n]* current_codepage = -1; yy_pop_state();
370 <pp_code_page>\({ws}*utf8{ws}*\)[^\n]* current_codepage = CP_UTF8; yy_pop_state();
371 <pp_code_page>\({ws}*[0-9]+{ws}*\)[^\n]* {
374 while (*p < '0' || *p > '9') p++;
375 current_codepage = strtol( p, NULL, 10 );
376 if (!is_valid_codepage( current_codepage ))
378 parser_error("Codepage %d not supported", current_codepage);
379 current_codepage = 0;
382 <pp_code_page>[^\n]* yy_pop_state(); parser_error("Malformed #pragma code_page directive");
385 * Strip everything until a ';' taking
386 * into account braces {} for structures,
389 <pp_cstrip>\n line_number++; char_number = 1;
390 <pp_cstrip>. ; /* ignore */
395 [0-9]+[lL]? { parser_lval.num = xstrtoul(yytext, 0, 10);
396 return (yytext[yyleng-1] == 'L' || yytext[yyleng-1] == 'l') ? tLNUMBER : tNUMBER; }
397 0[xX][0-9A-Fa-f]+[lL]? { parser_lval.num = xstrtoul(yytext, 0, 16);
398 return (yytext[yyleng-1] == 'L' || yytext[yyleng-1] == 'l') ? tLNUMBER : tNUMBER; }
399 0[oO][0-7]+[lL]? { parser_lval.num = xstrtoul(yytext+2, 0, 8);
400 return (yytext[yyleng-1] == 'L' || yytext[yyleng-1] == 'l') ? tLNUMBER : tNUMBER; }
402 [A-Za-z_0-9./\\][A-Za-z_0-9./\\\-]* {
403 struct keyword *tok = iskeyword(yytext);
407 if(wanted_id && !tok->alwayskw)
409 parser_lval.str = make_string(yytext);
417 parser_lval.str = make_string(yytext);
423 * Wide string scanning
426 yy_push_state(tklstr);
429 parser_error("16bit resource contains unicode strings");
434 parser_lval.str = get_buffered_wstring();
437 <tklstr>\\[0-7]{1,6} { /* octal escape sequence */
439 result = strtoul(yytext+1, 0, 8);
440 if ( result > 0xffff )
441 parser_error("Character constant out of range");
442 addwchar((WCHAR)result);
444 <tklstr>\\x[0-9a-fA-F]{4} { /* hex escape sequence */
446 result = strtoul(yytext+2, 0, 16);
447 addwchar((WCHAR)result);
449 <tklstr>\\x[0-9a-fA-F]{1,3} { parser_error("Invalid hex escape sequence '%s'", yytext); }
451 <tklstr>\\[0-9]+ parser_error("Bad escape sequence");
452 <tklstr>\\\n{ws}* line_number++; char_number = 1; /* backslash at EOL continues string after leading whitespace on next line */
453 <tklstr>\\a addwchar('\a');
454 <tklstr>\\b addwchar('\b');
455 <tklstr>\\f addwchar('\f');
456 <tklstr>\\n addwchar('\n');
457 <tklstr>\\r addwchar('\r');
458 <tklstr>\\t addwchar('\t');
459 <tklstr>\\v addwchar('\v');
461 if (yytext[1] & 0x80)
462 parser_error("Invalid char %u in wide string", (unsigned char)yytext[1]);
465 <tklstr>\\\r\n addwchar(yytext[2]); line_number++; char_number = 1;
466 <tklstr>\"\" addwchar('\"'); /* "bla""bla" -> "bla\"bla" */
467 <tklstr>\\\"\" addwchar('\"'); /* "bla\""bla" -> "bla\"bla" */
468 <tklstr>\"{ws}+\" ; /* "bla" "bla" -> "blabla" */
471 while(*yptr) /* FIXME: codepage translation */
474 parser_error("Invalid char %u in wide string", (unsigned char)*yptr);
475 addwchar(*yptr++ & 0xff);
478 <tklstr>\n parser_error("Unterminated string");
481 * Normal string scanning
483 \" yy_push_state(tkstr); cbufidx = 0;
487 parser_lval.str = get_buffered_cstring();
490 <tkstr>\\[0-7]{1,3} { /* octal escape sequence */
492 result = strtol(yytext+1, 0, 8);
494 parser_error("Character constant out of range");
495 addcchar((char)result);
497 <tkstr>\\x[0-9a-fA-F]{2} { /* hex escape sequence */
499 result = strtol(yytext+2, 0, 16);
500 addcchar((char)result);
502 <tkstr>\\x[0-9a-fA-F] { parser_error("Invalid hex escape sequence '%s'", yytext); }
504 <tkstr>\\[0-9]+ parser_error("Bad escape sequence");
505 <tkstr>\\\n{ws}* line_number++; char_number = 1; /* backslash at EOL continues string after leading whitespace on next line */
506 <tkstr>\\a addcchar('\a');
507 <tkstr>\\b addcchar('\b');
508 <tkstr>\\f addcchar('\f');
509 <tkstr>\\n addcchar('\n');
510 <tkstr>\\r addcchar('\r');
511 <tkstr>\\t addcchar('\t');
512 <tkstr>\\v addcchar('\v');
513 <tkstr>\\. addcchar(yytext[1]);
514 <tkstr>\\\r\n addcchar(yytext[2]); line_number++; char_number = 1;
520 <tkstr>\"\" addcchar('\"'); /* "bla""bla" -> "bla\"bla" */
521 <tkstr>\\\"\" addcchar('\"'); /* "bla\""bla" -> "bla\"bla" */
522 <tkstr>\"{ws}+\" ; /* "bla" "bla" -> "blabla" */
523 <tkstr>\n parser_error("Unterminated string");
528 \' yy_push_state(tkrcd); cbufidx = 0;
531 parser_lval.raw = new_raw_data();
532 parser_lval.raw->size = cbufidx;
533 parser_lval.raw->data = xmalloc(parser_lval.raw->size);
534 memcpy(parser_lval.raw->data, cbuffer, parser_lval.raw->size);
537 <tkrcd>[0-9a-fA-F]{2} {
539 result = strtol(yytext, 0, 16);
540 addcchar((char)result);
542 <tkrcd>{ws}+ ; /* Ignore space */
543 <tkrcd>\n line_number++; char_number = 1;
544 <tkrcd>. parser_error("Malformed data-line");
548 * Should never occur after preprocessing
550 <INITIAL,pp_cstrip>"/*" {
551 yy_push_state(comment);
552 save_wanted_id = wanted_id;
554 parser_warning("Found comments after preprocessing, please report\n");
557 <comment>"*"+[^*/\n]* ;
558 <comment>\n line_number++; char_number = 1;
559 <comment>"*"+"/" yy_pop_state(); want_id = save_wanted_id;
561 ;[^\n]* want_id = wanted_id; /* not really comment, but left-over c-junk */
562 "//"[^\n]* want_id = wanted_id; if(!no_preprocess) parser_warning("Found comments after preprocessing, please report\n");
574 {ws}+ want_id = wanted_id; /* Eat whitespace */
576 <INITIAL>[ -~] return yytext[0];
579 /* Catch all rule to find any unmatched text */
/* Report the offending byte both as a character (or '.' when it is not
 * printable) and as hex, together with the current start condition.
 * The hex argument is cast to unsigned char: where plain char is signed,
 * bytes >= 0x80 would otherwise be sign-extended and print as 0xffffffXX. */
585 parser_error("Unmatched text '%c' (0x%02x) YY_START=%d",
586 isprint((unsigned char)*yytext) ? *yytext : '.', (unsigned char)*yytext, YY_START);
591 /* These dup functions copy the enclosed '\0' from
592 * the resource string.
594 static void addcchar(char c)
596 if(cbufidx >= cbufalloc)
599 cbuffer = xrealloc(cbuffer, cbufalloc * sizeof(cbuffer[0]));
600 if(cbufalloc > 65536)
601 parser_warning("Reallocating string buffer larger than 64kB\n");
603 cbuffer[cbufidx++] = c;
606 static void addwchar(WCHAR s)
608 if(wbufidx >= wbufalloc)
611 wbuffer = xrealloc(wbuffer, wbufalloc * sizeof(wbuffer[0]));
612 if(wbufalloc > 65536)
613 parser_warning("Reallocating wide string buffer larger than 64kB\n");
615 wbuffer[wbufidx++] = s;
618 static string_t *get_buffered_cstring(void)
620 string_t *str = new_string();
623 str->type = str_char;
624 str->str.cstr = xmalloc(cbufidx+1);
625 memcpy(str->str.cstr, cbuffer, cbufidx);
626 str->str.cstr[cbufidx] = '\0';
628 if (!current_codepage || current_codepage == -1 || !win32) /* store as ANSI string */
630 if (!current_codepage) parser_error("Codepage set to Unicode only, cannot use ASCII string here");
633 else /* convert to Unicode before storing */
635 string_t *str_w = convert_string_unicode( str, current_codepage );
636 if (check_valid_utf8( str, current_codepage ))
637 parser_warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use, maybe use --utf8?\n",
638 str->str.cstr, current_codepage );
644 static string_t *get_buffered_wstring(void)
646 string_t *str = new_string();
648 str->type = str_unicode;
649 str->str.wstr = xmalloc((wbufidx+1)*sizeof(WCHAR));
650 memcpy(str->str.wstr, wbuffer, wbufidx*sizeof(WCHAR));
651 str->str.wstr[wbufidx] = 0;
655 static string_t *make_string(char *s)
657 string_t *ret, *str = new_string();
658 str->size = strlen(s);
659 str->type = str_char;
660 str->str.cstr = xmalloc(str->size+1);
661 memcpy(str->str.cstr, s, str->size+1);
662 if (current_codepage <= 0 || !win32) return str;
663 ret = convert_string_unicode( str, current_codepage );