2008-02-04 H.J. Lu <hongjiu.lu@intel.com>
[binutils.git] / binutils / rclex.c
blob4fae3da51275cbc36ace1320ca537322278513d7
1 /* rclex.c -- lexer for Windows rc files parser */
3 /* Copyright 1997, 1998, 1999, 2001, 2002, 2003, 2005, 2006, 2007
4 Free Software Foundation, Inc.
6 Written by Kai Tietz, Onevision.
8 This file is part of GNU Binutils.
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
23 02110-1301, USA. */
/* This is a lexer used by the Windows rc file parser.  It basically
   just recognizes a bunch of keywords.  */
29 #include "sysdep.h"
30 #include "bfd.h"
31 #include "bucomm.h"
32 #include "libiberty.h"
33 #include "safe-ctype.h"
34 #include "windres.h"
35 #include "rcparse.h"
37 #include <assert.h>
/* Whether we are in rcdata mode, in which we return the lengths of
   strings.  Set by rcparse_rcdata and cleared by rcparse_normal.  */

static int rcdata_mode;

/* Whether we are suppressing lines from cpp (including windows.h or
   headers from your C sources may bring in externs and typedefs).
   When active, we return IGNORED_TOKEN, which lets us ignore these
   outside of resource constructs.  Thus, it isn't required to protect
   all the non-preprocessor lines in your header files with #ifdef
   RC_INVOKED.  It also means your RC file can't include other RC
   files if they're named "*.h".  Sorry.  Name them *.rch or whatever.  */

static int suppress_cpp_data;

/* Yield token X, or IGNORED_TOKEN while cpp data is being suppressed.  */
#define IGNORE_CPP(x) (suppress_cpp_data ? IGNORED_TOKEN : (x))

/* The first filename we detect in the cpp output.  We use this to
   tell included files from the original file.  */

static char *initial_fn;
/* Singly linked list of the strings handed out by get_string, so that
   rcparse_discard_strings can release them all at once.  */

struct alloc_string
{
  struct alloc_string *next;    /* Next entry, or NULL at the end.  */
  char *s;                      /* The allocated string itself.  */
};

/* Head of the list of allocated strings.  */
static struct alloc_string *strings;
71 struct rclex_keywords
73 const char *name;
74 int tok;
77 #define K(KEY) { #KEY, KEY }
78 #define KRT(KEY) { #KEY, RT_##KEY }
80 static const struct rclex_keywords keywds[] =
82 K(ACCELERATORS), K(ALT), K(ANICURSOR), K(ANIICON), K(ASCII),
83 K(AUTO3STATE), K(AUTOCHECKBOX), K(AUTORADIOBUTTON),
84 K(BEDIT), { "BEGIN", BEG }, K(BITMAP), K(BLOCK), K(BUTTON),
85 K(CAPTION), K(CHARACTERISTICS), K(CHECKBOX), K(CHECKED),
86 K(CLASS), K(COMBOBOX), K(CONTROL), K(CTEXT), K(CURSOR),
87 K(DEFPUSHBUTTON), K(DIALOG), K(DIALOGEX), K(DISCARDABLE),
88 K(DLGINCLUDE), K(DLGINIT),
89 K(EDITTEXT), K(END), K(EXSTYLE),
90 K(FILEFLAGS), K(FILEFLAGSMASK), K(FILEOS), K(FILESUBTYPE),
91 K(FILETYPE), K(FILEVERSION), K(FIXED), K(FONT), K(FONTDIR),
92 K(GRAYED), KRT(GROUP_CURSOR), KRT(GROUP_ICON), K(GROUPBOX),
93 K(HEDIT), K(HELP), K(HTML),
94 K(ICON), K(IEDIT), K(IMPURE), K(INACTIVE),
95 K(LANGUAGE), K(LISTBOX), K(LOADONCALL), K(LTEXT),
96 K(MANIFEST), K(MENU), K(MENUBARBREAK), K(MENUBREAK),
97 K(MENUEX), K(MENUITEM), K(MESSAGETABLE), K(MOVEABLE),
98 K(NOINVERT), K(NOT),
99 K(PLUGPLAY), K(POPUP), K(PRELOAD), K(PRODUCTVERSION),
100 K(PURE), K(PUSHBOX), K(PUSHBUTTON),
101 K(RADIOBUTTON), K(RCDATA), K(RTEXT),
102 K(SCROLLBAR), K(SEPARATOR), K(SHIFT), K(STATE3),
103 K(STRINGTABLE), K(STYLE),
104 K(TOOLBAR),
105 K(USERBUTTON),
106 K(VALUE), { "VERSION", VERSIONK }, K(VERSIONINFO),
107 K(VIRTKEY), K(VXD),
108 { NULL, 0 },
/* External input stream from resrc.  */
extern FILE *cpp_pipe;

/* Lexical scanner helpers.  */

/* One character of push-back used by rclex_peekch, or -1 if none.  */
static int rclex_lastch = -1;

/* Token buffer: rclex_tok holds the NUL terminated text of the token
   being scanned, rclex_tok_pos is the number of characters stored in
   it, and rclex_tok_max is the most it can hold before it must grow.  */
static size_t rclex_tok_max = 0;
static size_t rclex_tok_pos = 0;
static char *rclex_tok = NULL;
120 static int
121 rclex_translatekeyword (const char *key)
123 if (key && ISUPPER (key[0]))
125 const struct rclex_keywords *kw = &keywds[0];
129 if (! strcmp (kw->name, key))
130 return kw->tok;
131 ++kw;
133 while (kw->name != NULL);
135 return STRING;
138 /* Handle a C preprocessor line. */
140 static void
141 cpp_line (void)
143 const char *s = rclex_tok;
144 int line;
145 char *send, *fn;
146 size_t len, mlen;
148 ++s;
149 while (ISSPACE (*s))
150 ++s;
152 /* Check for #pragma code_page ( DEFAULT | <nr>). */
153 len = strlen (s);
154 mlen = strlen ("pragma");
155 if (len > mlen && memcmp (s, "pragma", mlen) == 0 && ISSPACE (s[mlen]))
157 const char *end;
159 s += mlen + 1;
160 while (ISSPACE (*s))
161 ++s;
162 len = strlen (s);
163 mlen = strlen ("code_page");
164 if (len <= mlen || memcmp (s, "code_page", mlen) != 0)
165 /* FIXME: We ought to issue a warning message about an unrecognised pragma. */
166 return;
167 s += mlen;
168 while (ISSPACE (*s))
169 ++s;
170 if (*s != '(')
171 /* FIXME: We ought to issue an error message about a malformed pragma. */
172 return;
173 ++s;
174 while (ISSPACE (*s))
175 ++s;
176 if (*s == 0 || (end = strchr (s, ')')) == NULL)
177 /* FIXME: We ought to issue an error message about a malformed pragma. */
178 return;
179 len = (size_t) (end - s);
180 fn = xmalloc (len + 1);
181 if (len)
182 memcpy (fn, s, len);
183 fn[len] = 0;
184 while (len > 0 && (fn[len - 1] > 0 && fn[len - 1] <= 0x20))
185 fn[--len] = 0;
186 if (! len || (len == strlen ("DEFAULT") && strcasecmp (fn, "DEFAULT") == 0))
187 wind_current_codepage = wind_default_codepage;
188 else if (len > 0)
190 rc_uint_type ncp;
192 if (fn[0] == '0' && (fn[1] == 'x' || fn[1] == 'X'))
193 ncp = (rc_uint_type) strtol (fn + 2, NULL, 16);
194 else
195 ncp = (rc_uint_type) strtol (fn, NULL, 10);
196 if (ncp == CP_UTF16 || ! unicode_is_valid_codepage (ncp))
197 fatal (_("invalid value specified for pragma code_page.\n"));
198 wind_current_codepage = ncp;
200 free (fn);
201 return;
204 line = strtol (s, &send, 0);
205 if (*send != '\0' && ! ISSPACE (*send))
206 return;
208 /* Subtract 1 because we are about to count the newline. */
209 rc_lineno = line - 1;
211 s = send;
212 while (ISSPACE (*s))
213 ++s;
215 if (*s != '"')
216 return;
218 ++s;
219 send = strchr (s, '"');
220 if (send == NULL)
221 return;
223 fn = xmalloc (send - s + 1);
224 strncpy (fn, s, send - s);
225 fn[send - s] = '\0';
227 free (rc_filename);
228 rc_filename = fn;
230 if (! initial_fn)
232 initial_fn = xmalloc (strlen (fn) + 1);
233 strcpy (initial_fn, fn);
236 /* Allow the initial file, regardless of name. Suppress all other
237 files if they end in ".h" (this allows included "*.rc"). */
238 if (strcmp (initial_fn, fn) == 0
239 || strcmp (fn + strlen (fn) - 2, ".h") != 0)
240 suppress_cpp_data = 0;
241 else
242 suppress_cpp_data = 1;
245 /* Allocate a string of a given length. */
247 static char *
248 get_string (int len)
250 struct alloc_string *as;
252 as = xmalloc (sizeof *as);
253 as->s = xmalloc (len);
255 as->next = strings;
256 strings = as;
258 return as->s;
261 /* Handle a quoted string. The quotes are stripped. A pair of quotes
262 in a string are turned into a single quote. Adjacent strings are
263 merged separated by whitespace are merged, as in C. */
265 static char *
266 handle_quotes (rc_uint_type *len)
268 const char *input = rclex_tok;
269 char *ret, *s;
270 const char *t;
271 int ch;
272 int num_xdigits;
274 ret = get_string (strlen (input) + 1);
276 s = ret;
277 t = input;
278 if (*t == '"')
279 ++t;
280 while (*t != '\0')
282 if (*t == '\\')
284 ++t;
285 switch (*t)
287 case '\0':
288 rcparse_warning ("backslash at end of string");
289 break;
291 case '\"':
292 rcparse_warning ("use \"\" to put \" in a string");
293 *s++ = '"';
294 ++t;
295 break;
297 case 'a':
298 *s++ = ESCAPE_B; /* Strange, but true... */
299 ++t;
300 break;
302 case 'b':
303 *s++ = ESCAPE_B;
304 ++t;
305 break;
307 case 'f':
308 *s++ = ESCAPE_F;
309 ++t;
310 break;
312 case 'n':
313 *s++ = ESCAPE_N;
314 ++t;
315 break;
317 case 'r':
318 *s++ = ESCAPE_R;
319 ++t;
320 break;
322 case 't':
323 *s++ = ESCAPE_T;
324 ++t;
325 break;
327 case 'v':
328 *s++ = ESCAPE_V;
329 ++t;
330 break;
332 case '\\':
333 *s++ = *t++;
334 break;
336 case '0': case '1': case '2': case '3':
337 case '4': case '5': case '6': case '7':
338 ch = *t - '0';
339 ++t;
340 if (*t >= '0' && *t <= '7')
342 ch = (ch << 3) | (*t - '0');
343 ++t;
344 if (*t >= '0' && *t <= '7')
346 ch = (ch << 3) | (*t - '0');
347 ++t;
350 *s++ = ch;
351 break;
353 case 'x': case 'X':
354 ++t;
355 ch = 0;
356 /* We only handle single byte chars here. Make sure
357 we finish an escape sequence like "/xB0ABC" after
358 the first two digits. */
359 num_xdigits = 2;
360 while (num_xdigits--)
362 if (*t >= '0' && *t <= '9')
363 ch = (ch << 4) | (*t - '0');
364 else if (*t >= 'a' && *t <= 'f')
365 ch = (ch << 4) | (*t - 'a' + 10);
366 else if (*t >= 'A' && *t <= 'F')
367 ch = (ch << 4) | (*t - 'A' + 10);
368 else
369 break;
370 ++t;
372 *s++ = ch;
373 break;
375 default:
376 rcparse_warning ("unrecognized escape sequence");
377 *s++ = '\\';
378 *s++ = *t++;
379 break;
382 else if (*t != '"')
383 *s++ = *t++;
384 else if (t[1] == '\0')
385 break;
386 else if (t[1] == '"')
388 *s++ = '"';
389 t += 2;
391 else
393 rcparse_warning ("unexpected character after '\"'");
394 ++t;
395 assert (ISSPACE (*t));
396 while (ISSPACE (*t))
398 if ((*t) == '\n')
399 ++rc_lineno;
400 ++t;
402 if (*t == '\0')
403 break;
404 assert (*t == '"');
405 ++t;
409 *s = '\0';
411 *len = s - ret;
413 return ret;
416 /* Allocate a unicode string of a given length. */
418 static unichar *
419 get_unistring (int len)
421 return (unichar *) get_string (len * sizeof (unichar));
424 /* Handle a quoted unicode string. The quotes are stripped. A pair of quotes
425 in a string are turned into a single quote. Adjacent strings are
426 merged separated by whitespace are merged, as in C. */
428 static unichar *
429 handle_uniquotes (rc_uint_type *len)
431 const char *input = rclex_tok;
432 unichar *ret, *s;
433 const char *t;
434 int ch;
435 int num_xdigits;
437 ret = get_unistring (strlen (input) + 1);
439 s = ret;
440 t = input;
441 if ((*t == 'L' || *t == 'l') && t[1] == '"')
442 t += 2;
443 else if (*t == '"')
444 ++t;
445 while (*t != '\0')
447 if (*t == '\\')
449 ++t;
450 switch (*t)
452 case '\0':
453 rcparse_warning ("backslash at end of string");
454 break;
456 case '\"':
457 rcparse_warning ("use \"\" to put \" in a string");
458 break;
460 case 'a':
461 *s++ = ESCAPE_B; /* Strange, but true... */
462 ++t;
463 break;
465 case 'b':
466 *s++ = ESCAPE_B;
467 ++t;
468 break;
470 case 'f':
471 *s++ = ESCAPE_F;
472 ++t;
473 break;
475 case 'n':
476 *s++ = ESCAPE_N;
477 ++t;
478 break;
480 case 'r':
481 *s++ = ESCAPE_R;
482 ++t;
483 break;
485 case 't':
486 *s++ = ESCAPE_T;
487 ++t;
488 break;
490 case 'v':
491 *s++ = ESCAPE_V;
492 ++t;
493 break;
495 case '\\':
496 *s++ = (unichar) *t++;
497 break;
499 case '0': case '1': case '2': case '3':
500 case '4': case '5': case '6': case '7':
501 ch = *t - '0';
502 ++t;
503 if (*t >= '0' && *t <= '7')
505 ch = (ch << 3) | (*t - '0');
506 ++t;
507 if (*t >= '0' && *t <= '7')
509 ch = (ch << 3) | (*t - '0');
510 ++t;
513 *s++ = (unichar) ch;
514 break;
516 case 'x': case 'X':
517 ++t;
518 ch = 0;
519 /* We only handle two byte chars here. Make sure
520 we finish an escape sequence like "/xB0ABC" after
521 the first two digits. */
522 num_xdigits = 4;
523 while (num_xdigits--)
525 if (*t >= '0' && *t <= '9')
526 ch = (ch << 4) | (*t - '0');
527 else if (*t >= 'a' && *t <= 'f')
528 ch = (ch << 4) | (*t - 'a' + 10);
529 else if (*t >= 'A' && *t <= 'F')
530 ch = (ch << 4) | (*t - 'A' + 10);
531 else
532 break;
533 ++t;
535 *s++ = (unichar) ch;
536 break;
538 default:
539 rcparse_warning ("unrecognized escape sequence");
540 *s++ = '\\';
541 *s++ = (unichar) *t++;
542 break;
545 else if (*t != '"')
546 *s++ = (unichar) *t++;
547 else if (t[1] == '\0')
548 break;
549 else if (t[1] == '"')
551 *s++ = '"';
552 t += 2;
554 else
556 ++t;
557 assert (ISSPACE (*t));
558 while (ISSPACE (*t))
560 if ((*t) == '\n')
561 ++rc_lineno;
562 ++t;
564 if (*t == '\0')
565 break;
566 assert (*t == '"');
567 ++t;
571 *s = '\0';
573 *len = s - ret;
575 return ret;
578 /* Discard all the strings we have allocated. The parser calls this
579 when it no longer needs them. */
581 void
582 rcparse_discard_strings (void)
584 struct alloc_string *as;
586 as = strings;
587 while (as != NULL)
589 struct alloc_string *n;
591 free (as->s);
592 n = as->next;
593 free (as);
594 as = n;
597 strings = NULL;
600 /* Enter rcdata mode. */
601 void
602 rcparse_rcdata (void)
604 rcdata_mode = 1;
607 /* Go back to normal mode from rcdata mode. */
608 void
609 rcparse_normal (void)
611 rcdata_mode = 0;
614 static void
615 rclex_tok_add_char (int ch)
617 if (! rclex_tok || rclex_tok_max <= rclex_tok_pos)
619 char *h = xmalloc (rclex_tok_max + 9);
621 if (! h)
622 abort ();
623 if (rclex_tok)
625 memcpy (h, rclex_tok, rclex_tok_pos + 1);
626 free (rclex_tok);
628 else
629 rclex_tok_pos = 0;
630 rclex_tok_max += 8;
631 rclex_tok = h;
633 if (ch != -1)
634 rclex_tok[rclex_tok_pos++] = (char) ch;
635 rclex_tok[rclex_tok_pos] = 0;
638 static int
639 rclex_readch (void)
641 int r = -1;
643 if ((r = rclex_lastch) != -1)
644 rclex_lastch = -1;
645 else
647 char ch;
650 if (! cpp_pipe || feof (cpp_pipe)
651 || fread (&ch, 1, 1,cpp_pipe) != 1)
652 break;
653 r = ((int) ch) & 0xff;
655 while (r == 0 || r == '\r');
657 rclex_tok_add_char (r);
658 return r;
661 static int
662 rclex_peekch (void)
664 int r;
666 if ((r = rclex_lastch) == -1)
668 if ((r = rclex_readch ()) != -1)
670 rclex_lastch = r;
671 if (rclex_tok_pos > 0)
672 rclex_tok[--rclex_tok_pos] = 0;
675 return r;
/* Read the remainder of a quoted string into the token buffer, the
   opening quote having been consumed already.  Scanning stops after
   the closing quote, or in front of a newline or the end of input.
   A backslash takes the following character with it, and a doubled
   quote does not end the string.  */

static void
rclex_string (void)
{
  for (;;)
    {
      int c = rclex_peekch ();

      /* Unterminated string: leave the newline unread.  */
      if (c == -1 || c == '\n')
        return;

      if (c == '\\')
        {
          /* Consume the backslash and, unless the line or the input
             ends here, the character it escapes.  */
          rclex_readch ();
          c = rclex_peekch ();
          if (c == -1 || c == '\n')
            return;
          rclex_readch ();
          continue;
        }

      if (rclex_readch () != '"')
        continue;

      /* A quote: only a second quote right behind it keeps the
         string going.  */
      if (rclex_peekch () != '"')
        return;
      rclex_readch ();
    }
}
704 static rc_uint_type
705 read_digit (int ch)
707 rc_uint_type base = 10;
708 rc_uint_type ret, val;
709 int warned = 0;
711 ret = 0;
712 if (ch == '0')
714 base = 8;
715 switch (rclex_peekch ())
717 case 'o': case 'O':
718 rclex_readch ();
719 base = 8;
720 break;
722 case 'x': case 'X':
723 rclex_readch ();
724 base = 16;
725 break;
728 else
729 ret = (rc_uint_type) (ch - '0');
730 while ((ch = rclex_peekch ()) != -1)
732 if (ISDIGIT (ch))
733 val = (rc_uint_type) (ch - '0');
734 else if (ch >= 'a' && ch <= 'f')
735 val = (rc_uint_type) ((ch - 'a') + 10);
736 else if (ch >= 'A' && ch <= 'F')
737 val = (rc_uint_type) ((ch - 'A') + 10);
738 else
739 break;
740 rclex_readch ();
741 if (! warned && val >= base)
743 warned = 1;
744 rcparse_warning ("digit exceeds base");
746 ret *= base;
747 ret += val;
749 return ret;
752 /* yyparser entry method. */
755 yylex (void)
757 char *s;
758 unichar *us;
759 rc_uint_type length;
760 int ch;
762 /* Make sure that rclex_tok is initialized. */
763 if (! rclex_tok)
764 rclex_tok_add_char (-1);
770 /* Clear token. */
771 rclex_tok_pos = 0;
772 rclex_tok[0] = 0;
774 if ((ch = rclex_readch ()) == -1)
775 return -1;
776 if (ch == '\n')
777 ++rc_lineno;
779 while (ch <= 0x20);
781 switch (ch)
783 case '#':
784 while ((ch = rclex_peekch ()) != -1 && ch != '\n')
785 rclex_readch ();
786 cpp_line ();
787 ch = IGNORED_TOKEN;
788 break;
790 case '{':
791 ch = IGNORE_CPP (BEG);
792 break;
794 case '}':
795 ch = IGNORE_CPP (END);
796 break;
798 case '0': case '1': case '2': case '3': case '4':
799 case '5': case '6': case '7': case '8': case '9':
800 yylval.i.val = read_digit (ch);
801 yylval.i.dword = 0;
802 switch (rclex_peekch ())
804 case 'l': case 'L':
805 rclex_readch ();
806 yylval.i.dword = 1;
807 break;
809 ch = IGNORE_CPP (NUMBER);
810 break;
811 case '"':
812 rclex_string ();
813 ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDSTRING : SIZEDSTRING));
814 if (ch == IGNORED_TOKEN)
815 break;
816 s = handle_quotes (&length);
817 if (! rcdata_mode)
818 yylval.s = s;
819 else
821 yylval.ss.length = length;
822 yylval.ss.s = s;
824 break;
825 case 'L': case 'l':
826 if (rclex_peekch () == '"')
828 rclex_readch ();
829 rclex_string ();
830 ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDUNISTRING : SIZEDUNISTRING));
831 if (ch == IGNORED_TOKEN)
832 break;
833 us = handle_uniquotes (&length);
834 if (! rcdata_mode)
835 yylval.uni = us;
836 else
838 yylval.suni.length = length;
839 yylval.suni.s = us;
841 break;
843 /* Fall through. */
844 default:
845 if (ISIDST (ch) || ch=='$')
847 while ((ch = rclex_peekch ()) != -1
848 && (ISIDNUM (ch) || ch == '$' || ch == '.'
849 || ch == ':' || ch == '\\' || ch == '/'
850 || ch == '_' || ch == '-')
852 rclex_readch ();
853 ch = IGNORE_CPP (rclex_translatekeyword (rclex_tok));
854 if (ch == STRING)
856 s = get_string (strlen (rclex_tok) + 1);
857 strcpy (s, rclex_tok);
858 yylval.s = s;
860 else if (ch == BLOCK)
862 const char *hs = NULL;
864 switch (yylex ())
866 case STRING:
867 case QUOTEDSTRING:
868 hs = yylval.s;
869 break;
870 case SIZEDSTRING:
871 hs = yylval.s = yylval.ss.s;
872 break;
874 if (! hs)
876 rcparse_warning ("BLOCK expects a string as argument.");
877 ch = IGNORED_TOKEN;
879 else if (! strcmp (hs, "StringFileInfo"))
880 ch = BLOCKSTRINGFILEINFO;
881 else if (! strcmp (hs, "VarFileInfo"))
882 ch = BLOCKVARFILEINFO;
884 break;
886 ch = IGNORE_CPP (ch);
887 break;
890 while (ch == IGNORED_TOKEN);
892 return ch;