1 /* rclex.c -- lexer for Windows rc files parser */
3 /* Copyright 1997, 1998, 1999, 2001, 2002, 2003, 2005, 2006, 2007
4 Free Software Foundation, Inc.
6 Written by Kai Tietz, Onevision.
8 This file is part of GNU Binutils.
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
26 /* This is a lexer used by the Windows rc file parser. It basically
27 just recognizes a bunch of keywords. */
32 #include "libiberty.h"
33 #include "safe-ctype.h"
39 /* Whether we are in rcdata mode, in which we return the lengths of
42 static int rcdata_mode
;
44 /* Whether we are suppressing lines from cpp (including windows.h or
45 headers from your C sources may bring in externs and typedefs).
46 When active, we return IGNORED_TOKEN, which lets us ignore these
47 outside of resource constructs. Thus, it isn't required to protect
48 all the non-preprocessor lines in your header files with #ifdef
49 RC_INVOKED. It also means your RC file can't include other RC
50 files if they're named "*.h". Sorry. Name them *.rch or whatever. */
52 static int suppress_cpp_data
;
54 #define IGNORE_CPP(x) (suppress_cpp_data ? IGNORED_TOKEN : (x))
56 /* The first filename we detect in the cpp output. We use this to
57 tell included files from the original file. */
59 static char *initial_fn
;
61 /* List of allocated strings. */
65 struct alloc_string
*next
;
69 static struct alloc_string
*strings
;
77 #define K(KEY) { #KEY, KEY }
78 #define KRT(KEY) { #KEY, RT_##KEY }
80 static const struct rclex_keywords keywds
[] =
82 K(ACCELERATORS
), K(ALT
), K(ANICURSOR
), K(ANIICON
), K(ASCII
),
83 K(AUTO3STATE
), K(AUTOCHECKBOX
), K(AUTORADIOBUTTON
),
84 K(BEDIT
), { "BEGIN", BEG
}, K(BITMAP
), K(BLOCK
), K(BUTTON
),
85 K(CAPTION
), K(CHARACTERISTICS
), K(CHECKBOX
), K(CHECKED
),
86 K(CLASS
), K(COMBOBOX
), K(CONTROL
), K(CTEXT
), K(CURSOR
),
87 K(DEFPUSHBUTTON
), K(DIALOG
), K(DIALOGEX
), K(DISCARDABLE
),
88 K(DLGINCLUDE
), K(DLGINIT
),
89 K(EDITTEXT
), K(END
), K(EXSTYLE
),
90 K(FILEFLAGS
), K(FILEFLAGSMASK
), K(FILEOS
), K(FILESUBTYPE
),
91 K(FILETYPE
), K(FILEVERSION
), K(FIXED
), K(FONT
), K(FONTDIR
),
92 K(GRAYED
), KRT(GROUP_CURSOR
), KRT(GROUP_ICON
), K(GROUPBOX
),
93 K(HEDIT
), K(HELP
), K(HTML
),
94 K(ICON
), K(IEDIT
), K(IMPURE
), K(INACTIVE
),
95 K(LANGUAGE
), K(LISTBOX
), K(LOADONCALL
), K(LTEXT
),
96 K(MANIFEST
), K(MENU
), K(MENUBARBREAK
), K(MENUBREAK
),
97 K(MENUEX
), K(MENUITEM
), K(MESSAGETABLE
), K(MOVEABLE
),
99 K(PLUGPLAY
), K(POPUP
), K(PRELOAD
), K(PRODUCTVERSION
),
100 K(PURE
), K(PUSHBOX
), K(PUSHBUTTON
),
101 K(RADIOBUTTON
), K(RCDATA
), K(RTEXT
),
102 K(SCROLLBAR
), K(SEPARATOR
), K(SHIFT
), K(STATE3
),
103 K(STRINGTABLE
), K(STYLE
),
106 K(VALUE
), { "VERSION", VERSIONK
}, K(VERSIONINFO
),
111 /* External input stream from resrc */
112 extern FILE *cpp_pipe
;
114 /* Lexical scanner helpers. */
115 static int rclex_lastch
= -1;
116 static size_t rclex_tok_max
= 0;
117 static size_t rclex_tok_pos
= 0;
118 static char *rclex_tok
= NULL
;
121 rclex_translatekeyword (const char *key
)
123 if (key
&& ISUPPER (key
[0]))
125 const struct rclex_keywords
*kw
= &keywds
[0];
129 if (! strcmp (kw
->name
, key
))
133 while (kw
->name
!= NULL
);
138 /* Handle a C preprocessor line. */
143 const char *s
= rclex_tok
;
152 /* Check for #pragma code_page ( DEFAULT | <nr>). */
154 mlen
= strlen ("pragma");
155 if (len
> mlen
&& memcmp (s
, "pragma", mlen
) == 0 && ISSPACE (s
[mlen
]))
163 mlen
= strlen ("code_page");
164 if (len
<= mlen
|| memcmp (s
, "code_page", mlen
) != 0)
165 /* FIXME: We ought to issue a warning message about an unrecognised pragma. */
171 /* FIXME: We ought to issue an error message about a malformed pragma. */
176 if (*s
== 0 || (end
= strchr (s
, ')')) == NULL
)
177 /* FIXME: We ought to issue an error message about a malformed pragma. */
179 len
= (size_t) (end
- s
);
180 fn
= xmalloc (len
+ 1);
184 while (len
> 0 && (fn
[len
- 1] > 0 && fn
[len
- 1] <= 0x20))
186 if (! len
|| (len
== strlen ("DEFAULT") && strcasecmp (fn
, "DEFAULT") == 0))
187 wind_current_codepage
= wind_default_codepage
;
192 if (fn
[0] == '0' && (fn
[1] == 'x' || fn
[1] == 'X'))
193 ncp
= (rc_uint_type
) strtol (fn
+ 2, NULL
, 16);
195 ncp
= (rc_uint_type
) strtol (fn
, NULL
, 10);
196 if (ncp
== CP_UTF16
|| ! unicode_is_valid_codepage (ncp
))
197 fatal (_("invalid value specified for pragma code_page.\n"));
198 wind_current_codepage
= ncp
;
204 line
= strtol (s
, &send
, 0);
205 if (*send
!= '\0' && ! ISSPACE (*send
))
208 /* Subtract 1 because we are about to count the newline. */
209 rc_lineno
= line
- 1;
219 send
= strchr (s
, '"');
223 fn
= xmalloc (send
- s
+ 1);
224 strncpy (fn
, s
, send
- s
);
232 initial_fn
= xmalloc (strlen (fn
) + 1);
233 strcpy (initial_fn
, fn
);
236 /* Allow the initial file, regardless of name. Suppress all other
237 files if they end in ".h" (this allows included "*.rc"). */
238 if (strcmp (initial_fn
, fn
) == 0
239 || strcmp (fn
+ strlen (fn
) - 2, ".h") != 0)
240 suppress_cpp_data
= 0;
242 suppress_cpp_data
= 1;
245 /* Allocate a string of a given length. */
250 struct alloc_string
*as
;
252 as
= xmalloc (sizeof *as
);
253 as
->s
= xmalloc (len
);
261 /* Handle a quoted string. The quotes are stripped. A pair of quotes
262 in a string are turned into a single quote. Adjacent strings
263 separated by whitespace are merged, as in C. */
266 handle_quotes (rc_uint_type
*len
)
268 const char *input
= rclex_tok
;
274 ret
= get_string (strlen (input
) + 1);
288 rcparse_warning ("backslash at end of string");
292 rcparse_warning ("use \"\" to put \" in a string");
298 *s
++ = ESCAPE_B
; /* Strange, but true... */
336 case '0': case '1': case '2': case '3':
337 case '4': case '5': case '6': case '7':
340 if (*t
>= '0' && *t
<= '7')
342 ch
= (ch
<< 3) | (*t
- '0');
344 if (*t
>= '0' && *t
<= '7')
346 ch
= (ch
<< 3) | (*t
- '0');
356 /* We only handle single byte chars here. Make sure
357 we finish an escape sequence like "\xB0ABC" after
358 the first two digits. */
360 while (num_xdigits
--)
362 if (*t
>= '0' && *t
<= '9')
363 ch
= (ch
<< 4) | (*t
- '0');
364 else if (*t
>= 'a' && *t
<= 'f')
365 ch
= (ch
<< 4) | (*t
- 'a' + 10);
366 else if (*t
>= 'A' && *t
<= 'F')
367 ch
= (ch
<< 4) | (*t
- 'A' + 10);
376 rcparse_warning ("unrecognized escape sequence");
384 else if (t
[1] == '\0')
386 else if (t
[1] == '"')
393 rcparse_warning ("unexpected character after '\"'");
395 assert (ISSPACE (*t
));
416 /* Allocate a unicode string of a given length. */
419 get_unistring (int len
)
421 return (unichar
*) get_string (len
* sizeof (unichar
));
424 /* Handle a quoted unicode string. The quotes are stripped. A pair of quotes
425 in a string are turned into a single quote. Adjacent strings
426 separated by whitespace are merged, as in C. */
429 handle_uniquotes (rc_uint_type
*len
)
431 const char *input
= rclex_tok
;
437 ret
= get_unistring (strlen (input
) + 1);
441 if ((*t
== 'L' || *t
== 'l') && t
[1] == '"')
453 rcparse_warning ("backslash at end of string");
457 rcparse_warning ("use \"\" to put \" in a string");
461 *s
++ = ESCAPE_B
; /* Strange, but true... */
496 *s
++ = (unichar
) *t
++;
499 case '0': case '1': case '2': case '3':
500 case '4': case '5': case '6': case '7':
503 if (*t
>= '0' && *t
<= '7')
505 ch
= (ch
<< 3) | (*t
- '0');
507 if (*t
>= '0' && *t
<= '7')
509 ch
= (ch
<< 3) | (*t
- '0');
519 /* We only handle two byte chars here. Make sure
520 we finish an escape sequence like "\xB0ABC" after
521 the first two digits. */
523 while (num_xdigits
--)
525 if (*t
>= '0' && *t
<= '9')
526 ch
= (ch
<< 4) | (*t
- '0');
527 else if (*t
>= 'a' && *t
<= 'f')
528 ch
= (ch
<< 4) | (*t
- 'a' + 10);
529 else if (*t
>= 'A' && *t
<= 'F')
530 ch
= (ch
<< 4) | (*t
- 'A' + 10);
539 rcparse_warning ("unrecognized escape sequence");
541 *s
++ = (unichar
) *t
++;
546 *s
++ = (unichar
) *t
++;
547 else if (t
[1] == '\0')
549 else if (t
[1] == '"')
557 assert (ISSPACE (*t
));
578 /* Discard all the strings we have allocated. The parser calls this
579 when it no longer needs them. */
582 rcparse_discard_strings (void)
584 struct alloc_string
*as
;
589 struct alloc_string
*n
;
600 /* Enter rcdata mode. */
602 rcparse_rcdata (void)
607 /* Go back to normal mode from rcdata mode. */
609 rcparse_normal (void)
615 rclex_tok_add_char (int ch
)
617 if (! rclex_tok
|| rclex_tok_max
<= rclex_tok_pos
)
619 char *h
= xmalloc (rclex_tok_max
+ 9);
625 memcpy (h
, rclex_tok
, rclex_tok_pos
+ 1);
634 rclex_tok
[rclex_tok_pos
++] = (char) ch
;
635 rclex_tok
[rclex_tok_pos
] = 0;
643 if ((r
= rclex_lastch
) != -1)
650 if (! cpp_pipe
|| feof (cpp_pipe
)
651 || fread (&ch
, 1, 1,cpp_pipe
) != 1)
653 r
= ((int) ch
) & 0xff;
655 while (r
== 0 || r
== '\r');
657 rclex_tok_add_char (r
);
666 if ((r
= rclex_lastch
) == -1)
668 if ((r
= rclex_readch ()) != -1)
671 if (rclex_tok_pos
> 0)
672 rclex_tok
[--rclex_tok_pos
] = 0;
683 while ((c
= rclex_peekch ()) != -1)
690 if ((c
= rclex_peekch ()) == -1 || c
== '\n')
694 else if (rclex_readch () == '"')
696 if (rclex_peekch () == '"')
707 rc_uint_type base
= 10;
708 rc_uint_type ret
, val
;
715 switch (rclex_peekch ())
729 ret
= (rc_uint_type
) (ch
- '0');
730 while ((ch
= rclex_peekch ()) != -1)
733 val
= (rc_uint_type
) (ch
- '0');
734 else if (ch
>= 'a' && ch
<= 'f')
735 val
= (rc_uint_type
) ((ch
- 'a') + 10);
736 else if (ch
>= 'A' && ch
<= 'F')
737 val
= (rc_uint_type
) ((ch
- 'A') + 10);
741 if (! warned
&& val
>= base
)
744 rcparse_warning ("digit exceeds base");
752 /* yyparser entry method. */
762 /* Make sure that rclex_tok is initialized. */
764 rclex_tok_add_char (-1);
774 if ((ch
= rclex_readch ()) == -1)
784 while ((ch
= rclex_peekch ()) != -1 && ch
!= '\n')
791 ch
= IGNORE_CPP (BEG
);
795 ch
= IGNORE_CPP (END
);
798 case '0': case '1': case '2': case '3': case '4':
799 case '5': case '6': case '7': case '8': case '9':
800 yylval
.i
.val
= read_digit (ch
);
802 switch (rclex_peekch ())
809 ch
= IGNORE_CPP (NUMBER
);
813 ch
= IGNORE_CPP ((! rcdata_mode
? QUOTEDSTRING
: SIZEDSTRING
));
814 if (ch
== IGNORED_TOKEN
)
816 s
= handle_quotes (&length
);
821 yylval
.ss
.length
= length
;
826 if (rclex_peekch () == '"')
830 ch
= IGNORE_CPP ((! rcdata_mode
? QUOTEDUNISTRING
: SIZEDUNISTRING
));
831 if (ch
== IGNORED_TOKEN
)
833 us
= handle_uniquotes (&length
);
838 yylval
.suni
.length
= length
;
845 if (ISIDST (ch
) || ch
=='$')
847 while ((ch
= rclex_peekch ()) != -1
848 && (ISIDNUM (ch
) || ch
== '$' || ch
== '.'
849 || ch
== ':' || ch
== '\\' || ch
== '/'
850 || ch
== '_' || ch
== '-')
853 ch
= IGNORE_CPP (rclex_translatekeyword (rclex_tok
));
856 s
= get_string (strlen (rclex_tok
) + 1);
857 strcpy (s
, rclex_tok
);
860 else if (ch
== BLOCK
)
862 const char *hs
= NULL
;
871 hs
= yylval
.s
= yylval
.ss
.s
;
876 rcparse_warning ("BLOCK expects a string as argument.");
879 else if (! strcmp (hs
, "StringFileInfo"))
880 ch
= BLOCKSTRINGFILEINFO
;
881 else if (! strcmp (hs
, "VarFileInfo"))
882 ch
= BLOCKVARFILEINFO
;
886 ch
= IGNORE_CPP (ch
);
890 while (ch
== IGNORED_TOKEN
);