Update my e-mail address for new employer.
[official-gcc.git] / gcc / gengtype-parse.c
blob37366ad987d75042512a1474148fc796e8d9a5bb
1 /* Process source files and output type information.
2 Copyright (C) 2006 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 #include "bconfig.h"
22 #include "system.h"
23 #include "gengtype.h"
25 /* This is a simple recursive-descent parser which understands a subset of
26 the C type grammar.
28 Rule functions are suffixed _seq if they scan a sequence of items;
29 _opt if they may consume zero tokens; _seqopt if both are true. The
30 "consume_" prefix indicates that a sequence of tokens is parsed for
31 syntactic correctness and then thrown away. */
/* One-token lookahead buffer shared by every rule function below.  */

struct token
{
  const char *value;  /* Value the lexer stored for the token, if any.  */
  int code;           /* Token code as returned by yylex.  */
  bool valid;         /* True while CODE/VALUE hold an unconsumed token.  */
};
static struct token T;
43 /* Retrieve the code of the current token; if there is no current token,
44 get the next one from the lexer. */
45 static inline int
46 token (void)
48 if (!T.valid)
50 T.code = yylex (&T.value);
51 T.valid = true;
53 return T.code;
56 /* Retrieve the value of the current token (if any) and mark it consumed.
57 The next call to token() will get another token from the lexer. */
58 static inline const char *
59 advance (void)
61 T.valid = false;
62 return T.value;
65 /* Diagnostics. */
/* Human-readable token names for diagnostics.  Index this table with
   (token code - CHAR_TOKEN_OFFSET).  */
static const char *const token_names[] = {
  /* Fixed spellings.  */
  "GTY", "typedef", "extern", "static", "union", "struct", "enum",
  "VEC", "DEF_VEC_[OP]", "DEF_VEC_I", "DEF_VEC_ALLOC_[IOP]", "...",
  "ptr_alias", "nested_ptr",

  /* Descriptions of token classes.  */
  "a param<N>_is option",
  "a number",
  "a scalar type",
  "an identifier",
  "a string constant",
  "a character constant",
  "an array declarator",
};
/* printf formats used to display a token together with its value.
   Index this table with (token code - FIRST_TOKEN_WITH_VALUE).  Each
   format takes exactly one %s argument.  */
static const char *const token_value_format[] = {
  "%s",
  "'%s'", "'%s'", "'%s'",
  "'\"%s\"'",
  "\"'%s'\"",
  "'[%s]'",
};
103 /* Produce a printable representation for a token defined by CODE and
104 VALUE. This sometimes returns pointers into malloc memory and
105 sometimes not, therefore it is unsafe to free the pointer it
106 returns, so that memory is leaked. This does not matter, as this
107 function is only used for diagnostics, and in a successful run of
108 the program there will be none. */
109 static const char *
110 print_token (int code, const char *value)
112 if (code < CHAR_TOKEN_OFFSET)
113 return xasprintf ("'%c'", code);
114 else if (code < FIRST_TOKEN_WITH_VALUE)
115 return xasprintf ("'%s'", token_names[code - CHAR_TOKEN_OFFSET]);
116 else if (!value)
117 return token_names[code - CHAR_TOKEN_OFFSET]; /* don't quote these */
118 else
119 return xasprintf (token_value_format[code - FIRST_TOKEN_WITH_VALUE],
120 value);
123 /* Convenience wrapper around print_token which produces the printable
124 representation of the current token. */
125 static inline const char *
126 print_cur_token (void)
128 return print_token (T.code, T.value);
131 /* Report a parse error on the current line, with diagnostic MSG.
132 Behaves as standard printf with respect to additional arguments and
133 format escapes. */
134 static void ATTRIBUTE_PRINTF_1
135 parse_error (const char *msg, ...)
137 va_list ap;
139 fprintf (stderr, "%s:%d: parse error: ", lexer_line.file, lexer_line.line);
141 va_start (ap, msg);
142 vfprintf (stderr, msg, ap);
143 va_end (ap);
145 hit_error = true;
/* Consume the next token.  If its code is T, return its value;
   otherwise report a parse error and return null.  The token is
   consumed in both cases.  */
static const char *
require (int t)
{
  int have = token ();
  const char *text = advance ();

  if (have == t)
    return text;

  parse_error ("expected %s, have %s",
               print_token (t, 0), print_token (have, text));
  return 0;
}
/* Consume the next token.  If its code is T1 or T2, return its value;
   otherwise report a parse error and return null.  The token is
   consumed in both cases.  */
static const char *
require2 (int t1, int t2)
{
  int have = token ();
  const char *text = advance ();

  if (have == t1 || have == t2)
    return text;

  parse_error ("expected %s or %s, have %s",
               print_token (t1, 0), print_token (t2, 0),
               print_token (have, text));
  return 0;
}
181 /* Near-terminals. */
183 /* C-style string constant concatenation: STRING+
184 Bare STRING should appear nowhere else in this file. */
185 static const char *
186 string_seq (void)
188 const char *s1, *s2;
189 size_t l1, l2;
190 char *buf;
192 s1 = require (STRING);
193 if (s1 == 0)
194 return "";
195 while (token () == STRING)
197 s2 = advance ();
199 l1 = strlen (s1);
200 l2 = strlen (s2);
201 buf = XRESIZEVEC (char, s1, l1 + l2 + 1);
202 memcpy (buf + l1, s2, l2 + 1);
203 XDELETE (s2);
204 s1 = buf;
206 return s1;
209 /* typedef_name: either an ID, or VEC(x,y) which is translated to VEC_x_y.
210 Use only where VEC(x,y) is legitimate, i.e. in positions where a
211 typedef name may appear. */
212 static const char *
213 typedef_name (void)
215 if (token () == VEC_TOKEN)
217 const char *c1, *c2, *r;
218 advance ();
219 require ('(');
220 c1 = require2 (ID, SCALAR);
221 require (',');
222 c2 = require (ID);
223 require (')');
224 r = concat ("VEC_", c1, "_", c2, (char *)0);
225 free ((void *)c1);
226 free ((void *)c2);
227 return r;
229 else
230 return require (ID);
233 /* Absorb a sequence of tokens delimited by balanced ()[]{}. */
234 static void
235 consume_balanced (int opener, int closer)
237 require (opener);
238 for (;;)
239 switch (token ())
241 default: advance (); break;
242 case '(': consume_balanced ('(',')'); break;
243 case '[': consume_balanced ('[',']'); break;
244 case '{': consume_balanced ('{','}'); break;
246 case '}':
247 case ']':
248 case ')':
249 if (token () != closer)
250 parse_error ("unbalanced delimiters - expected '%c', have '%c'",
251 closer, token ());
252 advance ();
253 return;
255 case EOF_TOKEN:
256 parse_error ("unexpected end of file within %c%c-delimited construct",
257 opener, closer);
258 return;
262 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
263 expressions, until we encounter a semicolon outside any such
264 delimiters; absorb that too. If IMMEDIATE is true, it is an error
265 if the semicolon is not the first token encountered. */
266 static void
267 consume_until_semi (bool immediate)
269 if (immediate && token () != ';')
270 require (';');
271 for (;;)
272 switch (token ())
274 case ';': advance (); return;
275 default: advance (); break;
277 case '(': consume_balanced ('(',')'); break;
278 case '[': consume_balanced ('[',']'); break;
279 case '{': consume_balanced ('{','}'); break;
281 case '}':
282 case ']':
283 case ')':
284 parse_error ("unmatched '%c' while scanning for ';'", token ());
285 return;
287 case EOF_TOKEN:
288 parse_error ("unexpected end of file while scanning for ';'");
289 return;
293 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
294 expressions, until we encounter a comma or semicolon outside any
295 such delimiters; absorb that too. If IMMEDIATE is true, it is an
296 error if the comma or semicolon is not the first token encountered.
297 Returns true if the loop ended with a comma. */
298 static bool
299 consume_until_comma_or_semi (bool immediate)
301 if (immediate && token () != ',' && token () != ';')
302 require2 (',', ';');
303 for (;;)
304 switch (token ())
306 case ',': advance (); return true;
307 case ';': advance (); return false;
308 default: advance (); break;
310 case '(': consume_balanced ('(',')'); break;
311 case '[': consume_balanced ('[',']'); break;
312 case '{': consume_balanced ('{','}'); break;
314 case '}':
315 case ']':
316 case ')':
317 parse_error ("unmatched '%s' while scanning for ',' or ';'",
318 print_cur_token ());
319 return false;
321 case EOF_TOKEN:
322 parse_error ("unexpected end of file while scanning for ',' or ';'");
323 return false;
328 /* GTY(()) option handling. */
329 static type_p type (options_p *optsp, bool nested);
331 /* Optional parenthesized string: ('(' string_seq ')')? */
332 static options_p
333 str_optvalue_opt (options_p prev)
335 const char *name = advance ();
336 const char *value = "";
337 if (token () == '(')
339 advance ();
340 value = string_seq ();
341 require (')');
343 return create_option (prev, name, value);
346 /* absdecl: type '*'*
347 -- a vague approximation to what the C standard calls an abstract
348 declarator. The only kinds that are actually used are those that
349 are just a bare type and those that have trailing pointer-stars.
350 Further kinds should be implemented if and when they become
351 necessary. Used only within GTY(()) option values, therefore
352 further GTY(()) tags within the type are invalid. Note that the
353 return value has already been run through adjust_field_type. */
354 static type_p
355 absdecl (void)
357 type_p ty;
358 options_p opts;
360 ty = type (&opts, true);
361 while (token () == '*')
363 ty = create_pointer (ty);
364 advance ();
367 if (opts)
368 parse_error ("nested GTY(()) options are invalid");
370 return adjust_field_type (ty, 0);
373 /* Type-option: '(' absdecl ')' */
374 static options_p
375 type_optvalue (options_p prev, const char *name)
377 type_p ty;
378 require ('(');
379 ty = absdecl ();
380 require (')');
381 return create_option (prev, name, ty);
384 /* Nested pointer data: '(' type '*'* ',' string_seq ',' string_seq ')' */
385 static options_p
386 nestedptr_optvalue (options_p prev)
388 type_p ty;
389 const char *from, *to;
391 require ('(');
392 ty = absdecl ();
393 require (',');
394 to = string_seq ();
395 require (',');
396 from = string_seq ();
397 require (')');
399 return create_nested_ptr_option (prev, ty, to, from);
402 /* One GTY(()) option:
403 ID str_optvalue_opt
404 | PTR_ALIAS type_optvalue
405 | PARAM_IS type_optvalue
406 | NESTED_PTR nestedptr_optvalue
408 static options_p
409 option (options_p prev)
411 switch (token ())
413 case ID:
414 return str_optvalue_opt (prev);
416 case PTR_ALIAS:
417 advance ();
418 return type_optvalue (prev, "ptr_alias");
420 case PARAM_IS:
421 return type_optvalue (prev, advance ());
423 case NESTED_PTR:
424 advance ();
425 return nestedptr_optvalue (prev);
427 default:
428 parse_error ("expected an option keyword, have %s",
429 print_cur_token ());
430 advance ();
431 return create_option (prev, "", "");
435 /* One comma-separated list of options. */
436 static options_p
437 option_seq (void)
439 options_p o;
441 o = option (0);
442 while (token () == ',')
444 advance ();
445 o = option (o);
447 return o;
450 /* GTY marker: 'GTY' '(' '(' option_seq? ')' ')' */
451 static options_p
452 gtymarker (void)
454 options_p result = 0;
455 require (GTY_TOKEN);
456 require ('(');
457 require ('(');
458 if (token () != ')')
459 result = option_seq ();
460 require (')');
461 require (')');
462 return result;
465 /* Optional GTY marker. */
466 static options_p
467 gtymarker_opt (void)
469 if (token () != GTY_TOKEN)
470 return 0;
471 return gtymarker ();
474 /* Declarators. The logic here is largely lifted from c-parser.c.
475 Note that we do not have to process abstract declarators, which can
476 appear only in parameter type lists or casts (but see absdecl,
477 above). Also, type qualifiers are thrown out in gengtype-lex.l so
478 we don't have to do it. */
480 /* array_and_function_declarators_opt:
481 \epsilon
482 array_and_function_declarators_opt ARRAY
483 array_and_function_declarators_opt '(' ... ')'
485 where '...' indicates stuff we ignore except insofar as grouping
486 symbols ()[]{} must balance.
488 Subroutine of direct_declarator - do not use elsewhere. */
490 static type_p
491 array_and_function_declarators_opt (type_p ty)
493 if (token () == ARRAY)
495 const char *array = advance ();
496 return create_array (array_and_function_declarators_opt (ty), array);
498 else if (token () == '(')
500 /* We don't need exact types for functions. */
501 consume_balanced ('(', ')');
502 array_and_function_declarators_opt (ty);
503 return create_scalar_type ("function type");
505 else
506 return ty;
509 static type_p inner_declarator (type_p, const char **, options_p *);
511 /* direct_declarator:
512 '(' inner_declarator ')'
513 gtymarker_opt ID array_and_function_declarators_opt
515 Subroutine of declarator, mutually recursive with inner_declarator;
516 do not use elsewhere. */
517 static type_p
518 direct_declarator (type_p ty, const char **namep, options_p *optsp)
520 /* The first token in a direct-declarator must be an ID, a
521 GTY marker, or an open parenthesis. */
522 switch (token ())
524 case GTY_TOKEN:
525 *optsp = gtymarker ();
526 /* fall through */
527 case ID:
528 *namep = require (ID);
529 break;
531 case '(':
532 advance ();
533 ty = inner_declarator (ty, namep, optsp);
534 require (')');
535 break;
537 default:
538 parse_error ("expected '(', 'GTY', or an identifier, have %s",
539 print_cur_token ());
540 /* Do _not_ advance if what we have is a close squiggle brace, as
541 we will get much better error recovery that way. */
542 if (token () != '}')
543 advance ();
544 return 0;
546 return array_and_function_declarators_opt (ty);
549 /* The difference between inner_declarator and declarator is in the
550 handling of stars. Consider this declaration:
552 char * (*pfc) (void)
554 It declares a pointer to a function that takes no arguments and
555 returns a char*. To construct the correct type for this
556 declaration, the star outside the parentheses must be processed
557 _before_ the function type, the star inside the parentheses must
558 be processed _after_ the function type. To accomplish this,
559 declarator() creates pointers before recursing (it is actually
560 coded as a while loop), whereas inner_declarator() recurses before
561 creating pointers. */
563 /* inner_declarator:
564 '*' inner_declarator
565 direct_declarator
567 Mutually recursive subroutine of direct_declarator; do not use
568 elsewhere. */
570 static type_p
571 inner_declarator (type_p ty, const char **namep, options_p *optsp)
573 if (token () == '*')
575 type_p inner;
576 advance ();
577 inner = inner_declarator (ty, namep, optsp);
578 if (inner == 0)
579 return 0;
580 else
581 return create_pointer (ty);
583 else
584 return direct_declarator (ty, namep, optsp);
587 /* declarator: '*'+ direct_declarator
589 This is the sole public interface to this part of the grammar.
590 Arguments are the type known so far, a pointer to where the name
591 may be stored, and a pointer to where GTY options may be stored.
592 Returns the final type. */
594 static type_p
595 declarator (type_p ty, const char **namep, options_p *optsp)
597 *namep = 0;
598 *optsp = 0;
599 while (token () == '*')
601 advance ();
602 ty = create_pointer (ty);
604 return direct_declarator (ty, namep, optsp);
607 /* Types and declarations. */
609 /* Structure field(s) declaration:
611 type bitfield ';'
612 | type declarator bitfield? ( ',' declarator bitfield? )+ ';'
615 Knows that such declarations must end with a close brace (or,
616 erroneously, at EOF).
618 static pair_p
619 struct_field_seq (void)
621 pair_p f = 0;
622 type_p ty, dty;
623 options_p opts, dopts;
624 const char *name;
625 bool another;
629 ty = type (&opts, true);
630 /* Another piece of the IFCVT_EXTRA_FIELDS special case, see type(). */
631 if (!ty && token () == '}')
632 break;
634 if (!ty || token () == ':')
636 consume_until_semi (false);
637 continue;
642 dty = declarator (ty, &name, &dopts);
643 /* There could be any number of weird things after the declarator,
644 notably bitfield declarations and __attribute__s. If this
645 function returns true, the last thing was a comma, so we have
646 more than one declarator paired with the current type. */
647 another = consume_until_comma_or_semi (false);
649 if (!dty)
650 continue;
652 if (opts && dopts)
653 parse_error ("two GTY(()) options for field %s", name);
654 if (opts && !dopts)
655 dopts = opts;
657 f = create_field_at (f, dty, name, dopts, &lexer_line);
659 while (another);
661 while (token () != '}' && token () != EOF_TOKEN);
662 return nreverse_pairs (f);
665 /* This is called type(), but what it parses (sort of) is what C calls
666 declaration-specifiers and specifier-qualifier-list:
668 SCALAR
669 | ID // typedef
670 | (STRUCT|UNION) ID? gtymarker? ( '{' gtymarker? struct_field_seq '}' )?
671 | ENUM ID ( '{' ... '}' )?
673 Returns a partial type; under some conditions (notably
674 "struct foo GTY((...)) thing;") it may write an options
675 structure to *OPTSP.
677 static type_p
678 type (options_p *optsp, bool nested)
680 const char *s;
681 bool is_union;
682 *optsp = 0;
683 switch (token ())
685 case SCALAR:
686 s = advance ();
687 return create_scalar_type (s);
689 case ID:
690 case VEC_TOKEN:
691 s = typedef_name ();
692 return resolve_typedef (s, &lexer_line);
694 case STRUCT:
695 case UNION:
697 options_p opts = 0;
699 is_union = (token() == UNION);
700 advance ();
702 if (token () == ID)
703 s = advance ();
704 else
705 s = xasprintf ("anonymous:%s:%d", lexer_line.file, lexer_line.line);
707 /* Top-level structures that are not explicitly tagged GTY(())
708 are treated as mere forward declarations. This is because
709 there are a lot of structures that we don't need to know
710 about, and some of those have weird macro stuff in them
711 that we can't handle. */
712 if (nested || token () == GTY_TOKEN)
714 opts = gtymarker_opt ();
715 if (token () == '{')
717 pair_p fields;
718 advance ();
719 fields = struct_field_seq ();
720 require ('}');
721 return new_structure (s, is_union, &lexer_line, fields, opts);
724 else if (token () == '{')
725 consume_balanced ('{', '}');
726 if (opts)
727 *optsp = opts;
728 return find_structure (s, is_union);
731 case ENUM:
732 advance ();
733 if (token () == ID)
734 s = advance ();
735 else
736 s = xasprintf ("anonymous:%s:%d", lexer_line.file, lexer_line.line);
738 if (token () == '{')
739 consume_balanced ('{','}');
740 return create_scalar_type (s);
742 default:
743 parse_error ("expected a type specifier, have %s", print_cur_token ());
744 advance ();
745 return create_scalar_type ("erroneous type");
749 /* Top level constructs. */
751 /* Dispatch declarations beginning with 'typedef'. */
753 static void
754 typedef_decl (void)
756 type_p ty, dty;
757 const char *name;
758 options_p opts;
759 bool another;
761 gcc_assert (token () == TYPEDEF);
762 advance ();
764 ty = type (&opts, false);
765 if (!ty)
766 return;
767 if (opts)
768 parse_error ("GTY((...)) cannot be applied to a typedef");
771 dty = declarator (ty, &name, &opts);
772 if (opts)
773 parse_error ("GTY((...)) cannot be applied to a typedef");
775 /* Yet another place where we could have junk (notably attributes)
776 after the declarator. */
777 another = consume_until_comma_or_semi (false);
778 if (dty)
779 do_typedef (name, dty, &lexer_line);
781 while (another);
784 /* Structure definition: type() does all the work. */
786 static void
787 struct_or_union (void)
789 options_p dummy;
790 type (&dummy, false);
791 /* There may be junk after the type: notably, we cannot currently
792 distinguish 'struct foo *function(prototype);' from 'struct foo;'
793 ... we could call declarator(), but it's a waste of time at
794 present. Instead, just eat whatever token is currently lookahead
795 and go back to lexical skipping mode. */
796 advance ();
799 /* GC root declaration:
800 (extern|static) gtymarker? type ID array_declarators_opt (';'|'=')
801 If the gtymarker is not present, we ignore the rest of the declaration. */
802 static void
803 extern_or_static (void)
805 options_p opts, opts2, dopts;
806 type_p ty, dty;
807 const char *name;
808 require2 (EXTERN, STATIC);
810 if (token () != GTY_TOKEN)
812 advance ();
813 return;
816 opts = gtymarker ();
817 ty = type (&opts2, true); /* if we get here, it's got a GTY(()) */
818 dty = declarator (ty, &name, &dopts);
820 if ((opts && dopts) || (opts && opts2) || (opts2 && dopts))
821 parse_error ("GTY((...)) specified more than once for %s", name);
822 else if (opts2)
823 opts = opts2;
824 else if (dopts)
825 opts = dopts;
827 if (dty)
829 note_variable (name, adjust_field_type (dty, opts), opts, &lexer_line);
830 require2 (';', '=');
834 /* Definition of a generic VEC structure:
836 'DEF_VEC_[IPO]' '(' id ')' ';'
838 Scalar VECs require slightly different treatment than otherwise -
839 that's handled in note_def_vec, we just pass it along.*/
840 static void
841 def_vec (void)
843 bool is_scalar = (token() == DEFVEC_I);
844 const char *type;
846 require2 (DEFVEC_OP, DEFVEC_I);
847 require ('(');
848 type = require2 (ID, SCALAR);
849 require (')');
850 require (';');
852 if (!type)
853 return;
855 note_def_vec (type, is_scalar, &lexer_line);
856 note_def_vec_alloc (type, "none", &lexer_line);
859 /* Definition of an allocation strategy for a VEC structure:
861 'DEF_VEC_ALLOC_[IPO]' '(' id ',' id ')' ';'
863 For purposes of gengtype, this just declares a wrapper structure. */
864 static void
865 def_vec_alloc (void)
867 const char *type, *astrat;
869 require (DEFVEC_ALLOC);
870 require ('(');
871 type = require2 (ID, SCALAR);
872 require (',');
873 astrat = require (ID);
874 require (')');
875 require (';');
877 if (!type || !astrat)
878 return;
880 note_def_vec_alloc (type, astrat, &lexer_line);
883 /* Parse the file FNAME for GC-relevant declarations and definitions.
884 This is the only entry point to this file. */
885 void
886 parse_file (const char *fname)
888 yybegin (fname);
889 for (;;)
891 switch (token ())
893 case EXTERN:
894 case STATIC:
895 extern_or_static ();
896 break;
898 case STRUCT:
899 case UNION:
900 struct_or_union ();
901 break;
903 case TYPEDEF:
904 typedef_decl ();
905 break;
907 case DEFVEC_OP:
908 case DEFVEC_I:
909 def_vec ();
910 break;
912 case DEFVEC_ALLOC:
913 def_vec_alloc ();
914 break;
916 case EOF_TOKEN:
917 goto eof;
919 default:
920 parse_error ("unexpected top level token, %s", print_cur_token ());
921 goto eof;
923 lexer_toplevel_done = 1;
926 eof:
927 advance ();
928 yyend ();