* config.gcc (cygwin tm_file): Add cygwin-stdint.h.
[official-gcc.git] / gcc / gengtype-parse.c
blob357981ad3cb392616603a3576694a9741b6f953c
1 /* Process source files and output type information.
2 Copyright (C) 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "bconfig.h"
21 #include "system.h"
22 #include "gengtype.h"
24 /* This is a simple recursive-descent parser which understands a subset of
25 the C type grammar.
27 Rule functions are suffixed _seq if they scan a sequence of items;
28 _opt if they may consume zero tokens; _seqopt if both are true. The
29 "consume_" prefix indicates that a sequence of tokens is parsed for
30 syntactic correctness and then thrown away. */
/* One-token lookahead buffer used by token () and advance ().  */

struct token
{
  const char *value;    /* Value pointer filled in by yylex.  */
  int code;             /* Code returned by yylex for this token.  */
  bool valid;           /* True while CODE and VALUE describe a token
                           that has not been consumed yet.  */
};

/* The single lookahead slot.  It has static storage, so it starts
   out zeroed and VALID is initially false.  */
static struct token T;
42 /* Retrieve the code of the current token; if there is no current token,
43 get the next one from the lexer. */
44 static inline int
45 token (void)
47 if (!T.valid)
49 T.code = yylex (&T.value);
50 T.valid = true;
52 return T.code;
55 /* Retrieve the value of the current token (if any) and mark it consumed.
56 The next call to token() will get another token from the lexer. */
57 static inline const char *
58 advance (void)
60 T.valid = false;
61 return T.value;
64 /* Diagnostics. */
/* Printable names of the non-character tokens.  Index this table
   with (token code - CHAR_TOKEN_OFFSET).  */
static const char *const token_names[] = {
  /* Literal spellings.  */
  "GTY",
  "typedef",
  "extern",
  "static",
  "union",
  "struct",
  "enum",
  "VEC",
  "DEF_VEC_[OP]",
  "DEF_VEC_I",
  "DEF_VEC_ALLOC_[IOP]",
  "...",
  "ptr_alias",
  "nested_ptr",

  /* Descriptive phrases, worded so that they read naturally after
     "expected" in a diagnostic.  */
  "a param<N>_is option",
  "a number",
  "a scalar type",
  "an identifier",
  "a string constant",
  "a character constant",
  "an array declarator",
};
/* printf formats used by print_token to render a token together with
   its value.  Index this table with
   (token code - FIRST_TOKEN_WITH_VALUE); each format takes exactly
   one %s argument.
   NOTE(review): the seven entries appear to correspond, in order, to
   the last seven entries of token_names -- confirm against the token
   code definitions.  */
static const char *const token_value_format[] = {
  "%s",                 /* Value printed bare.  */
  "'%s'",               /* Value in single quotes.  */
  "'%s'",
  "'%s'",
  "'\"%s\"'",           /* Value shown as a double-quoted string.  */
  "\"'%s'\"",           /* Value shown as a single-quoted character.  */
  "'[%s]'",             /* Value shown inside square brackets.  */
};
102 /* Produce a printable representation for a token defined by CODE and
103 VALUE. This sometimes returns pointers into malloc memory and
104 sometimes not, therefore it is unsafe to free the pointer it
105 returns, so that memory is leaked. This does not matter, as this
106 function is only used for diagnostics, and in a successful run of
107 the program there will be none. */
108 static const char *
109 print_token (int code, const char *value)
111 if (code < CHAR_TOKEN_OFFSET)
112 return xasprintf ("'%c'", code);
113 else if (code < FIRST_TOKEN_WITH_VALUE)
114 return xasprintf ("'%s'", token_names[code - CHAR_TOKEN_OFFSET]);
115 else if (!value)
116 return token_names[code - CHAR_TOKEN_OFFSET]; /* don't quote these */
117 else
118 return xasprintf (token_value_format[code - FIRST_TOKEN_WITH_VALUE],
119 value);
122 /* Convenience wrapper around print_token which produces the printable
123 representation of the current token. */
124 static inline const char *
125 print_cur_token (void)
127 return print_token (T.code, T.value);
130 /* Report a parse error on the current line, with diagnostic MSG.
131 Behaves as standard printf with respect to additional arguments and
132 format escapes. */
133 static void ATTRIBUTE_PRINTF_1
134 parse_error (const char *msg, ...)
136 va_list ap;
138 fprintf (stderr, "%s:%d: parse error: ", lexer_line.file, lexer_line.line);
140 va_start (ap, msg);
141 vfprintf (stderr, msg, ap);
142 va_end (ap);
144 hit_error = true;
/* Consume the next token unconditionally.  If its code is T, return
   its value; otherwise report a parse error naming both the expected
   and the actual token and return a null pointer.  */
static const char *
require (int t)
{
  int got = token ();
  const char *got_value = advance ();

  if (got == t)
    return got_value;

  parse_error ("expected %s, have %s",
               print_token (t, 0), print_token (got, got_value));
  return 0;
}
/* Consume the next token unconditionally.  If its code is either T1
   or T2, return its value; otherwise report a parse error naming both
   acceptable tokens and the actual one, and return a null pointer.  */
static const char *
require2 (int t1, int t2)
{
  int got = token ();
  const char *got_value = advance ();

  if (got == t1 || got == t2)
    return got_value;

  parse_error ("expected %s or %s, have %s",
               print_token (t1, 0), print_token (t2, 0),
               print_token (got, got_value));
  return 0;
}
180 /* Near-terminals. */
182 /* C-style string constant concatenation: STRING+
183 Bare STRING should appear nowhere else in this file. */
184 static const char *
185 string_seq (void)
187 const char *s1, *s2;
188 size_t l1, l2;
189 char *buf;
191 s1 = require (STRING);
192 if (s1 == 0)
193 return "";
194 while (token () == STRING)
196 s2 = advance ();
198 l1 = strlen (s1);
199 l2 = strlen (s2);
200 buf = XRESIZEVEC (char, CONST_CAST(char *, s1), l1 + l2 + 1);
201 memcpy (buf + l1, s2, l2 + 1);
202 XDELETE (CONST_CAST (char *, s2));
203 s1 = buf;
205 return s1;
208 /* typedef_name: either an ID, or VEC(x,y) which is translated to VEC_x_y.
209 Use only where VEC(x,y) is legitimate, i.e. in positions where a
210 typedef name may appear. */
211 static const char *
212 typedef_name (void)
214 if (token () == VEC_TOKEN)
216 const char *c1, *c2, *r;
217 advance ();
218 require ('(');
219 c1 = require2 (ID, SCALAR);
220 require (',');
221 c2 = require (ID);
222 require (')');
223 r = concat ("VEC_", c1, "_", c2, (char *)0);
224 free (CONST_CAST (char *, c1));
225 free (CONST_CAST (char *, c2));
226 return r;
228 else
229 return require (ID);
232 /* Absorb a sequence of tokens delimited by balanced ()[]{}. */
233 static void
234 consume_balanced (int opener, int closer)
236 require (opener);
237 for (;;)
238 switch (token ())
240 default: advance (); break;
241 case '(': consume_balanced ('(',')'); break;
242 case '[': consume_balanced ('[',']'); break;
243 case '{': consume_balanced ('{','}'); break;
245 case '}':
246 case ']':
247 case ')':
248 if (token () != closer)
249 parse_error ("unbalanced delimiters - expected '%c', have '%c'",
250 closer, token ());
251 advance ();
252 return;
254 case EOF_TOKEN:
255 parse_error ("unexpected end of file within %c%c-delimited construct",
256 opener, closer);
257 return;
261 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
262 expressions, until we encounter a semicolon outside any such
263 delimiters; absorb that too. If IMMEDIATE is true, it is an error
264 if the semicolon is not the first token encountered. */
265 static void
266 consume_until_semi (bool immediate)
268 if (immediate && token () != ';')
269 require (';');
270 for (;;)
271 switch (token ())
273 case ';': advance (); return;
274 default: advance (); break;
276 case '(': consume_balanced ('(',')'); break;
277 case '[': consume_balanced ('[',']'); break;
278 case '{': consume_balanced ('{','}'); break;
280 case '}':
281 case ']':
282 case ')':
283 parse_error ("unmatched '%c' while scanning for ';'", token ());
284 return;
286 case EOF_TOKEN:
287 parse_error ("unexpected end of file while scanning for ';'");
288 return;
292 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
293 expressions, until we encounter a comma or semicolon outside any
294 such delimiters; absorb that too. If IMMEDIATE is true, it is an
295 error if the comma or semicolon is not the first token encountered.
296 Returns true if the loop ended with a comma. */
297 static bool
298 consume_until_comma_or_semi (bool immediate)
300 if (immediate && token () != ',' && token () != ';')
301 require2 (',', ';');
302 for (;;)
303 switch (token ())
305 case ',': advance (); return true;
306 case ';': advance (); return false;
307 default: advance (); break;
309 case '(': consume_balanced ('(',')'); break;
310 case '[': consume_balanced ('[',']'); break;
311 case '{': consume_balanced ('{','}'); break;
313 case '}':
314 case ']':
315 case ')':
316 parse_error ("unmatched '%s' while scanning for ',' or ';'",
317 print_cur_token ());
318 return false;
320 case EOF_TOKEN:
321 parse_error ("unexpected end of file while scanning for ',' or ';'");
322 return false;
327 /* GTY(()) option handling. */
328 static type_p type (options_p *optsp, bool nested);
330 /* Optional parenthesized string: ('(' string_seq ')')? */
331 static options_p
332 str_optvalue_opt (options_p prev)
334 const char *name = advance ();
335 const char *value = "";
336 if (token () == '(')
338 advance ();
339 value = string_seq ();
340 require (')');
342 return create_option (prev, name, value);
345 /* absdecl: type '*'*
346 -- a vague approximation to what the C standard calls an abstract
347 declarator. The only kinds that are actually used are those that
348 are just a bare type and those that have trailing pointer-stars.
349 Further kinds should be implemented if and when they become
350 necessary. Used only within GTY(()) option values, therefore
351 further GTY(()) tags within the type are invalid. Note that the
352 return value has already been run through adjust_field_type. */
353 static type_p
354 absdecl (void)
356 type_p ty;
357 options_p opts;
359 ty = type (&opts, true);
360 while (token () == '*')
362 ty = create_pointer (ty);
363 advance ();
366 if (opts)
367 parse_error ("nested GTY(()) options are invalid");
369 return adjust_field_type (ty, 0);
372 /* Type-option: '(' absdecl ')' */
373 static options_p
374 type_optvalue (options_p prev, const char *name)
376 type_p ty;
377 require ('(');
378 ty = absdecl ();
379 require (')');
380 return create_option (prev, name, ty);
383 /* Nested pointer data: '(' type '*'* ',' string_seq ',' string_seq ')' */
384 static options_p
385 nestedptr_optvalue (options_p prev)
387 type_p ty;
388 const char *from, *to;
390 require ('(');
391 ty = absdecl ();
392 require (',');
393 to = string_seq ();
394 require (',');
395 from = string_seq ();
396 require (')');
398 return create_nested_ptr_option (prev, ty, to, from);
401 /* One GTY(()) option:
402 ID str_optvalue_opt
403 | PTR_ALIAS type_optvalue
404 | PARAM_IS type_optvalue
405 | NESTED_PTR nestedptr_optvalue
407 static options_p
408 option (options_p prev)
410 switch (token ())
412 case ID:
413 return str_optvalue_opt (prev);
415 case PTR_ALIAS:
416 advance ();
417 return type_optvalue (prev, "ptr_alias");
419 case PARAM_IS:
420 return type_optvalue (prev, advance ());
422 case NESTED_PTR:
423 advance ();
424 return nestedptr_optvalue (prev);
426 default:
427 parse_error ("expected an option keyword, have %s",
428 print_cur_token ());
429 advance ();
430 return create_option (prev, "", "");
434 /* One comma-separated list of options. */
435 static options_p
436 option_seq (void)
438 options_p o;
440 o = option (0);
441 while (token () == ',')
443 advance ();
444 o = option (o);
446 return o;
449 /* GTY marker: 'GTY' '(' '(' option_seq? ')' ')' */
450 static options_p
451 gtymarker (void)
453 options_p result = 0;
454 require (GTY_TOKEN);
455 require ('(');
456 require ('(');
457 if (token () != ')')
458 result = option_seq ();
459 require (')');
460 require (')');
461 return result;
464 /* Optional GTY marker. */
465 static options_p
466 gtymarker_opt (void)
468 if (token () != GTY_TOKEN)
469 return 0;
470 return gtymarker ();
473 /* Declarators. The logic here is largely lifted from c-parser.c.
474 Note that we do not have to process abstract declarators, which can
475 appear only in parameter type lists or casts (but see absdecl,
476 above). Also, type qualifiers are thrown out in gengtype-lex.l so
477 we don't have to do it. */
479 /* array_and_function_declarators_opt:
480 \epsilon
481 array_and_function_declarators_opt ARRAY
482 array_and_function_declarators_opt '(' ... ')'
484 where '...' indicates stuff we ignore except insofar as grouping
485 symbols ()[]{} must balance.
487 Subroutine of direct_declarator - do not use elsewhere. */
489 static type_p
490 array_and_function_declarators_opt (type_p ty)
492 if (token () == ARRAY)
494 const char *array = advance ();
495 return create_array (array_and_function_declarators_opt (ty), array);
497 else if (token () == '(')
499 /* We don't need exact types for functions. */
500 consume_balanced ('(', ')');
501 array_and_function_declarators_opt (ty);
502 return create_scalar_type ("function type");
504 else
505 return ty;
508 static type_p inner_declarator (type_p, const char **, options_p *);
510 /* direct_declarator:
511 '(' inner_declarator ')'
512 gtymarker_opt ID array_and_function_declarators_opt
514 Subroutine of declarator, mutually recursive with inner_declarator;
515 do not use elsewhere. */
516 static type_p
517 direct_declarator (type_p ty, const char **namep, options_p *optsp)
519 /* The first token in a direct-declarator must be an ID, a
520 GTY marker, or an open parenthesis. */
521 switch (token ())
523 case GTY_TOKEN:
524 *optsp = gtymarker ();
525 /* fall through */
526 case ID:
527 *namep = require (ID);
528 break;
530 case '(':
531 advance ();
532 ty = inner_declarator (ty, namep, optsp);
533 require (')');
534 break;
536 default:
537 parse_error ("expected '(', 'GTY', or an identifier, have %s",
538 print_cur_token ());
539 /* Do _not_ advance if what we have is a close squiggle brace, as
540 we will get much better error recovery that way. */
541 if (token () != '}')
542 advance ();
543 return 0;
545 return array_and_function_declarators_opt (ty);
548 /* The difference between inner_declarator and declarator is in the
549 handling of stars. Consider this declaration:
551 char * (*pfc) (void)
553 It declares a pointer to a function that takes no arguments and
554 returns a char*. To construct the correct type for this
555 declaration, the star outside the parentheses must be processed
556 _before_ the function type, the star inside the parentheses must
557 be processed _after_ the function type. To accomplish this,
558 declarator() creates pointers before recursing (it is actually
559 coded as a while loop), whereas inner_declarator() recurses before
560 creating pointers. */
562 /* inner_declarator:
563 '*' inner_declarator
564 direct_declarator
566 Mutually recursive subroutine of direct_declarator; do not use
567 elsewhere. */
569 static type_p
570 inner_declarator (type_p ty, const char **namep, options_p *optsp)
572 if (token () == '*')
574 type_p inner;
575 advance ();
576 inner = inner_declarator (ty, namep, optsp);
577 if (inner == 0)
578 return 0;
579 else
580 return create_pointer (ty);
582 else
583 return direct_declarator (ty, namep, optsp);
586 /* declarator: '*'+ direct_declarator
588 This is the sole public interface to this part of the grammar.
589 Arguments are the type known so far, a pointer to where the name
590 may be stored, and a pointer to where GTY options may be stored.
591 Returns the final type. */
593 static type_p
594 declarator (type_p ty, const char **namep, options_p *optsp)
596 *namep = 0;
597 *optsp = 0;
598 while (token () == '*')
600 advance ();
601 ty = create_pointer (ty);
603 return direct_declarator (ty, namep, optsp);
606 /* Types and declarations. */
608 /* Structure field(s) declaration:
610 type bitfield ';'
611 | type declarator bitfield? ( ',' declarator bitfield? )+ ';'
614 Knows that such declarations must end with a close brace (or,
615 erroneously, at EOF).
617 static pair_p
618 struct_field_seq (void)
620 pair_p f = 0;
621 type_p ty, dty;
622 options_p opts, dopts;
623 const char *name;
624 bool another;
628 ty = type (&opts, true);
629 /* Another piece of the IFCVT_EXTRA_FIELDS special case, see type(). */
630 if (!ty && token () == '}')
631 break;
633 if (!ty || token () == ':')
635 consume_until_semi (false);
636 continue;
641 dty = declarator (ty, &name, &dopts);
642 /* There could be any number of weird things after the declarator,
643 notably bitfield declarations and __attribute__s. If this
644 function returns true, the last thing was a comma, so we have
645 more than one declarator paired with the current type. */
646 another = consume_until_comma_or_semi (false);
648 if (!dty)
649 continue;
651 if (opts && dopts)
652 parse_error ("two GTY(()) options for field %s", name);
653 if (opts && !dopts)
654 dopts = opts;
656 f = create_field_at (f, dty, name, dopts, &lexer_line);
658 while (another);
660 while (token () != '}' && token () != EOF_TOKEN);
661 return nreverse_pairs (f);
664 /* This is called type(), but what it parses (sort of) is what C calls
665 declaration-specifiers and specifier-qualifier-list:
667 SCALAR
668 | ID // typedef
669 | (STRUCT|UNION) ID? gtymarker? ( '{' gtymarker? struct_field_seq '}' )?
670 | ENUM ID ( '{' ... '}' )?
672 Returns a partial type; under some conditions (notably
673 "struct foo GTY((...)) thing;") it may write an options
674 structure to *OPTSP.
676 static type_p
677 type (options_p *optsp, bool nested)
679 const char *s;
680 bool is_union;
681 *optsp = 0;
682 switch (token ())
684 case SCALAR:
685 s = advance ();
686 return create_scalar_type (s);
688 case ID:
689 case VEC_TOKEN:
690 s = typedef_name ();
691 return resolve_typedef (s, &lexer_line);
693 case STRUCT:
694 case UNION:
696 options_p opts = 0;
698 is_union = (token() == UNION);
699 advance ();
701 if (token () == ID)
702 s = advance ();
703 else
704 s = xasprintf ("anonymous:%s:%d", lexer_line.file, lexer_line.line);
706 /* Top-level structures that are not explicitly tagged GTY(())
707 are treated as mere forward declarations. This is because
708 there are a lot of structures that we don't need to know
709 about, and some of those have weird macro stuff in them
710 that we can't handle. */
711 if (nested || token () == GTY_TOKEN)
713 opts = gtymarker_opt ();
714 if (token () == '{')
716 pair_p fields;
717 advance ();
718 fields = struct_field_seq ();
719 require ('}');
720 return new_structure (s, is_union, &lexer_line, fields, opts);
723 else if (token () == '{')
724 consume_balanced ('{', '}');
725 if (opts)
726 *optsp = opts;
727 return find_structure (s, is_union);
730 case ENUM:
731 advance ();
732 if (token () == ID)
733 s = advance ();
734 else
735 s = xasprintf ("anonymous:%s:%d", lexer_line.file, lexer_line.line);
737 if (token () == '{')
738 consume_balanced ('{','}');
739 return create_scalar_type (s);
741 default:
742 parse_error ("expected a type specifier, have %s", print_cur_token ());
743 advance ();
744 return create_scalar_type ("erroneous type");
748 /* Top level constructs. */
750 /* Dispatch declarations beginning with 'typedef'. */
752 static void
753 typedef_decl (void)
755 type_p ty, dty;
756 const char *name;
757 options_p opts;
758 bool another;
760 gcc_assert (token () == TYPEDEF);
761 advance ();
763 ty = type (&opts, false);
764 if (!ty)
765 return;
766 if (opts)
767 parse_error ("GTY((...)) cannot be applied to a typedef");
770 dty = declarator (ty, &name, &opts);
771 if (opts)
772 parse_error ("GTY((...)) cannot be applied to a typedef");
774 /* Yet another place where we could have junk (notably attributes)
775 after the declarator. */
776 another = consume_until_comma_or_semi (false);
777 if (dty)
778 do_typedef (name, dty, &lexer_line);
780 while (another);
783 /* Structure definition: type() does all the work. */
785 static void
786 struct_or_union (void)
788 options_p dummy;
789 type (&dummy, false);
790 /* There may be junk after the type: notably, we cannot currently
791 distinguish 'struct foo *function(prototype);' from 'struct foo;'
792 ... we could call declarator(), but it's a waste of time at
793 present. Instead, just eat whatever token is currently lookahead
794 and go back to lexical skipping mode. */
795 advance ();
798 /* GC root declaration:
799 (extern|static) gtymarker? type ID array_declarators_opt (';'|'=')
800 If the gtymarker is not present, we ignore the rest of the declaration. */
801 static void
802 extern_or_static (void)
804 options_p opts, opts2, dopts;
805 type_p ty, dty;
806 const char *name;
807 require2 (EXTERN, STATIC);
809 if (token () != GTY_TOKEN)
811 advance ();
812 return;
815 opts = gtymarker ();
816 ty = type (&opts2, true); /* if we get here, it's got a GTY(()) */
817 dty = declarator (ty, &name, &dopts);
819 if ((opts && dopts) || (opts && opts2) || (opts2 && dopts))
820 parse_error ("GTY((...)) specified more than once for %s", name);
821 else if (opts2)
822 opts = opts2;
823 else if (dopts)
824 opts = dopts;
826 if (dty)
828 note_variable (name, adjust_field_type (dty, opts), opts, &lexer_line);
829 require2 (';', '=');
833 /* Definition of a generic VEC structure:
835 'DEF_VEC_[IPO]' '(' id ')' ';'
837 Scalar VECs require slightly different treatment than otherwise -
838 that's handled in note_def_vec, we just pass it along.*/
839 static void
840 def_vec (void)
842 bool is_scalar = (token() == DEFVEC_I);
843 const char *type;
845 require2 (DEFVEC_OP, DEFVEC_I);
846 require ('(');
847 type = require2 (ID, SCALAR);
848 require (')');
849 require (';');
851 if (!type)
852 return;
854 note_def_vec (type, is_scalar, &lexer_line);
855 note_def_vec_alloc (type, "none", &lexer_line);
858 /* Definition of an allocation strategy for a VEC structure:
860 'DEF_VEC_ALLOC_[IPO]' '(' id ',' id ')' ';'
862 For purposes of gengtype, this just declares a wrapper structure. */
863 static void
864 def_vec_alloc (void)
866 const char *type, *astrat;
868 require (DEFVEC_ALLOC);
869 require ('(');
870 type = require2 (ID, SCALAR);
871 require (',');
872 astrat = require (ID);
873 require (')');
874 require (';');
876 if (!type || !astrat)
877 return;
879 note_def_vec_alloc (type, astrat, &lexer_line);
882 /* Parse the file FNAME for GC-relevant declarations and definitions.
883 This is the only entry point to this file. */
884 void
885 parse_file (const char *fname)
887 yybegin (fname);
888 for (;;)
890 switch (token ())
892 case EXTERN:
893 case STATIC:
894 extern_or_static ();
895 break;
897 case STRUCT:
898 case UNION:
899 struct_or_union ();
900 break;
902 case TYPEDEF:
903 typedef_decl ();
904 break;
906 case DEFVEC_OP:
907 case DEFVEC_I:
908 def_vec ();
909 break;
911 case DEFVEC_ALLOC:
912 def_vec_alloc ();
913 break;
915 case EOF_TOKEN:
916 goto eof;
918 default:
919 parse_error ("unexpected top level token, %s", print_cur_token ());
920 goto eof;
922 lexer_toplevel_done = 1;
925 eof:
926 advance ();
927 yyend ();