./:
[official-gcc.git] / gcc / gengtype-parse.c
blob6977ff26d8196debb76ff2adeaa22f98a8bef34a
1 /* Process source files and output type information.
2 Copyright (C) 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "bconfig.h"
21 #include "system.h"
22 #include "gengtype.h"
24 /* This is a simple recursive-descent parser which understands a subset of
25 the C type grammar.
27 Rule functions are suffixed _seq if they scan a sequence of items;
28 _opt if they may consume zero tokens; _seqopt if both are true. The
29 "consume_" prefix indicates that a sequence of tokens is parsed for
30 syntactic correctness and then thrown away. */
/* One-token lookahead buffer shared by every rule function in this
   file.  */

static struct token
{
  /* Text handed back by the lexer for the buffered token.  */
  const char *value;
  /* Token code as returned by yylex.  */
  int code;
  /* True while CODE/VALUE describe a token that has not been consumed.  */
  bool valid;
} T;
42 /* Retrieve the code of the current token; if there is no current token,
43 get the next one from the lexer. */
44 static inline int
45 token (void)
47 if (!T.valid)
49 T.code = yylex (&T.value);
50 T.valid = true;
52 return T.code;
55 /* Retrieve the value of the current token (if any) and mark it consumed.
56 The next call to token() will get another token from the lexer. */
57 static inline const char *
58 advance (void)
60 T.valid = false;
61 return T.value;
64 /* Diagnostics. */
/* Human-readable token names for diagnostics.  Index with the token
   code minus CHAR_TOKEN_OFFSET.  */
static const char *const token_names[] = {
  /* Keywords and other fixed spellings.  */
  "GTY",
  "typedef",
  "extern",
  "static",
  "union",
  "struct",
  "enum",
  "VEC",
  "DEF_VEC_[OP]",
  "DEF_VEC_I",
  "DEF_VEC_ALLOC_[IOP]",
  "...",
  "ptr_alias",
  "nested_ptr",

  /* Descriptive phrases; print_token returns these unquoted when no
     token value is supplied.  */
  "a param<N>_is option",
  "a number",
  "a scalar type",
  "an identifier",
  "a string constant",
  "a character constant",
  "an array declarator",
};
/* printf-style templates used by print_token to render a token
   together with its value.  Index with the token code minus
   FIRST_TOKEN_WITH_VALUE; every template takes exactly one %s.  */
static const char *const token_value_format[] = {
  "%s",                         /* printed bare */
  "'%s'",
  "'%s'",
  "'%s'",
  "'\"%s\"'",                   /* value shown inside double quotes */
  "\"'%s'\"",                   /* value shown inside single quotes */
  "'[%s]'",                     /* value shown inside brackets */
};
102 /* Produce a printable representation for a token defined by CODE and
103 VALUE. This sometimes returns pointers into malloc memory and
104 sometimes not, therefore it is unsafe to free the pointer it
105 returns, so that memory is leaked. This does not matter, as this
106 function is only used for diagnostics, and in a successful run of
107 the program there will be none. */
108 static const char *
109 print_token (int code, const char *value)
111 if (code < CHAR_TOKEN_OFFSET)
112 return xasprintf ("'%c'", code);
113 else if (code < FIRST_TOKEN_WITH_VALUE)
114 return xasprintf ("'%s'", token_names[code - CHAR_TOKEN_OFFSET]);
115 else if (!value)
116 return token_names[code - CHAR_TOKEN_OFFSET]; /* don't quote these */
117 else
118 return xasprintf (token_value_format[code - FIRST_TOKEN_WITH_VALUE],
119 value);
122 /* Convenience wrapper around print_token which produces the printable
123 representation of the current token. */
124 static inline const char *
125 print_cur_token (void)
127 return print_token (T.code, T.value);
130 /* Report a parse error on the current line, with diagnostic MSG.
131 Behaves as standard printf with respect to additional arguments and
132 format escapes. */
133 static void ATTRIBUTE_PRINTF_1
134 parse_error (const char *msg, ...)
136 va_list ap;
138 fprintf (stderr, "%s:%d: parse error: ", lexer_line.file, lexer_line.line);
140 va_start (ap, msg);
141 vfprintf (stderr, msg, ap);
142 va_end (ap);
144 fputc ('\n', stderr);
146 hit_error = true;
/* Consume the next token, which is expected to have code T.  Returns
   the token's value on a match.  On a mismatch a parse error is
   reported and null is returned; the offending token has been
   consumed either way.  */
static const char *
require (int t)
{
  int got = token ();
  const char *val = advance ();

  if (got == t)
    return val;

  parse_error ("expected %s, have %s",
	       print_token (t, 0), print_token (got, val));
  return 0;
}
/* Like require, but accept either of the codes T1 and T2.  Returns the
   consumed token's value on a match; otherwise reports a parse error
   and returns null.  */
static const char *
require2 (int t1, int t2)
{
  int got = token ();
  const char *val = advance ();

  if (got == t1 || got == t2)
    return val;

  parse_error ("expected %s or %s, have %s",
	       print_token (t1, 0), print_token (t2, 0),
	       print_token (got, val));
  return 0;
}
182 /* Near-terminals. */
184 /* C-style string constant concatenation: STRING+
185 Bare STRING should appear nowhere else in this file. */
186 static const char *
187 string_seq (void)
189 const char *s1, *s2;
190 size_t l1, l2;
191 char *buf;
193 s1 = require (STRING);
194 if (s1 == 0)
195 return "";
196 while (token () == STRING)
198 s2 = advance ();
200 l1 = strlen (s1);
201 l2 = strlen (s2);
202 buf = XRESIZEVEC (char, CONST_CAST(char *, s1), l1 + l2 + 1);
203 memcpy (buf + l1, s2, l2 + 1);
204 XDELETE (CONST_CAST (char *, s2));
205 s1 = buf;
207 return s1;
210 /* typedef_name: either an ID, or VEC(x,y) which is translated to VEC_x_y.
211 Use only where VEC(x,y) is legitimate, i.e. in positions where a
212 typedef name may appear. */
213 static const char *
214 typedef_name (void)
216 if (token () == VEC_TOKEN)
218 const char *c1, *c2, *r;
219 advance ();
220 require ('(');
221 c1 = require2 (ID, SCALAR);
222 require (',');
223 c2 = require (ID);
224 require (')');
225 r = concat ("VEC_", c1, "_", c2, (char *)0);
226 free (CONST_CAST (char *, c1));
227 free (CONST_CAST (char *, c2));
228 return r;
230 else
231 return require (ID);
234 /* Absorb a sequence of tokens delimited by balanced ()[]{}. */
235 static void
236 consume_balanced (int opener, int closer)
238 require (opener);
239 for (;;)
240 switch (token ())
242 default: advance (); break;
243 case '(': consume_balanced ('(',')'); break;
244 case '[': consume_balanced ('[',']'); break;
245 case '{': consume_balanced ('{','}'); break;
247 case '}':
248 case ']':
249 case ')':
250 if (token () != closer)
251 parse_error ("unbalanced delimiters - expected '%c', have '%c'",
252 closer, token ());
253 advance ();
254 return;
256 case EOF_TOKEN:
257 parse_error ("unexpected end of file within %c%c-delimited construct",
258 opener, closer);
259 return;
263 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
264 expressions, until we encounter a semicolon outside any such
265 delimiters; absorb that too. If IMMEDIATE is true, it is an error
266 if the semicolon is not the first token encountered. */
267 static void
268 consume_until_semi (bool immediate)
270 if (immediate && token () != ';')
271 require (';');
272 for (;;)
273 switch (token ())
275 case ';': advance (); return;
276 default: advance (); break;
278 case '(': consume_balanced ('(',')'); break;
279 case '[': consume_balanced ('[',']'); break;
280 case '{': consume_balanced ('{','}'); break;
282 case '}':
283 case ']':
284 case ')':
285 parse_error ("unmatched '%c' while scanning for ';'", token ());
286 return;
288 case EOF_TOKEN:
289 parse_error ("unexpected end of file while scanning for ';'");
290 return;
294 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
295 expressions, until we encounter a comma or semicolon outside any
296 such delimiters; absorb that too. If IMMEDIATE is true, it is an
297 error if the comma or semicolon is not the first token encountered.
298 Returns true if the loop ended with a comma. */
299 static bool
300 consume_until_comma_or_semi (bool immediate)
302 if (immediate && token () != ',' && token () != ';')
303 require2 (',', ';');
304 for (;;)
305 switch (token ())
307 case ',': advance (); return true;
308 case ';': advance (); return false;
309 default: advance (); break;
311 case '(': consume_balanced ('(',')'); break;
312 case '[': consume_balanced ('[',']'); break;
313 case '{': consume_balanced ('{','}'); break;
315 case '}':
316 case ']':
317 case ')':
318 parse_error ("unmatched '%s' while scanning for ',' or ';'",
319 print_cur_token ());
320 return false;
322 case EOF_TOKEN:
323 parse_error ("unexpected end of file while scanning for ',' or ';'");
324 return false;
329 /* GTY(()) option handling. */
330 static type_p type (options_p *optsp, bool nested);
332 /* Optional parenthesized string: ('(' string_seq ')')? */
333 static options_p
334 str_optvalue_opt (options_p prev)
336 const char *name = advance ();
337 const char *value = "";
338 if (token () == '(')
340 advance ();
341 value = string_seq ();
342 require (')');
344 return create_option (prev, name, value);
347 /* absdecl: type '*'*
348 -- a vague approximation to what the C standard calls an abstract
349 declarator. The only kinds that are actually used are those that
350 are just a bare type and those that have trailing pointer-stars.
351 Further kinds should be implemented if and when they become
352 necessary. Used only within GTY(()) option values, therefore
353 further GTY(()) tags within the type are invalid. Note that the
354 return value has already been run through adjust_field_type. */
355 static type_p
356 absdecl (void)
358 type_p ty;
359 options_p opts;
361 ty = type (&opts, true);
362 while (token () == '*')
364 ty = create_pointer (ty);
365 advance ();
368 if (opts)
369 parse_error ("nested GTY(()) options are invalid");
371 return adjust_field_type (ty, 0);
374 /* Type-option: '(' absdecl ')' */
375 static options_p
376 type_optvalue (options_p prev, const char *name)
378 type_p ty;
379 require ('(');
380 ty = absdecl ();
381 require (')');
382 return create_option (prev, name, ty);
385 /* Nested pointer data: '(' type '*'* ',' string_seq ',' string_seq ')' */
386 static options_p
387 nestedptr_optvalue (options_p prev)
389 type_p ty;
390 const char *from, *to;
392 require ('(');
393 ty = absdecl ();
394 require (',');
395 to = string_seq ();
396 require (',');
397 from = string_seq ();
398 require (')');
400 return create_nested_ptr_option (prev, ty, to, from);
403 /* One GTY(()) option:
404 ID str_optvalue_opt
405 | PTR_ALIAS type_optvalue
406 | PARAM_IS type_optvalue
407 | NESTED_PTR nestedptr_optvalue
409 static options_p
410 option (options_p prev)
412 switch (token ())
414 case ID:
415 return str_optvalue_opt (prev);
417 case PTR_ALIAS:
418 advance ();
419 return type_optvalue (prev, "ptr_alias");
421 case PARAM_IS:
422 return type_optvalue (prev, advance ());
424 case NESTED_PTR:
425 advance ();
426 return nestedptr_optvalue (prev);
428 default:
429 parse_error ("expected an option keyword, have %s",
430 print_cur_token ());
431 advance ();
432 return create_option (prev, "", "");
436 /* One comma-separated list of options. */
437 static options_p
438 option_seq (void)
440 options_p o;
442 o = option (0);
443 while (token () == ',')
445 advance ();
446 o = option (o);
448 return o;
451 /* GTY marker: 'GTY' '(' '(' option_seq? ')' ')' */
452 static options_p
453 gtymarker (void)
455 options_p result = 0;
456 require (GTY_TOKEN);
457 require ('(');
458 require ('(');
459 if (token () != ')')
460 result = option_seq ();
461 require (')');
462 require (')');
463 return result;
466 /* Optional GTY marker. */
467 static options_p
468 gtymarker_opt (void)
470 if (token () != GTY_TOKEN)
471 return 0;
472 return gtymarker ();
475 /* Declarators. The logic here is largely lifted from c-parser.c.
476 Note that we do not have to process abstract declarators, which can
477 appear only in parameter type lists or casts (but see absdecl,
478 above). Also, type qualifiers are thrown out in gengtype-lex.l so
479 we don't have to do it. */
481 /* array_and_function_declarators_opt:
482 \epsilon
483 array_and_function_declarators_opt ARRAY
484 array_and_function_declarators_opt '(' ... ')'
486 where '...' indicates stuff we ignore except insofar as grouping
487 symbols ()[]{} must balance.
489 Subroutine of direct_declarator - do not use elsewhere. */
491 static type_p
492 array_and_function_declarators_opt (type_p ty)
494 if (token () == ARRAY)
496 const char *array = advance ();
497 return create_array (array_and_function_declarators_opt (ty), array);
499 else if (token () == '(')
501 /* We don't need exact types for functions. */
502 consume_balanced ('(', ')');
503 array_and_function_declarators_opt (ty);
504 return create_scalar_type ("function type");
506 else
507 return ty;
510 static type_p inner_declarator (type_p, const char **, options_p *);
512 /* direct_declarator:
513 '(' inner_declarator ')'
514 gtymarker_opt ID array_and_function_declarators_opt
516 Subroutine of declarator, mutually recursive with inner_declarator;
517 do not use elsewhere. */
518 static type_p
519 direct_declarator (type_p ty, const char **namep, options_p *optsp)
521 /* The first token in a direct-declarator must be an ID, a
522 GTY marker, or an open parenthesis. */
523 switch (token ())
525 case GTY_TOKEN:
526 *optsp = gtymarker ();
527 /* fall through */
528 case ID:
529 *namep = require (ID);
530 break;
532 case '(':
533 advance ();
534 ty = inner_declarator (ty, namep, optsp);
535 require (')');
536 break;
538 default:
539 parse_error ("expected '(', 'GTY', or an identifier, have %s",
540 print_cur_token ());
541 /* Do _not_ advance if what we have is a close squiggle brace, as
542 we will get much better error recovery that way. */
543 if (token () != '}')
544 advance ();
545 return 0;
547 return array_and_function_declarators_opt (ty);
550 /* The difference between inner_declarator and declarator is in the
551 handling of stars. Consider this declaration:
553 char * (*pfc) (void)
555 It declares a pointer to a function that takes no arguments and
556 returns a char*. To construct the correct type for this
557 declaration, the star outside the parentheses must be processed
558 _before_ the function type, the star inside the parentheses must
559 be processed _after_ the function type. To accomplish this,
560 declarator() creates pointers before recursing (it is actually
561 coded as a while loop), whereas inner_declarator() recurses before
562 creating pointers. */
564 /* inner_declarator:
565 '*' inner_declarator
566 direct_declarator
568 Mutually recursive subroutine of direct_declarator; do not use
569 elsewhere. */
571 static type_p
572 inner_declarator (type_p ty, const char **namep, options_p *optsp)
574 if (token () == '*')
576 type_p inner;
577 advance ();
578 inner = inner_declarator (ty, namep, optsp);
579 if (inner == 0)
580 return 0;
581 else
582 return create_pointer (ty);
584 else
585 return direct_declarator (ty, namep, optsp);
588 /* declarator: '*'+ direct_declarator
590 This is the sole public interface to this part of the grammar.
591 Arguments are the type known so far, a pointer to where the name
592 may be stored, and a pointer to where GTY options may be stored.
593 Returns the final type. */
595 static type_p
596 declarator (type_p ty, const char **namep, options_p *optsp)
598 *namep = 0;
599 *optsp = 0;
600 while (token () == '*')
602 advance ();
603 ty = create_pointer (ty);
605 return direct_declarator (ty, namep, optsp);
608 /* Types and declarations. */
610 /* Structure field(s) declaration:
612 type bitfield ';'
613 | type declarator bitfield? ( ',' declarator bitfield? )+ ';'
616 Knows that such declarations must end with a close brace (or,
617 erroneously, at EOF).
619 static pair_p
620 struct_field_seq (void)
622 pair_p f = 0;
623 type_p ty, dty;
624 options_p opts, dopts;
625 const char *name;
626 bool another;
630 ty = type (&opts, true);
631 /* Another piece of the IFCVT_EXTRA_FIELDS special case, see type(). */
632 if (!ty && token () == '}')
633 break;
635 if (!ty || token () == ':')
637 consume_until_semi (false);
638 continue;
643 dty = declarator (ty, &name, &dopts);
644 /* There could be any number of weird things after the declarator,
645 notably bitfield declarations and __attribute__s. If this
646 function returns true, the last thing was a comma, so we have
647 more than one declarator paired with the current type. */
648 another = consume_until_comma_or_semi (false);
650 if (!dty)
651 continue;
653 if (opts && dopts)
654 parse_error ("two GTY(()) options for field %s", name);
655 if (opts && !dopts)
656 dopts = opts;
658 f = create_field_at (f, dty, name, dopts, &lexer_line);
660 while (another);
662 while (token () != '}' && token () != EOF_TOKEN);
663 return nreverse_pairs (f);
666 /* This is called type(), but what it parses (sort of) is what C calls
667 declaration-specifiers and specifier-qualifier-list:
669 SCALAR
670 | ID // typedef
671 | (STRUCT|UNION) ID? gtymarker? ( '{' gtymarker? struct_field_seq '}' )?
672 | ENUM ID ( '{' ... '}' )?
674 Returns a partial type; under some conditions (notably
675 "struct foo GTY((...)) thing;") it may write an options
676 structure to *OPTSP.
678 static type_p
679 type (options_p *optsp, bool nested)
681 const char *s;
682 *optsp = 0;
683 switch (token ())
685 case SCALAR:
686 s = advance ();
687 return create_scalar_type (s);
689 case ID:
690 case VEC_TOKEN:
691 s = typedef_name ();
692 return resolve_typedef (s, &lexer_line);
694 case STRUCT:
695 case UNION:
697 options_p opts = 0;
698 /* GTY annotations follow attribute syntax
699 GTY_BEFORE_ID is for union/struct declarations
700 GTY_AFTER_ID is for variable declarations. */
701 enum {
702 NO_GTY,
703 GTY_BEFORE_ID,
704 GTY_AFTER_ID
705 } is_gty = NO_GTY;
706 bool is_union = (token () == UNION);
707 advance ();
709 /* Top-level structures that are not explicitly tagged GTY(())
710 are treated as mere forward declarations. This is because
711 there are a lot of structures that we don't need to know
712 about, and some of those have weird macro stuff in them
713 that we can't handle. */
714 if (nested || token () == GTY_TOKEN)
716 is_gty = GTY_BEFORE_ID;
717 opts = gtymarker_opt ();
720 if (token () == ID)
721 s = advance ();
722 else
723 s = xasprintf ("anonymous:%s:%d", lexer_line.file, lexer_line.line);
725 /* Unfortunately above GTY_TOKEN check does not capture the
726 typedef struct_type GTY case. */
727 if (token () == GTY_TOKEN)
729 is_gty = GTY_AFTER_ID;
730 opts = gtymarker_opt ();
733 if (is_gty)
735 if (token () == '{')
737 pair_p fields;
739 if (is_gty == GTY_AFTER_ID)
740 parse_error ("GTY must be specified before identifier");
742 advance ();
743 fields = struct_field_seq ();
744 require ('}');
745 return new_structure (s, is_union, &lexer_line, fields, opts);
748 else if (token () == '{')
749 consume_balanced ('{', '}');
750 if (opts)
751 *optsp = opts;
752 return find_structure (s, is_union);
755 case ENUM:
756 advance ();
757 if (token () == ID)
758 s = advance ();
759 else
760 s = xasprintf ("anonymous:%s:%d", lexer_line.file, lexer_line.line);
762 if (token () == '{')
763 consume_balanced ('{','}');
764 return create_scalar_type (s);
766 default:
767 parse_error ("expected a type specifier, have %s", print_cur_token ());
768 advance ();
769 return create_scalar_type ("erroneous type");
773 /* Top level constructs. */
775 /* Dispatch declarations beginning with 'typedef'. */
777 static void
778 typedef_decl (void)
780 type_p ty, dty;
781 const char *name;
782 options_p opts;
783 bool another;
785 gcc_assert (token () == TYPEDEF);
786 advance ();
788 ty = type (&opts, false);
789 if (!ty)
790 return;
791 if (opts)
792 parse_error ("GTY((...)) cannot be applied to a typedef");
795 dty = declarator (ty, &name, &opts);
796 if (opts)
797 parse_error ("GTY((...)) cannot be applied to a typedef");
799 /* Yet another place where we could have junk (notably attributes)
800 after the declarator. */
801 another = consume_until_comma_or_semi (false);
802 if (dty)
803 do_typedef (name, dty, &lexer_line);
805 while (another);
808 /* Structure definition: type() does all the work. */
810 static void
811 struct_or_union (void)
813 options_p dummy;
814 type (&dummy, false);
815 /* There may be junk after the type: notably, we cannot currently
816 distinguish 'struct foo *function(prototype);' from 'struct foo;'
817 ... we could call declarator(), but it's a waste of time at
818 present. Instead, just eat whatever token is currently lookahead
819 and go back to lexical skipping mode. */
820 advance ();
823 /* GC root declaration:
824 (extern|static) gtymarker? type ID array_declarators_opt (';'|'=')
825 If the gtymarker is not present, we ignore the rest of the declaration. */
826 static void
827 extern_or_static (void)
829 options_p opts, opts2, dopts;
830 type_p ty, dty;
831 const char *name;
832 require2 (EXTERN, STATIC);
834 if (token () != GTY_TOKEN)
836 advance ();
837 return;
840 opts = gtymarker ();
841 ty = type (&opts2, true); /* if we get here, it's got a GTY(()) */
842 dty = declarator (ty, &name, &dopts);
844 if ((opts && dopts) || (opts && opts2) || (opts2 && dopts))
845 parse_error ("GTY((...)) specified more than once for %s", name);
846 else if (opts2)
847 opts = opts2;
848 else if (dopts)
849 opts = dopts;
851 if (dty)
853 note_variable (name, adjust_field_type (dty, opts), opts, &lexer_line);
854 require2 (';', '=');
858 /* Definition of a generic VEC structure:
860 'DEF_VEC_[IPO]' '(' id ')' ';'
862 Scalar VECs require slightly different treatment than otherwise -
863 that's handled in note_def_vec, we just pass it along.*/
864 static void
865 def_vec (void)
867 bool is_scalar = (token() == DEFVEC_I);
868 const char *type;
870 require2 (DEFVEC_OP, DEFVEC_I);
871 require ('(');
872 type = require2 (ID, SCALAR);
873 require (')');
874 require (';');
876 if (!type)
877 return;
879 note_def_vec (type, is_scalar, &lexer_line);
880 note_def_vec_alloc (type, "none", &lexer_line);
883 /* Definition of an allocation strategy for a VEC structure:
885 'DEF_VEC_ALLOC_[IPO]' '(' id ',' id ')' ';'
887 For purposes of gengtype, this just declares a wrapper structure. */
888 static void
889 def_vec_alloc (void)
891 const char *type, *astrat;
893 require (DEFVEC_ALLOC);
894 require ('(');
895 type = require2 (ID, SCALAR);
896 require (',');
897 astrat = require (ID);
898 require (')');
899 require (';');
901 if (!type || !astrat)
902 return;
904 note_def_vec_alloc (type, astrat, &lexer_line);
907 /* Parse the file FNAME for GC-relevant declarations and definitions.
908 This is the only entry point to this file. */
909 void
910 parse_file (const char *fname)
912 yybegin (fname);
913 for (;;)
915 switch (token ())
917 case EXTERN:
918 case STATIC:
919 extern_or_static ();
920 break;
922 case STRUCT:
923 case UNION:
924 struct_or_union ();
925 break;
927 case TYPEDEF:
928 typedef_decl ();
929 break;
931 case DEFVEC_OP:
932 case DEFVEC_I:
933 def_vec ();
934 break;
936 case DEFVEC_ALLOC:
937 def_vec_alloc ();
938 break;
940 case EOF_TOKEN:
941 goto eof;
943 default:
944 parse_error ("unexpected top level token, %s", print_cur_token ());
945 goto eof;
947 lexer_toplevel_done = 1;
950 eof:
951 advance ();
952 yyend ();