1 /* Process source files and output type information.
2 Copyright (C) 2006-2022 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #ifdef HOST_GENERATOR_FILE
22 #define GENERATOR_FILE 1
29 /* This is a simple recursive-descent parser which understands a subset of the C type grammar.
32 Rule functions are suffixed _seq if they scan a sequence of items;
33 _opt if they may consume zero tokens; _seqopt if both are true. The
34 "consume_" prefix indicates that a sequence of tokens is parsed for
35 syntactic correctness and then thrown away. */
37 /* Simple one-token lookahead mechanism. */
45 static struct token T
;
47 /* Retrieve the code of the current token; if there is no current token,
48 get the next one from the lexer. */
54 T
.code
= yylex (&T
.value
);
60 /* Retrieve the value of the current token (if any) and mark it consumed.
61 The next call to token() will get another token from the lexer. */
62 static inline const char *
71 /* This array is indexed by the token code minus CHAR_TOKEN_OFFSET. */
72 static const char *const token_names
[] = {
83 "a param<N>_is option",
88 "a character constant",
89 "an array declarator",
90 "a C++ keyword to ignore"
93 /* This array is indexed by token code minus FIRST_TOKEN_WITH_VALUE. */
94 static const char *const token_value_format
[] = {
105 /* Produce a printable representation for a token defined by CODE and
106 VALUE. This sometimes returns pointers into malloc memory and
107 sometimes not, therefore it is unsafe to free the pointer it
108 returns, so that memory is leaked. This does not matter, as this
109 function is only used for diagnostics, and in a successful run of
110 the program there will be none. */
112 print_token (int code
, const char *value
)
114 if (code
< CHAR_TOKEN_OFFSET
)
115 return xasprintf ("'%c'", code
);
116 else if (code
< FIRST_TOKEN_WITH_VALUE
)
117 return xasprintf ("'%s'", token_names
[code
- CHAR_TOKEN_OFFSET
]);
119 return token_names
[code
- CHAR_TOKEN_OFFSET
]; /* don't quote these */
121 return xasprintf (token_value_format
[code
- FIRST_TOKEN_WITH_VALUE
],
125 /* Convenience wrapper around print_token which produces the printable
126 representation of the current token. */
127 static inline const char *
128 print_cur_token (void)
130 return print_token (T
.code
, T
.value
);
133 /* Report a parse error on the current line, with diagnostic MSG.
134 Behaves as standard printf with respect to additional arguments and format escapes. */
136 static void ATTRIBUTE_PRINTF_1
137 parse_error (const char *msg
, ...)
141 fprintf (stderr
, "%s:%d: parse error: ",
142 get_input_file_name (lexer_line
.file
), lexer_line
.line
);
145 vfprintf (stderr
, msg
, ap
);
148 fputc ('\n', stderr
);
153 /* If the next token does not have code T, report a parse error; otherwise
154 return the token's value. */
159 const char *v
= advance ();
162 parse_error ("expected %s, have %s",
163 print_token (t
, 0), print_token (u
, v
));
169 /* As per require, but do not advance. */
171 require_without_advance (int t
)
174 const char *v
= T
.value
;
177 parse_error ("expected %s, have %s",
178 print_token (t
, 0), print_token (u
, v
));
184 /* If the next token does not have one of the codes T1 or T2, report a
185 parse error; otherwise return the token's value. */
187 require2 (int t1
, int t2
)
190 const char *v
= advance ();
191 if (u
!= t1
&& u
!= t2
)
193 parse_error ("expected %s or %s, have %s",
194 print_token (t1
, 0), print_token (t2
, 0),
201 /* If the next token does not have one of the codes T1, T2, T3 or T4, report a
202 parse error; otherwise return the token's value. */
204 require4 (int t1
, int t2
, int t3
, int t4
)
207 const char *v
= advance ();
208 if (u
!= t1
&& u
!= t2
&& u
!= t3
&& u
!= t4
)
210 parse_error ("expected %s, %s, %s or %s, have %s",
211 print_token (t1
, 0), print_token (t2
, 0),
212 print_token (t3
, 0), print_token (t4
, 0),
219 /* Near-terminals. */
221 /* C-style string constant concatenation: STRING+
222 Bare STRING should appear nowhere else in this file. */
230 s1
= require (STRING
);
233 while (token () == STRING
)
239 buf
= XRESIZEVEC (char, CONST_CAST (char *, s1
), l1
+ l2
+ 1);
240 memcpy (buf
+ l1
, s2
, l2
+ 1);
241 XDELETE (CONST_CAST (char *, s2
));
248 /* The caller has detected a template declaration that starts
249 with TMPL_NAME. Parse up to the closing '>'. This recognizes
250 simple template declarations of the form ID<ID1,ID2,...,IDn>,
251 potentially with a single level of indirection e.g.
252 ID<ID1 *, ID2, ID3 *, ..., IDn>.
253 It does not try to parse anything more sophisticated than that.
255 Returns the template declaration string "ID<ID1,ID2,...,IDn>". */
258 require_template_declaration (const char *tmpl_name
)
261 int num_indirections
= 0;
263 /* Recognize the opening '<'. */
265 str
= concat (tmpl_name
, "<", (char *) 0);
267 /* Read the comma-separated list of identifiers. */
271 if (token () == ENUM
)
274 str
= concat (str
, "enum ", (char *) 0);
281 str
= concat (str
, advance (), (char *) 0);
287 str
= concat (str
, "<", (char *) 0);
294 str
= concat (str
, ">", (char *) 0);
298 const char *id
= require4 (SCALAR
, ID
, '*', ',');
304 if (num_indirections
++)
305 parse_error ("only one level of indirection is supported"
306 " in template arguments");
312 num_indirections
= 0;
313 str
= concat (str
, id
, (char *) 0);
319 /* typedef_name: either an ID, or a template type
320 specification of the form ID<t1,t2,...,tn>. */
325 const char *id
= require (ID
);
327 return require_template_declaration (id
);
332 /* Absorb a sequence of tokens delimited by balanced ()[]{}. */
334 consume_balanced (int opener
, int closer
)
344 consume_balanced ('(', ')');
347 consume_balanced ('[', ']');
350 consume_balanced ('{', '}');
356 if (token () != closer
)
357 parse_error ("unbalanced delimiters - expected '%c', have '%c'",
363 parse_error ("unexpected end of file within %c%c-delimited construct",
369 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
370 expressions, until we encounter an end-of-statement marker (a ';' or
371 a '}') outside any such delimiters; absorb that too. */
374 consume_until_eos (void)
384 consume_balanced ('{', '}');
388 consume_balanced ('(', ')');
392 consume_balanced ('[', ']');
398 parse_error ("unmatched '%c' while scanning for ';'", token ());
402 parse_error ("unexpected end of file while scanning for ';'");
411 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
412 expressions, until we encounter a comma or semicolon outside any
413 such delimiters; absorb that too. Returns true if the loop ended with a comma. */
417 consume_until_comma_or_eos ()
431 consume_balanced ('{', '}');
435 consume_balanced ('(', ')');
439 consume_balanced ('[', ']');
445 parse_error ("unmatched '%s' while scanning for ',' or ';'",
450 parse_error ("unexpected end of file while scanning for ',' or ';'");
460 /* GTY(()) option handling. */
461 static type_p
type (options_p
*optsp
, bool nested
);
463 /* Optional parenthesized string: ('(' string_seq ')')? */
465 str_optvalue_opt (options_p prev
)
467 const char *name
= advance ();
468 const char *value
= "";
472 value
= string_seq ();
475 return create_string_option (prev
, name
, value
);
478 /* absdecl: type '*'*
479 -- a vague approximation to what the C standard calls an abstract
480 declarator. The only kinds that are actually used are those that
481 are just a bare type and those that have trailing pointer-stars.
482 Further kinds should be implemented if and when they become
483 necessary. Used only within GTY(()) option values, therefore
484 further GTY(()) tags within the type are invalid. Note that the
485 return value has already been run through adjust_field_type. */
492 ty
= type (&opts
, true);
493 while (token () == '*')
495 ty
= create_pointer (ty
);
500 parse_error ("nested GTY(()) options are invalid");
502 return adjust_field_type (ty
, 0);
505 /* Type-option: '(' absdecl ')' */
507 type_optvalue (options_p prev
, const char *name
)
513 return create_type_option (prev
, name
, ty
);
516 /* Nested pointer data: '(' type '*'* ',' string_seq ',' string_seq ')' */
518 nestedptr_optvalue (options_p prev
)
521 const char *from
, *to
;
528 from
= string_seq ();
531 return create_nested_ptr_option (prev
, ty
, to
, from
);
534 /* One GTY(()) option:
536 | PTR_ALIAS type_optvalue
537 | NESTED_PTR nestedptr_optvalue
540 option (options_p prev
)
545 return str_optvalue_opt (prev
);
549 return type_optvalue (prev
, "ptr_alias");
553 return nestedptr_optvalue (prev
);
557 return create_string_option (prev
, "user", "");
560 parse_error ("expected an option keyword, have %s", print_cur_token ());
562 return create_string_option (prev
, "", "");
566 /* One comma-separated list of options. */
573 while (token () == ',')
581 /* GTY marker: 'GTY' '(' '(' option_seq? ')' ')' */
585 options_p result
= 0;
590 result
= option_seq ();
596 /* Optional GTY marker. */
600 if (token () != GTY_TOKEN
)
607 /* Declarators. The logic here is largely lifted from c-parser.cc.
608 Note that we do not have to process abstract declarators, which can
609 appear only in parameter type lists or casts (but see absdecl,
610 above). Also, type qualifiers are thrown out in gengtype-lex.l so
611 we don't have to do it. */
613 /* array_and_function_declarators_opt:
615 array_and_function_declarators_opt ARRAY
616 array_and_function_declarators_opt '(' ... ')'
618 where '...' indicates stuff we ignore except insofar as grouping
619 symbols ()[]{} must balance.
621 Subroutine of direct_declarator - do not use elsewhere. */
624 array_and_function_declarators_opt (type_p ty
)
626 if (token () == ARRAY
)
628 const char *array
= advance ();
629 return create_array (array_and_function_declarators_opt (ty
), array
);
631 else if (token () == '(')
633 /* We don't need exact types for functions. */
634 consume_balanced ('(', ')');
635 array_and_function_declarators_opt (ty
);
636 return create_scalar_type ("function type");
642 static type_p
inner_declarator (type_p
, const char **, options_p
*, bool);
644 /* direct_declarator:
645 '(' inner_declarator ')'
646 '(' \epsilon ')' <-- C++ ctors/dtors
647 gtymarker_opt ID array_and_function_declarators_opt
649 Subroutine of declarator, mutually recursive with inner_declarator;
650 do not use elsewhere.
652 IN_STRUCT is true if we are called while parsing structures or classes. */
655 direct_declarator (type_p ty
, const char **namep
, options_p
*optsp
,
658 /* The first token in a direct-declarator must be an ID, a
659 GTY marker, or an open parenthesis. */
663 *optsp
= gtymarker ();
667 *namep
= require (ID
);
668 /* If the next token is '(', we are parsing a function declaration.
669 Functions are ignored by gengtype, so we return NULL. */
675 /* If the declarator starts with a '(', we have three options. We
676 are either parsing 'TYPE (*ID)' (i.e., a function pointer) or 'TYPE(...)'.
679 The latter will be a constructor iff we are inside a
680 structure or class. Otherwise, it could be a typedef, but
681 since we explicitly reject typedefs inside structures, we can
682 assume that we found a ctor and return NULL. */
684 if (in_struct
&& token () != '*')
686 /* Found a constructor. Find and consume the closing ')'. */
687 while (token () != ')')
690 /* Tell the caller to ignore this. */
693 ty
= inner_declarator (ty
, namep
, optsp
, in_struct
);
697 case IGNORABLE_CXX_KEYWORD
:
698 /* Any C++ keyword like 'operator' means that we are not looking
699 at a regular data declarator. */
703 parse_error ("expected '(', ')', 'GTY', or an identifier, have %s",
705 /* Do _not_ advance if what we have is a close squiggle brace, as
706 we will get much better error recovery that way. */
711 return array_and_function_declarators_opt (ty
);
714 /* The difference between inner_declarator and declarator is in the
715 handling of stars. Consider this declaration:
char * (*pfc) (void)
719 It declares a pointer to a function that takes no arguments and
720 returns a char*. To construct the correct type for this
721 declaration, the star outside the parentheses must be processed
722 _before_ the function type, the star inside the parentheses must
723 be processed _after_ the function type. To accomplish this,
724 declarator() creates pointers before recursing (it is actually
725 coded as a while loop), whereas inner_declarator() recurses before
726 creating pointers. */
732 Mutually recursive subroutine of direct_declarator; do not use elsewhere.
735 IN_STRUCT is true if we are called while parsing structures or classes. */
738 inner_declarator (type_p ty
, const char **namep
, options_p
*optsp
,
745 inner
= inner_declarator (ty
, namep
, optsp
, in_struct
);
749 return create_pointer (ty
);
752 return direct_declarator (ty
, namep
, optsp
, in_struct
);
755 /* declarator: '*'+ direct_declarator
757 This is the sole public interface to this part of the grammar.
758 Arguments are the type known so far, a pointer to where the name
759 may be stored, and a pointer to where GTY options may be stored.
761 IN_STRUCT is true when we are called to parse declarators inside
762 a structure or class.
764 Returns the final type. */
767 declarator (type_p ty
, const char **namep
, options_p
*optsp
,
768 bool in_struct
= false)
772 while (token () == '*')
775 ty
= create_pointer (ty
);
777 return direct_declarator (ty
, namep
, optsp
, in_struct
);
780 /* Types and declarations. */
782 /* Structure field(s) declaration:
785 | type declarator bitfield? ( ',' declarator bitfield? )+ ';'
788 Knows that such declarations must end with a close brace (or,
789 erroneously, at EOF).
792 struct_field_seq (void)
796 options_p opts
, dopts
;
800 while (token () != '}' && token () != EOF_TOKEN
)
802 ty
= type (&opts
, true);
804 /* Ignore access-control keywords ("public:" etc). */
805 while (!ty
&& token () == IGNORABLE_CXX_KEYWORD
)
807 const char *keyword
= advance ();
808 if (strcmp (keyword
, "public:") != 0
809 && strcmp (keyword
, "private:") != 0
810 && strcmp (keyword
, "protected:") != 0)
812 ty
= type (&opts
, true);
815 if (!ty
|| token () == ':')
817 consume_until_eos ();
823 dty
= declarator (ty
, &name
, &dopts
, true);
825 /* There could be any number of weird things after the declarator,
826 notably bitfield declarations and __attribute__s. If this
827 function returns true, the last thing was a comma, so we have
828 more than one declarator paired with the current type. */
829 another
= consume_until_comma_or_eos ();
835 parse_error ("two GTY(()) options for field %s", name
);
839 f
= create_field_at (f
, dty
, name
, dopts
, &lexer_line
);
843 return nreverse_pairs (f
);
846 /* Return true if OPTS contain the option named STR. */
849 opts_have (options_p opts
, const char *str
)
851 for (options_p opt
= opts
; opt
; opt
= opt
->next
)
852 if (strcmp (opt
->name
, str
) == 0)
858 /* This is called type(), but what it parses (sort of) is what C calls
859 declaration-specifiers and specifier-qualifier-list:
863 | (STRUCT|UNION) ID? gtymarker? ( '{' gtymarker? struct_field_seq '}' )?
864 | ENUM ID ( '{' ... '}' )?
866 Returns a partial type; under some conditions (notably
867 "struct foo GTY((...)) thing;") it may write an options structure to *OPTSP.
870 NESTED is true when parsing a declaration already known to have a
871 GTY marker. In these cases, typedef and enum declarations are not
872 allowed because gengtype only understands types at the global scope. */
876 type (options_p
*optsp
, bool nested
)
884 return create_scalar_type (s
);
888 return resolve_typedef (s
, &lexer_line
);
890 case IGNORABLE_CXX_KEYWORD
:
891 /* By returning NULL here, we indicate to the caller that they
892 should ignore everything following this keyword up to the next ';' or '}'. */
899 type_p base_class
= NULL
;
901 /* GTY annotations follow attribute syntax
902 GTY_BEFORE_ID is for union/struct declarations
903 GTY_AFTER_ID is for variable declarations. */
910 enum typekind kind
= (token () == UNION
) ? TYPE_UNION
: TYPE_STRUCT
;
913 /* Top-level structures that are not explicitly tagged GTY(())
914 are treated as mere forward declarations. This is because
915 there are a lot of structures that we don't need to know
916 about, and some of those have C++ and macro constructs that we cannot handle. */
918 if (nested
|| token () == GTY_TOKEN
)
920 is_gty
= GTY_BEFORE_ID
;
921 opts
= gtymarker_opt ();
927 s
= xasprintf ("anonymous:%s:%d",
928 get_input_file_name (lexer_line
.file
),
931 /* Unfortunately, the GTY_TOKEN check above does not catch the
932 'typedef struct_type GTY' case. */
933 if (token () == GTY_TOKEN
)
935 is_gty
= GTY_AFTER_ID
;
936 opts
= gtymarker_opt ();
939 bool is_user_gty
= opts_have (opts
, "user");
943 if (is_gty
&& !is_user_gty
)
945 /* For GTY-marked types that are not "user", parse some C++
946 inheritance specifications.
947 We require single-inheritance from a non-template type. */
949 const char *basename
= require (ID
);
950 /* This may be either an access specifier, or the base name. */
951 if (strcmp (basename
, "public") == 0
952 || strcmp (basename
, "protected") == 0
953 || strcmp (basename
, "private") == 0)
954 basename
= require (ID
);
955 base_class
= find_structure (basename
, TYPE_STRUCT
);
957 parse_error ("unrecognized base class: %s", basename
);
958 require_without_advance ('{');
962 /* For types lacking GTY-markings, skip over C++ inheritance
963 specification (and thus avoid having to parse e.g. template types). */
965 while (token () != '{')
976 if (is_gty
== GTY_AFTER_ID
)
977 parse_error ("GTY must be specified before identifier");
982 fields
= struct_field_seq ();
987 /* Do not look inside user defined structures. */
989 kind
= TYPE_USER_STRUCT
;
990 consume_balanced ('{', '}');
991 return create_user_defined_type (s
, &lexer_line
);
994 return new_structure (s
, kind
, &lexer_line
, fields
, opts
,
998 else if (token () == '{')
999 consume_balanced ('{', '}');
1002 return find_structure (s
, kind
);
1006 /* In C++, a typedef inside a struct/class/union defines a new
1007 type for that inner scope. We cannot support this in
1008 gengtype because we have no concept of scoping.
1010 We handle typedefs in the global scope separately (see
1011 parse_file), so if we find a 'typedef', we must be inside a struct. */
1013 gcc_assert (nested
);
1014 parse_error ("typedefs not supported in structures marked with "
1015 "automatic GTY markers. Use GTY((user)) to mark "
1025 s
= xasprintf ("anonymous:%s:%d",
1026 get_input_file_name (lexer_line
.file
),
1029 if (token () == '{')
1030 consume_balanced ('{', '}');
1032 /* If after parsing the enum we are at the end of the statement,
1033 and we are currently inside a structure, then this was an
1034 enum declaration inside this scope.
1036 We cannot support this for the same reason we cannot support
1037 'typedef' inside structures (see the TYPEDEF handler above).
1038 If this happens, emit an error and return NULL. */
1039 if (nested
&& token () == ';')
1041 parse_error ("enum definitions not supported in structures marked "
1042 "with automatic GTY markers. Use GTY((user)) to mark "
1048 return create_scalar_type (s
);
1051 parse_error ("expected a type specifier, have %s", print_cur_token ());
1053 return create_scalar_type ("erroneous type");
1057 /* Top level constructs. */
1059 /* Dispatch declarations beginning with 'typedef'. */
1069 gcc_assert (token () == TYPEDEF
);
1072 ty
= type (&opts
, false);
1076 parse_error ("GTY((...)) cannot be applied to a typedef");
1079 dty
= declarator (ty
, &name
, &opts
);
1081 parse_error ("GTY((...)) cannot be applied to a typedef");
1083 /* Yet another place where we could have junk (notably attributes)
1084 after the declarator. */
1085 another
= consume_until_comma_or_eos ();
1087 do_typedef (name
, dty
, &lexer_line
);
1092 /* Structure definition: type() does all the work. */
1095 struct_or_union (void)
1098 type (&dummy
, false);
1099 /* There may be junk after the type: notably, we cannot currently
1100 distinguish 'struct foo *function(prototype);' from 'struct foo;'
1101 ... we could call declarator(), but it's a waste of time at
1102 present. Instead, just eat whatever token is currently lookahead
1103 and go back to lexical skipping mode. */
1107 /* GC root declaration:
1108 (extern|static) gtymarker? type ID array_declarators_opt (';'|'=')
1109 If the gtymarker is not present, we ignore the rest of the declaration. */
1111 extern_or_static (void)
1113 options_p opts
, opts2
, dopts
;
1116 require2 (EXTERN
, STATIC
);
1118 if (token () != GTY_TOKEN
)
1124 opts
= gtymarker ();
1125 ty
= type (&opts2
, true); /* if we get here, it's got a GTY(()) */
1126 dty
= declarator (ty
, &name
, &dopts
);
1128 if ((opts
&& dopts
) || (opts
&& opts2
) || (opts2
&& dopts
))
1129 parse_error ("GTY((...)) specified more than once for %s", name
);
1137 note_variable (name
, adjust_field_type (dty
, opts
), opts
, &lexer_line
);
1138 require2 (';', '=');
1142 /* Parse the file FNAME for GC-relevant declarations and definitions.
1143 This is the only entry point to this file. */
1145 parse_file (const char *fname
)
1154 extern_or_static ();
1170 parse_error ("unexpected top level token, %s", print_cur_token ());
1173 lexer_toplevel_done
= 1;