1 /* Process source files and output type information.
2 Copyright (C) 2006-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #ifdef HOST_GENERATOR_FILE
22 #define GENERATOR_FILE 1
29 /* This is a simple recursive-descent parser which understands a subset of the C type grammar.
32 Rule functions are suffixed _seq if they scan a sequence of items;
33 _opt if they may consume zero tokens; _seqopt if both are true. The
34 "consume_" prefix indicates that a sequence of tokens is parsed for
35 syntactic correctness and then thrown away. */
37 /* Simple one-token lookahead mechanism. */
45 static struct token T
;
47 /* Retrieve the code of the current token; if there is no current token,
48 get the next one from the lexer. */
54 T
.code
= yylex (&T
.value
);
60 /* Retrieve the value of the current token (if any) and mark it consumed.
61 The next call to token() will get another token from the lexer. */
62 static inline const char *
71 /* This array is indexed by the token code minus CHAR_TOKEN_OFFSET. */
72 /* Keep in sync with 'gengtype.h:enum gty_token'. */
73 static const char *const token_names
[] = {
89 "a character constant",
90 "an array declarator",
91 "a C++ keyword to ignore"
94 /* This array is indexed by token code minus FIRST_TOKEN_WITH_VALUE. */
95 /* Keep in sync with 'gengtype.h:enum gty_token'. */
96 static const char *const token_value_format
[] = {
106 /* Produce a printable representation for a token defined by CODE and
107 VALUE. This sometimes returns pointers into malloc memory and
108 sometimes not, therefore it is unsafe to free the pointer it
109 returns, so that memory is leaked. This does not matter, as this
110 function is only used for diagnostics, and in a successful run of
111 the program there will be none. */
113 print_token (int code
, const char *value
)
115 if (code
< CHAR_TOKEN_OFFSET
)
116 return xasprintf ("'%c'", code
);
117 else if (code
< FIRST_TOKEN_WITH_VALUE
)
118 return xasprintf ("'%s'", token_names
[code
- CHAR_TOKEN_OFFSET
]);
120 return token_names
[code
- CHAR_TOKEN_OFFSET
]; /* don't quote these */
122 return xasprintf (token_value_format
[code
- FIRST_TOKEN_WITH_VALUE
],
126 /* Convenience wrapper around print_token which produces the printable
127 representation of the current token. */
128 static inline const char *
129 print_cur_token (void)
131 return print_token (T
.code
, T
.value
);
134 /* Report a parse error on the current line, with diagnostic MSG.
135 Behaves as standard printf with respect to additional arguments and format escapes. */
137 static void ATTRIBUTE_PRINTF_1
138 parse_error (const char *msg
, ...)
142 fprintf (stderr
, "%s:%d: parse error: ",
143 get_input_file_name (lexer_line
.file
), lexer_line
.line
);
146 vfprintf (stderr
, msg
, ap
);
149 fputc ('\n', stderr
);
154 /* If the next token does not have code T, report a parse error; otherwise
155 return the token's value. */
160 const char *v
= advance ();
163 parse_error ("expected %s, have %s",
164 print_token (t
, 0), print_token (u
, v
));
170 /* As per require, but do not advance. */
172 require_without_advance (int t
)
175 const char *v
= T
.value
;
178 parse_error ("expected %s, have %s",
179 print_token (t
, 0), print_token (u
, v
));
185 /* If the next token does not have one of the codes T1 or T2, report a
186 parse error; otherwise return the token's value. */
188 require2 (int t1
, int t2
)
191 const char *v
= advance ();
192 if (u
!= t1
&& u
!= t2
)
194 parse_error ("expected %s or %s, have %s",
195 print_token (t1
, 0), print_token (t2
, 0),
202 /* If the next token does not have one of the codes T1, T2, T3 or T4, report a
203 parse error; otherwise return the token's value. */
205 require4 (int t1
, int t2
, int t3
, int t4
)
208 const char *v
= advance ();
209 if (u
!= t1
&& u
!= t2
&& u
!= t3
&& u
!= t4
)
211 parse_error ("expected %s, %s, %s or %s, have %s",
212 print_token (t1
, 0), print_token (t2
, 0),
213 print_token (t3
, 0), print_token (t4
, 0),
220 /* Near-terminals. */
222 /* C-style string constant concatenation: STRING+
223 Bare STRING should appear nowhere else in this file. */
231 s1
= require (STRING
);
234 while (token () == STRING
)
240 buf
= XRESIZEVEC (char, CONST_CAST (char *, s1
), l1
+ l2
+ 1);
241 memcpy (buf
+ l1
, s2
, l2
+ 1);
242 XDELETE (CONST_CAST (char *, s2
));
249 /* The caller has detected a template declaration that starts
250 with TMPL_NAME. Parse up to the closing '>'. This recognizes
251 simple template declarations of the form ID<ID1,ID2,...,IDn>,
252 potentially with a single level of indirection e.g.
253 ID<ID1 *, ID2, ID3 *, ..., IDn>.
254 It does not try to parse anything more sophisticated than that.
256 Returns the template declaration string "ID<ID1,ID2,...,IDn>". */
259 require_template_declaration (const char *tmpl_name
)
262 int num_indirections
= 0;
264 /* Recognize the opening '<'. */
266 str
= concat (tmpl_name
, "<", (char *) 0);
268 /* Read the comma-separated list of identifiers. */
272 if (token () == ENUM
)
275 str
= concat (str
, "enum ", (char *) 0);
282 str
= concat (str
, advance (), (char *) 0);
288 str
= concat (str
, "<", (char *) 0);
295 str
= concat (str
, ">", (char *) 0);
299 const char *id
= require4 (SCALAR
, ID
, '*', ',');
305 if (num_indirections
++)
306 parse_error ("only one level of indirection is supported"
307 " in template arguments");
313 num_indirections
= 0;
314 str
= concat (str
, id
, (char *) 0);
320 /* typedef_name: either an ID, or a template type
321 specification of the form ID<t1,t2,...,tn>. */
326 const char *id
= require (ID
);
328 return require_template_declaration (id
);
333 /* Absorb a sequence of tokens delimited by balanced ()[]{}. */
335 consume_balanced (int opener
, int closer
)
345 consume_balanced ('(', ')');
348 consume_balanced ('[', ']');
351 consume_balanced ('{', '}');
357 if (token () != closer
)
358 parse_error ("unbalanced delimiters - expected '%c', have '%c'",
364 parse_error ("unexpected end of file within %c%c-delimited construct",
370 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
371 expressions, until we encounter an end-of-statement marker (a ';' or
372 a '}') outside any such delimiters; absorb that too. */
375 consume_until_eos (void)
385 consume_balanced ('{', '}');
389 consume_balanced ('(', ')');
393 consume_balanced ('[', ']');
399 parse_error ("unmatched '%c' while scanning for ';'", token ());
403 parse_error ("unexpected end of file while scanning for ';'");
412 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
413 expressions, until we encounter a comma or semicolon outside any
414 such delimiters; absorb that too. Returns true if the loop ended with a comma. */
418 consume_until_comma_or_eos ()
432 consume_balanced ('{', '}');
436 consume_balanced ('(', ')');
440 consume_balanced ('[', ']');
446 parse_error ("unmatched '%s' while scanning for ',' or ';'",
451 parse_error ("unexpected end of file while scanning for ',' or ';'");
457 consume_balanced ('{', '}');
467 /* GTY(()) option handling. */
468 static type_p
type (options_p
*optsp
, bool nested
);
470 /* Optional parenthesized string: ('(' string_seq ')')? */
472 str_optvalue_opt (options_p prev
)
474 const char *name
= advance ();
475 const char *value
= "";
479 value
= string_seq ();
482 return create_string_option (prev
, name
, value
);
485 /* absdecl: type '*'*
486 -- a vague approximation to what the C standard calls an abstract
487 declarator. The only kinds that are actually used are those that
488 are just a bare type and those that have trailing pointer-stars.
489 Further kinds should be implemented if and when they become
490 necessary. Used only within GTY(()) option values, therefore
491 further GTY(()) tags within the type are invalid. Note that the
492 return value has already been run through adjust_field_type. */
499 ty
= type (&opts
, true);
500 while (token () == '*')
502 ty
= create_pointer (ty
);
507 parse_error ("nested GTY(()) options are invalid");
509 return adjust_field_type (ty
, 0);
512 /* Type-option: '(' absdecl ')' */
514 type_optvalue (options_p prev
, const char *name
)
520 return create_type_option (prev
, name
, ty
);
523 /* Nested pointer data: '(' type '*'* ',' string_seq ',' string_seq ')' */
525 nestedptr_optvalue (options_p prev
)
528 const char *from
, *to
;
535 from
= string_seq ();
538 return create_nested_ptr_option (prev
, ty
, to
, from
);
541 /* One GTY(()) option:
543 | PTR_ALIAS type_optvalue
544 | NESTED_PTR nestedptr_optvalue
547 option (options_p prev
)
552 return str_optvalue_opt (prev
);
556 return type_optvalue (prev
, "ptr_alias");
560 return nestedptr_optvalue (prev
);
564 return create_string_option (prev
, "user", "");
567 parse_error ("expected an option keyword, have %s", print_cur_token ());
569 return create_string_option (prev
, "", "");
573 /* One comma-separated list of options. */
580 while (token () == ',')
588 /* GTY marker: 'GTY' '(' '(' option_seq? ')' ')' */
592 options_p result
= 0;
597 result
= option_seq ();
603 /* Optional GTY marker. */
607 if (token () != GTY_TOKEN
)
614 /* Declarators. The logic here is largely lifted from c-parser.cc.
615 Note that we do not have to process abstract declarators, which can
616 appear only in parameter type lists or casts (but see absdecl,
617 above). Also, type qualifiers are thrown out in gengtype-lex.l so
618 we don't have to do it. */
620 /* array_and_function_declarators_opt:
622 array_and_function_declarators_opt ARRAY
623 array_and_function_declarators_opt '(' ... ')'
625 where '...' indicates stuff we ignore except insofar as grouping
626 symbols ()[]{} must balance.
628 Subroutine of direct_declarator - do not use elsewhere. */
631 array_and_function_declarators_opt (type_p ty
)
633 if (token () == ARRAY
)
635 const char *array
= advance ();
636 return create_array (array_and_function_declarators_opt (ty
), array
);
638 else if (token () == '(')
640 /* We don't need exact types for functions. */
641 consume_balanced ('(', ')');
642 array_and_function_declarators_opt (ty
);
643 return create_scalar_type ("function type");
649 static type_p
inner_declarator (type_p
, const char **, options_p
*, bool);
651 /* direct_declarator:
652 '(' inner_declarator ')'
653 '(' \epsilon ')' <-- C++ ctors/dtors
654 gtymarker_opt ID array_and_function_declarators_opt
656 Subroutine of declarator, mutually recursive with inner_declarator;
657 do not use elsewhere.
659 IN_STRUCT is true if we are called while parsing structures or classes. */
662 direct_declarator (type_p ty
, const char **namep
, options_p
*optsp
,
665 /* The first token in a direct-declarator must be an ID, a
666 GTY marker, or an open parenthesis. */
670 *optsp
= gtymarker ();
674 *namep
= require (ID
);
675 /* If the next token is '(', we are parsing a function declaration.
676 Functions are ignored by gengtype, so we return NULL. */
682 /* If the declarator starts with a '(', we have three options. We
683 are either parsing 'TYPE (*ID)' (i.e., a function pointer)
686 The latter will be a constructor iff we are inside a
687 structure or class. Otherwise, it could be a typedef, but
688 since we explicitly reject typedefs inside structures, we can
689 assume that we found a ctor and return NULL. */
691 if (in_struct
&& token () != '*')
693 /* Found a constructor. Find and consume the closing ')'. */
694 while (token () != ')')
697 /* Tell the caller to ignore this. */
700 ty
= inner_declarator (ty
, namep
, optsp
, in_struct
);
704 case IGNORABLE_CXX_KEYWORD
:
705 /* Any C++ keyword like 'operator' means that we are not looking
706 at a regular data declarator. */
710 parse_error ("expected '(', ')', 'GTY', or an identifier, have %s",
712 /* Do _not_ advance if what we have is a close squiggle brace, as
713 we will get much better error recovery that way. */
718 return array_and_function_declarators_opt (ty
);
721 /* The difference between inner_declarator and declarator is in the
722 handling of stars. Consider this declaration: 'char *(*pfc) (void)'.
726 It declares a pointer to a function that takes no arguments and
727 returns a char*. To construct the correct type for this
728 declaration, the star outside the parentheses must be processed
729 _before_ the function type, the star inside the parentheses must
730 be processed _after_ the function type. To accomplish this,
731 declarator() creates pointers before recursing (it is actually
732 coded as a while loop), whereas inner_declarator() recurses before
733 creating pointers. */
739 Mutually recursive subroutine of direct_declarator; do not use elsewhere.
742 IN_STRUCT is true if we are called while parsing structures or classes. */
745 inner_declarator (type_p ty
, const char **namep
, options_p
*optsp
,
752 inner
= inner_declarator (ty
, namep
, optsp
, in_struct
);
756 return create_pointer (ty
);
759 return direct_declarator (ty
, namep
, optsp
, in_struct
);
762 /* declarator: '*'* direct_declarator
764 This is the sole public interface to this part of the grammar.
765 Arguments are the type known so far, a pointer to where the name
766 may be stored, and a pointer to where GTY options may be stored.
768 IN_STRUCT is true when we are called to parse declarators inside
769 a structure or class.
771 Returns the final type. */
774 declarator (type_p ty
, const char **namep
, options_p
*optsp
,
775 bool in_struct
= false)
779 while (token () == '*')
782 ty
= create_pointer (ty
);
784 return direct_declarator (ty
, namep
, optsp
, in_struct
);
787 /* Types and declarations. */
789 /* Structure field(s) declaration:
792 | type declarator bitfield? ( ',' declarator bitfield? )+ ';'
795 Knows that such declarations must end with a close brace (or,
796 erroneously, at EOF).
799 struct_field_seq (void)
803 options_p opts
, dopts
;
807 while (token () != '}' && token () != EOF_TOKEN
)
809 ty
= type (&opts
, true);
811 /* Ignore access-control keywords ("public:" etc). */
812 while (!ty
&& token () == IGNORABLE_CXX_KEYWORD
)
814 const char *keyword
= advance ();
815 if (strcmp (keyword
, "public:") != 0
816 && strcmp (keyword
, "private:") != 0
817 && strcmp (keyword
, "protected:") != 0)
819 ty
= type (&opts
, true);
822 if (!ty
|| token () == ':')
824 consume_until_eos ();
830 dty
= declarator (ty
, &name
, &dopts
, true);
832 /* There could be any number of weird things after the declarator,
833 notably bitfield declarations and __attribute__s. If this
834 function returns true, the last thing was a comma, so we have
835 more than one declarator paired with the current type. */
836 another
= consume_until_comma_or_eos ();
842 parse_error ("two GTY(()) options for field %s", name
);
846 f
= create_field_at (f
, dty
, name
, dopts
, &lexer_line
);
850 return nreverse_pairs (f
);
853 /* Return true if OPTS contain the option named STR. */
856 opts_have (options_p opts
, const char *str
)
858 for (options_p opt
= opts
; opt
; opt
= opt
->next
)
859 if (strcmp (opt
->name
, str
) == 0)
865 /* This is called type(), but what it parses (sort of) is what C calls
866 declaration-specifiers and specifier-qualifier-list:
870 | (STRUCT|UNION) ID? gtymarker? ( '{' gtymarker? struct_field_seq '}' )?
871 | ENUM ID ( '{' ... '}' )?
873 Returns a partial type; under some conditions (notably
874 "struct foo GTY((...)) thing;") it may write an options structure to *OPTSP.
877 NESTED is true when parsing a declaration already known to have a
878 GTY marker. In these cases, typedef and enum declarations are not
879 allowed because gengtype only understands types at the global scope. */
883 type (options_p
*optsp
, bool nested
)
891 return create_scalar_type (s
);
895 return resolve_typedef (s
, &lexer_line
);
897 case IGNORABLE_CXX_KEYWORD
:
898 /* By returning NULL here, we indicate to the caller that they
899 should ignore everything following this keyword up to the
906 type_p base_class
= NULL
;
908 /* GTY annotations follow attribute syntax
909 GTY_BEFORE_ID is for union/struct declarations
910 GTY_AFTER_ID is for variable declarations. */
917 enum typekind kind
= (token () == UNION
) ? TYPE_UNION
: TYPE_STRUCT
;
920 /* Top-level structures that are not explicitly tagged GTY(())
921 are treated as mere forward declarations. This is because
922 there are a lot of structures that we don't need to know
923 about, and some of those have C++ and macro constructs that
925 if (nested
|| token () == GTY_TOKEN
)
927 is_gty
= GTY_BEFORE_ID
;
928 opts
= gtymarker_opt ();
934 s
= xasprintf ("anonymous:%s:%d",
935 get_input_file_name (lexer_line
.file
),
938 /* Unfortunately above GTY_TOKEN check does not capture the
939 typedef struct_type GTY case. */
940 if (token () == GTY_TOKEN
)
942 is_gty
= GTY_AFTER_ID
;
943 opts
= gtymarker_opt ();
946 bool is_user_gty
= opts_have (opts
, "user");
950 if (is_gty
&& !is_user_gty
)
952 /* For GTY-marked types that are not "user", parse some C++
953 inheritance specifications.
954 We require single-inheritance from a non-template type. */
956 const char *basename
= require (ID
);
957 /* This may be either an access specifier, or the base name. */
958 if (strcmp (basename
, "public") == 0
959 || strcmp (basename
, "protected") == 0
960 || strcmp (basename
, "private") == 0)
961 basename
= require (ID
);
962 base_class
= find_structure (basename
, TYPE_STRUCT
);
964 parse_error ("unrecognized base class: %s", basename
);
965 require_without_advance ('{');
969 /* For types lacking GTY-markings, skip over C++ inheritance
970 specification (and thus avoid having to parse e.g. template
972 while (token () != '{')
983 if (is_gty
== GTY_AFTER_ID
)
984 parse_error ("GTY must be specified before identifier");
989 fields
= struct_field_seq ();
994 /* Do not look inside user defined structures. */
996 kind
= TYPE_USER_STRUCT
;
997 consume_balanced ('{', '}');
998 return create_user_defined_type (s
, &lexer_line
);
1001 return new_structure (s
, kind
, &lexer_line
, fields
, opts
,
1005 else if (token () == '{')
1006 consume_balanced ('{', '}');
1009 return find_structure (s
, kind
);
1013 /* In C++, a typedef inside a struct/class/union defines a new
1014 type for that inner scope. We cannot support this in
1015 gengtype because we have no concept of scoping.
1017 We handle typedefs in the global scope separately (see
1018 parse_file), so if we find a 'typedef', we must be inside
1020 gcc_assert (nested
);
1021 parse_error ("typedefs not supported in structures marked with "
1022 "automatic GTY markers. Use GTY((user)) to mark "
1032 s
= xasprintf ("anonymous:%s:%d",
1033 get_input_file_name (lexer_line
.file
),
1036 if (token () == '{')
1037 consume_balanced ('{', '}');
1039 /* If after parsing the enum we are at the end of the statement,
1040 and we are currently inside a structure, then this was an
1041 enum declaration inside this scope.
1043 We cannot support this for the same reason we cannot support
1044 'typedef' inside structures (see the TYPEDEF handler above).
1045 If this happens, emit an error and return NULL. */
1046 if (nested
&& token () == ';')
1048 parse_error ("enum definitions not supported in structures marked "
1049 "with automatic GTY markers. Use GTY((user)) to mark "
1055 return create_scalar_type (s
);
1058 parse_error ("expected a type specifier, have %s", print_cur_token ());
1060 return create_scalar_type ("erroneous type");
1064 /* Top level constructs. */
1066 /* Dispatch declarations beginning with 'typedef'. */
1076 gcc_assert (token () == TYPEDEF
);
1079 ty
= type (&opts
, false);
1083 parse_error ("GTY((...)) cannot be applied to a typedef");
1086 dty
= declarator (ty
, &name
, &opts
);
1088 parse_error ("GTY((...)) cannot be applied to a typedef");
1090 /* Yet another place where we could have junk (notably attributes)
1091 after the declarator. */
1092 another
= consume_until_comma_or_eos ();
1094 do_typedef (name
, dty
, &lexer_line
);
1099 /* Structure definition: type() does all the work. */
1102 struct_or_union (void)
1105 type (&dummy
, false);
1106 /* There may be junk after the type: notably, we cannot currently
1107 distinguish 'struct foo *function(prototype);' from 'struct foo;'
1108 ... we could call declarator(), but it's a waste of time at
1109 present. Instead, just eat whatever token is currently lookahead
1110 and go back to lexical skipping mode. */
1114 /* GC root declaration:
1115 (extern|static) gtymarker? type ID array_declarators_opt (';'|'=')
1116 If the gtymarker is not present, we ignore the rest of the declaration. */
1118 extern_or_static (void)
1120 options_p opts
, opts2
, dopts
;
1123 require2 (EXTERN
, STATIC
);
1125 if (token () != GTY_TOKEN
)
1131 opts
= gtymarker ();
1132 ty
= type (&opts2
, true); /* if we get here, it's got a GTY(()) */
1133 dty
= declarator (ty
, &name
, &dopts
);
1135 if ((opts
&& dopts
) || (opts
&& opts2
) || (opts2
&& dopts
))
1136 parse_error ("GTY((...)) specified more than once for %s", name
);
1144 note_variable (name
, adjust_field_type (dty
, opts
), opts
, &lexer_line
);
1145 require2 (';', '=');
1149 /* Parse the file FNAME for GC-relevant declarations and definitions.
1150 This is the only entry point to this file. */
1152 parse_file (const char *fname
)
1161 extern_or_static ();
1177 parse_error ("unexpected top level token, %s", print_cur_token ());
1180 lexer_toplevel_done
= 1;