1 /* Process source files and output type information.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
28 /* This is a simple recursive-descent parser which understands a subset of
31 Rule functions are suffixed _seq if they scan a sequence of items;
32 _opt if they may consume zero tokens; _seqopt if both are true. The
33 "consume_" prefix indicates that a sequence of tokens is parsed for
34 syntactic correctness and then thrown away. */
36 /* Simple one-token lookahead mechanism. */
44 static struct token T
;
46 /* Retrieve the code of the current token; if there is no current token,
47 get the next one from the lexer. */
53 T
.code
= yylex (&T
.value
);
59 /* Retrieve the value of the current token (if any) and mark it consumed.
60 The next call to token() will get another token from the lexer. */
61 static inline const char *
70 /* This array is indexed by the token code minus CHAR_TOKEN_OFFSET. */
71 static const char *const token_names
[] = {
82 "a param<N>_is option",
87 "a character constant",
88 "an array declarator",
89 "a C++ keyword to ignore"
92 /* This array is indexed by token code minus FIRST_TOKEN_WITH_VALUE. */
93 static const char *const token_value_format
[] = {
104 /* Produce a printable representation for a token defined by CODE and
105 VALUE. This sometimes returns pointers into malloc memory and
106 sometimes not, therefore it is unsafe to free the pointer it
107 returns, so that memory is leaked. This does not matter, as this
108 function is only used for diagnostics, and in a successful run of
109 the program there will be none. */
111 print_token (int code
, const char *value
)
113 if (code
< CHAR_TOKEN_OFFSET
)
114 return xasprintf ("'%c'", code
);
115 else if (code
< FIRST_TOKEN_WITH_VALUE
)
116 return xasprintf ("'%s'", token_names
[code
- CHAR_TOKEN_OFFSET
]);
118 return token_names
[code
- CHAR_TOKEN_OFFSET
]; /* don't quote these */
120 return xasprintf (token_value_format
[code
- FIRST_TOKEN_WITH_VALUE
],
124 /* Convenience wrapper around print_token which produces the printable
125 representation of the current token. */
126 static inline const char *
127 print_cur_token (void)
129 return print_token (T
.code
, T
.value
);
132 /* Report a parse error on the current line, with diagnostic MSG.
133 Behaves as standard printf with respect to additional arguments and format escapes. */
135 static void ATTRIBUTE_PRINTF_1
136 parse_error (const char *msg
, ...)
140 fprintf (stderr
, "%s:%d: parse error: ",
141 get_input_file_name (lexer_line
.file
), lexer_line
.line
);
144 vfprintf (stderr
, msg
, ap
);
147 fputc ('\n', stderr
);
152 /* If the next token does not have code T, report a parse error; otherwise
153 return the token's value. */
158 const char *v
= advance ();
161 parse_error ("expected %s, have %s",
162 print_token (t
, 0), print_token (u
, v
));
168 /* If the next token does not have one of the codes T1 or T2, report a
169 parse error; otherwise return the token's value. */
171 require2 (int t1
, int t2
)
174 const char *v
= advance ();
175 if (u
!= t1
&& u
!= t2
)
177 parse_error ("expected %s or %s, have %s",
178 print_token (t1
, 0), print_token (t2
, 0),
185 /* If the next token does not have one of the codes T1, T2 or T3, report a
186 parse error; otherwise return the token's value. */
188 require3 (int t1
, int t2
, int t3
)
191 const char *v
= advance ();
192 if (u
!= t1
&& u
!= t2
&& u
!= t3
)
194 parse_error ("expected %s, %s or %s, have %s",
195 print_token (t1
, 0), print_token (t2
, 0),
196 print_token (t3
, 0), print_token (u
, v
));
202 /* Near-terminals. */
204 /* C-style string constant concatenation: STRING+
205 Bare STRING should appear nowhere else in this file. */
213 s1
= require (STRING
);
216 while (token () == STRING
)
222 buf
= XRESIZEVEC (char, CONST_CAST (char *, s1
), l1
+ l2
+ 1);
223 memcpy (buf
+ l1
, s2
, l2
+ 1);
224 XDELETE (CONST_CAST (char *, s2
));
231 /* The caller has detected a template declaration that starts
232 with TMPL_NAME. Parse up to the closing '>'. This recognizes
233 simple template declarations of the form ID<ID1,ID2,...,IDn>.
234 It does not try to parse anything more sophisticated than that.
236 Returns the template declaration string "ID<ID1,ID2,...,IDn>". */
239 require_template_declaration (const char *tmpl_name
)
243 /* Recognize the opening '<'. */
245 str
= concat (tmpl_name
, "<", (char *) 0);
247 /* Read the comma-separated list of identifiers. */
251 if (token () == ENUM
)
254 str
= concat (str
, "enum ", (char *) 0);
259 str
= concat (str
, advance (), (char *) 0);
265 str
= concat (str
, ":", (char *) 0);
271 str
= concat (str
, "<", (char *) 0);
278 str
= concat (str
, ">", (char *) 0);
282 const char *id
= require3 (SCALAR
, ID
, ',');
285 str
= concat (str
, id
, (char *) 0);
291 /* typedef_name: either an ID, or a template type
292 specification of the form ID<t1,t2,...,tn>. */
297 const char *id
= require (ID
);
299 return require_template_declaration (id
);
304 /* Absorb a sequence of tokens delimited by balanced ()[]{}. */
306 consume_balanced (int opener
, int closer
)
316 consume_balanced ('(', ')');
319 consume_balanced ('[', ']');
322 consume_balanced ('{', '}');
328 if (token () != closer
)
329 parse_error ("unbalanced delimiters - expected '%c', have '%c'",
335 parse_error ("unexpected end of file within %c%c-delimited construct",
341 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
342 expressions, until we encounter an end-of-statement marker (a ';' or
343 a '}') outside any such delimiters; absorb that too. */
346 consume_until_eos (void)
356 consume_balanced ('{', '}');
360 consume_balanced ('(', ')');
364 consume_balanced ('[', ']');
370 parse_error ("unmatched '%c' while scanning for ';'", token ());
374 parse_error ("unexpected end of file while scanning for ';'");
383 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
384 expressions, until we encounter a comma or semicolon outside any
385 such delimiters; absorb that too. Returns true if the loop ended with a comma. */
389 consume_until_comma_or_eos ()
403 consume_balanced ('{', '}');
407 consume_balanced ('(', ')');
411 consume_balanced ('[', ']');
417 parse_error ("unmatched '%s' while scanning for ',' or ';'",
422 parse_error ("unexpected end of file while scanning for ',' or ';'");
432 /* GTY(()) option handling. */
433 static type_p
type (options_p
*optsp
, bool nested
);
435 /* Optional parenthesized string: ('(' string_seq ')')? */
437 str_optvalue_opt (options_p prev
)
439 const char *name
= advance ();
440 const char *value
= "";
444 value
= string_seq ();
447 return create_string_option (prev
, name
, value
);
450 /* absdecl: type '*'*
451 -- a vague approximation to what the C standard calls an abstract
452 declarator. The only kinds that are actually used are those that
453 are just a bare type and those that have trailing pointer-stars.
454 Further kinds should be implemented if and when they become
455 necessary. Used only within GTY(()) option values, therefore
456 further GTY(()) tags within the type are invalid. Note that the
457 return value has already been run through adjust_field_type. */
464 ty
= type (&opts
, true);
465 while (token () == '*')
467 ty
= create_pointer (ty
);
472 parse_error ("nested GTY(()) options are invalid");
474 return adjust_field_type (ty
, 0);
477 /* Type-option: '(' absdecl ')' */
479 type_optvalue (options_p prev
, const char *name
)
485 return create_type_option (prev
, name
, ty
);
488 /* Nested pointer data: '(' type '*'* ',' string_seq ',' string_seq ')' */
490 nestedptr_optvalue (options_p prev
)
493 const char *from
, *to
;
500 from
= string_seq ();
503 return create_nested_ptr_option (prev
, ty
, to
, from
);
506 /* One GTY(()) option:
508 | PTR_ALIAS type_optvalue
509 | PARAM_IS type_optvalue
510 | NESTED_PTR nestedptr_optvalue
513 option (options_p prev
)
518 return str_optvalue_opt (prev
);
522 return type_optvalue (prev
, "ptr_alias");
525 return type_optvalue (prev
, advance ());
529 return nestedptr_optvalue (prev
);
533 return create_string_option (prev
, "user", "");
536 parse_error ("expected an option keyword, have %s", print_cur_token ());
538 return create_string_option (prev
, "", "");
542 /* One comma-separated list of options. */
549 while (token () == ',')
557 /* GTY marker: 'GTY' '(' '(' option_seq? ')' ')' */
561 options_p result
= 0;
566 result
= option_seq ();
572 /* Optional GTY marker. */
576 if (token () != GTY_TOKEN
)
583 /* Declarators. The logic here is largely lifted from c-parser.c.
584 Note that we do not have to process abstract declarators, which can
585 appear only in parameter type lists or casts (but see absdecl,
586 above). Also, type qualifiers are thrown out in gengtype-lex.l so
587 we don't have to do it. */
589 /* array_and_function_declarators_opt:
591 array_and_function_declarators_opt ARRAY
592 array_and_function_declarators_opt '(' ... ')'
594 where '...' indicates stuff we ignore except insofar as grouping
595 symbols ()[]{} must balance.
597 Subroutine of direct_declarator - do not use elsewhere. */
600 array_and_function_declarators_opt (type_p ty
)
602 if (token () == ARRAY
)
604 const char *array
= advance ();
605 return create_array (array_and_function_declarators_opt (ty
), array
);
607 else if (token () == '(')
609 /* We don't need exact types for functions. */
610 consume_balanced ('(', ')');
611 array_and_function_declarators_opt (ty
);
612 return create_scalar_type ("function type");
618 static type_p
inner_declarator (type_p
, const char **, options_p
*, bool);
620 /* direct_declarator:
621 '(' inner_declarator ')'
622 '(' \epsilon ')' <-- C++ ctors/dtors
623 gtymarker_opt ID array_and_function_declarators_opt
625 Subroutine of declarator, mutually recursive with inner_declarator;
626 do not use elsewhere.
628 IN_STRUCT is true if we are called while parsing structures or classes. */
631 direct_declarator (type_p ty
, const char **namep
, options_p
*optsp
,
634 /* The first token in a direct-declarator must be an ID, a
635 GTY marker, or an open parenthesis. */
639 *optsp
= gtymarker ();
643 *namep
= require (ID
);
644 /* If the next token is '(', we are parsing a function declaration.
645 Functions are ignored by gengtype, so we return NULL. */
651 /* If the declarator starts with a '(', we have three options. We
652 are either parsing 'TYPE (*ID)' (i.e., a function pointer) or 'TYPE(...)'.
655 The latter will be a constructor iff we are inside a
656 structure or class. Otherwise, it could be a typedef, but
657 since we explicitly reject typedefs inside structures, we can
658 assume that we found a ctor and return NULL. */
660 if (in_struct
&& token () != '*')
662 /* Found a constructor. Find and consume the closing ')'. */
663 while (token () != ')')
666 /* Tell the caller to ignore this. */
669 ty
= inner_declarator (ty
, namep
, optsp
, in_struct
);
673 case IGNORABLE_CXX_KEYWORD
:
674 /* Any C++ keyword like 'operator' means that we are not looking
675 at a regular data declarator. */
679 parse_error ("expected '(', ')', 'GTY', or an identifier, have %s",
681 /* Do _not_ advance if what we have is a close squiggle brace, as
682 we will get much better error recovery that way. */
687 return array_and_function_declarators_opt (ty
);
690 /* The difference between inner_declarator and declarator is in the
691 handling of stars. Consider this declaration:
695 It declares a pointer to a function that takes no arguments and
696 returns a char*. To construct the correct type for this
697 declaration, the star outside the parentheses must be processed
698 _before_ the function type, the star inside the parentheses must
699 be processed _after_ the function type. To accomplish this,
700 declarator() creates pointers before recursing (it is actually
701 coded as a while loop), whereas inner_declarator() recurses before
702 creating pointers. */
708 Mutually recursive subroutine of direct_declarator; do not use elsewhere.
711 IN_STRUCT is true if we are called while parsing structures or classes. */
714 inner_declarator (type_p ty
, const char **namep
, options_p
*optsp
,
721 inner
= inner_declarator (ty
, namep
, optsp
, in_struct
);
725 return create_pointer (ty
);
728 return direct_declarator (ty
, namep
, optsp
, in_struct
);
731 /* declarator: '*'* direct_declarator
733 This is the sole public interface to this part of the grammar.
734 Arguments are the type known so far, a pointer to where the name
735 may be stored, and a pointer to where GTY options may be stored.
737 IN_STRUCT is true when we are called to parse declarators inside
738 a structure or class.
740 Returns the final type. */
743 declarator (type_p ty
, const char **namep
, options_p
*optsp
,
744 bool in_struct
= false)
748 while (token () == '*')
751 ty
= create_pointer (ty
);
753 return direct_declarator (ty
, namep
, optsp
, in_struct
);
756 /* Types and declarations. */
758 /* Structure field(s) declaration:
761 | type declarator bitfield? ( ',' declarator bitfield? )+ ';'
764 Knows that such declarations must end with a close brace (or,
765 erroneously, at EOF).
768 struct_field_seq (void)
772 options_p opts
, dopts
;
778 ty
= type (&opts
, true);
780 /* Ignore access-control keywords ("public:" etc). */
781 while (!ty
&& token () == IGNORABLE_CXX_KEYWORD
)
783 const char *keyword
= advance ();
784 if (strcmp (keyword
, "public:") != 0
785 && strcmp (keyword
, "private:") != 0
786 && strcmp (keyword
, "protected:") != 0)
788 ty
= type (&opts
, true);
791 if (!ty
|| token () == ':')
793 consume_until_eos ();
799 dty
= declarator (ty
, &name
, &dopts
, true);
801 /* There could be any number of weird things after the declarator,
802 notably bitfield declarations and __attribute__s. If this
803 function returns true, the last thing was a comma, so we have
804 more than one declarator paired with the current type. */
805 another
= consume_until_comma_or_eos ();
811 parse_error ("two GTY(()) options for field %s", name
);
815 f
= create_field_at (f
, dty
, name
, dopts
, &lexer_line
);
819 while (token () != '}' && token () != EOF_TOKEN
);
820 return nreverse_pairs (f
);
823 /* Return true if OPTS contain the option named STR. */
826 opts_have (options_p opts
, const char *str
)
828 for (options_p opt
= opts
; opt
; opt
= opt
->next
)
829 if (strcmp (opt
->name
, str
) == 0)
835 /* This is called type(), but what it parses (sort of) is what C calls
836 declaration-specifiers and specifier-qualifier-list:
840 | (STRUCT|UNION) ID? gtymarker? ( '{' gtymarker? struct_field_seq '}' )?
841 | ENUM ID ( '{' ... '}' )?
843 Returns a partial type; under some conditions (notably
844 "struct foo GTY((...)) thing;") it may write an options structure to *OPTSP.
847 NESTED is true when parsing a declaration already known to have a
848 GTY marker. In these cases, typedef and enum declarations are not
849 allowed because gengtype only understands types at the global scope. */
853 type (options_p
*optsp
, bool nested
)
861 return create_scalar_type (s
);
865 return resolve_typedef (s
, &lexer_line
);
867 case IGNORABLE_CXX_KEYWORD
:
868 /* By returning NULL here, we indicate to the caller that they
869 should ignore everything following this keyword up to the next ';' or '}'. */
877 /* GTY annotations follow attribute syntax
878 GTY_BEFORE_ID is for union/struct declarations
879 GTY_AFTER_ID is for variable declarations. */
886 enum typekind kind
= (token () == UNION
) ? TYPE_UNION
: TYPE_STRUCT
;
889 /* Top-level structures that are not explicitly tagged GTY(())
890 are treated as mere forward declarations. This is because
891 there are a lot of structures that we don't need to know
892 about, and some of those have C++ and macro constructs that
894 if (nested
|| token () == GTY_TOKEN
)
896 is_gty
= GTY_BEFORE_ID
;
897 opts
= gtymarker_opt ();
903 s
= xasprintf ("anonymous:%s:%d",
904 get_input_file_name (lexer_line
.file
),
907 /* Unfortunately above GTY_TOKEN check does not capture the
908 typedef struct_type GTY case. */
909 if (token () == GTY_TOKEN
)
911 is_gty
= GTY_AFTER_ID
;
912 opts
= gtymarker_opt ();
917 /* Skip over C++ inheritance specification. */
918 while (token () != '{')
924 bool is_user_gty
= opts_have (opts
, "user");
929 if (is_gty
== GTY_AFTER_ID
)
930 parse_error ("GTY must be specified before identifier");
935 fields
= struct_field_seq ();
940 /* Do not look inside user defined structures. */
942 kind
= TYPE_USER_STRUCT
;
943 consume_balanced ('{', '}');
944 return create_user_defined_type (s
, &lexer_line
);
947 return new_structure (s
, kind
, &lexer_line
, fields
, opts
);
950 else if (token () == '{')
951 consume_balanced ('{', '}');
954 return find_structure (s
, kind
);
958 /* In C++, a typedef inside a struct/class/union defines a new
959 type for that inner scope. We cannot support this in
960 gengtype because we have no concept of scoping.
962 We handle typedefs in the global scope separately (see
963 parse_file), so if we find a 'typedef', we must be inside a structure. */
966 parse_error ("typedefs not supported in structures marked with "
967 "automatic GTY markers. Use GTY((user)) to mark "
977 s
= xasprintf ("anonymous:%s:%d",
978 get_input_file_name (lexer_line
.file
),
982 consume_balanced ('{', '}');
984 /* If after parsing the enum we are at the end of the statement,
985 and we are currently inside a structure, then this was an
986 enum declaration inside this scope.
988 We cannot support this for the same reason we cannot support
989 'typedef' inside structures (see the TYPEDEF handler above).
990 If this happens, emit an error and return NULL. */
991 if (nested
&& token () == ';')
993 parse_error ("enum definitions not supported in structures marked "
994 "with automatic GTY markers. Use GTY((user)) to mark "
1000 return create_scalar_type (s
);
1003 parse_error ("expected a type specifier, have %s", print_cur_token ());
1005 return create_scalar_type ("erroneous type");
1009 /* Top level constructs. */
1011 /* Dispatch declarations beginning with 'typedef'. */
1021 gcc_assert (token () == TYPEDEF
);
1024 ty
= type (&opts
, false);
1028 parse_error ("GTY((...)) cannot be applied to a typedef");
1031 dty
= declarator (ty
, &name
, &opts
);
1033 parse_error ("GTY((...)) cannot be applied to a typedef");
1035 /* Yet another place where we could have junk (notably attributes)
1036 after the declarator. */
1037 another
= consume_until_comma_or_eos ();
1039 do_typedef (name
, dty
, &lexer_line
);
1044 /* Structure definition: type() does all the work. */
1047 struct_or_union (void)
1050 type (&dummy
, false);
1051 /* There may be junk after the type: notably, we cannot currently
1052 distinguish 'struct foo *function(prototype);' from 'struct foo;'
1053 ... we could call declarator(), but it's a waste of time at
1054 present. Instead, just eat whatever token is currently lookahead
1055 and go back to lexical skipping mode. */
1059 /* GC root declaration:
1060 (extern|static) gtymarker? type ID array_declarators_opt (';'|'=')
1061 If the gtymarker is not present, we ignore the rest of the declaration. */
1063 extern_or_static (void)
1065 options_p opts
, opts2
, dopts
;
1068 require2 (EXTERN
, STATIC
);
1070 if (token () != GTY_TOKEN
)
1076 opts
= gtymarker ();
1077 ty
= type (&opts2
, true); /* if we get here, it's got a GTY(()) */
1078 dty
= declarator (ty
, &name
, &dopts
);
1080 if ((opts
&& dopts
) || (opts
&& opts2
) || (opts2
&& dopts
))
1081 parse_error ("GTY((...)) specified more than once for %s", name
);
1089 note_variable (name
, adjust_field_type (dty
, opts
), opts
, &lexer_line
);
1090 require2 (';', '=');
1094 /* Parse the file FNAME for GC-relevant declarations and definitions.
1095 This is the only entry point to this file. */
1097 parse_file (const char *fname
)
1106 extern_or_static ();
1122 parse_error ("unexpected top level token, %s", print_cur_token ());
1125 lexer_toplevel_done
= 1;