1 /* Process source files and output type information.
2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
28 /* This is a simple recursive-descent parser which understands a subset of
31 Rule functions are suffixed _seq if they scan a sequence of items;
32 _opt if they may consume zero tokens; _seqopt if both are true. The
33 "consume_" prefix indicates that a sequence of tokens is parsed for
34 syntactic correctness and then thrown away. */
36 /* Simple one-token lookahead mechanism. */
44 static struct token T
;
46 /* Retrieve the code of the current token; if there is no current token,
47 get the next one from the lexer. */
53 T
.code
= yylex (&T
.value
);
59 /* Retrieve the value of the current token (if any) and mark it consumed.
60 The next call to token() will get another token from the lexer. */
61 static inline const char *
70 /* This array is indexed by the token code minus CHAR_TOKEN_OFFSET. */
71 static const char *const token_names
[] = {
82 "a param<N>_is option",
87 "a character constant",
88 "an array declarator",
89 "a C++ keyword to ignore"
92 /* This array is indexed by token code minus FIRST_TOKEN_WITH_VALUE. */
93 static const char *const token_value_format
[] = {
104 /* Produce a printable representation for a token defined by CODE and
105 VALUE. This sometimes returns pointers into malloc memory and
106 sometimes not, therefore it is unsafe to free the pointer it
107 returns, so that memory is leaked. This does not matter, as this
108 function is only used for diagnostics, and in a successful run of
109 the program there will be none. */
111 print_token (int code
, const char *value
)
113 if (code
< CHAR_TOKEN_OFFSET
)
114 return xasprintf ("'%c'", code
);
115 else if (code
< FIRST_TOKEN_WITH_VALUE
)
116 return xasprintf ("'%s'", token_names
[code
- CHAR_TOKEN_OFFSET
]);
118 return token_names
[code
- CHAR_TOKEN_OFFSET
]; /* don't quote these */
120 return xasprintf (token_value_format
[code
- FIRST_TOKEN_WITH_VALUE
],
124 /* Convenience wrapper around print_token which produces the printable
125 representation of the current token. */
126 static inline const char *
127 print_cur_token (void)
129 return print_token (T
.code
, T
.value
);
132 /* Report a parse error on the current line, with diagnostic MSG.
133 Behaves as standard printf with respect to additional arguments and
135 static void ATTRIBUTE_PRINTF_1
136 parse_error (const char *msg
, ...)
140 fprintf (stderr
, "%s:%d: parse error: ",
141 get_input_file_name (lexer_line
.file
), lexer_line
.line
);
144 vfprintf (stderr
, msg
, ap
);
147 fputc ('\n', stderr
);
152 /* If the next token does not have code T, report a parse error; otherwise
153 return the token's value. */
158 const char *v
= advance ();
161 parse_error ("expected %s, have %s",
162 print_token (t
, 0), print_token (u
, v
));
168 /* As per require, but do not advance. */
170 require_without_advance (int t
)
173 const char *v
= T
.value
;
176 parse_error ("expected %s, have %s",
177 print_token (t
, 0), print_token (u
, v
));
183 /* If the next token does not have one of the codes T1 or T2, report a
184 parse error; otherwise return the token's value. */
186 require2 (int t1
, int t2
)
189 const char *v
= advance ();
190 if (u
!= t1
&& u
!= t2
)
192 parse_error ("expected %s or %s, have %s",
193 print_token (t1
, 0), print_token (t2
, 0),
200 /* Near-terminals. */
202 /* C-style string constant concatenation: STRING+
203 Bare STRING should appear nowhere else in this file. */
211 s1
= require (STRING
);
214 while (token () == STRING
)
220 buf
= XRESIZEVEC (char, CONST_CAST (char *, s1
), l1
+ l2
+ 1);
221 memcpy (buf
+ l1
, s2
, l2
+ 1);
222 XDELETE (CONST_CAST (char *, s2
));
229 /* The caller has detected a template declaration that starts
230 with TMPL_NAME. Parse up to the closing '>'. This recognizes
231 simple template declarations of the form ID<ID1,ID2,...,IDn>.
232 It does not try to parse anything more sophisticated than that.
234 Returns the template declaration string "ID<ID1,ID2,...,IDn>". */
237 require_template_declaration (const char *tmpl_name
)
241 /* Recognize the opening '<'. */
243 str
= concat (tmpl_name
, "<", (char *) 0);
245 /* Read the comma-separated list of identifiers. */
246 while (token () != '>')
248 const char *id
= require2 (ID
, ',');
251 str
= concat (str
, id
, (char *) 0);
254 /* Recognize the closing '>'. */
256 str
= concat (str
, ">", (char *) 0);
262 /* typedef_name: either an ID, or a template type
263 specification of the form ID<t1,t2,...,tn>. */
268 const char *id
= require (ID
);
270 return require_template_declaration (id
);
275 /* Absorb a sequence of tokens delimited by balanced ()[]{}. */
277 consume_balanced (int opener
, int closer
)
287 consume_balanced ('(', ')');
290 consume_balanced ('[', ']');
293 consume_balanced ('{', '}');
299 if (token () != closer
)
300 parse_error ("unbalanced delimiters - expected '%c', have '%c'",
306 parse_error ("unexpected end of file within %c%c-delimited construct",
312 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
313 expressions, until we encounter an end-of-statement marker (a ';' or
314 a '}') outside any such delimiters; absorb that too. */
317 consume_until_eos (void)
327 consume_balanced ('{', '}');
331 consume_balanced ('(', ')');
335 consume_balanced ('[', ']');
341 parse_error ("unmatched '%c' while scanning for ';'", token ());
345 parse_error ("unexpected end of file while scanning for ';'");
354 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
355 expressions, until we encounter a comma or semicolon outside any
356 such delimiters; absorb that too. Returns true if the loop ended
360 consume_until_comma_or_eos ()
374 consume_balanced ('{', '}');
378 consume_balanced ('(', ')');
382 consume_balanced ('[', ']');
388 parse_error ("unmatched '%s' while scanning for ',' or ';'",
393 parse_error ("unexpected end of file while scanning for ',' or ';'");
403 /* GTY(()) option handling. */
404 static type_p
type (options_p
*optsp
, bool nested
);
406 /* Optional parenthesized string: ('(' string_seq ')')? */
408 str_optvalue_opt (options_p prev
)
410 const char *name
= advance ();
411 const char *value
= "";
415 value
= string_seq ();
418 return create_string_option (prev
, name
, value
);
421 /* absdecl: type '*'*
422 -- a vague approximation to what the C standard calls an abstract
423 declarator. The only kinds that are actually used are those that
424 are just a bare type and those that have trailing pointer-stars.
425 Further kinds should be implemented if and when they become
426 necessary. Used only within GTY(()) option values, therefore
427 further GTY(()) tags within the type are invalid. Note that the
428 return value has already been run through adjust_field_type. */
435 ty
= type (&opts
, true);
436 while (token () == '*')
438 ty
= create_pointer (ty
);
443 parse_error ("nested GTY(()) options are invalid");
445 return adjust_field_type (ty
, 0);
448 /* Type-option: '(' absdecl ')' */
450 type_optvalue (options_p prev
, const char *name
)
456 return create_type_option (prev
, name
, ty
);
459 /* Nested pointer data: '(' type '*'* ',' string_seq ',' string_seq ')' */
461 nestedptr_optvalue (options_p prev
)
464 const char *from
, *to
;
471 from
= string_seq ();
474 return create_nested_ptr_option (prev
, ty
, to
, from
);
477 /* One GTY(()) option:
479 | PTR_ALIAS type_optvalue
480 | PARAM_IS type_optvalue
481 | NESTED_PTR nestedptr_optvalue
484 option (options_p prev
)
489 return str_optvalue_opt (prev
);
493 return type_optvalue (prev
, "ptr_alias");
496 return type_optvalue (prev
, advance ());
500 return nestedptr_optvalue (prev
);
504 return create_string_option (prev
, "user", "");
507 parse_error ("expected an option keyword, have %s", print_cur_token ());
509 return create_string_option (prev
, "", "");
513 /* One comma-separated list of options. */
520 while (token () == ',')
528 /* GTY marker: 'GTY' '(' '(' option_seq? ')' ')' */
532 options_p result
= 0;
537 result
= option_seq ();
543 /* Optional GTY marker. */
547 if (token () != GTY_TOKEN
)
554 /* Declarators. The logic here is largely lifted from c-parser.c.
555 Note that we do not have to process abstract declarators, which can
556 appear only in parameter type lists or casts (but see absdecl,
557 above). Also, type qualifiers are thrown out in gengtype-lex.l so
558 we don't have to do it. */
560 /* array_and_function_declarators_opt:
562 array_and_function_declarators_opt ARRAY
563 array_and_function_declarators_opt '(' ... ')'
565 where '...' indicates stuff we ignore except insofar as grouping
566 symbols ()[]{} must balance.
568 Subroutine of direct_declarator - do not use elsewhere. */
571 array_and_function_declarators_opt (type_p ty
)
573 if (token () == ARRAY
)
575 const char *array
= advance ();
576 return create_array (array_and_function_declarators_opt (ty
), array
);
578 else if (token () == '(')
580 /* We don't need exact types for functions. */
581 consume_balanced ('(', ')');
582 array_and_function_declarators_opt (ty
);
583 return create_scalar_type ("function type");
589 static type_p
inner_declarator (type_p
, const char **, options_p
*, bool);
591 /* direct_declarator:
592 '(' inner_declarator ')'
593 '(' \epsilon ')' <-- C++ ctors/dtors
594 gtymarker_opt ID array_and_function_declarators_opt
596 Subroutine of declarator, mutually recursive with inner_declarator;
597 do not use elsewhere.
599 IN_STRUCT is true if we are called while parsing structures or classes. */
602 direct_declarator (type_p ty
, const char **namep
, options_p
*optsp
,
605 /* The first token in a direct-declarator must be an ID, a
606 GTY marker, or an open parenthesis. */
610 *optsp
= gtymarker ();
614 *namep
= require (ID
);
615 /* If the next token is '(', we are parsing a function declaration.
616 Functions are ignored by gengtype, so we return NULL. */
622 /* If the declarator starts with a '(', we have three options. We
623 are either parsing 'TYPE (*ID)' (i.e., a function pointer)
626 The latter will be a constructor iff we are inside a
627 structure or class. Otherwise, it could be a typedef, but
628 since we explicitly reject typedefs inside structures, we can
629 assume that we found a ctor and return NULL. */
631 if (in_struct
&& token () != '*')
633 /* Found a constructor. Find and consume the closing ')'. */
634 while (token () != ')')
637 /* Tell the caller to ignore this. */
640 ty
= inner_declarator (ty
, namep
, optsp
, in_struct
);
644 case IGNORABLE_CXX_KEYWORD
:
645 /* Any C++ keyword like 'operator' means that we are not looking
646 at a regular data declarator. */
650 parse_error ("expected '(', ')', 'GTY', or an identifier, have %s",
652 /* Do _not_ advance if what we have is a close squiggle brace, as
653 we will get much better error recovery that way. */
658 return array_and_function_declarators_opt (ty
);
661 /* The difference between inner_declarator and declarator is in the
662 handling of stars. Consider this declaration:
666 It declares a pointer to a function that takes no arguments and
667 returns a char*. To construct the correct type for this
668 declaration, the star outside the parentheses must be processed
669 _before_ the function type, the star inside the parentheses must
670 be processed _after_ the function type. To accomplish this,
671 declarator() creates pointers before recursing (it is actually
672 coded as a while loop), whereas inner_declarator() recurses before
673 creating pointers. */
679 Mutually recursive subroutine of direct_declarator; do not use
682 IN_STRUCT is true if we are called while parsing structures or classes. */
685 inner_declarator (type_p ty
, const char **namep
, options_p
*optsp
,
692 inner
= inner_declarator (ty
, namep
, optsp
, in_struct
);
696 return create_pointer (ty
);
699 return direct_declarator (ty
, namep
, optsp
, in_struct
);
702 /* declarator: '*'* direct_declarator
704 This is the sole public interface to this part of the grammar.
705 Arguments are the type known so far, a pointer to where the name
706 may be stored, and a pointer to where GTY options may be stored.
708 IN_STRUCT is true when we are called to parse declarators inside
709 a structure or class.
711 Returns the final type. */
714 declarator (type_p ty
, const char **namep
, options_p
*optsp
,
715 bool in_struct
= false)
719 while (token () == '*')
722 ty
= create_pointer (ty
);
724 return direct_declarator (ty
, namep
, optsp
, in_struct
);
727 /* Types and declarations. */
729 /* Structure field(s) declaration:
732 | type declarator bitfield? ( ',' declarator bitfield? )+ ';'
735 Knows that such declarations must end with a close brace (or,
736 erroneously, at EOF).
739 struct_field_seq (void)
743 options_p opts
, dopts
;
747 while (token () != '}' && token () != EOF_TOKEN
)
749 ty
= type (&opts
, true);
751 /* Ignore access-control keywords ("public:" etc). */
752 while (!ty
&& token () == IGNORABLE_CXX_KEYWORD
)
754 const char *keyword
= advance ();
755 if (strcmp (keyword
, "public:") != 0
756 && strcmp (keyword
, "private:") != 0
757 && strcmp (keyword
, "protected:") != 0)
759 ty
= type (&opts
, true);
762 if (!ty
|| token () == ':')
764 consume_until_eos ();
770 dty
= declarator (ty
, &name
, &dopts
, true);
772 /* There could be any number of weird things after the declarator,
773 notably bitfield declarations and __attribute__s. If
774 consume_until_comma_or_eos returns true, the last thing was a comma,
775 so we have more than one declarator paired with the current type. */
776 another
= consume_until_comma_or_eos ();
782 parse_error ("two GTY(()) options for field %s", name
);
786 f
= create_field_at (f
, dty
, name
, dopts
, &lexer_line
);
790 return nreverse_pairs (f
);
793 /* Return true if OPTS contains an option named STR. */
796 opts_have (options_p opts
, const char *str
)
798 for (options_p opt
= opts
; opt
; opt
= opt
->next
)
799 if (strcmp (opt
->name
, str
) == 0)
805 /* This is called type(), but what it parses (sort of) is what C calls
806 declaration-specifiers and specifier-qualifier-list:
810 | (STRUCT|UNION) ID? gtymarker? ( '{' gtymarker? struct_field_seq '}' )?
811 | ENUM ID ( '{' ... '}' )?
813 Returns a partial type; under some conditions (notably
814 "struct foo GTY((...)) thing;") it may write an options
817 NESTED is true when parsing a declaration already known to have a
818 GTY marker. In these cases, typedef and enum declarations are not
819 allowed because gengtype only understands types at the global
823 type (options_p
*optsp
, bool nested
)
831 return create_scalar_type (s
);
835 return resolve_typedef (s
, &lexer_line
);
837 case IGNORABLE_CXX_KEYWORD
:
838 /* By returning NULL here, we indicate to the caller that they
839 should ignore everything following this keyword up to the
846 type_p base_class
= NULL
;
848 /* GTY annotations follow attribute syntax.
849 GTY_BEFORE_ID is for union/struct declarations.
850 GTY_AFTER_ID is for variable declarations. */
857 enum typekind kind
= (token () == UNION
) ? TYPE_UNION
: TYPE_STRUCT
;
860 /* Top-level structures that are not explicitly tagged GTY(())
861 are treated as mere forward declarations. This is because
862 there are a lot of structures that we don't need to know
863 about, and some of those have C++ and macro constructs that
865 if (nested
|| token () == GTY_TOKEN
)
867 is_gty
= GTY_BEFORE_ID
;
868 opts
= gtymarker_opt ();
874 s
= xasprintf ("anonymous:%s:%d",
875 get_input_file_name (lexer_line
.file
),
878 /* Unfortunately, the GTY_TOKEN check above does not capture the
879 'typedef struct_type GTY' case. */
880 if (token () == GTY_TOKEN
)
882 is_gty
= GTY_AFTER_ID
;
883 opts
= gtymarker_opt ();
886 bool is_user_gty
= opts_have (opts
, "user");
890 if (is_gty
&& !is_user_gty
)
892 /* For GTY-marked types that are not "user", parse some C++
893 inheritance specifications.
894 We require single-inheritance from a non-template type. */
896 const char *basename
= require (ID
);
897 /* This may be either an access specifier, or the base name. */
898 if (0 == strcmp (basename
, "public")
899 || 0 == strcmp (basename
, "protected")
900 || 0 == strcmp (basename
, "private"))
901 basename
= require (ID
);
902 base_class
= find_structure (basename
, TYPE_STRUCT
);
904 parse_error ("unrecognized base class: %s", basename
);
905 require_without_advance ('{');
909 /* For types lacking GTY-markings, skip over C++ inheritance
910 specification (and thus avoid having to parse e.g. template
912 while (token () != '{')
923 if (is_gty
== GTY_AFTER_ID
)
924 parse_error ("GTY must be specified before identifier");
929 fields
= struct_field_seq ();
934 /* Do not look inside user defined structures. */
936 kind
= TYPE_USER_STRUCT
;
937 consume_balanced ('{', '}');
938 return create_user_defined_type (s
, &lexer_line
);
941 return new_structure (s
, kind
, &lexer_line
, fields
, opts
,
945 else if (token () == '{')
946 consume_balanced ('{', '}');
949 return find_structure (s
, kind
);
953 /* In C++, a typedef inside a struct/class/union defines a new
954 type for that inner scope. We cannot support this in
955 gengtype because we have no concept of scoping.
957 We handle typedefs in the global scope separately (see
958 parse_file), so if we find a 'typedef', we must be inside
961 parse_error ("typedefs not supported in structures marked with "
962 "automatic GTY markers. Use GTY((user)) to mark "
972 s
= xasprintf ("anonymous:%s:%d",
973 get_input_file_name (lexer_line
.file
),
977 consume_balanced ('{', '}');
979 /* If after parsing the enum we are at the end of the statement,
980 and we are currently inside a structure, then this was an
981 enum declaration inside this scope.
983 We cannot support this for the same reason we cannot support
984 'typedef' inside structures (see the TYPEDEF handler above).
985 If this happens, emit an error and return NULL. */
986 if (nested
&& token () == ';')
988 parse_error ("enum definitions not supported in structures marked "
989 "with automatic GTY markers. Use GTY((user)) to mark "
995 return create_scalar_type (s
);
998 parse_error ("expected a type specifier, have %s", print_cur_token ());
1000 return create_scalar_type ("erroneous type");
1004 /* Top level constructs. */
1006 /* Dispatch declarations beginning with 'typedef'. */
1016 gcc_assert (token () == TYPEDEF
);
1019 ty
= type (&opts
, false);
1023 parse_error ("GTY((...)) cannot be applied to a typedef");
1026 dty
= declarator (ty
, &name
, &opts
);
1028 parse_error ("GTY((...)) cannot be applied to a typedef");
1030 /* Yet another place where we could have junk (notably attributes)
1031 after the declarator. */
1032 another
= consume_until_comma_or_eos ();
1034 do_typedef (name
, dty
, &lexer_line
);
1039 /* Structure definition: type() does all the work. */
1042 struct_or_union (void)
1045 type (&dummy
, false);
1046 /* There may be junk after the type: notably, we cannot currently
1047 distinguish 'struct foo *function(prototype);' from 'struct foo;'
1048 ... we could call declarator(), but it's a waste of time at
1049 present. Instead, just eat whatever token is currently lookahead
1050 and go back to lexical skipping mode. */
1054 /* GC root declaration:
1055 (extern|static) gtymarker? type ID array_declarators_opt (';'|'=')
1056 If the gtymarker is not present, we ignore the rest of the declaration. */
1058 extern_or_static (void)
1060 options_p opts
, opts2
, dopts
;
1063 require2 (EXTERN
, STATIC
);
1065 if (token () != GTY_TOKEN
)
1071 opts
= gtymarker ();
1072 ty
= type (&opts2
, true); /* if we get here, it's got a GTY(()) */
1073 dty
= declarator (ty
, &name
, &dopts
);
1075 if ((opts
&& dopts
) || (opts
&& opts2
) || (opts2
&& dopts
))
1076 parse_error ("GTY((...)) specified more than once for %s", name
);
1084 note_variable (name
, adjust_field_type (dty
, opts
), opts
, &lexer_line
);
1085 require2 (';', '=');
1089 /* Parse the file FNAME for GC-relevant declarations and definitions.
1090 This is the only entry point to this file. */
1092 parse_file (const char *fname
)
1101 extern_or_static ();
1117 parse_error ("unexpected top level token, %s", print_cur_token ());
1120 lexer_toplevel_done
= 1;