1 /* Process source files and output type information.
2 Copyright (C) 2006, 2007, 2010, 2012 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
28 /* This is a simple recursive-descent parser which understands a subset of the C type grammar.
31 Rule functions are suffixed _seq if they scan a sequence of items;
32 _opt if they may consume zero tokens; _seqopt if both are true. The
33 "consume_" prefix indicates that a sequence of tokens is parsed for
34 syntactic correctness and then thrown away. */
36 /* Simple one-token lookahead mechanism. */
44 static struct token T
;
46 /* Retrieve the code of the current token; if there is no current token,
47 get the next one from the lexer. */
53 T
.code
= yylex (&T
.value
);
59 /* Retrieve the value of the current token (if any) and mark it consumed.
60 The next call to token() will get another token from the lexer. */
61 static inline const char *
70 /* This array is indexed by the token code minus CHAR_TOKEN_OFFSET. */
71 static const char *const token_names
[] = {
83 "a param<N>_is option",
88 "a character constant",
89 "an array declarator",
90 "a C++ keyword to ignore"
93 /* This array is indexed by token code minus FIRST_TOKEN_WITH_VALUE. */
94 static const char *const token_value_format
[] = {
105 /* Produce a printable representation for a token defined by CODE and
106 VALUE. This sometimes returns pointers into malloc memory and
107 sometimes not, therefore it is unsafe to free the pointer it
108 returns, so that memory is leaked. This does not matter, as this
109 function is only used for diagnostics, and in a successful run of
110 the program there will be none. */
112 print_token (int code
, const char *value
)
114 if (code
< CHAR_TOKEN_OFFSET
)
115 return xasprintf ("'%c'", code
);
116 else if (code
< FIRST_TOKEN_WITH_VALUE
)
117 return xasprintf ("'%s'", token_names
[code
- CHAR_TOKEN_OFFSET
]);
119 return token_names
[code
- CHAR_TOKEN_OFFSET
]; /* don't quote these */
121 return xasprintf (token_value_format
[code
- FIRST_TOKEN_WITH_VALUE
],
125 /* Convenience wrapper around print_token which produces the printable
126 representation of the current token. */
127 static inline const char *
128 print_cur_token (void)
130 return print_token (T
.code
, T
.value
);
133 /* Report a parse error on the current line, with diagnostic MSG.
134 Behaves as standard printf with respect to additional arguments and format escapes. */
136 static void ATTRIBUTE_PRINTF_1
137 parse_error (const char *msg
, ...)
141 fprintf (stderr
, "%s:%d: parse error: ",
142 get_input_file_name (lexer_line
.file
), lexer_line
.line
);
145 vfprintf (stderr
, msg
, ap
);
148 fputc ('\n', stderr
);
153 /* If the next token does not have code T, report a parse error; otherwise
154 return the token's value. */
159 const char *v
= advance ();
162 parse_error ("expected %s, have %s",
163 print_token (t
, 0), print_token (u
, v
));
169 /* If the next token does not have one of the codes T1 or T2, report a
170 parse error; otherwise return the token's value. */
172 require2 (int t1
, int t2
)
175 const char *v
= advance ();
176 if (u
!= t1
&& u
!= t2
)
178 parse_error ("expected %s or %s, have %s",
179 print_token (t1
, 0), print_token (t2
, 0),
186 /* Near-terminals. */
188 /* C-style string constant concatenation: STRING+
189 Bare STRING should appear nowhere else in this file. */
197 s1
= require (STRING
);
200 while (token () == STRING
)
206 buf
= XRESIZEVEC (char, CONST_CAST (char *, s1
), l1
+ l2
+ 1);
207 memcpy (buf
+ l1
, s2
, l2
+ 1);
208 XDELETE (CONST_CAST (char *, s2
));
215 /* The caller has detected a template declaration that starts
216 with TMPL_NAME. Parse up to the closing '>'. This recognizes
217 simple template declarations of the form ID<ID1,ID2,...,IDn>.
218 It does not try to parse anything more sophisticated than that.
220 Returns the template declaration string "ID<ID1,ID2,...,IDn>". */
223 require_template_declaration (const char *tmpl_name
)
227 /* Recognize the opening '<'. */
229 str
= concat (tmpl_name
, "<", (char *) 0);
231 /* Read the comma-separated list of identifiers. */
232 while (token () != '>')
234 const char *id
= require2 (ID
, ',');
237 str
= concat (str
, id
, (char *) 0);
240 /* Recognize the closing '>'. */
242 str
= concat (str
, ">", (char *) 0);
248 /* typedef_name: either an ID, or VEC(x,y), or a template type
249 specification of the form ID<t1,t2,...,tn>.
251 FIXME cxx-conversion. VEC(x,y) is currently translated to the
252 template 'vec_t<x>'. This is to support the transition to C++ and
253 avoid re-writing all the 'VEC(x,y)' declarations in the code. This
254 needs to be fixed when the branch is merged into trunk. */
259 if (token () == VEC_TOKEN
)
264 c1
= require2 (ID
, SCALAR
);
268 r
= concat ("vec_t<", c1
, ">", (char *) 0);
269 free (CONST_CAST (char *, c1
));
273 const char *id
= require (ID
);
275 return require_template_declaration (id
);
280 /* Absorb a sequence of tokens delimited by balanced ()[]{}. */
282 consume_balanced (int opener
, int closer
)
292 consume_balanced ('(', ')');
295 consume_balanced ('[', ']');
298 consume_balanced ('{', '}');
304 if (token () != closer
)
305 parse_error ("unbalanced delimiters - expected '%c', have '%c'",
311 parse_error ("unexpected end of file within %c%c-delimited construct",
317 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
318 expressions, until we encounter an end-of-statement marker (a ';' or
319 a '}') outside any such delimiters; absorb that too. */
322 consume_until_eos (void)
332 consume_balanced ('{', '}');
336 consume_balanced ('(', ')');
340 consume_balanced ('[', ']');
346 parse_error ("unmatched '%c' while scanning for ';'", token ());
350 parse_error ("unexpected end of file while scanning for ';'");
359 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
360 expressions, until we encounter a comma or semicolon outside any
361 such delimiters; absorb that too. Returns true if the loop ended with a comma. */
365 consume_until_comma_or_eos ()
379 consume_balanced ('{', '}');
383 consume_balanced ('(', ')');
387 consume_balanced ('[', ']');
393 parse_error ("unmatched '%s' while scanning for ',' or ';'",
398 parse_error ("unexpected end of file while scanning for ',' or ';'");
408 /* GTY(()) option handling. */
409 static type_p
type (options_p
*optsp
, bool nested
);
411 /* Optional parenthesized string: ('(' string_seq ')')? */
413 str_optvalue_opt (options_p prev
)
415 const char *name
= advance ();
416 const char *value
= "";
420 value
= string_seq ();
423 return create_string_option (prev
, name
, value
);
426 /* absdecl: type '*'*
427 -- a vague approximation to what the C standard calls an abstract
428 declarator. The only kinds that are actually used are those that
429 are just a bare type and those that have trailing pointer-stars.
430 Further kinds should be implemented if and when they become
431 necessary. Used only within GTY(()) option values, therefore
432 further GTY(()) tags within the type are invalid. Note that the
433 return value has already been run through adjust_field_type. */
440 ty
= type (&opts
, true);
441 while (token () == '*')
443 ty
= create_pointer (ty
);
448 parse_error ("nested GTY(()) options are invalid");
450 return adjust_field_type (ty
, 0);
453 /* Type-option: '(' absdecl ')' */
455 type_optvalue (options_p prev
, const char *name
)
461 return create_type_option (prev
, name
, ty
);
464 /* Nested pointer data: '(' type '*'* ',' string_seq ',' string_seq ')' */
466 nestedptr_optvalue (options_p prev
)
469 const char *from
, *to
;
476 from
= string_seq ();
479 return create_nested_ptr_option (prev
, ty
, to
, from
);
482 /* One GTY(()) option:
484 | PTR_ALIAS type_optvalue
485 | PARAM_IS type_optvalue
486 | NESTED_PTR nestedptr_optvalue */
489 option (options_p prev
)
494 return str_optvalue_opt (prev
);
498 return type_optvalue (prev
, "ptr_alias");
501 return type_optvalue (prev
, advance ());
505 return nestedptr_optvalue (prev
);
509 return create_string_option (prev
, "user", "");
512 parse_error ("expected an option keyword, have %s", print_cur_token ());
514 return create_string_option (prev
, "", "");
518 /* One comma-separated list of options. */
525 while (token () == ',')
533 /* GTY marker: 'GTY' '(' '(' option_seq? ')' ')' */
537 options_p result
= 0;
542 result
= option_seq ();
548 /* Optional GTY marker. */
552 if (token () != GTY_TOKEN
)
559 /* Declarators. The logic here is largely lifted from c-parser.c.
560 Note that we do not have to process abstract declarators, which can
561 appear only in parameter type lists or casts (but see absdecl,
562 above). Also, type qualifiers are thrown out in gengtype-lex.l so
563 we don't have to do it. */
565 /* array_and_function_declarators_opt:
567 array_and_function_declarators_opt ARRAY
568 array_and_function_declarators_opt '(' ... ')'
570 where '...' indicates stuff we ignore except insofar as grouping
571 symbols ()[]{} must balance.
573 Subroutine of direct_declarator - do not use elsewhere. */
576 array_and_function_declarators_opt (type_p ty
)
578 if (token () == ARRAY
)
580 const char *array
= advance ();
581 return create_array (array_and_function_declarators_opt (ty
), array
);
583 else if (token () == '(')
585 /* We don't need exact types for functions. */
586 consume_balanced ('(', ')');
587 array_and_function_declarators_opt (ty
);
588 return create_scalar_type ("function type");
594 static type_p
inner_declarator (type_p
, const char **, options_p
*, bool);
596 /* direct_declarator:
597 '(' inner_declarator ')'
598 '(' \epsilon ')' <-- C++ ctors/dtors
599 gtymarker_opt ID array_and_function_declarators_opt
601 Subroutine of declarator, mutually recursive with inner_declarator;
602 do not use elsewhere.
604 IN_STRUCT is true if we are called while parsing structures or classes. */
607 direct_declarator (type_p ty
, const char **namep
, options_p
*optsp
,
610 /* The first token in a direct-declarator must be an ID, a
611 GTY marker, or an open parenthesis. */
615 *optsp
= gtymarker ();
619 *namep
= require (ID
);
620 /* If the next token is '(', we are parsing a function declaration.
621 Functions are ignored by gengtype, so we return NULL. */
627 /* If the declarator starts with a '(', we have three options. We
628 are either parsing 'TYPE (*ID)' (i.e., a function pointer)
631 The latter will be a constructor iff we are inside a
632 structure or class. Otherwise, it could be a typedef, but
633 since we explicitly reject typedefs inside structures, we can
634 assume that we found a ctor and return NULL. */
636 if (in_struct
&& token () != '*')
638 /* Found a constructor. Find and consume the closing ')'. */
639 while (token () != ')')
642 /* Tell the caller to ignore this. */
645 ty
= inner_declarator (ty
, namep
, optsp
, in_struct
);
649 case IGNORABLE_CXX_KEYWORD
:
650 /* Any C++ keyword like 'operator' means that we are not looking
651 at a regular data declarator. */
655 parse_error ("expected '(', ')', 'GTY', or an identifier, have %s",
657 /* Do _not_ advance if what we have is a close squiggle brace, as
658 we will get much better error recovery that way. */
663 return array_and_function_declarators_opt (ty
);
666 /* The difference between inner_declarator and declarator is in the
667 handling of stars. Consider this declaration: 'char * (*pfc) (void)'.
671 It declares a pointer to a function that takes no arguments and
672 returns a char*. To construct the correct type for this
673 declaration, the star outside the parentheses must be processed
674 _before_ the function type, the star inside the parentheses must
675 be processed _after_ the function type. To accomplish this,
676 declarator() creates pointers before recursing (it is actually
677 coded as a while loop), whereas inner_declarator() recurses before
678 creating pointers. */
684 /* Mutually recursive subroutine of direct_declarator; do not use elsewhere.
687 IN_STRUCT is true if we are called while parsing structures or classes. */
690 inner_declarator (type_p ty
, const char **namep
, options_p
*optsp
,
697 inner
= inner_declarator (ty
, namep
, optsp
, in_struct
);
701 return create_pointer (ty
);
704 return direct_declarator (ty
, namep
, optsp
, in_struct
);
707 /* declarator: '*'+ direct_declarator
709 This is the sole public interface to this part of the grammar.
710 Arguments are the type known so far, a pointer to where the name
711 may be stored, and a pointer to where GTY options may be stored.
713 IN_STRUCT is true when we are called to parse declarators inside
714 a structure or class.
716 Returns the final type. */
719 declarator (type_p ty
, const char **namep
, options_p
*optsp
,
720 bool in_struct
= false)
724 while (token () == '*')
727 ty
= create_pointer (ty
);
729 return direct_declarator (ty
, namep
, optsp
, in_struct
);
732 /* Types and declarations. */
734 /* Structure field(s) declaration:
737 | type declarator bitfield? ( ',' declarator bitfield? )+ ';'
740 Knows that such declarations must end with a close brace (or,
741 erroneously, at EOF). */
744 struct_field_seq (void)
748 options_p opts
, dopts
;
754 ty
= type (&opts
, true);
756 if (!ty
|| token () == ':')
758 consume_until_eos ();
764 dty
= declarator (ty
, &name
, &dopts
, true);
766 /* There could be any number of weird things after the declarator,
767 notably bitfield declarations and __attribute__s. If this
768 function returns true, the last thing was a comma, so we have
769 more than one declarator paired with the current type. */
770 another
= consume_until_comma_or_eos ();
776 parse_error ("two GTY(()) options for field %s", name
);
780 f
= create_field_at (f
, dty
, name
, dopts
, &lexer_line
);
784 while (token () != '}' && token () != EOF_TOKEN
);
785 return nreverse_pairs (f
);
788 /* Return true if OPTS contain the option named STR. */
791 opts_have (options_p opts
, const char *str
)
793 for (options_p opt
= opts
; opt
; opt
= opt
->next
)
794 if (strcmp (opt
->name
, str
) == 0)
800 /* This is called type(), but what it parses (sort of) is what C calls
801 declaration-specifiers and specifier-qualifier-list:
805 | (STRUCT|UNION) ID? gtymarker? ( '{' gtymarker? struct_field_seq '}' )?
806 | ENUM ID ( '{' ... '}' )?
808 Returns a partial type; under some conditions (notably
809 "struct foo GTY((...)) thing;") it may write an options structure to *OPTSP.
812 NESTED is true when parsing a declaration already known to have a
813 GTY marker. In these cases, typedef and enum declarations are not
814 allowed because gengtype only understands types at the global scope. */
818 type (options_p
*optsp
, bool nested
)
826 return create_scalar_type (s
);
831 return resolve_typedef (s
, &lexer_line
);
833 case IGNORABLE_CXX_KEYWORD
:
834 /* By returning NULL here, we indicate to the caller that they
835 should ignore everything following this keyword up to the
843 /* GTY annotations follow attribute syntax
844 GTY_BEFORE_ID is for union/struct declarations
845 GTY_AFTER_ID is for variable declarations. */
852 enum typekind kind
= (token () == UNION
) ? TYPE_UNION
: TYPE_STRUCT
;
855 /* Top-level structures that are not explicitly tagged GTY(())
856 are treated as mere forward declarations. This is because
857 there are a lot of structures that we don't need to know
858 about, and some of those have C++ and macro constructs that
860 if (nested
|| token () == GTY_TOKEN
)
862 is_gty
= GTY_BEFORE_ID
;
863 opts
= gtymarker_opt ();
869 s
= xasprintf ("anonymous:%s:%d",
870 get_input_file_name (lexer_line
.file
),
873 /* Unfortunately above GTY_TOKEN check does not capture the
874 typedef struct_type GTY case. */
875 if (token () == GTY_TOKEN
)
877 is_gty
= GTY_AFTER_ID
;
878 opts
= gtymarker_opt ();
883 /* Skip over C++ inheritance specification. */
884 while (token () != '{')
890 bool is_user_gty
= opts_have (opts
, "user");
895 if (is_gty
== GTY_AFTER_ID
)
896 parse_error ("GTY must be specified before identifier");
901 fields
= struct_field_seq ();
906 /* Do not look inside user defined structures. */
908 kind
= TYPE_USER_STRUCT
;
909 consume_balanced ('{', '}');
912 return new_structure (s
, kind
, &lexer_line
, fields
, opts
);
915 else if (token () == '{')
916 consume_balanced ('{', '}');
919 return find_structure (s
, kind
);
923 /* In C++, a typedef inside a struct/class/union defines a new
924 type for that inner scope. We cannot support this in
925 gengtype because we have no concept of scoping.
927 We handle typedefs in the global scope separately (see
928 parse_file), so if we find a 'typedef', we must be inside
931 parse_error ("typedefs not supported in structures marked with "
932 "automatic GTY markers. Use GTY((user)) to mark "
942 s
= xasprintf ("anonymous:%s:%d",
943 get_input_file_name (lexer_line
.file
),
947 consume_balanced ('{', '}');
949 /* If after parsing the enum we are at the end of the statement,
950 and we are currently inside a structure, then this was an
951 enum declaration inside this scope.
953 We cannot support this for the same reason we cannot support
954 'typedef' inside structures (see the TYPEDEF handler above).
955 If this happens, emit an error and return NULL. */
956 if (nested
&& token () == ';')
958 parse_error ("enum definitions not supported in structures marked "
959 "with automatic GTY markers. Use GTY((user)) to mark "
965 return create_scalar_type (s
);
968 parse_error ("expected a type specifier, have %s", print_cur_token ());
970 return create_scalar_type ("erroneous type");
974 /* Top level constructs. */
976 /* Dispatch declarations beginning with 'typedef'. */
986 gcc_assert (token () == TYPEDEF
);
989 ty
= type (&opts
, false);
993 parse_error ("GTY((...)) cannot be applied to a typedef");
996 dty
= declarator (ty
, &name
, &opts
);
998 parse_error ("GTY((...)) cannot be applied to a typedef");
1000 /* Yet another place where we could have junk (notably attributes)
1001 after the declarator. */
1002 another
= consume_until_comma_or_eos ();
1004 do_typedef (name
, dty
, &lexer_line
);
1009 /* Structure definition: type() does all the work. */
1012 struct_or_union (void)
1015 type (&dummy
, false);
1016 /* There may be junk after the type: notably, we cannot currently
1017 distinguish 'struct foo *function(prototype);' from 'struct foo;'
1018 ... we could call declarator(), but it's a waste of time at
1019 present. Instead, just eat whatever token is currently lookahead
1020 and go back to lexical skipping mode. */
1024 /* GC root declaration:
1025 (extern|static) gtymarker? type ID array_declarators_opt (';'|'=')
1026 If the gtymarker is not present, we ignore the rest of the declaration. */
1028 extern_or_static (void)
1030 options_p opts
, opts2
, dopts
;
1033 require2 (EXTERN
, STATIC
);
1035 if (token () != GTY_TOKEN
)
1041 opts
= gtymarker ();
1042 ty
= type (&opts2
, true); /* if we get here, it's got a GTY(()) */
1043 dty
= declarator (ty
, &name
, &dopts
);
1045 if ((opts
&& dopts
) || (opts
&& opts2
) || (opts2
&& dopts
))
1046 parse_error ("GTY((...)) specified more than once for %s", name
);
1054 note_variable (name
, adjust_field_type (dty
, opts
), opts
, &lexer_line
);
1055 require2 (';', '=');
1059 /* Parse the file FNAME for GC-relevant declarations and definitions.
1060 This is the only entry point to this file. */
1062 parse_file (const char *fname
)
1071 extern_or_static ();
1087 parse_error ("unexpected top level token, %s", print_cur_token ());
1090 lexer_toplevel_done
= 1;