1 /* Process source files and output type information.
2 Copyright (C) 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
24 /* This is a simple recursive-descent parser which understands a subset of the C type grammar.
27 Rule functions are suffixed _seq if they scan a sequence of items;
28 _opt if they may consume zero tokens; _seqopt if both are true. The
29 "consume_" prefix indicates that a sequence of tokens is parsed for
30 syntactic correctness and then thrown away. */
32 /* Simple one-token lookahead mechanism. */
40 static struct token T
;
42 /* Retrieve the code of the current token; if there is no current token,
43 get the next one from the lexer. */
49 T
.code
= yylex (&T
.value
);
55 /* Retrieve the value of the current token (if any) and mark it consumed.
56 The next call to token() will get another token from the lexer. */
57 static inline const char *
66 /* This array is indexed by the token code minus CHAR_TOKEN_OFFSET. */
67 static const char *const token_names
[] = {
78 "DEF_VEC_ALLOC_[IOP]",
82 "a param<N>_is option",
87 "a character constant",
88 "an array declarator",
91 /* This array is indexed by token code minus FIRST_TOKEN_WITH_VALUE. */
92 static const char *const token_value_format
[] = {
102 /* Produce a printable representation for a token defined by CODE and
103 VALUE. This sometimes returns pointers into malloc memory and
104 sometimes not, therefore it is unsafe to free the pointer it
105 returns, so that memory is leaked. This does not matter, as this
106 function is only used for diagnostics, and in a successful run of
107 the program there will be none. */
109 print_token (int code
, const char *value
)
111 if (code
< CHAR_TOKEN_OFFSET
)
112 return xasprintf ("'%c'", code
);
113 else if (code
< FIRST_TOKEN_WITH_VALUE
)
114 return xasprintf ("'%s'", token_names
[code
- CHAR_TOKEN_OFFSET
]);
116 return token_names
[code
- CHAR_TOKEN_OFFSET
]; /* don't quote these */
118 return xasprintf (token_value_format
[code
- FIRST_TOKEN_WITH_VALUE
],
122 /* Convenience wrapper around print_token which produces the printable
123 representation of the current token. */
124 static inline const char *
125 print_cur_token (void)
127 return print_token (T
.code
, T
.value
);
130 /* Report a parse error on the current line, with diagnostic MSG.
131 Behaves as standard printf with respect to additional arguments and format escapes. */
133 static void ATTRIBUTE_PRINTF_1
134 parse_error (const char *msg
, ...)
138 fprintf (stderr
, "%s:%d: parse error: ", lexer_line
.file
, lexer_line
.line
);
141 vfprintf (stderr
, msg
, ap
);
144 fputc ('\n', stderr
);
149 /* If the next token does not have code T, report a parse error; otherwise
150 return the token's value. */
155 const char *v
= advance ();
158 parse_error ("expected %s, have %s",
159 print_token (t
, 0), print_token (u
, v
));
165 /* If the next token does not have one of the codes T1 or T2, report a
166 parse error; otherwise return the token's value. */
168 require2 (int t1
, int t2
)
171 const char *v
= advance ();
172 if (u
!= t1
&& u
!= t2
)
174 parse_error ("expected %s or %s, have %s",
175 print_token (t1
, 0), print_token (t2
, 0),
182 /* Near-terminals. */
184 /* C-style string constant concatenation: STRING+
185 Bare STRING should appear nowhere else in this file. */
193 s1
= require (STRING
);
196 while (token () == STRING
)
202 buf
= XRESIZEVEC (char, CONST_CAST(char *, s1
), l1
+ l2
+ 1);
203 memcpy (buf
+ l1
, s2
, l2
+ 1);
204 XDELETE (CONST_CAST (char *, s2
));
210 /* typedef_name: either an ID, or VEC(x,y) which is translated to VEC_x_y.
211 Use only where VEC(x,y) is legitimate, i.e. in positions where a
212 typedef name may appear. */
216 if (token () == VEC_TOKEN
)
218 const char *c1
, *c2
, *r
;
221 c1
= require2 (ID
, SCALAR
);
225 r
= concat ("VEC_", c1
, "_", c2
, (char *)0);
226 free (CONST_CAST (char *, c1
));
227 free (CONST_CAST (char *, c2
));
234 /* Absorb a sequence of tokens delimited by balanced ()[]{}. */
236 consume_balanced (int opener
, int closer
)
242 default: advance (); break;
243 case '(': consume_balanced ('(',')'); break;
244 case '[': consume_balanced ('[',']'); break;
245 case '{': consume_balanced ('{','}'); break;
250 if (token () != closer
)
251 parse_error ("unbalanced delimiters - expected '%c', have '%c'",
257 parse_error ("unexpected end of file within %c%c-delimited construct",
263 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
264 expressions, until we encounter a semicolon outside any such
265 delimiters; absorb that too. If IMMEDIATE is true, it is an error
266 if the semicolon is not the first token encountered. */
268 consume_until_semi (bool immediate
)
270 if (immediate
&& token () != ';')
275 case ';': advance (); return;
276 default: advance (); break;
278 case '(': consume_balanced ('(',')'); break;
279 case '[': consume_balanced ('[',']'); break;
280 case '{': consume_balanced ('{','}'); break;
285 parse_error ("unmatched '%c' while scanning for ';'", token ());
289 parse_error ("unexpected end of file while scanning for ';'");
294 /* Absorb a sequence of tokens, possibly including ()[]{}-delimited
295 expressions, until we encounter a comma or semicolon outside any
296 such delimiters; absorb that too. If IMMEDIATE is true, it is an
297 error if the comma or semicolon is not the first token encountered.
298 Returns true if the loop ended with a comma. */
300 consume_until_comma_or_semi (bool immediate
)
302 if (immediate
&& token () != ',' && token () != ';')
307 case ',': advance (); return true;
308 case ';': advance (); return false;
309 default: advance (); break;
311 case '(': consume_balanced ('(',')'); break;
312 case '[': consume_balanced ('[',']'); break;
313 case '{': consume_balanced ('{','}'); break;
318 parse_error ("unmatched '%s' while scanning for ',' or ';'",
323 parse_error ("unexpected end of file while scanning for ',' or ';'");
329 /* GTY(()) option handling. */
330 static type_p
type (options_p
*optsp
, bool nested
);
332 /* Optional parenthesized string: ('(' string_seq ')')? */
334 str_optvalue_opt (options_p prev
)
336 const char *name
= advance ();
337 const char *value
= "";
341 value
= string_seq ();
344 return create_option (prev
, name
, value
);
347 /* absdecl: type '*'*
348 -- a vague approximation to what the C standard calls an abstract
349 declarator. The only kinds that are actually used are those that
350 are just a bare type and those that have trailing pointer-stars.
351 Further kinds should be implemented if and when they become
352 necessary. Used only within GTY(()) option values, therefore
353 further GTY(()) tags within the type are invalid. Note that the
354 return value has already been run through adjust_field_type. */
361 ty
= type (&opts
, true);
362 while (token () == '*')
364 ty
= create_pointer (ty
);
369 parse_error ("nested GTY(()) options are invalid");
371 return adjust_field_type (ty
, 0);
374 /* Type-option: '(' absdecl ')' */
376 type_optvalue (options_p prev
, const char *name
)
382 return create_option (prev
, name
, ty
);
385 /* Nested pointer data: '(' type '*'* ',' string_seq ',' string_seq ')' */
387 nestedptr_optvalue (options_p prev
)
390 const char *from
, *to
;
397 from
= string_seq ();
400 return create_nested_ptr_option (prev
, ty
, to
, from
);
403 /* One GTY(()) option:
405 | PTR_ALIAS type_optvalue
406 | PARAM_IS type_optvalue
407 | NESTED_PTR nestedptr_optvalue
410 option (options_p prev
)
415 return str_optvalue_opt (prev
);
419 return type_optvalue (prev
, "ptr_alias");
422 return type_optvalue (prev
, advance ());
426 return nestedptr_optvalue (prev
);
429 parse_error ("expected an option keyword, have %s",
432 return create_option (prev
, "", "");
436 /* One comma-separated list of options. */
443 while (token () == ',')
451 /* GTY marker: 'GTY' '(' '(' option_seq? ')' ')' */
455 options_p result
= 0;
460 result
= option_seq ();
466 /* Optional GTY marker. */
470 if (token () != GTY_TOKEN
)
475 /* Declarators. The logic here is largely lifted from c-parser.c.
476 Note that we do not have to process abstract declarators, which can
477 appear only in parameter type lists or casts (but see absdecl,
478 above). Also, type qualifiers are thrown out in gengtype-lex.l so
479 we don't have to do it. */
481 /* array_and_function_declarators_opt:
483 array_and_function_declarators_opt ARRAY
484 array_and_function_declarators_opt '(' ... ')'
486 where '...' indicates stuff we ignore except insofar as grouping
487 symbols ()[]{} must balance.
489 Subroutine of direct_declarator - do not use elsewhere. */
492 array_and_function_declarators_opt (type_p ty
)
494 if (token () == ARRAY
)
496 const char *array
= advance ();
497 return create_array (array_and_function_declarators_opt (ty
), array
);
499 else if (token () == '(')
501 /* We don't need exact types for functions. */
502 consume_balanced ('(', ')');
503 array_and_function_declarators_opt (ty
);
504 return create_scalar_type ("function type");
510 static type_p
inner_declarator (type_p
, const char **, options_p
*);
512 /* direct_declarator:
513 '(' inner_declarator ')'
514 gtymarker_opt ID array_and_function_declarators_opt
516 Subroutine of declarator, mutually recursive with inner_declarator;
517 do not use elsewhere. */
519 direct_declarator (type_p ty
, const char **namep
, options_p
*optsp
)
521 /* The first token in a direct-declarator must be an ID, a
522 GTY marker, or an open parenthesis. */
526 *optsp
= gtymarker ();
529 *namep
= require (ID
);
534 ty
= inner_declarator (ty
, namep
, optsp
);
539 parse_error ("expected '(', 'GTY', or an identifier, have %s",
541 /* Do _not_ advance if what we have is a close squiggle brace, as
542 we will get much better error recovery that way. */
547 return array_and_function_declarators_opt (ty
);
550 /* The difference between inner_declarator and declarator is in the
551 handling of stars. Consider this declaration:
555 It declares a pointer to a function that takes no arguments and
556 returns a char*. To construct the correct type for this
557 declaration, the star outside the parentheses must be processed
558 _before_ the function type, the star inside the parentheses must
559 be processed _after_ the function type. To accomplish this,
560 declarator() creates pointers before recursing (it is actually
561 coded as a while loop), whereas inner_declarator() recurses before
562 creating pointers. */
568 /* Mutually recursive subroutine of direct_declarator; do not use elsewhere. */
572 inner_declarator (type_p ty
, const char **namep
, options_p
*optsp
)
578 inner
= inner_declarator (ty
, namep
, optsp
);
582 return create_pointer (ty
);
585 return direct_declarator (ty
, namep
, optsp
);
588 /* declarator: '*'* direct_declarator
590 This is the sole public interface to this part of the grammar.
591 Arguments are the type known so far, a pointer to where the name
592 may be stored, and a pointer to where GTY options may be stored.
593 Returns the final type. */
596 declarator (type_p ty
, const char **namep
, options_p
*optsp
)
600 while (token () == '*')
603 ty
= create_pointer (ty
);
605 return direct_declarator (ty
, namep
, optsp
);
608 /* Types and declarations. */
610 /* Structure field(s) declaration:
613 | type declarator bitfield? ( ',' declarator bitfield? )+ ';'
616 Knows that such declarations must end with a close brace (or,
617 erroneously, at EOF). */
620 struct_field_seq (void)
624 options_p opts
, dopts
;
630 ty
= type (&opts
, true);
631 /* Another piece of the IFCVT_EXTRA_FIELDS special case, see type(). */
632 if (!ty
&& token () == '}')
635 if (!ty
|| token () == ':')
637 consume_until_semi (false);
643 dty
= declarator (ty
, &name
, &dopts
);
644 /* There could be any number of weird things after the declarator,
645 notably bitfield declarations and __attribute__s. If this
646 function returns true, the last thing was a comma, so we have
647 more than one declarator paired with the current type. */
648 another
= consume_until_comma_or_semi (false);
654 parse_error ("two GTY(()) options for field %s", name
);
658 f
= create_field_at (f
, dty
, name
, dopts
, &lexer_line
);
662 while (token () != '}' && token () != EOF_TOKEN
);
663 return nreverse_pairs (f
);
666 /* This is called type(), but what it parses (sort of) is what C calls
667 declaration-specifiers and specifier-qualifier-list:
671 | (STRUCT|UNION) ID? gtymarker? ( '{' gtymarker? struct_field_seq '}' )?
672 | ENUM ID ( '{' ... '}' )?
674 Returns a partial type; under some conditions (notably
675 "struct foo GTY((...)) thing;") it may write an options structure to *OPTSP. */
679 type (options_p
*optsp
, bool nested
)
687 return create_scalar_type (s
);
692 return resolve_typedef (s
, &lexer_line
);
698 /* GTY annotations follow attribute syntax
699 GTY_BEFORE_ID is for union/struct declarations
700 GTY_AFTER_ID is for variable declarations. */
706 bool is_union
= (token () == UNION
);
709 /* Top-level structures that are not explicitly tagged GTY(())
710 are treated as mere forward declarations. This is because
711 there are a lot of structures that we don't need to know
712 about, and some of those have weird macro stuff in them
713 that we can't handle. */
714 if (nested
|| token () == GTY_TOKEN
)
716 is_gty
= GTY_BEFORE_ID
;
717 opts
= gtymarker_opt ();
723 s
= xasprintf ("anonymous:%s:%d", lexer_line
.file
, lexer_line
.line
);
725 /* Unfortunately above GTY_TOKEN check does not capture the
726 typedef struct_type GTY case. */
727 if (token () == GTY_TOKEN
)
729 is_gty
= GTY_AFTER_ID
;
730 opts
= gtymarker_opt ();
739 if (is_gty
== GTY_AFTER_ID
)
740 parse_error ("GTY must be specified before identifier");
743 fields
= struct_field_seq ();
745 return new_structure (s
, is_union
, &lexer_line
, fields
, opts
);
748 else if (token () == '{')
749 consume_balanced ('{', '}');
752 return find_structure (s
, is_union
);
760 s
= xasprintf ("anonymous:%s:%d", lexer_line
.file
, lexer_line
.line
);
763 consume_balanced ('{','}');
764 return create_scalar_type (s
);
767 parse_error ("expected a type specifier, have %s", print_cur_token ());
769 return create_scalar_type ("erroneous type");
773 /* Top level constructs. */
775 /* Dispatch declarations beginning with 'typedef'. */
785 gcc_assert (token () == TYPEDEF
);
788 ty
= type (&opts
, false);
792 parse_error ("GTY((...)) cannot be applied to a typedef");
795 dty
= declarator (ty
, &name
, &opts
);
797 parse_error ("GTY((...)) cannot be applied to a typedef");
799 /* Yet another place where we could have junk (notably attributes)
800 after the declarator. */
801 another
= consume_until_comma_or_semi (false);
803 do_typedef (name
, dty
, &lexer_line
);
808 /* Structure definition: type() does all the work. */
811 struct_or_union (void)
814 type (&dummy
, false);
815 /* There may be junk after the type: notably, we cannot currently
816 distinguish 'struct foo *function(prototype);' from 'struct foo;'
817 ... we could call declarator(), but it's a waste of time at
818 present. Instead, just eat whatever token is currently lookahead
819 and go back to lexical skipping mode. */
823 /* GC root declaration:
824 (extern|static) gtymarker? type ID array_declarators_opt (';'|'=')
825 If the gtymarker is not present, we ignore the rest of the declaration. */
827 extern_or_static (void)
829 options_p opts
, opts2
, dopts
;
832 require2 (EXTERN
, STATIC
);
834 if (token () != GTY_TOKEN
)
841 ty
= type (&opts2
, true); /* if we get here, it's got a GTY(()) */
842 dty
= declarator (ty
, &name
, &dopts
);
844 if ((opts
&& dopts
) || (opts
&& opts2
) || (opts2
&& dopts
))
845 parse_error ("GTY((...)) specified more than once for %s", name
);
853 note_variable (name
, adjust_field_type (dty
, opts
), opts
, &lexer_line
);
858 /* Definition of a generic VEC structure:
860 'DEF_VEC_[IPO]' '(' id ')' ';'
862 Scalar VECs require slightly different treatment than otherwise -
863 that's handled in note_def_vec, we just pass it along.*/
867 bool is_scalar
= (token() == DEFVEC_I
);
870 require2 (DEFVEC_OP
, DEFVEC_I
);
872 type
= require2 (ID
, SCALAR
);
879 note_def_vec (type
, is_scalar
, &lexer_line
);
880 note_def_vec_alloc (type
, "none", &lexer_line
);
883 /* Definition of an allocation strategy for a VEC structure:
885 'DEF_VEC_ALLOC_[IPO]' '(' id ',' id ')' ';'
887 For purposes of gengtype, this just declares a wrapper structure. */
891 const char *type
, *astrat
;
893 require (DEFVEC_ALLOC
);
895 type
= require2 (ID
, SCALAR
);
897 astrat
= require (ID
);
901 if (!type
|| !astrat
)
904 note_def_vec_alloc (type
, astrat
, &lexer_line
);
907 /* Parse the file FNAME for GC-relevant declarations and definitions.
908 This is the only entry point to this file. */
910 parse_file (const char *fname
)
944 parse_error ("unexpected top level token, %s", print_cur_token ());
947 lexer_toplevel_done
= 1;