[PATCH] attributes on bitfields
[smatch.git] / parse.c
blob0b939f427f13a591c23848de5be06fe289eada32
/*
 * Stupid C parser, version 1e-6.
 *
 * Let's see how hard this is to do.
 *
 * Copyright (C) 2003 Transmeta Corp.
 *               2003 Linus Torvalds
 *
 * Licensed under the Open Software License version 1.1
 */
12 #include <stdarg.h>
13 #include <stdlib.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <ctype.h>
17 #include <unistd.h>
18 #include <fcntl.h>
20 #include "lib.h"
21 #include "token.h"
22 #include "parse.h"
23 #include "symbol.h"
24 #include "scope.h"
25 #include "expression.h"
26 #include "target.h"
/* List that fn_local_symbol() appends function-local symbols to. */
static struct symbol_list **function_symbol_list;

/*
 * Per-function bookkeeping for computed gotos: statement() appends every
 * "goto *expr" statement to function_computed_goto_list.
 * NOTE(review): function_computed_target_list is presumably the matching
 * list of label targets - it is not written in this part of the file.
 */
struct symbol_list *function_computed_target_list;
struct statement_list *function_computed_goto_list;

/* Add a symbol to the list of function-local symbols */
#define fn_local_symbol(sym) add_symbol(function_symbol_list, (sym))

/* Defined further down; needed early because the parsers are mutually recursive. */
static struct token *statement(struct token *token, struct statement **tree);
static struct token *external_declaration(struct token *token, struct symbol_list **list);
38 static int match_idents(struct token *token, ...)
40 va_list args;
42 if (token_type(token) != TOKEN_IDENT)
43 return 0;
45 va_start(args, token);
46 for (;;) {
47 struct ident * next = va_arg(args, struct ident *);
48 if (!next)
49 return 0;
50 if (token->ident == next)
51 return 1;
56 struct statement *alloc_statement(struct position pos, int type)
58 struct statement *stmt = __alloc_statement(0);
59 stmt->type = type;
60 stmt->pos = pos;
61 return stmt;
64 static struct token *struct_declaration_list(struct token *token, struct symbol_list **list);
66 static struct symbol * indirect(struct position pos, struct ctype *ctype, int type)
68 struct symbol *sym = alloc_symbol(pos, type);
70 sym->ctype.base_type = ctype->base_type;
71 sym->ctype.modifiers = ctype->modifiers & ~MOD_STORAGE;
73 ctype->base_type = sym;
74 ctype->modifiers &= MOD_STORAGE;
75 return sym;
78 static struct symbol *lookup_or_create_symbol(enum namespace ns, enum type type, struct token *token)
80 struct symbol *sym = lookup_symbol(token->ident, ns);
81 if (!sym) {
82 sym = alloc_symbol(token->pos, type);
83 sym->ident = token->ident;
84 bind_symbol(sym, token->ident, ns);
85 if (type == SYM_LABEL)
86 fn_local_symbol(sym);
88 return sym;
92 * NOTE! NS_LABEL is not just a different namespace,
93 * it also ends up using function scope instead of the
94 * regular symbol scope.
96 struct symbol *label_symbol(struct token *token)
98 return lookup_or_create_symbol(NS_LABEL, SYM_LABEL, token);
101 struct token *struct_union_enum_specifier(enum namespace ns, enum type type,
102 struct token *token, struct ctype *ctype,
103 struct token *(*parse)(struct token *, struct symbol *))
105 struct symbol *sym;
107 ctype->modifiers = 0;
108 if (token_type(token) == TOKEN_IDENT) {
109 sym = lookup_or_create_symbol(ns, type, token);
110 token = token->next;
111 ctype->base_type = sym;
112 if (match_op(token, '{')) {
113 token = parse(token->next, sym);
114 token = expect(token, '}', "at end of struct-union-enum-specifier");
116 return token;
119 // private struct/union/enum type
120 if (!match_op(token, '{')) {
121 warn(token->pos, "expected declaration");
122 ctype->base_type = &bad_type;
123 return token;
126 sym = alloc_symbol(token->pos, type);
127 token = parse(token->next, sym);
128 ctype->base_type = sym;
129 return expect(token, '}', "at end of specifier");
132 static struct token *parse_struct_declaration(struct token *token, struct symbol *sym)
134 return struct_declaration_list(token, &sym->symbol_list);
137 struct token *struct_or_union_specifier(enum type type, struct token *token, struct ctype *ctype)
139 return struct_union_enum_specifier(NS_STRUCT, type, token, ctype, parse_struct_declaration);
142 static struct token *parse_enum_declaration(struct token *token, struct symbol *parent)
144 int nextval = 0;
145 while (token_type(token) == TOKEN_IDENT) {
146 struct token *next = token->next;
147 struct symbol *sym;
149 sym = alloc_symbol(token->pos, SYM_ENUM);
150 bind_symbol(sym, token->ident, NS_SYMBOL);
151 sym->ctype.base_type = parent;
152 parent->ctype.base_type = &int_ctype;
154 if (match_op(next, '=')) {
155 struct expression *expr;
156 next = constant_expression(next->next, &expr);
157 nextval = get_expression_value(expr);
159 sym->value = nextval;
161 token = next;
162 if (!match_op(token, ','))
163 break;
164 token = token->next;
165 nextval = nextval + 1;
167 return token;
170 struct token *enum_specifier(struct token *token, struct ctype *ctype)
172 return struct_union_enum_specifier(NS_ENUM, SYM_ENUM, token, ctype, parse_enum_declaration);
175 struct token *typeof_specifier(struct token *token, struct ctype *ctype)
177 struct symbol *sym;
179 if (!match_op(token, '(')) {
180 warn(token->pos, "expected '(' after typeof");
181 return token;
183 if (lookup_type(token->next)) {
184 token = typename(token->next, &sym);
185 *ctype = sym->ctype;
186 } else {
187 struct symbol *typeof_sym = alloc_symbol(token->pos, SYM_TYPEOF);
188 token = parse_expression(token->next, &typeof_sym->initializer);
190 ctype->modifiers = 0;
191 ctype->base_type = typeof_sym;
193 return expect(token, ')', "after typeof");
196 static const char * handle_attribute(struct ctype *ctype, struct ident *attribute, struct expression *expr)
198 if (match_string_ident(attribute, "packed") ||
199 match_string_ident(attribute, "__packed__")) {
200 ctype->alignment = 1;
201 return NULL;
203 if (match_string_ident(attribute, "aligned") ||
204 match_string_ident(attribute, "__aligned__")) {
205 int alignment = max_alignment;
206 if (expr)
207 alignment = get_expression_value(expr);
208 ctype->alignment = alignment;
209 return NULL;
211 if (match_string_ident(attribute, "nocast")) {
212 ctype->modifiers |= MOD_NOCAST;
213 return NULL;
215 if (match_string_ident(attribute, "noderef")) {
216 ctype->modifiers |= MOD_NODEREF;
217 return NULL;
219 if (match_string_ident(attribute, "safe")) {
220 ctype->modifiers |= MOD_SAFE;
221 return NULL;
223 if (match_string_ident(attribute, "force")) {
224 ctype->modifiers |= MOD_FORCE;
225 return NULL;
227 if (match_string_ident(attribute, "address_space")) {
228 if (!expr)
229 return "expected address space number";
230 ctype->as = get_expression_value(expr);
231 return NULL;
233 if (match_string_ident(attribute, "context")) {
234 if (expr && expr->type == EXPR_COMMA) {
235 int mask = get_expression_value(expr->left);
236 int value = get_expression_value(expr->right);
237 if (value & ~mask)
238 return "nonsense attribute types";
239 ctype->contextmask |= mask;
240 ctype->context |= value;
241 return NULL;
243 return "expected context mask and value";
245 if (match_string_ident(attribute, "mode") ||
246 match_string_ident(attribute, "__mode__")) {
247 if (expr && expr->type == EXPR_SYMBOL) {
248 struct ident *ident = expr->symbol_name;
251 * Match against __QI__/__HI__/__SI__/__DI__
253 * FIXME! This is broken - we don't actually get
254 * the type information updated properly at this
255 * stage for some reason.
257 if (match_string_ident(ident, "__QI__") ||
258 match_string_ident(ident, "QI")) {
259 ctype->modifiers |= MOD_CHAR;
260 ctype->base_type = ctype_integer(ctype->modifiers);
261 return NULL;
263 if (match_string_ident(ident, "__HI__") ||
264 match_string_ident(ident, "HI")) {
265 ctype->modifiers |= MOD_SHORT;
266 ctype->base_type = ctype_integer(ctype->modifiers);
267 return NULL;
269 if (match_string_ident(ident, "__SI__") ||
270 match_string_ident(ident, "SI")) {
271 /* Nothing? */
272 return NULL;
274 if (match_string_ident(ident, "__DI__") ||
275 match_string_ident(ident, "DI")) {
276 ctype->modifiers |= MOD_LONGLONG;
277 ctype->base_type = ctype_integer(ctype->modifiers);
278 return NULL;
280 if (match_string_ident(ident, "__word__") ||
281 match_string_ident(ident, "word")) {
282 ctype->modifiers |= MOD_LONG;
283 ctype->base_type = ctype_integer(ctype->modifiers);
284 return NULL;
286 return "unknown mode attribute";
288 return "expected attribute mode symbol";
291 /* Throw away for now.. */
292 if (match_string_ident(attribute, "format") ||
293 match_string_ident(attribute, "__format__"))
294 return NULL;
295 if (match_string_ident(attribute, "section") ||
296 match_string_ident(attribute, "__section__"))
297 return NULL;
298 if (match_string_ident(attribute, "unused") ||
299 match_string_ident(attribute, "__unused__"))
300 return NULL;
301 if (match_string_ident(attribute, "const") ||
302 match_string_ident(attribute, "__const__"))
303 return NULL;
304 if (match_string_ident(attribute, "noreturn"))
305 return NULL;
306 if (match_string_ident(attribute, "regparm"))
307 return NULL;
308 if (match_string_ident(attribute, "weak"))
309 return NULL;
310 if (match_string_ident(attribute, "alias"))
311 return NULL;
312 if (match_string_ident(attribute, "pure"))
313 return NULL;
314 if (match_string_ident(attribute, "always_inline"))
315 return NULL;
316 if (match_string_ident(attribute, "syscall_linkage"))
317 return NULL;
318 if (match_string_ident(attribute, "visibility"))
319 return NULL;
321 return "unknown attribute";
324 struct token *attribute_specifier(struct token *token, struct ctype *ctype)
326 ctype->modifiers = 0;
327 token = expect(token, '(', "after attribute");
328 token = expect(token, '(', "after attribute");
330 for (;;) {
331 const char *error;
332 struct ident *attribute_name;
333 struct expression *attribute_expr;
335 if (eof_token(token))
336 break;
337 if (match_op(token, ';'))
338 break;
339 if (token_type(token) != TOKEN_IDENT)
340 break;
341 attribute_name = token->ident;
342 token = token->next;
343 attribute_expr = NULL;
344 if (match_op(token, '('))
345 token = parens_expression(token, &attribute_expr, "in attribute");
346 error = handle_attribute(ctype, attribute_name, attribute_expr);
347 if (error)
348 warn(token->pos, "attribute '%s': %s", show_ident(attribute_name), error);
349 if (!match_op(token, ','))
350 break;
351 token = token->next;
354 token = expect(token, ')', "after attribute");
355 token = expect(token, ')', "after attribute");
356 return token;
359 #define MOD_SPECIALBITS (MOD_STRUCTOF | MOD_UNIONOF | MOD_ENUMOF | MOD_ATTRIBUTE | MOD_TYPEOF)
360 #define MOD_SPECIFIER (MOD_CHAR | MOD_SHORT | MOD_LONG | MOD_LONGLONG | MOD_SIGNED | MOD_UNSIGNED)
362 struct symbol * ctype_integer(unsigned int spec)
364 static struct symbol *const integer_ctypes[][2] = {
365 { &llong_ctype, &ullong_ctype },
366 { &long_ctype, &ulong_ctype },
367 { &short_ctype, &ushort_ctype },
368 { &char_ctype, &uchar_ctype },
369 { &int_ctype, &uint_ctype },
371 struct symbol *const (*ctype)[2];
373 ctype = integer_ctypes;
374 if (!(spec & MOD_LONGLONG)) {
375 ctype++;
376 if (!(spec & MOD_LONG)) {
377 ctype++;
378 if (!(spec & MOD_SHORT)) {
379 ctype++;
380 if (!(spec & MOD_CHAR))
381 ctype++;
385 return ctype[0][(spec & MOD_UNSIGNED) != 0];
388 struct symbol * ctype_fp(unsigned int spec)
390 if (spec & MOD_LONGLONG)
391 return &ldouble_ctype;
392 if (spec & MOD_LONG)
393 return &double_ctype;
394 return &float_ctype;
397 static void apply_ctype(struct position pos, struct ctype *thistype, struct ctype *ctype)
399 unsigned long mod = thistype->modifiers;
401 if (mod) {
402 unsigned long old = ctype->modifiers;
403 unsigned long extra = 0, dup, conflict;
405 if (mod & old & MOD_LONG) {
406 extra = MOD_LONGLONG | MOD_LONG;
407 mod &= ~MOD_LONG;
408 old &= ~MOD_LONG;
410 dup = (mod & old) | (extra & old) | (extra & mod);
411 if (dup)
412 warn(pos, "Just how %sdo you want this type to be?",
413 modifier_string(dup));
415 conflict = !(~mod & ~old & (MOD_LONG | MOD_SHORT));
416 if (conflict)
417 warn(pos, "You cannot have both long and short modifiers.");
419 conflict = !(~mod & ~old & (MOD_SIGNED | MOD_UNSIGNED));
420 if (conflict)
421 warn(pos, "You cannot have both signed and unsigned modifiers.");
423 ctype->modifiers = old | mod | extra;
426 /* Context mask and value */
427 if ((ctype->context ^ thistype->context) & (ctype->contextmask & thistype->contextmask)) {
428 warn(pos, "inconsistend attribute types");
429 thistype->context = 0;
430 thistype->contextmask = 0;
432 ctype->context |= thistype->context;
433 ctype->contextmask |= thistype->contextmask;
435 /* Alignment */
436 if (thistype->alignment & (thistype->alignment-1)) {
437 warn(pos, "I don't like non-power-of-2 alignments");
438 thistype->alignment = 0;
440 if (thistype->alignment > ctype->alignment)
441 ctype->alignment = thistype->alignment;
443 /* Address space */
444 ctype->as = thistype->as;
448 static struct token *declaration_specifiers(struct token *next, struct ctype *ctype, int qual)
450 struct token *token;
452 while ( (token = next) != NULL ) {
453 struct ctype thistype;
454 struct ident *ident;
455 struct symbol *s, *type;
456 unsigned long mod;
458 next = token->next;
459 if (token_type(token) != TOKEN_IDENT)
460 break;
461 ident = token->ident;
463 s = lookup_symbol(ident, NS_TYPEDEF);
464 if (!s)
465 break;
466 thistype = s->ctype;
467 mod = thistype.modifiers;
468 if (qual && (mod & ~(MOD_ATTRIBUTE | MOD_CONST | MOD_VOLATILE)))
469 break;
470 if (mod & MOD_SPECIALBITS) {
471 if (mod & MOD_STRUCTOF)
472 next = struct_or_union_specifier(SYM_STRUCT, next, &thistype);
473 else if (mod & MOD_UNIONOF)
474 next = struct_or_union_specifier(SYM_UNION, next, &thistype);
475 else if (mod & MOD_ENUMOF)
476 next = enum_specifier(next, &thistype);
477 else if (mod & MOD_ATTRIBUTE)
478 next = attribute_specifier(next, &thistype);
479 else if (mod & MOD_TYPEOF)
480 next = typeof_specifier(next, &thistype);
481 mod = thistype.modifiers;
483 type = thistype.base_type;
484 if (type) {
485 if (qual)
486 break;
487 if (ctype->base_type)
488 break;
489 /* User types only mix with qualifiers */
490 if (mod & MOD_USERTYPE) {
491 if (ctype->modifiers & MOD_SPECIFIER)
492 break;
494 ctype->base_type = type;
497 apply_ctype(token->pos, &thistype, ctype);
500 /* Turn the "virtual types" into real types with real sizes etc */
501 if (!ctype->base_type) {
502 struct symbol *base = &incomplete_ctype;
505 * If we have modifiers, we'll default to an integer
506 * type, and "ctype_integer()" will turn this into
507 * a specific one.
509 if (ctype->modifiers & MOD_SPECIFIER)
510 base = &int_type;
511 ctype->base_type = base;
514 if (ctype->base_type == &int_type) {
515 ctype->base_type = ctype_integer(ctype->modifiers & MOD_SPECIFIER);
516 ctype->modifiers &= ~MOD_SPECIFIER;
517 return token;
519 if (ctype->base_type == &fp_type) {
520 ctype->base_type = ctype_fp(ctype->modifiers & MOD_SPECIFIER);
521 ctype->modifiers &= ~MOD_SPECIFIER;
522 return token;
524 return token;
527 static struct token *abstract_array_declarator(struct token *token, struct symbol *sym)
529 struct expression *expr = NULL;
531 token = parse_expression(token, &expr);
532 sym->array_size = expr;
533 return token;
536 static struct token *parameter_type_list(struct token *, struct symbol *);
537 static struct token *declarator(struct token *token, struct symbol **tree, struct ident **p);
539 static struct token *direct_declarator(struct token *token, struct symbol **tree, struct ident **p)
541 struct ctype *ctype = &(*tree)->ctype;
543 if (p && token_type(token) == TOKEN_IDENT) {
544 *p = token->ident;
545 token = token->next;
548 for (;;) {
549 if (match_idents(token, &__attribute___ident, &__attribute_ident, NULL)) {
550 struct ctype thistype = { 0, };
551 token = attribute_specifier(token->next, &thistype);
552 apply_ctype(token->pos, &thistype, ctype);
553 continue;
555 if (token_type(token) != TOKEN_SPECIAL)
556 return token;
559 * This can be either a parameter list or a grouping.
560 * For the direct (non-abstract) case, we know if must be
561 * a paramter list if we already saw the identifier.
562 * For the abstract case, we know if must be a parameter
563 * list if it is empty or starts with a type.
565 if (token->special == '(') {
566 struct symbol *sym;
567 struct token *next = token->next;
568 int fn = (p && *p) || match_op(next, ')') || lookup_type(next);
570 if (!fn) {
571 struct symbol *base_type = ctype->base_type;
572 token = declarator(next, tree, p);
573 token = expect(token, ')', "in nested declarator");
574 while (ctype->base_type != base_type)
575 ctype = &ctype->base_type->ctype;
576 p = NULL;
577 continue;
580 sym = indirect(token->pos, ctype, SYM_FN);
581 token = parameter_type_list(next, sym);
582 token = expect(token, ')', "in function declarator");
583 continue;
585 if (token->special == '[') {
586 struct symbol *array = indirect(token->pos, ctype, SYM_ARRAY);
587 token = abstract_array_declarator(token->next, array);
588 token = expect(token, ']', "in abstract_array_declarator");
589 ctype = &array->ctype;
590 continue;
592 if (token->special == ':') {
593 if (is_int_type (ctype->base_type)) {
594 struct symbol *bitfield = indirect(token->pos, ctype, SYM_BITFIELD);
595 struct expression *expr;
596 token = conditional_expression(token->next, &expr);
597 bitfield->fieldwidth = get_expression_value(expr);
598 } else
599 error(token->pos, "Invalid bitfield specifier for type %s.", show_typename (ctype->base_type));
600 while (match_idents(token, &__attribute___ident, &__attribute_ident, NULL)) {
601 struct ctype thistype = { 0, };
602 token = attribute_specifier(token->next, &thistype);
603 apply_ctype(token->pos, &thistype, ctype);
605 break;
607 break;
609 if (p) {
610 (*tree)->ident = *p;
612 return token;
615 static struct token *pointer(struct token *token, struct ctype *ctype)
617 unsigned long modifiers;
618 struct symbol *base_type;
620 modifiers = ctype->modifiers & ~(MOD_TYPEDEF | MOD_ATTRIBUTE);
621 base_type = ctype->base_type;
622 ctype->modifiers = modifiers;
624 while (match_op(token,'*')) {
625 struct symbol *ptr = alloc_symbol(token->pos, SYM_PTR);
626 ptr->ctype.modifiers = modifiers & ~MOD_STORAGE;
627 ptr->ctype.as = ctype->as;
628 ptr->ctype.context = ctype->context;
629 ptr->ctype.contextmask = ctype->contextmask;
630 ptr->ctype.base_type = base_type;
632 base_type = ptr;
633 ctype->modifiers = modifiers & MOD_STORAGE;
634 ctype->base_type = base_type;
635 ctype->as = 0;
636 ctype->context = 0;
637 ctype->contextmask = 0;
639 token = declaration_specifiers(token->next, ctype, 1);
640 modifiers = ctype->modifiers;
642 return token;
645 static struct token *declarator(struct token *token, struct symbol **tree, struct ident **p)
647 token = pointer(token, &(*tree)->ctype);
648 return direct_declarator(token, tree, p);
651 static struct token *struct_declaration_list(struct token *token, struct symbol_list **list)
653 while (!match_op(token, '}')) {
654 struct ctype ctype = {0, };
656 token = declaration_specifiers(token, &ctype, 0);
657 for (;;) {
658 struct ident *ident = NULL;
659 struct symbol *decl = alloc_symbol(token->pos, SYM_NODE);
660 decl->ctype = ctype;
661 token = declarator(token, &decl, &ident);
662 if (match_op(token, ':')) {
663 struct expression *expr;
664 token = parse_expression(token->next, &expr);
666 add_symbol(list, decl);
667 if (!match_op(token, ','))
668 break;
669 token = token->next;
671 if (!match_op(token, ';')) {
672 warn(token->pos, "expected ; at end of declaration");
673 break;
675 token = token->next;
677 return token;
680 static struct token *parameter_declaration(struct token *token, struct symbol **tree)
682 struct ident *ident = NULL;
683 struct symbol *sym;
684 struct ctype ctype = { 0, };
686 token = declaration_specifiers(token, &ctype, 0);
687 sym = alloc_symbol(token->pos, SYM_NODE);
688 sym->ctype = ctype;
689 *tree = sym;
690 token = declarator(token, tree, &ident);
691 return token;
694 struct token *typename(struct token *token, struct symbol **p)
696 struct symbol *sym = alloc_symbol(token->pos, SYM_NODE);
697 *p = sym;
698 token = declaration_specifiers(token, &sym->ctype, 0);
699 return declarator(token, &sym, NULL);
/* Parse "expression ;" and store the expression in "*tree". */
struct token *expression_statement(struct token *token, struct expression **tree)
{
	struct token *end = parse_expression(token, tree);

	return expect(end, ';', "at end of statement");
}
708 static struct token *parse_asm_operands(struct token *token, struct statement *stmt)
710 struct expression *expr;
712 /* Allow empty operands */
713 if (match_op(token->next, ':') || match_op(token->next, ')'))
714 return token->next;
715 do {
716 if (match_op(token->next, '[') &&
717 token_type(token->next->next) == TOKEN_IDENT &&
718 match_op(token->next->next->next, ']'))
719 token = token->next->next->next;
720 token = primary_expression(token->next, &expr);
721 token = parens_expression(token, &expr, "in asm parameter");
722 } while (match_op(token, ','));
723 return token;
726 static struct token *parse_asm_clobbers(struct token *token, struct statement *stmt)
728 struct expression *expr;
730 do {
731 token = primary_expression(token->next, &expr);
732 } while (match_op(token, ','));
733 return token;
736 static struct token *parse_asm(struct token *token, struct statement *stmt)
738 struct expression *expr;
740 stmt->type = STMT_ASM;
741 if (match_idents(token, &__volatile___ident, &volatile_ident)) {
742 token = token->next;
744 token = expect(token, '(', "after asm");
745 token = parse_expression(token->next, &expr);
746 if (match_op(token, ':'))
747 token = parse_asm_operands(token, stmt);
748 if (match_op(token, ':'))
749 token = parse_asm_operands(token, stmt);
750 if (match_op(token, ':'))
751 token = parse_asm_clobbers(token, stmt);
752 token = expect(token, ')', "after asm");
753 return expect(token, ';', "at end of asm-statement");
756 /* Make a statement out of an expression */
757 static struct statement *make_statement(struct expression *expr)
759 struct statement *stmt;
761 if (!expr)
762 return NULL;
763 stmt = alloc_statement(expr->pos, STMT_EXPRESSION);
764 stmt->expression = expr;
765 return stmt;
769 * All iterators have two symbols associated with them:
770 * the "continue" and "break" symbols, which are targets
771 * for continue and break statements respectively.
773 * They are in a special name-space, but they follow
774 * all the normal visibility rules, so nested iterators
775 * automatically work right.
777 static void start_iterator(struct statement *stmt)
779 struct symbol *cont, *brk;
781 start_symbol_scope();
782 cont = alloc_symbol(stmt->pos, SYM_NODE);
783 cont->ident = &continue_ident;
784 bind_symbol(cont, &continue_ident, NS_ITERATOR);
785 brk = alloc_symbol(stmt->pos, SYM_NODE);
786 brk->ident = &break_ident;
787 bind_symbol(brk, &break_ident, NS_ITERATOR);
789 stmt->type = STMT_ITERATOR;
790 stmt->iterator_break = brk;
791 stmt->iterator_continue = cont;
792 fn_local_symbol(brk);
793 fn_local_symbol(cont);
/* Close the symbol scope opened by start_iterator(); "stmt" is unused. */
static void end_iterator(struct statement *stmt)
{
	(void) stmt;
	end_symbol_scope();
}
801 static struct statement *start_function(struct symbol *sym)
803 struct symbol *ret;
804 struct statement *stmt = alloc_statement(sym->pos, STMT_COMPOUND);
806 start_function_scope();
807 ret = alloc_symbol(sym->pos, SYM_NODE);
808 ret->ident = &return_ident;
809 ret->ctype = sym->ctype.base_type->ctype;
810 ret->ctype.modifiers &= ~(MOD_STORAGE | MOD_CONST | MOD_VOLATILE | MOD_INLINE | MOD_ADDRESSABLE | MOD_NOCAST | MOD_NODEREF | MOD_ACCESSED | MOD_TOPLEVEL);
811 ret->ctype.modifiers |= (MOD_AUTO | MOD_REGISTER);
812 bind_symbol(ret, &return_ident, NS_ITERATOR);
813 stmt->ret = ret;
815 fn_local_symbol(ret);
816 return stmt;
/* Close the function scope opened by start_function(); "sym" is unused. */
static void end_function(struct symbol *sym)
{
	(void) sym;
	end_function_scope();
}
825 * A "switch()" statement, like an iterator, has a
826 * the "break" symbol associated with it. It works
827 * exactly like the iterator break - it's the target
828 * for any break-statements in scope, and means that
829 * "break" handling doesn't even need to know whether
830 * it's breaking out of an iterator or a switch.
832 * In addition, the "case" symbol is a marker for the
833 * case/default statements to find the switch statement
834 * that they are associated with.
836 static void start_switch(struct statement *stmt)
838 struct symbol *brk, *switch_case;
840 start_symbol_scope();
841 brk = alloc_symbol(stmt->pos, SYM_NODE);
842 brk->ident = &break_ident;
843 bind_symbol(brk, &break_ident, NS_ITERATOR);
845 switch_case = alloc_symbol(stmt->pos, SYM_NODE);
846 switch_case->ident = &case_ident;
847 bind_symbol(switch_case, &case_ident, NS_ITERATOR);
848 switch_case->stmt = stmt;
850 stmt->type = STMT_SWITCH;
851 stmt->switch_break = brk;
852 stmt->switch_case = switch_case;
854 fn_local_symbol(brk);
855 fn_local_symbol(switch_case);
858 static void end_switch(struct statement *stmt)
860 if (!stmt->switch_case->symbol_list)
861 warn(stmt->pos, "switch with no cases");
862 end_symbol_scope();
865 static void add_case_statement(struct statement *stmt)
867 struct symbol *target = lookup_symbol(&case_ident, NS_ITERATOR);
868 struct symbol *sym;
870 if (!target) {
871 warn(stmt->pos, "not in switch scope");
872 return;
874 sym = alloc_symbol(stmt->pos, SYM_NODE);
875 add_symbol(&target->symbol_list, sym);
876 sym->stmt = stmt;
877 stmt->case_label = sym;
878 fn_local_symbol(sym);
881 static struct token *parse_return_statement(struct token *token, struct statement *stmt)
883 struct symbol *target = lookup_symbol(&return_ident, NS_ITERATOR);
885 if (!target)
886 error(token->pos, "internal error: return without a function target");
887 stmt->type = STMT_RETURN;
888 stmt->ret_target = target;
889 return expression_statement(token->next, &stmt->ret_value);
892 static struct token *parse_for_statement(struct token *token, struct statement *stmt)
894 struct symbol_list *syms;
895 struct expression *e1, *e2, *e3;
896 struct statement *iterator;
898 start_iterator(stmt);
899 token = expect(token->next, '(', "after 'for'");
901 syms = NULL;
902 e1 = NULL;
903 /* C99 variable declaration? */
904 if (lookup_type(token)) {
905 token = external_declaration(token, &syms);
906 } else {
907 token = parse_expression(token, &e1);
908 token = expect(token, ';', "in 'for'");
910 token = parse_expression(token, &e2);
911 token = expect(token, ';', "in 'for'");
912 token = parse_expression(token, &e3);
913 token = expect(token, ')', "in 'for'");
914 token = statement(token, &iterator);
916 stmt->iterator_syms = syms;
917 stmt->iterator_pre_statement = make_statement(e1);
918 stmt->iterator_pre_condition = e2;
919 stmt->iterator_post_statement = make_statement(e3);
920 stmt->iterator_post_condition = e2;
921 stmt->iterator_statement = iterator;
922 end_iterator(stmt);
924 return token;
927 struct token *parse_while_statement(struct token *token, struct statement *stmt)
929 struct expression *expr;
930 struct statement *iterator;
932 start_iterator(stmt);
933 token = parens_expression(token->next, &expr, "after 'while'");
934 token = statement(token, &iterator);
936 stmt->iterator_pre_condition = expr;
937 stmt->iterator_post_condition = expr;
938 stmt->iterator_statement = iterator;
939 end_iterator(stmt);
941 return token;
944 struct token *parse_do_statement(struct token *token, struct statement *stmt)
946 struct expression *expr;
947 struct statement *iterator;
949 start_iterator(stmt);
950 token = statement(token->next, &iterator);
951 if (token_type(token) == TOKEN_IDENT && token->ident == &while_ident)
952 token = token->next;
953 else
954 warn(token->pos, "expected 'while' after 'do'");
955 token = parens_expression(token, &expr, "after 'do-while'");
957 stmt->iterator_post_condition = expr;
958 stmt->iterator_statement = iterator;
959 end_iterator(stmt);
961 return expect(token, ';', "after statement");
964 static struct token *statement(struct token *token, struct statement **tree)
966 struct statement *stmt = alloc_statement(token->pos, STMT_NONE);
968 *tree = stmt;
969 if (token_type(token) == TOKEN_IDENT) {
970 if (token->ident == &if_ident) {
971 stmt->type = STMT_IF;
972 token = parens_expression(token->next, &stmt->if_conditional, "after if");
973 token = statement(token, &stmt->if_true);
974 if (token_type(token) != TOKEN_IDENT)
975 return token;
976 if (token->ident != &else_ident)
977 return token;
978 return statement(token->next, &stmt->if_false);
981 if (token->ident == &return_ident)
982 return parse_return_statement(token, stmt);
984 if (token->ident == &break_ident || token->ident == &continue_ident) {
985 struct symbol *target = lookup_symbol(token->ident, NS_ITERATOR);
986 stmt->type = STMT_GOTO;
987 stmt->goto_label = target;
988 if (!target)
989 warn(stmt->pos, "break/continue not in iterator scope");
990 return expect(token->next, ';', "at end of statement");
992 if (token->ident == &default_ident) {
993 token = token->next;
994 goto default_statement;
996 if (token->ident == &case_ident) {
997 token = parse_expression(token->next, &stmt->case_expression);
998 if (match_op(token, SPECIAL_ELLIPSIS))
999 token = parse_expression(token->next, &stmt->case_to);
1000 default_statement:
1001 stmt->type = STMT_CASE;
1002 token = expect(token, ':', "after default/case");
1003 add_case_statement(stmt);
1004 return statement(token, &stmt->case_statement);
1006 if (token->ident == &switch_ident) {
1007 stmt->type = STMT_SWITCH;
1008 start_switch(stmt);
1009 token = parens_expression(token->next, &stmt->switch_expression, "after 'switch'");
1010 token = statement(token, &stmt->switch_statement);
1011 end_switch(stmt);
1012 return token;
1014 if (token->ident == &for_ident)
1015 return parse_for_statement(token, stmt);
1017 if (token->ident == &while_ident)
1018 return parse_while_statement(token, stmt);
1020 if (token->ident == &do_ident)
1021 return parse_do_statement(token, stmt);
1023 if (token->ident == &goto_ident) {
1024 stmt->type = STMT_GOTO;
1025 token = token->next;
1026 if (match_op(token, '*')) {
1027 token = parse_expression(token->next, &stmt->goto_expression);
1028 add_statement(&function_computed_goto_list, stmt);
1029 } else if (token_type(token) == TOKEN_IDENT) {
1030 stmt->goto_label = label_symbol(token);
1031 token = token->next;
1032 } else {
1033 warn(token->pos, "Expected identifier or goto expression");
1035 return expect(token, ';', "at end of statement");
1037 if (match_idents(token, &asm_ident, &__asm___ident, &__asm_ident, NULL)) {
1038 return parse_asm(token->next, stmt);
1040 if (match_op(token->next, ':')) {
1041 stmt->type = STMT_LABEL;
1042 stmt->label_identifier = label_symbol(token);
1043 return statement(token->next->next, &stmt->label_statement);
1047 if (match_op(token, '{')) {
1048 stmt->type = STMT_COMPOUND;
1049 start_symbol_scope();
1050 token = compound_statement(token->next, stmt);
1051 end_symbol_scope();
1053 return expect(token, '}', "at end of compound statement");
1056 stmt->type = STMT_EXPRESSION;
1057 return expression_statement(token, &stmt->expression);
/*
 * Parse statements and append them to "list" until end of input
 * or a closing '}' (which is not consumed).
 */
struct token * statement_list(struct token *token, struct statement_list **list)
{
	while (!eof_token(token) && !match_op(token, '}')) {
		struct statement *parsed;

		token = statement(token, &parsed);
		add_statement(list, parsed);
	}
	return token;
}
1074 static struct token *parameter_type_list(struct token *token, struct symbol *fn)
1076 struct symbol_list **list = &fn->arguments;
1077 for (;;) {
1078 struct symbol *sym = alloc_symbol(token->pos, SYM_NODE);
1080 if (match_op(token, SPECIAL_ELLIPSIS)) {
1081 fn->variadic = 1;
1082 token = token->next;
1083 break;
1086 if (!lookup_type(token)) {
1087 warn(token->pos, "non-ANSI parameter list");
1088 break;
1090 token = parameter_declaration(token, &sym);
1091 /* Special case: (void) */
1092 if (!*list && !sym->ident && sym->ctype.base_type == &void_ctype)
1093 break;
1094 add_symbol(list, sym);
1095 if (!match_op(token, ','))
1096 break;
1097 token = token->next;
1100 return token;
1103 struct token *compound_statement(struct token *token, struct statement *stmt)
1105 while (!eof_token(token)) {
1106 if (!lookup_type(token))
1107 break;
1108 token = external_declaration(token, &stmt->syms);
1110 token = statement_list(token, &stmt->stmts);
1111 return token;
1114 static struct expression *identifier_expression(struct token *token)
1116 struct expression *expr = alloc_expression(token->pos, EXPR_IDENTIFIER);
1117 expr->expr_ident = token->ident;
1118 return expr;
1121 static struct expression *index_expression(struct expression *from, struct expression *to)
1123 int idx_from, idx_to;
1124 struct expression *expr = alloc_expression(from->pos, EXPR_INDEX);
1126 idx_from = get_expression_value(from);
1127 idx_to = idx_from;
1128 if (to) {
1129 idx_to = get_expression_value(to);
1130 if (idx_to < idx_from || idx_from < 0)
1131 warn(from->pos, "nonsense array initializer index range");
1133 expr->idx_from = idx_from;
1134 expr->idx_to = idx_to;
1135 return expr;
1138 static struct token *initializer_list(struct expression_list **list, struct token *token)
1140 for (;;) {
1141 struct token *next = token->next;
1142 struct expression *expr;
1144 if (match_op(token, '.') && (token_type(next) == TOKEN_IDENT) && match_op(next->next, '=')) {
1145 add_expression(list, identifier_expression(next));
1146 token = next->next->next;
1147 } else if ((token_type(token) == TOKEN_IDENT) && match_op(next, ':')) {
1148 add_expression(list, identifier_expression(token));
1149 token = next->next;
1150 } else if (match_op(token, '[')) {
1151 struct expression *from = NULL, *to = NULL;
1152 token = constant_expression(token->next, &from);
1153 if (match_op(token, SPECIAL_ELLIPSIS))
1154 token = constant_expression(token->next, &to);
1155 add_expression(list, index_expression(from, to));
1156 token = expect(token, ']', "at end of initializer index");
1157 token = expect(token, '=', "at end of initializer index");
1160 expr = NULL;
1161 token = initializer(&expr, token);
1162 if (!expr)
1163 break;
1164 add_expression(list, expr);
1165 if (!match_op(token, ','))
1166 break;
1167 token = token->next;
1169 return token;
1172 struct token *initializer(struct expression **tree, struct token *token)
1174 if (match_op(token, '{')) {
1175 struct expression *expr = alloc_expression(token->pos, EXPR_INITIALIZER);
1176 *tree = expr;
1177 token = initializer_list(&expr->expr_list, token->next);
1178 return expect(token, '}', "at end of initializer");
1180 return assignment_expression(token, tree);
1183 static void declare_argument(struct symbol *sym, struct symbol *fn)
1185 if (!sym->ident) {
1186 warn(sym->pos, "no identifier for function argument");
1187 return;
1189 bind_symbol(sym, sym->ident, NS_SYMBOL);
1192 static struct token *parse_function_body(struct token *token, struct symbol *decl,
1193 struct symbol_list **list)
1195 struct symbol *base_type = decl->ctype.base_type;
1196 struct statement *stmt;
1197 struct symbol *arg;
1199 function_symbol_list = &decl->symbol_list;
1200 function_computed_target_list = NULL;
1201 function_computed_goto_list = NULL;
1203 if (decl->ctype.modifiers & MOD_EXTERN) {
1204 if (!(decl->ctype.modifiers & MOD_INLINE))
1205 warn(decl->pos, "function with external linkage has definition");
1207 if (!(decl->ctype.modifiers & MOD_STATIC))
1208 decl->ctype.modifiers |= MOD_EXTERN;
1210 stmt = start_function(decl);
1212 base_type->stmt = stmt;
1213 FOR_EACH_PTR (base_type->arguments, arg) {
1214 declare_argument(arg, base_type);
1215 } END_FOR_EACH_PTR;
1217 token = compound_statement(token->next, stmt);
1219 end_function(decl);
1220 if (!(decl->ctype.modifiers & MOD_INLINE))
1221 add_symbol(list, decl);
1222 check_declaration(decl);
1223 function_symbol_list = NULL;
1224 if (function_computed_goto_list) {
1225 if (!function_computed_target_list)
1226 warn(decl->pos, "function has computed goto but no targets?");
1227 else {
1228 struct statement *stmt;
1229 FOR_EACH_PTR(function_computed_goto_list, stmt) {
1230 stmt->target_list = function_computed_target_list;
1231 } END_FOR_EACH_PTR;
1234 return expect(token, '}', "at end of function");
1237 static struct token *external_declaration(struct token *token, struct symbol_list **list)
1239 struct ident *ident = NULL;
1240 struct symbol *decl;
1241 struct ctype ctype = { 0, };
1242 struct symbol *base_type;
1243 int is_typedef;
1245 /* Top-level inline asm? */
1246 if (match_idents(token, &asm_ident, &__asm___ident, &__asm_ident)) {
1247 struct symbol *anon = alloc_symbol(token->pos, SYM_NODE);
1248 struct symbol *fn = alloc_symbol(token->pos, SYM_FN);
1249 struct statement *stmt;
1251 anon->ctype.base_type = fn;
1252 function_symbol_list = &anon->symbol_list;
1253 stmt = start_function(anon);
1254 token = parse_asm(token->next, stmt);
1255 end_function(anon);
1256 function_symbol_list = NULL;
1257 add_symbol(list, anon);
1258 return token;
1261 /* Parse declaration-specifiers, if any */
1262 token = declaration_specifiers(token, &ctype, 0);
1263 decl = alloc_symbol(token->pos, SYM_NODE);
1264 decl->ctype = ctype;
1265 token = declarator(token, &decl, &ident);
1267 /* Just a type declaration? */
1268 if (!ident)
1269 return expect(token, ';', "end of type declaration");
1271 decl->ident = ident;
1273 /* type define declaration? */
1274 is_typedef = (ctype.modifiers & MOD_TYPEDEF) != 0;
1276 /* Typedef's don't have meaningful storage */
1277 if (is_typedef) {
1278 ctype.modifiers &= ~MOD_STORAGE;
1279 decl->ctype.modifiers &= ~MOD_STORAGE;
1280 decl->ctype.modifiers |= MOD_USERTYPE;
1283 bind_symbol(decl, ident, is_typedef ? NS_TYPEDEF: NS_SYMBOL);
1285 base_type = decl->ctype.base_type;
1286 if (!is_typedef && base_type && base_type->type == SYM_FN) {
1287 if (match_op(token, '{'))
1288 return parse_function_body(token, decl, list);
1290 if (!(decl->ctype.modifiers & MOD_STATIC))
1291 decl->ctype.modifiers |= MOD_EXTERN;
1294 for (;;) {
1295 if (token_type(token) == TOKEN_IDENT) {
1296 if (token->ident == &asm_ident || token->ident == &__asm_ident || token->ident == &__asm___ident) {
1297 struct expression *expr;
1299 token = expect(token->next, '(', "after asm");
1300 token = parse_expression(token->next, &expr);
1301 token = expect(token, ')', "after asm");
1304 if (!is_typedef && match_op(token, '=')) {
1305 if (decl->ctype.modifiers & MOD_EXTERN) {
1306 warn(decl->pos, "symbol with external linkage has initializer");
1307 decl->ctype.modifiers &= ~MOD_EXTERN;
1309 token = initializer(&decl->initializer, token->next);
1311 if (!is_typedef) {
1312 if (!(decl->ctype.modifiers & (MOD_EXTERN | MOD_INLINE))) {
1313 add_symbol(list, decl);
1314 if (function_symbol_list)
1315 fn_local_symbol(decl);
1318 check_declaration(decl);
1320 if (!match_op(token, ','))
1321 break;
1323 token = token->next;
1324 ident = NULL;
1325 decl = alloc_symbol(token->pos, SYM_NODE);
1326 decl->ctype = ctype;
1327 token = declaration_specifiers(token, &decl->ctype, 1);
1328 token = declarator(token, &decl, &ident);
1329 if (!ident) {
1330 warn(token->pos, "expected identifier name in type definition");
1331 return token;
1334 bind_symbol(decl, ident, is_typedef ? NS_TYPEDEF: NS_SYMBOL);
1336 /* Function declarations are automatically extern unless specifically static */
1337 base_type = decl->ctype.base_type;
1338 if (!is_typedef && base_type && base_type->type == SYM_FN) {
1339 if (!(decl->ctype.modifiers & MOD_STATIC))
1340 decl->ctype.modifiers |= MOD_EXTERN;
1343 return expect(token, ';', "at end of declaration");
/*
 * Parse a whole translation unit: external declarations are parsed one
 * after another until end of input, with the resulting symbols added
 * to '*list'.  The token allocations are released afterwards.
 */
void translation_unit(struct token *token, struct symbol_list **list)
{
	struct token *cursor = token;

	while (!eof_token(cursor))
		cursor = external_declaration(cursor, list);

	/* The tokens aren't needed any more once parsing is done */
	clear_token_alloc();
}