2 * Stupid C parser, version 1e-6.
4 * Let's see how hard this is to do.
6 * Copyright (C) 2003 Linus Torvalds, all rights reserved.
22 #include "expression.h"
24 struct statement
*alloc_statement(struct token
* token
, int type
)
26 struct statement
*stmt
= __alloc_statement(0);
/*
 * Forward declaration: parse_struct_declaration() calls this before the
 * definition that appears later in the file.
 */
static struct token *struct_declaration_list(struct token *token,
					     struct symbol_list **list);
33 static struct symbol
* indirect(struct token
*token
, struct symbol
*base
, int type
)
35 struct symbol
*sym
= alloc_symbol(token
, type
);
37 sym
->ctype
.base_type
= base
->ctype
.base_type
;
38 sym
->ctype
.modifiers
= base
->ctype
.modifiers
& ~MOD_STORAGE
;
40 base
->ctype
.base_type
= sym
;
41 base
->ctype
.modifiers
&= MOD_STORAGE
;
45 static struct symbol
*lookup_or_create_symbol(enum namespace ns
, enum type type
, struct token
*token
)
47 struct symbol
*sym
= lookup_symbol(token
->ident
, ns
);
49 sym
= alloc_symbol(token
, type
);
50 bind_symbol(sym
, token
->ident
, ns
);
55 struct token
*struct_union_enum_specifier(enum namespace ns
, enum type type
,
56 struct token
*token
, struct ctype
*ctype
,
57 struct token
*(*parse
)(struct token
*, struct symbol
*))
62 if (token
->type
== TOKEN_IDENT
) {
63 sym
= lookup_or_create_symbol(ns
, type
, token
);
65 ctype
->base_type
= sym
;
66 if (match_op(token
, '{')) {
67 token
= parse(token
->next
, sym
);
68 token
= expect(token
, '}', "at end of struct-union-enum-specifier");
73 // private struct/union/enum type
74 if (!match_op(token
, '{')) {
75 warn(token
, "expected declaration");
76 ctype
->base_type
= &bad_type
;
80 sym
= alloc_symbol(token
, type
);
81 token
= parse(token
->next
, sym
);
82 ctype
->base_type
= sym
;
83 return expect(token
, '}', "at end of specifier");
86 static struct token
*parse_struct_declaration(struct token
*token
, struct symbol
*sym
)
88 return struct_declaration_list(token
, &sym
->symbol_list
);
91 struct token
*struct_or_union_specifier(enum type type
, struct token
*token
, struct ctype
*ctype
)
93 return struct_union_enum_specifier(NS_STRUCT
, type
, token
, ctype
, parse_struct_declaration
);
96 static struct token
*parse_enum_declaration(struct token
*token
, struct symbol
*parent
)
99 while (token
->type
== TOKEN_IDENT
) {
100 struct token
*next
= token
->next
;
103 sym
= alloc_symbol(token
, SYM_NODE
);
104 bind_symbol(sym
, token
->ident
, NS_SYMBOL
);
105 sym
->ctype
.base_type
= parent
;
107 if (match_op(next
, '=')) {
108 struct expression
*expr
;
109 next
= constant_expression(next
->next
, &expr
);
110 nextval
= get_expression_value(expr
);
112 sym
->value
= nextval
;
115 if (!match_op(token
, ','))
118 nextval
= nextval
+ 1;
123 struct token
*enum_specifier(struct token
*token
, struct ctype
*ctype
)
125 return struct_union_enum_specifier(NS_ENUM
, SYM_ENUM
, token
, ctype
, parse_enum_declaration
);
128 struct token
*typeof_specifier(struct token
*token
, struct ctype
*ctype
)
131 struct expression
*expr
;
133 if (!match_op(token
, '(')) {
134 warn(token
, "expected '(' after typeof");
137 if (lookup_type(token
->next
)) {
138 token
= typename(token
->next
, &sym
);
141 token
= parse_expression(token
->next
, &expr
);
142 /* Leave ctype at NULL, we'll evaluate it lazily later.. */
143 ctype
->modifiers
= 0;
144 ctype
->base_type
= NULL
;
146 return expect(token
, ')', "after typeof");
149 struct token
*attribute_specifier(struct token
*token
, struct ctype
*ctype
)
153 token
= expect(token
, '(', "after attribute");
154 token
= expect(token
, '(', "after attribute");
157 if (eof_token(token
))
159 if (match_op(token
, ';'))
161 if (match_op(token
, ')')) {
166 if (match_op(token
, '('))
171 token
= expect(token
, ')', "after attribute");
172 token
= expect(token
, ')', "after attribute");
/*
 * Modifier bits for which declaration_specifiers() calls a dedicated
 * sub-parser (struct/union, enum, attribute, typeof) instead of just
 * accumulating the modifier.
 */
#define MOD_SPECIALBITS \
	(MOD_STRUCTOF | MOD_UNIONOF | MOD_ENUMOF | MOD_ATTRIBUTE | MOD_TYPEOF)
178 static struct token
*type_qualifiers(struct token
*next
, struct ctype
*ctype
)
181 while ( (token
= next
) != NULL
) {
182 struct symbol
*s
, *base_type
;
186 if (token
->type
!= TOKEN_IDENT
)
188 s
= lookup_symbol(token
->ident
, NS_TYPEDEF
);
191 mod
= s
->ctype
.modifiers
;
192 base_type
= s
->ctype
.base_type
;
195 if (mod
& ~(MOD_CONST
| MOD_VOLATILE
))
197 ctype
->modifiers
|= mod
;
/*
 * Size/signedness modifier bits.  declaration_specifiers() passes the
 * subset of these present in a ctype to ctype_integer() / ctype_fp() to
 * pick the concrete base type, then clears them from the modifiers.
 */
#define MOD_SPECIFIER \
	(MOD_CHAR | MOD_SHORT | MOD_LONG | MOD_LONGLONG | MOD_SIGNED | MOD_UNSIGNED)
204 struct symbol
* ctype_integer(unsigned int spec
)
206 static struct symbol
*const integer_ctypes
[][2] = {
207 { &llong_ctype
, &ullong_ctype
},
208 { &long_ctype
, &ulong_ctype
},
209 { &short_ctype
, &ushort_ctype
},
210 { &char_ctype
, &uchar_ctype
},
211 { &int_ctype
, &uint_ctype
},
213 struct symbol
*const (*ctype
)[2];
215 ctype
= integer_ctypes
;
216 if (!(spec
& MOD_LONGLONG
)) {
218 if (!(spec
& MOD_LONG
)) {
220 if (!(spec
& MOD_SHORT
)) {
222 if (!(spec
& MOD_CHAR
))
227 return ctype
[0][(spec
& MOD_UNSIGNED
) != 0];
230 struct symbol
* ctype_fp(unsigned int spec
)
232 if (spec
& MOD_LONGLONG
)
233 return &ldouble_ctype
;
235 return &double_ctype
;
239 static struct token
*declaration_specifiers(struct token
*next
, struct ctype
*ctype
)
243 while ( (token
= next
) != NULL
) {
244 struct ctype thistype
;
246 struct symbol
*s
, *type
;
250 if (token
->type
!= TOKEN_IDENT
)
252 ident
= token
->ident
;
254 s
= lookup_symbol(ident
, NS_TYPEDEF
);
258 mod
= thistype
.modifiers
;
259 if (mod
& MOD_SPECIALBITS
) {
260 if (mod
& MOD_STRUCTOF
)
261 next
= struct_or_union_specifier(SYM_STRUCT
, next
, &thistype
);
262 else if (mod
& MOD_UNIONOF
)
263 next
= struct_or_union_specifier(SYM_UNION
, next
, &thistype
);
264 else if (mod
& MOD_ENUMOF
)
265 next
= enum_specifier(next
, &thistype
);
266 else if (mod
& MOD_ATTRIBUTE
)
267 next
= attribute_specifier(next
, &thistype
);
268 else if (mod
& MOD_TYPEOF
)
269 next
= typeof_specifier(next
, &thistype
);
270 mod
= thistype
.modifiers
;
273 type
= thistype
.base_type
;
275 if (type
!= ctype
->base_type
) {
276 if (ctype
->base_type
) {
277 warn(token
, "Strange mix of types");
280 ctype
->base_type
= type
;
284 unsigned long old
= ctype
->modifiers
;
285 unsigned long extra
= 0, dup
;
287 if (mod
& old
& MOD_LONG
) {
288 extra
= MOD_LONGLONG
| MOD_LONG
;
292 dup
= (mod
& old
) | (extra
& old
) | (extra
& mod
);
294 warn(token
, "Just how %s do you want this type to be?",
295 modifier_string(dup
));
296 ctype
->modifiers
= old
| mod
| extra
;
300 /* Turn the "virtual types" into real types with real sizes etc */
301 if (!ctype
->base_type
&& (ctype
->modifiers
& MOD_SPECIFIER
))
302 ctype
->base_type
= &int_type
;
304 if (ctype
->base_type
== &int_type
) {
305 ctype
->base_type
= ctype_integer(ctype
->modifiers
& MOD_SPECIFIER
);
306 ctype
->modifiers
&= ~MOD_SPECIFIER
;
309 if (ctype
->base_type
== &fp_type
) {
310 ctype
->base_type
= ctype_fp(ctype
->modifiers
& MOD_SPECIFIER
);
311 ctype
->modifiers
&= ~MOD_SPECIFIER
;
317 static struct token
*abstract_array_declarator(struct token
*token
, struct symbol
*sym
)
319 struct expression
*expr
= NULL
;
321 token
= parse_expression(token
, &expr
);
323 sym
->array_size
= get_expression_value(expr
);
325 sym
->array_size
= -1;
/*
 * Forward declarations: direct_declarator() calls both of these before
 * their definitions appear later in the file.  Parameter names are kept
 * in the prototypes so they match the definitions.
 */
static struct token *parameter_type_list(struct token *token, struct symbol *fn);
static struct token *declarator(struct token *token, struct symbol **tree, struct token **p);
332 static struct token
*direct_declarator(struct token
*token
, struct symbol
**tree
, struct token
**p
)
334 if (p
&& token
->type
== TOKEN_IDENT
) {
340 if (match_ident(token
, &__attribute___ident
) || match_ident(token
, &__attribute_ident
)) {
341 struct ctype ctype
= { 0, };
342 token
= attribute_specifier(token
->next
, &ctype
);
345 if (token
->type
!= TOKEN_SPECIAL
)
349 * This can be either a parameter list or a grouping.
350 * For the direct (non-abstract) case, we know it must be
351 * a parameter list if we already saw the identifier.
352 * For the abstract case, we know it must be a parameter
353 * list if it is empty or starts with a type.
355 if (token
->special
== '(') {
357 struct token
*next
= token
->next
;
358 int fn
= (p
&& *p
) || match_op(next
, ')') || lookup_type(next
);
361 token
= declarator(next
, tree
, p
);
362 token
= expect(token
, ')', "in nested declarator");
366 sym
= indirect(token
, *tree
, SYM_FN
);
367 token
= parameter_type_list(next
, sym
);
368 token
= expect(token
, ')', "in function declarator");
371 if (token
->special
== '[') {
372 struct symbol
*ctype
= indirect(token
, *tree
, SYM_ARRAY
);
373 token
= abstract_array_declarator(token
->next
, ctype
);
374 token
= expect(token
, ']', "in abstract_array_declarator");
377 if (token
->special
== ':') {
378 struct symbol
*bitfield
;
379 struct expression
*expr
;
380 bitfield
= indirect(token
, *tree
, SYM_BITFIELD
);
381 token
= conditional_expression(token
->next
, &expr
);
382 bitfield
->fieldwidth
= get_expression_value(expr
);
394 static struct token
*pointer(struct token
*token
, struct ctype
*ctype
)
396 unsigned long modifiers
;
397 struct symbol
*base_type
;
399 modifiers
= ctype
->modifiers
& ~(MOD_TYPEDEF
| MOD_ATTRIBUTE
);
400 base_type
= ctype
->base_type
;
402 while (match_op(token
,'*')) {
403 struct symbol
*ptr
= alloc_symbol(NULL
, SYM_PTR
);
404 ptr
->ctype
.modifiers
= modifiers
& ~MOD_STORAGE
;
405 ptr
->ctype
.base_type
= base_type
;
408 modifiers
&= MOD_STORAGE
;
409 ctype
->base_type
= base_type
;
411 token
= type_qualifiers(token
->next
, ctype
);
413 ctype
->modifiers
= modifiers
;
417 static struct token
*declarator(struct token
*token
, struct symbol
**tree
, struct token
**p
)
419 token
= pointer(token
, &(*tree
)->ctype
);
420 return direct_declarator(token
, tree
, p
);
423 static struct token
*struct_declaration_list(struct token
*token
, struct symbol_list
**list
)
425 while (!match_op(token
, '}')) {
426 struct ctype ctype
= {0, };
428 token
= declaration_specifiers(token
, &ctype
);
430 struct token
*ident
= NULL
;
431 struct symbol
*decl
= alloc_symbol(token
, SYM_NODE
);
433 token
= pointer(token
, &decl
->ctype
);
434 token
= direct_declarator(token
, &decl
, &ident
);
435 if (match_op(token
, ':')) {
436 struct expression
*expr
;
437 token
= parse_expression(token
->next
, &expr
);
439 add_symbol(list
, decl
);
440 if (!match_op(token
, ','))
444 if (!match_op(token
, ';'))
451 static struct token
*parameter_declaration(struct token
*token
, struct symbol
**tree
)
453 struct token
*ident
= NULL
;
455 struct ctype ctype
= { 0, };
457 token
= declaration_specifiers(token
, &ctype
);
458 sym
= alloc_symbol(token
, SYM_NODE
);
461 token
= pointer(token
, &sym
->ctype
);
462 token
= direct_declarator(token
, tree
, &ident
);
466 struct token
*typename(struct token
*token
, struct symbol
**p
)
468 struct symbol
*sym
= alloc_symbol(token
, SYM_NODE
);
470 token
= declaration_specifiers(token
, &sym
->ctype
);
471 return declarator(token
, &sym
, NULL
);
/*
 * Parse an expression statement.
 *
 * token: first token of the expression.
 * tree:  receives the expression produced by parse_expression().
 *
 * After the expression, expect() is asked for a ';' with the message
 * "at end of statement"; its return value is returned to the caller.
 */
struct token *expression_statement(struct token *token, struct expression **tree)
{
	struct token *after_expr = parse_expression(token, tree);

	return expect(after_expr, ';', "at end of statement");
}
480 static struct token
*parse_asm_operands(struct token
*token
, struct statement
*stmt
)
482 struct expression
*expr
;
484 /* Allow empty operands */
485 if (match_op(token
->next
, ':') || match_op(token
->next
, ')'))
488 token
= primary_expression(token
->next
, &expr
);
489 token
= parens_expression(token
, &expr
, "in asm parameter");
490 } while (match_op(token
, ','));
494 static struct token
*parse_asm_clobbers(struct token
*token
, struct statement
*stmt
)
496 struct expression
*expr
;
499 token
= primary_expression(token
->next
, &expr
);
500 } while (match_op(token
, ','));
504 /* Make a statement out of an expression */
505 static struct statement
*make_statement(struct expression
*expr
)
507 struct statement
*stmt
;
511 stmt
= alloc_statement(expr
->token
, STMT_EXPRESSION
);
512 stmt
->expression
= expr
;
516 struct token
*statement(struct token
*token
, struct statement
**tree
)
518 struct statement
*stmt
= alloc_statement(token
, STMT_NONE
);
521 if (token
->type
== TOKEN_IDENT
) {
522 if (token
->ident
== &if_ident
) {
523 stmt
->type
= STMT_IF
;
524 token
= parens_expression(token
->next
, &stmt
->if_conditional
, "after if");
525 token
= statement(token
, &stmt
->if_true
);
526 if (token
->type
!= TOKEN_IDENT
)
528 if (token
->ident
!= &else_ident
)
530 return statement(token
->next
, &stmt
->if_false
);
532 if (token
->ident
== &return_ident
) {
533 stmt
->type
= STMT_RETURN
;
534 return expression_statement(token
->next
, &stmt
->expression
);
536 if (token
->ident
== &break_ident
) {
537 stmt
->type
= STMT_BREAK
;
538 return expect(token
->next
, ';', "at end of statement");
540 if (token
->ident
== &continue_ident
) {
541 stmt
->type
= STMT_CONTINUE
;
542 return expect(token
->next
, ';', "at end of statement");
544 if (token
->ident
== &default_ident
) {
546 goto default_statement
;
548 if (token
->ident
== &case_ident
) {
549 token
= parse_expression(token
->next
, &stmt
->case_expression
);
550 if (match_op(token
, SPECIAL_ELLIPSIS
))
551 token
= parse_expression(token
->next
, &stmt
->case_to
);
553 stmt
->type
= STMT_CASE
;
554 token
= expect(token
, ':', "after default/case");
555 return statement(token
, &stmt
->case_statement
);
557 if (token
->ident
== &switch_ident
) {
558 stmt
->type
= STMT_SWITCH
;
559 token
= parens_expression(token
->next
, &stmt
->switch_expression
, "after 'switch'");
560 return statement(token
, &stmt
->switch_statement
);
562 if (token
->ident
== &for_ident
) {
563 struct expression
*e1
, *e2
, *e3
;
564 struct statement
*iterator
;
566 token
= expect(token
->next
, '(', "after 'for'");
567 token
= parse_expression(token
, &e1
);
568 token
= expect(token
, ';', "in 'for'");
569 token
= parse_expression(token
, &e2
);
570 token
= expect(token
, ';', "in 'for'");
571 token
= parse_expression(token
, &e3
);
572 token
= expect(token
, ')', "in 'for'");
573 token
= statement(token
, &iterator
);
575 stmt
->type
= STMT_ITERATOR
;
576 stmt
->iterator_pre_statement
= make_statement(e1
);
577 stmt
->iterator_pre_condition
= e2
;
578 stmt
->iterator_post_statement
= make_statement(e3
);
579 stmt
->iterator_post_condition
= e2
;
580 stmt
->iterator_statement
= iterator
;
584 if (token
->ident
== &while_ident
) {
585 struct expression
*expr
;
586 struct statement
*iterator
;
588 token
= parens_expression(token
->next
, &expr
, "after 'while'");
589 token
= statement(token
, &iterator
);
591 stmt
->type
= STMT_ITERATOR
;
592 stmt
->iterator_pre_condition
= expr
;
593 stmt
->iterator_post_condition
= expr
;
594 stmt
->iterator_statement
= iterator
;
598 if (token
->ident
== &do_ident
) {
599 struct expression
*expr
;
600 struct statement
*iterator
;
602 token
= statement(token
->next
, &iterator
);
603 if (token
->type
== TOKEN_IDENT
&& token
->ident
== &while_ident
)
606 warn(token
, "expected 'while' after 'do'");
607 token
= parens_expression(token
, &expr
, "after 'do-while'");
609 stmt
->type
= STMT_ITERATOR
;
610 stmt
->iterator_post_condition
= expr
;
611 stmt
->iterator_statement
= iterator
;
613 return expect(token
, ';', "after statement");
615 if (token
->ident
== &goto_ident
) {
616 stmt
->type
= STMT_GOTO
;
618 if (token
->type
== TOKEN_IDENT
) {
619 stmt
->goto_label
= token
;
622 warn(token
, "invalid label");
623 return expect(token
, ';', "at end of statement");
625 if (token
->ident
== &asm_ident
|| token
->ident
== &__asm___ident
|| token
->ident
== &__asm_ident
) {
626 struct expression
*expr
;
627 stmt
->type
= STMT_ASM
;
629 if (token
->type
== TOKEN_IDENT
) {
630 if (token
->ident
== &__volatile___ident
|| token
->ident
== &volatile_ident
)
633 token
= expect(token
, '(', "after asm");
634 token
= parse_expression(token
->next
, &expr
);
635 if (match_op(token
, ':'))
636 token
= parse_asm_operands(token
, stmt
);
637 if (match_op(token
, ':'))
638 token
= parse_asm_operands(token
, stmt
);
639 if (match_op(token
, ':'))
640 token
= parse_asm_clobbers(token
, stmt
);
641 token
= expect(token
, ')', "after asm");
642 return expect(token
, ';', "at end of asm-statement");
644 if (match_op(token
->next
, ':')) {
645 stmt
->type
= STMT_LABEL
;
646 stmt
->label_identifier
= token
;
647 return statement(token
->next
->next
, &stmt
->label_statement
);
651 if (match_op(token
, '{')) {
652 stmt
->type
= STMT_COMPOUND
;
653 start_symbol_scope();
654 token
= compound_statement(token
->next
, stmt
);
657 return expect(token
, '}', "at end of compound statement");
660 stmt
->type
= STMT_EXPRESSION
;
661 return expression_statement(token
, &stmt
->expression
);
664 struct token
* statement_list(struct token
*token
, struct statement_list
**list
)
667 struct statement
* stmt
;
668 if (eof_token(token
))
670 if (match_op(token
, '}'))
672 token
= statement(token
, &stmt
);
673 add_statement(list
, stmt
);
678 static struct token
*parameter_type_list(struct token
*token
, struct symbol
*fn
)
680 struct symbol_list
**list
= &fn
->arguments
;
682 struct symbol
*sym
= alloc_symbol(token
, SYM_NODE
);
684 if (match_op(token
, SPECIAL_ELLIPSIS
)) {
690 token
= parameter_declaration(token
, &sym
);
691 /* Special case: (void) */
692 if (!*list
&& !sym
->token
&& sym
->ctype
.base_type
== &void_ctype
)
694 add_symbol(list
, sym
);
695 if (!match_op(token
, ','))
/*
 * Forward declaration: compound_statement() calls this before the
 * definition that appears later in the file.
 */
static struct token *external_declaration(struct token *token,
					  struct symbol_list **list);
705 struct token
*compound_statement(struct token
*token
, struct statement
*stmt
)
707 while (!eof_token(token
)) {
708 if (!lookup_type(token
))
710 token
= external_declaration(token
, &stmt
->syms
);
712 token
= statement_list(token
, &stmt
->stmts
);
716 static struct token
*initializer_list(struct token
*token
, struct ctype
*type
)
719 token
= initializer(token
, type
);
720 if (!match_op(token
, ','))
727 struct token
*parse_named_initializer(struct token
*id
, struct token
*token
)
729 struct expression
*expr
;
731 return assignment_expression(token
, &expr
);
734 struct token
*initializer(struct token
*token
, struct ctype
*type
)
736 struct expression
*expr
;
737 struct token
*next
, *name
= NULL
;
740 if (match_op(token
, '.') && (next
->type
== TOKEN_IDENT
) && match_op(next
->next
, '=')) {
742 token
= next
->next
->next
;
743 } else if ((token
->type
== TOKEN_IDENT
) && match_op(next
, ':')) {
748 if (match_op(token
, '{')) {
749 token
= initializer_list(token
->next
, type
);
750 return expect(token
, '}', "at end of initializer");
752 return assignment_expression(token
, &expr
);
755 static void declare_argument(struct symbol
*sym
, void *data
, int flags
)
757 struct symbol
*decl
= data
;
760 warn(decl
->token
, "no identifier for function argument");
763 bind_symbol(sym
, sym
->ident
->ident
, NS_SYMBOL
);
766 static struct token
*external_declaration(struct token
*token
, struct symbol_list
**list
)
768 struct token
*ident
= NULL
;
770 struct ctype ctype
= { 0, };
771 struct symbol
*base_type
;
773 /* Parse declaration-specifiers, if any */
774 token
= declaration_specifiers(token
, &ctype
);
775 decl
= alloc_symbol(token
, SYM_NODE
);
777 token
= pointer(token
, &decl
->ctype
);
778 token
= declarator(token
, &decl
, &ident
);
780 /* Just a type declaration? */
782 return expect(token
, ';', "end of type declaration");
786 /* type define declaration? */
787 if (ctype
.modifiers
& MOD_TYPEDEF
) {
788 bind_symbol(decl
, ident
->ident
, NS_TYPEDEF
);
790 add_symbol(list
, decl
);
791 bind_symbol(decl
, ident
->ident
, NS_SYMBOL
);
794 base_type
= decl
->ctype
.base_type
;
795 if (base_type
&& base_type
->type
== SYM_FN
&& match_op(token
, '{')) {
796 base_type
->stmt
= alloc_statement(token
, STMT_COMPOUND
);
797 start_symbol_scope();
798 symbol_iterate(base_type
->arguments
, declare_argument
, decl
);
799 token
= compound_statement(token
->next
, base_type
->stmt
);
801 return expect(token
, '}', "at end of function");
805 if (match_op(token
, '='))
806 token
= initializer(token
->next
, &decl
->ctype
);
807 if (!match_op(token
, ','))
811 decl
= alloc_symbol(token
, SYM_NODE
);
813 token
= pointer(token
, &decl
->ctype
);
814 token
= declarator(token
->next
, &decl
, &ident
);
816 warn(token
, "expected identifier name in type definition");
820 if (ctype
.modifiers
& MOD_TYPEDEF
) {
821 bind_symbol(decl
, ident
->ident
, NS_TYPEDEF
);
823 add_symbol(list
, decl
);
824 bind_symbol(decl
, ident
->ident
, NS_SYMBOL
);
827 return expect(token
, ';', "at end of declaration");
830 void translation_unit(struct token
*token
, struct symbol_list
**list
)
832 while (!eof_token(token
))
833 token
= external_declaration(token
, list
);