2 * Stupid C parser, version 1e-6.
4 * Let's see how hard this is to do.
6 * Copyright (C) 2003 Linus Torvalds, all rights reserved.
22 #include "expression.h"
/*
 * Allocate a new statement through __alloc_statement(0).
 *
 * NOTE(review): only the allocation is visible in this listing; how the
 * `token` and `type` arguments are recorded in the statement, and the
 * return, are on lines that are not shown.
 */
24 struct statement
*alloc_statement(struct token
* token
, int type
)
26 struct statement
*stmt
= __alloc_statement(0);
31 static struct token
*struct_declaration_list(struct token
*token
, struct symbol_list
**list
);
/*
 * Insert a new symbol of kind `type` between `ctype` and its current base
 * type.
 *
 * The new symbol (allocated at `pos`) takes over ctype's base type and all
 * of ctype's modifiers except the MOD_STORAGE bits.  `ctype` is then
 * rewritten to point at the new symbol as its base type and keeps only its
 * MOD_STORAGE bits.
 *
 * NOTE(review): the return statement is not shown in this listing; callers
 * below use the result as the newly created symbol.
 */
33 static struct symbol
* indirect(struct position pos
, struct ctype
*ctype
, int type
)
35 struct symbol
*sym
= alloc_symbol(pos
, type
);
37 sym
->ctype
.base_type
= ctype
->base_type
;
38 sym
->ctype
.modifiers
= ctype
->modifiers
& ~MOD_STORAGE
;
40 ctype
->base_type
= sym
;
41 ctype
->modifiers
&= MOD_STORAGE
;
/*
 * Find the symbol named by token->ident in namespace `ns`, creating it if
 * needed.
 *
 * The visible code looks the identifier up, and also allocates a symbol of
 * kind `type` at token->pos, gives it the token's identifier and binds it
 * in `ns`.
 *
 * NOTE(review): presumably the allocate-and-bind part runs only when the
 * lookup fails; the guarding condition and the return are on lines not
 * shown in this listing.
 */
45 static struct symbol
*lookup_or_create_symbol(enum namespace ns
, enum type type
, struct token
*token
)
47 struct symbol
*sym
= lookup_symbol(token
->ident
, ns
);
49 sym
= alloc_symbol(token
->pos
, type
);
50 sym
->ident
= token
->ident
;
51 bind_symbol(sym
, token
->ident
, ns
);
/*
 * Common parser for what follows a struct/union/enum keyword.
 *
 * `ns` and `type` select the tag namespace and the symbol kind; `parse` is
 * the callback that parses the body between the braces into the symbol.
 *
 * Named form: if `token` is an identifier, the tag symbol is looked up or
 * created and stored as ctype->base_type.  When a '{' is then matched, the
 * body is handed to parse() and a closing '}' is expected.
 *
 * Unnamed form: a '{' is required; without it a warning "expected
 * declaration" is issued and ctype->base_type becomes &bad_type.  Otherwise
 * a fresh symbol is allocated, the body is parsed into it, it becomes
 * ctype->base_type, and the token after the expected '}' is returned.
 *
 * NOTE(review): token advancement after the tag name and the returns of
 * the named / error paths are on lines not shown in this listing.
 */
56 struct token
*struct_union_enum_specifier(enum namespace ns
, enum type type
,
57 struct token
*token
, struct ctype
*ctype
,
58 struct token
*(*parse
)(struct token
*, struct symbol
*))
63 if (token_type(token
) == TOKEN_IDENT
) {
64 sym
= lookup_or_create_symbol(ns
, type
, token
);
66 ctype
->base_type
= sym
;
67 if (match_op(token
, '{')) {
68 token
= parse(token
->next
, sym
);
69 token
= expect(token
, '}', "at end of struct-union-enum-specifier");
74 // private struct/union/enum type
75 if (!match_op(token
, '{')) {
76 warn(token
->pos
, "expected declaration");
77 ctype
->base_type
= &bad_type
;
81 sym
= alloc_symbol(token
->pos
, type
);
82 token
= parse(token
->next
, sym
);
83 ctype
->base_type
= sym
;
84 return expect(token
, '}', "at end of specifier");
/*
 * Body callback for struct/union specifiers: parses the member
 * declarations starting at `token` into sym->symbol_list by delegating to
 * struct_declaration_list(), and returns the token it stops at.
 */
87 static struct token
*parse_struct_declaration(struct token
*token
, struct symbol
*sym
)
89 return struct_declaration_list(token
, &sym
->symbol_list
);
/*
 * Parse a struct or union specifier.  `type` is the symbol kind to create
 * (declaration_specifiers() passes SYM_STRUCT or SYM_UNION).  Tags live in
 * NS_STRUCT and the body is parsed by parse_struct_declaration().
 */
92 struct token
*struct_or_union_specifier(enum type type
, struct token
*token
, struct ctype
*ctype
)
94 return struct_union_enum_specifier(NS_STRUCT
, type
, token
, ctype
, parse_struct_declaration
);
/*
 * Body callback for enum specifiers: parses the enumerator list.
 *
 * For each identifier token a SYM_NODE symbol is allocated, bound in
 * NS_SYMBOL and given `parent` (the enum symbol) as its base type.  If the
 * identifier is followed by '=', a constant expression is parsed and its
 * value becomes the running value `nextval`.  The enumerator's value is set
 * to `nextval`, which is incremented by one for the next enumerator.
 *
 * NOTE(review): the declarations of `sym` / `nextval`, the handling of the
 * ',' separator test and the return are on lines not shown in this listing.
 */
97 static struct token
*parse_enum_declaration(struct token
*token
, struct symbol
*parent
)
100 while (token_type(token
) == TOKEN_IDENT
) {
101 struct token
*next
= token
->next
;
104 sym
= alloc_symbol(token
->pos
, SYM_NODE
);
105 bind_symbol(sym
, token
->ident
, NS_SYMBOL
);
106 sym
->ctype
.base_type
= parent
;
108 if (match_op(next
, '=')) {
109 struct expression
*expr
;
110 next
= constant_expression(next
->next
, &expr
);
111 nextval
= get_expression_value(expr
);
113 sym
->value
= nextval
;
116 if (!match_op(token
, ','))
119 nextval
= nextval
+ 1;
/*
 * Parse an enum specifier: tags live in NS_ENUM, the symbol kind is
 * SYM_ENUM, and the enumerator list is parsed by parse_enum_declaration().
 */
124 struct token
*enum_specifier(struct token
*token
, struct ctype
*ctype
)
126 return struct_union_enum_specifier(NS_ENUM
, SYM_ENUM
, token
, ctype
, parse_enum_declaration
);
/*
 * Parse the parenthesised operand of `typeof`.
 *
 * A missing '(' produces the warning "expected '(' after typeof".  If the
 * token after the '(' starts a type (lookup_type), the operand is parsed
 * with typename(); the visible alternative parses it as an expression and
 * resets ctype to modifiers 0 / base_type NULL so the type can be evaluated
 * later.  The token after the expected ')' is returned.
 *
 * NOTE(review): the exact branch structure (else / early return) and what
 * is done with the typename() result are on lines not shown here.
 */
129 struct token
*typeof_specifier(struct token
*token
, struct ctype
*ctype
)
132 struct expression
*expr
;
134 if (!match_op(token
, '(')) {
135 warn(token
->pos
, "expected '(' after typeof");
138 if (lookup_type(token
->next
)) {
139 token
= typename(token
->next
, &sym
);
142 token
= parse_expression(token
->next
, &expr
);
143 /* Leave ctype at NULL, we'll evaluate it lazily later.. */
144 ctype
->modifiers
= 0;
145 ctype
->base_type
= NULL
;
147 return expect(token
, ')', "after typeof");
/*
 * Parse an attribute specifier of the form (( ... )).
 *
 * Two opening parentheses are expected, then the contents are scanned with
 * visible checks for end of input, ';', ')' and '(', and finally two
 * closing parentheses are expected.
 *
 * NOTE(review): the scanning loop and the action taken on each check are
 * not shown in this listing, and no visible line stores anything into
 * `ctype` -- presumably the attribute contents are just skipped; confirm
 * against the full source.
 */
150 struct token
*attribute_specifier(struct token
*token
, struct ctype
*ctype
)
154 token
= expect(token
, '(', "after attribute");
155 token
= expect(token
, '(', "after attribute");
158 if (eof_token(token
))
160 if (match_op(token
, ';'))
162 if (match_op(token
, ')')) {
167 if (match_op(token
, '('))
172 token
= expect(token
, ')', "after attribute");
173 token
= expect(token
, ')', "after attribute");
/*
 * Modifier bits that make declaration_specifiers() hand off to a dedicated
 * specifier parser (struct/union, enum, attribute, typeof).
 */
177 #define MOD_SPECIALBITS (MOD_STRUCTOF | MOD_UNIONOF | MOD_ENUMOF | MOD_ATTRIBUTE | MOD_TYPEOF)
/*
 * Consume type qualifiers starting at `next` and OR them into
 * ctype->modifiers.
 *
 * Each identifier token is looked up in NS_TYPEDEF and the symbol's
 * modifiers and base type are read.  The modifiers are tested against
 * everything other than MOD_CONST | MOD_VOLATILE before being merged into
 * ctype.
 *
 * NOTE(review): presumably the loop stops at the first token that is not an
 * identifier, is not found, or carries bits other than const/volatile; the
 * statements following those tests, the use of `base_type`, and the return
 * are on lines not shown in this listing.
 */
179 static struct token
*type_qualifiers(struct token
*next
, struct ctype
*ctype
)
182 while ( (token
= next
) != NULL
) {
183 struct symbol
*s
, *base_type
;
187 if (token_type(token
) != TOKEN_IDENT
)
189 s
= lookup_symbol(token
->ident
, NS_TYPEDEF
);
192 mod
= s
->ctype
.modifiers
;
193 base_type
= s
->ctype
.base_type
;
196 if (mod
& ~(MOD_CONST
| MOD_VOLATILE
))
198 ctype
->modifiers
|= mod
;
/*
 * Size and signedness modifier bits.  declaration_specifiers() passes these
 * to ctype_integer() / ctype_fp() to pick a concrete base type and then
 * clears them from the modifiers.
 */
203 #define MOD_SPECIFIER (MOD_CHAR | MOD_SHORT | MOD_LONG | MOD_LONGLONG | MOD_SIGNED | MOD_UNSIGNED)
/*
 * Map a set of MOD_SPECIFIER bits to the matching concrete integer type.
 *
 * integer_ctypes holds one { signed, unsigned } pair per size, in the order
 * long long, long, short, char, int.  `ctype` starts at the first row and
 * the MOD_LONGLONG, MOD_LONG, MOD_SHORT and MOD_CHAR bits are tested in
 * that order; the result is the row's second entry when MOD_UNSIGNED is
 * set and its first entry otherwise.
 *
 * NOTE(review): presumably each failed size test advances `ctype` to the
 * next row; the advancing statements are on lines not shown here.
 */
205 struct symbol
* ctype_integer(unsigned int spec
)
207 static struct symbol
*const integer_ctypes
[][2] = {
208 { &llong_ctype
, &ullong_ctype
},
209 { &long_ctype
, &ulong_ctype
},
210 { &short_ctype
, &ushort_ctype
},
211 { &char_ctype
, &uchar_ctype
},
212 { &int_ctype
, &uint_ctype
},
214 struct symbol
*const (*ctype
)[2];
216 ctype
= integer_ctypes
;
217 if (!(spec
& MOD_LONGLONG
)) {
219 if (!(spec
& MOD_LONG
)) {
221 if (!(spec
& MOD_SHORT
)) {
223 if (!(spec
& MOD_CHAR
))
228 return ctype
[0][(spec
& MOD_UNSIGNED
) != 0];
/*
 * Map a set of MOD_SPECIFIER bits to a concrete floating-point type:
 * MOD_LONGLONG selects &ldouble_ctype; the other visible result is
 * &double_ctype.
 *
 * NOTE(review): the condition guarding the &double_ctype return and any
 * further results are on lines not shown in this listing.
 */
231 struct symbol
* ctype_fp(unsigned int spec
)
233 if (spec
& MOD_LONGLONG
)
234 return &ldouble_ctype
;
236 return &double_ctype
;
/*
 * Collect the declaration specifiers starting at `next` into *ctype.
 *
 * Per token (identifiers only): the identifier is looked up in NS_TYPEDEF
 * and its type is examined through the local `thistype`.  If its modifiers
 * contain any MOD_SPECIALBITS, the matching specifier parser (struct,
 * union, enum, attribute, typeof) consumes further tokens and may update
 * `thistype`, after which the modifiers are re-read.
 *
 * Base type: when this token's base type differs from the one collected so
 * far and one was already set, "Strange mix of types" is warned; the new
 * base type is then stored.
 *
 * Modifiers: when MOD_LONG appears both in the new and the already
 * collected modifiers, MOD_LONGLONG | MOD_LONG is added via `extra`.
 * Overlapping bits are gathered in `dup` and reported with the "Just how %s
 * do you want this type to be?" warning; the merged set old | mod | extra
 * is stored.
 *
 * After the loop the "virtual" types are resolved: a missing base type with
 * MOD_SPECIFIER bits present becomes &int_type; &int_type and &fp_type are
 * replaced through ctype_integer() / ctype_fp(), and the MOD_SPECIFIER bits
 * are cleared.
 *
 * NOTE(review): loop exits, the initialisation of `thistype`, the condition
 * on the duplicate warning and the return are on lines not shown here.
 */
240 static struct token
*declaration_specifiers(struct token
*next
, struct ctype
*ctype
)
244 while ( (token
= next
) != NULL
) {
245 struct ctype thistype
;
247 struct symbol
*s
, *type
;
251 if (token_type(token
) != TOKEN_IDENT
)
253 ident
= token
->ident
;
255 s
= lookup_symbol(ident
, NS_TYPEDEF
);
259 mod
= thistype
.modifiers
;
260 if (mod
& MOD_SPECIALBITS
) {
261 if (mod
& MOD_STRUCTOF
)
262 next
= struct_or_union_specifier(SYM_STRUCT
, next
, &thistype
);
263 else if (mod
& MOD_UNIONOF
)
264 next
= struct_or_union_specifier(SYM_UNION
, next
, &thistype
);
265 else if (mod
& MOD_ENUMOF
)
266 next
= enum_specifier(next
, &thistype
);
267 else if (mod
& MOD_ATTRIBUTE
)
268 next
= attribute_specifier(next
, &thistype
);
269 else if (mod
& MOD_TYPEOF
)
270 next
= typeof_specifier(next
, &thistype
);
271 mod
= thistype
.modifiers
;
274 type
= thistype
.base_type
;
276 if (type
!= ctype
->base_type
) {
277 if (ctype
->base_type
) {
278 warn(token
->pos
, "Strange mix of types");
281 ctype
->base_type
= type
;
285 unsigned long old
= ctype
->modifiers
;
286 unsigned long extra
= 0, dup
;
288 if (mod
& old
& MOD_LONG
) {
289 extra
= MOD_LONGLONG
| MOD_LONG
;
293 dup
= (mod
& old
) | (extra
& old
) | (extra
& mod
);
295 warn(token
->pos
, "Just how %s do you want this type to be?",
296 modifier_string(dup
));
297 ctype
->modifiers
= old
| mod
| extra
;
301 /* Turn the "virtual types" into real types with real sizes etc */
302 if (!ctype
->base_type
&& (ctype
->modifiers
& MOD_SPECIFIER
))
303 ctype
->base_type
= &int_type
;
305 if (ctype
->base_type
== &int_type
) {
306 ctype
->base_type
= ctype_integer(ctype
->modifiers
& MOD_SPECIFIER
);
307 ctype
->modifiers
&= ~MOD_SPECIFIER
;
310 if (ctype
->base_type
== &fp_type
) {
311 ctype
->base_type
= ctype_fp(ctype
->modifiers
& MOD_SPECIFIER
);
312 ctype
->modifiers
&= ~MOD_SPECIFIER
;
/*
 * Parse the size expression inside an array declarator and record it in
 * sym->array_size: either the value of the parsed expression or -1.
 *
 * NOTE(review): presumably -1 is used when no size expression is present
 * (`expr` starts out NULL); the condition choosing between the two
 * assignments and the return are on lines not shown in this listing.
 */
318 static struct token
*abstract_array_declarator(struct token
*token
, struct symbol
*sym
)
320 struct expression
*expr
= NULL
;
322 token
= parse_expression(token
, &expr
);
324 sym
->array_size
= get_expression_value(expr
);
326 sym
->array_size
= -1;
330 static struct token
*parameter_type_list(struct token
*, struct symbol
*);
331 static struct token
*declarator(struct token
*token
, struct symbol
**tree
, struct ident
**p
);
/*
 * Parse the direct-declarator part of a declarator for the symbol *tree.
 * `p`, when non-NULL, is where the declared identifier is reported; a NULL
 * `p` means an identifier is not looked for.
 *
 * Visible cases:
 *  - __attribute__ / __attribute: parsed by attribute_specifier() into a
 *    zeroed local ctype.
 *  - '(': treated as a function parameter list when an identifier has
 *    already been seen (*p), or the next token is ')' or starts a type.
 *    In that case a SYM_FN symbol is inserted with indirect() and
 *    parameter_type_list() fills it in.  Otherwise it is a nested
 *    declarator: it is parsed recursively, ')' is expected, and `ctype` is
 *    walked down the base_type chain until it reaches the base type that
 *    was in place before the nested parse.
 *  - '[': a SYM_ARRAY symbol is inserted and its size parsed by
 *    abstract_array_declarator(); ']' is expected.
 *  - ':': a SYM_BITFIELD symbol is inserted and its width is the value of
 *    the following conditional expression.
 *
 * NOTE(review): identifier handling, the loop/branch structure between the
 * cases and the return are on lines not shown in this listing.
 */
333 static struct token
*direct_declarator(struct token
*token
, struct symbol
**tree
, struct ident
**p
)
335 struct ctype
*ctype
= &(*tree
)->ctype
;
337 if (p
&& token_type(token
) == TOKEN_IDENT
) {
343 if (match_ident(token
, &__attribute___ident
) || match_ident(token
, &__attribute_ident
)) {
344 struct ctype ctype
= { 0, };
345 token
= attribute_specifier(token
->next
, &ctype
);
348 if (token_type(token
) != TOKEN_SPECIAL
)
352 * This can be either a parameter list or a grouping.
353 * For the direct (non-abstract) case, we know if must be
354 * a paramter list if we already saw the identifier.
355 * For the abstract case, we know if must be a parameter
356 * list if it is empty or starts with a type.
358 if (token
->special
== '(') {
360 struct token
*next
= token
->next
;
361 int fn
= (p
&& *p
) || match_op(next
, ')') || lookup_type(next
);
364 struct symbol
*base_type
= ctype
->base_type
;
365 token
= declarator(next
, tree
, p
);
366 token
= expect(token
, ')', "in nested declarator");
367 while (ctype
->base_type
!= base_type
)
368 ctype
= &ctype
->base_type
->ctype
;
373 sym
= indirect(token
->pos
, ctype
, SYM_FN
);
374 token
= parameter_type_list(next
, sym
);
375 token
= expect(token
, ')', "in function declarator");
378 if (token
->special
== '[') {
379 struct symbol
*array
= indirect(token
->pos
, ctype
, SYM_ARRAY
);
380 token
= abstract_array_declarator(token
->next
, array
);
381 token
= expect(token
, ']', "in abstract_array_declarator");
384 if (token
->special
== ':') {
385 struct symbol
*bitfield
;
386 struct expression
*expr
;
387 bitfield
= indirect(token
->pos
, ctype
, SYM_BITFIELD
);
388 token
= conditional_expression(token
->next
, &expr
);
389 bitfield
->fieldwidth
= get_expression_value(expr
);
/*
 * Parse a run of '*' pointer declarators and wrap *ctype accordingly.
 *
 * The working modifiers start as ctype's modifiers without MOD_TYPEDEF and
 * MOD_ATTRIBUTE.  For each '*', a SYM_PTR symbol is allocated that takes
 * the current base type and the non-storage modifiers; the working
 * modifiers are reduced to their MOD_STORAGE bits, ctype->base_type is
 * updated, and any qualifiers following the '*' are consumed by
 * type_qualifiers().  ctype->modifiers is finally set from the working
 * modifiers.
 *
 * NOTE(review): presumably `base_type` is re-pointed at the new SYM_PTR
 * symbol before being stored back into ctype; that assignment and the
 * return are on lines not shown in this listing.
 */
400 static struct token
*pointer(struct token
*token
, struct ctype
*ctype
)
402 unsigned long modifiers
;
403 struct symbol
*base_type
;
405 modifiers
= ctype
->modifiers
& ~(MOD_TYPEDEF
| MOD_ATTRIBUTE
);
406 base_type
= ctype
->base_type
;
408 while (match_op(token
,'*')) {
409 struct symbol
*ptr
= alloc_symbol(token
->pos
, SYM_PTR
);
410 ptr
->ctype
.modifiers
= modifiers
& ~MOD_STORAGE
;
411 ptr
->ctype
.base_type
= base_type
;
414 modifiers
&= MOD_STORAGE
;
415 ctype
->base_type
= base_type
;
417 token
= type_qualifiers(token
->next
, ctype
);
419 ctype
->modifiers
= modifiers
;
/*
 * Parse a full declarator for *tree: first any leading '*' pointer part
 * (pointer()), then the direct-declarator part (direct_declarator()).
 * `p` is forwarded unchanged to direct_declarator().  Returns the token
 * direct_declarator() stops at.
 */
423 static struct token
*declarator(struct token
*token
, struct symbol
**tree
, struct ident
**p
)
425 token
= pointer(token
, &(*tree
)->ctype
);
426 return direct_declarator(token
, tree
, p
);
/*
 * Parse struct/union member declarations up to (not including) the closing
 * '}' and append each declared member to *list.
 *
 * For each declaration the specifiers are collected into a zeroed local
 * ctype.  For each declarator a SYM_NODE symbol is allocated, its pointer
 * and direct-declarator parts are parsed, an optional ':' followed by an
 * expression is consumed, and the symbol is added to the list.  ',' and
 * ';' are tested after each declarator / declaration.
 *
 * NOTE(review): how the collected ctype is copied into `decl`, the inner
 * loop construct, what happens when ',' or ';' is missing, and the return
 * are on lines not shown in this listing.
 */
429 static struct token
*struct_declaration_list(struct token
*token
, struct symbol_list
**list
)
431 while (!match_op(token
, '}')) {
432 struct ctype ctype
= {0, };
434 token
= declaration_specifiers(token
, &ctype
);
436 struct ident
*ident
= NULL
;
437 struct symbol
*decl
= alloc_symbol(token
->pos
, SYM_NODE
);
439 token
= pointer(token
, &decl
->ctype
);
440 token
= direct_declarator(token
, &decl
, &ident
);
441 if (match_op(token
, ':')) {
442 struct expression
*expr
;
443 token
= parse_expression(token
->next
, &expr
);
445 add_symbol(list
, decl
);
446 if (!match_op(token
, ','))
450 if (!match_op(token
, ';'))
/*
 * Parse a single function parameter declaration: declaration specifiers
 * into a zeroed local ctype, a freshly allocated SYM_NODE symbol, its
 * pointer part, and then the direct-declarator part via `tree`.
 *
 * NOTE(review): how `sym` relates to *tree, how the local ctype and the
 * identifier are stored, and the return are on lines not shown here.
 */
457 static struct token
*parameter_declaration(struct token
*token
, struct symbol
**tree
)
459 struct ident
*ident
= NULL
;
461 struct ctype ctype
= { 0, };
463 token
= declaration_specifiers(token
, &ctype
);
464 sym
= alloc_symbol(token
->pos
, SYM_NODE
);
467 token
= pointer(token
, &sym
->ctype
);
468 token
= direct_declarator(token
, tree
, &ident
);
/*
 * Parse a type name (specifiers plus an abstract declarator): a SYM_NODE
 * symbol is allocated, the specifiers are collected into its ctype, and
 * declarator() is called with a NULL identifier slot, so no identifier is
 * accepted.  Returns the token declarator() stops at.
 *
 * NOTE(review): no visible line stores the symbol through `p`; if that
 * happens it is on a line not shown in this listing.
 */
472 struct token
*typename(struct token
*token
, struct symbol
**p
)
474 struct symbol
*sym
= alloc_symbol(token
->pos
, SYM_NODE
);
476 token
= declaration_specifiers(token
, &sym
->ctype
);
477 return declarator(token
, &sym
, NULL
);
/*
 * Parse an expression into *tree and require the terminating ';'
 * (diagnostic text: "at end of statement").  Returns the token expect()
 * yields.
 */
480 struct token
*expression_statement(struct token
*token
, struct expression
**tree
)
482 token
= parse_expression(token
, tree
);
483 return expect(token
, ';', "at end of statement");
/*
 * Parse one ':'-introduced operand list of an asm statement.
 *
 * An operand list is empty when the token after `token` is ':' or ')'.
 * Otherwise each operand is a primary expression followed by a
 * parenthesised expression ("in asm parameter"), repeated while a ','
 * follows.
 *
 * NOTE(review): the parsed expressions go into a local and no visible line
 * stores them in `stmt`; the empty-list action, the `do` opener and the
 * return are on lines not shown in this listing.
 */
486 static struct token
*parse_asm_operands(struct token
*token
, struct statement
*stmt
)
488 struct expression
*expr
;
490 /* Allow empty operands */
491 if (match_op(token
->next
, ':') || match_op(token
->next
, ')'))
494 token
= primary_expression(token
->next
, &expr
);
495 token
= parens_expression(token
, &expr
, "in asm parameter");
496 } while (match_op(token
, ','));
/*
 * Parse the clobber list of an asm statement: one primary expression per
 * entry, repeated while a ',' follows.
 *
 * NOTE(review): the parsed expressions go into a local and no visible line
 * stores them in `stmt`; the `do` opener and the return are on lines not
 * shown in this listing.
 */
500 static struct token
*parse_asm_clobbers(struct token
*token
, struct statement
*stmt
)
502 struct expression
*expr
;
505 token
= primary_expression(token
->next
, &expr
);
506 } while (match_op(token
, ','));
/*
 * Wrap `expr` in a STMT_EXPRESSION statement allocated from expr->token.
 *
 * NOTE(review): statement() passes expressions here that may not have been
 * filled in; any guard on `expr` and the return are on lines not shown in
 * this listing.
 */
510 /* Make a statement out of an expression */
511 static struct statement
*make_statement(struct expression
*expr
)
513 struct statement
*stmt
;
517 stmt
= alloc_statement(expr
->token
, STMT_EXPRESSION
);
518 stmt
->expression
= expr
;
/*
 * Parse one statement starting at `token`.  A statement node is allocated
 * up front as STMT_NONE and the matching branch sets its real type.
 *
 * Keyword branches (identifier tokens, compared against the *_ident
 * objects):
 *   if       - parenthesised condition, then if_true; if `else` follows,
 *              if_false is parsed as well.
 *   return   - STMT_RETURN, followed by an expression statement.
 *   break / continue - only the terminating ';' is expected.
 *   default  - jumps to the default_statement label.
 *   case     - case_expression, optionally `...` (SPECIAL_ELLIPSIS) and a
 *              case_to expression; then ':' and the labelled statement.
 *   switch   - parenthesised expression, then the body statement.
 *   for      - three expressions separated by ';' inside parentheses, then
 *              the body.  The first and third are wrapped by
 *              make_statement(); the second is stored as both the pre- and
 *              the post-condition.
 *   while    - the condition is stored as both pre- and post-condition.
 *   do       - body first, then `while` (warning "expected 'while' after
 *              'do'" otherwise) and a parenthesised expression stored only
 *              as the post-condition.
 *   goto     - the label token is stored in goto_label, or "invalid label"
 *              is warned.
 *   asm      - optional volatile, '(' and a template expression, then up to
 *              two operand lists and one clobber list, each introduced by
 *              ':', then ')' and ';'.
 *   ident ':' - STMT_LABEL; the statement after the colon is parsed into
 *              label_statement.
 *
 * Other tokens: '{' starts a STMT_COMPOUND parsed by compound_statement()
 * inside a new symbol scope; anything else is an expression statement.
 *
 * NOTE(review): the assignment to *tree, the default_statement label, the
 * scope-closing call, token advances and several returns are on lines not
 * shown in this listing.
 */
522 struct token
*statement(struct token
*token
, struct statement
**tree
)
524 struct statement
*stmt
= alloc_statement(token
, STMT_NONE
);
527 if (token_type(token
) == TOKEN_IDENT
) {
528 if (token
->ident
== &if_ident
) {
529 stmt
->type
= STMT_IF
;
530 token
= parens_expression(token
->next
, &stmt
->if_conditional
, "after if");
531 token
= statement(token
, &stmt
->if_true
);
532 if (token_type(token
) != TOKEN_IDENT
)
534 if (token
->ident
!= &else_ident
)
536 return statement(token
->next
, &stmt
->if_false
);
538 if (token
->ident
== &return_ident
) {
539 stmt
->type
= STMT_RETURN
;
540 return expression_statement(token
->next
, &stmt
->expression
);
542 if (token
->ident
== &break_ident
) {
543 stmt
->type
= STMT_BREAK
;
544 return expect(token
->next
, ';', "at end of statement");
546 if (token
->ident
== &continue_ident
) {
547 stmt
->type
= STMT_CONTINUE
;
548 return expect(token
->next
, ';', "at end of statement");
550 if (token
->ident
== &default_ident
) {
552 goto default_statement
;
554 if (token
->ident
== &case_ident
) {
555 token
= parse_expression(token
->next
, &stmt
->case_expression
);
556 if (match_op(token
, SPECIAL_ELLIPSIS
))
557 token
= parse_expression(token
->next
, &stmt
->case_to
);
559 stmt
->type
= STMT_CASE
;
560 token
= expect(token
, ':', "after default/case");
561 return statement(token
, &stmt
->case_statement
);
563 if (token
->ident
== &switch_ident
) {
564 stmt
->type
= STMT_SWITCH
;
565 token
= parens_expression(token
->next
, &stmt
->switch_expression
, "after 'switch'");
566 return statement(token
, &stmt
->switch_statement
);
568 if (token
->ident
== &for_ident
) {
569 struct expression
*e1
, *e2
, *e3
;
570 struct statement
*iterator
;
572 token
= expect(token
->next
, '(', "after 'for'");
573 token
= parse_expression(token
, &e1
);
574 token
= expect(token
, ';', "in 'for'");
575 token
= parse_expression(token
, &e2
);
576 token
= expect(token
, ';', "in 'for'");
577 token
= parse_expression(token
, &e3
);
578 token
= expect(token
, ')', "in 'for'");
579 token
= statement(token
, &iterator
);
581 stmt
->type
= STMT_ITERATOR
;
582 stmt
->iterator_pre_statement
= make_statement(e1
);
583 stmt
->iterator_pre_condition
= e2
;
584 stmt
->iterator_post_statement
= make_statement(e3
);
585 stmt
->iterator_post_condition
= e2
;
586 stmt
->iterator_statement
= iterator
;
590 if (token
->ident
== &while_ident
) {
591 struct expression
*expr
;
592 struct statement
*iterator
;
594 token
= parens_expression(token
->next
, &expr
, "after 'while'");
595 token
= statement(token
, &iterator
);
597 stmt
->type
= STMT_ITERATOR
;
598 stmt
->iterator_pre_condition
= expr
;
599 stmt
->iterator_post_condition
= expr
;
600 stmt
->iterator_statement
= iterator
;
604 if (token
->ident
== &do_ident
) {
605 struct expression
*expr
;
606 struct statement
*iterator
;
608 token
= statement(token
->next
, &iterator
);
609 if (token_type(token
) == TOKEN_IDENT
&& token
->ident
== &while_ident
)
612 warn(token
->pos
, "expected 'while' after 'do'");
613 token
= parens_expression(token
, &expr
, "after 'do-while'");
615 stmt
->type
= STMT_ITERATOR
;
616 stmt
->iterator_post_condition
= expr
;
617 stmt
->iterator_statement
= iterator
;
619 return expect(token
, ';', "after statement");
621 if (token
->ident
== &goto_ident
) {
622 stmt
->type
= STMT_GOTO
;
624 if (token_type(token
) == TOKEN_IDENT
) {
625 stmt
->goto_label
= token
;
628 warn(token
->pos
, "invalid label");
629 return expect(token
, ';', "at end of statement");
631 if (token
->ident
== &asm_ident
|| token
->ident
== &__asm___ident
|| token
->ident
== &__asm_ident
) {
632 struct expression
*expr
;
633 stmt
->type
= STMT_ASM
;
635 if (token_type(token
) == TOKEN_IDENT
) {
636 if (token
->ident
== &__volatile___ident
|| token
->ident
== &volatile_ident
)
639 token
= expect(token
, '(', "after asm");
640 token
= parse_expression(token
->next
, &expr
);
641 if (match_op(token
, ':'))
642 token
= parse_asm_operands(token
, stmt
);
643 if (match_op(token
, ':'))
644 token
= parse_asm_operands(token
, stmt
);
645 if (match_op(token
, ':'))
646 token
= parse_asm_clobbers(token
, stmt
);
647 token
= expect(token
, ')', "after asm");
648 return expect(token
, ';', "at end of asm-statement");
650 if (match_op(token
->next
, ':')) {
651 stmt
->type
= STMT_LABEL
;
652 stmt
->label_identifier
= token
;
653 return statement(token
->next
->next
, &stmt
->label_statement
);
657 if (match_op(token
, '{')) {
658 stmt
->type
= STMT_COMPOUND
;
659 start_symbol_scope();
660 token
= compound_statement(token
->next
, stmt
);
663 return expect(token
, '}', "at end of compound statement");
666 stmt
->type
= STMT_EXPRESSION
;
667 return expression_statement(token
, &stmt
->expression
);
/*
 * Parse a sequence of statements and append each one to *list.  The
 * visible per-statement steps are: test for end of input, test for a
 * closing '}', parse one statement, add it to the list.
 *
 * NOTE(review): presumably the two tests end the sequence; the loop
 * construct, the actions after the tests and the return are on lines not
 * shown in this listing.
 */
670 struct token
* statement_list(struct token
*token
, struct statement_list
**list
)
673 struct statement
* stmt
;
674 if (eof_token(token
))
676 if (match_op(token
, '}'))
678 token
= statement(token
, &stmt
);
679 add_statement(list
, stmt
);
/*
 * Parse a function's parameter list into fn->arguments.
 *
 * For each parameter a SYM_NODE symbol is allocated and filled in by
 * parameter_declaration(), then added to the argument list; ',' is tested
 * after each parameter.  Two special cases are visible: a `...` token
 * (SPECIAL_ELLIPSIS), and a lone unnamed parameter whose base type is
 * &void_ctype while the list is still empty, i.e. the "(void)" form.
 *
 * NOTE(review): the loop construct, the actions taken for the ellipsis and
 * "(void)" cases, and the return are on lines not shown in this listing.
 */
684 static struct token
*parameter_type_list(struct token
*token
, struct symbol
*fn
)
686 struct symbol_list
**list
= &fn
->arguments
;
688 struct symbol
*sym
= alloc_symbol(token
->pos
, SYM_NODE
);
690 if (match_op(token
, SPECIAL_ELLIPSIS
)) {
696 token
= parameter_declaration(token
, &sym
);
697 /* Special case: (void) */
698 if (!*list
&& !sym
->ident
&& sym
->ctype
.base_type
== &void_ctype
)
700 add_symbol(list
, sym
);
701 if (!match_op(token
, ','))
709 static struct token
*external_declaration(struct token
*token
, struct symbol_list
**list
);
/*
 * Parse the inside of a compound statement: leading declarations go into
 * stmt->syms via external_declaration(), and the statements that follow
 * go into stmt->stmts via statement_list().
 *
 * NOTE(review): presumably the declaration loop ends at the first token
 * that does not start a type (lookup_type); the action after that test and
 * the return are on lines not shown in this listing.
 */
711 struct token
*compound_statement(struct token
*token
, struct statement
*stmt
)
713 while (!eof_token(token
)) {
714 if (!lookup_type(token
))
716 token
= external_declaration(token
, &stmt
->syms
);
718 token
= statement_list(token
, &stmt
->stmts
);
/*
 * Parse a list of initializers for `type`, calling initializer() for each
 * entry and testing for ',' afterwards.
 *
 * NOTE(review): the loop construct, the action when no ',' follows, and the
 * return are on lines not shown in this listing.
 */
722 static struct token
*initializer_list(struct token
*token
, struct ctype
*type
)
725 token
= initializer(token
, type
);
726 if (!match_op(token
, ','))
/*
 * Parse the value of a named (designated) initializer: an assignment
 * expression starting at `token`, parsed into a local.  Returns the token
 * assignment_expression() stops at.
 *
 * NOTE(review): `id` is not used on any line visible in this listing.
 */
733 struct token
*parse_named_initializer(struct token
*id
, struct token
*token
)
735 struct expression
*expr
;
737 return assignment_expression(token
, &expr
);
/*
 * Parse one initializer for `type`.
 *
 * Two designator spellings are recognised first: `.name =` (a '.', an
 * identifier, then '=') and `name:` (an identifier followed by ':').  A
 * '{' then starts a nested initializer list that must end with '}';
 * anything else is parsed as an assignment expression.
 *
 * NOTE(review): the assignment that sets `next` before it is tested, the
 * handling of `name`, and the body of the `name:` branch are on lines not
 * shown in this listing.
 */
740 struct token
*initializer(struct token
*token
, struct ctype
*type
)
742 struct expression
*expr
;
743 struct token
*next
, *name
= NULL
;
746 if (match_op(token
, '.') && (token_type(next
) == TOKEN_IDENT
) && match_op(next
->next
, '=')) {
748 token
= next
->next
->next
;
749 } else if ((token_type(token
) == TOKEN_IDENT
) && match_op(next
, ':')) {
754 if (match_op(token
, '{')) {
755 token
= initializer_list(token
->next
, type
);
756 return expect(token
, '}', "at end of initializer");
758 return assignment_expression(token
, &expr
);
/*
 * symbol_iterate() callback used by external_declaration() on a function's
 * argument list; `data` is the function's declaration symbol.
 *
 * Visible actions: warn "no identifier for function argument" at the
 * declaration's position, and bind the argument under its own identifier
 * in NS_SYMBOL.
 *
 * NOTE(review): presumably the warning is issued only for arguments without
 * an identifier; the guarding condition is on a line not shown in this
 * listing.  `flags` is not used on any visible line.
 */
761 static void declare_argument(struct symbol
*sym
, void *data
, int flags
)
763 struct symbol
*decl
= data
;
766 warn(decl
->pos
, "no identifier for function argument");
769 bind_symbol(sym
, sym
->ident
, NS_SYMBOL
);
/*
 * Parse one declaration or function definition and record the declared
 * symbols in *list.
 *
 * Visible steps:
 *  1. Collect declaration specifiers into a zeroed local ctype, allocate a
 *     SYM_NODE symbol and parse its pointer and declarator parts.
 *  2. A bare type declaration just expects ';'.
 *  3. A typedef (MOD_TYPEDEF set) is bound in NS_TYPEDEF; the other visible
 *     path adds the symbol to the list and binds it in NS_SYMBOL.
 *  4. If the declared base type is a SYM_FN and a '{' follows, a
 *     STMT_COMPOUND body is allocated, a new symbol scope is started, the
 *     arguments are declared through declare_argument(), the body is
 *     parsed, and '}' is expected ("at end of function").
 *  5. Otherwise an optional '=' initializer is parsed and, while ','
 *     follows, further declarators are parsed into fresh symbols and
 *     bound the same way.  The declaration ends with an expected ';'.
 *
 * NOTE(review): how the local ctype is copied into each symbol, the
 * conditions on the "just a type" path and the "expected identifier name
 * in type definition" warning, the else branches, the loop construct and
 * the scope-closing call are on lines not shown in this listing.
 */
772 static struct token
*external_declaration(struct token
*token
, struct symbol_list
**list
)
774 struct ident
*ident
= NULL
;
776 struct ctype ctype
= { 0, };
777 struct symbol
*base_type
;
779 /* Parse declaration-specifiers, if any */
780 token
= declaration_specifiers(token
, &ctype
);
781 decl
= alloc_symbol(token
->pos
, SYM_NODE
);
783 token
= pointer(token
, &decl
->ctype
);
784 token
= declarator(token
, &decl
, &ident
);
786 /* Just a type declaration? */
788 return expect(token
, ';', "end of type declaration");
792 /* type define declaration? */
793 if (ctype
.modifiers
& MOD_TYPEDEF
) {
794 bind_symbol(decl
, ident
, NS_TYPEDEF
);
796 add_symbol(list
, decl
);
797 bind_symbol(decl
, ident
, NS_SYMBOL
);
800 base_type
= decl
->ctype
.base_type
;
801 if (base_type
&& base_type
->type
== SYM_FN
&& match_op(token
, '{')) {
802 base_type
->stmt
= alloc_statement(token
, STMT_COMPOUND
);
803 start_symbol_scope();
804 symbol_iterate(base_type
->arguments
, declare_argument
, decl
);
805 token
= compound_statement(token
->next
, base_type
->stmt
);
807 return expect(token
, '}', "at end of function");
811 if (match_op(token
, '='))
812 token
= initializer(token
->next
, &decl
->ctype
);
813 if (!match_op(token
, ','))
817 decl
= alloc_symbol(token
->pos
, SYM_NODE
);
819 token
= pointer(token
, &decl
->ctype
);
820 token
= declarator(token
->next
, &decl
, &ident
);
822 warn(token
->pos
, "expected identifier name in type definition");
826 if (ctype
.modifiers
& MOD_TYPEDEF
) {
827 bind_symbol(decl
, ident
, NS_TYPEDEF
);
829 add_symbol(list
, decl
);
830 bind_symbol(decl
, ident
, NS_SYMBOL
);
833 return expect(token
, ';', "at end of declaration");
/*
 * Top-level entry point: parse external declarations one after another
 * until the end-of-input token is reached, collecting the declared
 * symbols in *list.
 */
836 void translation_unit(struct token
*token
, struct symbol_list
**list
)
838 while (!eof_token(token
))
839 token
= external_declaration(token
, list
);