10 // make-it-easier macros
11 #define LEX (&cc->lex) // struct lexer *
12 #define TOK (cc->lex.tok) // tok_t
14 static void cc_accept(struct cc
*cc
, const char *description
, ...);
15 static void cc_sync(struct cc
*cc
, ...);
16 static int get_precedence(tok_t tok
);
17 static bool cc_isglobal(struct cc
*cc
);
18 static struct stree
*cc_get_current_func(struct cc
*cc
);
19 static struct stree
*cc_parse_factor(struct cc
*cc
);
20 static struct stree
*cc_parse_expr(struct cc
*cc
);
21 static struct stree
*cc_parse_expr_nocomma(struct cc
*cc
);
22 static bool cc_parse_struct(struct cc
*cc
, struct btype
*struct_type_info
);
23 static bool cc_parse_type(struct cc
*cc
, struct btype
*type_info
);
24 static struct stree
*cc_parse_pointer_type(struct cc
*cc
, struct btype
*type_info
);
25 static struct stree
*cc_parse_parameters(struct cc
*cc
);
26 static bool type_identical(struct btype
*type1
, struct btype
*type2
);
27 static void cc_compare_functions(struct cc
*cc
, struct stree
*func1
, struct stree
*func2
);
28 static void cc_parse_decl_function(struct cc
*cc
, struct stree
**psym_node
);
29 static void cc_parse_decl_variable(struct cc
*cc
, struct stree
*sym_node
);
30 static void cc_parse_defn_function(struct cc
*cc
, struct stree
*func
);
31 static bool cc_parse_decl(struct cc
*cc
, struct stree
*insert_here
);
33 // throw a 'foo expected but bar found' error
34 void cc_expect(struct cc
*cc
, const char *fmt
, ...)
38 fprintf(stderr
, "%s:%d: error: ", LEX
->tok_sloc
.name
, LEX
->tok_sloc
.line
);
39 vfprintf(stderr
, fmt
, ap
);
41 fprintf(stderr
, " expected but \"%s\" found\n", lex_get_tok_str(LEX
, TOK
, LEX
->tok_str
));
44 // throw a compile error with explicit source code location (va_list form)
45 void cc_error_loc_v(struct cc
*cc
, struct sloc
*sloc
, const char *fmt
, va_list ap
)
47 fprintf(stderr
, "%s:%d: error: ", sloc
->name
, sloc
->line
);
48 vfprintf(stderr
, fmt
, ap
);
52 // throw a compile error with explicit source code location (... form)
53 void cc_error_loc(struct cc
*cc
, struct sloc
*sloc
, const char *fmt
, ...)
57 cc_error_loc_v(cc
, sloc
, fmt
, ap
);
61 // throw a compile error at current token
62 void cc_error(struct cc
*cc
, const char *fmt
, ...)
66 cc_error_loc_v(cc
, &LEX
->tok_sloc
, fmt
, ap
);
70 // If the next token is in the given set of tokens,
71 // then accept it, otherwise throw an error.
72 // 'description' describes what is wanted. This could be
73 // a token or a non-terminal name (eg. "type" or "expression").
74 // Token set is a zero-terminated list of tokens. Eg.:
75 // > cc_accept(cc, "operator", '+', '-', TOK_SHL, 0);
76 // would throw "operator expected, but '*' found" if the next
77 // token wasn't either '+', '-' or TOK_SHL (<<)
78 static void cc_accept(struct cc
*cc
, const char *description
, ...)
82 va_start(ap
, description
);
83 while ((tok
= va_arg(ap
, int))){
90 cc_expect(cc
, "%s", description
);
93 // keep eating tokens until the current token is in the given set.
94 // The set is a zero-terminated list of tokens, eg.:
95 // > cc_sync(cc, ')', ';', 0);
96 static void cc_sync(struct cc
*cc
, ...)
102 while ((tok
= va_arg(ap
, int))){
112 int prec
; // operator precedence level (1=highest)
113 tok_t tok
; // operator token
114 } operator_table
[] = {
115 {1, '*'}, {1, '/'}, {1, '%'}, // multiplicative
116 {2, '+'}, {2, '-'}, // additive
117 {3, TOK_SHL
}, {3, TOK_SHR
}, // shiftive?
118 {4, '<'}, {4, TOK_LE
}, {4, '>'}, {4, TOK_GE
}, // comparison
119 {5, TOK_EQ
}, {5, TOK_NE
}, // equality
120 {6, '&'}, // bitwise and
121 {7, '^'}, // bitwise xor
122 {8, '|'}, // bitwise or
123 {9, TOK_LAND
}, // logical and
124 {10, TOK_LOR
}, // logical or
125 {11, '?'}, // ternary
126 {12, '='}, {12, TOK_EADD
}, {12, TOK_ESUB
}, // augmented assignment
127 {12, TOK_EMUL
}, {12, TOK_EDIV
}, {12, TOK_EMOD
},// .
128 {12, TOK_ESHL
}, {12, TOK_ESHR
}, {12, TOK_EAND
},// .
129 {12, TOK_EXOR
}, {12, TOK_EOR
}, // .
130 {13, ','}, // comma operator
133 static int get_precedence(tok_t tok
)
136 for (i
=0; i
<(sizeof operator_table
/ sizeof *operator_table
); i
++){
137 if (operator_table
[i
].tok
== tok
){
138 return operator_table
[i
].prec
;
144 // return true if parser state is in global scope
145 // (i.e. what is being passed is at global (file) scope)
146 static bool cc_isglobal(struct cc
*cc
)
148 return (cc
->scope
== cc
->stree
);
151 // return the current function node
152 static struct stree
*cc_get_current_func(struct cc
*cc
)
158 // parse a factor, except subexpressions (...),
159 // due to cast/subexpression ambiguity
160 static struct stree
*cc_parse_factor(struct cc
*cc
)
162 struct stree
*fac
= NULL
;
163 if (lex_is_ident(LEX
, TOK
)){
164 fac
= stree_create();
165 fac
->form
= STF_FACTOR
;
168 } else if (TOK
== TOK_NUMBER
){
169 fac
= stree_create();
170 fac
->form
= STF_FACTOR
;
171 fac
->tok
= TOK_NUMBER
;
172 const_parse_number(&fac
->cv
, LEX
->tok_str
);
175 cc_expect(cc
, "expression");
180 static struct stree
*cc_parse_unary(struct cc
*cc
)
182 struct stree
*fac
, *un
, *prefix_first
= NULL
, *prefix_last
= NULL
;
184 // prefix unary operators
185 while (TOK
== TOK_INC
|| TOK
== TOK_DEC
|| TOK
== '+'
186 || TOK
== '-' || TOK
== '!' || TOK
== '~' || TOK
== '*'
187 || TOK
== '&' || TOK
== TOK_sizeof
){
192 // The '+' does nothing. Does anyone ever use it?!
193 fprintf(stderr
, "You're using a unary plus? Weird.\n");
197 stree_append_child(prefix_last
, un
);
205 // subexpression or cast
208 struct stree
*type_node
;
211 if (cc_parse_type(cc
, &btype
)){
213 type_node
= cc_parse_pointer_type(cc
, &btype
);
219 // excessive type information
220 stree_append_child(un
, type_node
);
222 // I could choose between one goto, or reams and reams
223 // of tangled spaghetti-loops.
224 // I preferred the goto.
226 goto link_prefix_unop
;
228 fac
= cc_parse_expr(cc
);
229 cc_accept(cc
, "\")\"", ')', 0);
232 fac
= cc_parse_factor(cc
);
235 // postfix unary operators
236 while (TOK
== TOK_INC
|| TOK
== TOK_DEC
|| TOK
== '[' ||
237 TOK
== TOK_ARROW
|| TOK
== '.' || TOK
== '('){
240 stree_append_child(un
, fac
);
242 un
->tok
= TOK_POSTINC
;
244 } else if (TOK
== TOK_DEC
){
245 un
->tok
= TOK_POSTDEC
;
247 } else if (TOK
== '['){
249 struct stree
*subscript
;
252 subscript
= cc_parse_expr(cc
);
254 stree_append_child(un
, subscript
);
255 cc_accept(cc
, "\"]\"", ']', 0);
257 } else if (TOK
== '.' || TOK
== TOK_ARROW
){
258 // struct/union member access
259 struct stree
*member_name
;
262 if (lex_is_ident(LEX
, TOK
)){
263 member_name
= stree_create();
264 member_name
->form
= STF_ATOM
;
265 member_name
->tok
= TOK
;
266 stree_append_child(un
, member_name
);
269 cc_expect(cc
, "identifier (member name)");
271 } else if (TOK
== '('){
272 struct stree
*argument
;
278 argument
= cc_parse_expr_nocomma(cc
);
280 stree_append_child(un
, argument
);
282 } while (TOK
== ',' ? (lex_next(LEX
), 1) : 0);
284 cc_accept(cc
, "\")\"", ')', 0);
290 stree_append_child(prefix_last
, fac
);
297 static struct stree
*cc_parse_expr_internal(struct cc
*cc
, int min_prec
)
299 struct stree
*lhs
, *rhs
, *op_node
, *node
, *node_parent
, *node_right
;
300 int prec
, node_prec
, op_tok
;
301 lhs
= cc_parse_unary(cc
);
304 prec
= get_precedence(TOK
);
305 while (prec
!= 0 && prec
<= min_prec
){
308 rhs
= cc_parse_unary(cc
);
312 // find where to insert it
315 while (node
&& node
->form
== STF_BINOP
){
316 node_prec
= get_precedence(node
->tok
);
317 if (prec
< node_prec
){
318 // carry on descending
319 node_right
= stree_right(node
);
331 // insert between node and node_parent
332 op_node
= stree_create();
333 op_node
->form
= STF_BINOP
;
334 op_node
->tok
= op_tok
;
336 stree_remove_child(node
);
337 stree_append_child(op_node
, node
);
338 stree_append_child(op_node
, rhs
);
339 stree_append_child(node_parent
, op_node
);
341 stree_append_child(op_node
, node
);
342 stree_append_child(op_node
, rhs
);
348 prec
= get_precedence(TOK
);
353 static struct stree
*cc_parse_expr(struct cc
*cc
)
355 return cc_parse_expr_internal(cc
, 13);
358 // same as cc_parse_expr, but don't parse the comma operator
359 // used in function calls
360 static struct stree
*cc_parse_expr_nocomma(struct cc
*cc
)
362 return cc_parse_expr_internal(cc
, 12);
365 // parse struct or union (they're syntactically identical)
366 static bool cc_parse_struct(struct cc
*cc
, struct btype
*struct_type_info
)
369 struct btype type_info
, original_type_info
;
370 struct stree
*type_node
, *sym_node
;
371 tag
= stree_create();
373 tag
->sloc
= LEX
->tok_sloc
;
375 if (lex_is_ident(LEX
, TOK
)){
376 // prev_tag = stree_find(cc->scope, STF_TAG, TOK); TODO
378 tag
->sloc
= LEX
->tok_sloc
;
384 while (TOK
!= 0 && TOK
!= '}'){
385 if (cc_parse_type(cc
, &original_type_info
)){
387 type_info
= original_type_info
;
388 type_node
= cc_parse_pointer_type(cc
, &type_info
);
389 if (!lex_is_ident(LEX
, TOK
)){
390 cc_expect(cc
, "identifier");
392 stree_destroy(type_node
);
394 return false; // XXX: LEAKS?
396 sym_node
= stree_create();
397 sym_node
->form
= STF_VARIABLE
;
398 sym_node
->btype
= type_info
;
400 sym_node
->sloc
= LEX
->tok_sloc
;
402 stree_append_child(sym_node
, type_node
);
404 stree_append_child(tag
, sym_node
);
406 // TODO: check for duplicate fields
407 } while (TOK
== ',' ? (lex_next(LEX
), true) : false);
410 cc_expect(cc
, "field or \"}\"");
412 cc_accept(cc
, "\";\"", ';', 0);
414 cc_accept(cc
, "\"}\"", '}', 0);
417 stree_append_child(cc
->scope
, tag
);
418 struct_type_info
->base_type
= BT_STRUCT
;
419 struct_type_info
->node
= tag
;
423 static bool cc_parse_type(struct cc
*cc
, struct btype
*type_info
)
425 type_info
->base_type
= 0;
426 type_info
->qualifiers
= 0;
429 type_info
->base_type
= BT_VOID
; // well, it's a type, isn't it?
430 // (HINT: the check for variables of type void is not done here)
433 type_info
->base_type
= BT_CHAR
;
436 type_info
->base_type
= BT_INT
;
439 type_info
->base_type
= BT_FLOAT
;
442 type_info
->base_type
= BT_DOUBLE
;
446 return cc_parse_struct(cc
, type_info
);
454 // Parse a pointer type: next token must be after initial type declaration
456 // ^--current token: '*'
457 // Returns a sub-stree of additional type information.
458 // THIS MUST BE PUT SOMEWHERE IN A TREE.
459 // *type_info will be modified to represent the new pointer type
460 static struct stree
*cc_parse_pointer_type(struct cc
*cc
, struct btype
*type_info
)
462 struct stree
*type_node
= NULL
, *new_type_node
;
464 // create a type node to store the excess type information
465 new_type_node
= stree_create();
466 new_type_node
->form
= STF_TYPE
;
467 new_type_node
->btype
= *type_info
;
469 stree_append_child(type_node
, new_type_node
);
471 type_node
= new_type_node
;
472 // create pointer type
473 type_info
->base_type
= BT_POINTER
;
474 type_info
->qualifiers
= 0; // TODO: parse qualifiers
475 type_info
->node
= new_type_node
;
482 // Parse function parameters
483 // void foo(int a, int b);
484 // ^-- current token: 'int'
485 // returns an STF_PARAMETERS node full of parameters, or
486 // NULL if there aren't any.
487 static struct stree
*cc_parse_parameters(struct cc
*cc
)
489 struct stree
*parameters
= NULL
;
490 if (TOK
== TOK_void
){
491 // just 'void' - no parameters
493 } else if (TOK
!= ')'){
494 parameters
= stree_create(); // create parameter list
495 parameters
->form
= STF_PARAMETERS
;
497 struct btype type_info
;
498 struct stree
*type_node
, *param_node
;
499 if (TOK
== TOK_ELLIPSIS
){
501 param_node
= stree_create();
502 param_node
->form
= STF_VARIABLE
;
503 param_node
->tok
= TOK_ELLIPSIS
;
504 if (stree_child_count(parameters
) == 0){
505 cc_error(cc
, "a named argument is required before \"...\"");
509 cc_error(cc
, "variadic function's \"...\" must be placed at the end of the parameter list");
510 cc_sync(cc
, ')', ';', 0);
513 // parse parameter's type
514 if (!cc_parse_type(cc
, &type_info
)){
515 cc_expect(cc
, "type");
517 type_node
= cc_parse_pointer_type(cc
, &type_info
);
518 // create parameter node
519 param_node
= stree_create();
520 param_node
->form
= STF_VARIABLE
;
521 param_node
->btype
= type_info
;
522 // save extra type info, if there is any
524 stree_append_child(param_node
, type_node
);
526 // does the parameter have a name yet? it may not.
527 if (lex_is_ident(LEX
, TOK
)){
528 // parameter has a name
529 param_node
->tok
= TOK
;
532 // if it doesn't, that's ok too
533 // as long as it's not a function definition;
534 // we don't know yet, though.
536 // Now, put the parameter in the list
537 stree_append_child(parameters
, param_node
);
538 } while (TOK
== ',' ? (lex_next(LEX
), true) : false);
540 cc_accept(cc
, "\")\"", ')', 0);
544 // return true if the two types are identical
545 static bool type_identical(struct btype
*type1
, struct btype
*type2
)
548 if (type1
->base_type
!= type2
->base_type
)
550 // TODO: check qualifiers?
551 if (type1
->base_type
== BT_POINTER
){
552 // compare pointed types
553 type1
= &type1
->node
->btype
;
554 type2
= &type2
->node
->btype
;
557 return true; // all checks passed
560 // compare two functions, func1 and func2, for compatibility
561 static void cc_compare_functions(struct cc
*cc
, struct stree
*func1
, struct stree
*func2
)
563 struct stree
*param_list_1
, *param_list_2
, *param_1
, *param_2
;
565 if (!type_identical(&func1
->btype
, &func2
->btype
)){
566 cc_error_loc(cc
, &func2
->sloc
, "conflicting types for \"%s\"",
567 lex_get_tok_str(LEX
, func2
->tok
, NULL
));
568 cc_error_loc(cc
, &func1
->sloc
, "previous declaration of \"%s\" was here",
569 lex_get_tok_str(LEX
, func1
->tok
, NULL
));
572 param_list_1
= stree_get_child_by_form(func1
, STF_PARAMETERS
);
573 param_list_2
= stree_get_child_by_form(func2
, STF_PARAMETERS
);
574 if (stree_child_count(param_list_1
) != stree_child_count(param_list_2
)){
575 cc_error_loc(cc
, &func2
->sloc
, "conflicting number of parameters for \"%s\"",
576 lex_get_tok_str(LEX
, func2
->tok
, NULL
));
577 cc_error_loc(cc
, &func1
->sloc
, "previous declaration of \"%s\" was here",
578 lex_get_tok_str(LEX
, func1
->tok
, NULL
));
583 while (stree_next_child(param_list_1
, ¶m_1
),
584 stree_next_child(param_list_2
, ¶m_2
),
585 (param_1
&& param_2
)){
587 if (param_1
->tok
== TOK_ELLIPSIS
|| param_2
->tok
== TOK_ELLIPSIS
){
589 if (param_1
->tok
== TOK_ELLIPSIS
&& param_2
->tok
== TOK_ELLIPSIS
){
592 cc_error_loc(cc
, &func2
->sloc
, "\"%s\" is %svariadic, unlike previous declaration",
593 lex_get_tok_str(LEX
, func2
->tok
, NULL
),
594 param_1
->tok
== TOK_ELLIPSIS
? "not " : "");
595 cc_error_loc(cc
, &func1
->sloc
, "previous declaration of \"%s\" was here",
596 lex_get_tok_str(LEX
, func1
->tok
, NULL
));
599 // compare parameter types
600 else if (!type_identical(¶m_1
->btype
, ¶m_2
->btype
)){
601 cc_error_loc(cc
, &func2
->sloc
, "conflicting types for parameter %d of \"%s\"",
602 n
, lex_get_tok_str(LEX
, func2
->tok
, NULL
));
603 cc_error_loc(cc
, &func1
->sloc
, "previous declaration of \"%s\" was here",
604 lex_get_tok_str(LEX
, func1
->tok
, NULL
));
606 // merge name information
608 param_1
->tok
= param_2
->tok
;
613 static void cc_parse_decl_function(struct cc
*cc
, struct stree
**psym_node
)
615 struct stree
*parameters
, *prev_decl
;
616 struct stree
*sym_node
= *psym_node
;
617 if (!cc_isglobal(cc
)){
618 struct stree
*current_func
;
619 current_func
= cc_get_current_func(cc
);
620 cc_error(cc
, "nested functions are not permitted: \"%s\" inside \"%s\"",
621 lex_get_tok_str(LEX
, sym_node
->tok
, NULL
),
622 current_func
? lex_get_tok_str(LEX
, current_func
->tok
, NULL
) : "???");
624 // find previous declaration of this symbol
625 prev_decl
= stree_find_local(cc
->scope
, STF_FUNCTION
, sym_node
->tok
);
626 if (!prev_decl
) prev_decl
= stree_find_local(cc
->scope
, STF_VARIABLE
, sym_node
->tok
);
628 if (prev_decl
->form
== STF_FUNCTION
){
629 // must check that declarations match later
631 cc_error_loc(cc
, &sym_node
->sloc
,
632 "\"%s\" declared as different kind of symbol",
633 lex_get_tok_str(LEX
, sym_node
->tok
, NULL
));
634 cc_error_loc(cc
, &prev_decl
->sloc
,
635 "previous declaration of \"%s\" was here",
636 lex_get_tok_str(LEX
, sym_node
->tok
, NULL
));
639 sym_node
->form
= STF_FUNCTION
;
641 lex_next(LEX
); // eat '('
642 parameters
= cc_parse_parameters(cc
);
644 stree_append_child(sym_node
, parameters
);
646 if (prev_decl
&& prev_decl
->form
== STF_FUNCTION
){
647 // make sure declarations match
648 cc_compare_functions(cc
, prev_decl
, sym_node
);
649 // either way, we don't want two copies, so delete the later one
650 stree_destroy(sym_node
);
655 static void cc_parse_decl_variable(struct cc
*cc
, struct stree
*sym_node
)
657 struct stree
*prev_defn
;
658 if (sym_node
->btype
.base_type
== BT_VOID
){
659 cc_error_loc(cc
, &sym_node
->sloc
, "cannot declare variable \"%s\" of type void: impossible without vacuum pump",
660 lex_get_tok_str(LEX
, sym_node
->tok
, NULL
));
662 prev_defn
= stree_find_local(cc
->scope
, STF_VARIABLE
, sym_node
->tok
);
664 cc_error_loc(cc
, &sym_node
->sloc
, "redefinition of \"%s\"",
665 lex_get_tok_str(LEX
, sym_node
->tok
, NULL
));
666 cc_error_loc(cc
, &prev_defn
->sloc
,
667 "previous definition of \"%s\" was here",
668 lex_get_tok_str(LEX
, sym_node
->tok
, NULL
));
670 sym_node
->form
= STF_VARIABLE
;
673 static void cc_parse_defn_function(struct cc
*cc
, struct stree
*func
)
675 struct stree
*old_scope
, *block
, *expr
;
676 // TODO: check presence of parameter names
678 block
= stree_create();
679 block
->sloc
= LEX
->tok_sloc
;
680 block
->form
= STF_BLOCK
;
681 stree_append_child(func
, block
);
682 old_scope
= cc
->scope
;
684 while (TOK
!= 0 && TOK
!= '}'){
685 if (cc_parse_decl(cc
, block
)){
687 expr
= cc_parse_expr(cc
);
689 stree_append_child(block
, expr
);
690 cc_accept(cc
, "\";\"", ';', 0);
694 cc
->scope
= old_scope
;
697 // Parse a declaration/definition.
698 // Returns true if it found one, false if there's no declaration (no tokens will have been eaten).
699 // Will return true on syntax error, since tokens were eaten.
700 static bool cc_parse_decl(struct cc
*cc
, struct stree
*insert_here
)
702 struct btype type_info
, original_type_info
;
703 struct stree
*type_node
, *sym_node
= NULL
;
704 bool was_func_defn
= false;
705 if (cc_parse_type(cc
, &original_type_info
)){
707 // restore the type before the '*'s (pointer types)
708 type_info
= original_type_info
;
709 type_node
= cc_parse_pointer_type(cc
, &type_info
);
710 if (!lex_is_ident(LEX
, TOK
)){
711 cc_expect(cc
, "identifier");
713 stree_destroy(type_node
);
718 // variable declaration or definition
719 // or function declaration or definition
720 sym_node
= stree_create();
721 sym_node
->btype
= type_info
;
723 sym_node
->sloc
= LEX
->tok_sloc
;
725 // put the type info somewhere
727 stree_append_child(sym_node
, type_node
);
731 cc_parse_decl_function(cc
, &sym_node
);
733 // function definition
735 cc_parse_defn_function(cc
, sym_node
);
736 cc_accept(cc
, "\"}\"", '}', 0);
737 was_func_defn
= true;
741 cc_parse_decl_variable(cc
, sym_node
);
744 stree_append_child(insert_here
, sym_node
);
746 } while (TOK
== ',' ? (lex_next(LEX
), true) : false);
748 cc_accept(cc
, "\";\"", ';', 0);
757 void cc_parse(struct cc
*cc
)
759 cc
->stree
= cc
->scope
= stree_create();
761 if (!cc_parse_decl(cc
, cc
->scope
)){