allow extrn symbols to be defined by the unit itself (part II)
[xorcyst.git] / astproc.c
blob02adc74b0ef5a59ea2dd6605779a47b866a678f2
1 /*
2 * $Id: astproc.c,v 1.21 2007/11/11 22:35:22 khansen Exp $
3 * $Log: astproc.c,v $
4 * Revision 1.21 2007/11/11 22:35:22 khansen
5 * compile on mac
7 * Revision 1.20 2007/08/19 10:17:39 khansen
8 * allow symbols to be used without having been declared
10 * Revision 1.19 2007/08/12 18:58:12 khansen
11 * ability to generate pure 6502 binary (--pure-binary switch)
13 * Revision 1.18 2007/08/12 02:42:46 khansen
14 * prettify, const
16 * Revision 1.17 2007/08/09 22:06:10 khansen
17 * ability to pass in reference to local label as argument to macro
19 * Revision 1.16 2007/08/09 20:48:46 khansen
20 * disable buggy code that can cause crash
22 * Revision 1.15 2007/08/09 20:33:40 khansen
23 * progress
25 * Revision 1.14 2007/08/08 22:40:01 khansen
26 * improved symbol lookup, definitions must precede usage
28 * Revision 1.13 2007/07/22 13:33:26 khansen
29 * convert tabs to whitespaces
31 * Revision 1.12 2005/01/09 11:17:57 kenth
32 * xorcyst 1.4.5
33 * fixed bug in process_data(), merge_data()
34 * no longer truncation warning when fits in signed byte/word
36 * Revision 1.11 2005/01/05 02:28:13 kenth
37 * xorcyst 1.4.3
38 * support for anonymous unions
39 * fixed sizeof bug
41 * Revision 1.10 2004/12/29 21:44:41 kenth
42 * xorcyst 1.4.2
43 * static indexing, sizeof improved
45 * Revision 1.9 2004/12/25 02:22:35 kenth
46 * fixed bug in reduce_user_storage()
48 * Revision 1.8 2004/12/19 19:58:29 kenth
49 * xorcyst 1.4.0
51 * Revision 1.7 2004/12/18 16:57:39 kenth
52 * STORAGE_NODE(WORD/DWORD_DATATYPE) converts to BYTE
54 * Revision 1.6 2004/12/16 13:19:47 kenth
55 * xorcyst 1.3.5
57 * Revision 1.5 2004/12/14 01:49:05 kenth
58 * xorcyst 1.3.0
60 * Revision 1.4 2004/12/11 02:01:25 kenth
61 * added forward/backward branching
63 * Revision 1.3 2004/12/09 11:18:13 kenth
64 * added: warning, error node processing
66 * Revision 1.2 2004/12/06 04:52:24 kenth
67 * Major updates (xorcyst 1.1.0)
69 * Revision 1.1 2004/06/30 07:55:31 kenth
70 * Initial revision
74 /**
75 * (C) 2004 Kent Hansen
77 * The XORcyst is free software; you can redistribute it and/or modify
78 * it under the terms of the GNU General Public License as published by
79 * the Free Software Foundation; either version 2 of the License, or
80 * (at your option) any later version.
82 * The XORcyst is distributed in the hope that it will be useful,
83 * but WITHOUT ANY WARRANTY; without even the implied warranty of
84 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
85 * GNU General Public License for more details.
87 * You should have received a copy of the GNU General Public License
88 * along with The XORcyst; if not, write to the Free Software
89 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
92 /**
93 * This file contains functions that process the Abstract Syntax Tree (AST).
94 * After the assembly file has been parsed into an AST, a number of passes are
95 * made on it to process it and transform it. The functions here are
96 * concerned with things like
97 * - macro expansion
98 * - symbol table generation
99 * - equates substitution
100 * - constant folding
101 * - code and symbol validation
104 #include <stdlib.h>
105 #include <stdio.h>
106 #include <stdarg.h>
107 #include <string.h>
108 #include <assert.h>
109 #include "astproc.h"
110 #include "symtab.h"
111 #include "opcode.h"
112 #include "charmap.h"
113 #include "xasm.h"
/* Inclusive range test shared by the byte/word predicates below. */
#define VALUE_IN_RANGE(v, lo, hi) (((v) >= (lo)) && ((v) <= (hi)))

/* A byte value is accepted in either its signed or its unsigned form. */
#define IS_SIGNED_BYTE_VALUE(v) VALUE_IN_RANGE(v, -128, 127)
#define IS_UNSIGNED_BYTE_VALUE(v) VALUE_IN_RANGE(v, 0, 255)
#define IS_BYTE_VALUE(v) (IS_SIGNED_BYTE_VALUE(v) || IS_UNSIGNED_BYTE_VALUE(v))

/* A word value is accepted in either its signed or its unsigned form. */
#define IS_SIGNED_WORD_VALUE(v) VALUE_IN_RANGE(v, -32768, 32767)
#define IS_UNSIGNED_WORD_VALUE(v) VALUE_IN_RANGE(v, 0, 65535)
#define IS_WORD_VALUE(v) (IS_SIGNED_WORD_VALUE(v) || IS_UNSIGNED_WORD_VALUE(v))
123 /*---------------------------------------------------------------------------*/
/** Number of errors issued during processing. */
static int err_count = 0;

/** Number of warnings issued during processing. */
static int warn_count = 0;

/** Number of global labels encountered. */
static int label_count = 0;

/** Tracks whether the current statement is in the dataseg or the codeseg. */
static int in_dataseg = 0;

/** Default symbol modifiers, i.e. ZEROPAGE_FLAG, PUBLIC_FLAG. */
static int symbol_modifiers = 0;

/** Program counters used when outputting pure 6502 binary. */
static int dataseg_pc = 0;
static int codeseg_pc = 0;
144 /*---------------------------------------------------------------------------*/
/**
 * Mapping from regular ASCII characters to custom character values.
 * Used to transform .char arrays to regular .db arrays.
 */
static unsigned char charmap[256];

/**
 * Resets the custom character map to the identity mapping:
 * entry N of the table holds the value N.
 */
static void reset_charmap()
{
    unsigned int code = 0;
    while (code < sizeof (charmap)) {
        charmap[code] = (unsigned char)code;
        code++;
    }
}
163 /*---------------------------------------------------------------------------*/
164 /* Forward/backward branching stuff */
166 struct tag_forward_branch_info {
167 astnode *refs[128];
168 int index; /* Index into refs */
169 int counter;
172 typedef struct tag_forward_branch_info forward_branch_info;
174 struct tag_backward_branch_info {
175 astnode *decl;
176 int counter;
179 typedef struct tag_backward_branch_info backward_branch_info;
181 #define BRANCH_MAX 8
183 static forward_branch_info forward_branch[BRANCH_MAX];
185 static backward_branch_info backward_branch[BRANCH_MAX];
188 * Zaps forward/backward branch data.
190 static void branch_init()
192 int i, j;
193 for (i=0; i<BRANCH_MAX; i++) {
194 for (j=0; j<128; j++) {
195 forward_branch[i].refs[j] = NULL;
197 forward_branch[i].index = 0;
198 forward_branch[i].counter = 0;
199 backward_branch[i].decl = NULL;
200 backward_branch[i].counter = 0;
204 /*---------------------------------------------------------------------------*/
207 * Issues an error.
208 * @param loc File location of error
209 * @param fmt printf-style format string
211 static void err(location loc, const char *fmt, ...)
213 va_list ap;
214 va_start(ap, fmt);
216 fprintf(stderr, "%s:", loc.file);
217 LOCATION_PRINT(stderr, loc);
218 fprintf(stderr, ": error: ");
219 vfprintf(stderr, fmt, ap);
220 fprintf(stderr, "\n");
222 va_end(ap);
224 err_count++;
228 * Issues a warning.
229 * @param loc File location of warning
230 * @param fmt printf-style format string
232 static void warn(location loc, const char *fmt, ...)
234 va_list ap;
235 if (!xasm_args.no_warn) {
236 va_start(ap, fmt);
237 fprintf(stderr, "%s:", loc.file);
238 LOCATION_PRINT(stderr, loc);
239 fprintf(stderr, ": warning: ");
240 vfprintf(stderr, fmt, ap);
241 fprintf(stderr, "\n");
242 va_end(ap);
245 warn_count++;
249 * Gets the number of errors encountered during processing.
250 * @return Number of errors
252 int astproc_err_count()
254 return err_count;
257 /*---------------------------------------------------------------------------*/
260 * Gets the processor function for a node type from a map.
261 * Used by astproc_walk().
262 * @param type The node type
263 * @param map A mapping from node types to processor functions
265 static astnodeproc astproc_node_type_to_proc(astnode_type type, const astnodeprocmap *map)
267 for (; map->proc != NULL; map += 1) {
268 if (map->type == type) {
269 return map->proc;
272 return NULL;
275 /*---------------------------------------------------------------------------*/
278 * Walks an abstract syntax tree recursively.
279 * @param n Node to walk
280 * @param arg Optional argument to pass to processor function
281 * @param map Mapping of node types to processor functions
283 static void astproc_walk_recursive(astnode *n, void *arg, const astnodeprocmap *map, astnode **next)
285 astnode *c;
286 astnode *t;
287 if (n == NULL) { return; }
288 astnodeproc p = astproc_node_type_to_proc(astnode_get_type(n), map);
289 if (p != NULL) {
290 if (!p(n, arg, next))
291 return; /* Don't walk children */
293 /* Walk the node's children recursively */
294 for (c=n->first_child; c != NULL; c = t) {
295 t = c->next_sibling; /* default next node */
296 astproc_walk_recursive(c, arg, map, &t);
301 * Generic tree walker function.
302 * @param n Root
303 * @param arg General-purpose argument passed to each node handler function
304 * @param map Array of (nodetype, handler function) tuples
306 void astproc_walk(astnode *n, void *arg, const astnodeprocmap *map)
308 astnode *dummy;
309 astproc_walk_recursive(n, arg, map, &dummy);
312 /*---------------------------------------------------------------------------*/
315 * Don't do any processing of this node or its children on this pass.
317 static int noop(astnode *n, void *arg, astnode **next)
319 return 0;
323 * Substitutes an identifier node with subst_expr if the id is equal to subst_id.
324 * @param id A node of type IDENTIFIER_NODE
325 * @param arg Array of length 2, containing (expr, id) pair
327 static int substitute_id(astnode *id, void *arg, astnode **next)
329 /* arg is array containing expression and identifier */
330 astnode **array = (astnode **)arg;
331 astnode *subst_expr = array[0];
332 astnode *subst_id = array[1];
333 if (astnode_equal(id, subst_id)) {
334 /* They're equal, replace it by expression. */
335 astnode *cl = astnode_clone(subst_expr, id->loc);
336 /* ### Generalize: traverse all children, set the flag */
337 if (astnode_get_type(cl) == LOCAL_ID_NODE) {
338 cl->flags |= 0x80; /* don't globalize it */
340 astnode_replace(id, cl);
341 astnode_finalize(id);
342 *next = cl;
343 return 0;
344 } else {
345 return 1;
350 * Substitutes expr for id in list.
351 * Used by macro expander to substitute a macro body parameter name with the
352 * actual expression used in the macro expansion.
353 * @param expr An expression
354 * @param id An identifier
355 * @param list A list of statements (macro body)
357 static void substitute_expr_for_id(astnode *expr, astnode *id, astnode *list)
359 /* Prepare argument to astproc_walk */
360 astnode *array[2];
361 array[0] = expr;
362 array[1] = id;
363 /* Table of callback functions for our purpose. */
364 static astnodeprocmap map[] = {
365 { IDENTIFIER_NODE, substitute_id },
366 { 0, NULL }
368 /* Do the walk. */
369 astproc_walk(list, array, map);
372 /*---------------------------------------------------------------------------*/
375 * Globalizes a macro expanded local.
376 * This is done simply by concatenating the local label identifier with the
377 * global macro invocation counter.
378 * @param n A node of type LOCAL_LABEL_NODE or LOCAL_ID_NODE
379 * @param arg Namespace counter (int)
381 static int globalize_macro_expanded_local(astnode *n, void *arg, astnode **next)
383 /* Only globalize if it's a reference to a label defined in the macro */
384 if (!(n->flags & 0x80)) {
385 char str[16];
386 int count;
387 /* Make it global by appending the macro expansion counter to the id */
388 count = (int)arg;
389 snprintf(str, sizeof (str), "#%d", count);
390 if (astnode_is_type(n, LOCAL_LABEL_NODE)) {
391 /* LOCAL_LABEL_NODE, use label field */
392 n->label = realloc(n->label, strlen(n->label)+strlen(str)+1);
393 strcat(n->label, str);
394 } else {
395 /* LOCAL_ID_NODE, use ident field */
396 assert(astnode_is_type(n, LOCAL_ID_NODE));
397 n->ident = realloc(n->ident, strlen(n->ident)+strlen(str)+1);
398 strcat(n->ident, str);
401 return 1;
405 * Globalizes all locals in the body of a macro expansion.
406 * Used by the macro expander to ensure that local labels in macro expansions
407 * are unique.
408 * @param exp_body The expanded macro body
409 * @param count Unique macro namespace counter
411 static void globalize_macro_expanded_locals(astnode *exp_body, int count)
413 /* Table of callback functions for our purpose. */
414 static astnodeprocmap map[] = {
415 { LOCAL_ID_NODE, globalize_macro_expanded_local },
416 { LOCAL_LABEL_NODE, globalize_macro_expanded_local },
417 { 0, NULL }
419 /* Do the walk. */
420 astproc_walk(exp_body, (void *)count, map);
424 * Expands a macro; that is, replaces a macro invocation in the AST with the
425 * macro body. Substitutes parameter names for values.
426 * @param macro Must be a node of type MACRO_NODE
427 * @param arg Not used
429 static int expand_macro(astnode *macro, void *arg, astnode **next)
431 astnode *decl;
432 astnode *decl_body;
433 astnode *exp_body;
434 astnode *formals;
435 astnode *actuals;
436 astnode *id;
437 astnode *expr;
438 int i;
439 /* Keeps track of the current/total number of macro expansions */
440 static int macro_expansion_count = 0;
442 id = astnode_get_child(macro, 0);
443 assert(astnode_is_type(id, IDENTIFIER_NODE));
444 symtab_entry *e = symtab_lookup(id->ident);
445 if (e == NULL) {
446 err(macro->loc, "unknown macro or directive `%s'", id->ident);
447 astnode_remove(macro);
448 astnode_finalize(macro);
449 return 0;
451 else if (e->type != MACRO_SYMBOL) {
452 err(macro->loc, "cannot expand `%s'; not a macro", e->id);
453 astnode_remove(macro);
454 astnode_finalize(macro);
455 return 0;
457 else {
458 decl = (astnode *)e->def;
459 formals = astnode_get_child(decl, 1);
460 actuals = astnode_get_child(macro, 1);
461 if (astnode_get_child_count(formals) != astnode_get_child_count(actuals)) {
462 err(macro->loc, "macro `%s' does not take %d argument(s)", id->ident, astnode_get_child_count(actuals) );
463 astnode_remove(macro);
464 astnode_finalize(macro);
465 return 0;
467 /* Expand the body */
468 decl_body = astnode_get_child(decl, 2);
469 exp_body = astnode_clone(decl_body, macro->loc);
470 assert(astnode_get_type(exp_body) == LIST_NODE);
471 /* Substitute actuals for formals */
472 for (i=0; i<astnode_get_child_count(actuals); i++) {
473 /* The id to substitute */
474 id = astnode_get_child(formals, i);
475 /* The expression to substitute it with */
476 expr = astnode_get_child(actuals, i);
477 /* Do it! */
478 substitute_expr_for_id(expr, id, exp_body);
480 /* Make locals a bit more global */
481 globalize_macro_expanded_locals(exp_body, macro_expansion_count);
482 /* Replace MACRO_NODE by the macro body instance */
484 astnode *stmts = astnode_remove_children(exp_body);
485 astnode_replace(macro, stmts);
486 *next = stmts;
487 astnode_finalize(exp_body);
490 astnode_finalize(macro);
491 macro_expansion_count++;
493 return 0;
496 /*---------------------------------------------------------------------------*/
499 * Does constant folding of expression.
500 * If the expression can be folded, the original expression is replaced by the
501 * new one, and the original expression is finalized.
502 * @param expr Expression
503 * @return Original expression, if couldn't fold, otherwise new, folded expression
505 astnode *astproc_fold_constants(astnode *expr)
507 astnode *folded;
508 astnode *lhs;
509 astnode *rhs;
510 if (expr == NULL) { return NULL; }
511 folded = NULL;
512 if (astnode_is_type(expr, ARITHMETIC_NODE)) {
513 /* Fold operands recursively */
514 lhs = astproc_fold_constants(LHS(expr));
515 rhs = astproc_fold_constants(RHS(expr));
516 switch (expr->oper) {
517 /* Binary ops */
518 case PLUS_OPERATOR:
519 case MINUS_OPERATOR:
520 case MUL_OPERATOR:
521 case DIV_OPERATOR:
522 case MOD_OPERATOR:
523 case AND_OPERATOR:
524 case OR_OPERATOR:
525 case XOR_OPERATOR:
526 case SHL_OPERATOR:
527 case SHR_OPERATOR:
528 case LT_OPERATOR:
529 case GT_OPERATOR:
530 case EQ_OPERATOR:
531 case NE_OPERATOR:
532 case LE_OPERATOR:
533 case GE_OPERATOR:
534 /* See if it can be folded */
535 if ( (astnode_is_type(lhs, INTEGER_NODE)) &&
536 (astnode_is_type(rhs, INTEGER_NODE)) ) {
537 /* Both sides are integer literals, so fold. */
538 switch (expr->oper) {
539 case PLUS_OPERATOR: folded = astnode_create_integer(lhs->integer + rhs->integer, expr->loc); break;
540 case MINUS_OPERATOR: folded = astnode_create_integer(lhs->integer - rhs->integer, expr->loc); break;
541 case MUL_OPERATOR: folded = astnode_create_integer(lhs->integer * rhs->integer, expr->loc); break;
542 case DIV_OPERATOR: folded = astnode_create_integer(lhs->integer / rhs->integer, expr->loc); break;
543 case MOD_OPERATOR: folded = astnode_create_integer(lhs->integer % rhs->integer, expr->loc); break;
544 case AND_OPERATOR: folded = astnode_create_integer(lhs->integer & rhs->integer, expr->loc); break;
545 case OR_OPERATOR: folded = astnode_create_integer(lhs->integer | rhs->integer, expr->loc); break;
546 case XOR_OPERATOR: folded = astnode_create_integer(lhs->integer ^ rhs->integer, expr->loc); break;
547 case SHL_OPERATOR: folded = astnode_create_integer(lhs->integer << rhs->integer, expr->loc); break;
548 case SHR_OPERATOR: folded = astnode_create_integer(lhs->integer >> rhs->integer, expr->loc); break;
549 case LT_OPERATOR: folded = astnode_create_integer(lhs->integer < rhs->integer, expr->loc); break;
550 case GT_OPERATOR: folded = astnode_create_integer(lhs->integer > rhs->integer, expr->loc); break;
551 case EQ_OPERATOR: folded = astnode_create_integer(lhs->integer == rhs->integer, expr->loc); break;
552 case NE_OPERATOR: folded = astnode_create_integer(lhs->integer != rhs->integer, expr->loc); break;
553 case LE_OPERATOR: folded = astnode_create_integer(lhs->integer <= rhs->integer, expr->loc); break;
554 case GE_OPERATOR: folded = astnode_create_integer(lhs->integer >= rhs->integer, expr->loc); break;
556 default:
557 fprintf(stderr, "internal error: operator not handled in astproc_fold_constants()\n");
558 assert(0);
559 folded = expr;
560 break;
562 if (folded != expr) {
563 /* Replace expression by folded one. */
564 astnode_replace(expr, folded);
565 astnode_finalize(expr);
566 return folded;
569 else if ( (astnode_is_type(lhs, STRING_NODE)) &&
570 (astnode_is_type(rhs, STRING_NODE)) ) {
571 /* Both sides are string literals. */
572 /* Folding is defined only for certain operators. */
573 switch (expr->oper) {
574 case PLUS_OPERATOR:
575 /* String concatenation. */
576 folded = astnode_create(STRING_NODE, expr->loc);
577 folded->string = (char *)malloc(strlen(lhs->string) + strlen(rhs->string) + 1);
578 if (folded->string != NULL) {
579 strcpy(folded->string, lhs->string);
580 strcat(folded->string, rhs->string);
582 break;
584 /* String comparison. */
585 case LT_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) < 0, expr->loc); break;
586 case GT_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) > 0, expr->loc); break;
587 case EQ_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) == 0, expr->loc); break;
588 case NE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) != 0, expr->loc); break;
589 case LE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) <= 0, expr->loc); break;
590 case GE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) >= 0, expr->loc); break;
592 default:
593 folded = expr;
594 break;
596 if (folded != expr) {
597 /* Replace expression by folded one. */
598 astnode_replace(expr, folded);
599 astnode_finalize(expr);
600 return folded;
603 else if ((astnode_get_type(lhs) == STRING_NODE) &&
604 (astnode_get_type(rhs) == INTEGER_NODE) &&
605 (expr->oper == PLUS_OPERATOR)) {
606 /* Left side is string and right side is integer.
607 Result is a string. */
608 char str[32];
609 snprintf(str, sizeof (str), "%d", rhs->integer);
610 folded = astnode_create(STRING_NODE, expr->loc);
611 folded->string = (char *)malloc(strlen(lhs->string) + strlen(str) + 1);
612 if (folded->string != NULL) {
613 strcpy(folded->string, lhs->string);
614 strcat(folded->string, str);
616 /* Replace expression by folded one. */
617 astnode_replace(expr, folded);
618 astnode_finalize(expr);
619 return folded;
621 else if ((astnode_get_type(rhs) == STRING_NODE) &&
622 (astnode_get_type(lhs) == INTEGER_NODE) &&
623 (expr->oper == PLUS_OPERATOR)) {
624 /* Left side is integer and right side is string.
625 Result is a string. */
626 char str[32];
627 snprintf(str, sizeof (str), "%d", lhs->integer);
628 folded = astnode_create(STRING_NODE, expr->loc);
629 folded->string = (char *)malloc(strlen(str) + strlen(rhs->string) + 1);
630 if (folded->string != NULL) {
631 strcpy(folded->string, str);
632 strcat(folded->string, rhs->string);
634 /* Replace expression by folded one. */
635 astnode_replace(expr, folded);
636 astnode_finalize(expr);
637 return folded;
639 /* Use some mathematical identities... */
640 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 0))
641 && (expr->oper == PLUS_OPERATOR)) {
642 /* 0+expr == expr */
643 astnode_remove_child(expr, rhs);
644 astnode_replace(expr, rhs);
645 astnode_finalize(expr);
646 return rhs;
648 else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 0))
649 && (expr->oper == PLUS_OPERATOR)) {
650 /* expr+0 == expr */
651 astnode_remove_child(expr, lhs);
652 astnode_replace(expr, lhs);
653 astnode_finalize(expr);
654 return lhs;
656 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 1))
657 && (expr->oper == MUL_OPERATOR)) {
658 /* 1*expr == expr */
659 astnode_remove_child(expr, rhs);
660 astnode_replace(expr, rhs);
661 astnode_finalize(expr);
662 return rhs;
664 else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 1))
665 && ((expr->oper == MUL_OPERATOR) || (expr->oper == DIV_OPERATOR)) ) {
666 /* expr*1 == expr */
667 /* expr/1 == expr */
668 astnode_remove_child(expr, lhs);
669 astnode_replace(expr, lhs);
670 astnode_finalize(expr);
671 return lhs;
673 else {
674 /* No chance of folding this one. */
676 break;
678 /* Unary ops */
679 case NEG_OPERATOR:
680 case NOT_OPERATOR:
681 case LO_OPERATOR:
682 case HI_OPERATOR:
683 case UMINUS_OPERATOR:
684 case BANK_OPERATOR:
685 /* See if it can be folded */
686 if (astnode_is_type(lhs, INTEGER_NODE)) {
687 /* Fold it. */
688 switch (expr->oper) {
689 case NEG_OPERATOR: folded = astnode_create_integer(~lhs->integer, expr->loc); break;
690 case NOT_OPERATOR: folded = astnode_create_integer(!lhs->integer, expr->loc); break;
691 case LO_OPERATOR: folded = astnode_create_integer(lhs->integer & 0xFF, expr->loc); break;
692 case HI_OPERATOR: folded = astnode_create_integer((lhs->integer >> 8) & 0xFF, expr->loc); break;
693 case UMINUS_OPERATOR: folded = astnode_create_integer(-lhs->integer, expr->loc); break;
694 default: break;
696 /* Replace expression by folded one. */
697 astnode_replace(expr, folded);
698 astnode_finalize(expr);
699 return folded;
701 else {
702 /* Couldn't fold this one. */
704 break;
707 /* Couldn't fold it, return original expression */
708 return expr;
711 /*---------------------------------------------------------------------------*/
714 * Substitutes identifier if it has a constant definition in symbol table.
715 * @param expr Node of type IDENTIFIER_NODE
717 static astnode *substitute_ident(astnode *expr)
719 astnode *c;
720 symtab_entry *e;
721 e = symtab_lookup(expr->ident);
722 if (e != NULL) {
723 if (e->type == CONSTANT_SYMBOL) {
724 /* This is a defined symbol that should be
725 replaced by the expression it stands for */
726 c = astnode_clone((astnode *)e->def, expr->loc);
727 astnode_replace(expr, c);
728 astnode_finalize(expr);
729 expr = c;
732 return expr;
736 * Substitutes sizeof with proper constant.
737 * @param expr Node of type SIZEOF_NODE
739 static astnode *reduce_sizeof(astnode *expr)
741 int ok;
742 astnode *c;
743 astnode *id;
744 astnode *type;
745 astnode *count;
746 symtab_entry *e;
748 count = NULL;
749 if (astnode_is_type(LHS(expr), IDENTIFIER_NODE)) {
750 /* Identifier might be the name of a user-defined type, OR
751 it might be the name of a variable of a user-defined type */
752 type = NULL;
753 /* Look it up */
754 id = LHS(expr);
755 e = symtab_global_lookup(id->ident);
756 if (e != NULL) {
757 switch (e->type) {
758 case STRUC_SYMBOL:
759 case UNION_SYMBOL:
760 case RECORD_SYMBOL:
761 case ENUM_SYMBOL:
762 type = astnode_create_datatype(USER_DATATYPE, astnode_clone(id, id->loc), id->loc);
763 break;
765 case VAR_SYMBOL:
766 type = astnode_clone(LHS(e->def), id->loc);
767 if (astnode_is_type(e->def, STORAGE_NODE)) {
768 count = astnode_clone(RHS(e->def), id->loc);
770 else {
771 count = astnode_create_integer(astnode_get_child_count(e->def)-1, id->loc);
773 break;
775 default:
776 /* Can't take sizeof of this symbol type */
777 break;
780 if (type == NULL) {
781 /* Unknown */
782 type = astnode_create_datatype(USER_DATATYPE, astnode_clone(id, id->loc), id->loc);
784 /* Replace identifier by datatype node */
785 astnode_replace(id, type);
786 astnode_finalize(id);
788 type = LHS(expr);
789 switch (type->datatype) {
790 case BYTE_DATATYPE:
791 case CHAR_DATATYPE:
792 c = astnode_create_integer(1, expr->loc);
793 astnode_replace(expr, c);
794 astnode_finalize(expr);
795 expr = c;
796 break;
798 case WORD_DATATYPE:
799 c = astnode_create_integer(2, expr->loc);
800 astnode_replace(expr, c);
801 astnode_finalize(expr);
802 expr = c;
803 break;
805 case DWORD_DATATYPE:
806 c = astnode_create_integer(4, expr->loc);
807 astnode_replace(expr, c);
808 astnode_finalize(expr);
809 expr = c;
810 break;
812 case USER_DATATYPE:
813 id = LHS(type);
814 e = symtab_global_lookup(id->ident);
815 ok = 0;
816 if (e != NULL) {
817 switch (e->type) {
818 case STRUC_SYMBOL:
819 case UNION_SYMBOL:
820 /* Datatype is defined, replace sizeof with proper expression */
821 c = astnode_clone((astnode *)(e->struc.size), ((astnode *)(e->struc.size))->loc);
822 astnode_replace(expr, c);
823 astnode_finalize(expr);
824 expr = c;
825 ok = 1;
826 break;
828 case RECORD_SYMBOL:
829 case ENUM_SYMBOL:
830 /* 1 byte */
831 c = astnode_create_integer(1, expr->loc);
832 astnode_replace(expr, c);
833 astnode_finalize(expr);
834 expr = c;
835 ok = 1;
836 break;
838 default:
839 /* Dunno the size of this symbol type */
840 break;
843 if (!ok) {
844 /* Datatype not defined, error */
845 err(expr->loc, "size of `%s' is unknown", id->ident);
846 /* Replace by 1 */
847 c = astnode_create_integer(1, expr->loc);
848 astnode_replace(expr, c);
849 astnode_finalize(expr);
850 return c;
852 break;
854 default:
855 err(expr->loc, "substitute_sizeof(): unknown type");
856 break;
858 if (count != NULL) {
859 c = astnode_create_arithmetic(
860 MUL_OPERATOR,
861 astnode_clone(expr, expr->loc),
862 count,
863 expr->loc
865 astnode_replace(expr, c);
866 astnode_finalize(expr);
867 expr = c;
869 return expr;
873 * Substitutes A::B with an expression.
874 * If A is a struct: substitute with offset of B
875 * If A is a union: substitute with 0
876 * If A is an enumeration: substitute with value for B
877 * @param expr Node of type SCOPE_NODE
879 static astnode *reduce_scope(astnode *expr)
881 symtab_entry *ns;
882 symtab_entry *sym;
883 astnode *c;
884 astnode *namespace;
885 astnode *symbol;
886 /* Look up the namespace */
887 namespace = LHS(expr);
888 ns = symtab_lookup(namespace->ident);
889 if (ns != NULL) {
890 /* Look up the local symbol */
891 symtab_push(ns->symtab);
892 symbol = RHS(expr);
893 sym = symtab_lookup(symbol->ident);
894 if (sym != NULL) {
895 /* See if we can replace it */
896 switch (ns->type) {
897 case STRUC_SYMBOL:
898 case UNION_SYMBOL:
899 case RECORD_SYMBOL:
900 /* Replace with field offset */
901 c = astnode_clone(sym->field.offset, sym->field.offset->loc);
902 astnode_replace(expr, c);
903 astnode_finalize(expr);
904 expr = c;
905 break;
907 case ENUM_SYMBOL:
908 /* Replace with enum entry value */
909 c = astnode_clone(sym->def, sym->def->loc);
910 astnode_replace(expr, c);
911 astnode_finalize(expr);
912 expr = c;
913 break;
915 default:
916 break;
919 symtab_pop();
921 return expr;
924 static astnode *reduce_expression(astnode *expr);
927 * Handles remainder of fields in A.B.C.D . ..., where one or more fields may be indexed.
928 * @param expr Node of type DOT_NODE, INDEX_NODE or IDENTIFIER_NODE
930 static astnode *reduce_dot_recursive(astnode *expr)
932 astnode *term;
933 astnode *offset;
934 astnode *left;
935 astnode *right;
936 astnode *type;
937 symtab_entry *field;
938 symtab_entry *def;
939 astnode *index = NULL;
940 /* Get identifiers involved: 'right' is field in 'left' */
941 left = LHS(expr);
942 if (astnode_is_type(left, INDEX_NODE)) {
943 left = LHS(left); /* Need identifier */
945 right = RHS(expr);
946 if (astnode_is_type(right, DOT_NODE)) {
947 right = LHS(right); /* Need identifier */
949 if (astnode_is_type(right, INDEX_NODE)) {
950 index = RHS(right);
951 right = LHS(right); /* Need identifier */
953 /* Lookup 'right' in 'left's symbol table (on stack) */
954 field = symtab_lookup(right->ident);
955 /* Look up variable's type definition */
956 type = LHS(field->def);
957 /* Copy its offset */
958 offset = astnode_clone(field->field.offset, right->loc);
959 if (index != NULL) {
960 /* Create expression: identifier + sizeof(datatype) * index */
961 offset = astnode_create_arithmetic(
962 PLUS_OPERATOR,
963 offset,
964 astnode_create_arithmetic(
965 MUL_OPERATOR,
966 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
967 astnode_clone(index, index->loc),
968 index->loc
970 expr->loc
973 /* See if more subfields to process */
974 expr = RHS(expr);
975 if (astnode_is_type(expr, DOT_NODE)) {
976 /* Next field */
977 def = symtab_global_lookup(LHS(type)->ident);
978 symtab_push(def->symtab);
979 term = reduce_dot_recursive(expr);
980 symtab_pop();
981 /* Construct sum */
982 offset = astnode_create_arithmetic(
983 PLUS_OPERATOR,
984 offset,
985 term,
986 expr->loc
989 return offset;
993 * Transforms A.B.C.D . ... to A + offset(B) + offset(C) + ...
994 * No error checking, since validate_dotref() should have been called previously.
995 * @param expr Node of type DOT_NODE
997 static astnode *reduce_dot(astnode *expr)
999 symtab_entry *father;
1000 symtab_entry *def;
1001 astnode *type;
1002 astnode *left;
1003 astnode *term1;
1004 astnode *term2;
1005 astnode *sum;
1006 astnode *index = NULL;
1007 /* Look up parent in global symbol table */
1008 left = LHS(expr); /* expr := left . right */
1009 if (astnode_is_type(left, INDEX_NODE)) {
1010 index = RHS(left);
1011 left = LHS(left); /* Need identifier */
1013 father = symtab_lookup(left->ident);
1014 /* Look up variable's type definition */
1015 type = LHS(father->def); /* DATATYPE_NODE */
1016 def = symtab_lookup(LHS(type)->ident);
1017 /* 1st term of sum is the leftmost structure identifier */
1018 term1 = astnode_clone(left, left->loc);
1019 if (index != NULL) {
1020 /* Create expression: identifier + sizeof(datatype) * index */
1021 term1 = astnode_create_arithmetic(
1022 PLUS_OPERATOR,
1023 term1,
1024 astnode_create_arithmetic(
1025 MUL_OPERATOR,
1026 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
1027 astnode_clone(index, index->loc),
1028 index->loc
1030 expr->loc
1033 /* Add offsets recursively */
1034 symtab_push(def->symtab);
1035 term2 = reduce_dot_recursive(expr);
1036 symtab_pop();
1037 /* Calculate final sum */
1038 sum = astnode_create_arithmetic(
1039 PLUS_OPERATOR,
1040 term1,
1041 term2,
1042 expr->loc
1044 sum = reduce_expression(sum);
1045 /* Replace dotted expression by sum */
1046 astnode_replace(expr, sum);
1047 astnode_finalize(expr);
1048 return sum;
1052 * Reduces MASK operation to a field mask.
1053 * @param mask A node of type MASK_NODE
1055 static astnode *reduce_mask(astnode *mask)
1057 symtab_entry *ns;
1058 symtab_entry *sym;
1059 astnode *c;
1060 astnode *namespace;
1061 astnode *symbol;
1062 astnode *expr;
1063 /* Child is a scope node, record::field */
1064 expr = LHS(mask);
1065 /* Look up the namespace */
1066 namespace = LHS(expr);
1067 ns = symtab_lookup(namespace->ident);
1068 if (ns != NULL) {
1069 /* Make sure it's a record */
1070 if (ns->type != RECORD_SYMBOL) {
1071 err(expr->loc, "`%s' is not a record");
1072 /* Replace by 0 */
1073 c = astnode_create_integer(0, expr->loc);
1074 astnode_replace(mask, c);
1075 astnode_finalize(mask);
1076 expr = c;
1078 else {
1079 /* Look up the local symbol */
1080 symtab_push(ns->symtab);
1081 symbol = RHS(expr);
1082 sym = symtab_lookup(symbol->ident);
1083 if (sym != NULL) {
1084 /* Calculate field mask */
1085 // mask = ((1 << width) - 1) << offset
1086 c = astnode_create_arithmetic(
1087 SHL_OPERATOR,
1088 astnode_create_arithmetic(
1089 MINUS_OPERATOR,
1090 astnode_create_arithmetic(
1091 SHL_OPERATOR,
1092 astnode_create_integer(1, expr->loc),
1093 astnode_clone(sym->field.size, expr->loc),
1094 expr->loc
1096 astnode_create_integer(1, expr->loc),
1097 expr->loc
1099 astnode_clone(sym->field.offset, expr->loc),
1100 expr->loc
1102 c = reduce_expression(c);
1103 astnode_replace(mask, c);
1104 astnode_finalize(mask);
1105 expr = c;
1107 symtab_pop();
1110 return expr;
1114 * Reduces identifier[expression] to identifier + sizeof(identifier type) * expression
1116 static astnode *reduce_index(astnode *expr)
1118 symtab_entry *e;
1119 astnode *c;
1120 astnode *type;
1121 astnode *id;
1122 astnode *index;
1123 id = LHS(expr);
1124 assert(astnode_is_type(id, IDENTIFIER_NODE));
1125 index = reduce_expression(RHS(expr));
1126 /* Lookup identifier */
1127 e = symtab_lookup(id->ident);
1128 assert(e != 0);
1129 /* Get its datatype */
1130 type = LHS(e->def);
1131 /* Create expression: identifier + sizeof(datatype) * index */
1132 c = astnode_create_arithmetic(
1133 PLUS_OPERATOR,
1134 astnode_clone(id, id->loc),
1135 astnode_create_arithmetic(
1136 MUL_OPERATOR,
1137 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
1138 astnode_clone(index, index->loc),
1139 index->loc
1141 expr->loc
1143 /* Replace index expression */
1144 astnode_replace(expr, c);
1145 astnode_finalize(expr);
1146 return c;
1150 * Substitutes all identifiers that represent EQU defines with their
1151 * corresponding expression.
1152 * @param expr The expression whose defines to substitute
1154 static astnode *substitute_defines(astnode *expr)
1156 switch (astnode_get_type(expr)) {
1157 case ARITHMETIC_NODE:
1158 substitute_defines(LHS(expr));
1159 substitute_defines(RHS(expr));
1160 break;
1162 case IDENTIFIER_NODE:
1163 expr = substitute_ident(expr);
1164 break;
1166 case SIZEOF_NODE:
1167 expr = reduce_sizeof(expr);
1168 break;
1170 case MASK_NODE:
1171 expr = reduce_mask(expr);
1172 break;
1174 case INDEX_NODE:
1175 substitute_defines(LHS(expr));
1176 substitute_defines(RHS(expr));
1177 break;
1179 case DOT_NODE:
1180 substitute_defines(LHS(expr));
1181 substitute_defines(RHS(expr));
1182 break;
1184 default:
1185 /* Nada */
1186 break;
1188 return expr;
1194 static astnode *reduce_highlevel_constructs(astnode *expr)
1196 switch (astnode_get_type(expr)) {
1197 case ARITHMETIC_NODE:
1198 reduce_highlevel_constructs(LHS(expr));
1199 reduce_highlevel_constructs(RHS(expr));
1200 break;
1202 case SCOPE_NODE:
1203 expr = reduce_scope(expr);
1204 break;
1206 case DOT_NODE:
1207 expr = reduce_dot(expr);
1208 break;
1210 case INDEX_NODE:
1211 expr = reduce_index(expr);
1212 break;
1214 default:
1215 /* Nada */
1216 break;
1218 return expr;
1222 * Really reduces an expression.
1223 * @param expr Expression to attempt to reduce
1225 static astnode *reduce_expression_complete(astnode *expr)
1227 return astproc_fold_constants( reduce_highlevel_constructs( substitute_defines(expr) ) );
1231 * Reduces an expression.
1232 * It does two things:
1233 * 1. Substitute all equates by their value
1234 * 2. Folds constants in the resulting expression
1235 * If the expression is reduced, the original expression is replaced by the
1236 * new one, the original is finalized, and a pointer to the new expression
1237 * is returned.
1238 * If the expression is not reduced, the original pointer is returned.
1240 static astnode *reduce_expression(astnode *expr)
1242 return astproc_fold_constants( substitute_defines(expr) );
1246 * Reduces RECORD instance to a single byte (DB statement).
1247 * @param r Record's symbol table entry
1248 * @param expr Record initializer
1249 * @param flat List on which to append the reduced form
1251 static void reduce_record(symtab_entry *r, astnode *init, astnode *flat)
1253 ordered_field_list *list;
1254 symtab_entry *e;
1255 astnode *val;
1256 astnode *term;
1257 astnode *result;
1258 astnode *mask;
1259 astnode *repl;
1260 if (!astnode_is_type(init, STRUC_NODE)) {
1261 err(init->loc, "record initializer expected");
1262 return;
1264 /* Go through fields */
1265 symtab_push(r->symtab);
1266 result = astnode_create_integer(0, init->loc);
1267 for (val = init->first_child, list = r->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1268 if (astnode_is_type(val, NULL_NODE)) {
1269 continue;
1271 if (astnode_is_type(val, STRUC_NODE)) {
1272 err(init->loc, "record field initializer expected");
1273 continue;
1275 /* Get field definition */
1276 e = list->entry;
1277 /* Calculate field mask */
1278 // mask = ((1 << width) - 1) << offset
1279 mask = astnode_create_arithmetic(
1280 SHL_OPERATOR,
1281 astnode_create_arithmetic(
1282 MINUS_OPERATOR,
1283 astnode_create_arithmetic(
1284 SHL_OPERATOR,
1285 astnode_create_integer(1, val->loc),
1286 astnode_clone(e->field.size, val->loc),
1287 val->loc
1289 astnode_create_integer(1, val->loc),
1290 val->loc
1292 astnode_clone(e->field.offset, val->loc),
1293 val->loc
1295 /* Shift val left e->field.offset bits, AND with mask */
1296 term = astnode_create_arithmetic(
1297 AND_OPERATOR,
1298 astnode_create_arithmetic(
1299 SHL_OPERATOR,
1300 astnode_clone(val, val->loc),
1301 astnode_clone(e->field.offset, val->loc),
1302 val->loc
1304 mask,
1305 val->loc
1307 /* OR the value with the result so far */
1308 result = astnode_create_arithmetic(
1309 OR_OPERATOR,
1310 result,
1311 term,
1312 val->loc
1314 result = reduce_expression(result);
1316 /* Determine reason for stopping loop */
1317 if (val != NULL) {
1318 err(init->loc, "too many field initializers");
1320 /* Make byte data node (packed record value) */
1321 repl = astnode_create_data(
1322 astnode_create_datatype(BYTE_DATATYPE, NULL, init->loc),
1323 result,
1324 init->loc
1326 astnode_add_child(flat, repl);
1327 symtab_pop();
1331 * Reduces ENUM instance to DB.
1332 * @param e Enumeration's symbol table entry
1333 * @param expr Expression
1334 * @param flat List on which to append the reduced form
1336 static void reduce_enum(symtab_entry *e, astnode *expr, astnode *list)
1338 symtab_entry *sym;
1339 astnode *repl;
1340 if (!astnode_is_type(expr, IDENTIFIER_NODE)) {
1341 err(expr->loc, "identifier expected");
1343 else {
1344 /* Look up the enumeration symbol */
1345 symtab_push(e->symtab);
1346 sym = symtab_lookup(expr->ident);
1347 symtab_pop();
1348 /* Make byte data node (symbol value) */
1349 repl = astnode_create_data(
1350 astnode_create_datatype(BYTE_DATATYPE, NULL, expr->loc),
1351 astnode_clone(sym->def, expr->loc),
1352 expr->loc
1354 astnode_add_child(list, repl);
1358 static void flatten_struc_recursive(symtab_entry *s, astnode *init, astnode *flat);
1361 * Flattens a union initializer to a sequence of native data values.
1362 * Verify similar to flattening of structure, but only single field allowed.
1363 * @param s Union's symbol table definition
1364 * @param init Union initializer
1365 * @param flat List on which to append the flattened form
1367 static void flatten_union_recursive(symtab_entry *s, astnode *init, astnode *flat)
1369 astnode *fill;
1370 astnode *type;
1371 astnode *count;
1372 symtab_entry *e;
1373 symtab_entry *t;
1374 astnode *val;
1375 astnode *valvals;
1376 astnode *temp;
1377 ordered_field_list *list;
1378 int num;
1379 if (!astnode_is_type(init, STRUC_NODE)) {
1380 err(init->loc, "union initializer expected");
1381 return;
1383 /* Go through fields */
1384 symtab_push(s->symtab);
1385 fill = astnode_clone(s->struc.size, flat->loc);
1386 for (val = init->first_child, list = s->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1387 if (astnode_is_type(val, NULL_NODE)) {
1388 continue;
1390 if (!astnode_equal(fill, s->struc.size)) {
1391 err(init->loc, "only one field of union can be initialized");
1392 continue;
1394 /* Get field definition */
1395 e = list->entry;
1396 /* Symbol definition is STORAGE_NODE w/ two children: type and count */
1397 type = LHS(e->def);
1398 count = RHS(e->def);
1399 /* Decide what to do based on field type and value */
1400 switch (type->datatype) {
1401 case BYTE_DATATYPE:
1402 case CHAR_DATATYPE:
1403 case WORD_DATATYPE:
1404 case DWORD_DATATYPE:
1405 if (astnode_is_type(val, STRUC_NODE)) {
1406 /* Handle multi-value array */
1407 temp = astnode_clone(val, val->loc);
1408 valvals = astnode_remove_children(temp);
1409 astnode_finalize(temp);
1410 astnode_add_child(flat,
1411 astnode_create_data(
1412 astnode_create_datatype(type->datatype, NULL, type->loc),
1413 valvals,
1414 val->loc
1417 num = astnode_get_child_count(val);
1418 } else {
1419 /* Output single value */
1420 astnode_add_child(flat,
1421 astnode_create_data(
1422 astnode_create_datatype(type->datatype, NULL, type->loc),
1423 astnode_clone(val, val->loc),
1424 val->loc
1427 num = astnode_is_type(val, STRING_NODE) ? strlen(val->string) : 1;
1429 if (num > count->integer) {
1430 err(val->loc, "initializer for field `%s' exceeds field size", e->id);
1432 /* Fill in remainder of field if necessary: count - 1 */
1433 else if (count->integer > num) {
1434 astnode_add_child(flat,
1435 astnode_create_storage(
1436 astnode_create_datatype(type->datatype, NULL, type->loc),
1437 astproc_fold_constants(
1438 astnode_create_arithmetic(
1439 MINUS_OPERATOR,
1440 astnode_clone(count, count->loc),
1441 astnode_create_integer(num, flat->loc),
1442 count->loc
1445 val->loc
1449 break;
1451 case USER_DATATYPE:
1452 t = symtab_global_lookup(LHS(type)->ident);
1453 switch (t->type) {
1454 case STRUC_SYMBOL:
1455 flatten_struc_recursive(t, val, flat);
1456 break;
1458 case UNION_SYMBOL:
1459 flatten_union_recursive(t, val, flat);
1460 break;
1462 case RECORD_SYMBOL:
1463 reduce_record(t, val, flat);
1464 break;
1466 case ENUM_SYMBOL:
1467 reduce_enum(t, val, flat);
1468 break;
1470 default:
1471 break;
1473 break;
1475 /* Decrease fill amount according to field size */
1476 fill = astproc_fold_constants(
1477 astnode_create_arithmetic(
1478 MINUS_OPERATOR,
1479 fill,
1480 astnode_clone(e->field.size, flat->loc),
1481 flat->loc
1485 if (val != NULL) {
1486 err(init->loc, "too many field initializers");
1488 if (fill->integer > 0) {
1489 /* Fill remainder of union with zeroes */
1490 astnode_add_child(flat,
1491 astnode_create_storage(
1492 astnode_create_datatype(BYTE_DATATYPE, NULL, flat->loc),
1493 fill,
1494 flat->loc
1498 symtab_pop();
1502 * Flattens a structure initializer to a sequence of native data values.
1503 * @param s Structure's symbol table definition
1504 * @param init Structure initializer
1505 * @param flat List on which to append the flattened form
1507 static void flatten_struc_recursive(symtab_entry *s, astnode *init, astnode *flat)
1509 astnode *fill;
1510 astnode *type;
1511 astnode *count;
1512 astnode *temp;
1513 symtab_entry *e;
1514 symtab_entry *t;
1515 astnode *val;
1516 astnode *valvals;
1517 ordered_field_list *list;
1518 int num;
1519 if (!astnode_is_type(init, STRUC_NODE)) {
1520 err(init->loc, "structure initializer expected");
1521 return;
1523 /* Go through fields */
1524 symtab_push(s->symtab);
1525 fill = astnode_clone(s->struc.size, flat->loc);
1526 for (val = init->first_child, list = s->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1527 e = list->entry;
1528 /* Check if normal field or anonymous union */
1529 if (e->type == UNION_SYMBOL) {
1530 if (astnode_is_type(val, NULL_NODE)) {
1531 /* Output union size bytes to fill in field */
1532 astnode_add_child(flat,
1533 astnode_create_storage(
1534 astnode_create_datatype(BYTE_DATATYPE, NULL, val->loc),
1535 astnode_clone(e->struc.size, val->loc),
1536 val->loc
1539 } else {
1540 flatten_union_recursive(e, val, flat);
1541 /* Decrease fill amount according to union size */
1542 fill = astproc_fold_constants(
1543 astnode_create_arithmetic(
1544 MINUS_OPERATOR,
1545 fill,
1546 astnode_clone(e->struc.size, flat->loc),
1547 flat->loc
1551 } else {
1552 /* VAR_SYMBOL */
1553 /* Symbol definition is STORAGE_NODE w/ two children: type and count */
1554 type = LHS(e->def);
1555 count = RHS(e->def);
1556 /* Decide what to do based on field type and value */
1557 switch (type->datatype) {
1558 case BYTE_DATATYPE:
1559 case CHAR_DATATYPE:
1560 case WORD_DATATYPE:
1561 case DWORD_DATATYPE:
1562 if (astnode_is_type(val, NULL_NODE)) {
1563 /* Output field_size bytes to fill in field */
1564 astnode_add_child(flat,
1565 astnode_create_storage(
1566 astnode_create_datatype(type->datatype, NULL, type->loc),
1567 astnode_clone(count, count->loc),
1568 val->loc
1571 } else {
1572 if (astnode_is_type(val, STRUC_NODE)) {
1573 /* Handle multi-value array */
1574 temp = astnode_clone(val, val->loc);
1575 valvals = astnode_remove_children(temp);
1576 astnode_finalize(temp);
1577 astnode_add_child(flat,
1578 astnode_create_data(
1579 astnode_create_datatype(type->datatype, NULL, type->loc),
1580 valvals,
1581 val->loc
1584 num = astnode_get_child_count(val);
1585 } else {
1586 /* Output single value */
1587 astnode_add_child(flat,
1588 astnode_create_data(
1589 astnode_create_datatype(type->datatype, NULL, type->loc),
1590 astnode_clone(val, val->loc),
1591 val->loc
1594 num = astnode_is_type(val, STRING_NODE) ? strlen(val->string) : 1;
1596 if (astnode_is_type(count, INTEGER_NODE) && (count->integer < num)) {
1597 err(val->loc, "initializer for field `%s' exceeds field size", e->id);
1599 /* Fill in remainder of field if necessary: count - 1 */
1600 else if ( (astnode_is_type(count, INTEGER_NODE) && (count->integer > num))
1601 || !astnode_is_type(count, INTEGER_NODE) ) {
1602 astnode_add_child(flat,
1603 astnode_create_storage(
1604 astnode_create_datatype(type->datatype, NULL, flat->loc),
1605 astproc_fold_constants(
1606 astnode_create_arithmetic(
1607 MINUS_OPERATOR,
1608 astnode_clone(count, flat->loc),
1609 astnode_create_integer(num, flat->loc),
1610 flat->loc
1613 flat->loc
1618 break;
1620 case USER_DATATYPE:
1621 t = symtab_global_lookup(LHS(type)->ident);
1622 if (astnode_is_type(val, NULL_NODE)) {
1623 /* Output sizeof(type) bytes to fill in */
1624 astnode_add_child(flat,
1625 astnode_create_storage(
1626 astnode_create_datatype(BYTE_DATATYPE, NULL, val->loc),
1627 astnode_clone(t->struc.size, val->loc),
1628 val->loc
1631 } else {
1632 switch (t->type) {
1633 case STRUC_SYMBOL:
1634 flatten_struc_recursive(t, val, flat);
1635 break;
1637 case UNION_SYMBOL:
1638 flatten_union_recursive(t, val, flat);
1639 break;
1641 case RECORD_SYMBOL:
1642 reduce_record(t, val, flat);
1643 break;
1645 case ENUM_SYMBOL:
1646 reduce_enum(t, val, flat);
1647 break;
1649 default:
1650 break;
1653 break;
1655 /* Decrease fill amount according to field size */
1656 fill = astproc_fold_constants(
1657 astnode_create_arithmetic(
1658 MINUS_OPERATOR,
1659 fill,
1660 astnode_clone(e->field.size, flat->loc),
1661 flat->loc
1666 if (val != NULL) {
1667 err(init->loc, "too many field initializers");
1669 else if (list != NULL) {
1670 /* All fields not initialized; fill remainder of struc with zeroes */
1671 astnode_add_child(flat,
1672 astnode_create_storage(
1673 astnode_create_datatype(BYTE_DATATYPE, NULL, flat->loc),
1674 fill,
1675 flat->loc
1678 } else {
1679 astnode_finalize(fill);
1681 symtab_pop();
1685 * Converts data that is expressed in a high-level form (such as structure initializers)
1686 * to a simple sequence of bytes.
1687 * @param n The source node to flatten
1688 * @param type The type of data that n is an instance of
1689 * @param list List on which to append the resulting sequence of items (bytes/words/dwords)
1691 static void flatten_user_data(astnode *n, astnode *type, astnode *list)
1693 symtab_entry *def;
1694 def = symtab_global_lookup(LHS(type)->ident);
1695 if (def != NULL) {
1696 switch (def->type) {
1697 case STRUC_SYMBOL:
1698 flatten_struc_recursive(def, n, list);
1699 break;
1701 case UNION_SYMBOL:
1702 flatten_union_recursive(def, n, list);
1703 break;
1705 case RECORD_SYMBOL:
1706 reduce_record(def, n, list);
1707 break;
1709 case ENUM_SYMBOL:
1710 reduce_enum(def, n, list);
1711 break;
1713 default:
1714 break;
1719 /*---------------------------------------------------------------------------*/
1722 * Loads the character map specified by the node.
1723 * @param n Node of type CHARMAP_NODE
1725 static int load_charmap(astnode *n, void *arg, astnode **next)
1727 /* TODO: should probably be done in the parsing phase (same path resolution as for INCSRC and INCBIN) */
1728 astnode *file;
1729 file = astnode_get_child(n, 0);
1730 if (charmap_parse(file->file_path, charmap) == 0) {
1731 err(n->loc, "could not open `%s' for reading", file->file_path);
1733 return 0;
1737 * First-time processing of instruction node.
1738 * @param instr Node of type INSTRUCTION_NODE
1739 * @param arg Not used
1741 static int process_instruction(astnode *instr, void *arg, astnode **next)
1743 astnode *expr;
1744 if (in_dataseg) {
1745 err(instr->loc, "instructions not allowed in data segment");
1746 astnode_remove(instr);
1747 astnode_finalize(instr);
1748 return 0;
1750 else {
1751 expr = astnode_get_child(instr, 0);
1752 reduce_expression(expr);
1753 return 1;
1758 * First-time processing of data node.
1759 * @param data Node of type DATA_NODE
1760 * @param arg Not used
1762 static int process_data(astnode *data, void *arg, astnode **next)
1764 int j;
1765 int k;
1766 astnode *type;
1767 astnode *expr;
1768 astnode *list;
1769 astnode *stmts;
1770 type = astnode_get_child(data, 0);
1771 assert(astnode_is_type(type, DATATYPE_NODE));
1772 if (in_dataseg) {
1773 err(data->loc, "value not allowed in data segment");
1774 /* Replace with storage node */
1775 astnode_replace(
1776 data,
1777 astnode_create_storage(
1778 astnode_create_datatype(BYTE_DATATYPE, NULL, data->loc),
1779 astnode_create_integer(1, data->loc),
1780 data->loc
1783 astnode_finalize(data);
1784 return 0;
1786 if (type->datatype == USER_DATATYPE) {
1787 /* Make sure the type exists */
1788 if (symtab_global_lookup(LHS(type)->ident) == NULL) {
1789 err(data->loc, "unknown type `%s'", LHS(type)->ident);
1790 astnode_remove(data);
1791 astnode_finalize(data);
1792 return 0;
1793 } else {
1794 /* Attempt to reduce user data to native data */
1795 list = astnode_create(LIST_NODE, data->loc);
1796 for (expr = type->next_sibling; expr != NULL; expr = expr->next_sibling) {
1797 flatten_user_data(expr, type, list);
1799 /* Replace initializers with generated list */
1800 stmts = astnode_remove_children(list);
1801 astnode_replace(data, stmts);
1802 astnode_finalize(data);
1803 astnode_finalize(list);
1804 *next = stmts;
1805 return 0;
1808 /* Go through the list of data values, replacing defines and folding constants */
1809 for (j=1; j<astnode_get_child_count(data); j++) {
1810 expr = astnode_get_child(data, j);
1811 /* Substitute defines and fold constants */
1812 expr = reduce_expression(expr);
1813 /* If it's a string, replace by array of integers */
1814 /* (makes it easier to process later... favour regularity) */
1815 if (astnode_is_type(expr, STRING_NODE)) {
1816 astnode_remove_child(data, expr); /* Remove string */
1817 for (k=strlen(expr->string)-1; k>=0; k--) {
1818 /* Check if we should map character from custom charmap */
1819 if (type->datatype == CHAR_DATATYPE) {
1820 expr->string[k] = charmap[(unsigned)expr->string[k]];
1822 /* Append character value to array */
1823 astnode_insert_child(data, astnode_create_integer((unsigned char)expr->string[k], data->loc), j);
1825 if (type->datatype == CHAR_DATATYPE) {
1826 /* It's normal byte array now */
1827 type->datatype = BYTE_DATATYPE;
1829 j += strlen(expr->string)-1;
1830 astnode_finalize(expr);
1833 return 1;
1837 * First-time processing of storage node.
1838 * @param storage Node of type STORAGE_NODE
1839 * @param arg Not used
1841 static int process_storage(astnode *storage, void *arg, astnode **next)
1843 int item_size;
1844 astnode *type;
1845 astnode *expr;
1846 astnode *new_expr;
1847 type = LHS(storage);
1848 expr = RHS(storage);
1849 /* If not BYTE_DATATYPE, multiply by word/dword-size */
1850 switch (type->datatype) {
1851 case BYTE_DATATYPE:
1852 case CHAR_DATATYPE: item_size = 1; break;
1853 case WORD_DATATYPE: item_size = 2; break;
1854 case DWORD_DATATYPE: item_size = 4; break;
1855 default: item_size = 1; break; // ### Hmmm...
1857 if (item_size != 1) {
1858 new_expr = astnode_create_arithmetic(
1859 MUL_OPERATOR,
1860 astnode_clone(expr, expr->loc),
1861 astnode_create_integer(item_size, expr->loc),
1862 expr->loc
1864 astnode_replace(expr, new_expr);
1865 astnode_finalize(expr);
1866 expr = new_expr;
1867 type->datatype = BYTE_DATATYPE;
1869 /* Substitute defines and fold constants */
1870 expr = reduce_expression(expr);
1871 // TODO: Validate range somewhere else than here please... ???
1872 if (astnode_is_type(expr, INTEGER_NODE)) {
1873 if ((expr->integer <= 0) || (expr->integer >= 0x10000)) {
1874 err(storage->loc, "operand out of range");
1877 return 1;
1881 * Process EQU node.
1882 * @param equ Node of type EQU_NODE
1883 * @param arg Not used
1885 static int process_equ(astnode *equ, void *arg, astnode **next)
1887 symtab_entry *e;
1888 astnode *id;
1889 astnode *expr;
1890 expr = astnode_clone(astnode_get_child(equ, 1), equ->loc);
1891 expr = reduce_expression(expr);
1892 id = astnode_get_child(equ, 0);
1893 assert(astnode_is_type(id, IDENTIFIER_NODE));
1894 e = symtab_lookup(id->ident);
1895 if (e == NULL) {
1896 // TODO: Check that expression is a constant?
1897 symtab_enter(id->ident, CONSTANT_SYMBOL, expr, 0);
1898 } else {
1899 /* Symbol is being redefined */
1900 /* This is not allowed for EQU equate! */
1901 if (!astnode_equal((astnode *)(e->def), expr)) {
1902 warn(equ->loc, "redefinition of `%s' is not identical; ignored", id->ident);
1905 astnode_remove(equ);
1906 astnode_finalize(equ);
1907 return 0;
1911 * Process '=' node.
1912 * @param assign Node of type ASSIGN_NODE
1913 * @param arg Not used
1915 static int process_assign(astnode *assign, void *arg, astnode **next)
1917 symtab_entry *e;
1918 astnode *id;
1919 astnode *expr;
1920 /* If it's part of ENUM declaration, don't touch */
1921 if (astnode_has_ancestor_of_type(assign, ENUM_DECL_NODE)) {
1922 return 0;
1924 /* Very similar to EQU, except symbol 1) can be
1925 redefined and 2) is volatile (see end of proc) */
1926 expr = astnode_clone(astnode_get_child(assign, 1), assign->loc);
1927 expr = reduce_expression(expr);
1928 id = astnode_get_child(assign, 0);
1929 assert(astnode_is_type(id, IDENTIFIER_NODE));
1930 e = symtab_lookup(id->ident);
1931 if (e == NULL) {
1932 /* Symbol is being defined for the first time */
1933 /* Note that the VOLATILE_FLAG is set */
1934 symtab_enter(id->ident, CONSTANT_SYMBOL, expr, VOLATILE_FLAG);
1935 } else {
1936 /* Symbol is being redefined */
1937 /* This is OK for ASSIGN equate, simply replace definition */
1938 // ### store a list of definitions, otherwise we leak
1939 expr->loc = e->def->loc;
1940 e->def = expr;
1942 astnode_remove(assign);
1943 astnode_finalize(assign);
1944 return 0;
1948 * Process IFDEF-node.
1949 * @param ifdef Node of type IFDEF_NODE
1950 * @param arg Not used
1952 static int process_ifdef(astnode *ifdef, void *arg, astnode **next)
1954 symtab_entry *e;
1955 astnode *id;
1956 astnode *stmts;
1957 id = astnode_get_child(ifdef, 0);
1958 assert(astnode_is_type(id, IDENTIFIER_NODE));
1959 e = symtab_lookup(id->ident);
1960 if (e != NULL) {
1961 /* Symbol is defined. */
1962 /* Replace IFDEF node by the true-branch statement list */
1963 stmts = astnode_remove_children(astnode_get_child(ifdef, 1));
1964 astnode_replace(ifdef, stmts);
1965 *next = stmts;
1966 } else {
1967 /* Symbol is not defined. */
1968 /* Replace IFDEF node by the false-branch statement list (if any) */
1969 stmts = astnode_remove_children( astnode_get_child(ifdef, 2));
1970 if (stmts != NULL) {
1971 astnode_replace(ifdef, stmts);
1972 *next = stmts;
1973 } else {
1974 astnode_remove(ifdef);
1977 astnode_finalize(ifdef);
1978 return 0;
1982 * Process IFNDEF-node.
1983 * @param ifndef Node of type IFNDEF_NODE
1984 * @param arg Not used
1986 static int process_ifndef(astnode *ifndef, void *arg, astnode **next)
1988 symtab_entry *e;
1989 astnode *id;
1990 astnode *stmts;
1991 id = astnode_get_child(ifndef, 0);
1992 assert(astnode_is_type(id, IDENTIFIER_NODE));
1993 e = symtab_lookup(id->ident);
1994 if (e == NULL) {
1995 /* Symbol is not defined. */
1996 /* Replace IFNDEF node by the true-branch statement list */
1997 stmts = astnode_remove_children(astnode_get_child(ifndef, 1));
1998 astnode_replace(ifndef, stmts);
1999 *next = stmts;
2000 } else {
2001 /* Symbol is defined. */
2002 /* Replace IFNDEF node by the false-branch statement list, if any */
2003 stmts = astnode_remove_children(astnode_get_child(ifndef, 2));
2004 if (stmts != NULL) {
2005 astnode_replace(ifndef, stmts);
2006 *next = stmts;
2007 } else {
2008 astnode_remove(ifndef);
2011 astnode_finalize(ifndef);
2012 return 0;
2016 * Process IF-node.
2017 * @param if_node Node of type IF_NODE
2018 * @param arg Not used
2020 static int process_if(astnode *if_node, void *arg, astnode **next)
2022 astnode *expr;
2023 astnode *stmts;
2024 astnode *c;
2025 int ret = 0;
2026 /* IF_NODE has a list of CASE, DEFAULT nodes as children */
2027 for (c = astnode_get_first_child(if_node); c != NULL; c = astnode_get_next_sibling(c) ) {
2028 if (astnode_is_type(c, CASE_NODE)) {
2029 /* The expression which is being tested */
2030 expr = astnode_get_child(c, 0);
2031 /* Try to reduce expression to literal */
2032 expr = reduce_expression(expr);
2033 /* Resulting expression must be an integer literal,
2034 since this is static evaluation.
2035 In other words, it can't contain label references.
2037 if (astnode_is_type(expr, INTEGER_NODE)) {
2038 /* Non-zero is true, zero is false */
2039 if (expr->integer) {
2040 /* Replace IF node by the true-branch statement list */
2041 stmts = astnode_remove_children( astnode_get_child(c, 1) );
2042 astnode_replace(if_node, stmts);
2043 astnode_finalize(if_node);
2044 *next = stmts;
2045 return ret;
2047 } else {
2048 /* Error, expression is not constant */
2049 err(expr->loc, "conditional expression does not evaluate to literal");
2051 } else { /* DEFAULT_NODE */
2052 /* Replace IF node by the false-branch statement list */
2053 stmts = astnode_remove_children(c);
2054 astnode_replace(if_node, stmts);
2055 astnode_finalize(if_node);
2056 *next = stmts;
2057 return ret;
2060 /* No match, remove IF node from AST */
2061 astnode_remove(if_node);
2062 astnode_finalize(if_node);
2063 return ret;
2067 * Process dataseg-node.
2068 * @param dataseg Node of type DATASEG_NODE
2069 * @param arg Not used
2071 static int process_dataseg(astnode *dataseg, void *arg, astnode **next)
2073 symbol_modifiers = dataseg->modifiers;
2074 in_dataseg = 1; /* true */
2075 return 0;
2079 * Process codeseg-node.
2080 * @param codeseg Node of type CODESEG_NODE
2081 * @param arg Not used
2083 static int process_codeseg(astnode *codeseg, void *arg, astnode **next)
2085 symbol_modifiers = 0;
2086 in_dataseg = 0; /* false */
2087 return 0;
2091 * Process org-node.
2092 * @param n Node of type ORG_NODE
2093 * @param arg Not used
2095 static int process_org(astnode *org, void *arg, astnode **next)
2097 if (!xasm_args.pure_binary) {
2098 err(org->loc, "org directive can only be used when output format is pure 6502 binary");
2099 } else {
2100 astnode *addr = astnode_get_child(org, 0);
2101 addr = reduce_expression_complete(addr);
2102 if (astnode_is_type(addr, INTEGER_NODE)) {
2103 /* Range check */
2104 if ((addr->integer < 0) || (addr->integer >= 0x10000)) {
2105 err(org->loc, "org address out of 64K range");
2107 } else {
2108 err(org->loc, "org address does not evaluate to literal");
2109 astnode_remove(org);
2110 astnode_finalize(org);
2113 return 0;
2117 * Process REPT node.
2118 * @param rept Node of type REPT_NODE
2119 * @param arg Not used
2121 static int process_rept(astnode *rept, void *arg, astnode **next)
2123 astnode *count;
2124 astnode *stmts;
2125 astnode *list;
2126 count = astnode_get_child(rept, 0);
2127 /* Try to reduce count expression to literal */
2128 count = reduce_expression_complete(count);
2129 /* Resulting expression must be an integer literal,
2130 since this is static evaluation.
2132 if (astnode_is_type(count, INTEGER_NODE)) {
2133 if (count->integer < 0) {
2134 warn(rept->loc, "REPT ignored; negative repeat count (%d)", count->integer);
2135 astnode_remove(rept);
2136 astnode_finalize(rept);
2137 } else if (count->integer > 0) {
2138 /* Expand body <count> times */
2139 list = astnode_clone(astnode_get_child(rept, 1), rept->loc);
2140 stmts = astnode_remove_children(list);
2141 astnode_finalize(list);
2142 while (--count->integer > 0) {
2143 list = astnode_clone(astnode_get_child(rept, 1), rept->loc);
2144 astnode_add_sibling(stmts, astnode_remove_children(list) );
2145 astnode_finalize(list);
2147 astnode_replace(rept, stmts);
2148 astnode_finalize(rept);
2149 *next = stmts;
2150 } else {
2151 /* count == 0 */
2152 astnode_remove(rept);
2153 astnode_finalize(rept);
2155 } else {
2156 err(rept->loc, "repeat count does not evaluate to literal");
2157 astnode_remove(rept);
2158 astnode_finalize(rept);
2160 return 0;
2164 * Process WHILE node.
2165 * @param while_node Node of type WHILE_NODE
2166 * @param arg Not used
2168 static int process_while(astnode *while_node, void *arg, astnode **next)
2170 astnode *expr;
2171 astnode *stmts;
2172 astnode *list;
2173 expr = astnode_get_child(while_node, 0);
2174 /* Try to reduce expression to literal */
2175 expr = reduce_expression(astnode_clone(expr, expr->loc));
2176 /* Resulting expression must be an integer literal,
2177 since this is static evaluation.
2179 if (astnode_is_type(expr, INTEGER_NODE)) {
2180 /* Expand body if the expression is true */
2181 if (expr->integer) {
2182 list = astnode_clone(astnode_get_child(while_node, 1), while_node->loc);
2183 stmts = astnode_remove_children(list);
2184 astnode_finalize(list);
2185 astnode_replace(while_node, stmts);
2186 astnode_add_sibling(stmts, while_node); /* Clever huh? */
2187 *next = stmts;
2188 } else {
2189 astnode_remove(while_node);
2190 astnode_finalize(while_node);
2192 } else {
2193 err(while_node->loc, "while expression does not evaluate to literal");
2194 astnode_remove(while_node);
2195 astnode_finalize(while_node);
2197 astnode_finalize(expr);
2198 return 0;
2201 /*---------------------------------------------------------------------------*/
2204 * Enters a macro into the symbol table.
2205 * @param n Must be a node of type MACRO_DECL_NODE
2206 * @param arg Not used
2208 static int enter_macro(astnode *macro_def, void *arg, astnode **next)
2210 astnode *id = astnode_get_child(macro_def, 0);
2211 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2212 if (symtab_enter(id->ident, MACRO_SYMBOL, macro_def, 0) == NULL) {
2213 /* ### This could be allowed, you know... */
2214 err(macro_def->loc, "duplicate symbol `%s'", id->ident);
2216 astnode_remove(macro_def);
2217 return 0;
2221 * Enters a label into the symbol table.
2222 * @param label Must be a node of type LABEL_NODE
2224 static int enter_label(astnode *label, void *arg, astnode **next)
2226 symtab_entry *e;
2227 astnode *addr;
2228 e = symtab_lookup(label->ident);
2229 if (e) {
2230 if (!(e->flags & EXTRN_FLAG) || (e->type != LABEL_SYMBOL)) {
2231 err(label->loc, "duplicate symbol `%s'", label->ident);
2232 astnode_remove(label);
2233 astnode_finalize(label);
2234 return 0;
2236 /* Allow a symbol declared as extrn to be defined in the same unit */
2237 symtab_remove(label->ident);
2239 e = symtab_enter(label->ident, LABEL_SYMBOL, label, (in_dataseg ? DATA_FLAG : 0) | symbol_modifiers );
2240 /* Check if hardcoded address */
2241 addr = reduce_expression_complete(RHS(label));
2242 if (astnode_is_type(addr, INTEGER_NODE)) {
2243 /* Store it */
2244 e->address = addr->integer;
2245 e->flags |= ADDR_FLAG;
2246 } else if (!astnode_is_type(addr, CURRENT_PC_NODE)) {
2247 err(label->loc, "label address does not evaluate to literal");
2249 label_count++;
2250 return 0;
2254 * Enters a variable declaration in symbol table.
2255 * @param var Must be a node of type VAR_DECL_NODE
2257 static int enter_var(astnode *var, void *arg, astnode **next)
2259 symtab_entry *e;
2260 astnode *id = LHS(var);
2261 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2262 e = symtab_lookup(id->ident);
2263 if (e) {
2264 if (!(e->flags & EXTRN_FLAG) || (e->type != VAR_SYMBOL)) {
2265 err(var->loc, "duplicate symbol `%s'", id->ident);
2266 astnode_remove(var);
2267 astnode_finalize(var);
2268 return 0;
2270 /* Allow a symbol declared as extrn to be defined in the same unit */
2271 symtab_remove(id->ident);
2273 if ((var->modifiers & ZEROPAGE_FLAG) && !in_dataseg) {
2274 warn(var->loc, "zeropage modifier has no effect in code segment");
2275 var->modifiers &= ~ZEROPAGE_FLAG;
2277 symtab_enter(id->ident, VAR_SYMBOL, astnode_clone(RHS(var), var->loc), (in_dataseg ? DATA_FLAG : 0) | var->modifiers | symbol_modifiers);
2278 return 1;
2282 * Enters a procedure declaration in symbol table.
2283 * @param proc Must be a node of type PROC_NODE
2285 static int enter_proc(astnode *proc, void *arg, astnode **next)
2287 astnode *id;
2288 symtab_entry *e;
2289 if (in_dataseg) {
2290 err(proc->loc, "procedures not allowed in data segment");
2291 astnode_remove(proc);
2292 astnode_finalize(proc);
2293 return 0;
2295 id = LHS(proc);
2296 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2297 e = symtab_lookup(id->ident);
2298 if (e) {
2299 if (!(e->flags & EXTRN_FLAG) || (e->type != PROC_SYMBOL)) {
2300 err(proc->loc, "duplicate symbol `%s'", id->ident);
2301 astnode_remove(proc);
2302 astnode_finalize(proc);
2303 return 0;
2305 /* Allow a symbol declared as extrn to be defined in the same unit */
2306 symtab_remove(id->ident);
2308 /* Enter it! RHS(proc) is the list of procedure statements */
2309 symtab_enter(id->ident, PROC_SYMBOL, RHS(proc), (in_dataseg ? DATA_FLAG : 0) );
2310 label_count++;
2311 return 1;
2315 * Enters a simple <identifier> <storage> structure member.
2316 * @param c Node of type VAR_DECL_NODE
2317 * @param offset Offset of this field
2318 * @param plist List of symbol table's entries
2319 * @param struc_id Structure identifier (for error messages)
2320 * @return New offset (old offset + size of this field)
2322 static astnode *enter_struc_atomic_field(astnode *c, astnode *offset, ordered_field_list ***plist, astnode *struc_id)
2324 astnode *field_id;
2325 astnode *field_data;
2326 astnode *field_size;
2327 symtab_entry *fe;
2328 /* c has two children: id and STORAGE_NODE */
2329 field_id = LHS(c);
2330 assert(astnode_get_type(field_id) == IDENTIFIER_NODE);
2331 field_data = RHS(c);
2332 reduce_expression(RHS(field_data));
2333 if (astnode_is_type(field_data, DATA_NODE)) {
2334 err(c->loc, "data initialization not allowed here");
2335 return(offset);
2337 fe = symtab_enter(
2338 field_id->ident,
2339 VAR_SYMBOL,
2340 astnode_clone(field_data, field_data->loc),
2343 if (fe == NULL) {
2344 err(c->loc, "duplicate symbol `%s' in structure `%s'", field_id->ident, struc_id->ident);
2345 return(offset);
2347 /* Add to ordered list of fields */
2348 (**plist) = malloc(sizeof(ordered_field_list));
2349 (**plist)->entry = fe;
2350 (**plist)->next = NULL;
2351 *plist = &((**plist)->next);
2352 /* Set field offset */
2353 fe->field.offset = astnode_clone(offset, offset->loc);
2354 /* Calculate field size in bytes: sizeof(datatype) * count */
2355 field_size = astnode_create_arithmetic(
2356 MUL_OPERATOR,
2357 astnode_create_sizeof(astnode_clone(LHS(field_data), field_data->loc), field_data->loc),
2358 astnode_clone(RHS(field_data), field_data->loc),
2359 field_data->loc
2361 field_size = reduce_expression(field_size);
2362 /* Set field size */
2363 fe->field.size = astnode_clone(field_size, field_size->loc);
2364 /* Add field size to total offset */
2365 offset = astnode_create_arithmetic(
2366 PLUS_OPERATOR,
2367 offset,
2368 field_size,
2369 offset->loc
2371 offset = reduce_expression(offset);
2372 return(offset);
2375 static void enter_union_fields(symtab_entry *, astnode *);
2378 * Attempts to enter an (anonymous) union's members into structure's symbol table.
2379 * @param n Node of type UNION_DECL_NODE
2380 * @param offset Current parent structure offset
2381 * @param plist Ordered list of parent structure's fields
2383 astnode *enter_struc_union_field(astnode *n, astnode *offset, ordered_field_list ***plist, astnode *struc_id)
2385 ordered_field_list *ls;
2386 symtab_entry *se;
2387 symtab_entry *fe;
2388 static int id = 0;
2389 char id_str[16];
2390 astnode *union_id;
2391 union_id = LHS(n);
2392 if (astnode_is_type(union_id, IDENTIFIER_NODE)) {
2393 err(n->loc, "anonymous union expected");
2394 return(offset);
2396 snprintf(id_str, sizeof (id_str), "%d", id++);
2397 se = symtab_enter(id_str, UNION_SYMBOL, n, 0);
2398 enter_union_fields(se, n);
2399 /* Add to ordered list of fields */
2400 (**plist) = malloc(sizeof(ordered_field_list));
2401 (**plist)->entry = se;
2402 (**plist)->next = NULL;
2403 *plist = &((**plist)->next);
2404 /* Add to parent structure as well, with same offsets */
2405 for (ls = se->struc.fields; ls != NULL; ls = ls->next) {
2406 /* Try to enter field in structure's symbol table */
2407 fe = symtab_enter(
2408 ls->entry->id,
2409 VAR_SYMBOL,
2410 astnode_clone(ls->entry->def, ls->entry->def->loc),
2413 if (fe == NULL) {
2414 err(ls->entry->def->loc, "duplicate symbol `%s' in structure `%s'", ls->entry->id, struc_id->ident);
2415 continue;
2417 /* Set field offset */
2418 fe->field.offset = astnode_clone(offset, offset->loc);
2419 /* Set field size */
2420 fe->field.size = astnode_clone(se->struc.size, offset->loc);
2422 /* Advance offset by size of union */
2423 offset = astnode_create_arithmetic(
2424 PLUS_OPERATOR,
2425 offset,
2426 astnode_clone(se->struc.size, offset->loc),
2427 offset->loc
2429 offset = reduce_expression(offset);
2430 return(offset);
2434 * Enters struc type into symbol table based on AST node.
2435 * - Creates a symbol table for the structure
2436 * - Validates and enters all its fields
2437 * - Calculates offset of each field in the structure, and total size
2438 * @param struc_def Node of type STRUC_DECL_NODE
2440 static int enter_struc(astnode *struc_def, void *arg, astnode **next)
2442 ordered_field_list **plist;
2443 symtab_entry *se;
2444 astnode *c;
2445 astnode *offset;
2446 astnode *struc_id = LHS(struc_def);
2447 assert(astnode_is_type(struc_id, IDENTIFIER_NODE));
2448 se = symtab_enter(struc_id->ident, STRUC_SYMBOL, struc_def, 0);
2449 if (se == NULL) {
2450 err(struc_def->loc, "duplicate symbol `%s'", struc_id->ident);
2451 } else {
2452 /* Put the fields of the structure in local symbol table */
2453 se->symtab = symtab_create();
2454 offset = astnode_create_integer(0, struc_def->loc); /* offset = 0 */
2455 plist = &se->struc.fields;
2456 for (c = struc_id->next_sibling; c != NULL; c = c->next_sibling) {
2457 /* Check if it's a field declaration */
2458 if (astnode_is_type(c, VAR_DECL_NODE)) {
2459 offset = enter_struc_atomic_field(c, offset, &plist, struc_id);
2461 /* Check if (anonymous) union */
2462 else if (astnode_is_type(c, UNION_DECL_NODE)) {
2463 offset = enter_struc_union_field(c, offset, &plist, struc_id);
2464 } else {
2465 err(c->loc, "field declaration expected");
2466 continue;
2469 /* Store total size of structure */
2470 se->struc.size = offset;
2471 /* Restore previous symbol table */
2472 symtab_pop();
2474 /* ### Remove STRUC node from AST */
2475 // astnode_remove(struc_def);
2476 // astnode_finalize(struc_def);
2477 return 0;
2481 * Enters fields of union into its symbol table.
2483 static void enter_union_fields(symtab_entry *se, astnode *union_def)
2485 ordered_field_list **plist;
2486 astnode *c;
2487 astnode *field_id;
2488 astnode *field_data;
2489 astnode *field_size;
2490 symtab_entry *fe;
2492 se->symtab = symtab_create();
2493 se->struc.size = astnode_create_integer(0, union_def->loc);
2494 plist = &se->struc.fields;
2495 /* Process field declarations */
2496 for (c = RHS(union_def); c != NULL; c = c->next_sibling) {
2497 if (!astnode_is_type(c, VAR_DECL_NODE)) {
2498 err(c->loc, "field declaration expected");
2499 continue;
2501 /* c has two children: id and STORAGE_NODE */
2502 field_id = LHS(c);
2503 assert(astnode_get_type(field_id) == IDENTIFIER_NODE);
2504 field_data = RHS(c);
2505 reduce_expression(RHS(field_data));
2506 if (astnode_is_type(field_data, DATA_NODE)) {
2507 err(c->loc, "data initialization not allowed here");
2508 continue;
2510 /* Calculate field size in bytes: sizeof(datatype) * count */
2511 field_size = astnode_create_arithmetic(
2512 MUL_OPERATOR,
2513 astnode_create_sizeof(astnode_clone(LHS(field_data), field_data->loc), field_data->loc),
2514 astnode_clone(RHS(field_data), field_data->loc),
2515 field_data->loc
2517 field_size = reduce_expression(field_size);
2518 if (!astnode_is_type(field_size, INTEGER_NODE)) {
2519 err(c->loc, "union member must be of constant size");
2520 astnode_finalize(field_size);
2521 /* Use default size: 1 byte */
2522 field_size = astnode_create_integer(1, field_data->loc);
2524 fe = symtab_enter(
2525 field_id->ident,
2526 VAR_SYMBOL,
2527 astnode_clone(field_data, field_data->loc),
2530 if (fe == NULL) {
2531 err(c->loc, "duplicate symbol `%s' in union `%s'", field_id->ident, se->id);
2532 astnode_finalize(field_size);
2533 continue;
2535 /* Add to ordered list of fields */
2536 (*plist) = malloc(sizeof(ordered_field_list));
2537 (*plist)->entry = fe;
2538 (*plist)->next = NULL;
2539 plist = &((*plist)->next);
2540 /* Set field offset (0 for all) and size */
2541 fe->field.offset = astnode_create_integer(0, union_def->loc);
2542 fe->field.size = astnode_clone(field_size, field_size->loc);
2543 /* See if field size of this member is largest so far */
2544 if (se->struc.size->integer < field_size->integer) {
2545 astnode_finalize(se->struc.size);
2546 se->struc.size = field_size;
2547 } else {
2548 astnode_finalize(field_size);
2551 symtab_pop();
2555 * Enters union type into symbol table based on AST node.
2556 * @param union_def Node of type UNION_DECL_NODE
2558 static int enter_union(astnode *union_def, void *arg, astnode **next)
2560 symtab_entry *se;
2561 astnode *union_id = astnode_get_child(union_def, 0);
2562 assert(astnode_is_type(union_id, IDENTIFIER_NODE));
2563 if (astnode_is_type(union_id, NULL_NODE)) {
2564 err(union_def->loc, "anonymous union not allowed in global scope");
2565 } else {
2566 assert(astnode_get_type(union_id) == IDENTIFIER_NODE);
2567 se = symtab_enter(union_id->ident, UNION_SYMBOL, union_def, 0);
2568 if (se == NULL) {
2569 err(union_def->loc, "duplicate symbol `%s'", union_id->ident);
2570 } else {
2571 /* Put the fields of the union in local symbol table */
2572 enter_union_fields(se, union_def);
2575 /* ### Remove UNION node from AST */
2576 // astnode_remove(union_def);
2577 // astnode_finalize(union_def);
2578 return 0;
2582 * Enters enumerated type into symbol table based on AST node.
2583 * @param n Node of type ENUM_DECL_NODE
2585 static int enter_enum(astnode *enum_def, void *arg, astnode **next)
2587 astnode *c;
2588 astnode *id;
2589 astnode *val;
2590 symtab_entry *se;
2591 astnode *enum_id = astnode_get_child(enum_def, 0);
2592 assert(astnode_get_type(enum_id) == IDENTIFIER_NODE);
2593 se = symtab_enter(enum_id->ident, ENUM_SYMBOL, enum_def, 0);
2594 if (se == NULL) {
2595 err(enum_def->loc, "duplicate symbol `%s'", enum_id->ident);
2596 } else {
2597 /* Add all the enum symbols to its own symbol table */
2598 se->symtab = symtab_create();
2599 val = NULL;
2600 for (c = enum_id->next_sibling; c != NULL; c = c->next_sibling) {
2601 if (astnode_is_type(c, IDENTIFIER_NODE)) {
2602 id = c;
2603 if (val == NULL) {
2604 val = astnode_create_integer(0, c->loc);
2605 } else {
2606 val = astnode_create_integer(val->integer+1, c->loc);
2608 } else {
2609 id = LHS(c);
2610 val = reduce_expression_complete(astnode_clone(RHS(c), RHS(c)->loc));
2611 if (!astnode_is_type(val, INTEGER_NODE)) {
2612 err(c->loc, "initializer does not evaluate to integer literal");
2613 astnode_finalize(val);
2614 /* Use default value */
2615 val = astnode_create_integer(0, c->loc);
2618 if (symtab_enter(id->ident, CONSTANT_SYMBOL, val, 0) == NULL) {
2619 err(c->loc, "duplicate symbol `%s' in enumeration `%s'", id->ident, enum_id->ident);
2620 continue;
2623 symtab_pop();
2625 /* ### Remove ENUM node from AST */
2626 // astnode_remove(enum_def);
2627 // astnode_finalize(enum_def);
2628 return 0;
2632 * Enters record type into symbol table based on AST node.
2633 * @param n Node of type RECORD_DECL_NODE
2635 static int enter_record(astnode *record_def, void *arg, astnode **next)
2637 ordered_field_list **plist;
2638 astnode *c;
2639 astnode *field_id;
2640 astnode *field_width;
2641 int size;
2642 int offset;
2643 symtab_entry *se;
2644 symtab_entry *fe;
2645 astnode *record_id = astnode_get_child(record_def, 0);
2646 assert(astnode_get_type(record_id) == IDENTIFIER_NODE);
2647 se = symtab_enter(record_id->ident, RECORD_SYMBOL, record_def, 0);
2648 if (se == NULL) {
2649 err(record_def->loc, "duplicate symbol `%s'", record_id->ident);
2651 else {
2652 /* Add all the record fields to record's own symbol table */
2653 se->symtab = symtab_create();
2654 offset = 8;
2655 plist = &se->struc.fields;
2656 for (c = record_id->next_sibling; c != NULL; c = c->next_sibling) {
2657 /* c has two children: field identifier and its width */
2658 field_id = LHS(c);
2659 field_width = astnode_clone(reduce_expression(RHS(c)), RHS(c)->loc);
2660 /* Validate the width -- must be positive integer literal */
2661 if (!astnode_is_type(field_width, INTEGER_NODE)) {
2662 err(c->loc, "record member `%s' is not of constant size", field_id->ident);
2663 continue;
2665 if ((field_width->integer <= 0) || (field_width->integer >= 8)) {
2666 err(c->loc, "width of record member `%s' is out of range (%d)", field_id->ident, field_width->integer);
2667 continue;
2669 /* Attempt to enter field in record's symbol table */
2670 fe = symtab_enter(field_id->ident, VAR_SYMBOL, c, 0);
2671 if (fe == NULL) {
2672 err(c->loc, "duplicate symbol `%s' in record `%s'", field_id->ident, record_id->ident);
2673 continue;
2675 /* Add to ordered list of fields */
2676 (*plist) = malloc(sizeof(ordered_field_list));
2677 (*plist)->entry = fe;
2678 (*plist)->next = NULL;
2679 plist = &((*plist)->next);
2680 /* Set field offset */
2681 offset = offset - field_width->integer;
2682 fe->field.offset = astnode_create_integer(offset, c->loc);
2683 /* Set field size (width) */
2684 fe->field.size = field_width;
2686 size = 8 - offset;
2687 if (size > 8) {
2688 err(record_def->loc, "size of record `%s' (%d) exceeds 8 bits", record_id->ident, size);
2689 } else {
2690 /* Set size of record (in bits) */
2691 se->struc.size = astnode_create_integer(size, record_def->loc);
2693 symtab_pop();
2695 /* ### Remove RECORD node from AST */
2696 // astnode_remove(record_def);
2697 // astnode_finalize(record_def);
2698 return 0;
2702 * Globalizes a local.
2703 * The node is morphed into its global equivalent (LABEL_NODE or IDENTIFIER_NODE).
2704 * @param n A node of type LOCAL_LABEL_NODE or LOCAL_ID_NODE
2705 * @param arg Pointer to namespace counter
2707 static int globalize_local(astnode *n, void *arg, astnode **next)
2709 char str[32];
2710 /* Make it global by appending namespace counter to the id */
2711 snprintf(str, sizeof (str), "#%d", label_count);
2712 if (astnode_is_type(n, LOCAL_LABEL_NODE)) {
2713 /* Local label definition, use label field */
2714 n->label = realloc(n->label, strlen(n->label)+strlen(str)+1);
2715 strcat(n->label, str);
2716 /* This node is now a unique, global label */
2717 n->type = LABEL_NODE;
2718 /* Make sure it's unique */
2719 if (symtab_lookup(n->label)) {
2720 err(n->loc, "duplicate symbol `%s'", n->label);
2721 astnode_remove(n);
2722 astnode_finalize(n);
2723 return 0;
2724 } else {
2725 symtab_enter(n->label, LABEL_SYMBOL, n, (in_dataseg ? DATA_FLAG : 0) );
2727 } else {
2728 /* Local label reference, use ident field */
2729 n->ident = realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2730 strcat(n->ident, str);
2731 /* This node is now a unique, global identifier */
2732 n->type = IDENTIFIER_NODE;
2734 return 1;
2738 * Tags symbols as extrn.
2739 * @param n A node of type EXTRN_NODE
2741 static int tag_extrn_symbols(astnode *extrn, void *arg, astnode **next)
2743 astnode *id;
2744 astnode *type;
2745 astnode *list;
2746 symtab_entry *e;
2747 type = astnode_get_child(extrn, 0);
2748 /* Go through the list of identifiers */
2749 list = astnode_get_child(extrn, 1);
2750 for (id=astnode_get_first_child(list); id != NULL; id=astnode_get_next_sibling(id) ) {
2751 e = symtab_lookup(id->ident);
2752 if (e != NULL) {
2753 if (!(e->flags & EXTRN_FLAG)) {
2754 /* Error, can't import a symbol that's defined locally! */
2755 // TODO: this is okay?
2756 err(extrn->loc, "`%s' declared as extrn but is defined locally", id->ident);
2759 else {
2760 // TODO: store external unit name
2761 switch (astnode_get_type(type)) {
2762 case DATATYPE_NODE:
2763 symtab_enter(id->ident, VAR_SYMBOL, astnode_create_data(astnode_clone(type, extrn->loc), NULL, extrn->loc), EXTRN_FLAG);
2764 break;
2766 case INTEGER_NODE:
2767 /* type->integer is (LABEL|PROC)_SYMBOL */
2768 symtab_enter(id->ident, type->integer, NULL, EXTRN_FLAG);
2769 break;
2771 default:
2772 break;
2776 astnode_remove(extrn);
2777 astnode_finalize(extrn);
2778 return 0;
2784 static int process_message(astnode *message, void *arg, astnode **next)
2786 astnode *expr = reduce_expression_complete(LHS(message));
2787 if (astnode_is_type(expr, STRING_NODE)) {
2788 printf("%s\n", expr->string);
2789 } else if (astnode_is_type(expr, INTEGER_NODE)) {
2790 printf("%d\n", expr->integer);
2791 } else {
2792 err(expr->loc, "string or integer argument expected");
2794 astnode_remove(message);
2795 astnode_finalize(message);
2796 return 0;
2802 static int process_warning(astnode *warning, void *arg, astnode **next)
2804 astnode *expr = reduce_expression_complete(LHS(warning));
2805 if (astnode_is_type(expr, STRING_NODE)) {
2806 warn(warning->loc, expr->string);
2807 } else {
2808 err(warning->loc, "string argument expected");
2810 astnode_remove(warning);
2811 astnode_finalize(warning);
2812 return 0;
2818 static int process_error(astnode *error, void *arg, astnode **next)
2820 astnode *expr = reduce_expression_complete(LHS(error));
2821 if (astnode_is_type(expr, STRING_NODE)) {
2822 err(error->loc, expr->string);
2823 } else {
2824 err(expr->loc, "string argument expected");
2826 astnode_remove(error);
2827 astnode_finalize(error);
2828 return 0;
2832 * Processes a forward branch declaration.
2833 * @param forward_branch Node of type FORWARD_BRANCH_DECL_NODE
2834 * @param arg Not used
2836 static int process_forward_branch_decl(astnode *n, void *arg, astnode **next)
2838 astnode *l;
2839 int i;
2840 char str[32];
2841 assert(!strchr(n->ident, '#'));
2842 /* Get branch info structure for label (+, ++, ...) */
2843 forward_branch_info *fwd = &forward_branch[strlen(n->ident)-1];
2844 /* Morph n to globally unique label */
2845 snprintf(str, sizeof (str), "#%d", fwd->counter);
2846 n->label = (char *)realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2847 strcat(n->label, str);
2848 n->type = LABEL_NODE;
2849 symtab_enter(n->label, LABEL_SYMBOL, n, 0);
2850 /* Fix reference identifiers */
2851 for (i=0; i<fwd->index; i++) {
2852 l = fwd->refs[i];
2853 l->ident = (char *)realloc(l->ident, strlen(n->ident)+1);
2854 strcpy(l->ident, n->ident);
2856 /* Prepare for next declaration */
2857 fwd->index = 0;
2858 fwd->counter++;
2859 return 0;
2863 * Processes a backward branch declaration.
2864 * @param n Node of type BACKWARD_BRANCH_DECL_NODE
2865 * @param arg Not used
2867 static int process_backward_branch_decl(astnode *n, void *arg, astnode **next)
2869 char str[32];
2870 assert(!strchr(n->ident, '#'));
2871 /* Get branch info */
2872 backward_branch_info *bwd = &backward_branch[strlen(n->ident)-1];
2873 bwd->decl = n;
2874 /* Morph n to globally unique label */
2875 snprintf(str, sizeof (str), "#%d", bwd->counter);
2876 n->label = (char *)realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2877 strcat(n->label, str);
2878 n->type = LABEL_NODE;
2879 symtab_enter(n->label, LABEL_SYMBOL, n, 0);
2880 /* Prepare for next declaration */
2881 bwd->counter++;
2882 return 0;
2886 * Processes a forward branch label reference.
2887 * @param n Node of type FORWARD_BRANCH_NODE
2888 * @param arg Not used
2890 static int process_forward_branch(astnode *n, void *arg, astnode **next)
2892 /* Add n to proper forward_branch array */
2893 forward_branch_info *fwd = &forward_branch[strlen(n->ident)-1];
2894 fwd->refs[fwd->index++] = n;
2895 /* Change to identifier node */
2896 n->type = IDENTIFIER_NODE;
2897 return 0;
2901 * Processes a backward branch label reference.
2902 * @param n Node of type BACKWARD_BRANCH_NODE
2903 * @param arg Not used
2905 static int process_backward_branch(astnode *n, void *arg, astnode **next)
2907 /* Get branch info */
2908 backward_branch_info *bwd = &backward_branch[strlen(n->ident)-1];
2909 /* Make sure it's a valid reference */
2910 if (bwd->decl != NULL) {
2911 /* Fix n->ident */
2912 n->ident = (char *)realloc(n->ident, strlen(bwd->decl->ident)+1);
2913 strcpy(n->ident, bwd->decl->ident);
2915 /* Change to identifier node */
2916 n->type = IDENTIFIER_NODE;
2917 return 0;
2920 /*---------------------------------------------------------------------------*/
2922 static int is_field_ref(astnode *n)
2924 astnode *p = astnode_get_parent(n);
2925 /* Case 1: id.id */
2926 if (astnode_is_type(p, DOT_NODE)) return 1;
2927 /* Case 2: id.id[expr] */
2928 if (astnode_is_type(p, INDEX_NODE) && (n == LHS(p)) && astnode_is_type(astnode_get_parent(p), DOT_NODE) ) return 1;
2929 return 0;
2933 * Checks that the given identifier node is present in symbol table.
2934 * Issues error if it is not, and replaces with integer 0.
2935 * @param n A node of type IDENTIFIER_NODE
2937 static int validate_ref(astnode *n, void *arg, astnode **next)
2939 int i;
2940 symbol_ident_list list;
2941 symtab_entry *enum_def;
2942 int ret = 1;
2943 if (is_field_ref(n)) {
2944 return 1; /* Validated by validate_dotref() */
2946 symtab_entry * e = symtab_lookup(n->ident);
2947 if (e == NULL) {
2948 /* Maybe it is part of an enumeration */
2949 symtab_list_type(ENUM_SYMBOL, &list);
2950 for (i=0; i<list.size; i++) {
2951 enum_def = symtab_lookup(list.idents[i]);
2952 symtab_push(enum_def->symtab);
2953 e = symtab_lookup(n->ident);
2954 symtab_pop();
2955 if (e != NULL) {
2956 /* Replace id by SCOPE_NODE */
2957 astnode *scope = astnode_create_scope(
2958 astnode_create_identifier(enum_def->id, n->loc),
2959 astnode_clone(n, n->loc), n->loc);
2960 astnode_replace(n, scope);
2961 astnode_finalize(n);
2962 *next = scope;
2963 ret = 0;
2964 break;
2967 symtab_list_finalize(&list);
2968 if (e == NULL) {
2969 strtok(n->ident, "#"); /* Remove globalize junk */
2970 // err(n->loc, "unknown symbol `%s'", n->ident);
2971 /* ### Replace by integer 0 */
2972 //astnode_replace(n, astnode_create_integer(0, n->loc) );
2973 //astnode_finalize(n);
2974 warn(n->loc, "`%s' undeclared; assuming external label", n->ident);
2975 e = symtab_enter(n->ident, LABEL_SYMBOL, NULL, EXTRN_FLAG);
2978 if (e)
2979 e->ref_count++;
2980 return ret;
2984 * Validates top-level (not part of structure) indexed identifier.
2985 * @param n Node of type INDEX_NODE
2986 * @param arg Not used
2988 static int validate_index(astnode *n, void *arg, astnode **next)
2990 symtab_entry *e;
2991 astnode *id;
2992 astnode *type;
2993 if (is_field_ref(LHS(n))) {
2994 return 1; /* Validated by validate_dotref() */
2996 id = LHS(n);
2997 if (!astnode_is_type(id, IDENTIFIER_NODE)) {
2998 err(n->loc, "identifier expected");
2999 astnode_replace(n, astnode_create_integer(0, n->loc) );
3000 astnode_finalize(n);
3001 return 0;
3003 e = symtab_lookup(id->ident);
3004 if (e != NULL) {
3005 type = LHS(e->def);
3006 if (!astnode_is_type(type, DATATYPE_NODE)) {
3007 err(n->loc, "`%s' cannot be indexed", id->ident);
3008 astnode_replace(n, astnode_create_integer(0, n->loc) );
3009 astnode_finalize(n);
3010 return 0;
3011 } else {
3012 // TODO: bounds check
3013 astnode *reduced = reduce_index(n);
3014 if (reduced != n) {
3015 *next = reduced;
3016 return 0;
3019 } else {
3020 err(n->loc, "unknown symbol `%s'", id->ident);
3021 astnode_replace(n, astnode_create_integer(0, n->loc) );
3022 astnode_finalize(n);
3023 return 0;
3025 return 1;
3029 * Checks that A::B is valid.
3030 * If it's not valid it is replaced by integer 0.
3031 * @param n Node of type SCOPE_NODE
3033 static int validate_scoperef(astnode *n, void *arg, astnode **next)
3035 astnode *symbol;
3036 astnode *namespace = LHS(n);
3037 symtab_entry * e = symtab_lookup(namespace->ident);
3038 if (e == NULL) {
3039 err(n->loc, "unknown namespace `%s'", namespace->ident);
3040 /* Replace by integer 0 */
3041 astnode_replace(n, astnode_create_integer(0, n->loc) );
3042 astnode_finalize(n);
3043 return 0;
3044 } else {
3045 /* Get symbol on right of :: operator */
3046 symbol = RHS(n);
3047 /* Namespace was found, check its type */
3048 switch (e->type) {
3049 case STRUC_SYMBOL:
3050 case UNION_SYMBOL:
3051 case RECORD_SYMBOL:
3052 case ENUM_SYMBOL:
3053 /* OK, check the symbol */
3054 symtab_push(e->symtab);
3055 e = symtab_lookup(symbol->ident);
3056 if (e == NULL) {
3057 err(n->loc, "unknown symbol `%s' in namespace `%s'", symbol->ident, namespace->ident);
3058 /* Replace by integer 0 */
3059 astnode_replace(n, astnode_create_integer(0, n->loc) );
3060 astnode_finalize(n);
3062 symtab_pop();
3063 break;
3065 default:
3066 err(n->loc, "`%s' is not a namespace", namespace->ident);
3067 /* Replace by integer 0 */
3068 astnode_replace(n, astnode_create_integer(0, n->loc) );
3069 astnode_finalize(n);
3070 break;
3073 return 0;
3077 * Validates right part of dotted reference recursively.
3078 * Assumes that left part's symbol table is on stack.
3079 * @param n Node of type DOT_NODE
3081 static void validate_dotref_recursive(astnode *n, astnode *top)
3083 astnode *left;
3084 astnode *right;
3085 astnode *type;
3086 symtab_entry *field;
3087 symtab_entry *def;
3088 left = LHS(n);
3089 if (astnode_is_type(left, INDEX_NODE)) {
3090 left = LHS(left); /* Need identifier */
3092 right = RHS(n);
3093 if (astnode_is_type(right, DOT_NODE)) {
3094 right = LHS(right); /* Need identifier */
3096 if (astnode_is_type(right, INDEX_NODE)) {
3097 right = LHS(right); /* Need identifier */
3099 /* Lookup 'right' in 'left's symbol table */
3100 assert(astnode_get_type(right) == IDENTIFIER_NODE);
3101 field = symtab_lookup(right->ident);
3102 if (field == NULL) {
3103 err(n->loc, "`%s' is not a member of `%s'", right->ident, left->ident);
3104 /* Replace by integer 0 */
3105 astnode_replace(top, astnode_create_integer(0, top->loc) );
3106 astnode_finalize(top);
3107 } else {
3108 /* See if more subfields to process */
3109 n = RHS(n);
3110 if (astnode_is_type(n, DOT_NODE)) {
3111 /* Verify the variable's type -- should be user-defined */
3112 type = LHS(field->def);
3113 if ((type == NULL) || (type->datatype != USER_DATATYPE)) {
3114 err(n->loc, "member `%s' of `%s' is not a structure", right->ident, left->ident);
3115 /* Replace by integer 0 */
3116 astnode_replace(top, astnode_create_integer(0, top->loc) );
3117 astnode_finalize(top);
3118 } else {
3119 /* Look up variable's type definition and verify it's a structure */
3120 def = symtab_global_lookup(LHS(type)->ident);
3121 if (def == NULL) {
3122 err(n->loc, "member '%s' of '%s' is of unknown type (`%s')", right->ident, left->ident, LHS(type)->ident);
3123 /* Replace by integer 0 */
3124 astnode_replace(top, astnode_create_integer(0, top->loc) );
3125 astnode_finalize(top);
3126 } else if ( !((def->type == STRUC_SYMBOL) || (def->type == UNION_SYMBOL)) ) {
3127 err(n->loc, "member `%s' of `%s' is not a structure", right->ident, left->ident);
3128 /* Replace by integer 0 */
3129 astnode_replace(top, astnode_create_integer(0, top->loc) );
3130 astnode_finalize(top);
3131 } else {
3132 /* Next field */
3133 symtab_push(def->symtab);
3134 validate_dotref_recursive(n, top);
3135 symtab_pop();
3143 * Validates A.B.C.D. . ...
3144 * Replaces the whole thing with integer 0 if not.
3145 * @param n Node of type DOT_NODE
3147 static int validate_dotref(astnode *n, void *arg, astnode **next)
3149 symtab_entry *father;
3150 symtab_entry *def;
3151 astnode *type;
3152 astnode *left;
3153 if (astnode_has_ancestor_of_type(n, DOT_NODE)) {
3154 return 1; /* Already validated, since this function is recursive */
3156 /* Look up parent in global symbol table */
3157 left = LHS(n); /* n := left . right */
3158 if (astnode_is_type(left, INDEX_NODE)) {
3159 left = LHS(left); /* Need identifier */
3161 father = symtab_lookup(left->ident);
3162 if (father == NULL) {
3163 err(n->loc, "unknown symbol `%s'", left->ident);
3164 /* Replace by integer 0 */
3165 astnode_replace(n, astnode_create_integer(0, n->loc) );
3166 astnode_finalize(n);
3167 return 0;
3168 } else {
3169 father->ref_count++;
3170 /* Verify the variable's type -- should be user-defined */
3171 type = LHS(father->def);
3172 if ((type == NULL) || (type->datatype != USER_DATATYPE)) {
3173 err(n->loc, "`%s' is not a structure", left->ident);
3174 /* Replace by integer 0 */
3175 astnode_replace(n, astnode_create_integer(0, n->loc) );
3176 astnode_finalize(n);
3177 return 0;
3178 } else {
3179 def = symtab_lookup(LHS(type)->ident);
3180 if (def == NULL) {
3181 err(n->loc, "'%s' is of unknown type (`%s')", left->ident, LHS(type)->ident);
3182 /* Replace by integer 0 */
3183 astnode_replace(n, astnode_create_integer(0, n->loc) );
3184 astnode_finalize(n);
3185 return 0;
3186 } else if ( !((def->type == STRUC_SYMBOL) || (def->type == UNION_SYMBOL)) ) {
3187 err(n->loc, "`%s' is not a structure", left->ident);
3188 /* Replace by integer 0 */
3189 astnode_replace(n, astnode_create_integer(0, n->loc) );
3190 astnode_finalize(n);
3191 return 0;
3192 } else {
3193 /* Verify fields recursively */
3194 symtab_push(def->symtab);
3195 validate_dotref_recursive(n, n);
3196 symtab_pop();
3200 return 1;
3203 /*---------------------------------------------------------------------------*/
3206 * Evaluates expressions involved in conditional assembly, and removes the
3207 * appropriate branches from the AST.
3208 * Does some other stuff too, such as substitute equates and fold constants.
3210 void astproc_first_pass(astnode *root)
3212 /* Table of callback functions for our purpose. */
3213 static astnodeprocmap map[] = {
3214 { LABEL_NODE, enter_label },
3215 { VAR_DECL_NODE, enter_var },
3216 { PROC_NODE, enter_proc },
3217 { STRUC_DECL_NODE, enter_struc },
3218 { UNION_DECL_NODE, enter_union },
3219 { ENUM_DECL_NODE, enter_enum },
3220 { RECORD_DECL_NODE, enter_record },
3221 { LOCAL_LABEL_NODE, globalize_local },
3222 { LOCAL_ID_NODE, globalize_local },
3223 { MACRO_DECL_NODE, enter_macro },
3224 { MACRO_NODE, expand_macro },
3225 { REPT_NODE, process_rept },
3226 { WHILE_NODE, process_while },
3227 { DATASEG_NODE, process_dataseg },
3228 { CODESEG_NODE, process_codeseg },
3229 { ORG_NODE, process_org },
3230 { CHARMAP_NODE, load_charmap },
3231 { INSTRUCTION_NODE, process_instruction },
3232 { DATA_NODE, process_data },
3233 { STORAGE_NODE, process_storage },
3234 { EQU_NODE, process_equ },
3235 { ASSIGN_NODE, process_assign },
3236 { IFDEF_NODE, process_ifdef },
3237 { IFNDEF_NODE, process_ifndef },
3238 { IF_NODE, process_if },
3239 { EXTRN_NODE, tag_extrn_symbols },
3240 { MESSAGE_NODE, process_message },
3241 { WARNING_NODE, process_warning },
3242 { ERROR_NODE, process_error },
3243 { FORWARD_BRANCH_DECL_NODE, process_forward_branch_decl },
3244 { BACKWARD_BRANCH_DECL_NODE, process_backward_branch_decl },
3245 { FORWARD_BRANCH_NODE, process_forward_branch },
3246 { BACKWARD_BRANCH_NODE, process_backward_branch },
3247 { 0, NULL }
3249 reset_charmap();
3250 branch_init();
3251 in_dataseg = 0; /* codeseg is default */
3252 /* Do the walk. */
3253 astproc_walk(root, NULL, map);
3254 /* Remove all the volatile constants from the symbol table */
3255 /* These are the ones defined with the '=' operator, whose identifiers should
3256 all have been replaced by their value in the syntax tree now. Since
3257 they're not referenced anywhere we can safely dispose of them.
3258 The EQUates on the other hand should be kept, since they will
3259 possibly be exported. */
3260 #ifdef ENABLE_BUGGY_THING // ### FIXME
3262 int i;
3263 symbol_ident_list list;
3264 symtab_entry *e;
3265 symtab_list_type(CONSTANT_SYMBOL, &list);
3266 for (i = 0; i < list.size; ++i) {
3267 e = symtab_lookup(list.idents[i]);
3268 if (e->flags & VOLATILE_FLAG) {
3269 symtab_remove(list.idents[i]);
3272 symtab_list_finalize(&list);
3274 #endif
3277 /*---------------------------------------------------------------------------*/
3280 * Tags labels as public.
3281 * @param public A node of type PUBLIC_NODE
3283 static int tag_public_symbols(astnode *public, void *arg, astnode **next)
3285 astnode *id;
3286 symtab_entry *e;
3287 /* Go through the list of identifiers */
3288 for (id=astnode_get_first_child(public); id != NULL; id = astnode_get_next_sibling(id) ) {
3289 e = symtab_lookup(id->ident);
3290 if (e != NULL) {
3291 if (e->flags & EXTRN_FLAG) {
3292 err(public->loc, "`%s' already declared extrn", id->ident);
3293 } else {
3294 switch (e->type) {
3295 case LABEL_SYMBOL:
3296 case CONSTANT_SYMBOL:
3297 case VAR_SYMBOL:
3298 case PROC_SYMBOL:
3299 /* GO! */
3300 e->flags |= PUBLIC_FLAG;
3301 break;
3303 default:
3304 err(public->loc, "`%s' is of non-exportable type", id->ident);
3305 break;
3308 } else {
3309 warn(public->loc, "`%s' declared as public but is not defined", id->ident);
3312 astnode_remove(public);
3313 astnode_finalize(public);
3314 return 0;
3318 * Sets alignment for a set of (data) labels.
3319 * @param align A node of type ALIGN_NODE
3321 static int tag_align_symbols(astnode *align, void *arg, astnode **next)
3323 int pow;
3324 astnode *id;
3325 astnode *idents;
3326 astnode *expr;
3327 symtab_entry *e;
3328 /* Go through the list of identifiers */
3329 idents = LHS(align);
3330 for (id=astnode_get_first_child(idents); id != NULL; id = astnode_get_next_sibling(id) ) {
3331 e = symtab_lookup(id->ident);
3332 if (e != NULL) {
3333 if (!(e->flags & DATA_FLAG)) {
3334 err(align->loc, "cannot align a code symbol (`%s')", id->ident);
3335 } else {
3336 switch (e->type) {
3337 case LABEL_SYMBOL:
3338 case VAR_SYMBOL:
3339 expr = reduce_expression(RHS(align));
3340 if (!astnode_is_type(expr, INTEGER_NODE)) {
3341 err(align->loc, "alignment expression must be an integer literal");
3342 } else if ((expr->integer < 0) || (expr->integer >= 0x10000)) {
3343 err(align->loc, "alignment expression out of range");
3344 } else if (expr->integer > 1) {
3345 pow = 0;
3346 switch (expr->integer) {
3347 case 32768: pow++;
3348 case 16384: pow++;
3349 case 8192: pow++;
3350 case 4096: pow++;
3351 case 2048: pow++;
3352 case 1024: pow++;
3353 case 512: pow++;
3354 case 256: pow++;
3355 case 128: pow++;
3356 case 64: pow++;
3357 case 32: pow++;
3358 case 16: pow++;
3359 case 8: pow++;
3360 case 4: pow++;
3361 case 2: pow++;
3362 /* GO! */
3363 e->flags |= ALIGN_FLAG;
3364 e->align = pow;
3365 break;
3367 default:
3368 err(align->loc, "alignment expression must be a power of 2");
3369 break;
3372 break;
3374 default:
3375 err(align->loc, "`%s' cannot be aligned", id->ident);
3376 break;
3380 else {
3381 warn(align->loc, "alignment ignored for undefined symbol `%s'", id->ident);
3384 astnode_remove(align);
3385 astnode_finalize(align);
3386 return 0;
3389 /*---------------------------------------------------------------------------*/
3392 * Removes unused labels from a syntax tree (and symbol table).
3393 * Unused labels are labels that are defined but not referenced anywhere.
3394 * This function assumes that the reference counts have already been calculated.
3396 void remove_unused_labels()
3398 int i;
3399 char *id;
3400 astnode *n;
3401 symbol_ident_list list;
3402 symtab_list_type(LABEL_SYMBOL, &list);
3403 for (i=0; i<list.size; i++) {
3404 id = list.idents[i];
3405 symtab_entry * e = symtab_lookup(id);
3406 if ((e->ref_count == 0) && ((e->flags & (PUBLIC_FLAG | EXTRN_FLAG)) == 0)) {
3407 n = e->def;
3408 strtok(n->label, "#"); /* Remove globalize junk */
3409 warn(n->loc, "`%s' defined but not used", n->label);
3410 astnode_remove(n);
3411 astnode_finalize(n);
3412 //symtab_remove(n->label); ### FIXME leads to crash sometimes...
3415 symtab_list_finalize(&list);
3419 * If the storage is of user-defined type, replaces it with
3420 * .DSB sizeof(type) * count
3422 static int reduce_user_storage(astnode *n, void *arg, astnode **next)
3424 astnode *type;
3425 astnode *count;
3426 astnode *byte_storage;
3427 symtab_entry *e;
3428 type = LHS(n);
3429 if (type->datatype == USER_DATATYPE) {
3430 e = symtab_lookup(LHS(type)->ident);
3431 if (e != NULL) {
3432 /* Replace by DSB */
3433 count = RHS(n);
3434 byte_storage = astnode_create_storage(
3435 astnode_create_datatype(BYTE_DATATYPE, NULL, type->loc),
3436 astnode_create_arithmetic(
3437 MUL_OPERATOR,
3438 astnode_create_sizeof(
3439 astnode_create_identifier(LHS(type)->ident, n->loc),
3440 n->loc
3442 astnode_clone(count, n->loc),
3443 n->loc
3445 n->loc
3447 astnode_replace(n, byte_storage);
3448 astnode_finalize(n);
3449 *next = byte_storage;
3450 return 0;
3451 } else {
3452 err(n->loc, "unknown symbol `%s'", LHS(type)->ident);
3453 astnode_remove(n);
3454 astnode_finalize(n);
3455 return 0;
3458 return 1;
3462 * Second major pass over AST.
3464 void astproc_second_pass(astnode *root)
3466 /* Table of callback functions for our purpose. */
3467 static astnodeprocmap map[] = {
3468 { IDENTIFIER_NODE, validate_ref },
3469 { SCOPE_NODE, validate_scoperef },
3470 { DOT_NODE, validate_dotref },
3471 { INDEX_NODE, validate_index },
3472 { PUBLIC_NODE, tag_public_symbols },
3473 { STORAGE_NODE, reduce_user_storage },
3474 { ALIGN_NODE, tag_align_symbols },
3475 { STRUC_DECL_NODE, noop },
3476 { UNION_DECL_NODE, noop },
3477 { ENUM_DECL_NODE, noop },
3478 { RECORD_DECL_NODE, noop },
3479 { 0, NULL }
3481 in_dataseg = 0; /* codeseg is default */
3482 /* Do the walk. */
3483 astproc_walk(root, NULL, map);
3484 /* */
3485 remove_unused_labels();
3488 /*---------------------------------------------------------------------------*/
3491 * Translates a single instruction.
3492 * @param instr A node of type INSTRUCTION_NODE
3494 static int translate_instruction(astnode *instr, void *arg, astnode **next)
3496 unsigned char c;
3497 /* Put the operand in final form */
3498 astnode *o = reduce_expression_complete( LHS(instr) );
3499 assert(o == LHS(instr));
3500 /* Convert (mnemonic, addressing mode) pair to opcode */
3501 instr->instr.opcode = opcode_get(instr->instr.mnemonic, instr->instr.mode);
3502 if (instr->instr.opcode == 0xFF) {
3503 /* Check for the special cases */
3504 if ((instr->instr.mnemonic == STX_MNEMONIC) && (instr->instr.mode == ABSOLUTE_Y_MODE)) {
3505 /* Doesn't have absolute version, "scale down" to zeropage */
3506 instr->instr.mode = ZEROPAGE_Y_MODE;
3507 instr->instr.opcode = opcode_get(instr->instr.mnemonic, instr->instr.mode);
3508 } else if ((instr->instr.mnemonic == STY_MNEMONIC) && (instr->instr.mode == ABSOLUTE_X_MODE)) {
3509 /* Doesn't have absolute version, "scale down" to zeropage */
3510 instr->instr.mode = ZEROPAGE_X_MODE;
3511 instr->instr.opcode = opcode_get(instr->instr.mnemonic, instr->instr.mode);
3512 } else if (instr->instr.mode == ABSOLUTE_MODE) {
3513 /* Check for relative addressing (these are parsed as absolute mode) */
3514 switch (instr->instr.mnemonic) {
3515 case BCC_MNEMONIC:
3516 case BCS_MNEMONIC:
3517 case BEQ_MNEMONIC:
3518 case BMI_MNEMONIC:
3519 case BNE_MNEMONIC:
3520 case BPL_MNEMONIC:
3521 case BVC_MNEMONIC:
3522 case BVS_MNEMONIC:
3523 /* Fix addressing mode and opcode */
3524 instr->instr.mode = RELATIVE_MODE;
3525 instr->instr.opcode = opcode_get(instr->instr.mnemonic, instr->instr.mode);
3526 break;
3530 if (instr->instr.opcode != 0xFF) {
3531 /* If the operand is a constant, see if we can "reduce" from
3532 absolute mode to zeropage mode */
3533 if ((astnode_is_type(o, INTEGER_NODE)) &&
3534 ((unsigned long)o->integer < 256) &&
3535 ((c = opcode_zp_equiv(instr->instr.opcode)) != 0xFF)) {
3536 /* Switch to the zeromode version */
3537 instr->instr.opcode = c;
3538 switch (instr->instr.mode) {
3539 case ABSOLUTE_MODE: instr->instr.mode = ZEROPAGE_MODE; break;
3540 case ABSOLUTE_X_MODE: instr->instr.mode = ZEROPAGE_X_MODE; break;
3541 case ABSOLUTE_Y_MODE: instr->instr.mode = ZEROPAGE_Y_MODE; break;
3542 default: /* Impossible to get here, right? */ break;
3545 /* If the operand is a constant, make sure it fits */
3546 if (astnode_is_type(o, INTEGER_NODE)) {
3547 switch (instr->instr.mode) {
3548 case IMMEDIATE_MODE:
3549 case ZEROPAGE_MODE:
3550 case ZEROPAGE_X_MODE:
3551 case ZEROPAGE_Y_MODE:
3552 case PREINDEXED_INDIRECT_MODE:
3553 case POSTINDEXED_INDIRECT_MODE:
3554 /* Operand must fit in 8 bits */
3555 if (!IS_BYTE_VALUE(o->integer)) {
3556 warn(o->loc, "operand out of range; truncated");
3557 o->integer &= 0xFF;
3559 break;
3561 case ABSOLUTE_MODE:
3562 case ABSOLUTE_X_MODE:
3563 case ABSOLUTE_Y_MODE:
3564 case INDIRECT_MODE:
3565 /* Operand must fit in 16 bits */
3566 if ((unsigned long)o->integer >= 0x10000) {
3567 warn(o->loc, "operand out of range; truncated");
3568 o->integer &= 0xFFFF;
3570 break;
3572 case RELATIVE_MODE:
3573 /* Constant isn't allowed here is it? */
3574 break;
3576 default:
3577 break;
3580 else if (astnode_is_type(o, STRING_NODE)) {
3581 /* String operand doesn't make sense here */
3582 err(instr->loc, "invalid operand");
3584 } else {
3585 err(instr->loc, "invalid addressing mode");
3587 return 0;
3591 * ### Is this really such a good idea?
3593 static int maybe_merge_data(astnode *n, void *arg, astnode **next)
3595 astnode *temp;
3596 astnode *type;
3597 type = LHS(n);
3598 /* Only merge if no debugging, otherwise line information is lost. */
3599 if (!xasm_args.debug && astnode_is_type(*next, DATA_NODE)
3600 && astnode_equal(type, LHS(*next)) ) {
3601 /* Merge ahead */
3602 temp = *next;
3603 astnode_finalize( astnode_remove_child_at(temp, 0) ); /* Remove datatype node */
3604 astnode_add_child(n, astnode_remove_children(temp) );
3605 astnode_finalize(temp);
3606 *next = n;
3607 } else {
3608 /* Reduce expressions to final form */
3609 for (n = n->first_child; n != NULL; n = temp->next_sibling) {
3610 temp = reduce_expression_complete(n);
3611 if (astnode_is_type(temp, INTEGER_NODE)) {
3612 /* Check that value fits according to datatype */
3613 switch (type->datatype) {
3614 case BYTE_DATATYPE:
3615 if (!IS_BYTE_VALUE(temp->integer)) {
3616 warn(temp->loc, "operand out of range; truncated");
3617 temp->integer &= 0xFF;
3619 break;
3621 case WORD_DATATYPE:
3622 if (!IS_WORD_VALUE(temp->integer)) {
3623 warn(temp->loc, "operand out of range; truncated");
3624 temp->integer &= 0xFFFF;
3626 break;
3628 case DWORD_DATATYPE:
3629 break;
3631 default:
3632 break;
3637 return 0;
3643 static int maybe_merge_storage(astnode *storage, void *arg, astnode **next)
3645 astnode *temp;
3646 astnode *new_count;
3647 astnode *old_count;
3648 if (astnode_is_type(*next, STORAGE_NODE)
3649 && astnode_equal(LHS(storage), LHS(*next)) ) {
3650 /* Merge ahead */
3651 temp = *next;
3652 astnode_finalize( astnode_remove_child_at(temp, 0) ); /* Remove datatype node */
3653 old_count = RHS(storage);
3654 /* Calculate new count */
3655 new_count = astnode_create_arithmetic(
3656 PLUS_OPERATOR,
3657 astnode_remove_child_at(temp, 0),
3658 astnode_clone(old_count, storage->loc),
3659 storage->loc
3661 new_count = reduce_expression_complete(new_count);
3662 astnode_replace(old_count, new_count);
3663 astnode_finalize(old_count);
3664 astnode_finalize(temp);
3665 *next = storage;
3666 } else {
3667 reduce_expression_complete(RHS(storage));
3669 return 0;
3673 * Replaces .proc by its label followed by statements.
3675 static int flatten_proc(astnode *proc, void *arg, astnode **next)
3677 astnode *id = LHS(proc);
3678 astnode *list = RHS(proc);
3679 astnode_remove(id);
3680 id->type = LABEL_NODE;
3681 astnode_insert_child(list, id, 0);
3682 astnode *stmts = astnode_remove_children(list);
3683 astnode_replace(proc, stmts);
3684 astnode_finalize(proc);
3685 *next = stmts;
3686 return 0;
3692 static int flatten_var_decl(astnode *var, void *arg, astnode **next)
3694 astnode *stmts = LHS(var);
3695 astnode_remove_children(var);
3696 stmts->type = LABEL_NODE;
3697 astnode_replace(var, stmts);
3698 astnode_finalize(var);
3699 *next = stmts;
3700 return 0;
3704 * Third and final pass (if the output isn't pure 6502).
3705 * Translates instructions, merges data and storage nodes,
3706 * and reduces their operands to final form on the way.
3708 void astproc_third_pass(astnode *root)
3710 /* Table of callback functions for our purpose. */
3711 static astnodeprocmap map[] = {
3712 { INSTRUCTION_NODE, translate_instruction },
3713 { DATA_NODE, maybe_merge_data },
3714 { STORAGE_NODE, maybe_merge_storage },
3715 { VAR_DECL_NODE, flatten_var_decl },
3716 { PROC_NODE, flatten_proc },
3717 { STRUC_DECL_NODE, noop },
3718 { UNION_DECL_NODE, noop },
3719 { ENUM_DECL_NODE, noop },
3720 { RECORD_DECL_NODE, noop },
3721 { 0, NULL }
3723 in_dataseg = 0; /* codeseg is default */
3724 /* Do the walk. */
3725 astproc_walk(root, NULL, map);
3728 /*---------------------------------------------------------------------------*/
3731 * Evaluates the given expression, _without_ replacing it in the AST
3732 * (unlike astproc_reduce_expression() and friends).
3734 static astnode *eval_expression(astnode *expr)
3736 switch (astnode_get_type(expr)) {
3738 case ARITHMETIC_NODE: {
3739 astnode *lhs = eval_expression(LHS(expr));
3740 astnode *rhs = eval_expression(RHS(expr));
3741 switch (expr->oper) {
3742 /* Binary ops */
3743 case PLUS_OPERATOR:
3744 case MINUS_OPERATOR:
3745 case MUL_OPERATOR:
3746 case DIV_OPERATOR:
3747 case MOD_OPERATOR:
3748 case AND_OPERATOR:
3749 case OR_OPERATOR:
3750 case XOR_OPERATOR:
3751 case SHL_OPERATOR:
3752 case SHR_OPERATOR:
3753 case LT_OPERATOR:
3754 case GT_OPERATOR:
3755 case EQ_OPERATOR:
3756 case NE_OPERATOR:
3757 case LE_OPERATOR:
3758 case GE_OPERATOR:
3759 if (astnode_is_type(lhs, INTEGER_NODE)
3760 && astnode_is_type(rhs, INTEGER_NODE)) {
3761 /* Both sides are integer literals. */
3762 switch (expr->oper) {
3763 case PLUS_OPERATOR: return astnode_create_integer(lhs->integer + rhs->integer, expr->loc);
3764 case MINUS_OPERATOR: return astnode_create_integer(lhs->integer - rhs->integer, expr->loc);
3765 case MUL_OPERATOR: return astnode_create_integer(lhs->integer * rhs->integer, expr->loc);
3766 case DIV_OPERATOR: return astnode_create_integer(lhs->integer / rhs->integer, expr->loc);
3767 case MOD_OPERATOR: return astnode_create_integer(lhs->integer % rhs->integer, expr->loc);
3768 case AND_OPERATOR: return astnode_create_integer(lhs->integer & rhs->integer, expr->loc);
3769 case OR_OPERATOR: return astnode_create_integer(lhs->integer | rhs->integer, expr->loc);
3770 case XOR_OPERATOR: return astnode_create_integer(lhs->integer ^ rhs->integer, expr->loc);
3771 case SHL_OPERATOR: return astnode_create_integer(lhs->integer << rhs->integer, expr->loc);
3772 case SHR_OPERATOR: return astnode_create_integer(lhs->integer >> rhs->integer, expr->loc);
3773 case LT_OPERATOR: return astnode_create_integer(lhs->integer < rhs->integer, expr->loc);
3774 case GT_OPERATOR: return astnode_create_integer(lhs->integer > rhs->integer, expr->loc);
3775 case EQ_OPERATOR: return astnode_create_integer(lhs->integer == rhs->integer, expr->loc);
3776 case NE_OPERATOR: return astnode_create_integer(lhs->integer != rhs->integer, expr->loc);
3777 case LE_OPERATOR: return astnode_create_integer(lhs->integer <= rhs->integer, expr->loc);
3778 case GE_OPERATOR: return astnode_create_integer(lhs->integer >= rhs->integer, expr->loc);
3780 default: /* ### Error, actually */
3781 break;
3784 /* Use some mathematical identities... */
3785 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 0))
3786 && (expr->oper == PLUS_OPERATOR)) {
3787 /* 0+expr == expr */
3788 return astnode_clone(rhs, rhs->loc);
3789 } else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 0))
3790 && (expr->oper == PLUS_OPERATOR)) {
3791 /* expr+0 == expr */
3792 return astnode_clone(lhs, lhs->loc);
3793 } else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 1))
3794 && (expr->oper == MUL_OPERATOR)) {
3795 /* 1*expr == expr */
3796 return astnode_clone(rhs, rhs->loc);
3797 } else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 1))
3798 && ((expr->oper == MUL_OPERATOR) || (expr->oper == DIV_OPERATOR)) ) {
3799 /* expr*1 == expr */
3800 /* expr/1 == expr */
3801 return astnode_clone(lhs, lhs->loc);
3803 break;
3805 /* Unary ops */
3806 case NEG_OPERATOR:
3807 case NOT_OPERATOR:
3808 case LO_OPERATOR:
3809 case HI_OPERATOR:
3810 case UMINUS_OPERATOR:
3811 case BANK_OPERATOR:
3812 if (astnode_is_type(lhs, INTEGER_NODE)) {
3813 switch (expr->oper) {
3814 case NEG_OPERATOR: return astnode_create_integer(~lhs->integer, expr->loc);
3815 case NOT_OPERATOR: return astnode_create_integer(!lhs->integer, expr->loc);
3816 case LO_OPERATOR: return astnode_create_integer(lhs->integer & 0xFF, expr->loc);
3817 case HI_OPERATOR: return astnode_create_integer((lhs->integer >> 8) & 0xFF, expr->loc);
3818 case UMINUS_OPERATOR: return astnode_create_integer(-lhs->integer, expr->loc);
3819 default: break;
3822 break;
3823 } /* switch */
3824 } break;
3826 case INTEGER_NODE:
3827 return astnode_clone(expr, expr->loc);
3829 case IDENTIFIER_NODE: {
3830 symtab_entry *e = symtab_lookup(expr->ident);
3831 // ### assert(e->type == LABEL_SYMBOL);
3832 if (e->flags & ADDR_FLAG)
3833 return astnode_create_integer(e->address, expr->loc);
3834 } break;
3836 case CURRENT_PC_NODE:
3837 return astnode_create_integer(in_dataseg ? dataseg_pc : codeseg_pc, expr->loc);
3839 default:
3840 break;
3841 } /* switch */
3842 return 0;
3846 * Sets the address of the label to be the currently calculated PC.
3848 static int set_label_address(astnode *label, void *arg, astnode **next)
3850 symtab_entry *e = symtab_lookup(label->ident);
3851 // ### assert(e && (e->type == LABEL_SYMBOL));
3852 e->address = in_dataseg ? dataseg_pc : codeseg_pc;
3853 e->flags |= ADDR_FLAG;
3854 return 0;
3858 * Sets the current PC to the address specified by the ORG node.
3860 static int set_pc_from_org(astnode *org, void *arg, astnode **next)
3862 astnode *addr = LHS(org);
3863 assert(astnode_is_type(addr, INTEGER_NODE));
3864 if (in_dataseg)
3865 dataseg_pc = addr->integer;
3866 else
3867 codeseg_pc = addr->integer;
3868 return 0;
3872 * Ensures that the given symbol is defined.
3874 static int ensure_symbol_is_defined(astnode *id, void *arg, astnode **next)
3876 symtab_entry *e = symtab_lookup(id->ident);
3877 assert(e);
3878 if ((e->flags & EXTRN_FLAG) && !(e->flags & ERROR_UNDEFINED_FLAG)) {
3879 err(id->loc, "cannot generate pure binary because `%s' is not defined", id->ident);
3880 e->flags |= ERROR_UNDEFINED_FLAG;
3882 return 0;
3886 * Increments PC according to the size of the instruction.
3888 static int inc_pc_by_instruction(astnode *instr, void *arg, astnode **next)
3890 assert(!in_dataseg);
3891 if (LHS(instr)) {
3892 /* Has operand */
3893 unsigned char zp_op = opcode_zp_equiv(instr->instr.opcode);
3894 if (zp_op != 0xFF) {
3895 /* See if we can optimize this to a ZP-instruction */
3896 astnode *operand = eval_expression(LHS(instr));
3897 if (operand && astnode_is_type(operand, INTEGER_NODE)) {
3898 if ((operand->integer >= 0) && (operand->integer < 256)) {
3899 instr->instr.opcode = zp_op;
3901 astnode_finalize(operand);
3905 codeseg_pc += opcode_length(instr->instr.opcode);
3906 return 1;
3910 * Increments PC according to the size of the defined data.
3912 static int inc_pc_by_data(astnode *data, void *arg, astnode **next)
3914 astnode *type = LHS(data);
3915 int count = astnode_get_child_count(data) - 1;
3916 int nbytes;
3917 assert(!in_dataseg);
3918 switch (type->datatype) {
3919 case BYTE_DATATYPE: nbytes = count; break;
3920 case WORD_DATATYPE: nbytes = count * 2; break;
3921 case DWORD_DATATYPE: nbytes = count * 4; break;
3922 default:
3923 assert(0);
3924 break;
3926 codeseg_pc += nbytes;
3927 return 0;
3931 * Increments PC according to the size of the included binary.
3933 static int inc_pc_by_binary(astnode *node, void *arg, astnode **next)
3935 assert(!in_dataseg);
3936 codeseg_pc += node->binary.size;
3937 return 0;
3941 * Increments PC according to the size of the storage.
3943 static int inc_pc_by_storage(astnode *storage, void *arg, astnode **next)
3945 astnode *type = LHS(storage);
3946 assert(type->datatype == BYTE_DATATYPE);
3947 astnode *count = eval_expression(RHS(storage));
3948 if (count) {
3949 if (astnode_get_type(count) == INTEGER_NODE) {
3950 if (in_dataseg)
3951 dataseg_pc += count->integer;
3952 else
3953 codeseg_pc += count->integer;
3955 astnode_finalize(count);
3957 return 1;
3961 * This pass is only performed if the output format is pure 6502.
3962 * It ensures that it is actually possible to generate pure 6502
3963 * for this syntax tree (i.e. no external symbols).
3964 * Furthermore, it calculates the address of all labels, so that
3965 * everything is ready for the final output phase.
3967 void astproc_fourth_pass(astnode *root)
3969 int x;
3970 /* ### Should loop while there's a change in the address of
3971 one or more labels */
3972 for (x = 0; x < 2; ++x) {
3973 in_dataseg = 0; /* codeseg is default */
3974 dataseg_pc = 0;
3975 codeseg_pc = 0;
3976 /* Table of callback functions for our purpose. */
3977 static astnodeprocmap map[] = {
3978 { DATASEG_NODE, process_dataseg },
3979 { CODESEG_NODE, process_codeseg },
3980 { ORG_NODE, set_pc_from_org },
3981 { LABEL_NODE, set_label_address },
3982 { IDENTIFIER_NODE, ensure_symbol_is_defined },
3983 { INSTRUCTION_NODE, inc_pc_by_instruction },
3984 { DATA_NODE, inc_pc_by_data },
3985 { STORAGE_NODE, inc_pc_by_storage },
3986 { BINARY_NODE, inc_pc_by_binary },
3987 { STRUC_DECL_NODE, noop },
3988 { UNION_DECL_NODE, noop },
3989 { ENUM_DECL_NODE, noop },
3990 { RECORD_DECL_NODE, noop },
3991 { 0, NULL }
3993 /* Do the walk. */
3994 astproc_walk(root, NULL, map);
3998 /*---------------------------------------------------------------------------*/
4001 * Writes an instruction.
4003 static int write_instruction(astnode *instr, void *arg, astnode **next)
4005 FILE *fp = (FILE *)arg;
4006 unsigned char op = instr->instr.opcode;
4007 int len = opcode_length(op);
4008 fputc(op, fp);
4009 if (len > 1) {
4010 /* Write operand */
4011 astnode *operand = eval_expression(LHS(instr));
4012 if(!astnode_is_type(operand, INTEGER_NODE)) {
4013 /* ### This is rather fatal, it should be a literal by this point */
4014 err(instr->loc, "operand does not evaluate to literal");
4015 } else {
4016 int value = operand->integer;
4017 if (len == 2) {
4018 /* Check if it's a relative jump */
4019 switch (op) {
4020 case 0x10:
4021 case 0x30:
4022 case 0x50:
4023 case 0x70:
4024 case 0x90:
4025 case 0xB0:
4026 case 0xD0:
4027 case 0xF0:
4028 /* Calculate difference between target and address of next instruction */
4029 value = value - (codeseg_pc + 2);
4030 if (!IS_BYTE_VALUE(value)) {
4031 err(operand->loc, "branch out of range");
4032 value &= 0xFF;
4034 break;
4036 default:
4037 if (!IS_BYTE_VALUE(value)) {
4038 warn(operand->loc, "operand out of range; truncated");
4039 value &= 0xFF;
4041 break;
4043 fputc((unsigned char)value, fp);
4044 } else {
4045 assert(len == 3);
4046 if (!IS_WORD_VALUE(value)) {
4047 warn(operand->loc, "operand out of range; truncated");
4048 value &= 0xFFFF;
4050 fputc((unsigned char)value, fp);
4051 fputc((unsigned char)(value >> 8), fp);
4054 astnode_finalize(operand);
4056 codeseg_pc += opcode_length(instr->instr.opcode);
4057 return 0;
4061 * Writes data.
4063 static int write_data(astnode *data, void *arg, astnode **next)
4065 FILE *fp = (FILE *)arg;
4066 astnode *type = LHS(data);
4067 astnode *expr;
4068 assert(!in_dataseg);
4069 for (expr = RHS(data); expr != NULL; expr = astnode_get_next_sibling(expr) ) {
4070 int value;
4071 astnode *e = eval_expression(expr);
4072 assert(e->type == INTEGER_NODE);
4073 value = e->integer;
4074 switch (type->datatype) {
4075 case BYTE_DATATYPE:
4076 if (!IS_BYTE_VALUE(value)) {
4077 warn(expr->loc, "operand out of range; truncated");
4078 value &= 0xFF;
4080 fputc((unsigned char)value, fp);
4081 codeseg_pc += 1;
4082 break;
4084 case WORD_DATATYPE:
4085 if (!IS_WORD_VALUE(value)) {
4086 warn(expr->loc, "operand out of range; truncated");
4087 value &= 0xFFFF;
4089 fputc((unsigned char)value, fp);
4090 fputc((unsigned char)(value >> 8), fp);
4091 codeseg_pc += 2;
4092 break;
4094 case DWORD_DATATYPE:
4095 fputc((unsigned char)value, fp);
4096 fputc((unsigned char)(value >> 8), fp);
4097 fputc((unsigned char)(value >> 16), fp);
4098 fputc((unsigned char)(value >> 24), fp);
4099 codeseg_pc += 4;
4100 break;
4102 default:
4103 assert(0);
4104 break;
4106 astnode_finalize(e);
4108 return 0;
4112 * Writes storage (padding).
4114 static int write_storage(astnode *storage, void *arg, astnode **next)
4116 FILE *fp = (FILE *)arg;
4117 astnode *type = LHS(storage);
4118 astnode *count = eval_expression(RHS(storage));
4119 assert(type->datatype == BYTE_DATATYPE);
4120 assert(!in_dataseg);
4121 if (count) {
4122 int i;
4123 assert(astnode_get_type(count) == INTEGER_NODE);
4124 for (i = 0; i < count->integer; ++i)
4125 fputc(0, fp);
4126 codeseg_pc += count->integer;
4127 astnode_finalize(count);
4129 return 0;
4133 * Writes binary.
4135 static int write_binary(astnode *node, void *arg, astnode **next)
4137 FILE *fp = (FILE *)arg;
4138 assert(!in_dataseg);
4139 fwrite(node->binary.data, 1, node->binary.size, fp);
4140 codeseg_pc += node->binary.size;
4141 return 0;
4145 * This pass is only performed if the output format is pure 6502.
4146 * It writes the binary code.
4148 void astproc_fifth_pass(astnode *root, FILE *fp)
4150 /* Table of callback functions for our purpose. */
4151 static astnodeprocmap map[] = {
4152 { DATASEG_NODE, process_dataseg },
4153 { CODESEG_NODE, process_codeseg },
4154 { ORG_NODE, set_pc_from_org },
4155 { INSTRUCTION_NODE, write_instruction },
4156 { DATA_NODE, write_data },
4157 { STORAGE_NODE, write_storage },
4158 { BINARY_NODE, write_binary },
4159 { STRUC_DECL_NODE, noop },
4160 { UNION_DECL_NODE, noop },
4161 { ENUM_DECL_NODE, noop },
4162 { RECORD_DECL_NODE, noop },
4163 { 0, NULL }
4165 in_dataseg = 0; /* codeseg is default */
4166 dataseg_pc = 0;
4167 codeseg_pc = 0;
4168 /* Do the walk. */
4169 astproc_walk(root, fp, map);