fix dangling pointer issue. If the node being processed is replaced,
[xorcyst.git] / astproc.c
blob203df558c70a0fad0ef3f91b56533011f99d27d9
1 /*
2 * $Id: astproc.c,v 1.21 2007/11/11 22:35:22 khansen Exp $
3 * $Log: astproc.c,v $
4 * Revision 1.21 2007/11/11 22:35:22 khansen
5 * compile on mac
7 * Revision 1.20 2007/08/19 10:17:39 khansen
8 * allow symbols to be used without having been declared
10 * Revision 1.19 2007/08/12 18:58:12 khansen
11 * ability to generate pure 6502 binary (--pure-binary switch)
13 * Revision 1.18 2007/08/12 02:42:46 khansen
14 * prettify, const
16 * Revision 1.17 2007/08/09 22:06:10 khansen
17 * ability to pass in reference to local label as argument to macro
19 * Revision 1.16 2007/08/09 20:48:46 khansen
20 * disable buggy code that can cause crash
22 * Revision 1.15 2007/08/09 20:33:40 khansen
23 * progress
25 * Revision 1.14 2007/08/08 22:40:01 khansen
26 * improved symbol lookup, definitions must precede usage
28 * Revision 1.13 2007/07/22 13:33:26 khansen
29 * convert tabs to whitespaces
31 * Revision 1.12 2005/01/09 11:17:57 kenth
32 * xorcyst 1.4.5
33 * fixed bug in process_data(), merge_data()
34 * no longer truncation warning when fits in signed byte/word
36 * Revision 1.11 2005/01/05 02:28:13 kenth
37 * xorcyst 1.4.3
38 * support for anonymous unions
39 * fixed sizeof bug
41 * Revision 1.10 2004/12/29 21:44:41 kenth
42 * xorcyst 1.4.2
43 * static indexing, sizeof improved
45 * Revision 1.9 2004/12/25 02:22:35 kenth
46 * fixed bug in reduce_user_storage()
48 * Revision 1.8 2004/12/19 19:58:29 kenth
49 * xorcyst 1.4.0
51 * Revision 1.7 2004/12/18 16:57:39 kenth
52 * STORAGE_NODE(WORD/DWORD_DATATYPE) converts to BYTE
54 * Revision 1.6 2004/12/16 13:19:47 kenth
55 * xorcyst 1.3.5
57 * Revision 1.5 2004/12/14 01:49:05 kenth
58 * xorcyst 1.3.0
60 * Revision 1.4 2004/12/11 02:01:25 kenth
61 * added forward/backward branching
63 * Revision 1.3 2004/12/09 11:18:13 kenth
64 * added: warning, error node processing
66 * Revision 1.2 2004/12/06 04:52:24 kenth
67 * Major updates (xorcyst 1.1.0)
69 * Revision 1.1 2004/06/30 07:55:31 kenth
70 * Initial revision
74 /**
75 * (C) 2004 Kent Hansen
77 * The XORcyst is free software; you can redistribute it and/or modify
78 * it under the terms of the GNU General Public License as published by
79 * the Free Software Foundation; either version 2 of the License, or
80 * (at your option) any later version.
82 * The XORcyst is distributed in the hope that it will be useful,
83 * but WITHOUT ANY WARRANTY; without even the implied warranty of
84 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
85 * GNU General Public License for more details.
87 * You should have received a copy of the GNU General Public License
88 * along with The XORcyst; if not, write to the Free Software
89 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
 * This file contains functions that process the Abstract Syntax Tree (AST).
 * After the assembly file has been parsed into an AST, a number of passes are
 * made over it to process and transform it. The functions here are
 * concerned with things like
 * - macro expansion
 * - symbol table generation
 * - equates substitution
 * - constant folding
 * - code and symbol validation
 */
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "astproc.h"
#include "symtab.h"
#include "opcode.h"
#include "charmap.h"
#include "xasm.h"
/*
 * Range predicates for 8-bit and 16-bit quantities. A value counts as a
 * byte (word) when it fits in either the signed or the unsigned range.
 * These are macros: the argument is evaluated more than once.
 */
#define IS_SIGNED_BYTE_VALUE(v) ((-128 <= (v)) && ((v) <= 127))
#define IS_UNSIGNED_BYTE_VALUE(v) ((0 <= (v)) && ((v) <= 255))
#define IS_BYTE_VALUE(v) (IS_UNSIGNED_BYTE_VALUE(v) || IS_SIGNED_BYTE_VALUE(v))

#define IS_SIGNED_WORD_VALUE(v) ((-32768 <= (v)) && ((v) <= 32767))
#define IS_UNSIGNED_WORD_VALUE(v) ((0 <= (v)) && ((v) <= 65535))
#define IS_WORD_VALUE(v) (IS_UNSIGNED_WORD_VALUE(v) || IS_SIGNED_WORD_VALUE(v))
123 /*---------------------------------------------------------------------------*/
/** Running total of errors reported while processing the AST. */
static int err_count = 0;

/** Running total of warnings reported while processing the AST. */
static int warn_count = 0;

/** Count of global labels encountered so far. */
static int label_count = 0;

/** Tracks whether the current statement is in the dataseg or the codeseg. */
static int in_dataseg = 0;

/** Default symbol modifiers, i.e. ZEROPAGE_FLAG, PUBLIC_FLAG. */
static int modifiers = 0;

/** Used when outputting pure 6502 binary. */
static int dataseg_pc;
static int codeseg_pc;
144 /*---------------------------------------------------------------------------*/
/**
 * Mapping from regular ASCII characters to custom character values.
 * Used to transform .char arrays to regular .db arrays.
 */
static unsigned char charmap[256];

/**
 * Resets the custom character map.
 * Every character code (0..255) is mapped to itself.
 */
static void reset_charmap(void)
{
    int i;
    for (i = 0; i < 256; i++) {
        /* Cast straight to the element type: going through plain `char`
           is implementation-defined for codes above 127 when char is signed. */
        charmap[i] = (unsigned char)i;
    }
}
163 /*---------------------------------------------------------------------------*/
164 /* Forward/backward branching stuff */
166 struct tag_forward_branch_info {
167 astnode *refs[128];
168 int index; /* Index into refs */
169 int counter;
172 typedef struct tag_forward_branch_info forward_branch_info;
174 struct tag_backward_branch_info {
175 astnode *decl;
176 int counter;
179 typedef struct tag_backward_branch_info backward_branch_info;
181 #define BRANCH_MAX 8
183 static forward_branch_info forward_branch[BRANCH_MAX];
185 static backward_branch_info backward_branch[BRANCH_MAX];
188 * Zaps forward/backward branch data.
190 static void branch_init()
192 int i, j;
193 for (i=0; i<BRANCH_MAX; i++) {
194 for (j=0; j<128; j++) {
195 forward_branch[i].refs[j] = NULL;
197 forward_branch[i].index = 0;
198 forward_branch[i].counter = 0;
199 backward_branch[i].decl = NULL;
200 backward_branch[i].counter = 0;
204 /*---------------------------------------------------------------------------*/
207 * Issues an error.
208 * @param loc File location of error
209 * @param fmt printf-style format string
211 static void err(location loc, const char *fmt, ...)
213 va_list ap;
214 va_start(ap, fmt);
216 /* Print error message w/ location info */
217 fprintf(stderr, "error: %s:", loc.file);
218 LOCATION_PRINT(stderr, loc);
219 fprintf(stderr, ": ");
220 vfprintf(stderr, fmt, ap);
221 fprintf(stderr, "\n");
223 va_end(ap);
225 /* Increase total error count */
226 err_count++;
230 * Issues a warning.
231 * @param loc File location of warning
232 * @param fmt printf-style format string
234 static void warn(location loc, const char *fmt, ...)
236 va_list ap;
237 if (!xasm_args.no_warn) {
238 va_start(ap, fmt);
239 /* Print warning message w/ location info */
240 fprintf(stderr, "warning: %s:", loc.file);
241 LOCATION_PRINT(stderr, loc);
242 fprintf(stderr, ": ");
243 vfprintf(stderr, fmt, ap);
244 fprintf(stderr, "\n");
245 va_end(ap);
248 /* Increase total warning count */
249 warn_count++;
253 * Gets the number of errors encountered during processing.
254 * @return Number of errors
256 int astproc_err_count()
258 return err_count;
261 /*---------------------------------------------------------------------------*/
264 * Gets the processor function for a node type from a map.
265 * Used by astproc_walk().
266 * @param type The node type
267 * @param map A mapping from node types to processor functions
269 static astnodeproc astproc_node_type_to_proc(astnode_type type, const astnodeprocmap *map)
271 /* Try all map entries */
272 for (; map->proc != NULL; map += 1) {
273 if (map->type == type) {
274 return map->proc; /* Match */
277 /* No match */
278 return NULL;
281 /*---------------------------------------------------------------------------*/
284 * Walks an abstract syntax tree recursively.
285 * @param n Node to walk
286 * @param arg Optional argument to pass to processor function
287 * @param map Mapping of node types to processor functions
289 static void astproc_walk_recursive(astnode *n, void *arg, const astnodeprocmap *map, astnode **next)
291 astnode *c;
292 astnode *t;
293 if (n == NULL) { return; }
294 /* Process this node if it has a processor function */
295 astnodeproc p = astproc_node_type_to_proc(astnode_get_type(n), map);
296 if (p != NULL) {
297 if (!p(n, arg, next)) return; /* Don't walk children */
299 /* Walk the node's children recursively */
300 for (c=n->first_child; c != NULL; c = t) {
301 t = c->next_sibling; /* default next node */
302 astproc_walk_recursive(c, arg, map, &t);
307 * Generic tree walker function.
308 * @param n Root
309 * @param arg General-purpose argument passed to each node handler function
310 * @param map Array of (nodetype, handler function) tuples
312 void astproc_walk(astnode *n, void *arg, const astnodeprocmap *map)
314 astnode *dummy;
315 astproc_walk_recursive(n, arg, map, &dummy);
318 /*---------------------------------------------------------------------------*/
321 * Don't do any processing of this node or its children on this pass.
323 static int noop(astnode *n, void *arg, astnode **next)
325 return 0;
329 * Substitutes an identifier node with subst_expr if the id is equal to subst_id.
330 * @param n A node of type IDENTIFIER_NODE
331 * @param arg Array of length 2, containing (expr, id) pair
333 static int substitute_id(astnode *n, void *arg, astnode **next)
335 /* arg is array containing expression and identifier */
336 astnode **array = (astnode **)arg;
337 astnode *subst_expr = array[0];
338 astnode *subst_id = array[1];
339 /* Test if this node and the identifier to replace are equal */
340 if (astnode_equal(n, subst_id)) {
341 /* They're equal, replace it by expression. */
342 astnode *cl = astnode_clone(subst_expr, n->loc);
343 /* ### Generalize: traverse all children, set the flag */
344 if (astnode_get_type(cl) == LOCAL_ID_NODE) {
345 cl->flags |= 0x80; /* don't globalize it */
347 astnode_replace(n, cl);
348 astnode_finalize(n);
349 *next = cl;
350 return 0;
351 } else {
352 return 1;
357 * Substitutes expr for id in list.
358 * Used by macro expander to substitute a macro body parameter name with the
359 * actual expression used in the macro expansion.
360 * @param expr An expression
361 * @param id An identifier
362 * @param list A list of statements (macro body)
364 static void substitute_expr_for_id(astnode *expr, astnode *id, astnode *list)
366 /* Prepare argument to astproc_walk */
367 astnode *array[2];
368 array[0] = expr;
369 array[1] = id;
370 /* Table of callback functions for our purpose. */
371 static astnodeprocmap map[] = {
372 { IDENTIFIER_NODE, substitute_id },
373 { 0, NULL }
375 /* Do the walk. */
376 astproc_walk(list, array, map);
379 /*---------------------------------------------------------------------------*/
382 * Globalizes a macro expanded local.
383 * This is done simply by concatenating the local label identifier with the
384 * global macro invocation counter.
385 * @param n A node of type LOCAL_LABEL_NODE or LOCAL_ID_NODE
386 * @param arg Namespace counter (int)
388 static int globalize_macro_expanded_local(astnode *n, void *arg, astnode **next)
390 /* Only globalize if it's a reference to a label defined in the macro */
391 if (!(n->flags & 0x80)) {
392 char str[16];
393 int count;
394 /* Make it global by appending the macro expansion counter to the id */
395 count = (int)arg;
396 sprintf(str, "#%d", count);
397 if (astnode_is_type(n, LOCAL_LABEL_NODE)) {
398 /* LOCAL_LABEL_NODE, use label field */
399 n->label = realloc(n->label, strlen(n->label)+strlen(str)+1);
400 strcat(n->label, str);
401 } else {
402 /* LOCAL_ID_NODE, use ident field */
403 assert(astnode_is_type(n, LOCAL_ID_NODE));
404 n->ident = realloc(n->ident, strlen(n->ident)+strlen(str)+1);
405 strcat(n->ident, str);
408 /* */
409 return 1;
413 * Globalizes all locals in the body of a macro expansion.
414 * Used by the macro expander to ensure that local labels in macro expansions
415 * are unique.
416 * @param exp_body The expanded macro body
417 * @param count Unique macro namespace counter
419 static void globalize_macro_expanded_locals(astnode *exp_body, int count)
421 /* Table of callback functions for our purpose. */
422 static astnodeprocmap map[] = {
423 { LOCAL_ID_NODE, globalize_macro_expanded_local },
424 { LOCAL_LABEL_NODE, globalize_macro_expanded_local },
425 { 0, NULL }
427 /* Do the walk. */
428 astproc_walk(exp_body, (void *)count, map);
432 * Expands a macro; that is, replaces a macro invocation in the AST with the
433 * macro body. Substitutes parameter names for values.
434 * @param n Must be a node of type MACRO_NODE
435 * @param arg Not used
437 static int expand_macro(astnode *n, void *arg, astnode **next)
439 astnode *decl;
440 astnode *decl_body;
441 astnode *exp_body;
442 astnode *formals;
443 astnode *actuals;
444 astnode *id;
445 astnode *expr;
446 int i;
447 /* Keeps track of the current/total number of macro expansions */
448 static int count = 0;
449 /* Get the name of the macro to expand */
450 id = astnode_get_child(n, 0);
451 /* Look up its definition in symbol table */
452 symtab_entry *e = symtab_lookup(id->ident);
453 /* If it's not in the symbol table, error. */
454 if (e == NULL) {
455 err(n->loc, "unknown macro or directive `%s'", id->ident);
456 /* Remove from AST */
457 astnode_remove(n);
458 astnode_finalize(n);
459 return 0;
461 else if (e->type != MACRO_SYMBOL) {
462 err(n->loc, "cannot expand `%s'; not a macro", e->id);
463 /* Remove from AST */
464 astnode_remove(n);
465 astnode_finalize(n);
466 return 0;
468 else {
469 /* e->def has pointer to proper MACRO_DECL_NODE */
470 decl = (astnode *)e->def;
471 /* Get the lists of formals and actuals */
472 formals = astnode_get_child(decl, 1);
473 actuals = astnode_get_child(n, 1);
474 /* Verify that argument count is correct */
475 if (astnode_get_child_count(formals) != astnode_get_child_count(actuals)) {
476 err(n->loc, "macro `%s' does not take %d argument(s)", id->ident, astnode_get_child_count(actuals) );
477 /* Remove from AST */
478 astnode_remove(n);
479 astnode_finalize(n);
480 return 0;
482 /* Expand the body */
483 decl_body = astnode_get_child(decl, 2);
484 exp_body = astnode_clone(decl_body, n->loc);
485 /* Substitute actuals for formals */
486 for (i=0; i<astnode_get_child_count(actuals); i++) {
487 /* The id to substitute */
488 id = astnode_get_child(formals, i);
489 /* The expression to substitute it with */
490 expr = astnode_get_child(actuals, i);
491 /* Do it! */
492 substitute_expr_for_id(expr, id, exp_body);
494 /* Make locals a bit more global */
495 globalize_macro_expanded_locals(exp_body, count);
496 /* Replace MACRO_NODE by the macro body instance */
497 astnode_replace(n, astnode_get_child(exp_body, 0));
498 /* Discard the replaced node */
499 astnode_finalize(n);
500 /* Increase macro expansion counter */
501 count++;
502 /* Set next node to start of body */
503 *next = exp_body;
505 /* */
506 return 0;
509 /*---------------------------------------------------------------------------*/
512 * Does constant folding of expression.
513 * If the expression can be folded, the original expression is replaced by the
514 * new one, and the original expression is finalized.
515 * @param expr Expression
516 * @return Original expression, if couldn't fold, otherwise new, folded expression
518 astnode *astproc_fold_constants(astnode *expr)
520 astnode *folded;
521 astnode *lhs;
522 astnode *rhs;
523 if (expr == NULL) { return NULL; }
524 folded = NULL;
525 if (astnode_is_type(expr, ARITHMETIC_NODE)) {
526 /* Fold operands recursively */
527 lhs = astproc_fold_constants(LHS(expr));
528 rhs = astproc_fold_constants(RHS(expr));
529 switch (expr->oper) {
530 /* Binary ops */
531 case PLUS_OPERATOR:
532 case MINUS_OPERATOR:
533 case MUL_OPERATOR:
534 case DIV_OPERATOR:
535 case MOD_OPERATOR:
536 case AND_OPERATOR:
537 case OR_OPERATOR:
538 case XOR_OPERATOR:
539 case SHL_OPERATOR:
540 case SHR_OPERATOR:
541 case LT_OPERATOR:
542 case GT_OPERATOR:
543 case EQ_OPERATOR:
544 case NE_OPERATOR:
545 case LE_OPERATOR:
546 case GE_OPERATOR:
547 /* See if it can be folded */
548 if ( (astnode_is_type(lhs, INTEGER_NODE)) &&
549 (astnode_is_type(rhs, INTEGER_NODE)) ) {
550 /* Both sides are integer literals, so fold. */
551 switch (expr->oper) {
552 case PLUS_OPERATOR: folded = astnode_create_integer(lhs->integer + rhs->integer, expr->loc); break;
553 case MINUS_OPERATOR: folded = astnode_create_integer(lhs->integer - rhs->integer, expr->loc); break;
554 case MUL_OPERATOR: folded = astnode_create_integer(lhs->integer * rhs->integer, expr->loc); break;
555 case DIV_OPERATOR: folded = astnode_create_integer(lhs->integer / rhs->integer, expr->loc); break;
556 case MOD_OPERATOR: folded = astnode_create_integer(lhs->integer % rhs->integer, expr->loc); break;
557 case AND_OPERATOR: folded = astnode_create_integer(lhs->integer & rhs->integer, expr->loc); break;
558 case OR_OPERATOR: folded = astnode_create_integer(lhs->integer | rhs->integer, expr->loc); break;
559 case XOR_OPERATOR: folded = astnode_create_integer(lhs->integer ^ rhs->integer, expr->loc); break;
560 case SHL_OPERATOR: folded = astnode_create_integer(lhs->integer << rhs->integer, expr->loc); break;
561 case SHR_OPERATOR: folded = astnode_create_integer(lhs->integer >> rhs->integer, expr->loc); break;
562 case LT_OPERATOR: folded = astnode_create_integer(lhs->integer < rhs->integer, expr->loc); break;
563 case GT_OPERATOR: folded = astnode_create_integer(lhs->integer > rhs->integer, expr->loc); break;
564 case EQ_OPERATOR: folded = astnode_create_integer(lhs->integer == rhs->integer, expr->loc); break;
565 case NE_OPERATOR: folded = astnode_create_integer(lhs->integer != rhs->integer, expr->loc); break;
566 case LE_OPERATOR: folded = astnode_create_integer(lhs->integer <= rhs->integer, expr->loc); break;
567 case GE_OPERATOR: folded = astnode_create_integer(lhs->integer >= rhs->integer, expr->loc); break;
569 default: /* Error, actually */
570 folded = expr;
571 break;
573 if (folded != expr) {
574 /* Replace expression by folded one. */
575 astnode_replace(expr, folded);
576 astnode_finalize(expr);
577 return folded;
580 else if ( (astnode_is_type(lhs, STRING_NODE)) &&
581 (astnode_is_type(rhs, STRING_NODE)) ) {
582 /* Both sides are string literals. */
583 /* Folding is defined only for certain operators. */
584 switch (expr->oper) {
585 case PLUS_OPERATOR:
586 /* String concatenation. */
587 folded = astnode_create(STRING_NODE, expr->loc);
588 folded->string = (char *)malloc(strlen(lhs->string) + strlen(rhs->string) + 1);
589 if (folded->string != NULL) {
590 strcpy(folded->string, lhs->string);
591 strcat(folded->string, rhs->string);
593 break;
595 /* String comparison. */
596 case LT_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) < 0, expr->loc); break;
597 case GT_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) > 0, expr->loc); break;
598 case EQ_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) == 0, expr->loc); break;
599 case NE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) != 0, expr->loc); break;
600 case LE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) <= 0, expr->loc); break;
601 case GE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) >= 0, expr->loc); break;
603 default:
604 folded = expr;
605 break;
607 if (folded != expr) {
608 /* Replace expression by folded one. */
609 astnode_replace(expr, folded);
610 astnode_finalize(expr);
611 return folded;
614 else if ((astnode_get_type(lhs) == STRING_NODE) &&
615 (astnode_get_type(rhs) == INTEGER_NODE) &&
616 (expr->oper == PLUS_OPERATOR)) {
617 /* Left side is string and right side is integer.
618 Result is a string. */
619 char str[32];
620 sprintf(str, "%d", rhs->integer);
621 folded = astnode_create(STRING_NODE, expr->loc);
622 folded->string = (char *)malloc(strlen(lhs->string) + strlen(str) + 1);
623 if (folded->string != NULL) {
624 strcpy(folded->string, lhs->string);
625 strcat(folded->string, str);
627 /* Replace expression by folded one. */
628 astnode_replace(expr, folded);
629 astnode_finalize(expr);
630 return folded;
632 else if ((astnode_get_type(rhs) == STRING_NODE) &&
633 (astnode_get_type(lhs) == INTEGER_NODE) &&
634 (expr->oper == PLUS_OPERATOR)) {
635 /* Left side is integer and right side is string.
636 Result is a string. */
637 char str[32];
638 sprintf(str, "%d", lhs->integer);
639 folded = astnode_create(STRING_NODE, expr->loc);
640 folded->string = (char *)malloc(strlen(str) + strlen(rhs->string) + 1);
641 if (folded->string != NULL) {
642 strcpy(folded->string, str);
643 strcat(folded->string, rhs->string);
645 /* Replace expression by folded one. */
646 astnode_replace(expr, folded);
647 astnode_finalize(expr);
648 return folded;
650 /* Use some mathematical identities... */
651 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 0))
652 && (expr->oper == PLUS_OPERATOR)) {
653 /* 0+expr == expr */
654 astnode_remove_child(expr, rhs);
655 astnode_replace(expr, rhs);
656 return rhs;
658 else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 0))
659 && (expr->oper == PLUS_OPERATOR)) {
660 /* expr+0 == expr */
661 astnode_remove_child(expr, lhs);
662 astnode_replace(expr, lhs);
663 return lhs;
665 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 1))
666 && (expr->oper == MUL_OPERATOR)) {
667 /* 1*expr == expr */
668 astnode_remove_child(expr, rhs);
669 astnode_replace(expr, rhs);
670 return rhs;
672 else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 1))
673 && ((expr->oper == MUL_OPERATOR) || (expr->oper == DIV_OPERATOR)) ) {
674 /* expr*1 == expr */
675 /* expr/1 == expr */
676 astnode_remove_child(expr, lhs);
677 astnode_replace(expr, lhs);
678 return lhs;
680 else {
681 /* No chance of folding this one. */
683 break;
685 /* Unary ops */
686 case NEG_OPERATOR:
687 case NOT_OPERATOR:
688 case LO_OPERATOR:
689 case HI_OPERATOR:
690 case UMINUS_OPERATOR:
691 case BANK_OPERATOR:
692 /* See if it can be folded */
693 if (astnode_is_type(lhs, INTEGER_NODE)) {
694 /* Fold it. */
695 switch (expr->oper) {
696 case NEG_OPERATOR: folded = astnode_create_integer(~lhs->integer, expr->loc); break;
697 case NOT_OPERATOR: folded = astnode_create_integer(!lhs->integer, expr->loc); break;
698 case LO_OPERATOR: folded = astnode_create_integer(lhs->integer & 0xFF, expr->loc); break;
699 case HI_OPERATOR: folded = astnode_create_integer((lhs->integer >> 8) & 0xFF, expr->loc); break;
700 case UMINUS_OPERATOR: folded = astnode_create_integer(-lhs->integer, expr->loc); break;
701 default: break;
703 /* Replace expression by folded one. */
704 astnode_replace(expr, folded);
705 astnode_finalize(expr);
706 return folded;
708 else {
709 /* Couldn't fold this one. */
711 break;
714 /* Couldn't fold it, return original expression */
715 return expr;
718 /*---------------------------------------------------------------------------*/
721 * Substitutes identifier if it has a constant definition in symbol table.
722 * @param expr Node of type IDENTIFIER_NODE
724 static astnode *substitute_ident(astnode *expr)
726 astnode *c;
727 symtab_entry *e;
728 /* Look it up in symbol table */
729 e = symtab_lookup(expr->ident);
730 if (e != NULL) {
731 /* Found it. Test if it's a define. */
732 if (e->type == CONSTANT_SYMBOL) {
733 /* This is a defined symbol that should be
734 replaced by the expression it stands for */
735 c = astnode_clone((astnode *)e->def, expr->loc);
736 astnode_replace(expr, c);
737 astnode_finalize(expr);
738 expr = c;
741 else {
742 /* Didn't find it in symbol table. */
744 return expr;
748 * Substitutes sizeof with proper constant.
749 * @param expr Node of type SIZEOF_NODE
751 static astnode *reduce_sizeof(astnode *expr)
753 int ok;
754 astnode *c;
755 astnode *id;
756 astnode *type;
757 astnode *count;
758 symtab_entry *e;
760 count = NULL;
761 if (astnode_is_type(LHS(expr), IDENTIFIER_NODE)) {
762 /* Identifier might be the name of a user-defined type, OR
763 it might be the name of a variable of a user-defined type */
764 type = NULL;
765 /* Look it up */
766 id = LHS(expr);
767 e = symtab_global_lookup(id->ident);
768 if (e != NULL) {
769 switch (e->type) {
770 case STRUC_SYMBOL:
771 case UNION_SYMBOL:
772 case RECORD_SYMBOL:
773 case ENUM_SYMBOL:
774 type = astnode_create_datatype(USER_DATATYPE, astnode_clone(id, id->loc), id->loc);
775 break;
777 case VAR_SYMBOL:
778 type = astnode_clone(LHS(e->def), id->loc);
779 if (astnode_is_type(e->def, STORAGE_NODE)) {
780 count = astnode_clone(RHS(e->def), id->loc);
782 else {
783 count = astnode_create_integer(astnode_get_child_count(e->def)-1, id->loc);
785 break;
787 default:
788 /* Can't take sizeof of this symbol type */
789 break;
792 if (type == NULL) {
793 /* Unknown */
794 type = astnode_create_datatype(USER_DATATYPE, astnode_clone(id, id->loc), id->loc);
796 /* Replace identifier by datatype node */
797 astnode_replace(id, type);
798 astnode_finalize(id);
800 type = LHS(expr);
801 switch (type->datatype) {
802 case BYTE_DATATYPE:
803 case CHAR_DATATYPE:
804 c = astnode_create_integer(1, expr->loc);
805 astnode_replace(expr, c);
806 astnode_finalize(expr);
807 expr = c;
808 break;
810 case WORD_DATATYPE:
811 c = astnode_create_integer(2, expr->loc);
812 astnode_replace(expr, c);
813 astnode_finalize(expr);
814 expr = c;
815 break;
817 case DWORD_DATATYPE:
818 c = astnode_create_integer(4, expr->loc);
819 astnode_replace(expr, c);
820 astnode_finalize(expr);
821 expr = c;
822 break;
824 case USER_DATATYPE:
825 /* Look up the data type in symbol table */
826 id = LHS(type);
827 e = symtab_global_lookup(id->ident);
828 ok = 0;
829 if (e != NULL) {
830 switch (e->type) {
831 case STRUC_SYMBOL:
832 case UNION_SYMBOL:
833 /* Datatype is defined, replace sizeof with proper expression */
834 c = astnode_clone((astnode *)(e->struc.size), ((astnode *)(e->struc.size))->loc);
835 astnode_replace(expr, c);
836 astnode_finalize(expr);
837 expr = c;
838 ok = 1;
839 break;
841 case RECORD_SYMBOL:
842 case ENUM_SYMBOL:
843 /* 1 byte */
844 c = astnode_create_integer(1, expr->loc);
845 astnode_replace(expr, c);
846 astnode_finalize(expr);
847 expr = c;
848 ok = 1;
849 break;
851 default:
852 /* Dunno the size of this symbol type */
853 break;
856 if (!ok) {
857 /* Datatype not defined, error */
858 err(expr->loc, "size of `%s' is unknown", id->ident);
859 /* Replace by 1 */
860 c = astnode_create_integer(1, expr->loc);
861 astnode_replace(expr, c);
862 astnode_finalize(expr);
863 return c;
865 break;
867 default:
868 err(expr->loc, "substitute_sizeof(): unknown type");
869 break;
871 if (count != NULL) {
872 c = astnode_create_arithmetic(
873 MUL_OPERATOR,
874 astnode_clone(expr, expr->loc),
875 count,
876 expr->loc
878 astnode_replace(expr, c);
879 astnode_finalize(expr);
880 expr = c;
882 return expr;
886 * Substitutes A::B with an expression.
887 * If A is a struct: substitute with offset of B
888 * If A is a union: substitute with 0
889 * If A is an enumeration: substitute with value for B
890 * @param expr Node of type SCOPE_NODE
892 static astnode *reduce_scope(astnode *expr)
894 symtab_entry *ns;
895 symtab_entry *sym;
896 astnode *c;
897 astnode *namespace;
898 astnode *symbol;
899 /* Look up the namespace */
900 namespace = LHS(expr);
901 ns = symtab_lookup(namespace->ident);
902 if (ns != NULL) {
903 /* Look up the local symbol */
904 symtab_push(ns->symtab);
905 symbol = RHS(expr);
906 sym = symtab_lookup(symbol->ident);
907 if (sym != NULL) {
908 /* See if we can replace it */
909 switch (ns->type) {
910 case STRUC_SYMBOL:
911 case UNION_SYMBOL:
912 case RECORD_SYMBOL:
913 /* Replace with field offset */
914 c = astnode_clone(sym->field.offset, sym->field.offset->loc);
915 astnode_replace(expr, c);
916 astnode_finalize(expr);
917 expr = c;
918 break;
920 case ENUM_SYMBOL:
921 /* Replace with enum entry value */
922 c = astnode_clone(sym->def, sym->def->loc);
923 astnode_replace(expr, c);
924 astnode_finalize(expr);
925 expr = c;
926 break;
928 default:
929 break;
932 symtab_pop();
934 return expr;
937 static astnode *reduce_expression(astnode *expr);
940 * Handles remainder of fields in A.B.C.D . ..., where one or more fields may be indexed.
941 * @param expr Node of type DOT_NODE, INDEX_NODE or IDENTIFIER_NODE
943 static astnode *reduce_dot_recursive(astnode *expr)
945 astnode *term;
946 astnode *offset;
947 astnode *left;
948 astnode *right;
949 astnode *type;
950 symtab_entry *field;
951 symtab_entry *def;
952 astnode *index = NULL;
953 /* Get identifiers involved: 'right' is field in 'left' */
954 left = LHS(expr);
955 if (astnode_is_type(left, INDEX_NODE)) {
956 left = LHS(left); /* Need identifier */
958 right = RHS(expr);
959 if (astnode_is_type(right, DOT_NODE)) {
960 right = LHS(right); /* Need identifier */
962 if (astnode_is_type(right, INDEX_NODE)) {
963 index = RHS(right);
964 right = LHS(right); /* Need identifier */
966 /* Lookup 'right' in 'left's symbol table (on stack) */
967 field = symtab_lookup(right->ident);
968 /* Look up variable's type definition */
969 type = LHS(field->def);
970 /* Copy its offset */
971 offset = astnode_clone(field->field.offset, right->loc);
972 if (index != NULL) {
973 /* Create expression: identifier + sizeof(datatype) * index */
974 offset = astnode_create_arithmetic(
975 PLUS_OPERATOR,
976 offset,
977 astnode_create_arithmetic(
978 MUL_OPERATOR,
979 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
980 astnode_clone(index, index->loc),
981 index->loc
983 expr->loc
986 /* See if more subfields to process */
987 expr = RHS(expr);
988 if (astnode_is_type(expr, DOT_NODE)) {
989 /* Next field */
990 def = symtab_global_lookup(LHS(type)->ident);
991 symtab_push(def->symtab);
992 term = reduce_dot_recursive(expr);
993 symtab_pop();
994 /* Construct sum */
995 offset = astnode_create_arithmetic(
996 PLUS_OPERATOR,
997 offset,
998 term,
999 expr->loc
1002 return offset;
1006 * Transforms A.B.C.D . ... to A + offset(B) + offset(C) + ...
1007 * No error checking, since validate_dotref() should have been called previously.
1008 * @param expr Node of type DOT_NODE
1010 static astnode *reduce_dot(astnode *expr)
1012 symtab_entry *father;
1013 symtab_entry *def;
1014 astnode *type;
1015 astnode *left;
1016 astnode *term1;
1017 astnode *term2;
1018 astnode *sum;
1019 astnode *index = NULL;
1020 /* Look up parent in global symbol table */
1021 left = LHS(expr); /* expr := left . right */
1022 if (astnode_is_type(left, INDEX_NODE)) {
1023 index = RHS(left);
1024 left = LHS(left); /* Need identifier */
1026 father = symtab_lookup(left->ident);
1027 /* Look up variable's type definition */
1028 type = LHS(father->def); /* DATATYPE_NODE */
1029 def = symtab_lookup(LHS(type)->ident);
1030 /* 1st term of sum is the leftmost structure identifier */
1031 term1 = astnode_clone(left, left->loc);
1032 if (index != NULL) {
1033 /* Create expression: identifier + sizeof(datatype) * index */
1034 term1 = astnode_create_arithmetic(
1035 PLUS_OPERATOR,
1036 term1,
1037 astnode_create_arithmetic(
1038 MUL_OPERATOR,
1039 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
1040 astnode_clone(index, index->loc),
1041 index->loc
1043 expr->loc
1046 /* Add offsets recursively */
1047 symtab_push(def->symtab);
1048 term2 = reduce_dot_recursive(expr);
1049 symtab_pop();
1050 /* Calculate final sum */
1051 sum = astnode_create_arithmetic(
1052 PLUS_OPERATOR,
1053 term1,
1054 term2,
1055 expr->loc
1057 sum = reduce_expression(sum);
1058 /* Replace dotted expression by sum */
1059 astnode_replace(expr, sum);
1060 astnode_finalize(expr);
1061 return sum;
1065 * Reduces MASK operation to a field mask.
1066 * @param mask A node of type MASK_NODE
1068 static astnode *reduce_mask(astnode *mask)
1070 symtab_entry *ns;
1071 symtab_entry *sym;
1072 astnode *c;
1073 astnode *namespace;
1074 astnode *symbol;
1075 astnode *expr;
1076 /* Child is a scope node, record::field */
1077 expr = LHS(mask);
1078 /* Look up the namespace */
1079 namespace = LHS(expr);
1080 ns = symtab_lookup(namespace->ident);
1081 if (ns != NULL) {
1082 /* Make sure it's a record */
1083 if (ns->type != RECORD_SYMBOL) {
1084 err(expr->loc, "`%s' is not a record");
1085 /* Replace by 0 */
1086 c = astnode_create_integer(0, expr->loc);
1087 astnode_replace(mask, c);
1088 astnode_finalize(mask);
1089 expr = c;
1091 else {
1092 /* Look up the local symbol */
1093 symtab_push(ns->symtab);
1094 symbol = RHS(expr);
1095 sym = symtab_lookup(symbol->ident);
1096 if (sym != NULL) {
1097 /* Calculate field mask */
1098 // mask = ((1 << width) - 1) << offset
1099 c = astnode_create_arithmetic(
1100 SHL_OPERATOR,
1101 astnode_create_arithmetic(
1102 MINUS_OPERATOR,
1103 astnode_create_arithmetic(
1104 SHL_OPERATOR,
1105 astnode_create_integer(1, expr->loc),
1106 astnode_clone(sym->field.size, expr->loc),
1107 expr->loc
1109 astnode_create_integer(1, expr->loc),
1110 expr->loc
1112 astnode_clone(sym->field.offset, expr->loc),
1113 expr->loc
1115 c = reduce_expression(c);
1116 astnode_replace(mask, c);
1117 astnode_finalize(mask);
1118 expr = c;
1120 symtab_pop();
1123 return expr;
1127 * Reduces identifier[expression] to identifier + sizeof(identifier type) * expression
1129 static astnode *reduce_index(astnode *expr)
1131 symtab_entry *e;
1132 astnode *c;
1133 astnode *type;
1134 astnode *id;
1135 astnode *index;
1136 id = LHS(expr);
1137 index = reduce_expression(RHS(expr));
1138 /* Lookup identifier */
1139 e = symtab_lookup(id->ident);
1140 /* Get its datatype */
1141 type = LHS(e->def);
1142 /* Create expression: identifier + sizeof(datatype) * index */
1143 c = astnode_create_arithmetic(
1144 PLUS_OPERATOR,
1145 astnode_clone(id, id->loc),
1146 astnode_create_arithmetic(
1147 MUL_OPERATOR,
1148 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
1149 astnode_clone(index, index->loc),
1150 index->loc
1152 expr->loc
1154 /* Replace index expression */
1155 astnode_replace(expr, c);
1156 astnode_finalize(expr);
1157 /* Return the new expression */
1158 return c;
1162 * Substitutes all identifiers that represent EQU defines with their
1163 * corresponding expression.
1164 * @param expr The expression whose defines to substitute
1166 static astnode *substitute_defines(astnode *expr)
1168 switch (astnode_get_type(expr)) {
1169 case ARITHMETIC_NODE:
1170 substitute_defines(LHS(expr));
1171 substitute_defines(RHS(expr));
1172 break;
1174 case IDENTIFIER_NODE:
1175 expr = substitute_ident(expr);
1176 break;
1178 case SIZEOF_NODE:
1179 expr = reduce_sizeof(expr);
1180 break;
1182 case MASK_NODE:
1183 expr = reduce_mask(expr);
1184 break;
1186 case INDEX_NODE:
1187 substitute_defines(LHS(expr));
1188 substitute_defines(RHS(expr));
1189 break;
1191 case DOT_NODE:
1192 substitute_defines(LHS(expr));
1193 substitute_defines(RHS(expr));
1194 break;
1196 default:
1197 /* Nada */
1198 break;
1200 return expr;
1206 static astnode *reduce_highlevel_constructs(astnode *expr)
1208 switch (astnode_get_type(expr)) {
1209 case ARITHMETIC_NODE:
1210 reduce_highlevel_constructs(LHS(expr));
1211 reduce_highlevel_constructs(RHS(expr));
1212 break;
1214 case SCOPE_NODE:
1215 expr = reduce_scope(expr);
1216 break;
1218 case DOT_NODE:
1219 expr = reduce_dot(expr);
1220 break;
1222 case INDEX_NODE:
1223 expr = reduce_index(expr);
1224 break;
1226 default:
1227 /* Nada */
1228 break;
1230 return expr;
1234 * Really reduces an expression.
1235 * @param expr Expression to attempt to reduce
1237 static astnode *reduce_expression_complete(astnode *expr)
1239 return astproc_fold_constants( reduce_highlevel_constructs( substitute_defines(expr) ) );
1243 * Reduces an expression.
1244 * It does two things:
1245 * 1. Substitute all equates by their value
1246 * 2. Folds constants in the resulting expression
1247 * If the expression is reduced, the original expression is replaced by the
1248 * new one, the original is finalized, and a pointer to the new expression
1249 * is returned.
1250 * If the expression is not reduced, the original pointer is returned.
1252 static astnode *reduce_expression(astnode *expr)
1254 return astproc_fold_constants( substitute_defines(expr) );
1258 * Reduces RECORD instance to a single byte (DB statement).
1259 * @param r Record's symbol table entry
1260 * @param expr Record initializer
1261 * @param flat List on which to append the reduced form
1263 static void reduce_record(symtab_entry *r, astnode *init, astnode *flat)
1265 ordered_field_list *list;
1266 symtab_entry *e;
1267 astnode *val;
1268 astnode *term;
1269 astnode *result;
1270 astnode *mask;
1271 astnode *repl;
1272 /* Validate initializer */
1273 if (!astnode_is_type(init, STRUC_NODE)) {
1274 err(init->loc, "record initializer expected");
1275 return;
1277 /* Go through fields */
1278 symtab_push(r->symtab);
1279 result = astnode_create_integer(0, init->loc);
1280 for (val = init->first_child, list = r->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1281 if (astnode_is_type(val, NULL_NODE)) {
1282 continue;
1284 if (astnode_is_type(val, STRUC_NODE)) {
1285 err(init->loc, "record field initializer expected");
1286 continue;
1288 /* Get field definition */
1289 e = list->entry;
1290 /* Calculate field mask */
1291 // mask = ((1 << width) - 1) << offset
1292 mask = astnode_create_arithmetic(
1293 SHL_OPERATOR,
1294 astnode_create_arithmetic(
1295 MINUS_OPERATOR,
1296 astnode_create_arithmetic(
1297 SHL_OPERATOR,
1298 astnode_create_integer(1, val->loc),
1299 astnode_clone(e->field.size, val->loc),
1300 val->loc
1302 astnode_create_integer(1, val->loc),
1303 val->loc
1305 astnode_clone(e->field.offset, val->loc),
1306 val->loc
1308 /* Shift val left e->field.offset bits, AND with mask */
1309 term = astnode_create_arithmetic(
1310 AND_OPERATOR,
1311 astnode_create_arithmetic(
1312 SHL_OPERATOR,
1313 astnode_clone(val, val->loc),
1314 astnode_clone(e->field.offset, val->loc),
1315 val->loc
1317 mask,
1318 val->loc
1320 /* OR the value with the result so far */
1321 result = astnode_create_arithmetic(
1322 OR_OPERATOR,
1323 result,
1324 term,
1325 val->loc
1327 result = reduce_expression(result);
1329 /* Determine reason for stopping loop */
1330 if (val != NULL) {
1331 err(init->loc, "too many field initializers");
1333 /* Make byte data node (packed record value) */
1334 repl = astnode_create_data(
1335 astnode_create_datatype(BYTE_DATATYPE, NULL, init->loc),
1336 result,
1337 init->loc
1339 /* Add to list */
1340 astnode_add_child(flat, repl);
1341 /* Restore old symbol table */
1342 symtab_pop();
1346 * Reduces ENUM instance to DB.
1347 * @param e Enumeration's symbol table entry
1348 * @param expr Expression
1349 * @param flat List on which to append the reduced form
1351 static void reduce_enum(symtab_entry *e, astnode *expr, astnode *list)
1353 symtab_entry *sym;
1354 astnode *repl;
1355 if (!astnode_is_type(expr, IDENTIFIER_NODE)) {
1356 err(expr->loc, "identifier expected");
1358 else {
1359 /* Look up the enumeration symbol */
1360 symtab_push(e->symtab);
1361 sym = symtab_lookup(expr->ident);
1362 symtab_pop();
1363 /* Make byte data node (symbol value) */
1364 repl = astnode_create_data(
1365 astnode_create_datatype(BYTE_DATATYPE, NULL, expr->loc),
1366 astnode_clone(sym->def, expr->loc),
1367 expr->loc
1369 /* Add to list */
1370 astnode_add_child(list, repl);
1374 static void flatten_struc_recursive(symtab_entry *s, astnode *init, astnode *flat);
1377 * Flattens a union initializer to a sequence of native data values.
1378 * Verify similar to flattening of structure, but only single field allowed.
1379 * @param s Union's symbol table definition
1380 * @param init Union initializer
1381 * @param flat List on which to append the flattened form
1383 static void flatten_union_recursive(symtab_entry *s, astnode *init, astnode *flat)
1385 astnode *fill;
1386 astnode *type;
1387 astnode *count;
1388 symtab_entry *e;
1389 symtab_entry *t;
1390 astnode *val;
1391 astnode *valvals;
1392 astnode *temp;
1393 ordered_field_list *list;
1394 int num;
1395 /* Validate initializer */
1396 if (!astnode_is_type(init, STRUC_NODE)) {
1397 err(init->loc, "union initializer expected");
1398 return;
1400 /* Go through fields */
1401 symtab_push(s->symtab);
1402 fill = astnode_clone(s->struc.size, flat->loc);
1403 for (val = init->first_child, list = s->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1404 if (astnode_is_type(val, NULL_NODE)) {
1405 continue;
1407 if (!astnode_equal(fill, s->struc.size)) {
1408 err(init->loc, "only one field of union can be initialized");
1409 continue;
1411 /* Get field definition */
1412 e = list->entry;
1413 /* Symbol definition is STORAGE_NODE w/ two children: type and count */
1414 type = LHS(e->def);
1415 count = RHS(e->def);
1416 /* Decide what to do based on field type and value */
1417 switch (type->datatype) {
1418 case BYTE_DATATYPE:
1419 case CHAR_DATATYPE:
1420 case WORD_DATATYPE:
1421 case DWORD_DATATYPE:
1422 if (astnode_is_type(val, STRUC_NODE)) {
1423 /* Handle multi-value array */
1424 temp = astnode_clone(val, val->loc);
1425 valvals = astnode_remove_children(temp);
1426 astnode_finalize(temp);
1427 astnode_add_child(flat,
1428 astnode_create_data(
1429 astnode_create_datatype(type->datatype, NULL, type->loc),
1430 valvals,
1431 val->loc
1434 num = astnode_get_child_count(val);
1435 } else {
1436 /* Output single value */
1437 astnode_add_child(flat,
1438 astnode_create_data(
1439 astnode_create_datatype(type->datatype, NULL, type->loc),
1440 astnode_clone(val, val->loc),
1441 val->loc
1444 num = astnode_is_type(val, STRING_NODE) ? strlen(val->string) : 1;
1446 if (num > count->integer) {
1447 err(val->loc, "initializer for field `%s' exceeds field size", e->id);
1449 /* Fill in remainder of field if necessary: count - 1 */
1450 else if (count->integer > num) {
1451 astnode_add_child(flat,
1452 astnode_create_storage(
1453 astnode_create_datatype(type->datatype, NULL, type->loc),
1454 astproc_fold_constants(
1455 astnode_create_arithmetic(
1456 MINUS_OPERATOR,
1457 astnode_clone(count, count->loc),
1458 astnode_create_integer(num, flat->loc),
1459 count->loc
1462 val->loc
1466 break;
1468 case USER_DATATYPE:
1469 /* Look up user type definition */
1470 t = symtab_global_lookup(LHS(type)->ident);
1471 switch (t->type) {
1472 case STRUC_SYMBOL:
1473 flatten_struc_recursive(t, val, flat);
1474 break;
1476 case UNION_SYMBOL:
1477 flatten_union_recursive(t, val, flat);
1478 break;
1480 case RECORD_SYMBOL:
1481 reduce_record(t, val, flat);
1482 break;
1484 case ENUM_SYMBOL:
1485 reduce_enum(t, val, flat);
1486 break;
1488 default:
1489 break;
1491 break;
1493 /* Decrease fill amount according to field size */
1494 fill = astproc_fold_constants(
1495 astnode_create_arithmetic(
1496 MINUS_OPERATOR,
1497 fill,
1498 astnode_clone(e->field.size, flat->loc),
1499 flat->loc
1503 /* Determine reason for stopping loop */
1504 if (val != NULL) {
1505 err(init->loc, "too many field initializers");
1507 if (fill->integer > 0) {
1508 /* Fill remainder of union with zeroes */
1509 astnode_add_child(flat,
1510 astnode_create_storage(
1511 astnode_create_datatype(BYTE_DATATYPE, NULL, flat->loc),
1512 fill,
1513 flat->loc
1517 symtab_pop();
1521 * Flattens a structure initializer to a sequence of native data values.
1522 * @param s Structure's symbol table definition
1523 * @param init Structure initializer
1524 * @param flat List on which to append the flattened form
1526 static void flatten_struc_recursive(symtab_entry *s, astnode *init, astnode *flat)
1528 astnode *fill;
1529 astnode *type;
1530 astnode *count;
1531 astnode *temp;
1532 symtab_entry *e;
1533 symtab_entry *t;
1534 astnode *val;
1535 astnode *valvals;
1536 ordered_field_list *list;
1537 int num;
1538 /* Validate initializer */
1539 if (!astnode_is_type(init, STRUC_NODE)) {
1540 err(init->loc, "structure initializer expected");
1541 return;
1543 /* Go through fields */
1544 symtab_push(s->symtab);
1545 fill = astnode_clone(s->struc.size, flat->loc);
1546 for (val = init->first_child, list = s->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1547 /* Get field definition */
1548 e = list->entry;
1549 /* Check if normal field or anonymous union */
1550 if (e->type == UNION_SYMBOL) {
1551 if (astnode_is_type(val, NULL_NODE)) {
1552 /* Output union size bytes to fill in field */
1553 astnode_add_child(flat,
1554 astnode_create_storage(
1555 astnode_create_datatype(BYTE_DATATYPE, NULL, val->loc),
1556 astnode_clone(e->struc.size, val->loc),
1557 val->loc
1560 } else {
1561 flatten_union_recursive(e, val, flat);
1562 /* Decrease fill amount according to union size */
1563 fill = astproc_fold_constants(
1564 astnode_create_arithmetic(
1565 MINUS_OPERATOR,
1566 fill,
1567 astnode_clone(e->struc.size, flat->loc),
1568 flat->loc
1572 } else {
1573 /* VAR_SYMBOL */
1574 /* Symbol definition is STORAGE_NODE w/ two children: type and count */
1575 type = LHS(e->def);
1576 count = RHS(e->def);
1577 /* Decide what to do based on field type and value */
1578 switch (type->datatype) {
1579 case BYTE_DATATYPE:
1580 case CHAR_DATATYPE:
1581 case WORD_DATATYPE:
1582 case DWORD_DATATYPE:
1583 if (astnode_is_type(val, NULL_NODE)) {
1584 /* Output field_size bytes to fill in field */
1585 astnode_add_child(flat,
1586 astnode_create_storage(
1587 astnode_create_datatype(type->datatype, NULL, type->loc),
1588 astnode_clone(count, count->loc),
1589 val->loc
1592 } else {
1593 if (astnode_is_type(val, STRUC_NODE)) {
1594 /* Handle multi-value array */
1595 temp = astnode_clone(val, val->loc);
1596 valvals = astnode_remove_children(temp);
1597 astnode_finalize(temp);
1598 astnode_add_child(flat,
1599 astnode_create_data(
1600 astnode_create_datatype(type->datatype, NULL, type->loc),
1601 valvals,
1602 val->loc
1605 num = astnode_get_child_count(val);
1606 } else {
1607 /* Output single value */
1608 astnode_add_child(flat,
1609 astnode_create_data(
1610 astnode_create_datatype(type->datatype, NULL, type->loc),
1611 astnode_clone(val, val->loc),
1612 val->loc
1615 num = astnode_is_type(val, STRING_NODE) ? strlen(val->string) : 1;
1617 if (astnode_is_type(count, INTEGER_NODE) && (count->integer < num)) {
1618 err(val->loc, "initializer for field `%s' exceeds field size", e->id);
1620 /* Fill in remainder of field if necessary: count - 1 */
1621 else if ( (astnode_is_type(count, INTEGER_NODE) && (count->integer > num))
1622 || !astnode_is_type(count, INTEGER_NODE) ) {
1623 astnode_add_child(flat,
1624 astnode_create_storage(
1625 astnode_create_datatype(type->datatype, NULL, flat->loc),
1626 astproc_fold_constants(
1627 astnode_create_arithmetic(
1628 MINUS_OPERATOR,
1629 astnode_clone(count, flat->loc),
1630 astnode_create_integer(num, flat->loc),
1631 flat->loc
1634 flat->loc
1639 break;
1641 case USER_DATATYPE:
1642 /* Look up user type definition */
1643 t = symtab_global_lookup(LHS(type)->ident);
1644 if (astnode_is_type(val, NULL_NODE)) {
1645 /* Output sizeof(type) bytes to fill in */
1646 astnode_add_child(flat,
1647 astnode_create_storage(
1648 astnode_create_datatype(BYTE_DATATYPE, NULL, val->loc),
1649 astnode_clone(t->struc.size, val->loc),
1650 val->loc
1653 } else {
1654 switch (t->type) {
1655 case STRUC_SYMBOL:
1656 flatten_struc_recursive(t, val, flat);
1657 break;
1659 case UNION_SYMBOL:
1660 flatten_union_recursive(t, val, flat);
1661 break;
1663 case RECORD_SYMBOL:
1664 reduce_record(t, val, flat);
1665 break;
1667 case ENUM_SYMBOL:
1668 reduce_enum(t, val, flat);
1669 break;
1671 default:
1672 break;
1675 break;
1677 /* Decrease fill amount according to field size */
1678 fill = astproc_fold_constants(
1679 astnode_create_arithmetic(
1680 MINUS_OPERATOR,
1681 fill,
1682 astnode_clone(e->field.size, flat->loc),
1683 flat->loc
1688 /* Determine reason for stopping loop */
1689 if (val != NULL) {
1690 err(init->loc, "too many field initializers");
1692 else if (list != NULL) {
1693 /* All fields not initialized; fill remainder of struc with zeroes */
1694 astnode_add_child(flat,
1695 astnode_create_storage(
1696 astnode_create_datatype(BYTE_DATATYPE, NULL, flat->loc),
1697 fill,
1698 flat->loc
1702 symtab_pop();
1706 * Converts data that is expressed in a high-level form (such as structure initializers)
1707 * to a simple sequence of bytes.
1708 * @param n The source node to flatten
1709 * @param type The type of data that n is an instance of
1710 * @param list List on which to append the resulting sequence of items (bytes/words/dwords)
1712 static void flatten_user_data(astnode *n, astnode *type, astnode *list)
1714 symtab_entry *def;
1715 /* Look up type definition */
1716 def = symtab_global_lookup(LHS(type)->ident);
1717 if (def != NULL) {
1718 switch (def->type) {
1719 case STRUC_SYMBOL:
1720 /* Flatten structure initializer to series of simple data statements */
1721 flatten_struc_recursive(def, n, list);
1722 break;
1724 case UNION_SYMBOL:
1725 /* Flatten union initializer to series of simple data statements */
1726 flatten_union_recursive(def, n, list);
1727 break;
1729 case RECORD_SYMBOL:
1730 reduce_record(def, n, list);
1731 break;
1733 case ENUM_SYMBOL:
1734 reduce_enum(def, n, list);
1735 break;
1737 default:
1738 break;
1743 /*---------------------------------------------------------------------------*/
1746 * Loads the character map specified by the node.
1747 * @param n Node of type CHARMAP_NODE
1749 static int load_charmap(astnode *n, void *arg, astnode **next)
1751 /* TODO: should probably be done in the parsing phase (same path resolution as for INCSRC and INCBIN) */
1752 astnode *file;
1753 /* Get file descriptor */
1754 file = astnode_get_child(n, 0);
1755 /* Try to load the charmap */
1756 if (charmap_parse(file->file_path, charmap) == 0) {
1757 err(n->loc, "could not open `%s' for reading", file->file_path);
1759 return 0;
1763 * First-time processing of instruction node.
1764 * @param n Node of type INSTRUCTION_NODE
1765 * @param arg Not used
1767 static int process_instruction(astnode *n, void *arg, astnode **next)
1769 astnode *expr;
1770 if (in_dataseg) {
1771 err(n->loc, "instructions not allowed in data segment");
1772 /* Remove from AST */
1773 astnode_remove(n);
1774 astnode_finalize(n);
1775 return 0;
1777 else {
1778 /* The instruction operand */
1779 expr = astnode_get_child(n, 0);
1780 /* Substitute defines and fold constants */
1781 reduce_expression(expr);
1782 return 1;
1787 * First-time processing of data node.
1788 * @param n Node of type DATA_NODE
1789 * @param arg Not used
1791 static int process_data(astnode *n, void *arg, astnode **next)
1793 int j;
1794 int k;
1795 astnode *type;
1796 astnode *expr;
1797 astnode *list;
1798 astnode *stmts;
1799 int ret = 1;
1800 type = astnode_get_child(n, 0); /* DATATYPE_NODE */
1801 if (in_dataseg) {
1802 err(n->loc, "value not allowed in data segment");
1803 /* Replace with storage node */
1804 astnode_replace(
1806 astnode_create_storage(
1807 astnode_create_datatype(BYTE_DATATYPE, NULL, n->loc),
1808 astnode_create_integer(1, n->loc),
1809 n->loc
1812 astnode_finalize(n);
1813 return 0;
1815 if (type->datatype == USER_DATATYPE) {
1816 /* Make sure the type exists */
1817 if (symtab_global_lookup(LHS(type)->ident) == NULL) {
1818 err(n->loc, "unknown type `%s'", LHS(type)->ident);
1819 /* Remove from AST */
1820 astnode_remove(n);
1821 astnode_finalize(n);
1822 return 0;
1823 } else {
1824 /* Attempt to reduce user data to native data */
1825 list = astnode_create(LIST_NODE, n->loc);
1826 for (expr = type->next_sibling; expr != NULL; expr = expr->next_sibling) {
1827 flatten_user_data(expr, type, list);
1829 /* Replace initializers with generated list */
1830 stmts = astnode_remove_children(list);
1831 astnode_replace(n, stmts);
1832 astnode_finalize(n);
1833 astnode_finalize(list);
1834 *next = stmts;
1835 ret = 0;
1838 /* Go through the list of data values, replacing defines and folding constants */
1839 for (j=1; j<astnode_get_child_count(n); j++) {
1840 expr = astnode_get_child(n, j);
1841 /* Substitute defines and fold constants */
1842 expr = reduce_expression(expr);
1843 /* If it's a string, replace by array of integers */
1844 /* (makes it easier to process later... favour regularity) */
1845 if (astnode_is_type(expr, STRING_NODE)) {
1846 astnode_remove_child_at(n, j); /* Remove string */
1847 for (k=strlen(expr->string)-1; k>=0; k--) {
1848 /* Check if we should map character from custom charmap */
1849 if (type->datatype == CHAR_DATATYPE) {
1850 expr->string[k] = charmap[(unsigned)expr->string[k]];
1852 /* Append character value to array */
1853 astnode_insert_child(n, astnode_create_integer((unsigned char)expr->string[k], n->loc), j);
1855 if (type->datatype == CHAR_DATATYPE) {
1856 /* It's normal byte array now */
1857 type->datatype = BYTE_DATATYPE;
1859 j += strlen(expr->string)-1;
1860 astnode_finalize(expr);
1863 return ret;
1867 * First-time processing of storage node.
1868 * @param n Node of type STORAGE_NODE
1869 * @param arg Not used
1871 static int process_storage(astnode *n, void *arg, astnode **next)
1873 int item_size;
1874 astnode *type;
1875 astnode *expr;
1876 astnode *new_expr;
1877 type = LHS(n);
1878 expr = RHS(n);
1879 /* If not BYTE_DATATYPE, multiply by word/dword-size */
1880 switch (type->datatype) {
1881 case BYTE_DATATYPE:
1882 case CHAR_DATATYPE: item_size = 1; break;
1883 case WORD_DATATYPE: item_size = 2; break;
1884 case DWORD_DATATYPE: item_size = 4; break;
1885 default: item_size = 1; break; // ### Hmmm...
1887 if (item_size != 1) {
1888 new_expr = astnode_create_arithmetic(
1889 MUL_OPERATOR,
1890 astnode_clone(expr, expr->loc),
1891 astnode_create_integer(item_size, expr->loc),
1892 expr->loc
1894 astnode_replace(expr, new_expr);
1895 astnode_finalize(expr);
1896 expr = new_expr;
1897 type->datatype = BYTE_DATATYPE;
1899 /* Substitute defines and fold constants */
1900 expr = reduce_expression(expr);
1901 // TODO: Validate range somewhere else than here please... ???
1902 if (astnode_is_type(expr, INTEGER_NODE)) {
1903 if ((expr->integer <= 0) || (expr->integer >= 0x10000)) {
1904 err(n->loc, "operand out of range");
1907 return 1;
1911 * Process EQU node.
1912 * @param n Node of type EQU_NODE
1913 * @param arg Not used
1915 static int process_equ(astnode *n, void *arg, astnode **next)
1917 symtab_entry *e;
1918 astnode *id;
1919 astnode *expr;
1920 /* The expression which describes the value */
1921 expr = astnode_clone(astnode_get_child(n, 1), n->loc);
1922 /* Substitute defines and fold constants */
1923 expr = reduce_expression(expr);
1924 /* The identifier which is being defined */
1925 id = astnode_get_child(n, 0);
1926 /* Look up in symbol table */
1927 e = symtab_lookup(id->ident);
1928 if (e == NULL) {
1929 /* Symbol is being defined */
1930 // TODO: Check that expression is a constant?
1931 /* Enter it in symbol table */
1932 symtab_enter(id->ident, CONSTANT_SYMBOL, expr, 0);
1933 } else {
1934 /* Symbol is being redefined */
1935 /* This is not allowed for EQU equate! */
1936 if (!astnode_equal((astnode *)(e->def), expr)) {
1937 warn(n->loc, "redefinition of `%s' is not identical; ignored", id->ident);
1940 /* Remove the equate node from the tree. */
1941 astnode_remove(n);
1942 astnode_finalize(n);
1943 return 0;
1947 * Process '=' node.
1948 * @param n Node of type ASSIGN_NODE
1949 * @param arg Not used
1951 static int process_assign(astnode *n, void *arg, astnode **next)
1953 symtab_entry *e;
1954 astnode *id;
1955 astnode *expr;
1956 /* If it's part of ENUM declaration, don't touch */
1957 if (astnode_has_ancestor_of_type(n, ENUM_DECL_NODE)) {
1958 return 0;
1960 /* Very similar to EQU, except symbol 1) can be
1961 redefined and 2) is volatile (see end of proc) */
1962 /* The expression which describes the value */
1963 expr = astnode_clone(astnode_get_child(n, 1), n->loc);
1964 /* Substitute defines and fold constants */
1965 expr = reduce_expression(expr);
1966 /* The identifier which is being (re)defined */
1967 id = astnode_get_child(n, 0);
1968 /* Look up in symbol table */
1969 e = symtab_lookup(id->ident);
1970 if (e == NULL) {
1971 /* Symbol is being defined for the first time */
1972 /* Note that the VOLATILE_FLAG is set */
1973 symtab_enter(id->ident, CONSTANT_SYMBOL, expr, VOLATILE_FLAG);
1974 } else {
1975 /* Symbol is being redefined */
1976 /* This is OK for ASSIGN equate, simply replace definition */
1977 // ### store a list of definitions
1978 expr->loc = e->def->loc;
1979 e->def = expr;
1981 /* Remove the equate node from the tree. */
1982 astnode_remove(n);
1983 astnode_finalize(n);
1984 return 0;
1988 * Process IFDEF-node.
1989 * @param n Node of type IFDEF_NODE
1990 * @param arg Not used
1992 static int process_ifdef(astnode *n, void *arg, astnode **next)
1994 symtab_entry *e;
1995 astnode *id;
1996 astnode *stmts;
1997 /* The identifier which is being tested */
1998 id = astnode_get_child(n, 0);
1999 e = symtab_lookup(id->ident);
2000 if (e != NULL) {
2001 /* Symbol is defined. */
2002 /* Replace IFDEF node by the true-branch statement list */
2003 stmts = astnode_remove_children( astnode_remove_child_at(n, 1));
2004 astnode_replace(n, stmts);
2005 *next = stmts;
2006 } else {
2007 /* Symbol is not defined. */
2008 /* Replace IFDEF node by the false-branch statement list (if any) */
2009 stmts = astnode_remove_children( astnode_remove_child_at(n, 2));
2010 if (stmts != NULL) {
2011 astnode_replace(n, stmts);
2012 *next = stmts;
2013 } else {
2014 astnode_remove(n);
2017 /* Discard the original node */
2018 astnode_finalize(n);
2019 return 0;
2023 * Process IFNDEF-node.
2024 * @param n Node of type IFNDEF_NODE
2025 * @param arg Not used
2027 static int process_ifndef(astnode *n, void *arg, astnode **next)
2029 symtab_entry *e;
2030 astnode *id;
2031 astnode *stmts;
2032 /* The identifier which is being tested */
2033 id = astnode_get_child(n, 0);
2034 e = symtab_lookup(id->ident);
2035 if (e == NULL) {
2036 /* Symbol is not defined. */
2037 /* Replace IFNDEF node by the true-branch statement list */
2038 stmts = astnode_remove_children( astnode_remove_child_at(n, 1));
2039 astnode_replace(n, stmts);
2040 *next = stmts;
2041 } else {
2042 /* Symbol is defined. */
2043 /* Replace IFNDEF node by the false-branch statement list, if any */
2044 stmts = astnode_remove_children( astnode_remove_child_at(n, 2));
2045 if (stmts != NULL) {
2046 astnode_replace(n, stmts);
2047 *next = stmts;
2048 } else {
2049 astnode_remove(n);
2052 /* Discard the original node */
2053 astnode_finalize(n);
2054 return 0;
2058 * Process IF-node.
2059 * @param n Node of type IF_NODE
2060 * @param arg Not used
2062 static int process_if(astnode *n, void *arg, astnode **next)
2064 astnode *expr;
2065 astnode *stmts;
2066 astnode *c;
2067 int ret = 0;
2068 /* IF_NODE has a list of CASE, DEFAULT nodes as children */
2069 for (c = astnode_get_first_child(n); c != NULL; c = astnode_get_next_sibling(c) ) {
2070 if (astnode_is_type(c, CASE_NODE)) {
2071 /* The expression which is being tested */
2072 expr = astnode_get_child(c, 0);
2073 /* Try to reduce expression to literal */
2074 expr = reduce_expression(expr);
2075 /* Resulting expression must be an integer literal,
2076 since this is static evaluation.
2077 In other words, it can't contain label references.
2079 if (astnode_is_type(expr, INTEGER_NODE)) {
2080 /* Non-zero is true, zero is false */
2081 if (expr->integer) {
2082 /* Replace IF node by the true-branch statement list */
2083 stmts = astnode_remove_children( astnode_remove_child_at(c, 1) );
2084 astnode_replace(n, stmts);
2085 astnode_finalize(n);
2086 *next = stmts;
2087 return ret;
2089 } else {
2090 /* Error, expression is not constant */
2091 err(expr->loc, "conditional expression does not evaluate to literal");
2093 } else { /* DEFAULT_NODE */
2094 /* Replace IF node by the false-branch statement list */
2095 stmts = astnode_remove_children(c);
2096 astnode_replace(n, stmts);
2097 astnode_finalize(n);
2098 *next = stmts;
2099 return ret;
2102 /* No match, remove IF node from AST */
2103 astnode_remove(n);
2104 astnode_finalize(n);
2105 return ret;
2109 * Process dataseg-node.
2110 * @param n Node of type DATASEG_NODE
2111 * @param arg Not used
2113 static int process_dataseg(astnode *n, void *arg, astnode **next)
2115 modifiers = n->modifiers;
2116 in_dataseg = 1; /* true */
2117 return 0;
2121 * Process codeseg-node.
2122 * @param n Node of type CODESEG_NODE
2123 * @param arg Not used
2125 static int process_codeseg(astnode *n, void *arg, astnode **next)
2127 modifiers = 0;
2128 in_dataseg = 0; /* false */
2129 return 0;
2133 * Process org-node.
2134 * @param n Node of type ORG_NODE
2135 * @param arg Not used
2137 static int process_org(astnode *n, void *arg, astnode **next)
2139 if (!xasm_args.pure_binary) {
2140 err(n->loc, "org directive can only be used when output format is pure 6502 binary");
2141 } else {
2142 astnode *addr = astnode_get_child(n, 0);
2143 addr = reduce_expression_complete(addr);
2144 if (astnode_is_type(addr, INTEGER_NODE)) {
2145 /* Range check */
2146 if ((addr->integer < 0) || (addr->integer >= 0x10000)) {
2147 err(n->loc, "org address out of 64K range");
2149 } else {
2150 err(n->loc, "org address does not evaluate to literal");
2151 /* Remove from AST */
2152 astnode_remove(n);
2153 astnode_finalize(n);
2156 return 0;
2160 * Process REPT node.
2161 * @param n Node of type REPT_NODE
2162 * @param arg Not used
2164 static int process_rept(astnode *n, void *arg, astnode **next)
2166 astnode *count;
2167 astnode *stmts;
2168 astnode *list;
2169 /* The repeat count */
2170 count = astnode_get_child(n, 0);
2171 /* Try to reduce count expression to literal */
2172 count = reduce_expression_complete(count);
2173 /* Resulting expression must be an integer literal,
2174 since this is static evaluation.
2176 if (astnode_is_type(count, INTEGER_NODE)) {
2177 if (count->integer < 0) {
2178 warn(n->loc, "REPT ignored; negative repeat count (%d)", count->integer);
2179 /* Remove from AST */
2180 astnode_remove(n);
2181 astnode_finalize(n);
2182 } else if (count->integer > 0) {
2183 /* Expand body <count> times */
2184 list = astnode_clone(astnode_get_child(n, 1), n->loc);
2185 stmts = astnode_remove_children(list);
2186 astnode_finalize(list);
2187 while (--count->integer > 0) {
2188 list = astnode_clone(astnode_get_child(n, 1), n->loc);
2189 astnode_add_sibling(stmts, astnode_remove_children(list) );
2190 astnode_finalize(list);
2192 astnode_replace(n, stmts);
2193 astnode_finalize(n);
2194 *next = stmts;
2195 } else {
2196 /* count == 0, remove from AST */
2197 astnode_remove(n);
2198 astnode_finalize(n);
2200 } else {
2201 err(n->loc, "repeat count does not evaluate to literal");
2202 /* Remove from AST */
2203 astnode_remove(n);
2204 astnode_finalize(n);
2206 return 0;
2210 * Process WHILE node.
2211 * @param n Node of type WHILE_NODE
2212 * @param arg Not used
2214 static int process_while(astnode *n, void *arg, astnode **next)
2216 astnode *expr;
2217 astnode *stmts;
2218 astnode *list;
2219 /* The boolean expression */
2220 expr = astnode_get_child(n, 0);
2221 /* Try to reduce expression to literal */
2222 expr = reduce_expression(astnode_clone(expr, expr->loc));
2223 /* Resulting expression must be an integer literal,
2224 since this is static evaluation.
2226 if (astnode_is_type(expr, INTEGER_NODE)) {
2227 /* Expand body if the expression is true */
2228 if (expr->integer) {
2229 list = astnode_clone(astnode_get_child(n, 1), n->loc);
2230 stmts = astnode_remove_children(list);
2231 astnode_finalize(list);
2232 astnode_replace(n, stmts);
2233 astnode_add_sibling(stmts, n); /* Clever huh? */
2234 *next = stmts;
2235 } else {
2236 /* Remove WHILE node from AST */
2237 astnode_remove(n);
2238 astnode_finalize(n);
2240 } else {
2241 err(n->loc, "while expression does not evaluate to literal");
2242 /* Remove WHILE node from AST */
2243 astnode_remove(n);
2244 astnode_finalize(n);
2246 astnode_finalize(expr);
2247 return 0;
2250 /*---------------------------------------------------------------------------*/
2253 * Enters a macro into the symbol table.
2254 * @param n Must be a node of type MACRO_DECL_NODE
2255 * @param arg Not used
2257 static int enter_macro(astnode *n, void *arg, astnode **next)
2259 astnode *id = astnode_get_child(n, 0); /* Child 0 is macro identifier */
2260 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2261 if (symtab_enter(id->ident, MACRO_SYMBOL, n, 0) == NULL) {
2262 /* ### This could be allowed, you know... */
2263 err(n->loc, "duplicate symbol `%s'", id->ident);
2265 /* Remove from AST */
2266 astnode_remove(n);
2267 // ### n is not finalized???
2268 return 0;
2272 * Enters a label into the symbol table.
2273 * @param n Must be a node of type LABEL_NODE
2275 static int enter_label(astnode *n, void *arg, astnode **next)
2277 symtab_entry *e;
2278 astnode *addr;
2279 /* Make sure it's unique first */
2280 if (symtab_lookup(n->ident)) {
2281 err(n->loc, "duplicate symbol `%s'", n->ident);
2282 /* Remove from AST */
2283 astnode_remove(n);
2284 astnode_finalize(n);
2285 } else {
2286 /* Enter it! */
2287 e = symtab_enter(n->ident, LABEL_SYMBOL, n, (in_dataseg ? DATA_FLAG : 0) | modifiers );
2288 /* Check if hardcoded address */
2289 addr = reduce_expression_complete(RHS(n));
2290 if (astnode_is_type(addr, INTEGER_NODE)) {
2291 /* Store it */
2292 e->address = addr->integer;
2293 e->flags |= ADDR_FLAG;
2294 } else if (!astnode_is_type(addr, CURRENT_PC_NODE)) {
2295 err(n->loc, "label address does not evaluate to literal");
2297 /* Increase namespace counter */
2298 label_count++;
2300 /* */
2301 return 0;
2305 * Enters a variable declaration in symbol table.
2306 * @param n Must be a node of type VAR_DECL_NODE
2308 static int enter_var(astnode *n, void *arg, astnode **next)
2310 astnode *id = LHS(n); /* Variable identifier */
2311 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2312 /* Make sure it's unique first */
2313 if (symtab_lookup(id->ident)) {
2314 err(n->loc, "duplicate symbol `%s'", id->ident);
2315 /* Remove from AST */
2316 astnode_remove(n);
2317 astnode_finalize(n);
2318 return 0;
2319 } else {
2320 /* Validate modifiers */
2321 if ((n->modifiers & ZEROPAGE_FLAG) && !in_dataseg) {
2322 warn(n->loc, "zeropage modifier has no effect in code segment");
2323 n->modifiers &= ~ZEROPAGE_FLAG;
2325 /* Enter it! */
2326 symtab_enter(id->ident, VAR_SYMBOL, astnode_clone(RHS(n), n->loc), (in_dataseg ? DATA_FLAG : 0) | n->modifiers | modifiers);
2327 /* */
2328 return 1;
2333 * Enters a procedure declaration in symbol table.
2334 * @param n Must be a node of type PROC_NODE
2336 static int enter_proc(astnode *n, void *arg, astnode **next)
2338 astnode *id;
2339 if (in_dataseg) {
2340 err(n->loc, "procedures not allowed in data segment");
2341 /* Remove from AST */
2342 astnode_remove(n);
2343 astnode_finalize(n);
2344 return 0;
2346 id = LHS(n); /* Procedure identifier */
2347 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2348 /* Make sure it's unique first */
2349 if (symtab_lookup(id->ident)) {
2350 err(n->loc, "duplicate symbol `%s'", id->ident);
2351 /* Remove from AST */
2352 astnode_remove(n);
2353 astnode_finalize(n);
2354 return 0;
2355 } else {
2356 /* Enter it! RHS(n) is the list of procedure statements */
2357 symtab_enter(id->ident, PROC_SYMBOL, RHS(n), (in_dataseg ? DATA_FLAG : 0) );
2358 /* Increase global namespace counter */
2359 label_count++;
2360 /* */
2361 return 1;
2366 * Enters a simple <identifier> <storage> structure member.
2367 * @param c Node of type VAR_DECL_NODE
2368 * @param offset Offset of this field
2369 * @param plist List of symbol table's entries
2370 * @param struc_id Structure identifier (for error messages)
2371 * @return New offset (old offset + size of this field)
2373 static astnode *enter_struc_atomic_field(astnode *c, astnode *offset, ordered_field_list ***plist, astnode *struc_id)
2375 astnode *field_id;
2376 astnode *field_data;
2377 astnode *field_size;
2378 symtab_entry *fe;
2379 /* c has two children: id and STORAGE_NODE */
2380 field_id = LHS(c);
2381 assert(astnode_get_type(field_id) == IDENTIFIER_NODE);
2382 field_data = RHS(c);
2383 reduce_expression(RHS(field_data));
2384 /* Validate the declaration -- no data initialized */
2385 if (astnode_is_type(field_data, DATA_NODE)) {
2386 err(c->loc, "data initialization not allowed here");
2387 return(offset);
2389 /* Try to enter field in structure's symbol table */
2390 fe = symtab_enter(
2391 field_id->ident,
2392 VAR_SYMBOL,
2393 astnode_clone(field_data, field_data->loc),
2396 if (fe == NULL) {
2397 err(c->loc, "duplicate symbol `%s' in structure `%s'", field_id->ident, struc_id->ident);
2398 return(offset);
2400 /* Add to ordered list of fields */
2401 (**plist) = malloc(sizeof(ordered_field_list));
2402 (**plist)->entry = fe;
2403 (**plist)->next = NULL;
2404 *plist = &((**plist)->next);
2405 /* Set field offset */
2406 fe->field.offset = astnode_clone(offset, offset->loc);
2407 /* Calculate field size in bytes: sizeof(datatype) * count */
2408 field_size = astnode_create_arithmetic(
2409 MUL_OPERATOR,
2410 astnode_create_sizeof(astnode_clone(LHS(field_data), field_data->loc), field_data->loc),
2411 astnode_clone(RHS(field_data), field_data->loc),
2412 field_data->loc
2414 field_size = reduce_expression(field_size);
2415 /* Set field size */
2416 fe->field.size = astnode_clone(field_size, field_size->loc);
2417 /* Add field size to total offset */
2418 offset = astnode_create_arithmetic(
2419 PLUS_OPERATOR,
2420 offset,
2421 field_size,
2422 offset->loc
2424 offset = reduce_expression(offset);
2425 return(offset);
2428 static void enter_union_fields(symtab_entry *, astnode *);
2431 * Attempts to enter an (anonymous) union's members into structure's symbol table.
2432 * @param n Node of type UNION_DECL_NODE
2433 * @param offset Current parent structure offset
2434 * @param plist Ordered list of parent structure's fields
2436 astnode *enter_struc_union_field(astnode *n, astnode *offset, ordered_field_list ***plist, astnode *struc_id)
2438 ordered_field_list *ls;
2439 symtab_entry *se;
2440 symtab_entry *fe;
2441 static int id = 0;
2442 char id_str[16];
2443 astnode *union_id;
2444 union_id = LHS(n);
2445 if (astnode_is_type(union_id, IDENTIFIER_NODE)) {
2446 err(n->loc, "anonymous union expected");
2447 return(offset);
2449 /* Put UNION in symbol table */
2450 sprintf(id_str, "%d", id++);
2451 se = symtab_enter(id_str, UNION_SYMBOL, n, 0);
2452 enter_union_fields(se, n);
2453 /* Add to ordered list of fields */
2454 (**plist) = malloc(sizeof(ordered_field_list));
2455 (**plist)->entry = se;
2456 (**plist)->next = NULL;
2457 *plist = &((**plist)->next);
2458 /* Add to parent structure as well, with same offsets */
2459 for (ls = se->struc.fields; ls != NULL; ls = ls->next) {
2460 /* Try to enter field in structure's symbol table */
2461 fe = symtab_enter(
2462 ls->entry->id,
2463 VAR_SYMBOL,
2464 astnode_clone(ls->entry->def, ls->entry->def->loc),
2467 if (fe == NULL) {
2468 err(ls->entry->def->loc, "duplicate symbol `%s' in structure `%s'", ls->entry->id, struc_id->ident);
2469 continue;
2471 /* Set field offset */
2472 fe->field.offset = astnode_clone(offset, offset->loc);
2473 /* Set field size */
2474 fe->field.size = astnode_clone(se->struc.size, offset->loc);
2476 /* Advance offset by size of union */
2477 offset = astnode_create_arithmetic(
2478 PLUS_OPERATOR,
2479 offset,
2480 astnode_clone(se->struc.size, offset->loc),
2481 offset->loc
2483 offset = reduce_expression(offset);
2484 return(offset);
2488 * Enters struc type into symbol table based on AST node.
2489 * - Creates a symbol table for the structure
2490 * - Validates and enters all its fields
2491 * - Calculates offset of each field in the structure, and total size
2492 * @param n Node of type STRUC_DECL_NODE
2494 static int enter_struc(astnode *n, void *arg, astnode **next)
2496 ordered_field_list **plist;
2497 symtab_entry *se;
2498 astnode *c;
2499 astnode *offset;
2500 astnode *struc_id = LHS(n); /* Child 0 is struc identifier */
2501 /* Put STRUC in symbol table */
2502 se = symtab_enter(struc_id->ident, STRUC_SYMBOL, n, 0);
2503 if (se == NULL) {
2504 err(n->loc, "duplicate symbol `%s'", struc_id->ident);
2505 } else {
2506 /* Put the fields of the structure in local symbol table */
2507 se->symtab = symtab_create();
2508 offset = astnode_create_integer(0, n->loc); /* offset = 0 */
2509 plist = &se->struc.fields;
2510 for (c = struc_id->next_sibling; c != NULL; c = c->next_sibling) {
2511 /* Check if it's a field declaration */
2512 if (astnode_is_type(c, VAR_DECL_NODE)) {
2513 offset = enter_struc_atomic_field(c, offset, &plist, struc_id);
2515 /* Check if (anonymous) union */
2516 else if (astnode_is_type(c, UNION_DECL_NODE)) {
2517 offset = enter_struc_union_field(c, offset, &plist, struc_id);
2518 } else {
2519 err(c->loc, "field declaration expected");
2520 continue;
2523 /* Store total size of structure */
2524 se->struc.size = offset;
2525 /* Restore previous symbol table */
2526 symtab_pop();
2528 /* ### Remove STRUC node from AST */
2529 // astnode_remove(n);
2530 // astnode_finalize(n);
2531 return 0;
2535 * Enters fields of union into its symbol table.
2537 static void enter_union_fields(symtab_entry *se, astnode *n)
2539 ordered_field_list **plist;
2540 astnode *c;
2541 astnode *field_id;
2542 astnode *field_data;
2543 astnode *field_size;
2544 symtab_entry *fe;
2546 se->symtab = symtab_create();
2547 se->struc.size = astnode_create_integer(0, n->loc);
2548 plist = &se->struc.fields;
2549 /* Process field declarations */
2550 for (c = RHS(n); c != NULL; c = c->next_sibling) {
2551 /* Make sure it's a field declaration */
2552 if (!astnode_is_type(c, VAR_DECL_NODE)) {
2553 err(c->loc, "field declaration expected");
2554 continue;
2556 /* c has two children: id and STORAGE_NODE */
2557 field_id = LHS(c);
2558 assert(astnode_get_type(field_id) == IDENTIFIER_NODE);
2559 field_data = RHS(c);
2560 reduce_expression(RHS(field_data));
2561 /* Validate the declaration -- no data initialized */
2562 if (astnode_is_type(field_data, DATA_NODE)) {
2563 err(c->loc, "data initialization not allowed here");
2564 continue;
2566 /* Calculate field size in bytes: sizeof(datatype) * count */
2567 field_size = astnode_create_arithmetic(
2568 MUL_OPERATOR,
2569 astnode_create_sizeof(astnode_clone(LHS(field_data), field_data->loc), field_data->loc),
2570 astnode_clone(RHS(field_data), field_data->loc),
2571 field_data->loc
2573 field_size = reduce_expression(field_size);
2574 /* Make sure field size is a constant */
2575 if (!astnode_is_type(field_size, INTEGER_NODE)) {
2576 err(c->loc, "union member must be of constant size");
2577 astnode_finalize(field_size);
2578 /* Use default size: 1 byte */
2579 field_size = astnode_create_integer(1, field_data->loc);
2581 /* Try to enter field in structure's symbol table */
2582 fe = symtab_enter(
2583 field_id->ident,
2584 VAR_SYMBOL,
2585 astnode_clone(field_data, field_data->loc),
2588 if (fe == NULL) {
2589 err(c->loc, "duplicate symbol `%s' in union `%s'", field_id->ident, se->id);
2590 astnode_finalize(field_size);
2591 continue;
2593 /* Add to ordered list of fields */
2594 (*plist) = malloc(sizeof(ordered_field_list));
2595 (*plist)->entry = fe;
2596 (*plist)->next = NULL;
2597 plist = &((*plist)->next);
2598 /* Set field offset (0 for all) and size */
2599 fe->field.offset = astnode_create_integer(0, n->loc);
2600 fe->field.size = astnode_clone(field_size, field_size->loc);
2601 /* See if field size of this member is largest so far */
2602 if (se->struc.size->integer < field_size->integer) {
2603 astnode_finalize(se->struc.size);
2604 se->struc.size = field_size;
2605 } else {
2606 astnode_finalize(field_size);
2609 symtab_pop();
2613 * Enters union type into symbol table based on AST node.
2614 * @param n Node of type UNION_DECL_NODE
2616 static int enter_union(astnode *n, void *arg, astnode **next)
2618 symtab_entry *se;
2619 astnode *union_id = astnode_get_child(n, 0); /* Child 0 is union identifier */
2620 /* Check for anonymous union */
2621 if (astnode_is_type(union_id, NULL_NODE)) {
2622 err(n->loc, "anonymous union not allowed in global scope");
2623 } else {
2624 /* Put UNION in symbol table */
2625 assert(astnode_get_type(union_id) == IDENTIFIER_NODE);
2626 se = symtab_enter(union_id->ident, UNION_SYMBOL, n, 0);
2627 if (se == NULL) {
2628 err(n->loc, "duplicate symbol `%s'", union_id->ident);
2629 } else {
2630 /* Put the fields of the union in local symbol table */
2631 enter_union_fields(se, n);
2634 /* ### Remove UNION node from AST */
2635 // astnode_remove(n);
2636 // astnode_finalize(n);
2637 return 0;
2641 * Enters enumerated type into symbol table based on AST node.
2642 * @param n Node of type ENUM_DECL_NODE
2644 static int enter_enum(astnode *n, void *arg, astnode **next)
2646 astnode *c;
2647 astnode *id;
2648 astnode *val;
2649 symtab_entry *se;
2650 astnode *enum_id = astnode_get_child(n, 0); /* Child 0 is enum identifier */
2651 /* Enter in global symbol table */
2652 assert(astnode_get_type(enum_id) == IDENTIFIER_NODE);
2653 se = symtab_enter(enum_id->ident, ENUM_SYMBOL, n, 0);
2654 if (se == NULL) {
2655 err(n->loc, "duplicate symbol `%s'", enum_id->ident);
2656 } else {
2657 /* Add all the enum symbols to its own symbol table */
2658 se->symtab = symtab_create();
2659 val = NULL;
2660 for (c = enum_id->next_sibling; c != NULL; c = c->next_sibling) {
2661 if (astnode_is_type(c, IDENTIFIER_NODE)) {
2662 id = c;
2663 if (val == NULL) {
2664 val = astnode_create_integer(0, c->loc);
2665 } else {
2666 val = astnode_create_integer(val->integer+1, c->loc);
2668 } else {
2669 id = LHS(c);
2670 val = reduce_expression_complete(astnode_clone(RHS(c), RHS(c)->loc));
2671 if (!astnode_is_type(val, INTEGER_NODE)) {
2672 err(c->loc, "initializer does not evaluate to integer literal");
2673 astnode_finalize(val);
2674 /* Use default value */
2675 val = astnode_create_integer(0, c->loc);
2678 if (symtab_enter(id->ident, CONSTANT_SYMBOL, val, 0) == NULL) {
2679 err(c->loc, "duplicate symbol `%s' in enumeration `%s'", id->ident, enum_id->ident);
2680 continue;
2683 symtab_pop();
2685 /* ### Remove ENUM node from AST */
2686 // astnode_remove(n);
2687 // astnode_finalize(n);
2688 return 0;
2692 * Enters record type into symbol table based on AST node.
2693 * @param n Node of type RECORD_DECL_NODE
2695 static int enter_record(astnode *n, void *arg, astnode **next)
2697 ordered_field_list **plist;
2698 astnode *c;
2699 astnode *field_id;
2700 astnode *field_width;
2701 int size;
2702 int offset;
2703 symtab_entry *se;
2704 symtab_entry *fe;
2705 astnode *record_id = astnode_get_child(n, 0); /* Child 0 is record identifier */
2706 assert(astnode_get_type(record_id) == IDENTIFIER_NODE);
2707 /* Enter in global symbol table */
2708 se = symtab_enter(record_id->ident, RECORD_SYMBOL, n, 0);
2709 if (se == NULL) {
2710 err(n->loc, "duplicate symbol `%s'", record_id->ident);
2712 else {
2713 /* Add all the record fields to record's own symbol table */
2714 se->symtab = symtab_create();
2715 offset = 8;
2716 plist = &se->struc.fields;
2717 for (c = record_id->next_sibling; c != NULL; c = c->next_sibling) {
2718 /* c has two children: field identifier and its width */
2719 field_id = LHS(c);
2720 field_width = astnode_clone(reduce_expression(RHS(c)), RHS(c)->loc);
2721 /* Validate the width -- must be positive integer literal */
2722 if (!astnode_is_type(field_width, INTEGER_NODE)) {
2723 err(c->loc, "record member `%s' is not of constant size", field_id->ident);
2724 continue;
2726 if ((field_width->integer <= 0) || (field_width->integer >= 8)) {
2727 err(c->loc, "width of record member `%s' is out of range (%d)", field_id->ident, field_width->integer);
2728 continue;
2730 /* Attempt to enter field in record's symbol table */
2731 fe = symtab_enter(field_id->ident, VAR_SYMBOL, c, 0);
2732 if (fe == NULL) {
2733 err(c->loc, "duplicate symbol `%s' in record `%s'", field_id->ident, record_id->ident);
2734 continue;
2736 /* Add to ordered list of fields */
2737 (*plist) = malloc(sizeof(ordered_field_list));
2738 (*plist)->entry = fe;
2739 (*plist)->next = NULL;
2740 plist = &((*plist)->next);
2741 /* Set field offset */
2742 offset = offset - field_width->integer;
2743 fe->field.offset = astnode_create_integer(offset, c->loc);
2744 /* Set field size (width) */
2745 fe->field.size = field_width;
2747 size = 8 - offset;
2748 if (size > 8) {
2749 err(n->loc, "size of record `%s' (%d) exceeds 8 bits", record_id->ident, size);
2750 } else {
2751 /* Set size of record (in bits) */
2752 se->struc.size = astnode_create_integer(size, n->loc);
2754 symtab_pop();
2756 /* ### Remove RECORD node from AST */
2757 // astnode_remove(n);
2758 // astnode_finalize(n);
2759 return 0;
2763 * Globalizes a local.
2764 * The node is morphed into its global equivalent (LABEL_NODE or IDENTIFIER_NODE).
2765 * @param n A node of type LOCAL_LABEL_NODE or LOCAL_ID_NODE
2766 * @param arg Pointer to namespace counter
2768 static int globalize_local(astnode *n, void *arg, astnode **next)
2770 char str[32];
2771 /* Make it global by appending namespace counter to the id */
2772 sprintf(str, "#%d", label_count);
2773 if (astnode_is_type(n, LOCAL_LABEL_NODE)) {
2774 /* Local label definition, use label field */
2775 n->label = realloc(n->label, strlen(n->label)+strlen(str)+1);
2776 strcat(n->label, str);
2777 /* This node is now a unique, global label */
2778 n->type = LABEL_NODE;
2779 /* Make sure it's unique */
2780 if (symtab_lookup(n->label)) {
2781 err(n->loc, "duplicate symbol `%s'", n->label);
2782 /* Remove from AST */
2783 astnode_remove(n);
2784 astnode_finalize(n);
2785 return 0;
2786 } else {
2787 /* Enter it in symbol table */
2788 symtab_enter(n->label, LABEL_SYMBOL, n, (in_dataseg ? DATA_FLAG : 0) );
2790 } else {
2791 /* Local label reference, use ident field */
2792 n->ident = realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2793 strcat(n->ident, str);
2794 /* This node is now a unique, global identifier */
2795 n->type = IDENTIFIER_NODE;
2797 return 1;
2801 * Tags symbols as extrn.
2802 * @param n A node of type EXTRN_NODE
2804 static int tag_extrn_symbols(astnode *n, void *arg, astnode **next)
2806 astnode *id;
2807 astnode *type;
2808 astnode *list;
2809 symtab_entry *e;
2810 /* Get symbol type specifier */
2811 type = astnode_get_child(n, 0);
2812 /* Go through the list of identifiers */
2813 list = astnode_get_child(n, 1);
2814 for (id=astnode_get_first_child(list); id != NULL; id=astnode_get_next_sibling(id) ) {
2815 /* Look up identifier in symbol table */
2816 e = symtab_lookup(id->ident);
2817 if (e != NULL) {
2818 if (!(e->flags & EXTRN_FLAG)) {
2819 /* Error, can't import a symbol that's defined locally! */
2820 // TODO: this is okay?
2821 err(n->loc, "`%s' declared as extrn but is defined locally", id->ident);
2824 else {
2825 // TODO: store external unit name
2826 switch (astnode_get_type(type)) {
2827 case DATATYPE_NODE:
2828 /* Put it in symbol table */
2829 symtab_enter(id->ident, VAR_SYMBOL, astnode_create_data(astnode_clone(type, n->loc), NULL, n->loc), EXTRN_FLAG);
2830 break;
2832 case INTEGER_NODE:
2833 /* type->integer is (LABEL|PROC)_SYMBOL */
2834 symtab_enter(id->ident, type->integer, NULL, EXTRN_FLAG);
2835 break;
2837 default:
2838 break;
2842 /* Remove extrn node from AST */
2843 astnode_remove(n);
2844 astnode_finalize(n);
2846 return 0;
2852 static int process_message(astnode *n, void *arg, astnode **next)
2854 astnode *mesg = reduce_expression_complete(LHS(n));
2855 if (astnode_is_type(mesg, STRING_NODE)) {
2856 printf("%s\n", mesg->string);
2858 else if (astnode_is_type(mesg, INTEGER_NODE)) {
2859 printf("%d\n", mesg->integer);
2861 else {
2862 err(mesg->loc, "string or integer argument expected");
2864 astnode_remove(n);
2865 astnode_finalize(n);
2866 return 0;
2872 static int process_warning(astnode *n, void *arg, astnode **next)
2874 astnode *mesg = reduce_expression_complete(LHS(n));
2875 if (astnode_is_type(mesg, STRING_NODE)) {
2876 warn(mesg->loc, mesg->string);
2878 else {
2879 err(mesg->loc, "string argument expected");
2881 astnode_remove(n);
2882 astnode_finalize(n);
2883 return 0;
2889 static int process_error(astnode *n, void *arg, astnode **next)
2891 astnode *mesg = reduce_expression_complete(LHS(n));
2892 if (astnode_is_type(mesg, STRING_NODE)) {
2893 err(mesg->loc, mesg->string);
2895 else {
2896 err(mesg->loc, "string argument expected");
2898 astnode_remove(n);
2899 astnode_finalize(n);
2900 return 0;
2904 * Processes a forward branch declaration.
2905 * @param n Node of type FORWARD_BRANCH_DECL_NODE
2906 * @param arg Not used
2908 static int process_forward_branch_decl(astnode *n, void *arg, astnode **next)
2910 astnode *l;
2911 int i;
2912 char str[32];
2913 /* Get branch info structure for label (+, ++, ...) */
2914 forward_branch_info *fwd = &forward_branch[strlen(n->ident)-1];
2915 /* Morph n to globally unique label */
2916 sprintf(str, "#%d", fwd->counter);
2917 n->label = (char *)realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2918 strcat(n->label, str);
2919 n->type = LABEL_NODE;
2920 symtab_enter(n->label, LABEL_SYMBOL, n, 0);
2921 /* Fix reference identifiers */
2922 for (i=0; i<fwd->index; i++) {
2923 l = fwd->refs[i];
2924 l->ident = (char *)realloc(l->ident, strlen(n->ident)+1);
2925 strcpy(l->ident, n->ident);
2927 /* Prepare for next declaration */
2928 fwd->index = 0;
2929 fwd->counter++;
2930 return 0;
2934 * Processes a backward branch declaration.
2935 * @param n Node of type BACKWARD_BRANCH_DECL_NODE
2936 * @param arg Not used
2938 static int process_backward_branch_decl(astnode *n, void *arg, astnode **next)
2940 char str[32];
2941 /* Get branch info */
2942 backward_branch_info *bwd = &backward_branch[strlen(n->ident)-1];
2943 bwd->decl = n;
2944 /* Morph n to globally unique label */
2945 sprintf(str, "#%d", bwd->counter);
2946 n->label = (char *)realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2947 strcat(n->label, str);
2948 n->type = LABEL_NODE;
2949 symtab_enter(n->label, LABEL_SYMBOL, n, 0);
2950 /* Prepare for next declaration */
2951 bwd->counter++;
2952 return 0;
2956 * Processes a forward branch label reference.
2957 * @param n Node of type FORWARD_BRANCH_NODE
2958 * @param arg Not used
2960 static int process_forward_branch(astnode *n, void *arg, astnode **next)
2962 /* Add n to proper forward_branch array */
2963 forward_branch_info *fwd = &forward_branch[strlen(n->ident)-1];
2964 fwd->refs[fwd->index++] = n;
2965 /* Change to identifier node */
2966 n->type = IDENTIFIER_NODE;
2967 return 0;
2971 * Processes a backward branch label reference.
2972 * @param n Node of type BACKWARD_BRANCH_NODE
2973 * @param arg Not used
2975 static int process_backward_branch(astnode *n, void *arg, astnode **next)
2977 /* Get branch info */
2978 backward_branch_info *bwd = &backward_branch[strlen(n->ident)-1];
2979 /* Make sure it's a valid reference */
2980 if (bwd->decl != NULL) {
2981 /* Fix n->ident */
2982 n->ident = (char *)realloc(n->ident, strlen(bwd->decl->ident)+1);
2983 strcpy(n->ident, bwd->decl->ident);
2985 /* Change to identifier node */
2986 n->type = IDENTIFIER_NODE;
2987 return 0;
2990 /*---------------------------------------------------------------------------*/
2992 static int is_field_ref(astnode *n)
2994 astnode *p = astnode_get_parent(n);
2995 /* Case 1: id.id */
2996 if (astnode_is_type(p, DOT_NODE)) return 1;
2997 /* Case 2: id.id[expr] */
2998 if (astnode_is_type(p, INDEX_NODE) && (n == LHS(p)) && astnode_is_type(astnode_get_parent(p), DOT_NODE) ) return 1;
2999 return 0;
3003 * Checks that the given identifier node is present in symbol table.
3004 * Issues error if it is not, and replaces with integer 0.
3005 * @param n A node of type IDENTIFIER_NODE
3007 static int validate_ref(astnode *n, void *arg, astnode **next)
3009 int i;
3010 symbol_ident_list list;
3011 symtab_entry *enum_def;
3012 int ret = 1;
3013 if (is_field_ref(n)) {
3014 return 1; /* Validated by validate_dotref() */
3016 /* Look it up in symbol table */
3017 symtab_entry * e = symtab_lookup(n->ident);
3018 if (e == NULL) {
3019 /* This identifier is unknown */
3020 /* Maybe it is part of an enumeration */
3021 symtab_list_type(ENUM_SYMBOL, &list);
3022 for (i=0; i<list.size; i++) {
3023 enum_def = symtab_lookup(list.idents[i]);
3024 symtab_push(enum_def->symtab);
3025 e = symtab_lookup(n->ident);
3026 symtab_pop();
3027 if (e != NULL) {
3028 /* Found it */
3029 /* Replace id by SCOPE_NODE */
3030 astnode *scope = astnode_create_scope(
3031 astnode_create_identifier(enum_def->id, n->loc),
3032 astnode_clone(n, n->loc), n->loc);
3033 astnode_replace(n, scope);
3034 astnode_finalize(n);
3035 *next = scope;
3036 ret = 0;
3037 break;
3040 symtab_list_finalize(&list);
3041 /* If still not found, error */
3042 if (e == NULL) {
3043 strtok(n->ident, "#"); /* Remove globalize junk */
3044 // err(n->loc, "unknown symbol `%s'", n->ident);
3045 /* ### Replace by integer 0 */
3046 //astnode_replace(n, astnode_create_integer(0, n->loc) );
3047 //astnode_finalize(n);
3048 warn(n->loc, "`%s' undeclared; assuming external label", n->ident);
3049 e = symtab_enter(n->ident, LABEL_SYMBOL, NULL, EXTRN_FLAG);
3052 assert(e);
3053 /* Increase reference count */
3054 e->ref_count++;
3055 return ret;
3059 * Validates top-level (not part of structure) indexed identifier.
3060 * @param n Node of type INDEX_NODE
3061 * @param arg Not used
3063 static int validate_index(astnode *n, void *arg, astnode **next)
3065 symtab_entry *e;
3066 astnode *id;
3067 astnode *type;
3068 if (is_field_ref(LHS(n))) {
3069 return 1; /* Validated by validate_dotref() */
3071 id = LHS(n);
3072 if (!astnode_is_type(id, IDENTIFIER_NODE)) {
3073 err(n->loc, "identifier expected");
3074 astnode_replace(n, astnode_create_integer(0, n->loc) );
3075 astnode_finalize(n);
3076 return 0;
3078 e = symtab_lookup(id->ident);
3079 if (e != NULL) {
3080 type = LHS(e->def);
3081 if (!astnode_is_type(type, DATATYPE_NODE)) {
3082 err(n->loc, "`%s' cannot be indexed", id->ident);
3083 astnode_replace(n, astnode_create_integer(0, n->loc) );
3084 astnode_finalize(n);
3085 return 0;
3086 } else {
3087 // TODO: bounds check
3088 reduce_index(n);
3090 } else {
3091 err(n->loc, "unknown symbol `%s'", id->ident);
3092 astnode_replace(n, astnode_create_integer(0, n->loc) );
3093 astnode_finalize(n);
3094 return 0;
3096 return 1;
3100 * Checks that A::B is valid.
3101 * If it's not valid it is replaced by integer 0.
3102 * @param n Node of type SCOPE_NODE
3104 static int validate_scoperef(astnode *n, void *arg, astnode **next)
3106 astnode *symbol;
3107 astnode *namespace = LHS(n);
3108 /* Look up namespace in global symbol table */
3109 symtab_entry * e = symtab_lookup(namespace->ident);
3110 if (e == NULL) {
3111 /* Error, this identifier is unknown */
3112 err(n->loc, "unknown namespace `%s'", namespace->ident);
3113 /* Replace by integer 0 */
3114 astnode_replace(n, astnode_create_integer(0, n->loc) );
3115 astnode_finalize(n);
3116 return 0;
3117 } else {
3118 /* Get symbol on right of :: operator */
3119 symbol = RHS(n);
3120 /* Namespace was found, check its type */
3121 switch (e->type) {
3122 case STRUC_SYMBOL:
3123 case UNION_SYMBOL:
3124 case RECORD_SYMBOL:
3125 case ENUM_SYMBOL:
3126 /* OK, check the symbol */
3127 symtab_push(e->symtab);
3128 e = symtab_lookup(symbol->ident);
3129 if (e == NULL) {
3130 /* Error, symbol is not in namespace */
3131 err(n->loc, "unknown symbol `%s' in namespace `%s'", symbol->ident, namespace->ident);
3132 /* Replace by integer 0 */
3133 astnode_replace(n, astnode_create_integer(0, n->loc) );
3134 astnode_finalize(n);
3136 symtab_pop();
3137 break;
3139 default:
3140 err(n->loc, "`%s' is not a namespace", namespace->ident);
3141 /* Replace by integer 0 */
3142 astnode_replace(n, astnode_create_integer(0, n->loc) );
3143 astnode_finalize(n);
3144 break;
3147 return 0;
3151 * Validates right part of dotted reference recursively.
3152 * Assumes that left part's symbol table is on stack.
3153 * @param n Node of type DOT_NODE
3155 static void validate_dotref_recursive(astnode *n, astnode *top)
3157 astnode *left;
3158 astnode *right;
3159 astnode *type;
3160 symtab_entry *field;
3161 symtab_entry *def;
3162 left = LHS(n);
3163 if (astnode_is_type(left, INDEX_NODE)) {
3164 left = LHS(left); /* Need identifier */
3166 right = RHS(n);
3167 if (astnode_is_type(right, DOT_NODE)) {
3168 right = LHS(right); /* Need identifier */
3170 if (astnode_is_type(right, INDEX_NODE)) {
3171 right = LHS(right); /* Need identifier */
3173 /* Lookup 'right' in 'left's symbol table */
3174 assert(astnode_get_type(right) == IDENTIFIER_NODE);
3175 field = symtab_lookup(right->ident);
3176 if (field == NULL) {
3177 /* Error, this symbol is unknown */
3178 err(n->loc, "`%s' is not a member of `%s'", right->ident, left->ident);
3179 /* Replace by integer 0 */
3180 astnode_replace(top, astnode_create_integer(0, top->loc) );
3181 astnode_finalize(top);
3182 } else {
3183 /* See if more subfields to process */
3184 n = RHS(n);
3185 if (astnode_is_type(n, DOT_NODE)) {
3186 /* Verify the variable's type -- should be user-defined */
3187 type = LHS(field->def);
3188 if ((type == NULL) || (type->datatype != USER_DATATYPE)) {
3189 err(n->loc, "member `%s' of `%s' is not a structure", right->ident, left->ident);
3190 /* Replace by integer 0 */
3191 astnode_replace(top, astnode_create_integer(0, top->loc) );
3192 astnode_finalize(top);
3193 } else {
3194 /* Look up variable's type definition and verify it's a structure */
3195 def = symtab_global_lookup(LHS(type)->ident);
3196 if (def == NULL) {
3197 err(n->loc, "member '%s' of '%s' is of unknown type (`%s')", right->ident, left->ident, LHS(type)->ident);
3198 /* Replace by integer 0 */
3199 astnode_replace(top, astnode_create_integer(0, top->loc) );
3200 astnode_finalize(top);
3201 } else if ( !((def->type == STRUC_SYMBOL) || (def->type == UNION_SYMBOL)) ) {
3202 err(n->loc, "member `%s' of `%s' is not a structure", right->ident, left->ident);
3203 /* Replace by integer 0 */
3204 astnode_replace(top, astnode_create_integer(0, top->loc) );
3205 astnode_finalize(top);
3206 } else {
3207 /* Next field */
3208 symtab_push(def->symtab);
3209 validate_dotref_recursive(n, top);
3210 symtab_pop();
3218 * Validates A.B.C.D. . ...
3219 * Replaces the whole thing with integer 0 if not.
3220 * @param n Node of type DOT_NODE
3222 static int validate_dotref(astnode *n, void *arg, astnode **next)
3224 symtab_entry *father;
3225 symtab_entry *def;
3226 astnode *type;
3227 astnode *left;
3228 if (astnode_has_ancestor_of_type(n, DOT_NODE)) {
3229 return 1; /* Already validated, since this function is recursive */
3231 /* Look up parent in global symbol table */
3232 left = LHS(n); /* n := left . right */
3233 if (astnode_is_type(left, INDEX_NODE)) {
3234 left = LHS(left); /* Need identifier */
3236 father = symtab_lookup(left->ident);
3237 if (father == NULL) {
3238 /* Error, this symbol is unknown */
3239 err(n->loc, "unknown symbol `%s'", left->ident);
3240 /* Replace by integer 0 */
3241 astnode_replace(n, astnode_create_integer(0, n->loc) );
3242 astnode_finalize(n);
3243 return 0;
3244 } else {
3245 /* Increase reference count */
3246 father->ref_count++;
3247 /* Verify the variable's type -- should be user-defined */
3248 type = LHS(father->def);
3249 if ((type == NULL) || (type->datatype != USER_DATATYPE)) {
3250 err(n->loc, "`%s' is not a structure", left->ident);
3251 /* Replace by integer 0 */
3252 astnode_replace(n, astnode_create_integer(0, n->loc) );
3253 astnode_finalize(n);
3254 return 0;
3255 } else {
3256 /* Look up variable's type definition and verify it's a structure */
3257 def = symtab_lookup(LHS(type)->ident);
3258 if (def == NULL) {
3259 err(n->loc, "'%s' is of unknown type (`%s')", left->ident, LHS(type)->ident);
3260 /* Replace by integer 0 */
3261 astnode_replace(n, astnode_create_integer(0, n->loc) );
3262 astnode_finalize(n);
3263 return 0;
3264 } else if ( !((def->type == STRUC_SYMBOL) || (def->type == UNION_SYMBOL)) ) {
3265 err(n->loc, "`%s' is not a structure", left->ident);
3266 /* Replace by integer 0 */
3267 astnode_replace(n, astnode_create_integer(0, n->loc) );
3268 astnode_finalize(n);
3269 return 0;
3270 } else {
3271 /* Verify fields recursively */
3272 symtab_push(def->symtab);
3273 validate_dotref_recursive(n, n);
3274 symtab_pop();
3278 return 1;
3281 /*---------------------------------------------------------------------------*/
3284 * Evaluates expressions involved in conditional assembly, and removes the
3285 * appropriate branches from the AST.
3286 * Does some other stuff too, such as substitute equates and fold constants.
3288 void astproc_first_pass(astnode *root)
3290 /* Table of callback functions for our purpose. */
3291 static astnodeprocmap map[] = {
3292 { LABEL_NODE, enter_label },
3293 { VAR_DECL_NODE, enter_var },
3294 { PROC_NODE, enter_proc },
3295 { STRUC_DECL_NODE, enter_struc },
3296 { UNION_DECL_NODE, enter_union },
3297 { ENUM_DECL_NODE, enter_enum },
3298 { RECORD_DECL_NODE, enter_record },
3299 { LOCAL_LABEL_NODE, globalize_local },
3300 { LOCAL_ID_NODE, globalize_local },
3301 { MACRO_DECL_NODE, enter_macro },
3302 { MACRO_NODE, expand_macro },
3303 { REPT_NODE, process_rept },
3304 { WHILE_NODE, process_while },
3305 { DATASEG_NODE, process_dataseg },
3306 { CODESEG_NODE, process_codeseg },
3307 { ORG_NODE, process_org },
3308 { CHARMAP_NODE, load_charmap },
3309 { INSTRUCTION_NODE, process_instruction },
3310 { DATA_NODE, process_data },
3311 { STORAGE_NODE, process_storage },
3312 { EQU_NODE, process_equ },
3313 { ASSIGN_NODE, process_assign },
3314 { IFDEF_NODE, process_ifdef },
3315 { IFNDEF_NODE, process_ifndef },
3316 { IF_NODE, process_if },
3317 { EXTRN_NODE, tag_extrn_symbols },
3318 { MESSAGE_NODE, process_message },
3319 { WARNING_NODE, process_warning },
3320 { ERROR_NODE, process_error },
3321 { FORWARD_BRANCH_DECL_NODE, process_forward_branch_decl },
3322 { BACKWARD_BRANCH_DECL_NODE, process_backward_branch_decl },
3323 { FORWARD_BRANCH_NODE, process_forward_branch },
3324 { BACKWARD_BRANCH_NODE, process_backward_branch },
3325 { 0, NULL }
3327 reset_charmap();
3328 branch_init();
3329 in_dataseg = 0; /* codeseg is default */
3330 /* Do the walk. */
3331 astproc_walk(root, NULL, map);
3332 /* Remove all the volatile constants from the symbol table */
3333 /* These are the ones defined with the '=' operator, whose identifiers should
3334 all have been replaced by their value in the syntax tree now. Since
3335 they're not referenced anywhere we can safely dispose of them.
3336 The EQUates on the other hand should be kept, since they will
3337 possibly be exported. */
3338 #ifdef ENABLE_BUGGY_THING // ### FIXME
3340 int i;
3341 symbol_ident_list list;
3342 symtab_entry *e;
3343 symtab_list_type(CONSTANT_SYMBOL, &list);
3344 for (i = 0; i < list.size; ++i) {
3345 e = symtab_lookup(list.idents[i]);
3346 if (e->flags & VOLATILE_FLAG) {
3347 symtab_remove(list.idents[i]);
3350 symtab_list_finalize(&list);
3352 #endif
3355 /*---------------------------------------------------------------------------*/
3358 * Tags labels as public.
3359 * @param n A node of type PUBLIC_NODE
3361 static int tag_public_symbols(astnode *n, void *arg, astnode **next)
3363 astnode *id;
3364 symtab_entry *e;
3365 /* Go through the list of identifiers */
3366 for (id=astnode_get_first_child(n); id != NULL; id = astnode_get_next_sibling(id) ) {
3367 /* Look up identifier in symbol table */
3368 e = symtab_lookup(id->ident);
3369 if (e != NULL) {
3370 /* Symbol exists. Set the proper flag unless ambiguous. */
3371 if (e->flags & EXTRN_FLAG) {
3372 err(n->loc, "`%s' already declared extrn", id->ident);
3373 } else {
3374 switch (e->type) {
3375 case LABEL_SYMBOL:
3376 case CONSTANT_SYMBOL:
3377 case VAR_SYMBOL:
3378 case PROC_SYMBOL:
3379 /* GO! */
3380 e->flags |= PUBLIC_FLAG;
3381 break;
3383 default:
3384 err(n->loc, "`%s' is of non-exportable type", id->ident);
3385 break;
3388 } else {
3389 /* Warning, can't export a symbol that's not defined. */
3390 warn(n->loc, "`%s' declared as public but is not defined", id->ident);
3393 /* Remove PUBLIC_NODE from AST */
3394 astnode_remove(n);
3395 astnode_finalize(n);
3397 return 0;
3401 * Sets alignment for a set of (data) labels.
3402 * @param n A node of type ALIGN_NODE
3404 static int tag_align_symbols(astnode *n, void *arg, astnode **next)
3406 int pow;
3407 astnode *id;
3408 astnode *idents;
3409 astnode *expr;
3410 symtab_entry *e;
3411 /* Go through the list of identifiers */
3412 idents = LHS(n);
3413 for (id=astnode_get_first_child(idents); id != NULL; id = astnode_get_next_sibling(id) ) {
3414 /* Look up identifier in symbol table */
3415 e = symtab_lookup(id->ident);
3416 if (e != NULL) {
3417 /* Symbol exists. Set the proper flag unless ambiguous. */
3418 if (!(e->flags & DATA_FLAG)) {
3419 err(n->loc, "cannot align a code symbol (`%s')", id->ident);
3420 } else {
3421 switch (e->type) {
3422 case LABEL_SYMBOL:
3423 case VAR_SYMBOL:
3424 expr = reduce_expression(RHS(n));
3425 if (!astnode_is_type(expr, INTEGER_NODE)) {
3426 err(n->loc, "alignment expression must be an integer literal");
3427 } else if ((expr->integer < 0) || (expr->integer >= 0x10000)) {
3428 err(n->loc, "alignment expression out of range");
3429 } else if (expr->integer > 1) {
3430 pow = 0;
3431 switch (expr->integer) {
3432 case 32768: pow++;
3433 case 16384: pow++;
3434 case 8192: pow++;
3435 case 4096: pow++;
3436 case 2048: pow++;
3437 case 1024: pow++;
3438 case 512: pow++;
3439 case 256: pow++;
3440 case 128: pow++;
3441 case 64: pow++;
3442 case 32: pow++;
3443 case 16: pow++;
3444 case 8: pow++;
3445 case 4: pow++;
3446 case 2: pow++;
3447 /* GO! */
3448 e->flags |= ALIGN_FLAG;
3449 e->align = pow;
3450 break;
3452 default:
3453 err(n->loc, "alignment expression must be a power of 2");
3454 break;
3457 break;
3459 default:
3460 err(n->loc, "`%s' cannot be aligned", id->ident);
3461 break;
3465 else {
3466 /* Warning, can't align a symbol that's not defined. */
3467 warn(n->loc, "alignment ignored for undefined symbol `%s'", id->ident);
3470 /* Remove ALIGN_NODE from AST */
3471 astnode_remove(n);
3472 astnode_finalize(n);
3474 return 0;
3477 /*---------------------------------------------------------------------------*/
3480 * Removes unused labels from a syntax tree (and symbol table).
3481 * Unused labels are labels that are defined but not referenced anywhere.
3482 * This function assumes that the reference counts have already been calculated.
3484 void remove_unused_labels()
3486 int i;
3487 char *id;
3488 astnode *n;
3489 symbol_ident_list list;
3490 symtab_list_type(LABEL_SYMBOL, &list);
3491 for (i=0; i<list.size; i++) {
3492 /* Look up label in symbol table */
3493 id = list.idents[i];
3494 symtab_entry * e = symtab_lookup(id);
3495 /* If reference count is zero, AND label isn't declared public, remove it. */
3496 if ((e->ref_count == 0) && ((e->flags & PUBLIC_FLAG) == 0)) {
3497 n = e->def;
3498 strtok(n->label, "#"); /* Remove globalize junk */
3499 warn(n->loc, "`%s' defined but not used", n->label);
3500 /* Remove label from AST */
3501 astnode_remove(n);
3502 astnode_finalize(n);
3503 //symtab_remove(n->label); ### FIXME leads to crash sometimes...
3506 symtab_list_finalize(&list);
3510 * If the storage is of user-defined type, replaces it with
3511 * .DSB sizeof(type) * count
3513 static int reduce_user_storage(astnode *n, void *arg, astnode **next)
3515 astnode *type;
3516 astnode *count;
3517 astnode *byte_storage;
3518 symtab_entry *e;
3519 type = LHS(n);
3520 if (type->datatype == USER_DATATYPE) {
3521 /* Look it up */
3522 e = symtab_lookup(LHS(type)->ident);
3523 if (e != NULL) {
3524 /* Replace by DSB */
3525 count = RHS(n);
3526 byte_storage = astnode_create_storage(
3527 astnode_create_datatype(BYTE_DATATYPE, NULL, type->loc),
3528 astnode_create_arithmetic(
3529 MUL_OPERATOR,
3530 astnode_create_sizeof(
3531 astnode_create_identifier(LHS(type)->ident, n->loc),
3532 n->loc
3534 astnode_clone(count, n->loc),
3535 n->loc
3537 n->loc
3539 astnode_replace(n, byte_storage);
3540 astnode_finalize(n);
3541 *next = byte_storage;
3542 return 0;
3543 } else {
3544 err(n->loc, "unknown symbol `%s'", LHS(type)->ident);
3545 /* Remove from AST */
3546 astnode_remove(n);
3547 astnode_finalize(n);
3548 return 0;
3551 return 1;
3555 * Second major pass over AST.
3557 void astproc_second_pass(astnode *root)
3559 /* Table of callback functions for our purpose. */
3560 static astnodeprocmap map[] = {
3561 { IDENTIFIER_NODE, validate_ref },
3562 { SCOPE_NODE, validate_scoperef },
3563 { DOT_NODE, validate_dotref },
3564 { INDEX_NODE, validate_index },
3565 { PUBLIC_NODE, tag_public_symbols },
3566 { STORAGE_NODE, reduce_user_storage },
3567 { ALIGN_NODE, tag_align_symbols },
3568 { STRUC_DECL_NODE, noop },
3569 { UNION_DECL_NODE, noop },
3570 { ENUM_DECL_NODE, noop },
3571 { RECORD_DECL_NODE, noop },
3572 { 0, NULL }
3574 in_dataseg = 0; /* codeseg is default */
3575 /* Do the walk. */
3576 astproc_walk(root, NULL, map);
3577 /* */
3578 remove_unused_labels();
3581 /*---------------------------------------------------------------------------*/
3584 * Translates a single instruction.
3585 * @param n A node of type INSTRUCTION_NODE
3587 static int translate_instruction(astnode *n, void *arg, astnode **next)
3589 unsigned char c;
3590 /* Put the operand in final form */
3591 astnode *o = reduce_expression_complete( LHS(n) );
3592 assert(o == LHS(n));
3593 /* Convert (mnemonic, addressing mode) pair to opcode */
3594 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3595 /* Test if opcode is invalid */
3596 if (n->instr.opcode == 0xFF) {
3597 /* Check for the special cases */
3598 if ((n->instr.mnemonic == STX_MNEMONIC) && (n->instr.mode == ABSOLUTE_Y_MODE)) {
3599 /* Doesn't have absolute version, "scale down" to zeropage */
3600 n->instr.mode = ZEROPAGE_Y_MODE;
3601 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3602 } else if ((n->instr.mnemonic == STY_MNEMONIC) && (n->instr.mode == ABSOLUTE_X_MODE)) {
3603 /* Doesn't have absolute version, "scale down" to zeropage */
3604 n->instr.mode = ZEROPAGE_X_MODE;
3605 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3606 } else if (n->instr.mode == ABSOLUTE_MODE) {
3607 /* Check for relative addressing (these are parsed as absolute mode) */
3608 switch (n->instr.mnemonic) {
3609 case BCC_MNEMONIC:
3610 case BCS_MNEMONIC:
3611 case BEQ_MNEMONIC:
3612 case BMI_MNEMONIC:
3613 case BNE_MNEMONIC:
3614 case BPL_MNEMONIC:
3615 case BVC_MNEMONIC:
3616 case BVS_MNEMONIC:
3617 /* Fix addressing mode and opcode */
3618 n->instr.mode = RELATIVE_MODE;
3619 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3620 break;
3624 if (n->instr.opcode != 0xFF) {
3625 /* If the operand is a constant, see if we can "reduce" from
3626 absolute mode to zeropage mode */
3627 if ((astnode_is_type(o, INTEGER_NODE)) &&
3628 ((unsigned long)o->integer < 256) &&
3629 ((c = opcode_zp_equiv(n->instr.opcode)) != 0xFF)) {
3630 /* Switch to the zeromode version */
3631 n->instr.opcode = c;
3632 switch (n->instr.mode) {
3633 case ABSOLUTE_MODE: n->instr.mode = ZEROPAGE_MODE; break;
3634 case ABSOLUTE_X_MODE: n->instr.mode = ZEROPAGE_X_MODE;break;
3635 case ABSOLUTE_Y_MODE: n->instr.mode = ZEROPAGE_Y_MODE;break;
3636 default: /* Impossible to get here, right? */ break;
3639 /* If the operand is a constant, make sure it fits */
3640 if (astnode_is_type(o, INTEGER_NODE)) {
3641 switch (n->instr.mode) {
3642 case IMMEDIATE_MODE:
3643 case ZEROPAGE_MODE:
3644 case ZEROPAGE_X_MODE:
3645 case ZEROPAGE_Y_MODE:
3646 case PREINDEXED_INDIRECT_MODE:
3647 case POSTINDEXED_INDIRECT_MODE:
3648 /* Operand must fit in 8 bits */
3649 if (!IS_BYTE_VALUE(o->integer)) {
3650 warn(o->loc, "operand out of range; truncated");
3651 o->integer &= 0xFF;
3653 break;
3655 case ABSOLUTE_MODE:
3656 case ABSOLUTE_X_MODE:
3657 case ABSOLUTE_Y_MODE:
3658 case INDIRECT_MODE:
3659 /* Operand must fit in 8 bits */
3660 if ((unsigned long)o->integer >= 0x10000) {
3661 warn(o->loc, "operand out of range; truncated");
3662 o->integer &= 0xFFFF;
3664 break;
3666 case RELATIVE_MODE:
3667 /* Constant isn't allowed here is it? */
3668 break;
3670 default:
3671 break;
3674 else if (astnode_is_type(o, STRING_NODE)) {
3675 /* String operand doesn't make sense here */
3676 err(n->loc, "invalid operand");
3678 } else {
3679 /* opcode_get() returned 0xFF */
3680 err(n->loc, "invalid addressing mode");
3682 return 0;
3686 * ### Is this really such a good idea?
3688 static int maybe_merge_data(astnode *n, void *arg, astnode **next)
3690 astnode *temp;
3691 astnode *type;
3692 type = LHS(n);
3693 /* Only merge if no debugging, otherwise line information is lost. */
3694 if (!xasm_args.debug && astnode_is_type(*next, DATA_NODE) &&
3695 astnode_equal(type, LHS(*next)) ) {
3696 /* Merge ahead */
3697 temp = *next;
3698 astnode_finalize( astnode_remove_child_at(temp, 0) ); /* Remove datatype node */
3699 astnode_add_child(n, astnode_remove_children(temp) );
3700 astnode_finalize(temp);
3701 *next = n;
3702 } else {
3703 /* Reduce expressions to final form */
3704 for (n = n->first_child; n != NULL; n = temp->next_sibling) {
3705 temp = reduce_expression_complete(n);
3706 if (astnode_is_type(temp, INTEGER_NODE)) {
3707 /* Check that value fits according to datatype */
3708 switch (type->datatype) {
3709 case BYTE_DATATYPE:
3710 if (!IS_BYTE_VALUE(temp->integer)) {
3711 warn(temp->loc, "operand out of range; truncated");
3712 temp->integer &= 0xFF;
3714 break;
3716 case WORD_DATATYPE:
3717 if (!IS_WORD_VALUE(temp->integer)) {
3718 warn(temp->loc, "operand out of range; truncated");
3719 temp->integer &= 0xFFFF;
3721 break;
3723 case DWORD_DATATYPE:
3724 break;
3726 default:
3727 break;
3732 return 0;
3738 static int maybe_merge_storage(astnode *n, void *arg, astnode **next)
3740 astnode *temp;
3741 astnode *new_count;
3742 astnode *old_count;
3743 if (astnode_is_type(*next, STORAGE_NODE) &&
3744 astnode_equal(LHS(n), LHS(*next)) ) {
3745 /* Merge ahead */
3746 temp = *next;
3747 astnode_finalize( astnode_remove_child_at(temp, 0) ); /* Remove datatype node */
3748 old_count = RHS(n);
3749 /* Calculate new count */
3750 new_count = astnode_create_arithmetic(
3751 PLUS_OPERATOR,
3752 astnode_remove_child_at(temp, 0),
3753 astnode_clone(old_count, n->loc),
3754 n->loc
3756 new_count = reduce_expression_complete(new_count);
3757 astnode_replace(old_count, new_count);
3758 astnode_finalize(old_count);
3759 astnode_finalize(temp);
3760 *next = n;
3761 } else {
3762 reduce_expression_complete(RHS(n));
3764 return 0;
3768 * Replaces .proc by its label followed by statements.
3770 static int flatten_proc(astnode *n, void *arg, astnode **next)
3772 astnode *id = LHS(n);
3773 astnode *list = RHS(n);
3774 astnode_remove(id);
3775 id->type = LABEL_NODE;
3776 astnode_insert_child(list, id, 0);
3777 astnode *stmts = astnode_remove_children(list);
3778 astnode_replace(n, stmts);
3779 astnode_finalize(n);
3780 *next = stmts;
3781 return 0;
3787 static int flatten_var_decl(astnode *n, void *arg, astnode **next)
3789 astnode *stmts = LHS(n);
3790 astnode_remove_children(n);
3791 stmts->type = LABEL_NODE;
3792 astnode_replace(n, stmts);
3793 astnode_finalize(n);
3794 *next = stmts;
3795 return 0;
3799 * Third and final pass (if the output isn't pure 6502).
3800 * Translates instructions, merges data and storage nodes,
3801 * and reduces their operands to final form on the way.
3803 void astproc_third_pass(astnode *root)
3805 /* Table of callback functions for our purpose. */
3806 static astnodeprocmap map[] = {
3807 { INSTRUCTION_NODE, translate_instruction },
3808 { DATA_NODE, maybe_merge_data },
3809 { STORAGE_NODE, maybe_merge_storage },
3810 { VAR_DECL_NODE, flatten_var_decl },
3811 { PROC_NODE, flatten_proc },
3812 { STRUC_DECL_NODE, noop },
3813 { UNION_DECL_NODE, noop },
3814 { ENUM_DECL_NODE, noop },
3815 { RECORD_DECL_NODE, noop },
3816 { 0, NULL }
3818 in_dataseg = 0; /* codeseg is default */
3819 /* Do the walk. */
3820 astproc_walk(root, NULL, map);
3823 /*---------------------------------------------------------------------------*/
3826 * Evaluates the given expression, _without_ replacing it in the AST
3827 * (unlike astproc_reduce_expression() and friends).
3829 static astnode *eval_expression(astnode *expr)
3831 switch (astnode_get_type(expr)) {
3833 case ARITHMETIC_NODE: {
3834 astnode *lhs = eval_expression(LHS(expr));
3835 astnode *rhs = eval_expression(RHS(expr));
3836 switch (expr->oper) {
3837 /* Binary ops */
3838 case PLUS_OPERATOR:
3839 case MINUS_OPERATOR:
3840 case MUL_OPERATOR:
3841 case DIV_OPERATOR:
3842 case MOD_OPERATOR:
3843 case AND_OPERATOR:
3844 case OR_OPERATOR:
3845 case XOR_OPERATOR:
3846 case SHL_OPERATOR:
3847 case SHR_OPERATOR:
3848 case LT_OPERATOR:
3849 case GT_OPERATOR:
3850 case EQ_OPERATOR:
3851 case NE_OPERATOR:
3852 case LE_OPERATOR:
3853 case GE_OPERATOR:
3854 if (astnode_is_type(lhs, INTEGER_NODE)
3855 && astnode_is_type(rhs, INTEGER_NODE)) {
3856 /* Both sides are integer literals. */
3857 switch (expr->oper) {
3858 case PLUS_OPERATOR: return astnode_create_integer(lhs->integer + rhs->integer, expr->loc);
3859 case MINUS_OPERATOR: return astnode_create_integer(lhs->integer - rhs->integer, expr->loc);
3860 case MUL_OPERATOR: return astnode_create_integer(lhs->integer * rhs->integer, expr->loc);
3861 case DIV_OPERATOR: return astnode_create_integer(lhs->integer / rhs->integer, expr->loc);
3862 case MOD_OPERATOR: return astnode_create_integer(lhs->integer % rhs->integer, expr->loc);
3863 case AND_OPERATOR: return astnode_create_integer(lhs->integer & rhs->integer, expr->loc);
3864 case OR_OPERATOR: return astnode_create_integer(lhs->integer | rhs->integer, expr->loc);
3865 case XOR_OPERATOR: return astnode_create_integer(lhs->integer ^ rhs->integer, expr->loc);
3866 case SHL_OPERATOR: return astnode_create_integer(lhs->integer << rhs->integer, expr->loc);
3867 case SHR_OPERATOR: return astnode_create_integer(lhs->integer >> rhs->integer, expr->loc);
3868 case LT_OPERATOR: return astnode_create_integer(lhs->integer < rhs->integer, expr->loc);
3869 case GT_OPERATOR: return astnode_create_integer(lhs->integer > rhs->integer, expr->loc);
3870 case EQ_OPERATOR: return astnode_create_integer(lhs->integer == rhs->integer, expr->loc);
3871 case NE_OPERATOR: return astnode_create_integer(lhs->integer != rhs->integer, expr->loc);
3872 case LE_OPERATOR: return astnode_create_integer(lhs->integer <= rhs->integer, expr->loc);
3873 case GE_OPERATOR: return astnode_create_integer(lhs->integer >= rhs->integer, expr->loc);
3875 default: /* ### Error, actually */
3876 break;
3879 /* Use some mathematical identities... */
3880 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 0))
3881 && (expr->oper == PLUS_OPERATOR)) {
3882 /* 0+expr == expr */
3883 return astnode_clone(rhs, rhs->loc);
3884 } else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 0))
3885 && (expr->oper == PLUS_OPERATOR)) {
3886 /* expr+0 == expr */
3887 return astnode_clone(lhs, lhs->loc);
3888 } else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 1))
3889 && (expr->oper == MUL_OPERATOR)) {
3890 /* 1*expr == expr */
3891 return astnode_clone(rhs, rhs->loc);
3892 } else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 1))
3893 && ((expr->oper == MUL_OPERATOR) || (expr->oper == DIV_OPERATOR)) ) {
3894 /* expr*1 == expr */
3895 /* expr/1 == expr */
3896 return astnode_clone(lhs, lhs->loc);
3898 break;
3900 /* Unary ops */
3901 case NEG_OPERATOR:
3902 case NOT_OPERATOR:
3903 case LO_OPERATOR:
3904 case HI_OPERATOR:
3905 case UMINUS_OPERATOR:
3906 case BANK_OPERATOR:
3907 if (astnode_is_type(lhs, INTEGER_NODE)) {
3908 switch (expr->oper) {
3909 case NEG_OPERATOR: return astnode_create_integer(~lhs->integer, expr->loc);
3910 case NOT_OPERATOR: return astnode_create_integer(!lhs->integer, expr->loc);
3911 case LO_OPERATOR: return astnode_create_integer(lhs->integer & 0xFF, expr->loc);
3912 case HI_OPERATOR: return astnode_create_integer((lhs->integer >> 8) & 0xFF, expr->loc);
3913 case UMINUS_OPERATOR: return astnode_create_integer(-lhs->integer, expr->loc);
3914 default: break;
3917 break;
3918 } /* switch */
3919 } break;
3921 case INTEGER_NODE:
3922 return astnode_clone(expr, expr->loc);
3924 case IDENTIFIER_NODE: {
3925 symtab_entry *e = symtab_lookup(expr->ident);
3926 // ### assert(e->type == LABEL_SYMBOL);
3927 if (e->flags & ADDR_FLAG)
3928 return astnode_create_integer(e->address, expr->loc);
3929 } break;
3931 case CURRENT_PC_NODE:
3932 return astnode_create_integer(in_dataseg ? dataseg_pc : codeseg_pc, expr->loc);
3934 default:
3935 break;
3936 } /* switch */
3937 return 0;
3941 * Sets the address of the label to be the currently calculated PC.
3943 static int set_label_address(astnode *label, void *arg, astnode **next)
3945 symtab_entry *e = symtab_lookup(label->ident);
3946 // ### assert(e && (e->type == LABEL_SYMBOL));
3947 e->address = in_dataseg ? dataseg_pc : codeseg_pc;
3948 e->flags |= ADDR_FLAG;
3949 return 0;
3953 * Sets the current PC to the address specified by the ORG node.
3955 static int set_pc_from_org(astnode *org, void *arg, astnode **next)
3957 astnode *addr = LHS(org);
3958 assert(astnode_is_type(addr, INTEGER_NODE));
3959 if (in_dataseg)
3960 dataseg_pc = addr->integer;
3961 else
3962 codeseg_pc = addr->integer;
3963 return 0;
3967 * Ensures that the given symbol is defined.
3969 static int ensure_symbol_is_defined(astnode *id, void *arg, astnode **next)
3971 symtab_entry *e = symtab_lookup(id->ident);
3972 assert(e);
3973 if ((e->flags & EXTRN_FLAG) && !(e->flags & ERROR_UNDEFINED_FLAG)) {
3974 err(id->loc, "cannot generate pure binary because `%s' is not defined", id->ident);
3975 e->flags |= ERROR_UNDEFINED_FLAG;
3977 return 0;
3981 * Increments PC according to the size of the instruction.
3983 static int inc_pc_by_instruction(astnode *instr, void *arg, astnode **next)
3985 assert(!in_dataseg);
3986 if (LHS(instr)) {
3987 /* Has operand */
3988 unsigned char zp_op = opcode_zp_equiv(instr->instr.opcode);
3989 if (zp_op != 0xFF) {
3990 /* See if we can optimize this to a ZP-instruction */
3991 astnode *operand = eval_expression(LHS(instr));
3992 if (operand && astnode_is_type(operand, INTEGER_NODE)) {
3993 if ((operand->integer >= 0) && (operand->integer < 256)) {
3994 instr->instr.opcode = zp_op;
3996 astnode_finalize(operand);
4000 codeseg_pc += opcode_length(instr->instr.opcode);
4001 return 1;
4005 * Increments PC according to the size of the defined data.
4007 static int inc_pc_by_data(astnode *data, void *arg, astnode **next)
4009 astnode *type = LHS(data);
4010 int count = astnode_get_child_count(data) - 1;
4011 int nbytes;
4012 assert(!in_dataseg);
4013 switch (type->datatype) {
4014 case BYTE_DATATYPE: nbytes = count; break;
4015 case WORD_DATATYPE: nbytes = count * 2; break;
4016 case DWORD_DATATYPE: nbytes = count * 4; break;
4017 default:
4018 assert(0);
4019 break;
4021 codeseg_pc += nbytes;
4022 return 0;
4026 * Increments PC according to the size of the included binary.
4028 static int inc_pc_by_binary(astnode *node, void *arg, astnode **next)
4030 assert(!in_dataseg);
4031 codeseg_pc += node->binary.size;
4032 return 0;
4036 * Increments PC according to the size of the storage.
4038 static int inc_pc_by_storage(astnode *storage, void *arg, astnode **next)
4040 astnode *type = LHS(storage);
4041 assert(type->datatype == BYTE_DATATYPE);
4042 astnode *count = eval_expression(RHS(storage));
4043 if (count) {
4044 if (astnode_get_type(count) == INTEGER_NODE) {
4045 if (in_dataseg)
4046 dataseg_pc += count->integer;
4047 else
4048 codeseg_pc += count->integer;
4050 astnode_finalize(count);
4052 return 1;
4056 * This pass is only performed if the output format is pure 6502.
4057 * It ensures that it is actually possible to generate pure 6502
4058 * for this syntax tree (i.e. no external symbols).
4059 * Furthermore, it calculates the address of all labels, so that
4060 * everything is ready for the final output phase.
4062 void astproc_fourth_pass(astnode *root)
4064 int x;
4065 /* ### Should loop while there's a change in the address of
4066 one or more labels */
4067 for (x = 0; x < 2; ++x) {
4068 in_dataseg = 0; /* codeseg is default */
4069 dataseg_pc = 0;
4070 codeseg_pc = 0;
4071 /* Table of callback functions for our purpose. */
4072 static astnodeprocmap map[] = {
4073 { DATASEG_NODE, process_dataseg },
4074 { CODESEG_NODE, process_codeseg },
4075 { ORG_NODE, set_pc_from_org },
4076 { LABEL_NODE, set_label_address },
4077 { IDENTIFIER_NODE, ensure_symbol_is_defined },
4078 { INSTRUCTION_NODE, inc_pc_by_instruction },
4079 { DATA_NODE, inc_pc_by_data },
4080 { STORAGE_NODE, inc_pc_by_storage },
4081 { BINARY_NODE, inc_pc_by_binary },
4082 { STRUC_DECL_NODE, noop },
4083 { UNION_DECL_NODE, noop },
4084 { ENUM_DECL_NODE, noop },
4085 { RECORD_DECL_NODE, noop },
4086 { 0, NULL }
4088 /* Do the walk. */
4089 astproc_walk(root, NULL, map);
4093 /*---------------------------------------------------------------------------*/
4096 * Writes an instruction.
4098 static int write_instruction(astnode *instr, void *arg, astnode **next)
4100 FILE *fp = (FILE *)arg;
4101 unsigned char op = instr->instr.opcode;
4102 int len = opcode_length(op);
4103 fputc(op, fp);
4104 if (len > 1) {
4105 /* Write operand */
4106 astnode *operand = eval_expression(LHS(instr));
4107 if(!astnode_is_type(operand, INTEGER_NODE)) {
4108 /* ### This is rather fatal, it should be a literal by this point */
4109 err(instr->loc, "operand does not evaluate to literal");
4110 } else {
4111 int value = operand->integer;
4112 if (len == 2) {
4113 /* Check if it's a relative jump */
4114 switch (op) {
4115 case 0x10:
4116 case 0x30:
4117 case 0x50:
4118 case 0x70:
4119 case 0x90:
4120 case 0xB0:
4121 case 0xD0:
4122 case 0xF0:
4123 /* Calculate difference between target and address of next instruction */
4124 value = value - (codeseg_pc + 2);
4125 if (!IS_BYTE_VALUE(value)) {
4126 err(operand->loc, "branch out of range");
4127 value &= 0xFF;
4129 break;
4131 default:
4132 if (!IS_BYTE_VALUE(value)) {
4133 warn(operand->loc, "operand out of range; truncated");
4134 value &= 0xFF;
4136 break;
4138 fputc((unsigned char)value, fp);
4139 } else {
4140 assert(len == 3);
4141 if (!IS_WORD_VALUE(value)) {
4142 warn(operand->loc, "operand out of range; truncated");
4143 value &= 0xFFFF;
4145 fputc((unsigned char)value, fp);
4146 fputc((unsigned char)(value >> 8), fp);
4149 astnode_finalize(operand);
4151 codeseg_pc += opcode_length(instr->instr.opcode);
4152 return 0;
4156 * Writes data.
4158 static int write_data(astnode *data, void *arg, astnode **next)
4160 FILE *fp = (FILE *)arg;
4161 astnode *type = LHS(data);
4162 astnode *expr;
4163 assert(!in_dataseg);
4164 for (expr = RHS(data); expr != NULL; expr = astnode_get_next_sibling(expr) ) {
4165 int value;
4166 astnode *e = eval_expression(expr);
4167 assert(e->type == INTEGER_NODE);
4168 value = e->integer;
4169 switch (type->datatype) {
4170 case BYTE_DATATYPE:
4171 if (!IS_BYTE_VALUE(value)) {
4172 warn(expr->loc, "operand out of range; truncated");
4173 value &= 0xFF;
4175 fputc((unsigned char)value, fp);
4176 codeseg_pc += 1;
4177 break;
4179 case WORD_DATATYPE:
4180 if (!IS_WORD_VALUE(value)) {
4181 warn(expr->loc, "operand out of range; truncated");
4182 value &= 0xFFFF;
4184 fputc((unsigned char)value, fp);
4185 fputc((unsigned char)(value >> 8), fp);
4186 codeseg_pc += 2;
4187 break;
4189 case DWORD_DATATYPE:
4190 fputc((unsigned char)value, fp);
4191 fputc((unsigned char)(value >> 8), fp);
4192 fputc((unsigned char)(value >> 16), fp);
4193 fputc((unsigned char)(value >> 24), fp);
4194 codeseg_pc += 4;
4195 break;
4197 default:
4198 assert(0);
4199 break;
4201 astnode_finalize(e);
4203 return 0;
4207 * Writes storage (padding).
4209 static int write_storage(astnode *storage, void *arg, astnode **next)
4211 FILE *fp = (FILE *)arg;
4212 astnode *type = LHS(storage);
4213 astnode *count = eval_expression(RHS(storage));
4214 assert(type->datatype == BYTE_DATATYPE);
4215 assert(!in_dataseg);
4216 if (count) {
4217 int i;
4218 assert(astnode_get_type(count) == INTEGER_NODE);
4219 for (i = 0; i < count->integer; ++i)
4220 fputc(0, fp);
4221 codeseg_pc += count->integer;
4222 astnode_finalize(count);
4224 return 0;
4228 * Writes binary.
4230 static int write_binary(astnode *node, void *arg, astnode **next)
4232 FILE *fp = (FILE *)arg;
4233 assert(!in_dataseg);
4234 fwrite(node->binary.data, 1, node->binary.size, fp);
4235 codeseg_pc += node->binary.size;
4236 return 0;
4240 * This pass is only performed if the output format is pure 6502.
4241 * It writes the binary code.
4243 void astproc_fifth_pass(astnode *root)
4245 FILE *fp = fopen(xasm_args.output_file, "wb");
4246 if (!fp) {
4247 fprintf(stderr, "could not open '%s' for writing\n", xasm_args.output_file);
4248 ++err_count;
4249 return;
4251 /* Table of callback functions for our purpose. */
4252 static astnodeprocmap map[] = {
4253 { DATASEG_NODE, process_dataseg },
4254 { CODESEG_NODE, process_codeseg },
4255 { ORG_NODE, set_pc_from_org },
4256 { INSTRUCTION_NODE, write_instruction },
4257 { DATA_NODE, write_data },
4258 { STORAGE_NODE, write_storage },
4259 { BINARY_NODE, write_binary },
4260 { STRUC_DECL_NODE, noop },
4261 { UNION_DECL_NODE, noop },
4262 { ENUM_DECL_NODE, noop },
4263 { RECORD_DECL_NODE, noop },
4264 { 0, NULL }
4266 in_dataseg = 0; /* codeseg is default */
4267 dataseg_pc = 0;
4268 codeseg_pc = 0;
4269 /* Do the walk. */
4270 astproc_walk(root, fp, map);
4271 fclose(fp);