memleak in expression reduction
[xorcyst.git] / astproc.c
blobb13b58af571a49371067061b370f5b95c3d4a793
1 /*
2 * $Id: astproc.c,v 1.21 2007/11/11 22:35:22 khansen Exp $
3 * $Log: astproc.c,v $
4 * Revision 1.21 2007/11/11 22:35:22 khansen
5 * compile on mac
7 * Revision 1.20 2007/08/19 10:17:39 khansen
8 * allow symbols to be used without having been declared
10 * Revision 1.19 2007/08/12 18:58:12 khansen
11 * ability to generate pure 6502 binary (--pure-binary switch)
13 * Revision 1.18 2007/08/12 02:42:46 khansen
14 * prettify, const
16 * Revision 1.17 2007/08/09 22:06:10 khansen
17 * ability to pass in reference to local label as argument to macro
19 * Revision 1.16 2007/08/09 20:48:46 khansen
20 * disable buggy code that can cause crash
22 * Revision 1.15 2007/08/09 20:33:40 khansen
23 * progress
25 * Revision 1.14 2007/08/08 22:40:01 khansen
26 * improved symbol lookup, definitions must precede usage
28 * Revision 1.13 2007/07/22 13:33:26 khansen
29 * convert tabs to whitespaces
31 * Revision 1.12 2005/01/09 11:17:57 kenth
32 * xorcyst 1.4.5
33 * fixed bug in process_data(), merge_data()
34 * no longer truncation warning when fits in signed byte/word
36 * Revision 1.11 2005/01/05 02:28:13 kenth
37 * xorcyst 1.4.3
38 * support for anonymous unions
39 * fixed sizeof bug
41 * Revision 1.10 2004/12/29 21:44:41 kenth
42 * xorcyst 1.4.2
43 * static indexing, sizeof improved
45 * Revision 1.9 2004/12/25 02:22:35 kenth
46 * fixed bug in reduce_user_storage()
48 * Revision 1.8 2004/12/19 19:58:29 kenth
49 * xorcyst 1.4.0
51 * Revision 1.7 2004/12/18 16:57:39 kenth
52 * STORAGE_NODE(WORD/DWORD_DATATYPE) converts to BYTE
54 * Revision 1.6 2004/12/16 13:19:47 kenth
55 * xorcyst 1.3.5
57 * Revision 1.5 2004/12/14 01:49:05 kenth
58 * xorcyst 1.3.0
60 * Revision 1.4 2004/12/11 02:01:25 kenth
61 * added forward/backward branching
63 * Revision 1.3 2004/12/09 11:18:13 kenth
64 * added: warning, error node processing
66 * Revision 1.2 2004/12/06 04:52:24 kenth
67 * Major updates (xorcyst 1.1.0)
69 * Revision 1.1 2004/06/30 07:55:31 kenth
70 * Initial revision
74 /**
75 * (C) 2004 Kent Hansen
77 * The XORcyst is free software; you can redistribute it and/or modify
78 * it under the terms of the GNU General Public License as published by
79 * the Free Software Foundation; either version 2 of the License, or
80 * (at your option) any later version.
82 * The XORcyst is distributed in the hope that it will be useful,
83 * but WITHOUT ANY WARRANTY; without even the implied warranty of
84 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
85 * GNU General Public License for more details.
87 * You should have received a copy of the GNU General Public License
88 * along with The XORcyst; if not, write to the Free Software
89 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
92 /**
93 * This file contains functions that process the Abstract Syntax Tree (AST).
94 * After the assembly file has been parsed into an AST, a number of passes are
95 * made on it to process it and transform it. The functions here are
96 * concerned with things like
97 * - macro expansion
98 * - symbol table generation
99 * - equates substitution
100 * - constant folding
101 * - code and symbol validation
104 #include <stdlib.h>
105 #include <stdio.h>
106 #include <stdarg.h>
107 #include <string.h>
108 #include <assert.h>
109 #include "astproc.h"
110 #include "symtab.h"
111 #include "opcode.h"
112 #include "charmap.h"
113 #include "xasm.h"
/* Inclusive range test shared by the predicates below.
   NOTE: v is expanded twice, so avoid arguments with side effects. */
#define VALUE_IN_RANGE(v, lo, hi) (((v) >= (lo)) && ((v) <= (hi)))

/* Does v fit in 8 bits when read as signed, as unsigned, or as either? */
#define IS_SIGNED_BYTE_VALUE(v) VALUE_IN_RANGE(v, -128, 127)
#define IS_UNSIGNED_BYTE_VALUE(v) VALUE_IN_RANGE(v, 0, 255)
#define IS_BYTE_VALUE(v) (IS_SIGNED_BYTE_VALUE(v) || IS_UNSIGNED_BYTE_VALUE(v))

/* Does v fit in 16 bits when read as signed, as unsigned, or as either? */
#define IS_SIGNED_WORD_VALUE(v) VALUE_IN_RANGE(v, -32768, 32767)
#define IS_UNSIGNED_WORD_VALUE(v) VALUE_IN_RANGE(v, 0, 65535)
#define IS_WORD_VALUE(v) (IS_SIGNED_WORD_VALUE(v) || IS_UNSIGNED_WORD_VALUE(v))
123 /*---------------------------------------------------------------------------*/
/** Total number of errors reported through err(). */
static int err_count = 0;

/** Total number of warnings reported through warn(). */
static int warn_count = 0;

/* Number of global labels encountered so far. */
static int label_count = 0;

/* Tells whether the current statement is in the dataseg (as opposed to codeseg). */
static int in_dataseg = 0;

/* Default symbol modifiers, i.e. ZEROPAGE_FLAG, PUBLIC_FLAG */
static int modifiers = 0;

/* Program counters, used when we are outputting pure 6502 binary */
static int dataseg_pc;
static int codeseg_pc;
144 /*---------------------------------------------------------------------------*/
/**
 * Translation table from regular ASCII characters to custom character values.
 * Used to transform .char arrays to regular .db arrays.
 * reset_charmap() restores the identity mapping.
 */
static unsigned char charmap[256];
152 * Resets the custom character map.
153 * Every ASCII character is mapped to itself.
155 static void reset_charmap()
157 int i;
158 for (i=0; i<256; i++) {
159 charmap[i] = (char)i;
163 /*---------------------------------------------------------------------------*/
164 /* Forward/backward branching stuff */
166 struct tag_forward_branch_info {
167 astnode *refs[128];
168 int index; /* Index into refs */
169 int counter;
172 typedef struct tag_forward_branch_info forward_branch_info;
174 struct tag_backward_branch_info {
175 astnode *decl;
176 int counter;
179 typedef struct tag_backward_branch_info backward_branch_info;
181 #define BRANCH_MAX 8
183 static forward_branch_info forward_branch[BRANCH_MAX];
185 static backward_branch_info backward_branch[BRANCH_MAX];
188 * Zaps forward/backward branch data.
190 static void branch_init()
192 int i, j;
193 for (i=0; i<BRANCH_MAX; i++) {
194 for (j=0; j<128; j++) {
195 forward_branch[i].refs[j] = NULL;
197 forward_branch[i].index = 0;
198 forward_branch[i].counter = 0;
199 backward_branch[i].decl = NULL;
200 backward_branch[i].counter = 0;
204 /*---------------------------------------------------------------------------*/
207 * Issues an error.
208 * @param loc File location of error
209 * @param fmt printf-style format string
211 static void err(location loc, const char *fmt, ...)
213 va_list ap;
214 va_start(ap, fmt);
216 /* Print error message w/ location info */
217 fprintf(stderr, "error: %s:", loc.file);
218 LOCATION_PRINT(stderr, loc);
219 fprintf(stderr, ": ");
220 vfprintf(stderr, fmt, ap);
221 fprintf(stderr, "\n");
223 va_end(ap);
225 /* Increase total error count */
226 err_count++;
230 * Issues a warning.
231 * @param loc File location of warning
232 * @param fmt printf-style format string
234 static void warn(location loc, const char *fmt, ...)
236 va_list ap;
237 if (!xasm_args.no_warn) {
238 va_start(ap, fmt);
239 /* Print warning message w/ location info */
240 fprintf(stderr, "warning: %s:", loc.file);
241 LOCATION_PRINT(stderr, loc);
242 fprintf(stderr, ": ");
243 vfprintf(stderr, fmt, ap);
244 fprintf(stderr, "\n");
245 va_end(ap);
248 /* Increase total warning count */
249 warn_count++;
253 * Gets the number of errors encountered during processing.
254 * @return Number of errors
256 int astproc_err_count()
258 return err_count;
261 /*---------------------------------------------------------------------------*/
264 * Gets the processor function for a node type from a map.
265 * Used by astproc_walk().
266 * @param type The node type
267 * @param map A mapping from node types to processor functions
269 static astnodeproc astproc_node_type_to_proc(astnode_type type, const astnodeprocmap *map)
271 /* Try all map entries */
272 for (; map->proc != NULL; map += 1) {
273 if (map->type == type) {
274 return map->proc; /* Match */
277 /* No match */
278 return NULL;
281 /*---------------------------------------------------------------------------*/
284 * Walks an abstract syntax tree recursively.
285 * @param n Node to walk
286 * @param arg Optional argument to pass to processor function
287 * @param map Mapping of node types to processor functions
289 static void astproc_walk_recursive(astnode *n, void *arg, const astnodeprocmap *map, astnode **next)
291 astnode *c;
292 astnode *t;
293 if (n == NULL) { return; }
294 /* Process this node if it has a processor function */
295 astnodeproc p = astproc_node_type_to_proc(astnode_get_type(n), map);
296 if (p != NULL) {
297 if (!p(n, arg, next)) return; /* Don't walk children */
299 /* Walk the node's children recursively */
300 for (c=n->first_child; c != NULL; c = t) {
301 t = c->next_sibling; /* default next node */
302 astproc_walk_recursive(c, arg, map, &t);
307 * Generic tree walker function.
308 * @param n Root
309 * @param arg General-purpose argument passed to each node handler function
310 * @param map Array of (nodetype, handler function) tuples
312 void astproc_walk(astnode *n, void *arg, const astnodeprocmap *map)
314 astnode *dummy;
315 astproc_walk_recursive(n, arg, map, &dummy);
318 /*---------------------------------------------------------------------------*/
321 * Don't do any processing of this node or its children on this pass.
323 static int noop(astnode *n, void *arg, astnode **next)
325 return 0;
329 * Substitutes an identifier node with subst_expr if the id is equal to subst_id.
330 * @param n A node of type IDENTIFIER_NODE
331 * @param arg Array of length 2, containing (expr, id) pair
333 static int substitute_id(astnode *n, void *arg, astnode **next)
335 /* arg is array containing expression and identifier */
336 astnode **array = (astnode **)arg;
337 astnode *subst_expr = array[0];
338 astnode *subst_id = array[1];
339 /* Test if this node and the identifier to replace are equal */
340 if (astnode_equal(n, subst_id)) {
341 /* They're equal, replace it by expression. */
342 astnode *cl = astnode_clone(subst_expr, n->loc);
343 /* ### Generalize: traverse all children, set the flag */
344 if (astnode_get_type(cl) == LOCAL_ID_NODE) {
345 cl->flags |= 0x80; /* don't globalize it */
347 astnode_replace(n, cl);
348 astnode_finalize(n);
349 *next = cl;
350 return 0;
351 } else {
352 return 1;
357 * Substitutes expr for id in list.
358 * Used by macro expander to substitute a macro body parameter name with the
359 * actual expression used in the macro expansion.
360 * @param expr An expression
361 * @param id An identifier
362 * @param list A list of statements (macro body)
364 static void substitute_expr_for_id(astnode *expr, astnode *id, astnode *list)
366 /* Prepare argument to astproc_walk */
367 astnode *array[2];
368 array[0] = expr;
369 array[1] = id;
370 /* Table of callback functions for our purpose. */
371 static astnodeprocmap map[] = {
372 { IDENTIFIER_NODE, substitute_id },
373 { 0, NULL }
375 /* Do the walk. */
376 astproc_walk(list, array, map);
379 /*---------------------------------------------------------------------------*/
382 * Globalizes a macro expanded local.
383 * This is done simply by concatenating the local label identifier with the
384 * global macro invocation counter.
385 * @param n A node of type LOCAL_LABEL_NODE or LOCAL_ID_NODE
386 * @param arg Namespace counter (int)
388 static int globalize_macro_expanded_local(astnode *n, void *arg, astnode **next)
390 /* Only globalize if it's a reference to a label defined in the macro */
391 if (!(n->flags & 0x80)) {
392 char str[16];
393 int count;
394 /* Make it global by appending the macro expansion counter to the id */
395 count = (int)arg;
396 sprintf(str, "#%d", count);
397 if (astnode_is_type(n, LOCAL_LABEL_NODE)) {
398 /* LOCAL_LABEL_NODE, use label field */
399 n->label = realloc(n->label, strlen(n->label)+strlen(str)+1);
400 strcat(n->label, str);
401 } else {
402 /* LOCAL_ID_NODE, use ident field */
403 assert(astnode_is_type(n, LOCAL_ID_NODE));
404 n->ident = realloc(n->ident, strlen(n->ident)+strlen(str)+1);
405 strcat(n->ident, str);
408 /* */
409 return 1;
413 * Globalizes all locals in the body of a macro expansion.
414 * Used by the macro expander to ensure that local labels in macro expansions
415 * are unique.
416 * @param exp_body The expanded macro body
417 * @param count Unique macro namespace counter
419 static void globalize_macro_expanded_locals(astnode *exp_body, int count)
421 /* Table of callback functions for our purpose. */
422 static astnodeprocmap map[] = {
423 { LOCAL_ID_NODE, globalize_macro_expanded_local },
424 { LOCAL_LABEL_NODE, globalize_macro_expanded_local },
425 { 0, NULL }
427 /* Do the walk. */
428 astproc_walk(exp_body, (void *)count, map);
432 * Expands a macro; that is, replaces a macro invocation in the AST with the
433 * macro body. Substitutes parameter names for values.
434 * @param n Must be a node of type MACRO_NODE
435 * @param arg Not used
437 static int expand_macro(astnode *n, void *arg, astnode **next)
439 astnode *decl;
440 astnode *decl_body;
441 astnode *exp_body;
442 astnode *formals;
443 astnode *actuals;
444 astnode *id;
445 astnode *expr;
446 int i;
447 /* Keeps track of the current/total number of macro expansions */
448 static int count = 0;
449 /* Get the name of the macro to expand */
450 id = astnode_get_child(n, 0);
451 /* Look up its definition in symbol table */
452 symtab_entry *e = symtab_lookup(id->ident);
453 /* If it's not in the symbol table, error. */
454 if (e == NULL) {
455 err(n->loc, "unknown macro or directive `%s'", id->ident);
456 /* Remove from AST */
457 astnode_remove(n);
458 astnode_finalize(n);
459 return 0;
461 else if (e->type != MACRO_SYMBOL) {
462 err(n->loc, "cannot expand `%s'; not a macro", e->id);
463 /* Remove from AST */
464 astnode_remove(n);
465 astnode_finalize(n);
466 return 0;
468 else {
469 /* e->def has pointer to proper MACRO_DECL_NODE */
470 decl = (astnode *)e->def;
471 /* Get the lists of formals and actuals */
472 formals = astnode_get_child(decl, 1);
473 actuals = astnode_get_child(n, 1);
474 /* Verify that argument count is correct */
475 if (astnode_get_child_count(formals) != astnode_get_child_count(actuals)) {
476 err(n->loc, "macro `%s' does not take %d argument(s)", id->ident, astnode_get_child_count(actuals) );
477 /* Remove from AST */
478 astnode_remove(n);
479 astnode_finalize(n);
480 return 0;
482 /* Expand the body */
483 decl_body = astnode_get_child(decl, 2);
484 exp_body = astnode_clone(decl_body, n->loc);
485 /* Substitute actuals for formals */
486 for (i=0; i<astnode_get_child_count(actuals); i++) {
487 /* The id to substitute */
488 id = astnode_get_child(formals, i);
489 /* The expression to substitute it with */
490 expr = astnode_get_child(actuals, i);
491 /* Do it! */
492 substitute_expr_for_id(expr, id, exp_body);
494 /* Make locals a bit more global */
495 globalize_macro_expanded_locals(exp_body, count);
496 /* Replace MACRO_NODE by the macro body instance */
497 astnode_replace(n, astnode_get_child(exp_body, 0));
498 /* Discard the replaced node */
499 astnode_finalize(n);
500 /* Increase macro expansion counter */
501 count++;
502 /* Set next node to start of body */
503 *next = exp_body;
505 /* */
506 return 0;
509 /*---------------------------------------------------------------------------*/
512 * Does constant folding of expression.
513 * If the expression can be folded, the original expression is replaced by the
514 * new one, and the original expression is finalized.
515 * @param expr Expression
516 * @return Original expression, if couldn't fold, otherwise new, folded expression
518 astnode *astproc_fold_constants(astnode *expr)
520 astnode *folded;
521 astnode *lhs;
522 astnode *rhs;
523 if (expr == NULL) { return NULL; }
524 folded = NULL;
525 if (astnode_is_type(expr, ARITHMETIC_NODE)) {
526 /* Fold operands recursively */
527 lhs = astproc_fold_constants(LHS(expr));
528 rhs = astproc_fold_constants(RHS(expr));
529 switch (expr->oper) {
530 /* Binary ops */
531 case PLUS_OPERATOR:
532 case MINUS_OPERATOR:
533 case MUL_OPERATOR:
534 case DIV_OPERATOR:
535 case MOD_OPERATOR:
536 case AND_OPERATOR:
537 case OR_OPERATOR:
538 case XOR_OPERATOR:
539 case SHL_OPERATOR:
540 case SHR_OPERATOR:
541 case LT_OPERATOR:
542 case GT_OPERATOR:
543 case EQ_OPERATOR:
544 case NE_OPERATOR:
545 case LE_OPERATOR:
546 case GE_OPERATOR:
547 /* See if it can be folded */
548 if ( (astnode_is_type(lhs, INTEGER_NODE)) &&
549 (astnode_is_type(rhs, INTEGER_NODE)) ) {
550 /* Both sides are integer literals, so fold. */
551 switch (expr->oper) {
552 case PLUS_OPERATOR: folded = astnode_create_integer(lhs->integer + rhs->integer, expr->loc); break;
553 case MINUS_OPERATOR: folded = astnode_create_integer(lhs->integer - rhs->integer, expr->loc); break;
554 case MUL_OPERATOR: folded = astnode_create_integer(lhs->integer * rhs->integer, expr->loc); break;
555 case DIV_OPERATOR: folded = astnode_create_integer(lhs->integer / rhs->integer, expr->loc); break;
556 case MOD_OPERATOR: folded = astnode_create_integer(lhs->integer % rhs->integer, expr->loc); break;
557 case AND_OPERATOR: folded = astnode_create_integer(lhs->integer & rhs->integer, expr->loc); break;
558 case OR_OPERATOR: folded = astnode_create_integer(lhs->integer | rhs->integer, expr->loc); break;
559 case XOR_OPERATOR: folded = astnode_create_integer(lhs->integer ^ rhs->integer, expr->loc); break;
560 case SHL_OPERATOR: folded = astnode_create_integer(lhs->integer << rhs->integer, expr->loc); break;
561 case SHR_OPERATOR: folded = astnode_create_integer(lhs->integer >> rhs->integer, expr->loc); break;
562 case LT_OPERATOR: folded = astnode_create_integer(lhs->integer < rhs->integer, expr->loc); break;
563 case GT_OPERATOR: folded = astnode_create_integer(lhs->integer > rhs->integer, expr->loc); break;
564 case EQ_OPERATOR: folded = astnode_create_integer(lhs->integer == rhs->integer, expr->loc); break;
565 case NE_OPERATOR: folded = astnode_create_integer(lhs->integer != rhs->integer, expr->loc); break;
566 case LE_OPERATOR: folded = astnode_create_integer(lhs->integer <= rhs->integer, expr->loc); break;
567 case GE_OPERATOR: folded = astnode_create_integer(lhs->integer >= rhs->integer, expr->loc); break;
569 default: /* Error, actually */
570 folded = expr;
571 break;
573 if (folded != expr) {
574 /* Replace expression by folded one. */
575 astnode_replace(expr, folded);
576 astnode_finalize(expr);
577 return folded;
580 else if ( (astnode_is_type(lhs, STRING_NODE)) &&
581 (astnode_is_type(rhs, STRING_NODE)) ) {
582 /* Both sides are string literals. */
583 /* Folding is defined only for certain operators. */
584 switch (expr->oper) {
585 case PLUS_OPERATOR:
586 /* String concatenation. */
587 folded = astnode_create(STRING_NODE, expr->loc);
588 folded->string = (char *)malloc(strlen(lhs->string) + strlen(rhs->string) + 1);
589 if (folded->string != NULL) {
590 strcpy(folded->string, lhs->string);
591 strcat(folded->string, rhs->string);
593 break;
595 /* String comparison. */
596 case LT_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) < 0, expr->loc); break;
597 case GT_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) > 0, expr->loc); break;
598 case EQ_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) == 0, expr->loc); break;
599 case NE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) != 0, expr->loc); break;
600 case LE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) <= 0, expr->loc); break;
601 case GE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) >= 0, expr->loc); break;
603 default:
604 folded = expr;
605 break;
607 if (folded != expr) {
608 /* Replace expression by folded one. */
609 astnode_replace(expr, folded);
610 astnode_finalize(expr);
611 return folded;
614 else if ((astnode_get_type(lhs) == STRING_NODE) &&
615 (astnode_get_type(rhs) == INTEGER_NODE) &&
616 (expr->oper == PLUS_OPERATOR)) {
617 /* Left side is string and right side is integer.
618 Result is a string. */
619 char str[32];
620 sprintf(str, "%d", rhs->integer);
621 folded = astnode_create(STRING_NODE, expr->loc);
622 folded->string = (char *)malloc(strlen(lhs->string) + strlen(str) + 1);
623 if (folded->string != NULL) {
624 strcpy(folded->string, lhs->string);
625 strcat(folded->string, str);
627 /* Replace expression by folded one. */
628 astnode_replace(expr, folded);
629 astnode_finalize(expr);
630 return folded;
632 else if ((astnode_get_type(rhs) == STRING_NODE) &&
633 (astnode_get_type(lhs) == INTEGER_NODE) &&
634 (expr->oper == PLUS_OPERATOR)) {
635 /* Left side is integer and right side is string.
636 Result is a string. */
637 char str[32];
638 sprintf(str, "%d", lhs->integer);
639 folded = astnode_create(STRING_NODE, expr->loc);
640 folded->string = (char *)malloc(strlen(str) + strlen(rhs->string) + 1);
641 if (folded->string != NULL) {
642 strcpy(folded->string, str);
643 strcat(folded->string, rhs->string);
645 /* Replace expression by folded one. */
646 astnode_replace(expr, folded);
647 astnode_finalize(expr);
648 return folded;
650 /* Use some mathematical identities... */
651 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 0))
652 && (expr->oper == PLUS_OPERATOR)) {
653 /* 0+expr == expr */
654 astnode_remove_child(expr, rhs);
655 astnode_replace(expr, rhs);
656 astnode_finalize(expr);
657 return rhs;
659 else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 0))
660 && (expr->oper == PLUS_OPERATOR)) {
661 /* expr+0 == expr */
662 astnode_remove_child(expr, lhs);
663 astnode_replace(expr, lhs);
664 astnode_finalize(expr);
665 return lhs;
667 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 1))
668 && (expr->oper == MUL_OPERATOR)) {
669 /* 1*expr == expr */
670 astnode_remove_child(expr, rhs);
671 astnode_replace(expr, rhs);
672 astnode_finalize(expr);
673 return rhs;
675 else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 1))
676 && ((expr->oper == MUL_OPERATOR) || (expr->oper == DIV_OPERATOR)) ) {
677 /* expr*1 == expr */
678 /* expr/1 == expr */
679 astnode_remove_child(expr, lhs);
680 astnode_replace(expr, lhs);
681 astnode_finalize(expr);
682 return lhs;
684 else {
685 /* No chance of folding this one. */
687 break;
689 /* Unary ops */
690 case NEG_OPERATOR:
691 case NOT_OPERATOR:
692 case LO_OPERATOR:
693 case HI_OPERATOR:
694 case UMINUS_OPERATOR:
695 case BANK_OPERATOR:
696 /* See if it can be folded */
697 if (astnode_is_type(lhs, INTEGER_NODE)) {
698 /* Fold it. */
699 switch (expr->oper) {
700 case NEG_OPERATOR: folded = astnode_create_integer(~lhs->integer, expr->loc); break;
701 case NOT_OPERATOR: folded = astnode_create_integer(!lhs->integer, expr->loc); break;
702 case LO_OPERATOR: folded = astnode_create_integer(lhs->integer & 0xFF, expr->loc); break;
703 case HI_OPERATOR: folded = astnode_create_integer((lhs->integer >> 8) & 0xFF, expr->loc); break;
704 case UMINUS_OPERATOR: folded = astnode_create_integer(-lhs->integer, expr->loc); break;
705 default: break;
707 /* Replace expression by folded one. */
708 astnode_replace(expr, folded);
709 astnode_finalize(expr);
710 return folded;
712 else {
713 /* Couldn't fold this one. */
715 break;
718 /* Couldn't fold it, return original expression */
719 return expr;
722 /*---------------------------------------------------------------------------*/
725 * Substitutes identifier if it has a constant definition in symbol table.
726 * @param expr Node of type IDENTIFIER_NODE
728 static astnode *substitute_ident(astnode *expr)
730 astnode *c;
731 symtab_entry *e;
732 /* Look it up in symbol table */
733 e = symtab_lookup(expr->ident);
734 if (e != NULL) {
735 /* Found it. Test if it's a define. */
736 if (e->type == CONSTANT_SYMBOL) {
737 /* This is a defined symbol that should be
738 replaced by the expression it stands for */
739 c = astnode_clone((astnode *)e->def, expr->loc);
740 astnode_replace(expr, c);
741 astnode_finalize(expr);
742 expr = c;
745 else {
746 /* Didn't find it in symbol table. */
748 return expr;
752 * Substitutes sizeof with proper constant.
753 * @param expr Node of type SIZEOF_NODE
755 static astnode *reduce_sizeof(astnode *expr)
757 int ok;
758 astnode *c;
759 astnode *id;
760 astnode *type;
761 astnode *count;
762 symtab_entry *e;
764 count = NULL;
765 if (astnode_is_type(LHS(expr), IDENTIFIER_NODE)) {
766 /* Identifier might be the name of a user-defined type, OR
767 it might be the name of a variable of a user-defined type */
768 type = NULL;
769 /* Look it up */
770 id = LHS(expr);
771 e = symtab_global_lookup(id->ident);
772 if (e != NULL) {
773 switch (e->type) {
774 case STRUC_SYMBOL:
775 case UNION_SYMBOL:
776 case RECORD_SYMBOL:
777 case ENUM_SYMBOL:
778 type = astnode_create_datatype(USER_DATATYPE, astnode_clone(id, id->loc), id->loc);
779 break;
781 case VAR_SYMBOL:
782 type = astnode_clone(LHS(e->def), id->loc);
783 if (astnode_is_type(e->def, STORAGE_NODE)) {
784 count = astnode_clone(RHS(e->def), id->loc);
786 else {
787 count = astnode_create_integer(astnode_get_child_count(e->def)-1, id->loc);
789 break;
791 default:
792 /* Can't take sizeof of this symbol type */
793 break;
796 if (type == NULL) {
797 /* Unknown */
798 type = astnode_create_datatype(USER_DATATYPE, astnode_clone(id, id->loc), id->loc);
800 /* Replace identifier by datatype node */
801 astnode_replace(id, type);
802 astnode_finalize(id);
804 type = LHS(expr);
805 switch (type->datatype) {
806 case BYTE_DATATYPE:
807 case CHAR_DATATYPE:
808 c = astnode_create_integer(1, expr->loc);
809 astnode_replace(expr, c);
810 astnode_finalize(expr);
811 expr = c;
812 break;
814 case WORD_DATATYPE:
815 c = astnode_create_integer(2, expr->loc);
816 astnode_replace(expr, c);
817 astnode_finalize(expr);
818 expr = c;
819 break;
821 case DWORD_DATATYPE:
822 c = astnode_create_integer(4, expr->loc);
823 astnode_replace(expr, c);
824 astnode_finalize(expr);
825 expr = c;
826 break;
828 case USER_DATATYPE:
829 /* Look up the data type in symbol table */
830 id = LHS(type);
831 e = symtab_global_lookup(id->ident);
832 ok = 0;
833 if (e != NULL) {
834 switch (e->type) {
835 case STRUC_SYMBOL:
836 case UNION_SYMBOL:
837 /* Datatype is defined, replace sizeof with proper expression */
838 c = astnode_clone((astnode *)(e->struc.size), ((astnode *)(e->struc.size))->loc);
839 astnode_replace(expr, c);
840 astnode_finalize(expr);
841 expr = c;
842 ok = 1;
843 break;
845 case RECORD_SYMBOL:
846 case ENUM_SYMBOL:
847 /* 1 byte */
848 c = astnode_create_integer(1, expr->loc);
849 astnode_replace(expr, c);
850 astnode_finalize(expr);
851 expr = c;
852 ok = 1;
853 break;
855 default:
856 /* Dunno the size of this symbol type */
857 break;
860 if (!ok) {
861 /* Datatype not defined, error */
862 err(expr->loc, "size of `%s' is unknown", id->ident);
863 /* Replace by 1 */
864 c = astnode_create_integer(1, expr->loc);
865 astnode_replace(expr, c);
866 astnode_finalize(expr);
867 return c;
869 break;
871 default:
872 err(expr->loc, "substitute_sizeof(): unknown type");
873 break;
875 if (count != NULL) {
876 c = astnode_create_arithmetic(
877 MUL_OPERATOR,
878 astnode_clone(expr, expr->loc),
879 count,
880 expr->loc
882 astnode_replace(expr, c);
883 astnode_finalize(expr);
884 expr = c;
886 return expr;
890 * Substitutes A::B with an expression.
891 * If A is a struct: substitute with offset of B
892 * If A is a union: substitute with 0
893 * If A is an enumeration: substitute with value for B
894 * @param expr Node of type SCOPE_NODE
896 static astnode *reduce_scope(astnode *expr)
898 symtab_entry *ns;
899 symtab_entry *sym;
900 astnode *c;
901 astnode *namespace;
902 astnode *symbol;
903 /* Look up the namespace */
904 namespace = LHS(expr);
905 ns = symtab_lookup(namespace->ident);
906 if (ns != NULL) {
907 /* Look up the local symbol */
908 symtab_push(ns->symtab);
909 symbol = RHS(expr);
910 sym = symtab_lookup(symbol->ident);
911 if (sym != NULL) {
912 /* See if we can replace it */
913 switch (ns->type) {
914 case STRUC_SYMBOL:
915 case UNION_SYMBOL:
916 case RECORD_SYMBOL:
917 /* Replace with field offset */
918 c = astnode_clone(sym->field.offset, sym->field.offset->loc);
919 astnode_replace(expr, c);
920 astnode_finalize(expr);
921 expr = c;
922 break;
924 case ENUM_SYMBOL:
925 /* Replace with enum entry value */
926 c = astnode_clone(sym->def, sym->def->loc);
927 astnode_replace(expr, c);
928 astnode_finalize(expr);
929 expr = c;
930 break;
932 default:
933 break;
936 symtab_pop();
938 return expr;
941 static astnode *reduce_expression(astnode *expr);
944 * Handles remainder of fields in A.B.C.D . ..., where one or more fields may be indexed.
945 * @param expr Node of type DOT_NODE, INDEX_NODE or IDENTIFIER_NODE
947 static astnode *reduce_dot_recursive(astnode *expr)
949 astnode *term;
950 astnode *offset;
951 astnode *left;
952 astnode *right;
953 astnode *type;
954 symtab_entry *field;
955 symtab_entry *def;
956 astnode *index = NULL;
957 /* Get identifiers involved: 'right' is field in 'left' */
958 left = LHS(expr);
959 if (astnode_is_type(left, INDEX_NODE)) {
960 left = LHS(left); /* Need identifier */
962 right = RHS(expr);
963 if (astnode_is_type(right, DOT_NODE)) {
964 right = LHS(right); /* Need identifier */
966 if (astnode_is_type(right, INDEX_NODE)) {
967 index = RHS(right);
968 right = LHS(right); /* Need identifier */
970 /* Lookup 'right' in 'left's symbol table (on stack) */
971 field = symtab_lookup(right->ident);
972 /* Look up variable's type definition */
973 type = LHS(field->def);
974 /* Copy its offset */
975 offset = astnode_clone(field->field.offset, right->loc);
976 if (index != NULL) {
977 /* Create expression: identifier + sizeof(datatype) * index */
978 offset = astnode_create_arithmetic(
979 PLUS_OPERATOR,
980 offset,
981 astnode_create_arithmetic(
982 MUL_OPERATOR,
983 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
984 astnode_clone(index, index->loc),
985 index->loc
987 expr->loc
990 /* See if more subfields to process */
991 expr = RHS(expr);
992 if (astnode_is_type(expr, DOT_NODE)) {
993 /* Next field */
994 def = symtab_global_lookup(LHS(type)->ident);
995 symtab_push(def->symtab);
996 term = reduce_dot_recursive(expr);
997 symtab_pop();
998 /* Construct sum */
999 offset = astnode_create_arithmetic(
1000 PLUS_OPERATOR,
1001 offset,
1002 term,
1003 expr->loc
1006 return offset;
1010 * Transforms A.B.C.D . ... to A + offset(B) + offset(C) + ...
1011 * No error checking, since validate_dotref() should have been called previously.
1012 * @param expr Node of type DOT_NODE
1014 static astnode *reduce_dot(astnode *expr)
1016 symtab_entry *father;
1017 symtab_entry *def;
1018 astnode *type;
1019 astnode *left;
1020 astnode *term1;
1021 astnode *term2;
1022 astnode *sum;
1023 astnode *index = NULL;
1024 /* Look up parent in global symbol table */
1025 left = LHS(expr); /* expr := left . right */
1026 if (astnode_is_type(left, INDEX_NODE)) {
1027 index = RHS(left);
1028 left = LHS(left); /* Need identifier */
1030 father = symtab_lookup(left->ident);
1031 /* Look up variable's type definition */
1032 type = LHS(father->def); /* DATATYPE_NODE */
1033 def = symtab_lookup(LHS(type)->ident);
1034 /* 1st term of sum is the leftmost structure identifier */
1035 term1 = astnode_clone(left, left->loc);
1036 if (index != NULL) {
1037 /* Create expression: identifier + sizeof(datatype) * index */
1038 term1 = astnode_create_arithmetic(
1039 PLUS_OPERATOR,
1040 term1,
1041 astnode_create_arithmetic(
1042 MUL_OPERATOR,
1043 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
1044 astnode_clone(index, index->loc),
1045 index->loc
1047 expr->loc
1050 /* Add offsets recursively */
1051 symtab_push(def->symtab);
1052 term2 = reduce_dot_recursive(expr);
1053 symtab_pop();
1054 /* Calculate final sum */
1055 sum = astnode_create_arithmetic(
1056 PLUS_OPERATOR,
1057 term1,
1058 term2,
1059 expr->loc
1061 sum = reduce_expression(sum);
1062 /* Replace dotted expression by sum */
1063 astnode_replace(expr, sum);
1064 astnode_finalize(expr);
1065 return sum;
1069 * Reduces MASK operation to a field mask.
1070 * @param mask A node of type MASK_NODE
1072 static astnode *reduce_mask(astnode *mask)
1074 symtab_entry *ns;
1075 symtab_entry *sym;
1076 astnode *c;
1077 astnode *namespace;
1078 astnode *symbol;
1079 astnode *expr;
1080 /* Child is a scope node, record::field */
1081 expr = LHS(mask);
1082 /* Look up the namespace */
1083 namespace = LHS(expr);
1084 ns = symtab_lookup(namespace->ident);
1085 if (ns != NULL) {
1086 /* Make sure it's a record */
1087 if (ns->type != RECORD_SYMBOL) {
1088 err(expr->loc, "`%s' is not a record");
1089 /* Replace by 0 */
1090 c = astnode_create_integer(0, expr->loc);
1091 astnode_replace(mask, c);
1092 astnode_finalize(mask);
1093 expr = c;
1095 else {
1096 /* Look up the local symbol */
1097 symtab_push(ns->symtab);
1098 symbol = RHS(expr);
1099 sym = symtab_lookup(symbol->ident);
1100 if (sym != NULL) {
1101 /* Calculate field mask */
1102 // mask = ((1 << width) - 1) << offset
1103 c = astnode_create_arithmetic(
1104 SHL_OPERATOR,
1105 astnode_create_arithmetic(
1106 MINUS_OPERATOR,
1107 astnode_create_arithmetic(
1108 SHL_OPERATOR,
1109 astnode_create_integer(1, expr->loc),
1110 astnode_clone(sym->field.size, expr->loc),
1111 expr->loc
1113 astnode_create_integer(1, expr->loc),
1114 expr->loc
1116 astnode_clone(sym->field.offset, expr->loc),
1117 expr->loc
1119 c = reduce_expression(c);
1120 astnode_replace(mask, c);
1121 astnode_finalize(mask);
1122 expr = c;
1124 symtab_pop();
1127 return expr;
1131 * Reduces identifier[expression] to identifier + sizeof(identifier type) * expression
1133 static astnode *reduce_index(astnode *expr)
1135 symtab_entry *e;
1136 astnode *c;
1137 astnode *type;
1138 astnode *id;
1139 astnode *index;
1140 id = LHS(expr);
1141 index = reduce_expression(RHS(expr));
1142 /* Lookup identifier */
1143 e = symtab_lookup(id->ident);
1144 /* Get its datatype */
1145 type = LHS(e->def);
1146 /* Create expression: identifier + sizeof(datatype) * index */
1147 c = astnode_create_arithmetic(
1148 PLUS_OPERATOR,
1149 astnode_clone(id, id->loc),
1150 astnode_create_arithmetic(
1151 MUL_OPERATOR,
1152 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
1153 astnode_clone(index, index->loc),
1154 index->loc
1156 expr->loc
1158 /* Replace index expression */
1159 astnode_replace(expr, c);
1160 astnode_finalize(expr);
1161 /* Return the new expression */
1162 return c;
1166 * Substitutes all identifiers that represent EQU defines with their
1167 * corresponding expression.
1168 * @param expr The expression whose defines to substitute
1170 static astnode *substitute_defines(astnode *expr)
1172 switch (astnode_get_type(expr)) {
1173 case ARITHMETIC_NODE:
1174 substitute_defines(LHS(expr));
1175 substitute_defines(RHS(expr));
1176 break;
1178 case IDENTIFIER_NODE:
1179 expr = substitute_ident(expr);
1180 break;
1182 case SIZEOF_NODE:
1183 expr = reduce_sizeof(expr);
1184 break;
1186 case MASK_NODE:
1187 expr = reduce_mask(expr);
1188 break;
1190 case INDEX_NODE:
1191 substitute_defines(LHS(expr));
1192 substitute_defines(RHS(expr));
1193 break;
1195 case DOT_NODE:
1196 substitute_defines(LHS(expr));
1197 substitute_defines(RHS(expr));
1198 break;
1200 default:
1201 /* Nada */
1202 break;
1204 return expr;
1210 static astnode *reduce_highlevel_constructs(astnode *expr)
1212 switch (astnode_get_type(expr)) {
1213 case ARITHMETIC_NODE:
1214 reduce_highlevel_constructs(LHS(expr));
1215 reduce_highlevel_constructs(RHS(expr));
1216 break;
1218 case SCOPE_NODE:
1219 expr = reduce_scope(expr);
1220 break;
1222 case DOT_NODE:
1223 expr = reduce_dot(expr);
1224 break;
1226 case INDEX_NODE:
1227 expr = reduce_index(expr);
1228 break;
1230 default:
1231 /* Nada */
1232 break;
1234 return expr;
1238 * Really reduces an expression.
1239 * @param expr Expression to attempt to reduce
1241 static astnode *reduce_expression_complete(astnode *expr)
1243 return astproc_fold_constants( reduce_highlevel_constructs( substitute_defines(expr) ) );
1247 * Reduces an expression.
1248 * It does two things:
1249 * 1. Substitute all equates by their value
1250 * 2. Folds constants in the resulting expression
1251 * If the expression is reduced, the original expression is replaced by the
1252 * new one, the original is finalized, and a pointer to the new expression
1253 * is returned.
1254 * If the expression is not reduced, the original pointer is returned.
1256 static astnode *reduce_expression(astnode *expr)
1258 return astproc_fold_constants( substitute_defines(expr) );
1262 * Reduces RECORD instance to a single byte (DB statement).
1263 * @param r Record's symbol table entry
1264 * @param expr Record initializer
1265 * @param flat List on which to append the reduced form
1267 static void reduce_record(symtab_entry *r, astnode *init, astnode *flat)
1269 ordered_field_list *list;
1270 symtab_entry *e;
1271 astnode *val;
1272 astnode *term;
1273 astnode *result;
1274 astnode *mask;
1275 astnode *repl;
1276 /* Validate initializer */
1277 if (!astnode_is_type(init, STRUC_NODE)) {
1278 err(init->loc, "record initializer expected");
1279 return;
1281 /* Go through fields */
1282 symtab_push(r->symtab);
1283 result = astnode_create_integer(0, init->loc);
1284 for (val = init->first_child, list = r->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1285 if (astnode_is_type(val, NULL_NODE)) {
1286 continue;
1288 if (astnode_is_type(val, STRUC_NODE)) {
1289 err(init->loc, "record field initializer expected");
1290 continue;
1292 /* Get field definition */
1293 e = list->entry;
1294 /* Calculate field mask */
1295 // mask = ((1 << width) - 1) << offset
1296 mask = astnode_create_arithmetic(
1297 SHL_OPERATOR,
1298 astnode_create_arithmetic(
1299 MINUS_OPERATOR,
1300 astnode_create_arithmetic(
1301 SHL_OPERATOR,
1302 astnode_create_integer(1, val->loc),
1303 astnode_clone(e->field.size, val->loc),
1304 val->loc
1306 astnode_create_integer(1, val->loc),
1307 val->loc
1309 astnode_clone(e->field.offset, val->loc),
1310 val->loc
1312 /* Shift val left e->field.offset bits, AND with mask */
1313 term = astnode_create_arithmetic(
1314 AND_OPERATOR,
1315 astnode_create_arithmetic(
1316 SHL_OPERATOR,
1317 astnode_clone(val, val->loc),
1318 astnode_clone(e->field.offset, val->loc),
1319 val->loc
1321 mask,
1322 val->loc
1324 /* OR the value with the result so far */
1325 result = astnode_create_arithmetic(
1326 OR_OPERATOR,
1327 result,
1328 term,
1329 val->loc
1331 result = reduce_expression(result);
1333 /* Determine reason for stopping loop */
1334 if (val != NULL) {
1335 err(init->loc, "too many field initializers");
1337 /* Make byte data node (packed record value) */
1338 repl = astnode_create_data(
1339 astnode_create_datatype(BYTE_DATATYPE, NULL, init->loc),
1340 result,
1341 init->loc
1343 /* Add to list */
1344 astnode_add_child(flat, repl);
1345 /* Restore old symbol table */
1346 symtab_pop();
1350 * Reduces ENUM instance to DB.
1351 * @param e Enumeration's symbol table entry
1352 * @param expr Expression
1353 * @param flat List on which to append the reduced form
1355 static void reduce_enum(symtab_entry *e, astnode *expr, astnode *list)
1357 symtab_entry *sym;
1358 astnode *repl;
1359 if (!astnode_is_type(expr, IDENTIFIER_NODE)) {
1360 err(expr->loc, "identifier expected");
1362 else {
1363 /* Look up the enumeration symbol */
1364 symtab_push(e->symtab);
1365 sym = symtab_lookup(expr->ident);
1366 symtab_pop();
1367 /* Make byte data node (symbol value) */
1368 repl = astnode_create_data(
1369 astnode_create_datatype(BYTE_DATATYPE, NULL, expr->loc),
1370 astnode_clone(sym->def, expr->loc),
1371 expr->loc
1373 /* Add to list */
1374 astnode_add_child(list, repl);
1378 static void flatten_struc_recursive(symtab_entry *s, astnode *init, astnode *flat);
1381 * Flattens a union initializer to a sequence of native data values.
1382 * Verify similar to flattening of structure, but only single field allowed.
1383 * @param s Union's symbol table definition
1384 * @param init Union initializer
1385 * @param flat List on which to append the flattened form
1387 static void flatten_union_recursive(symtab_entry *s, astnode *init, astnode *flat)
1389 astnode *fill;
1390 astnode *type;
1391 astnode *count;
1392 symtab_entry *e;
1393 symtab_entry *t;
1394 astnode *val;
1395 astnode *valvals;
1396 astnode *temp;
1397 ordered_field_list *list;
1398 int num;
1399 /* Validate initializer */
1400 if (!astnode_is_type(init, STRUC_NODE)) {
1401 err(init->loc, "union initializer expected");
1402 return;
1404 /* Go through fields */
1405 symtab_push(s->symtab);
1406 fill = astnode_clone(s->struc.size, flat->loc);
1407 for (val = init->first_child, list = s->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1408 if (astnode_is_type(val, NULL_NODE)) {
1409 continue;
1411 if (!astnode_equal(fill, s->struc.size)) {
1412 err(init->loc, "only one field of union can be initialized");
1413 continue;
1415 /* Get field definition */
1416 e = list->entry;
1417 /* Symbol definition is STORAGE_NODE w/ two children: type and count */
1418 type = LHS(e->def);
1419 count = RHS(e->def);
1420 /* Decide what to do based on field type and value */
1421 switch (type->datatype) {
1422 case BYTE_DATATYPE:
1423 case CHAR_DATATYPE:
1424 case WORD_DATATYPE:
1425 case DWORD_DATATYPE:
1426 if (astnode_is_type(val, STRUC_NODE)) {
1427 /* Handle multi-value array */
1428 temp = astnode_clone(val, val->loc);
1429 valvals = astnode_remove_children(temp);
1430 astnode_finalize(temp);
1431 astnode_add_child(flat,
1432 astnode_create_data(
1433 astnode_create_datatype(type->datatype, NULL, type->loc),
1434 valvals,
1435 val->loc
1438 num = astnode_get_child_count(val);
1439 } else {
1440 /* Output single value */
1441 astnode_add_child(flat,
1442 astnode_create_data(
1443 astnode_create_datatype(type->datatype, NULL, type->loc),
1444 astnode_clone(val, val->loc),
1445 val->loc
1448 num = astnode_is_type(val, STRING_NODE) ? strlen(val->string) : 1;
1450 if (num > count->integer) {
1451 err(val->loc, "initializer for field `%s' exceeds field size", e->id);
1453 /* Fill in remainder of field if necessary: count - 1 */
1454 else if (count->integer > num) {
1455 astnode_add_child(flat,
1456 astnode_create_storage(
1457 astnode_create_datatype(type->datatype, NULL, type->loc),
1458 astproc_fold_constants(
1459 astnode_create_arithmetic(
1460 MINUS_OPERATOR,
1461 astnode_clone(count, count->loc),
1462 astnode_create_integer(num, flat->loc),
1463 count->loc
1466 val->loc
1470 break;
1472 case USER_DATATYPE:
1473 /* Look up user type definition */
1474 t = symtab_global_lookup(LHS(type)->ident);
1475 switch (t->type) {
1476 case STRUC_SYMBOL:
1477 flatten_struc_recursive(t, val, flat);
1478 break;
1480 case UNION_SYMBOL:
1481 flatten_union_recursive(t, val, flat);
1482 break;
1484 case RECORD_SYMBOL:
1485 reduce_record(t, val, flat);
1486 break;
1488 case ENUM_SYMBOL:
1489 reduce_enum(t, val, flat);
1490 break;
1492 default:
1493 break;
1495 break;
1497 /* Decrease fill amount according to field size */
1498 fill = astproc_fold_constants(
1499 astnode_create_arithmetic(
1500 MINUS_OPERATOR,
1501 fill,
1502 astnode_clone(e->field.size, flat->loc),
1503 flat->loc
1507 /* Determine reason for stopping loop */
1508 if (val != NULL) {
1509 err(init->loc, "too many field initializers");
1511 if (fill->integer > 0) {
1512 /* Fill remainder of union with zeroes */
1513 astnode_add_child(flat,
1514 astnode_create_storage(
1515 astnode_create_datatype(BYTE_DATATYPE, NULL, flat->loc),
1516 fill,
1517 flat->loc
1521 symtab_pop();
1525 * Flattens a structure initializer to a sequence of native data values.
1526 * @param s Structure's symbol table definition
1527 * @param init Structure initializer
1528 * @param flat List on which to append the flattened form
1530 static void flatten_struc_recursive(symtab_entry *s, astnode *init, astnode *flat)
1532 astnode *fill;
1533 astnode *type;
1534 astnode *count;
1535 astnode *temp;
1536 symtab_entry *e;
1537 symtab_entry *t;
1538 astnode *val;
1539 astnode *valvals;
1540 ordered_field_list *list;
1541 int num;
1542 /* Validate initializer */
1543 if (!astnode_is_type(init, STRUC_NODE)) {
1544 err(init->loc, "structure initializer expected");
1545 return;
1547 /* Go through fields */
1548 symtab_push(s->symtab);
1549 fill = astnode_clone(s->struc.size, flat->loc);
1550 for (val = init->first_child, list = s->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1551 /* Get field definition */
1552 e = list->entry;
1553 /* Check if normal field or anonymous union */
1554 if (e->type == UNION_SYMBOL) {
1555 if (astnode_is_type(val, NULL_NODE)) {
1556 /* Output union size bytes to fill in field */
1557 astnode_add_child(flat,
1558 astnode_create_storage(
1559 astnode_create_datatype(BYTE_DATATYPE, NULL, val->loc),
1560 astnode_clone(e->struc.size, val->loc),
1561 val->loc
1564 } else {
1565 flatten_union_recursive(e, val, flat);
1566 /* Decrease fill amount according to union size */
1567 fill = astproc_fold_constants(
1568 astnode_create_arithmetic(
1569 MINUS_OPERATOR,
1570 fill,
1571 astnode_clone(e->struc.size, flat->loc),
1572 flat->loc
1576 } else {
1577 /* VAR_SYMBOL */
1578 /* Symbol definition is STORAGE_NODE w/ two children: type and count */
1579 type = LHS(e->def);
1580 count = RHS(e->def);
1581 /* Decide what to do based on field type and value */
1582 switch (type->datatype) {
1583 case BYTE_DATATYPE:
1584 case CHAR_DATATYPE:
1585 case WORD_DATATYPE:
1586 case DWORD_DATATYPE:
1587 if (astnode_is_type(val, NULL_NODE)) {
1588 /* Output field_size bytes to fill in field */
1589 astnode_add_child(flat,
1590 astnode_create_storage(
1591 astnode_create_datatype(type->datatype, NULL, type->loc),
1592 astnode_clone(count, count->loc),
1593 val->loc
1596 } else {
1597 if (astnode_is_type(val, STRUC_NODE)) {
1598 /* Handle multi-value array */
1599 temp = astnode_clone(val, val->loc);
1600 valvals = astnode_remove_children(temp);
1601 astnode_finalize(temp);
1602 astnode_add_child(flat,
1603 astnode_create_data(
1604 astnode_create_datatype(type->datatype, NULL, type->loc),
1605 valvals,
1606 val->loc
1609 num = astnode_get_child_count(val);
1610 } else {
1611 /* Output single value */
1612 astnode_add_child(flat,
1613 astnode_create_data(
1614 astnode_create_datatype(type->datatype, NULL, type->loc),
1615 astnode_clone(val, val->loc),
1616 val->loc
1619 num = astnode_is_type(val, STRING_NODE) ? strlen(val->string) : 1;
1621 if (astnode_is_type(count, INTEGER_NODE) && (count->integer < num)) {
1622 err(val->loc, "initializer for field `%s' exceeds field size", e->id);
1624 /* Fill in remainder of field if necessary: count - 1 */
1625 else if ( (astnode_is_type(count, INTEGER_NODE) && (count->integer > num))
1626 || !astnode_is_type(count, INTEGER_NODE) ) {
1627 astnode_add_child(flat,
1628 astnode_create_storage(
1629 astnode_create_datatype(type->datatype, NULL, flat->loc),
1630 astproc_fold_constants(
1631 astnode_create_arithmetic(
1632 MINUS_OPERATOR,
1633 astnode_clone(count, flat->loc),
1634 astnode_create_integer(num, flat->loc),
1635 flat->loc
1638 flat->loc
1643 break;
1645 case USER_DATATYPE:
1646 /* Look up user type definition */
1647 t = symtab_global_lookup(LHS(type)->ident);
1648 if (astnode_is_type(val, NULL_NODE)) {
1649 /* Output sizeof(type) bytes to fill in */
1650 astnode_add_child(flat,
1651 astnode_create_storage(
1652 astnode_create_datatype(BYTE_DATATYPE, NULL, val->loc),
1653 astnode_clone(t->struc.size, val->loc),
1654 val->loc
1657 } else {
1658 switch (t->type) {
1659 case STRUC_SYMBOL:
1660 flatten_struc_recursive(t, val, flat);
1661 break;
1663 case UNION_SYMBOL:
1664 flatten_union_recursive(t, val, flat);
1665 break;
1667 case RECORD_SYMBOL:
1668 reduce_record(t, val, flat);
1669 break;
1671 case ENUM_SYMBOL:
1672 reduce_enum(t, val, flat);
1673 break;
1675 default:
1676 break;
1679 break;
1681 /* Decrease fill amount according to field size */
1682 fill = astproc_fold_constants(
1683 astnode_create_arithmetic(
1684 MINUS_OPERATOR,
1685 fill,
1686 astnode_clone(e->field.size, flat->loc),
1687 flat->loc
1692 /* Determine reason for stopping loop */
1693 if (val != NULL) {
1694 err(init->loc, "too many field initializers");
1696 else if (list != NULL) {
1697 /* All fields not initialized; fill remainder of struc with zeroes */
1698 astnode_add_child(flat,
1699 astnode_create_storage(
1700 astnode_create_datatype(BYTE_DATATYPE, NULL, flat->loc),
1701 fill,
1702 flat->loc
1706 symtab_pop();
1710 * Converts data that is expressed in a high-level form (such as structure initializers)
1711 * to a simple sequence of bytes.
1712 * @param n The source node to flatten
1713 * @param type The type of data that n is an instance of
1714 * @param list List on which to append the resulting sequence of items (bytes/words/dwords)
1716 static void flatten_user_data(astnode *n, astnode *type, astnode *list)
1718 symtab_entry *def;
1719 /* Look up type definition */
1720 def = symtab_global_lookup(LHS(type)->ident);
1721 if (def != NULL) {
1722 switch (def->type) {
1723 case STRUC_SYMBOL:
1724 /* Flatten structure initializer to series of simple data statements */
1725 flatten_struc_recursive(def, n, list);
1726 break;
1728 case UNION_SYMBOL:
1729 /* Flatten union initializer to series of simple data statements */
1730 flatten_union_recursive(def, n, list);
1731 break;
1733 case RECORD_SYMBOL:
1734 reduce_record(def, n, list);
1735 break;
1737 case ENUM_SYMBOL:
1738 reduce_enum(def, n, list);
1739 break;
1741 default:
1742 break;
1747 /*---------------------------------------------------------------------------*/
1750 * Loads the character map specified by the node.
1751 * @param n Node of type CHARMAP_NODE
1753 static int load_charmap(astnode *n, void *arg, astnode **next)
1755 /* TODO: should probably be done in the parsing phase (same path resolution as for INCSRC and INCBIN) */
1756 astnode *file;
1757 /* Get file descriptor */
1758 file = astnode_get_child(n, 0);
1759 /* Try to load the charmap */
1760 if (charmap_parse(file->file_path, charmap) == 0) {
1761 err(n->loc, "could not open `%s' for reading", file->file_path);
1763 return 0;
1767 * First-time processing of instruction node.
1768 * @param n Node of type INSTRUCTION_NODE
1769 * @param arg Not used
1771 static int process_instruction(astnode *n, void *arg, astnode **next)
1773 astnode *expr;
1774 if (in_dataseg) {
1775 err(n->loc, "instructions not allowed in data segment");
1776 /* Remove from AST */
1777 astnode_remove(n);
1778 astnode_finalize(n);
1779 return 0;
1781 else {
1782 /* The instruction operand */
1783 expr = astnode_get_child(n, 0);
1784 /* Substitute defines and fold constants */
1785 reduce_expression(expr);
1786 return 1;
1791 * First-time processing of data node.
1792 * @param n Node of type DATA_NODE
1793 * @param arg Not used
1795 static int process_data(astnode *n, void *arg, astnode **next)
1797 int j;
1798 int k;
1799 astnode *type;
1800 astnode *expr;
1801 astnode *list;
1802 astnode *stmts;
1803 int ret = 1;
1804 type = astnode_get_child(n, 0); /* DATATYPE_NODE */
1805 if (in_dataseg) {
1806 err(n->loc, "value not allowed in data segment");
1807 /* Replace with storage node */
1808 astnode_replace(
1810 astnode_create_storage(
1811 astnode_create_datatype(BYTE_DATATYPE, NULL, n->loc),
1812 astnode_create_integer(1, n->loc),
1813 n->loc
1816 astnode_finalize(n);
1817 return 0;
1819 if (type->datatype == USER_DATATYPE) {
1820 /* Make sure the type exists */
1821 if (symtab_global_lookup(LHS(type)->ident) == NULL) {
1822 err(n->loc, "unknown type `%s'", LHS(type)->ident);
1823 /* Remove from AST */
1824 astnode_remove(n);
1825 astnode_finalize(n);
1826 return 0;
1827 } else {
1828 /* Attempt to reduce user data to native data */
1829 list = astnode_create(LIST_NODE, n->loc);
1830 for (expr = type->next_sibling; expr != NULL; expr = expr->next_sibling) {
1831 flatten_user_data(expr, type, list);
1833 /* Replace initializers with generated list */
1834 stmts = astnode_remove_children(list);
1835 astnode_replace(n, stmts);
1836 astnode_finalize(n);
1837 astnode_finalize(list);
1838 *next = stmts;
1839 ret = 0;
1842 /* Go through the list of data values, replacing defines and folding constants */
1843 for (j=1; j<astnode_get_child_count(n); j++) {
1844 expr = astnode_get_child(n, j);
1845 /* Substitute defines and fold constants */
1846 expr = reduce_expression(expr);
1847 /* If it's a string, replace by array of integers */
1848 /* (makes it easier to process later... favour regularity) */
1849 if (astnode_is_type(expr, STRING_NODE)) {
1850 astnode_remove_child(n, expr); /* Remove string */
1851 for (k=strlen(expr->string)-1; k>=0; k--) {
1852 /* Check if we should map character from custom charmap */
1853 if (type->datatype == CHAR_DATATYPE) {
1854 expr->string[k] = charmap[(unsigned)expr->string[k]];
1856 /* Append character value to array */
1857 astnode_insert_child(n, astnode_create_integer((unsigned char)expr->string[k], n->loc), j);
1859 if (type->datatype == CHAR_DATATYPE) {
1860 /* It's normal byte array now */
1861 type->datatype = BYTE_DATATYPE;
1863 j += strlen(expr->string)-1;
1864 astnode_finalize(expr);
1867 return ret;
1871 * First-time processing of storage node.
1872 * @param n Node of type STORAGE_NODE
1873 * @param arg Not used
1875 static int process_storage(astnode *n, void *arg, astnode **next)
1877 int item_size;
1878 astnode *type;
1879 astnode *expr;
1880 astnode *new_expr;
1881 type = LHS(n);
1882 expr = RHS(n);
1883 /* If not BYTE_DATATYPE, multiply by word/dword-size */
1884 switch (type->datatype) {
1885 case BYTE_DATATYPE:
1886 case CHAR_DATATYPE: item_size = 1; break;
1887 case WORD_DATATYPE: item_size = 2; break;
1888 case DWORD_DATATYPE: item_size = 4; break;
1889 default: item_size = 1; break; // ### Hmmm...
1891 if (item_size != 1) {
1892 new_expr = astnode_create_arithmetic(
1893 MUL_OPERATOR,
1894 astnode_clone(expr, expr->loc),
1895 astnode_create_integer(item_size, expr->loc),
1896 expr->loc
1898 astnode_replace(expr, new_expr);
1899 astnode_finalize(expr);
1900 expr = new_expr;
1901 type->datatype = BYTE_DATATYPE;
1903 /* Substitute defines and fold constants */
1904 expr = reduce_expression(expr);
1905 // TODO: Validate range somewhere else than here please... ???
1906 if (astnode_is_type(expr, INTEGER_NODE)) {
1907 if ((expr->integer <= 0) || (expr->integer >= 0x10000)) {
1908 err(n->loc, "operand out of range");
1911 return 1;
1915 * Process EQU node.
1916 * @param n Node of type EQU_NODE
1917 * @param arg Not used
1919 static int process_equ(astnode *n, void *arg, astnode **next)
1921 symtab_entry *e;
1922 astnode *id;
1923 astnode *expr;
1924 /* The expression which describes the value */
1925 expr = astnode_clone(astnode_get_child(n, 1), n->loc);
1926 /* Substitute defines and fold constants */
1927 expr = reduce_expression(expr);
1928 /* The identifier which is being defined */
1929 id = astnode_get_child(n, 0);
1930 /* Look up in symbol table */
1931 e = symtab_lookup(id->ident);
1932 if (e == NULL) {
1933 /* Symbol is being defined */
1934 // TODO: Check that expression is a constant?
1935 /* Enter it in symbol table */
1936 symtab_enter(id->ident, CONSTANT_SYMBOL, expr, 0);
1937 } else {
1938 /* Symbol is being redefined */
1939 /* This is not allowed for EQU equate! */
1940 if (!astnode_equal((astnode *)(e->def), expr)) {
1941 warn(n->loc, "redefinition of `%s' is not identical; ignored", id->ident);
1944 /* Remove the equate node from the tree. */
1945 astnode_remove(n);
1946 astnode_finalize(n);
1947 return 0;
1951 * Process '=' node.
1952 * @param n Node of type ASSIGN_NODE
1953 * @param arg Not used
1955 static int process_assign(astnode *n, void *arg, astnode **next)
1957 symtab_entry *e;
1958 astnode *id;
1959 astnode *expr;
1960 /* If it's part of ENUM declaration, don't touch */
1961 if (astnode_has_ancestor_of_type(n, ENUM_DECL_NODE)) {
1962 return 0;
1964 /* Very similar to EQU, except symbol 1) can be
1965 redefined and 2) is volatile (see end of proc) */
1966 /* The expression which describes the value */
1967 expr = astnode_clone(astnode_get_child(n, 1), n->loc);
1968 /* Substitute defines and fold constants */
1969 expr = reduce_expression(expr);
1970 /* The identifier which is being (re)defined */
1971 id = astnode_get_child(n, 0);
1972 /* Look up in symbol table */
1973 e = symtab_lookup(id->ident);
1974 if (e == NULL) {
1975 /* Symbol is being defined for the first time */
1976 /* Note that the VOLATILE_FLAG is set */
1977 symtab_enter(id->ident, CONSTANT_SYMBOL, expr, VOLATILE_FLAG);
1978 } else {
1979 /* Symbol is being redefined */
1980 /* This is OK for ASSIGN equate, simply replace definition */
1981 // ### store a list of definitions
1982 expr->loc = e->def->loc;
1983 e->def = expr;
1985 /* Remove the equate node from the tree. */
1986 astnode_remove(n);
1987 astnode_finalize(n);
1988 return 0;
1992 * Process IFDEF-node.
1993 * @param n Node of type IFDEF_NODE
1994 * @param arg Not used
1996 static int process_ifdef(astnode *n, void *arg, astnode **next)
1998 symtab_entry *e;
1999 astnode *id;
2000 astnode *stmts;
2001 /* The identifier which is being tested */
2002 id = astnode_get_child(n, 0);
2003 e = symtab_lookup(id->ident);
2004 if (e != NULL) {
2005 /* Symbol is defined. */
2006 /* Replace IFDEF node by the true-branch statement list */
2007 stmts = astnode_remove_children(astnode_get_child(n, 1));
2008 astnode_replace(n, stmts);
2009 *next = stmts;
2010 } else {
2011 /* Symbol is not defined. */
2012 /* Replace IFDEF node by the false-branch statement list (if any) */
2013 stmts = astnode_remove_children( astnode_get_child(n, 2));
2014 if (stmts != NULL) {
2015 astnode_replace(n, stmts);
2016 *next = stmts;
2017 } else {
2018 astnode_remove(n);
2021 /* Discard the original node */
2022 astnode_finalize(n);
2023 return 0;
2027 * Process IFNDEF-node.
2028 * @param n Node of type IFNDEF_NODE
2029 * @param arg Not used
2031 static int process_ifndef(astnode *n, void *arg, astnode **next)
2033 symtab_entry *e;
2034 astnode *id;
2035 astnode *stmts;
2036 /* The identifier which is being tested */
2037 id = astnode_get_child(n, 0);
2038 e = symtab_lookup(id->ident);
2039 if (e == NULL) {
2040 /* Symbol is not defined. */
2041 /* Replace IFNDEF node by the true-branch statement list */
2042 stmts = astnode_remove_children(astnode_get_child(n, 1));
2043 astnode_replace(n, stmts);
2044 *next = stmts;
2045 } else {
2046 /* Symbol is defined. */
2047 /* Replace IFNDEF node by the false-branch statement list, if any */
2048 stmts = astnode_remove_children(astnode_get_child(n, 2));
2049 if (stmts != NULL) {
2050 astnode_replace(n, stmts);
2051 *next = stmts;
2052 } else {
2053 astnode_remove(n);
2056 /* Discard the original node */
2057 astnode_finalize(n);
2058 return 0;
2062 * Process IF-node.
2063 * @param n Node of type IF_NODE
2064 * @param arg Not used
2066 static int process_if(astnode *n, void *arg, astnode **next)
2068 astnode *expr;
2069 astnode *stmts;
2070 astnode *c;
2071 int ret = 0;
2072 /* IF_NODE has a list of CASE, DEFAULT nodes as children */
2073 for (c = astnode_get_first_child(n); c != NULL; c = astnode_get_next_sibling(c) ) {
2074 if (astnode_is_type(c, CASE_NODE)) {
2075 /* The expression which is being tested */
2076 expr = astnode_get_child(c, 0);
2077 /* Try to reduce expression to literal */
2078 expr = reduce_expression(expr);
2079 /* Resulting expression must be an integer literal,
2080 since this is static evaluation.
2081 In other words, it can't contain label references.
2083 if (astnode_is_type(expr, INTEGER_NODE)) {
2084 /* Non-zero is true, zero is false */
2085 if (expr->integer) {
2086 /* Replace IF node by the true-branch statement list */
2087 stmts = astnode_remove_children( astnode_get_child(c, 1) );
2088 astnode_replace(n, stmts);
2089 astnode_finalize(n);
2090 *next = stmts;
2091 return ret;
2093 } else {
2094 /* Error, expression is not constant */
2095 err(expr->loc, "conditional expression does not evaluate to literal");
2097 } else { /* DEFAULT_NODE */
2098 /* Replace IF node by the false-branch statement list */
2099 stmts = astnode_remove_children(c);
2100 astnode_replace(n, stmts);
2101 astnode_finalize(n);
2102 *next = stmts;
2103 return ret;
2106 /* No match, remove IF node from AST */
2107 astnode_remove(n);
2108 astnode_finalize(n);
2109 return ret;
2113 * Process dataseg-node.
2114 * @param n Node of type DATASEG_NODE
2115 * @param arg Not used
2117 static int process_dataseg(astnode *n, void *arg, astnode **next)
2119 modifiers = n->modifiers;
2120 in_dataseg = 1; /* true */
2121 return 0;
2125 * Process codeseg-node.
2126 * @param n Node of type CODESEG_NODE
2127 * @param arg Not used
2129 static int process_codeseg(astnode *n, void *arg, astnode **next)
2131 modifiers = 0;
2132 in_dataseg = 0; /* false */
2133 return 0;
2137 * Process org-node.
2138 * @param n Node of type ORG_NODE
2139 * @param arg Not used
2141 static int process_org(astnode *n, void *arg, astnode **next)
2143 if (!xasm_args.pure_binary) {
2144 err(n->loc, "org directive can only be used when output format is pure 6502 binary");
2145 } else {
2146 astnode *addr = astnode_get_child(n, 0);
2147 addr = reduce_expression_complete(addr);
2148 if (astnode_is_type(addr, INTEGER_NODE)) {
2149 /* Range check */
2150 if ((addr->integer < 0) || (addr->integer >= 0x10000)) {
2151 err(n->loc, "org address out of 64K range");
2153 } else {
2154 err(n->loc, "org address does not evaluate to literal");
2155 /* Remove from AST */
2156 astnode_remove(n);
2157 astnode_finalize(n);
2160 return 0;
2164 * Process REPT node.
2165 * @param n Node of type REPT_NODE
2166 * @param arg Not used
2168 static int process_rept(astnode *n, void *arg, astnode **next)
2170 astnode *count;
2171 astnode *stmts;
2172 astnode *list;
2173 /* The repeat count */
2174 count = astnode_get_child(n, 0);
2175 /* Try to reduce count expression to literal */
2176 count = reduce_expression_complete(count);
2177 /* Resulting expression must be an integer literal,
2178 since this is static evaluation.
2180 if (astnode_is_type(count, INTEGER_NODE)) {
2181 if (count->integer < 0) {
2182 warn(n->loc, "REPT ignored; negative repeat count (%d)", count->integer);
2183 /* Remove from AST */
2184 astnode_remove(n);
2185 astnode_finalize(n);
2186 } else if (count->integer > 0) {
2187 /* Expand body <count> times */
2188 list = astnode_clone(astnode_get_child(n, 1), n->loc);
2189 stmts = astnode_remove_children(list);
2190 astnode_finalize(list);
2191 while (--count->integer > 0) {
2192 list = astnode_clone(astnode_get_child(n, 1), n->loc);
2193 astnode_add_sibling(stmts, astnode_remove_children(list) );
2194 astnode_finalize(list);
2196 astnode_replace(n, stmts);
2197 astnode_finalize(n);
2198 *next = stmts;
2199 } else {
2200 /* count == 0, remove from AST */
2201 astnode_remove(n);
2202 astnode_finalize(n);
2204 } else {
2205 err(n->loc, "repeat count does not evaluate to literal");
2206 /* Remove from AST */
2207 astnode_remove(n);
2208 astnode_finalize(n);
2210 return 0;
2214 * Process WHILE node.
2215 * @param n Node of type WHILE_NODE
2216 * @param arg Not used
2218 static int process_while(astnode *n, void *arg, astnode **next)
2220 astnode *expr;
2221 astnode *stmts;
2222 astnode *list;
2223 /* The boolean expression */
2224 expr = astnode_get_child(n, 0);
2225 /* Try to reduce expression to literal */
2226 expr = reduce_expression(astnode_clone(expr, expr->loc));
2227 /* Resulting expression must be an integer literal,
2228 since this is static evaluation.
2230 if (astnode_is_type(expr, INTEGER_NODE)) {
2231 /* Expand body if the expression is true */
2232 if (expr->integer) {
2233 list = astnode_clone(astnode_get_child(n, 1), n->loc);
2234 stmts = astnode_remove_children(list);
2235 astnode_finalize(list);
2236 astnode_replace(n, stmts);
2237 astnode_add_sibling(stmts, n); /* Clever huh? */
2238 *next = stmts;
2239 } else {
2240 /* Remove WHILE node from AST */
2241 astnode_remove(n);
2242 astnode_finalize(n);
2244 } else {
2245 err(n->loc, "while expression does not evaluate to literal");
2246 /* Remove WHILE node from AST */
2247 astnode_remove(n);
2248 astnode_finalize(n);
2250 astnode_finalize(expr);
2251 return 0;
2254 /*---------------------------------------------------------------------------*/
2257 * Enters a macro into the symbol table.
2258 * @param n Must be a node of type MACRO_DECL_NODE
2259 * @param arg Not used
2261 static int enter_macro(astnode *n, void *arg, astnode **next)
2263 astnode *id = astnode_get_child(n, 0); /* Child 0 is macro identifier */
2264 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2265 if (symtab_enter(id->ident, MACRO_SYMBOL, n, 0) == NULL) {
2266 /* ### This could be allowed, you know... */
2267 err(n->loc, "duplicate symbol `%s'", id->ident);
2269 /* Remove from AST */
2270 astnode_remove(n);
2271 // ### n is not finalized???
2272 return 0;
2276 * Enters a label into the symbol table.
2277 * @param n Must be a node of type LABEL_NODE
2279 static int enter_label(astnode *n, void *arg, astnode **next)
2281 symtab_entry *e;
2282 astnode *addr;
2283 /* Make sure it's unique first */
2284 if (symtab_lookup(n->ident)) {
2285 err(n->loc, "duplicate symbol `%s'", n->ident);
2286 /* Remove from AST */
2287 astnode_remove(n);
2288 astnode_finalize(n);
2289 } else {
2290 /* Enter it! */
2291 e = symtab_enter(n->ident, LABEL_SYMBOL, n, (in_dataseg ? DATA_FLAG : 0) | modifiers );
2292 /* Check if hardcoded address */
2293 addr = reduce_expression_complete(RHS(n));
2294 if (astnode_is_type(addr, INTEGER_NODE)) {
2295 /* Store it */
2296 e->address = addr->integer;
2297 e->flags |= ADDR_FLAG;
2298 } else if (!astnode_is_type(addr, CURRENT_PC_NODE)) {
2299 err(n->loc, "label address does not evaluate to literal");
2301 /* Increase namespace counter */
2302 label_count++;
2304 /* */
2305 return 0;
2309 * Enters a variable declaration in symbol table.
2310 * @param n Must be a node of type VAR_DECL_NODE
2312 static int enter_var(astnode *n, void *arg, astnode **next)
2314 astnode *id = LHS(n); /* Variable identifier */
2315 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2316 /* Make sure it's unique first */
2317 if (symtab_lookup(id->ident)) {
2318 err(n->loc, "duplicate symbol `%s'", id->ident);
2319 /* Remove from AST */
2320 astnode_remove(n);
2321 astnode_finalize(n);
2322 return 0;
2323 } else {
2324 /* Validate modifiers */
2325 if ((n->modifiers & ZEROPAGE_FLAG) && !in_dataseg) {
2326 warn(n->loc, "zeropage modifier has no effect in code segment");
2327 n->modifiers &= ~ZEROPAGE_FLAG;
2329 /* Enter it! */
2330 symtab_enter(id->ident, VAR_SYMBOL, astnode_clone(RHS(n), n->loc), (in_dataseg ? DATA_FLAG : 0) | n->modifiers | modifiers);
2331 /* */
2332 return 1;
2337 * Enters a procedure declaration in symbol table.
2338 * @param n Must be a node of type PROC_NODE
2340 static int enter_proc(astnode *n, void *arg, astnode **next)
2342 astnode *id;
2343 if (in_dataseg) {
2344 err(n->loc, "procedures not allowed in data segment");
2345 /* Remove from AST */
2346 astnode_remove(n);
2347 astnode_finalize(n);
2348 return 0;
2350 id = LHS(n); /* Procedure identifier */
2351 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2352 /* Make sure it's unique first */
2353 if (symtab_lookup(id->ident)) {
2354 err(n->loc, "duplicate symbol `%s'", id->ident);
2355 /* Remove from AST */
2356 astnode_remove(n);
2357 astnode_finalize(n);
2358 return 0;
2359 } else {
2360 /* Enter it! RHS(n) is the list of procedure statements */
2361 symtab_enter(id->ident, PROC_SYMBOL, RHS(n), (in_dataseg ? DATA_FLAG : 0) );
2362 /* Increase global namespace counter */
2363 label_count++;
2364 /* */
2365 return 1;
2370 * Enters a simple <identifier> <storage> structure member.
2371 * @param c Node of type VAR_DECL_NODE
2372 * @param offset Offset of this field
2373 * @param plist List of symbol table's entries
2374 * @param struc_id Structure identifier (for error messages)
2375 * @return New offset (old offset + size of this field)
2377 static astnode *enter_struc_atomic_field(astnode *c, astnode *offset, ordered_field_list ***plist, astnode *struc_id)
2379 astnode *field_id;
2380 astnode *field_data;
2381 astnode *field_size;
2382 symtab_entry *fe;
2383 /* c has two children: id and STORAGE_NODE */
2384 field_id = LHS(c);
2385 assert(astnode_get_type(field_id) == IDENTIFIER_NODE);
2386 field_data = RHS(c);
2387 reduce_expression(RHS(field_data));
2388 /* Validate the declaration -- no data initialized */
2389 if (astnode_is_type(field_data, DATA_NODE)) {
2390 err(c->loc, "data initialization not allowed here");
2391 return(offset);
2393 /* Try to enter field in structure's symbol table */
2394 fe = symtab_enter(
2395 field_id->ident,
2396 VAR_SYMBOL,
2397 astnode_clone(field_data, field_data->loc),
2400 if (fe == NULL) {
2401 err(c->loc, "duplicate symbol `%s' in structure `%s'", field_id->ident, struc_id->ident);
2402 return(offset);
2404 /* Add to ordered list of fields */
2405 (**plist) = malloc(sizeof(ordered_field_list));
2406 (**plist)->entry = fe;
2407 (**plist)->next = NULL;
2408 *plist = &((**plist)->next);
2409 /* Set field offset */
2410 fe->field.offset = astnode_clone(offset, offset->loc);
2411 /* Calculate field size in bytes: sizeof(datatype) * count */
2412 field_size = astnode_create_arithmetic(
2413 MUL_OPERATOR,
2414 astnode_create_sizeof(astnode_clone(LHS(field_data), field_data->loc), field_data->loc),
2415 astnode_clone(RHS(field_data), field_data->loc),
2416 field_data->loc
2418 field_size = reduce_expression(field_size);
2419 /* Set field size */
2420 fe->field.size = astnode_clone(field_size, field_size->loc);
2421 /* Add field size to total offset */
2422 offset = astnode_create_arithmetic(
2423 PLUS_OPERATOR,
2424 offset,
2425 field_size,
2426 offset->loc
2428 offset = reduce_expression(offset);
2429 return(offset);
2432 static void enter_union_fields(symtab_entry *, astnode *);
2435 * Attempts to enter an (anonymous) union's members into structure's symbol table.
2436 * @param n Node of type UNION_DECL_NODE
2437 * @param offset Current parent structure offset
2438 * @param plist Ordered list of parent structure's fields
2440 astnode *enter_struc_union_field(astnode *n, astnode *offset, ordered_field_list ***plist, astnode *struc_id)
2442 ordered_field_list *ls;
2443 symtab_entry *se;
2444 symtab_entry *fe;
2445 static int id = 0;
2446 char id_str[16];
2447 astnode *union_id;
2448 union_id = LHS(n);
2449 if (astnode_is_type(union_id, IDENTIFIER_NODE)) {
2450 err(n->loc, "anonymous union expected");
2451 return(offset);
2453 /* Put UNION in symbol table */
2454 sprintf(id_str, "%d", id++);
2455 se = symtab_enter(id_str, UNION_SYMBOL, n, 0);
2456 enter_union_fields(se, n);
2457 /* Add to ordered list of fields */
2458 (**plist) = malloc(sizeof(ordered_field_list));
2459 (**plist)->entry = se;
2460 (**plist)->next = NULL;
2461 *plist = &((**plist)->next);
2462 /* Add to parent structure as well, with same offsets */
2463 for (ls = se->struc.fields; ls != NULL; ls = ls->next) {
2464 /* Try to enter field in structure's symbol table */
2465 fe = symtab_enter(
2466 ls->entry->id,
2467 VAR_SYMBOL,
2468 astnode_clone(ls->entry->def, ls->entry->def->loc),
2471 if (fe == NULL) {
2472 err(ls->entry->def->loc, "duplicate symbol `%s' in structure `%s'", ls->entry->id, struc_id->ident);
2473 continue;
2475 /* Set field offset */
2476 fe->field.offset = astnode_clone(offset, offset->loc);
2477 /* Set field size */
2478 fe->field.size = astnode_clone(se->struc.size, offset->loc);
2480 /* Advance offset by size of union */
2481 offset = astnode_create_arithmetic(
2482 PLUS_OPERATOR,
2483 offset,
2484 astnode_clone(se->struc.size, offset->loc),
2485 offset->loc
2487 offset = reduce_expression(offset);
2488 return(offset);
2492 * Enters struc type into symbol table based on AST node.
2493 * - Creates a symbol table for the structure
2494 * - Validates and enters all its fields
2495 * - Calculates offset of each field in the structure, and total size
2496 * @param n Node of type STRUC_DECL_NODE
2498 static int enter_struc(astnode *n, void *arg, astnode **next)
2500 ordered_field_list **plist;
2501 symtab_entry *se;
2502 astnode *c;
2503 astnode *offset;
2504 astnode *struc_id = LHS(n); /* Child 0 is struc identifier */
2505 /* Put STRUC in symbol table */
2506 se = symtab_enter(struc_id->ident, STRUC_SYMBOL, n, 0);
2507 if (se == NULL) {
2508 err(n->loc, "duplicate symbol `%s'", struc_id->ident);
2509 } else {
2510 /* Put the fields of the structure in local symbol table */
2511 se->symtab = symtab_create();
2512 offset = astnode_create_integer(0, n->loc); /* offset = 0 */
2513 plist = &se->struc.fields;
2514 for (c = struc_id->next_sibling; c != NULL; c = c->next_sibling) {
2515 /* Check if it's a field declaration */
2516 if (astnode_is_type(c, VAR_DECL_NODE)) {
2517 offset = enter_struc_atomic_field(c, offset, &plist, struc_id);
2519 /* Check if (anonymous) union */
2520 else if (astnode_is_type(c, UNION_DECL_NODE)) {
2521 offset = enter_struc_union_field(c, offset, &plist, struc_id);
2522 } else {
2523 err(c->loc, "field declaration expected");
2524 continue;
2527 /* Store total size of structure */
2528 se->struc.size = offset;
2529 /* Restore previous symbol table */
2530 symtab_pop();
2532 /* ### Remove STRUC node from AST */
2533 // astnode_remove(n);
2534 // astnode_finalize(n);
2535 return 0;
2539 * Enters fields of union into its symbol table.
2541 static void enter_union_fields(symtab_entry *se, astnode *n)
2543 ordered_field_list **plist;
2544 astnode *c;
2545 astnode *field_id;
2546 astnode *field_data;
2547 astnode *field_size;
2548 symtab_entry *fe;
2550 se->symtab = symtab_create();
2551 se->struc.size = astnode_create_integer(0, n->loc);
2552 plist = &se->struc.fields;
2553 /* Process field declarations */
2554 for (c = RHS(n); c != NULL; c = c->next_sibling) {
2555 /* Make sure it's a field declaration */
2556 if (!astnode_is_type(c, VAR_DECL_NODE)) {
2557 err(c->loc, "field declaration expected");
2558 continue;
2560 /* c has two children: id and STORAGE_NODE */
2561 field_id = LHS(c);
2562 assert(astnode_get_type(field_id) == IDENTIFIER_NODE);
2563 field_data = RHS(c);
2564 reduce_expression(RHS(field_data));
2565 /* Validate the declaration -- no data initialized */
2566 if (astnode_is_type(field_data, DATA_NODE)) {
2567 err(c->loc, "data initialization not allowed here");
2568 continue;
2570 /* Calculate field size in bytes: sizeof(datatype) * count */
2571 field_size = astnode_create_arithmetic(
2572 MUL_OPERATOR,
2573 astnode_create_sizeof(astnode_clone(LHS(field_data), field_data->loc), field_data->loc),
2574 astnode_clone(RHS(field_data), field_data->loc),
2575 field_data->loc
2577 field_size = reduce_expression(field_size);
2578 /* Make sure field size is a constant */
2579 if (!astnode_is_type(field_size, INTEGER_NODE)) {
2580 err(c->loc, "union member must be of constant size");
2581 astnode_finalize(field_size);
2582 /* Use default size: 1 byte */
2583 field_size = astnode_create_integer(1, field_data->loc);
2585 /* Try to enter field in structure's symbol table */
2586 fe = symtab_enter(
2587 field_id->ident,
2588 VAR_SYMBOL,
2589 astnode_clone(field_data, field_data->loc),
2592 if (fe == NULL) {
2593 err(c->loc, "duplicate symbol `%s' in union `%s'", field_id->ident, se->id);
2594 astnode_finalize(field_size);
2595 continue;
2597 /* Add to ordered list of fields */
2598 (*plist) = malloc(sizeof(ordered_field_list));
2599 (*plist)->entry = fe;
2600 (*plist)->next = NULL;
2601 plist = &((*plist)->next);
2602 /* Set field offset (0 for all) and size */
2603 fe->field.offset = astnode_create_integer(0, n->loc);
2604 fe->field.size = astnode_clone(field_size, field_size->loc);
2605 /* See if field size of this member is largest so far */
2606 if (se->struc.size->integer < field_size->integer) {
2607 astnode_finalize(se->struc.size);
2608 se->struc.size = field_size;
2609 } else {
2610 astnode_finalize(field_size);
2613 symtab_pop();
2617 * Enters union type into symbol table based on AST node.
2618 * @param n Node of type UNION_DECL_NODE
2620 static int enter_union(astnode *n, void *arg, astnode **next)
2622 symtab_entry *se;
2623 astnode *union_id = astnode_get_child(n, 0); /* Child 0 is union identifier */
2624 /* Check for anonymous union */
2625 if (astnode_is_type(union_id, NULL_NODE)) {
2626 err(n->loc, "anonymous union not allowed in global scope");
2627 } else {
2628 /* Put UNION in symbol table */
2629 assert(astnode_get_type(union_id) == IDENTIFIER_NODE);
2630 se = symtab_enter(union_id->ident, UNION_SYMBOL, n, 0);
2631 if (se == NULL) {
2632 err(n->loc, "duplicate symbol `%s'", union_id->ident);
2633 } else {
2634 /* Put the fields of the union in local symbol table */
2635 enter_union_fields(se, n);
2638 /* ### Remove UNION node from AST */
2639 // astnode_remove(n);
2640 // astnode_finalize(n);
2641 return 0;
2645 * Enters enumerated type into symbol table based on AST node.
2646 * @param n Node of type ENUM_DECL_NODE
2648 static int enter_enum(astnode *n, void *arg, astnode **next)
2650 astnode *c;
2651 astnode *id;
2652 astnode *val;
2653 symtab_entry *se;
2654 astnode *enum_id = astnode_get_child(n, 0); /* Child 0 is enum identifier */
2655 /* Enter in global symbol table */
2656 assert(astnode_get_type(enum_id) == IDENTIFIER_NODE);
2657 se = symtab_enter(enum_id->ident, ENUM_SYMBOL, n, 0);
2658 if (se == NULL) {
2659 err(n->loc, "duplicate symbol `%s'", enum_id->ident);
2660 } else {
2661 /* Add all the enum symbols to its own symbol table */
2662 se->symtab = symtab_create();
2663 val = NULL;
2664 for (c = enum_id->next_sibling; c != NULL; c = c->next_sibling) {
2665 if (astnode_is_type(c, IDENTIFIER_NODE)) {
2666 id = c;
2667 if (val == NULL) {
2668 val = astnode_create_integer(0, c->loc);
2669 } else {
2670 val = astnode_create_integer(val->integer+1, c->loc);
2672 } else {
2673 id = LHS(c);
2674 val = reduce_expression_complete(astnode_clone(RHS(c), RHS(c)->loc));
2675 if (!astnode_is_type(val, INTEGER_NODE)) {
2676 err(c->loc, "initializer does not evaluate to integer literal");
2677 astnode_finalize(val);
2678 /* Use default value */
2679 val = astnode_create_integer(0, c->loc);
2682 if (symtab_enter(id->ident, CONSTANT_SYMBOL, val, 0) == NULL) {
2683 err(c->loc, "duplicate symbol `%s' in enumeration `%s'", id->ident, enum_id->ident);
2684 continue;
2687 symtab_pop();
2689 /* ### Remove ENUM node from AST */
2690 // astnode_remove(n);
2691 // astnode_finalize(n);
2692 return 0;
2696 * Enters record type into symbol table based on AST node.
2697 * @param n Node of type RECORD_DECL_NODE
2699 static int enter_record(astnode *n, void *arg, astnode **next)
2701 ordered_field_list **plist;
2702 astnode *c;
2703 astnode *field_id;
2704 astnode *field_width;
2705 int size;
2706 int offset;
2707 symtab_entry *se;
2708 symtab_entry *fe;
2709 astnode *record_id = astnode_get_child(n, 0); /* Child 0 is record identifier */
2710 assert(astnode_get_type(record_id) == IDENTIFIER_NODE);
2711 /* Enter in global symbol table */
2712 se = symtab_enter(record_id->ident, RECORD_SYMBOL, n, 0);
2713 if (se == NULL) {
2714 err(n->loc, "duplicate symbol `%s'", record_id->ident);
2716 else {
2717 /* Add all the record fields to record's own symbol table */
2718 se->symtab = symtab_create();
2719 offset = 8;
2720 plist = &se->struc.fields;
2721 for (c = record_id->next_sibling; c != NULL; c = c->next_sibling) {
2722 /* c has two children: field identifier and its width */
2723 field_id = LHS(c);
2724 field_width = astnode_clone(reduce_expression(RHS(c)), RHS(c)->loc);
2725 /* Validate the width -- must be positive integer literal */
2726 if (!astnode_is_type(field_width, INTEGER_NODE)) {
2727 err(c->loc, "record member `%s' is not of constant size", field_id->ident);
2728 continue;
2730 if ((field_width->integer <= 0) || (field_width->integer >= 8)) {
2731 err(c->loc, "width of record member `%s' is out of range (%d)", field_id->ident, field_width->integer);
2732 continue;
2734 /* Attempt to enter field in record's symbol table */
2735 fe = symtab_enter(field_id->ident, VAR_SYMBOL, c, 0);
2736 if (fe == NULL) {
2737 err(c->loc, "duplicate symbol `%s' in record `%s'", field_id->ident, record_id->ident);
2738 continue;
2740 /* Add to ordered list of fields */
2741 (*plist) = malloc(sizeof(ordered_field_list));
2742 (*plist)->entry = fe;
2743 (*plist)->next = NULL;
2744 plist = &((*plist)->next);
2745 /* Set field offset */
2746 offset = offset - field_width->integer;
2747 fe->field.offset = astnode_create_integer(offset, c->loc);
2748 /* Set field size (width) */
2749 fe->field.size = field_width;
2751 size = 8 - offset;
2752 if (size > 8) {
2753 err(n->loc, "size of record `%s' (%d) exceeds 8 bits", record_id->ident, size);
2754 } else {
2755 /* Set size of record (in bits) */
2756 se->struc.size = astnode_create_integer(size, n->loc);
2758 symtab_pop();
2760 /* ### Remove RECORD node from AST */
2761 // astnode_remove(n);
2762 // astnode_finalize(n);
2763 return 0;
2767 * Globalizes a local.
2768 * The node is morphed into its global equivalent (LABEL_NODE or IDENTIFIER_NODE).
2769 * @param n A node of type LOCAL_LABEL_NODE or LOCAL_ID_NODE
2770 * @param arg Pointer to namespace counter
2772 static int globalize_local(astnode *n, void *arg, astnode **next)
2774 char str[32];
2775 /* Make it global by appending namespace counter to the id */
2776 sprintf(str, "#%d", label_count);
2777 if (astnode_is_type(n, LOCAL_LABEL_NODE)) {
2778 /* Local label definition, use label field */
2779 n->label = realloc(n->label, strlen(n->label)+strlen(str)+1);
2780 strcat(n->label, str);
2781 /* This node is now a unique, global label */
2782 n->type = LABEL_NODE;
2783 /* Make sure it's unique */
2784 if (symtab_lookup(n->label)) {
2785 err(n->loc, "duplicate symbol `%s'", n->label);
2786 /* Remove from AST */
2787 astnode_remove(n);
2788 astnode_finalize(n);
2789 return 0;
2790 } else {
2791 /* Enter it in symbol table */
2792 symtab_enter(n->label, LABEL_SYMBOL, n, (in_dataseg ? DATA_FLAG : 0) );
2794 } else {
2795 /* Local label reference, use ident field */
2796 n->ident = realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2797 strcat(n->ident, str);
2798 /* This node is now a unique, global identifier */
2799 n->type = IDENTIFIER_NODE;
2801 return 1;
2805 * Tags symbols as extrn.
2806 * @param n A node of type EXTRN_NODE
2808 static int tag_extrn_symbols(astnode *n, void *arg, astnode **next)
2810 astnode *id;
2811 astnode *type;
2812 astnode *list;
2813 symtab_entry *e;
2814 /* Get symbol type specifier */
2815 type = astnode_get_child(n, 0);
2816 /* Go through the list of identifiers */
2817 list = astnode_get_child(n, 1);
2818 for (id=astnode_get_first_child(list); id != NULL; id=astnode_get_next_sibling(id) ) {
2819 /* Look up identifier in symbol table */
2820 e = symtab_lookup(id->ident);
2821 if (e != NULL) {
2822 if (!(e->flags & EXTRN_FLAG)) {
2823 /* Error, can't import a symbol that's defined locally! */
2824 // TODO: this is okay?
2825 err(n->loc, "`%s' declared as extrn but is defined locally", id->ident);
2828 else {
2829 // TODO: store external unit name
2830 switch (astnode_get_type(type)) {
2831 case DATATYPE_NODE:
2832 /* Put it in symbol table */
2833 symtab_enter(id->ident, VAR_SYMBOL, astnode_create_data(astnode_clone(type, n->loc), NULL, n->loc), EXTRN_FLAG);
2834 break;
2836 case INTEGER_NODE:
2837 /* type->integer is (LABEL|PROC)_SYMBOL */
2838 symtab_enter(id->ident, type->integer, NULL, EXTRN_FLAG);
2839 break;
2841 default:
2842 break;
2846 /* Remove extrn node from AST */
2847 astnode_remove(n);
2848 astnode_finalize(n);
2850 return 0;
2856 static int process_message(astnode *n, void *arg, astnode **next)
2858 astnode *mesg = reduce_expression_complete(LHS(n));
2859 if (astnode_is_type(mesg, STRING_NODE)) {
2860 printf("%s\n", mesg->string);
2862 else if (astnode_is_type(mesg, INTEGER_NODE)) {
2863 printf("%d\n", mesg->integer);
2865 else {
2866 err(mesg->loc, "string or integer argument expected");
2868 astnode_remove(n);
2869 astnode_finalize(n);
2870 return 0;
2876 static int process_warning(astnode *n, void *arg, astnode **next)
2878 astnode *mesg = reduce_expression_complete(LHS(n));
2879 if (astnode_is_type(mesg, STRING_NODE)) {
2880 warn(mesg->loc, mesg->string);
2882 else {
2883 err(mesg->loc, "string argument expected");
2885 astnode_remove(n);
2886 astnode_finalize(n);
2887 return 0;
2893 static int process_error(astnode *n, void *arg, astnode **next)
2895 astnode *mesg = reduce_expression_complete(LHS(n));
2896 if (astnode_is_type(mesg, STRING_NODE)) {
2897 err(mesg->loc, mesg->string);
2899 else {
2900 err(mesg->loc, "string argument expected");
2902 astnode_remove(n);
2903 astnode_finalize(n);
2904 return 0;
2908 * Processes a forward branch declaration.
2909 * @param n Node of type FORWARD_BRANCH_DECL_NODE
2910 * @param arg Not used
2912 static int process_forward_branch_decl(astnode *n, void *arg, astnode **next)
2914 astnode *l;
2915 int i;
2916 char str[32];
2917 /* Get branch info structure for label (+, ++, ...) */
2918 forward_branch_info *fwd = &forward_branch[strlen(n->ident)-1];
2919 /* Morph n to globally unique label */
2920 sprintf(str, "#%d", fwd->counter);
2921 n->label = (char *)realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2922 strcat(n->label, str);
2923 n->type = LABEL_NODE;
2924 symtab_enter(n->label, LABEL_SYMBOL, n, 0);
2925 /* Fix reference identifiers */
2926 for (i=0; i<fwd->index; i++) {
2927 l = fwd->refs[i];
2928 l->ident = (char *)realloc(l->ident, strlen(n->ident)+1);
2929 strcpy(l->ident, n->ident);
2931 /* Prepare for next declaration */
2932 fwd->index = 0;
2933 fwd->counter++;
2934 return 0;
2938 * Processes a backward branch declaration.
2939 * @param n Node of type BACKWARD_BRANCH_DECL_NODE
2940 * @param arg Not used
2942 static int process_backward_branch_decl(astnode *n, void *arg, astnode **next)
2944 char str[32];
2945 /* Get branch info */
2946 backward_branch_info *bwd = &backward_branch[strlen(n->ident)-1];
2947 bwd->decl = n;
2948 /* Morph n to globally unique label */
2949 sprintf(str, "#%d", bwd->counter);
2950 n->label = (char *)realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2951 strcat(n->label, str);
2952 n->type = LABEL_NODE;
2953 symtab_enter(n->label, LABEL_SYMBOL, n, 0);
2954 /* Prepare for next declaration */
2955 bwd->counter++;
2956 return 0;
2960 * Processes a forward branch label reference.
2961 * @param n Node of type FORWARD_BRANCH_NODE
2962 * @param arg Not used
2964 static int process_forward_branch(astnode *n, void *arg, astnode **next)
2966 /* Add n to proper forward_branch array */
2967 forward_branch_info *fwd = &forward_branch[strlen(n->ident)-1];
2968 fwd->refs[fwd->index++] = n;
2969 /* Change to identifier node */
2970 n->type = IDENTIFIER_NODE;
2971 return 0;
2975 * Processes a backward branch label reference.
2976 * @param n Node of type BACKWARD_BRANCH_NODE
2977 * @param arg Not used
2979 static int process_backward_branch(astnode *n, void *arg, astnode **next)
2981 /* Get branch info */
2982 backward_branch_info *bwd = &backward_branch[strlen(n->ident)-1];
2983 /* Make sure it's a valid reference */
2984 if (bwd->decl != NULL) {
2985 /* Fix n->ident */
2986 n->ident = (char *)realloc(n->ident, strlen(bwd->decl->ident)+1);
2987 strcpy(n->ident, bwd->decl->ident);
2989 /* Change to identifier node */
2990 n->type = IDENTIFIER_NODE;
2991 return 0;
2994 /*---------------------------------------------------------------------------*/
2996 static int is_field_ref(astnode *n)
2998 astnode *p = astnode_get_parent(n);
2999 /* Case 1: id.id */
3000 if (astnode_is_type(p, DOT_NODE)) return 1;
3001 /* Case 2: id.id[expr] */
3002 if (astnode_is_type(p, INDEX_NODE) && (n == LHS(p)) && astnode_is_type(astnode_get_parent(p), DOT_NODE) ) return 1;
3003 return 0;
3007 * Checks that the given identifier node is present in symbol table.
3008 * Issues error if it is not, and replaces with integer 0.
3009 * @param n A node of type IDENTIFIER_NODE
3011 static int validate_ref(astnode *n, void *arg, astnode **next)
3013 int i;
3014 symbol_ident_list list;
3015 symtab_entry *enum_def;
3016 int ret = 1;
3017 if (is_field_ref(n)) {
3018 return 1; /* Validated by validate_dotref() */
3020 /* Look it up in symbol table */
3021 symtab_entry * e = symtab_lookup(n->ident);
3022 if (e == NULL) {
3023 /* This identifier is unknown */
3024 /* Maybe it is part of an enumeration */
3025 symtab_list_type(ENUM_SYMBOL, &list);
3026 for (i=0; i<list.size; i++) {
3027 enum_def = symtab_lookup(list.idents[i]);
3028 symtab_push(enum_def->symtab);
3029 e = symtab_lookup(n->ident);
3030 symtab_pop();
3031 if (e != NULL) {
3032 /* Found it */
3033 /* Replace id by SCOPE_NODE */
3034 astnode *scope = astnode_create_scope(
3035 astnode_create_identifier(enum_def->id, n->loc),
3036 astnode_clone(n, n->loc), n->loc);
3037 astnode_replace(n, scope);
3038 astnode_finalize(n);
3039 *next = scope;
3040 ret = 0;
3041 break;
3044 symtab_list_finalize(&list);
3045 /* If still not found, error */
3046 if (e == NULL) {
3047 strtok(n->ident, "#"); /* Remove globalize junk */
3048 // err(n->loc, "unknown symbol `%s'", n->ident);
3049 /* ### Replace by integer 0 */
3050 //astnode_replace(n, astnode_create_integer(0, n->loc) );
3051 //astnode_finalize(n);
3052 warn(n->loc, "`%s' undeclared; assuming external label", n->ident);
3053 e = symtab_enter(n->ident, LABEL_SYMBOL, NULL, EXTRN_FLAG);
3056 assert(e);
3057 /* Increase reference count */
3058 e->ref_count++;
3059 return ret;
3063 * Validates top-level (not part of structure) indexed identifier.
3064 * @param n Node of type INDEX_NODE
3065 * @param arg Not used
3067 static int validate_index(astnode *n, void *arg, astnode **next)
3069 symtab_entry *e;
3070 astnode *id;
3071 astnode *type;
3072 if (is_field_ref(LHS(n))) {
3073 return 1; /* Validated by validate_dotref() */
3075 id = LHS(n);
3076 if (!astnode_is_type(id, IDENTIFIER_NODE)) {
3077 err(n->loc, "identifier expected");
3078 astnode_replace(n, astnode_create_integer(0, n->loc) );
3079 astnode_finalize(n);
3080 return 0;
3082 e = symtab_lookup(id->ident);
3083 if (e != NULL) {
3084 type = LHS(e->def);
3085 if (!astnode_is_type(type, DATATYPE_NODE)) {
3086 err(n->loc, "`%s' cannot be indexed", id->ident);
3087 astnode_replace(n, astnode_create_integer(0, n->loc) );
3088 astnode_finalize(n);
3089 return 0;
3090 } else {
3091 // TODO: bounds check
3092 reduce_index(n);
3094 } else {
3095 err(n->loc, "unknown symbol `%s'", id->ident);
3096 astnode_replace(n, astnode_create_integer(0, n->loc) );
3097 astnode_finalize(n);
3098 return 0;
3100 return 1;
3104 * Checks that A::B is valid.
3105 * If it's not valid it is replaced by integer 0.
3106 * @param n Node of type SCOPE_NODE
3108 static int validate_scoperef(astnode *n, void *arg, astnode **next)
3110 astnode *symbol;
3111 astnode *namespace = LHS(n);
3112 /* Look up namespace in global symbol table */
3113 symtab_entry * e = symtab_lookup(namespace->ident);
3114 if (e == NULL) {
3115 /* Error, this identifier is unknown */
3116 err(n->loc, "unknown namespace `%s'", namespace->ident);
3117 /* Replace by integer 0 */
3118 astnode_replace(n, astnode_create_integer(0, n->loc) );
3119 astnode_finalize(n);
3120 return 0;
3121 } else {
3122 /* Get symbol on right of :: operator */
3123 symbol = RHS(n);
3124 /* Namespace was found, check its type */
3125 switch (e->type) {
3126 case STRUC_SYMBOL:
3127 case UNION_SYMBOL:
3128 case RECORD_SYMBOL:
3129 case ENUM_SYMBOL:
3130 /* OK, check the symbol */
3131 symtab_push(e->symtab);
3132 e = symtab_lookup(symbol->ident);
3133 if (e == NULL) {
3134 /* Error, symbol is not in namespace */
3135 err(n->loc, "unknown symbol `%s' in namespace `%s'", symbol->ident, namespace->ident);
3136 /* Replace by integer 0 */
3137 astnode_replace(n, astnode_create_integer(0, n->loc) );
3138 astnode_finalize(n);
3140 symtab_pop();
3141 break;
3143 default:
3144 err(n->loc, "`%s' is not a namespace", namespace->ident);
3145 /* Replace by integer 0 */
3146 astnode_replace(n, astnode_create_integer(0, n->loc) );
3147 astnode_finalize(n);
3148 break;
3151 return 0;
3155 * Validates right part of dotted reference recursively.
3156 * Assumes that left part's symbol table is on stack.
3157 * @param n Node of type DOT_NODE
3159 static void validate_dotref_recursive(astnode *n, astnode *top)
3161 astnode *left;
3162 astnode *right;
3163 astnode *type;
3164 symtab_entry *field;
3165 symtab_entry *def;
3166 left = LHS(n);
3167 if (astnode_is_type(left, INDEX_NODE)) {
3168 left = LHS(left); /* Need identifier */
3170 right = RHS(n);
3171 if (astnode_is_type(right, DOT_NODE)) {
3172 right = LHS(right); /* Need identifier */
3174 if (astnode_is_type(right, INDEX_NODE)) {
3175 right = LHS(right); /* Need identifier */
3177 /* Lookup 'right' in 'left's symbol table */
3178 assert(astnode_get_type(right) == IDENTIFIER_NODE);
3179 field = symtab_lookup(right->ident);
3180 if (field == NULL) {
3181 /* Error, this symbol is unknown */
3182 err(n->loc, "`%s' is not a member of `%s'", right->ident, left->ident);
3183 /* Replace by integer 0 */
3184 astnode_replace(top, astnode_create_integer(0, top->loc) );
3185 astnode_finalize(top);
3186 } else {
3187 /* See if more subfields to process */
3188 n = RHS(n);
3189 if (astnode_is_type(n, DOT_NODE)) {
3190 /* Verify the variable's type -- should be user-defined */
3191 type = LHS(field->def);
3192 if ((type == NULL) || (type->datatype != USER_DATATYPE)) {
3193 err(n->loc, "member `%s' of `%s' is not a structure", right->ident, left->ident);
3194 /* Replace by integer 0 */
3195 astnode_replace(top, astnode_create_integer(0, top->loc) );
3196 astnode_finalize(top);
3197 } else {
3198 /* Look up variable's type definition and verify it's a structure */
3199 def = symtab_global_lookup(LHS(type)->ident);
3200 if (def == NULL) {
3201 err(n->loc, "member '%s' of '%s' is of unknown type (`%s')", right->ident, left->ident, LHS(type)->ident);
3202 /* Replace by integer 0 */
3203 astnode_replace(top, astnode_create_integer(0, top->loc) );
3204 astnode_finalize(top);
3205 } else if ( !((def->type == STRUC_SYMBOL) || (def->type == UNION_SYMBOL)) ) {
3206 err(n->loc, "member `%s' of `%s' is not a structure", right->ident, left->ident);
3207 /* Replace by integer 0 */
3208 astnode_replace(top, astnode_create_integer(0, top->loc) );
3209 astnode_finalize(top);
3210 } else {
3211 /* Next field */
3212 symtab_push(def->symtab);
3213 validate_dotref_recursive(n, top);
3214 symtab_pop();
3222 * Validates A.B.C.D. . ...
3223 * Replaces the whole thing with integer 0 if not.
3224 * @param n Node of type DOT_NODE
3226 static int validate_dotref(astnode *n, void *arg, astnode **next)
3228 symtab_entry *father;
3229 symtab_entry *def;
3230 astnode *type;
3231 astnode *left;
3232 if (astnode_has_ancestor_of_type(n, DOT_NODE)) {
3233 return 1; /* Already validated, since this function is recursive */
3235 /* Look up parent in global symbol table */
3236 left = LHS(n); /* n := left . right */
3237 if (astnode_is_type(left, INDEX_NODE)) {
3238 left = LHS(left); /* Need identifier */
3240 father = symtab_lookup(left->ident);
3241 if (father == NULL) {
3242 /* Error, this symbol is unknown */
3243 err(n->loc, "unknown symbol `%s'", left->ident);
3244 /* Replace by integer 0 */
3245 astnode_replace(n, astnode_create_integer(0, n->loc) );
3246 astnode_finalize(n);
3247 return 0;
3248 } else {
3249 /* Increase reference count */
3250 father->ref_count++;
3251 /* Verify the variable's type -- should be user-defined */
3252 type = LHS(father->def);
3253 if ((type == NULL) || (type->datatype != USER_DATATYPE)) {
3254 err(n->loc, "`%s' is not a structure", left->ident);
3255 /* Replace by integer 0 */
3256 astnode_replace(n, astnode_create_integer(0, n->loc) );
3257 astnode_finalize(n);
3258 return 0;
3259 } else {
3260 /* Look up variable's type definition and verify it's a structure */
3261 def = symtab_lookup(LHS(type)->ident);
3262 if (def == NULL) {
3263 err(n->loc, "'%s' is of unknown type (`%s')", left->ident, LHS(type)->ident);
3264 /* Replace by integer 0 */
3265 astnode_replace(n, astnode_create_integer(0, n->loc) );
3266 astnode_finalize(n);
3267 return 0;
3268 } else if ( !((def->type == STRUC_SYMBOL) || (def->type == UNION_SYMBOL)) ) {
3269 err(n->loc, "`%s' is not a structure", left->ident);
3270 /* Replace by integer 0 */
3271 astnode_replace(n, astnode_create_integer(0, n->loc) );
3272 astnode_finalize(n);
3273 return 0;
3274 } else {
3275 /* Verify fields recursively */
3276 symtab_push(def->symtab);
3277 validate_dotref_recursive(n, n);
3278 symtab_pop();
3282 return 1;
3285 /*---------------------------------------------------------------------------*/
3288 * Evaluates expressions involved in conditional assembly, and removes the
3289 * appropriate branches from the AST.
3290 * Does some other stuff too, such as substitute equates and fold constants.
3292 void astproc_first_pass(astnode *root)
3294 /* Table of callback functions for our purpose. */
3295 static astnodeprocmap map[] = {
3296 { LABEL_NODE, enter_label },
3297 { VAR_DECL_NODE, enter_var },
3298 { PROC_NODE, enter_proc },
3299 { STRUC_DECL_NODE, enter_struc },
3300 { UNION_DECL_NODE, enter_union },
3301 { ENUM_DECL_NODE, enter_enum },
3302 { RECORD_DECL_NODE, enter_record },
3303 { LOCAL_LABEL_NODE, globalize_local },
3304 { LOCAL_ID_NODE, globalize_local },
3305 { MACRO_DECL_NODE, enter_macro },
3306 { MACRO_NODE, expand_macro },
3307 { REPT_NODE, process_rept },
3308 { WHILE_NODE, process_while },
3309 { DATASEG_NODE, process_dataseg },
3310 { CODESEG_NODE, process_codeseg },
3311 { ORG_NODE, process_org },
3312 { CHARMAP_NODE, load_charmap },
3313 { INSTRUCTION_NODE, process_instruction },
3314 { DATA_NODE, process_data },
3315 { STORAGE_NODE, process_storage },
3316 { EQU_NODE, process_equ },
3317 { ASSIGN_NODE, process_assign },
3318 { IFDEF_NODE, process_ifdef },
3319 { IFNDEF_NODE, process_ifndef },
3320 { IF_NODE, process_if },
3321 { EXTRN_NODE, tag_extrn_symbols },
3322 { MESSAGE_NODE, process_message },
3323 { WARNING_NODE, process_warning },
3324 { ERROR_NODE, process_error },
3325 { FORWARD_BRANCH_DECL_NODE, process_forward_branch_decl },
3326 { BACKWARD_BRANCH_DECL_NODE, process_backward_branch_decl },
3327 { FORWARD_BRANCH_NODE, process_forward_branch },
3328 { BACKWARD_BRANCH_NODE, process_backward_branch },
3329 { 0, NULL }
3331 reset_charmap();
3332 branch_init();
3333 in_dataseg = 0; /* codeseg is default */
3334 /* Do the walk. */
3335 astproc_walk(root, NULL, map);
3336 /* Remove all the volatile constants from the symbol table */
3337 /* These are the ones defined with the '=' operator, whose identifiers should
3338 all have been replaced by their value in the syntax tree now. Since
3339 they're not referenced anywhere we can safely dispose of them.
3340 The EQUates on the other hand should be kept, since they will
3341 possibly be exported. */
3342 #ifdef ENABLE_BUGGY_THING // ### FIXME
3344 int i;
3345 symbol_ident_list list;
3346 symtab_entry *e;
3347 symtab_list_type(CONSTANT_SYMBOL, &list);
3348 for (i = 0; i < list.size; ++i) {
3349 e = symtab_lookup(list.idents[i]);
3350 if (e->flags & VOLATILE_FLAG) {
3351 symtab_remove(list.idents[i]);
3354 symtab_list_finalize(&list);
3356 #endif
3359 /*---------------------------------------------------------------------------*/
3362 * Tags labels as public.
3363 * @param n A node of type PUBLIC_NODE
3365 static int tag_public_symbols(astnode *n, void *arg, astnode **next)
3367 astnode *id;
3368 symtab_entry *e;
3369 /* Go through the list of identifiers */
3370 for (id=astnode_get_first_child(n); id != NULL; id = astnode_get_next_sibling(id) ) {
3371 /* Look up identifier in symbol table */
3372 e = symtab_lookup(id->ident);
3373 if (e != NULL) {
3374 /* Symbol exists. Set the proper flag unless ambiguous. */
3375 if (e->flags & EXTRN_FLAG) {
3376 err(n->loc, "`%s' already declared extrn", id->ident);
3377 } else {
3378 switch (e->type) {
3379 case LABEL_SYMBOL:
3380 case CONSTANT_SYMBOL:
3381 case VAR_SYMBOL:
3382 case PROC_SYMBOL:
3383 /* GO! */
3384 e->flags |= PUBLIC_FLAG;
3385 break;
3387 default:
3388 err(n->loc, "`%s' is of non-exportable type", id->ident);
3389 break;
3392 } else {
3393 /* Warning, can't export a symbol that's not defined. */
3394 warn(n->loc, "`%s' declared as public but is not defined", id->ident);
3397 /* Remove PUBLIC_NODE from AST */
3398 astnode_remove(n);
3399 astnode_finalize(n);
3401 return 0;
3405 * Sets alignment for a set of (data) labels.
3406 * @param n A node of type ALIGN_NODE
3408 static int tag_align_symbols(astnode *n, void *arg, astnode **next)
3410 int pow;
3411 astnode *id;
3412 astnode *idents;
3413 astnode *expr;
3414 symtab_entry *e;
3415 /* Go through the list of identifiers */
3416 idents = LHS(n);
3417 for (id=astnode_get_first_child(idents); id != NULL; id = astnode_get_next_sibling(id) ) {
3418 /* Look up identifier in symbol table */
3419 e = symtab_lookup(id->ident);
3420 if (e != NULL) {
3421 /* Symbol exists. Set the proper flag unless ambiguous. */
3422 if (!(e->flags & DATA_FLAG)) {
3423 err(n->loc, "cannot align a code symbol (`%s')", id->ident);
3424 } else {
3425 switch (e->type) {
3426 case LABEL_SYMBOL:
3427 case VAR_SYMBOL:
3428 expr = reduce_expression(RHS(n));
3429 if (!astnode_is_type(expr, INTEGER_NODE)) {
3430 err(n->loc, "alignment expression must be an integer literal");
3431 } else if ((expr->integer < 0) || (expr->integer >= 0x10000)) {
3432 err(n->loc, "alignment expression out of range");
3433 } else if (expr->integer > 1) {
3434 pow = 0;
3435 switch (expr->integer) {
3436 case 32768: pow++;
3437 case 16384: pow++;
3438 case 8192: pow++;
3439 case 4096: pow++;
3440 case 2048: pow++;
3441 case 1024: pow++;
3442 case 512: pow++;
3443 case 256: pow++;
3444 case 128: pow++;
3445 case 64: pow++;
3446 case 32: pow++;
3447 case 16: pow++;
3448 case 8: pow++;
3449 case 4: pow++;
3450 case 2: pow++;
3451 /* GO! */
3452 e->flags |= ALIGN_FLAG;
3453 e->align = pow;
3454 break;
3456 default:
3457 err(n->loc, "alignment expression must be a power of 2");
3458 break;
3461 break;
3463 default:
3464 err(n->loc, "`%s' cannot be aligned", id->ident);
3465 break;
3469 else {
3470 /* Warning, can't align a symbol that's not defined. */
3471 warn(n->loc, "alignment ignored for undefined symbol `%s'", id->ident);
3474 /* Remove ALIGN_NODE from AST */
3475 astnode_remove(n);
3476 astnode_finalize(n);
3478 return 0;
3481 /*---------------------------------------------------------------------------*/
3484 * Removes unused labels from a syntax tree (and symbol table).
3485 * Unused labels are labels that are defined but not referenced anywhere.
3486 * This function assumes that the reference counts have already been calculated.
3488 void remove_unused_labels()
3490 int i;
3491 char *id;
3492 astnode *n;
3493 symbol_ident_list list;
3494 symtab_list_type(LABEL_SYMBOL, &list);
3495 for (i=0; i<list.size; i++) {
3496 /* Look up label in symbol table */
3497 id = list.idents[i];
3498 symtab_entry * e = symtab_lookup(id);
3499 /* If reference count is zero, AND label isn't declared public, remove it. */
3500 if ((e->ref_count == 0) && ((e->flags & PUBLIC_FLAG) == 0)) {
3501 n = e->def;
3502 strtok(n->label, "#"); /* Remove globalize junk */
3503 warn(n->loc, "`%s' defined but not used", n->label);
3504 /* Remove label from AST */
3505 astnode_remove(n);
3506 astnode_finalize(n);
3507 //symtab_remove(n->label); ### FIXME leads to crash sometimes...
3510 symtab_list_finalize(&list);
3514 * If the storage is of user-defined type, replaces it with
3515 * .DSB sizeof(type) * count
3517 static int reduce_user_storage(astnode *n, void *arg, astnode **next)
3519 astnode *type;
3520 astnode *count;
3521 astnode *byte_storage;
3522 symtab_entry *e;
3523 type = LHS(n);
3524 if (type->datatype == USER_DATATYPE) {
3525 /* Look it up */
3526 e = symtab_lookup(LHS(type)->ident);
3527 if (e != NULL) {
3528 /* Replace by DSB */
3529 count = RHS(n);
3530 byte_storage = astnode_create_storage(
3531 astnode_create_datatype(BYTE_DATATYPE, NULL, type->loc),
3532 astnode_create_arithmetic(
3533 MUL_OPERATOR,
3534 astnode_create_sizeof(
3535 astnode_create_identifier(LHS(type)->ident, n->loc),
3536 n->loc
3538 astnode_clone(count, n->loc),
3539 n->loc
3541 n->loc
3543 astnode_replace(n, byte_storage);
3544 astnode_finalize(n);
3545 *next = byte_storage;
3546 return 0;
3547 } else {
3548 err(n->loc, "unknown symbol `%s'", LHS(type)->ident);
3549 /* Remove from AST */
3550 astnode_remove(n);
3551 astnode_finalize(n);
3552 return 0;
3555 return 1;
3559 * Second major pass over AST.
3561 void astproc_second_pass(astnode *root)
3563 /* Table of callback functions for our purpose. */
3564 static astnodeprocmap map[] = {
3565 { IDENTIFIER_NODE, validate_ref },
3566 { SCOPE_NODE, validate_scoperef },
3567 { DOT_NODE, validate_dotref },
3568 { INDEX_NODE, validate_index },
3569 { PUBLIC_NODE, tag_public_symbols },
3570 { STORAGE_NODE, reduce_user_storage },
3571 { ALIGN_NODE, tag_align_symbols },
3572 { STRUC_DECL_NODE, noop },
3573 { UNION_DECL_NODE, noop },
3574 { ENUM_DECL_NODE, noop },
3575 { RECORD_DECL_NODE, noop },
3576 { 0, NULL }
3578 in_dataseg = 0; /* codeseg is default */
3579 /* Do the walk. */
3580 astproc_walk(root, NULL, map);
3581 /* */
3582 remove_unused_labels();
3585 /*---------------------------------------------------------------------------*/
3588 * Translates a single instruction.
3589 * @param n A node of type INSTRUCTION_NODE
3591 static int translate_instruction(astnode *n, void *arg, astnode **next)
3593 unsigned char c;
3594 /* Put the operand in final form */
3595 astnode *o = reduce_expression_complete( LHS(n) );
3596 assert(o == LHS(n));
3597 /* Convert (mnemonic, addressing mode) pair to opcode */
3598 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3599 /* Test if opcode is invalid */
3600 if (n->instr.opcode == 0xFF) {
3601 /* Check for the special cases */
3602 if ((n->instr.mnemonic == STX_MNEMONIC) && (n->instr.mode == ABSOLUTE_Y_MODE)) {
3603 /* Doesn't have absolute version, "scale down" to zeropage */
3604 n->instr.mode = ZEROPAGE_Y_MODE;
3605 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3606 } else if ((n->instr.mnemonic == STY_MNEMONIC) && (n->instr.mode == ABSOLUTE_X_MODE)) {
3607 /* Doesn't have absolute version, "scale down" to zeropage */
3608 n->instr.mode = ZEROPAGE_X_MODE;
3609 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3610 } else if (n->instr.mode == ABSOLUTE_MODE) {
3611 /* Check for relative addressing (these are parsed as absolute mode) */
3612 switch (n->instr.mnemonic) {
3613 case BCC_MNEMONIC:
3614 case BCS_MNEMONIC:
3615 case BEQ_MNEMONIC:
3616 case BMI_MNEMONIC:
3617 case BNE_MNEMONIC:
3618 case BPL_MNEMONIC:
3619 case BVC_MNEMONIC:
3620 case BVS_MNEMONIC:
3621 /* Fix addressing mode and opcode */
3622 n->instr.mode = RELATIVE_MODE;
3623 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3624 break;
3628 if (n->instr.opcode != 0xFF) {
3629 /* If the operand is a constant, see if we can "reduce" from
3630 absolute mode to zeropage mode */
3631 if ((astnode_is_type(o, INTEGER_NODE)) &&
3632 ((unsigned long)o->integer < 256) &&
3633 ((c = opcode_zp_equiv(n->instr.opcode)) != 0xFF)) {
3634 /* Switch to the zeromode version */
3635 n->instr.opcode = c;
3636 switch (n->instr.mode) {
3637 case ABSOLUTE_MODE: n->instr.mode = ZEROPAGE_MODE; break;
3638 case ABSOLUTE_X_MODE: n->instr.mode = ZEROPAGE_X_MODE;break;
3639 case ABSOLUTE_Y_MODE: n->instr.mode = ZEROPAGE_Y_MODE;break;
3640 default: /* Impossible to get here, right? */ break;
3643 /* If the operand is a constant, make sure it fits */
3644 if (astnode_is_type(o, INTEGER_NODE)) {
3645 switch (n->instr.mode) {
3646 case IMMEDIATE_MODE:
3647 case ZEROPAGE_MODE:
3648 case ZEROPAGE_X_MODE:
3649 case ZEROPAGE_Y_MODE:
3650 case PREINDEXED_INDIRECT_MODE:
3651 case POSTINDEXED_INDIRECT_MODE:
3652 /* Operand must fit in 8 bits */
3653 if (!IS_BYTE_VALUE(o->integer)) {
3654 warn(o->loc, "operand out of range; truncated");
3655 o->integer &= 0xFF;
3657 break;
3659 case ABSOLUTE_MODE:
3660 case ABSOLUTE_X_MODE:
3661 case ABSOLUTE_Y_MODE:
3662 case INDIRECT_MODE:
3663 /* Operand must fit in 8 bits */
3664 if ((unsigned long)o->integer >= 0x10000) {
3665 warn(o->loc, "operand out of range; truncated");
3666 o->integer &= 0xFFFF;
3668 break;
3670 case RELATIVE_MODE:
3671 /* Constant isn't allowed here is it? */
3672 break;
3674 default:
3675 break;
3678 else if (astnode_is_type(o, STRING_NODE)) {
3679 /* String operand doesn't make sense here */
3680 err(n->loc, "invalid operand");
3682 } else {
3683 /* opcode_get() returned 0xFF */
3684 err(n->loc, "invalid addressing mode");
3686 return 0;
3690 * ### Is this really such a good idea?
3692 static int maybe_merge_data(astnode *n, void *arg, astnode **next)
3694 astnode *temp;
3695 astnode *type;
3696 type = LHS(n);
3697 /* Only merge if no debugging, otherwise line information is lost. */
3698 if (!xasm_args.debug && astnode_is_type(*next, DATA_NODE) &&
3699 astnode_equal(type, LHS(*next)) ) {
3700 /* Merge ahead */
3701 temp = *next;
3702 astnode_finalize( astnode_remove_child_at(temp, 0) ); /* Remove datatype node */
3703 astnode_add_child(n, astnode_remove_children(temp) );
3704 astnode_finalize(temp);
3705 *next = n;
3706 } else {
3707 /* Reduce expressions to final form */
3708 for (n = n->first_child; n != NULL; n = temp->next_sibling) {
3709 temp = reduce_expression_complete(n);
3710 if (astnode_is_type(temp, INTEGER_NODE)) {
3711 /* Check that value fits according to datatype */
3712 switch (type->datatype) {
3713 case BYTE_DATATYPE:
3714 if (!IS_BYTE_VALUE(temp->integer)) {
3715 warn(temp->loc, "operand out of range; truncated");
3716 temp->integer &= 0xFF;
3718 break;
3720 case WORD_DATATYPE:
3721 if (!IS_WORD_VALUE(temp->integer)) {
3722 warn(temp->loc, "operand out of range; truncated");
3723 temp->integer &= 0xFFFF;
3725 break;
3727 case DWORD_DATATYPE:
3728 break;
3730 default:
3731 break;
3736 return 0;
3742 static int maybe_merge_storage(astnode *n, void *arg, astnode **next)
3744 astnode *temp;
3745 astnode *new_count;
3746 astnode *old_count;
3747 if (astnode_is_type(*next, STORAGE_NODE) &&
3748 astnode_equal(LHS(n), LHS(*next)) ) {
3749 /* Merge ahead */
3750 temp = *next;
3751 astnode_finalize( astnode_remove_child_at(temp, 0) ); /* Remove datatype node */
3752 old_count = RHS(n);
3753 /* Calculate new count */
3754 new_count = astnode_create_arithmetic(
3755 PLUS_OPERATOR,
3756 astnode_remove_child_at(temp, 0),
3757 astnode_clone(old_count, n->loc),
3758 n->loc
3760 new_count = reduce_expression_complete(new_count);
3761 astnode_replace(old_count, new_count);
3762 astnode_finalize(old_count);
3763 astnode_finalize(temp);
3764 *next = n;
3765 } else {
3766 reduce_expression_complete(RHS(n));
3768 return 0;
3772 * Replaces .proc by its label followed by statements.
3774 static int flatten_proc(astnode *n, void *arg, astnode **next)
3776 astnode *id = LHS(n);
3777 astnode *list = RHS(n);
3778 astnode_remove(id);
3779 id->type = LABEL_NODE;
3780 astnode_insert_child(list, id, 0);
3781 astnode *stmts = astnode_remove_children(list);
3782 astnode_replace(n, stmts);
3783 astnode_finalize(n);
3784 *next = stmts;
3785 return 0;
3791 static int flatten_var_decl(astnode *n, void *arg, astnode **next)
3793 astnode *stmts = LHS(n);
3794 astnode_remove_children(n);
3795 stmts->type = LABEL_NODE;
3796 astnode_replace(n, stmts);
3797 astnode_finalize(n);
3798 *next = stmts;
3799 return 0;
3803 * Third and final pass (if the output isn't pure 6502).
3804 * Translates instructions, merges data and storage nodes,
3805 * and reduces their operands to final form on the way.
3807 void astproc_third_pass(astnode *root)
3809 /* Table of callback functions for our purpose. */
3810 static astnodeprocmap map[] = {
3811 { INSTRUCTION_NODE, translate_instruction },
3812 { DATA_NODE, maybe_merge_data },
3813 { STORAGE_NODE, maybe_merge_storage },
3814 { VAR_DECL_NODE, flatten_var_decl },
3815 { PROC_NODE, flatten_proc },
3816 { STRUC_DECL_NODE, noop },
3817 { UNION_DECL_NODE, noop },
3818 { ENUM_DECL_NODE, noop },
3819 { RECORD_DECL_NODE, noop },
3820 { 0, NULL }
3822 in_dataseg = 0; /* codeseg is default */
3823 /* Do the walk. */
3824 astproc_walk(root, NULL, map);
3827 /*---------------------------------------------------------------------------*/
3830 * Evaluates the given expression, _without_ replacing it in the AST
3831 * (unlike astproc_reduce_expression() and friends).
3833 static astnode *eval_expression(astnode *expr)
3835 switch (astnode_get_type(expr)) {
3837 case ARITHMETIC_NODE: {
3838 astnode *lhs = eval_expression(LHS(expr));
3839 astnode *rhs = eval_expression(RHS(expr));
3840 switch (expr->oper) {
3841 /* Binary ops */
3842 case PLUS_OPERATOR:
3843 case MINUS_OPERATOR:
3844 case MUL_OPERATOR:
3845 case DIV_OPERATOR:
3846 case MOD_OPERATOR:
3847 case AND_OPERATOR:
3848 case OR_OPERATOR:
3849 case XOR_OPERATOR:
3850 case SHL_OPERATOR:
3851 case SHR_OPERATOR:
3852 case LT_OPERATOR:
3853 case GT_OPERATOR:
3854 case EQ_OPERATOR:
3855 case NE_OPERATOR:
3856 case LE_OPERATOR:
3857 case GE_OPERATOR:
3858 if (astnode_is_type(lhs, INTEGER_NODE)
3859 && astnode_is_type(rhs, INTEGER_NODE)) {
3860 /* Both sides are integer literals. */
3861 switch (expr->oper) {
3862 case PLUS_OPERATOR: return astnode_create_integer(lhs->integer + rhs->integer, expr->loc);
3863 case MINUS_OPERATOR: return astnode_create_integer(lhs->integer - rhs->integer, expr->loc);
3864 case MUL_OPERATOR: return astnode_create_integer(lhs->integer * rhs->integer, expr->loc);
3865 case DIV_OPERATOR: return astnode_create_integer(lhs->integer / rhs->integer, expr->loc);
3866 case MOD_OPERATOR: return astnode_create_integer(lhs->integer % rhs->integer, expr->loc);
3867 case AND_OPERATOR: return astnode_create_integer(lhs->integer & rhs->integer, expr->loc);
3868 case OR_OPERATOR: return astnode_create_integer(lhs->integer | rhs->integer, expr->loc);
3869 case XOR_OPERATOR: return astnode_create_integer(lhs->integer ^ rhs->integer, expr->loc);
3870 case SHL_OPERATOR: return astnode_create_integer(lhs->integer << rhs->integer, expr->loc);
3871 case SHR_OPERATOR: return astnode_create_integer(lhs->integer >> rhs->integer, expr->loc);
3872 case LT_OPERATOR: return astnode_create_integer(lhs->integer < rhs->integer, expr->loc);
3873 case GT_OPERATOR: return astnode_create_integer(lhs->integer > rhs->integer, expr->loc);
3874 case EQ_OPERATOR: return astnode_create_integer(lhs->integer == rhs->integer, expr->loc);
3875 case NE_OPERATOR: return astnode_create_integer(lhs->integer != rhs->integer, expr->loc);
3876 case LE_OPERATOR: return astnode_create_integer(lhs->integer <= rhs->integer, expr->loc);
3877 case GE_OPERATOR: return astnode_create_integer(lhs->integer >= rhs->integer, expr->loc);
3879 default: /* ### Error, actually */
3880 break;
3883 /* Use some mathematical identities... */
3884 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 0))
3885 && (expr->oper == PLUS_OPERATOR)) {
3886 /* 0+expr == expr */
3887 return astnode_clone(rhs, rhs->loc);
3888 } else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 0))
3889 && (expr->oper == PLUS_OPERATOR)) {
3890 /* expr+0 == expr */
3891 return astnode_clone(lhs, lhs->loc);
3892 } else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 1))
3893 && (expr->oper == MUL_OPERATOR)) {
3894 /* 1*expr == expr */
3895 return astnode_clone(rhs, rhs->loc);
3896 } else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 1))
3897 && ((expr->oper == MUL_OPERATOR) || (expr->oper == DIV_OPERATOR)) ) {
3898 /* expr*1 == expr */
3899 /* expr/1 == expr */
3900 return astnode_clone(lhs, lhs->loc);
3902 break;
3904 /* Unary ops */
3905 case NEG_OPERATOR:
3906 case NOT_OPERATOR:
3907 case LO_OPERATOR:
3908 case HI_OPERATOR:
3909 case UMINUS_OPERATOR:
3910 case BANK_OPERATOR:
3911 if (astnode_is_type(lhs, INTEGER_NODE)) {
3912 switch (expr->oper) {
3913 case NEG_OPERATOR: return astnode_create_integer(~lhs->integer, expr->loc);
3914 case NOT_OPERATOR: return astnode_create_integer(!lhs->integer, expr->loc);
3915 case LO_OPERATOR: return astnode_create_integer(lhs->integer & 0xFF, expr->loc);
3916 case HI_OPERATOR: return astnode_create_integer((lhs->integer >> 8) & 0xFF, expr->loc);
3917 case UMINUS_OPERATOR: return astnode_create_integer(-lhs->integer, expr->loc);
3918 default: break;
3921 break;
3922 } /* switch */
3923 } break;
3925 case INTEGER_NODE:
3926 return astnode_clone(expr, expr->loc);
3928 case IDENTIFIER_NODE: {
3929 symtab_entry *e = symtab_lookup(expr->ident);
3930 // ### assert(e->type == LABEL_SYMBOL);
3931 if (e->flags & ADDR_FLAG)
3932 return astnode_create_integer(e->address, expr->loc);
3933 } break;
3935 case CURRENT_PC_NODE:
3936 return astnode_create_integer(in_dataseg ? dataseg_pc : codeseg_pc, expr->loc);
3938 default:
3939 break;
3940 } /* switch */
3941 return 0;
3945 * Sets the address of the label to be the currently calculated PC.
3947 static int set_label_address(astnode *label, void *arg, astnode **next)
3949 symtab_entry *e = symtab_lookup(label->ident);
3950 // ### assert(e && (e->type == LABEL_SYMBOL));
3951 e->address = in_dataseg ? dataseg_pc : codeseg_pc;
3952 e->flags |= ADDR_FLAG;
3953 return 0;
3957 * Sets the current PC to the address specified by the ORG node.
3959 static int set_pc_from_org(astnode *org, void *arg, astnode **next)
3961 astnode *addr = LHS(org);
3962 assert(astnode_is_type(addr, INTEGER_NODE));
3963 if (in_dataseg)
3964 dataseg_pc = addr->integer;
3965 else
3966 codeseg_pc = addr->integer;
3967 return 0;
3971 * Ensures that the given symbol is defined.
3973 static int ensure_symbol_is_defined(astnode *id, void *arg, astnode **next)
3975 symtab_entry *e = symtab_lookup(id->ident);
3976 assert(e);
3977 if ((e->flags & EXTRN_FLAG) && !(e->flags & ERROR_UNDEFINED_FLAG)) {
3978 err(id->loc, "cannot generate pure binary because `%s' is not defined", id->ident);
3979 e->flags |= ERROR_UNDEFINED_FLAG;
3981 return 0;
3985 * Increments PC according to the size of the instruction.
3987 static int inc_pc_by_instruction(astnode *instr, void *arg, astnode **next)
3989 assert(!in_dataseg);
3990 if (LHS(instr)) {
3991 /* Has operand */
3992 unsigned char zp_op = opcode_zp_equiv(instr->instr.opcode);
3993 if (zp_op != 0xFF) {
3994 /* See if we can optimize this to a ZP-instruction */
3995 astnode *operand = eval_expression(LHS(instr));
3996 if (operand && astnode_is_type(operand, INTEGER_NODE)) {
3997 if ((operand->integer >= 0) && (operand->integer < 256)) {
3998 instr->instr.opcode = zp_op;
4000 astnode_finalize(operand);
4004 codeseg_pc += opcode_length(instr->instr.opcode);
4005 return 1;
4009 * Increments PC according to the size of the defined data.
4011 static int inc_pc_by_data(astnode *data, void *arg, astnode **next)
4013 astnode *type = LHS(data);
4014 int count = astnode_get_child_count(data) - 1;
4015 int nbytes;
4016 assert(!in_dataseg);
4017 switch (type->datatype) {
4018 case BYTE_DATATYPE: nbytes = count; break;
4019 case WORD_DATATYPE: nbytes = count * 2; break;
4020 case DWORD_DATATYPE: nbytes = count * 4; break;
4021 default:
4022 assert(0);
4023 break;
4025 codeseg_pc += nbytes;
4026 return 0;
4030 * Increments PC according to the size of the included binary.
4032 static int inc_pc_by_binary(astnode *node, void *arg, astnode **next)
4034 assert(!in_dataseg);
4035 codeseg_pc += node->binary.size;
4036 return 0;
4040 * Increments PC according to the size of the storage.
4042 static int inc_pc_by_storage(astnode *storage, void *arg, astnode **next)
4044 astnode *type = LHS(storage);
4045 assert(type->datatype == BYTE_DATATYPE);
4046 astnode *count = eval_expression(RHS(storage));
4047 if (count) {
4048 if (astnode_get_type(count) == INTEGER_NODE) {
4049 if (in_dataseg)
4050 dataseg_pc += count->integer;
4051 else
4052 codeseg_pc += count->integer;
4054 astnode_finalize(count);
4056 return 1;
4060 * This pass is only performed if the output format is pure 6502.
4061 * It ensures that it is actually possible to generate pure 6502
4062 * for this syntax tree (i.e. no external symbols).
4063 * Furthermore, it calculates the address of all labels, so that
4064 * everything is ready for the final output phase.
4066 void astproc_fourth_pass(astnode *root)
4068 int x;
4069 /* ### Should loop while there's a change in the address of
4070 one or more labels */
4071 for (x = 0; x < 2; ++x) {
4072 in_dataseg = 0; /* codeseg is default */
4073 dataseg_pc = 0;
4074 codeseg_pc = 0;
4075 /* Table of callback functions for our purpose. */
4076 static astnodeprocmap map[] = {
4077 { DATASEG_NODE, process_dataseg },
4078 { CODESEG_NODE, process_codeseg },
4079 { ORG_NODE, set_pc_from_org },
4080 { LABEL_NODE, set_label_address },
4081 { IDENTIFIER_NODE, ensure_symbol_is_defined },
4082 { INSTRUCTION_NODE, inc_pc_by_instruction },
4083 { DATA_NODE, inc_pc_by_data },
4084 { STORAGE_NODE, inc_pc_by_storage },
4085 { BINARY_NODE, inc_pc_by_binary },
4086 { STRUC_DECL_NODE, noop },
4087 { UNION_DECL_NODE, noop },
4088 { ENUM_DECL_NODE, noop },
4089 { RECORD_DECL_NODE, noop },
4090 { 0, NULL }
4092 /* Do the walk. */
4093 astproc_walk(root, NULL, map);
4097 /*---------------------------------------------------------------------------*/
4100 * Writes an instruction.
4102 static int write_instruction(astnode *instr, void *arg, astnode **next)
4104 FILE *fp = (FILE *)arg;
4105 unsigned char op = instr->instr.opcode;
4106 int len = opcode_length(op);
4107 fputc(op, fp);
4108 if (len > 1) {
4109 /* Write operand */
4110 astnode *operand = eval_expression(LHS(instr));
4111 if(!astnode_is_type(operand, INTEGER_NODE)) {
4112 /* ### This is rather fatal, it should be a literal by this point */
4113 err(instr->loc, "operand does not evaluate to literal");
4114 } else {
4115 int value = operand->integer;
4116 if (len == 2) {
4117 /* Check if it's a relative jump */
4118 switch (op) {
4119 case 0x10:
4120 case 0x30:
4121 case 0x50:
4122 case 0x70:
4123 case 0x90:
4124 case 0xB0:
4125 case 0xD0:
4126 case 0xF0:
4127 /* Calculate difference between target and address of next instruction */
4128 value = value - (codeseg_pc + 2);
4129 if (!IS_BYTE_VALUE(value)) {
4130 err(operand->loc, "branch out of range");
4131 value &= 0xFF;
4133 break;
4135 default:
4136 if (!IS_BYTE_VALUE(value)) {
4137 warn(operand->loc, "operand out of range; truncated");
4138 value &= 0xFF;
4140 break;
4142 fputc((unsigned char)value, fp);
4143 } else {
4144 assert(len == 3);
4145 if (!IS_WORD_VALUE(value)) {
4146 warn(operand->loc, "operand out of range; truncated");
4147 value &= 0xFFFF;
4149 fputc((unsigned char)value, fp);
4150 fputc((unsigned char)(value >> 8), fp);
4153 astnode_finalize(operand);
4155 codeseg_pc += opcode_length(instr->instr.opcode);
4156 return 0;
4160 * Writes data.
4162 static int write_data(astnode *data, void *arg, astnode **next)
4164 FILE *fp = (FILE *)arg;
4165 astnode *type = LHS(data);
4166 astnode *expr;
4167 assert(!in_dataseg);
4168 for (expr = RHS(data); expr != NULL; expr = astnode_get_next_sibling(expr) ) {
4169 int value;
4170 astnode *e = eval_expression(expr);
4171 assert(e->type == INTEGER_NODE);
4172 value = e->integer;
4173 switch (type->datatype) {
4174 case BYTE_DATATYPE:
4175 if (!IS_BYTE_VALUE(value)) {
4176 warn(expr->loc, "operand out of range; truncated");
4177 value &= 0xFF;
4179 fputc((unsigned char)value, fp);
4180 codeseg_pc += 1;
4181 break;
4183 case WORD_DATATYPE:
4184 if (!IS_WORD_VALUE(value)) {
4185 warn(expr->loc, "operand out of range; truncated");
4186 value &= 0xFFFF;
4188 fputc((unsigned char)value, fp);
4189 fputc((unsigned char)(value >> 8), fp);
4190 codeseg_pc += 2;
4191 break;
4193 case DWORD_DATATYPE:
4194 fputc((unsigned char)value, fp);
4195 fputc((unsigned char)(value >> 8), fp);
4196 fputc((unsigned char)(value >> 16), fp);
4197 fputc((unsigned char)(value >> 24), fp);
4198 codeseg_pc += 4;
4199 break;
4201 default:
4202 assert(0);
4203 break;
4205 astnode_finalize(e);
4207 return 0;
4211 * Writes storage (padding).
4213 static int write_storage(astnode *storage, void *arg, astnode **next)
4215 FILE *fp = (FILE *)arg;
4216 astnode *type = LHS(storage);
4217 astnode *count = eval_expression(RHS(storage));
4218 assert(type->datatype == BYTE_DATATYPE);
4219 assert(!in_dataseg);
4220 if (count) {
4221 int i;
4222 assert(astnode_get_type(count) == INTEGER_NODE);
4223 for (i = 0; i < count->integer; ++i)
4224 fputc(0, fp);
4225 codeseg_pc += count->integer;
4226 astnode_finalize(count);
4228 return 0;
4232 * Writes binary.
4234 static int write_binary(astnode *node, void *arg, astnode **next)
4236 FILE *fp = (FILE *)arg;
4237 assert(!in_dataseg);
4238 fwrite(node->binary.data, 1, node->binary.size, fp);
4239 codeseg_pc += node->binary.size;
4240 return 0;
4244 * This pass is only performed if the output format is pure 6502.
4245 * It writes the binary code.
4247 void astproc_fifth_pass(astnode *root)
4249 FILE *fp = fopen(xasm_args.output_file, "wb");
4250 if (!fp) {
4251 fprintf(stderr, "could not open '%s' for writing\n", xasm_args.output_file);
4252 ++err_count;
4253 return;
4255 /* Table of callback functions for our purpose. */
4256 static astnodeprocmap map[] = {
4257 { DATASEG_NODE, process_dataseg },
4258 { CODESEG_NODE, process_codeseg },
4259 { ORG_NODE, set_pc_from_org },
4260 { INSTRUCTION_NODE, write_instruction },
4261 { DATA_NODE, write_data },
4262 { STORAGE_NODE, write_storage },
4263 { BINARY_NODE, write_binary },
4264 { STRUC_DECL_NODE, noop },
4265 { UNION_DECL_NODE, noop },
4266 { ENUM_DECL_NODE, noop },
4267 { RECORD_DECL_NODE, noop },
4268 { 0, NULL }
4270 in_dataseg = 0; /* codeseg is default */
4271 dataseg_pc = 0;
4272 codeseg_pc = 0;
4273 /* Do the walk. */
4274 astproc_walk(root, fp, map);
4275 fclose(fp);