initialize repository
[xorcyst.git] / astproc.c
blob9fcba7915c5f90470efed3bb50a873811b531584
1 /*
2 * $Id: astproc.c,v 1.21 2007/11/11 22:35:22 khansen Exp $
3 * $Log: astproc.c,v $
4 * Revision 1.21 2007/11/11 22:35:22 khansen
5 * compile on mac
7 * Revision 1.20 2007/08/19 10:17:39 khansen
8 * allow symbols to be used without having been declared
10 * Revision 1.19 2007/08/12 18:58:12 khansen
11 * ability to generate pure 6502 binary (--pure-binary switch)
13 * Revision 1.18 2007/08/12 02:42:46 khansen
14 * prettify, const
16 * Revision 1.17 2007/08/09 22:06:10 khansen
17 * ability to pass in reference to local label as argument to macro
19 * Revision 1.16 2007/08/09 20:48:46 khansen
20 * disable buggy code that can cause crash
22 * Revision 1.15 2007/08/09 20:33:40 khansen
23 * progress
25 * Revision 1.14 2007/08/08 22:40:01 khansen
26 * improved symbol lookup, definitions must precede usage
28 * Revision 1.13 2007/07/22 13:33:26 khansen
29 * convert tabs to whitespaces
31 * Revision 1.12 2005/01/09 11:17:57 kenth
32 * xorcyst 1.4.5
33 * fixed bug in process_data(), merge_data()
34 * no longer truncation warning when fits in signed byte/word
36 * Revision 1.11 2005/01/05 02:28:13 kenth
37 * xorcyst 1.4.3
38 * support for anonymous unions
39 * fixed sizeof bug
41 * Revision 1.10 2004/12/29 21:44:41 kenth
42 * xorcyst 1.4.2
43 * static indexing, sizeof improved
45 * Revision 1.9 2004/12/25 02:22:35 kenth
46 * fixed bug in reduce_user_storage()
48 * Revision 1.8 2004/12/19 19:58:29 kenth
49 * xorcyst 1.4.0
51 * Revision 1.7 2004/12/18 16:57:39 kenth
52 * STORAGE_NODE(WORD/DWORD_DATATYPE) converts to BYTE
54 * Revision 1.6 2004/12/16 13:19:47 kenth
55 * xorcyst 1.3.5
57 * Revision 1.5 2004/12/14 01:49:05 kenth
58 * xorcyst 1.3.0
60 * Revision 1.4 2004/12/11 02:01:25 kenth
61 * added forward/backward branching
63 * Revision 1.3 2004/12/09 11:18:13 kenth
64 * added: warning, error node processing
66 * Revision 1.2 2004/12/06 04:52:24 kenth
67 * Major updates (xorcyst 1.1.0)
69 * Revision 1.1 2004/06/30 07:55:31 kenth
70 * Initial revision
74 /**
75 * (C) 2004 Kent Hansen
77 * The XORcyst is free software; you can redistribute it and/or modify
78 * it under the terms of the GNU General Public License as published by
79 * the Free Software Foundation; either version 2 of the License, or
80 * (at your option) any later version.
82 * The XORcyst is distributed in the hope that it will be useful,
83 * but WITHOUT ANY WARRANTY; without even the implied warranty of
84 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
85 * GNU General Public License for more details.
87 * You should have received a copy of the GNU General Public License
88 * along with The XORcyst; if not, write to the Free Software
89 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
92 /**
93 * This file contains functions that process the Abstract Syntax Tree (AST).
94 * After the assembly file has been parsed into an AST, a number of passes are
95 * made on it to process it and transform it. The functions here are
96 * concerned with things like
97 * - macro expansion
98 * - symbol table generation
99 * - equates substitution
100 * - constant folding
101 * - code and symbol validation
104 #include <stdlib.h>
105 #include <stdio.h>
106 #include <stdarg.h>
107 #include <string.h>
108 #include <assert.h>
109 #include "astproc.h"
110 #include "symtab.h"
111 #include "opcode.h"
112 #include "charmap.h"
113 #include "xasm.h"
115 #define IS_SIGNED_BYTE_VALUE(v) (((v) >= -128) && ((v) <= 127))
116 #define IS_UNSIGNED_BYTE_VALUE(v) (((v) >= 0) && ((v) <= 255))
117 #define IS_BYTE_VALUE(v) (IS_SIGNED_BYTE_VALUE(v) || IS_UNSIGNED_BYTE_VALUE(v))
119 #define IS_SIGNED_WORD_VALUE(v) (((v) >= -32768) && ((v) <= 32767))
120 #define IS_UNSIGNED_WORD_VALUE(v) (((v) >= 0) && ((v) <= 65535))
121 #define IS_WORD_VALUE(v) (IS_SIGNED_WORD_VALUE(v) || IS_UNSIGNED_WORD_VALUE(v))
123 /*---------------------------------------------------------------------------*/
125 /** Number of errors issued during processing. */
126 static int err_count = 0;
128 /** Number of warnings issued during processing. */
129 static int warn_count = 0;
131 /* Keeps track of number of global labels encountered. */
132 static int label_count = 0;
134 /* Keeps track of whether statement is in dataseg or codeseg. */
135 static int in_dataseg = 0;
137 /* Default symbol modifiers, i.e. ZEROPAGE_FLAG, PUBLIC_FLAG */
138 static int modifiers = 0;
140 /* Used when we are outputting pure 6502 binary */
141 static int dataseg_pc;
142 static int codeseg_pc;
144 /*---------------------------------------------------------------------------*/
146 /** Mapping from regular ASCII characters to custom character values.
147 * Used to transform .char arrays to regular .db arrays.
149 static unsigned char charmap[256];
152 * Resets the custom character map.
153 * Every ASCII character is mapped to itself.
155 static void reset_charmap()
157 int i;
158 for (i=0; i<256; i++) {
159 charmap[i] = (char)i;
163 /*---------------------------------------------------------------------------*/
164 /* Forward/backward branching stuff */
166 struct tag_forward_branch_info {
167 astnode *refs[128];
168 int index; /* Index into refs */
169 int counter;
172 typedef struct tag_forward_branch_info forward_branch_info;
174 struct tag_backward_branch_info {
175 astnode *decl;
176 int counter;
179 typedef struct tag_backward_branch_info backward_branch_info;
181 #define BRANCH_MAX 8
183 static forward_branch_info forward_branch[BRANCH_MAX];
185 static backward_branch_info backward_branch[BRANCH_MAX];
188 * Zaps forward/backward branch data.
190 static void branch_init()
192 int i, j;
193 for (i=0; i<BRANCH_MAX; i++) {
194 for (j=0; j<128; j++) {
195 forward_branch[i].refs[j] = NULL;
197 forward_branch[i].index = 0;
198 forward_branch[i].counter = 0;
199 backward_branch[i].decl = NULL;
200 backward_branch[i].counter = 0;
204 /*---------------------------------------------------------------------------*/
207 * Issues an error.
208 * @param loc File location of error
209 * @param fmt printf-style format string
211 static void err(location loc, const char *fmt, ...)
213 va_list ap;
214 va_start(ap, fmt);
216 /* Print error message w/ location info */
217 fprintf(stderr, "error: %s:", loc.file);
218 LOCATION_PRINT(stderr, loc);
219 fprintf(stderr, ": ");
220 vfprintf(stderr, fmt, ap);
221 fprintf(stderr, "\n");
223 va_end(ap);
225 /* Increase total error count */
226 err_count++;
230 * Issues a warning.
231 * @param loc File location of warning
232 * @param fmt printf-style format string
234 static void warn(location loc, const char *fmt, ...)
236 va_list ap;
237 if (!xasm_args.no_warn) {
238 va_start(ap, fmt);
239 /* Print warning message w/ location info */
240 fprintf(stderr, "warning: %s:", loc.file);
241 LOCATION_PRINT(stderr, loc);
242 fprintf(stderr, ": ");
243 vfprintf(stderr, fmt, ap);
244 fprintf(stderr, "\n");
245 va_end(ap);
248 /* Increase total warning count */
249 warn_count++;
253 * Gets the number of errors encountered during processing.
254 * @return Number of errors
256 int astproc_err_count()
258 return err_count;
261 /*---------------------------------------------------------------------------*/
264 * Gets the processor function for a node type from a map.
265 * Used by astproc_walk().
266 * @param type The node type
267 * @param map A mapping from node types to processor functions
269 static astnodeproc astproc_node_type_to_proc(astnode_type type, const astnodeprocmap *map)
271 /* Try all map entries */
272 for (; map->proc != NULL; map += 1) {
273 if (map->type == type) {
274 return map->proc; /* Match */
277 /* No match */
278 return NULL;
281 /*---------------------------------------------------------------------------*/
284 * Walks an abstract syntax tree recursively.
285 * @param n Node to walk
286 * @param arg Optional argument to pass to processor function
287 * @param map Mapping of node types to processor functions
289 static void astproc_walk_recursive(astnode *n, void *arg, const astnodeprocmap *map, astnode **next)
291 astnode *c;
292 astnode *t;
293 if (n == NULL) { return; }
294 /* Process this node if it has a processor function */
295 astnodeproc p = astproc_node_type_to_proc(astnode_get_type(n), map);
296 if (p != NULL) {
297 if (!p(n, arg, next)) return; /* Don't walk children */
299 /* Walk the node's children recursively */
300 for (c=n->first_child; c != NULL; c = t) {
301 t = c->next_sibling; /* default next node */
302 astproc_walk_recursive(c, arg, map, &t);
307 * Generic tree walker function.
308 * @param n Root
309 * @param arg General-purpose argument passed to each node handler function
310 * @param map Array of (nodetype, handler function) tuples
312 void astproc_walk(astnode *n, void *arg, const astnodeprocmap *map)
314 astnode *dummy;
315 astproc_walk_recursive(n, arg, map, &dummy);
318 /*---------------------------------------------------------------------------*/
321 * Don't do any processing of this node or its children on this pass.
323 static int noop(astnode *n, void *arg, astnode **next)
325 return 0;
329 * Substitutes an identifier node with subst_expr if the id is equal to subst_id.
330 * @param n A node of type IDENTIFIER_NODE
331 * @param arg Array of length 2, containing (expr, id) pair
333 static int substitute_id(astnode *n, void *arg, astnode **next)
335 /* arg is array containing expression and identifier */
336 astnode **array = (astnode **)arg;
337 astnode *subst_expr = array[0];
338 astnode *subst_id = array[1];
339 /* Test if this node and the identifier to replace are equal */
340 if (astnode_equal(n, subst_id)) {
341 /* They're equal, replace it by expression. */
342 astnode *cl = astnode_clone(subst_expr, n->loc);
343 /* ### Generalize: traverse all children, set the flag */
344 if (astnode_get_type(cl) == LOCAL_ID_NODE) {
345 cl->flags |= 0x80; /* don't globalize it */
347 astnode_replace(n, cl);
348 astnode_finalize(n);
350 return 1;
354 * Substitutes expr for id in list.
355 * Used by macro expander to substitute a macro body parameter name with the
356 * actual expression used in the macro expansion.
357 * @param expr An expression
358 * @param id An identifier
359 * @param list A list of statements (macro body)
361 static void substitute_expr_for_id(astnode *expr, astnode *id, astnode *list)
363 /* Prepare argument to astproc_walk */
364 astnode *array[2];
365 array[0] = expr;
366 array[1] = id;
367 /* Table of callback functions for our purpose. */
368 static astnodeprocmap map[] = {
369 { IDENTIFIER_NODE, substitute_id },
370 { 0, NULL }
372 /* Do the walk. */
373 astproc_walk(list, array, map);
376 /*---------------------------------------------------------------------------*/
379 * Globalizes a macro expanded local.
380 * This is done simply by concatenating the local label identifier with the
381 * global macro invocation counter.
382 * @param n A node of type LOCAL_LABEL_NODE or LOCAL_ID_NODE
383 * @param arg Namespace counter (int)
385 static int globalize_macro_expanded_local(astnode *n, void *arg, astnode **next)
387 /* Only globalize if it's a reference to a label defined in the macro */
388 if (!(n->flags & 0x80)) {
389 char str[16];
390 int count;
391 /* Make it global by appending the macro expansion counter to the id */
392 count = (int)arg;
393 sprintf(str, "#%d", count);
394 if (astnode_is_type(n, LOCAL_LABEL_NODE)) {
395 /* LOCAL_LABEL_NODE, use label field */
396 n->label = realloc(n->label, strlen(n->label)+strlen(str)+1);
397 strcat(n->label, str);
398 } else {
399 /* LOCAL_ID_NODE, use ident field */
400 assert(astnode_is_type(n, LOCAL_ID_NODE));
401 n->ident = realloc(n->ident, strlen(n->ident)+strlen(str)+1);
402 strcat(n->ident, str);
405 /* */
406 return 1;
410 * Globalizes all locals in the body of a macro expansion.
411 * Used by the macro expander to ensure that local labels in macro expansions
412 * are unique.
413 * @param exp_body The expanded macro body
414 * @param count Unique macro namespace counter
416 static void globalize_macro_expanded_locals(astnode *exp_body, int count)
418 /* Table of callback functions for our purpose. */
419 static astnodeprocmap map[] = {
420 { LOCAL_ID_NODE, globalize_macro_expanded_local },
421 { LOCAL_LABEL_NODE, globalize_macro_expanded_local },
422 { 0, NULL }
424 /* Do the walk. */
425 astproc_walk(exp_body, (void *)count, map);
429 * Expands a macro; that is, replaces a macro invocation in the AST with the
430 * macro body. Substitutes parameter names for values.
431 * @param n Must be a node of type MACRO_NODE
432 * @param arg Not used
434 static int expand_macro(astnode *n, void *arg, astnode **next)
436 astnode *decl;
437 astnode *decl_body;
438 astnode *exp_body;
439 astnode *formals;
440 astnode *actuals;
441 astnode *id;
442 astnode *expr;
443 int i;
444 /* Keeps track of the current/total number of macro expansions */
445 static int count = 0;
446 /* Get the name of the macro to expand */
447 id = astnode_get_child(n, 0);
448 /* Look up its definition in symbol table */
449 symtab_entry *e = symtab_lookup(id->ident);
450 /* If it's not in the symbol table, error. */
451 if (e == NULL) {
452 err(n->loc, "unknown macro or directive `%s'", id->ident);
453 /* Remove from AST */
454 astnode_remove(n);
455 astnode_finalize(n);
456 return 0;
458 else if (e->type != MACRO_SYMBOL) {
459 err(n->loc, "cannot expand `%s'; not a macro", e->id);
460 /* Remove from AST */
461 astnode_remove(n);
462 astnode_finalize(n);
463 return 0;
465 else {
466 /* e->def has pointer to proper MACRO_DECL_NODE */
467 decl = (astnode *)e->def;
468 /* Get the lists of formals and actuals */
469 formals = astnode_get_child(decl, 1);
470 actuals = astnode_get_child(n, 1);
471 /* Verify that argument count is correct */
472 if (astnode_get_child_count(formals) != astnode_get_child_count(actuals)) {
473 err(n->loc, "macro `%s' does not take %d argument(s)", id->ident, astnode_get_child_count(actuals) );
474 /* Remove from AST */
475 astnode_remove(n);
476 astnode_finalize(n);
477 return 0;
479 /* Expand the body */
480 decl_body = astnode_get_child(decl, 2);
481 exp_body = astnode_clone(decl_body, n->loc);
482 /* Substitute actuals for formals */
483 for (i=0; i<astnode_get_child_count(actuals); i++) {
484 /* The id to substitute */
485 id = astnode_get_child(formals, i);
486 /* The expression to substitute it with */
487 expr = astnode_get_child(actuals, i);
488 /* Do it! */
489 substitute_expr_for_id(expr, id, exp_body);
491 /* Make locals a bit more global */
492 globalize_macro_expanded_locals(exp_body, count);
493 /* Replace MACRO_NODE by the macro body instance */
494 astnode_replace(n, astnode_get_child(exp_body, 0));
495 /* Discard the replaced node */
496 astnode_finalize(n);
497 /* Increase macro expansion counter */
498 count++;
499 /* Set next node to start of body */
500 *next = exp_body;
502 /* */
503 return 0;
506 /*---------------------------------------------------------------------------*/
509 * Does constant folding of expression.
510 * If the expression can be folded, the original expression is replaced by the
511 * new one, and the original expression is finalized.
512 * @param expr Expression
513 * @return Original expression, if couldn't fold, otherwise new, folded expression
515 astnode *astproc_fold_constants(astnode *expr)
517 astnode *folded;
518 astnode *lhs;
519 astnode *rhs;
520 if (expr == NULL) { return NULL; }
521 folded = NULL;
522 if (astnode_is_type(expr, ARITHMETIC_NODE)) {
523 /* Fold operands recursively */
524 lhs = astproc_fold_constants(LHS(expr));
525 rhs = astproc_fold_constants(RHS(expr));
526 switch (expr->oper) {
527 /* Binary ops */
528 case PLUS_OPERATOR:
529 case MINUS_OPERATOR:
530 case MUL_OPERATOR:
531 case DIV_OPERATOR:
532 case MOD_OPERATOR:
533 case AND_OPERATOR:
534 case OR_OPERATOR:
535 case XOR_OPERATOR:
536 case SHL_OPERATOR:
537 case SHR_OPERATOR:
538 case LT_OPERATOR:
539 case GT_OPERATOR:
540 case EQ_OPERATOR:
541 case NE_OPERATOR:
542 case LE_OPERATOR:
543 case GE_OPERATOR:
544 /* See if it can be folded */
545 if ( (astnode_is_type(lhs, INTEGER_NODE)) &&
546 (astnode_is_type(rhs, INTEGER_NODE)) ) {
547 /* Both sides are integer literals, so fold. */
548 switch (expr->oper) {
549 case PLUS_OPERATOR: folded = astnode_create_integer(lhs->integer + rhs->integer, expr->loc); break;
550 case MINUS_OPERATOR: folded = astnode_create_integer(lhs->integer - rhs->integer, expr->loc); break;
551 case MUL_OPERATOR: folded = astnode_create_integer(lhs->integer * rhs->integer, expr->loc); break;
552 case DIV_OPERATOR: folded = astnode_create_integer(lhs->integer / rhs->integer, expr->loc); break;
553 case MOD_OPERATOR: folded = astnode_create_integer(lhs->integer % rhs->integer, expr->loc); break;
554 case AND_OPERATOR: folded = astnode_create_integer(lhs->integer & rhs->integer, expr->loc); break;
555 case OR_OPERATOR: folded = astnode_create_integer(lhs->integer | rhs->integer, expr->loc); break;
556 case XOR_OPERATOR: folded = astnode_create_integer(lhs->integer ^ rhs->integer, expr->loc); break;
557 case SHL_OPERATOR: folded = astnode_create_integer(lhs->integer << rhs->integer, expr->loc); break;
558 case SHR_OPERATOR: folded = astnode_create_integer(lhs->integer >> rhs->integer, expr->loc); break;
559 case LT_OPERATOR: folded = astnode_create_integer(lhs->integer < rhs->integer, expr->loc); break;
560 case GT_OPERATOR: folded = astnode_create_integer(lhs->integer > rhs->integer, expr->loc); break;
561 case EQ_OPERATOR: folded = astnode_create_integer(lhs->integer == rhs->integer, expr->loc); break;
562 case NE_OPERATOR: folded = astnode_create_integer(lhs->integer != rhs->integer, expr->loc); break;
563 case LE_OPERATOR: folded = astnode_create_integer(lhs->integer <= rhs->integer, expr->loc); break;
564 case GE_OPERATOR: folded = astnode_create_integer(lhs->integer >= rhs->integer, expr->loc); break;
566 default: /* Error, actually */
567 folded = expr;
568 break;
570 if (folded != expr) {
571 /* Replace expression by folded one. */
572 astnode_replace(expr, folded);
573 astnode_finalize(expr);
574 return folded;
577 else if ( (astnode_is_type(lhs, STRING_NODE)) &&
578 (astnode_is_type(rhs, STRING_NODE)) ) {
579 /* Both sides are string literals. */
580 /* Folding is defined only for certain operators. */
581 switch (expr->oper) {
582 case PLUS_OPERATOR:
583 /* String concatenation. */
584 folded = astnode_create(STRING_NODE, expr->loc);
585 folded->string = (char *)malloc(strlen(lhs->string) + strlen(rhs->string) + 1);
586 if (folded->string != NULL) {
587 strcpy(folded->string, lhs->string);
588 strcat(folded->string, rhs->string);
590 break;
592 /* String comparison. */
593 case LT_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) < 0, expr->loc); break;
594 case GT_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) > 0, expr->loc); break;
595 case EQ_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) == 0, expr->loc); break;
596 case NE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) != 0, expr->loc); break;
597 case LE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) <= 0, expr->loc); break;
598 case GE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) >= 0, expr->loc); break;
600 default:
601 folded = expr;
602 break;
604 if (folded != expr) {
605 /* Replace expression by folded one. */
606 astnode_replace(expr, folded);
607 astnode_finalize(expr);
608 return folded;
611 else if ((astnode_get_type(lhs) == STRING_NODE) &&
612 (astnode_get_type(rhs) == INTEGER_NODE) &&
613 (expr->oper == PLUS_OPERATOR)) {
614 /* Left side is string and right side is integer.
615 Result is a string. */
616 char str[32];
617 sprintf(str, "%d", rhs->integer);
618 folded = astnode_create(STRING_NODE, expr->loc);
619 folded->string = (char *)malloc(strlen(lhs->string) + strlen(str) + 1);
620 if (folded->string != NULL) {
621 strcpy(folded->string, lhs->string);
622 strcat(folded->string, str);
624 /* Replace expression by folded one. */
625 astnode_replace(expr, folded);
626 astnode_finalize(expr);
627 return folded;
629 else if ((astnode_get_type(rhs) == STRING_NODE) &&
630 (astnode_get_type(lhs) == INTEGER_NODE) &&
631 (expr->oper == PLUS_OPERATOR)) {
632 /* Left side is integer and right side is string.
633 Result is a string. */
634 char str[32];
635 sprintf(str, "%d", lhs->integer);
636 folded = astnode_create(STRING_NODE, expr->loc);
637 folded->string = (char *)malloc(strlen(str) + strlen(rhs->string) + 1);
638 if (folded->string != NULL) {
639 strcpy(folded->string, str);
640 strcat(folded->string, rhs->string);
642 /* Replace expression by folded one. */
643 astnode_replace(expr, folded);
644 astnode_finalize(expr);
645 return folded;
647 /* Use some mathematical identities... */
648 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 0))
649 && (expr->oper == PLUS_OPERATOR)) {
650 /* 0+expr == expr */
651 astnode_remove_child(expr, rhs);
652 astnode_replace(expr, rhs);
653 return rhs;
655 else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 0))
656 && (expr->oper == PLUS_OPERATOR)) {
657 /* expr+0 == expr */
658 astnode_remove_child(expr, lhs);
659 astnode_replace(expr, lhs);
660 return lhs;
662 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 1))
663 && (expr->oper == MUL_OPERATOR)) {
664 /* 1*expr == expr */
665 astnode_remove_child(expr, rhs);
666 astnode_replace(expr, rhs);
667 return rhs;
669 else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 1))
670 && ((expr->oper == MUL_OPERATOR) || (expr->oper == DIV_OPERATOR)) ) {
671 /* expr*1 == expr */
672 /* expr/1 == expr */
673 astnode_remove_child(expr, lhs);
674 astnode_replace(expr, lhs);
675 return lhs;
677 else {
678 /* No chance of folding this one. */
680 break;
682 /* Unary ops */
683 case NEG_OPERATOR:
684 case NOT_OPERATOR:
685 case LO_OPERATOR:
686 case HI_OPERATOR:
687 case UMINUS_OPERATOR:
688 case BANK_OPERATOR:
689 /* See if it can be folded */
690 if (astnode_is_type(lhs, INTEGER_NODE)) {
691 /* Fold it. */
692 switch (expr->oper) {
693 case NEG_OPERATOR: folded = astnode_create_integer(~lhs->integer, expr->loc); break;
694 case NOT_OPERATOR: folded = astnode_create_integer(!lhs->integer, expr->loc); break;
695 case LO_OPERATOR: folded = astnode_create_integer(lhs->integer & 0xFF, expr->loc); break;
696 case HI_OPERATOR: folded = astnode_create_integer((lhs->integer >> 8) & 0xFF, expr->loc); break;
697 case UMINUS_OPERATOR: folded = astnode_create_integer(-lhs->integer, expr->loc); break;
698 default: break;
700 /* Replace expression by folded one. */
701 astnode_replace(expr, folded);
702 astnode_finalize(expr);
703 return folded;
705 else {
706 /* Couldn't fold this one. */
708 break;
711 /* Couldn't fold it, return original expression */
712 return expr;
715 /*---------------------------------------------------------------------------*/
718 * Substitutes identifier if it has a constant definition in symbol table.
719 * @param expr Node of type IDENTIFIER_NODE
721 static astnode *substitute_ident(astnode *expr)
723 astnode *c;
724 symtab_entry *e;
725 /* Look it up in symbol table */
726 e = symtab_lookup(expr->ident);
727 if (e != NULL) {
728 /* Found it. Test if it's a define. */
729 if (e->type == CONSTANT_SYMBOL) {
730 /* This is a defined symbol that should be
731 replaced by the expression it stands for */
732 c = astnode_clone((astnode *)e->def, expr->loc);
733 astnode_replace(expr, c);
734 astnode_finalize(expr);
735 expr = c;
738 else {
739 /* Didn't find it in symbol table. */
741 return expr;
745 * Substitutes sizeof with proper constant.
746 * @param expr Node of type SIZEOF_NODE
748 static astnode *reduce_sizeof(astnode *expr)
750 int ok;
751 astnode *c;
752 astnode *id;
753 astnode *type;
754 astnode *count;
755 symtab_entry *e;
757 count = NULL;
758 if (astnode_is_type(LHS(expr), IDENTIFIER_NODE)) {
759 /* Identifier might be the name of a user-defined type, OR
760 it might be the name of a variable of a user-defined type */
761 type = NULL;
762 /* Look it up */
763 id = LHS(expr);
764 e = symtab_global_lookup(id->ident);
765 if (e != NULL) {
766 switch (e->type) {
767 case STRUC_SYMBOL:
768 case UNION_SYMBOL:
769 case RECORD_SYMBOL:
770 case ENUM_SYMBOL:
771 type = astnode_create_datatype(USER_DATATYPE, astnode_clone(id, id->loc), id->loc);
772 break;
774 case VAR_SYMBOL:
775 type = astnode_clone(LHS(e->def), id->loc);
776 if (astnode_is_type(e->def, STORAGE_NODE)) {
777 count = astnode_clone(RHS(e->def), id->loc);
779 else {
780 count = astnode_create_integer(astnode_get_child_count(e->def)-1, id->loc);
782 break;
784 default:
785 /* Can't take sizeof of this symbol type */
786 break;
789 if (type == NULL) {
790 /* Unknown */
791 type = astnode_create_datatype(USER_DATATYPE, astnode_clone(id, id->loc), id->loc);
793 /* Replace identifier by datatype node */
794 astnode_replace(id, type);
795 astnode_finalize(id);
797 type = LHS(expr);
798 switch (type->datatype) {
799 case BYTE_DATATYPE:
800 case CHAR_DATATYPE:
801 c = astnode_create_integer(1, expr->loc);
802 astnode_replace(expr, c);
803 astnode_finalize(expr);
804 expr = c;
805 break;
807 case WORD_DATATYPE:
808 c = astnode_create_integer(2, expr->loc);
809 astnode_replace(expr, c);
810 astnode_finalize(expr);
811 expr = c;
812 break;
814 case DWORD_DATATYPE:
815 c = astnode_create_integer(4, expr->loc);
816 astnode_replace(expr, c);
817 astnode_finalize(expr);
818 expr = c;
819 break;
821 case USER_DATATYPE:
822 /* Look up the data type in symbol table */
823 id = LHS(type);
824 e = symtab_global_lookup(id->ident);
825 ok = 0;
826 if (e != NULL) {
827 switch (e->type) {
828 case STRUC_SYMBOL:
829 case UNION_SYMBOL:
830 /* Datatype is defined, replace sizeof with proper expression */
831 c = astnode_clone((astnode *)(e->struc.size), ((astnode *)(e->struc.size))->loc);
832 astnode_replace(expr, c);
833 astnode_finalize(expr);
834 expr = c;
835 ok = 1;
836 break;
838 case RECORD_SYMBOL:
839 case ENUM_SYMBOL:
840 /* 1 byte */
841 c = astnode_create_integer(1, expr->loc);
842 astnode_replace(expr, c);
843 astnode_finalize(expr);
844 expr = c;
845 ok = 1;
846 break;
848 default:
849 /* Dunno the size of this symbol type */
850 break;
853 if (!ok) {
854 /* Datatype not defined, error */
855 err(expr->loc, "size of `%s' is unknown", id->ident);
856 /* Replace by 1 */
857 c = astnode_create_integer(1, expr->loc);
858 astnode_replace(expr, c);
859 astnode_finalize(expr);
860 return c;
862 break;
864 default:
865 err(expr->loc, "substitute_sizeof(): unknown type");
866 break;
868 if (count != NULL) {
869 c = astnode_create_arithmetic(
870 MUL_OPERATOR,
871 astnode_clone(expr, expr->loc),
872 count,
873 expr->loc
875 astnode_replace(expr, c);
876 astnode_finalize(expr);
877 expr = c;
879 return expr;
883 * Substitutes A::B with an expression.
884 * If A is a struct: substitute with offset of B
885 * If A is a union: substitute with 0
886 * If A is an enumeration: substitute with value for B
887 * @param expr Node of type SCOPE_NODE
889 static astnode *reduce_scope(astnode *expr)
891 symtab_entry *ns;
892 symtab_entry *sym;
893 astnode *c;
894 astnode *namespace;
895 astnode *symbol;
896 /* Look up the namespace */
897 namespace = LHS(expr);
898 ns = symtab_lookup(namespace->ident);
899 if (ns != NULL) {
900 /* Look up the local symbol */
901 symtab_push(ns->symtab);
902 symbol = RHS(expr);
903 sym = symtab_lookup(symbol->ident);
904 if (sym != NULL) {
905 /* See if we can replace it */
906 switch (ns->type) {
907 case STRUC_SYMBOL:
908 case UNION_SYMBOL:
909 case RECORD_SYMBOL:
910 /* Replace with field offset */
911 c = astnode_clone(sym->field.offset, sym->field.offset->loc);
912 astnode_replace(expr, c);
913 astnode_finalize(expr);
914 expr = c;
915 break;
917 case ENUM_SYMBOL:
918 /* Replace with enum entry value */
919 c = astnode_clone(sym->def, sym->def->loc);
920 astnode_replace(expr, c);
921 astnode_finalize(expr);
922 expr = c;
923 break;
925 default:
926 break;
929 symtab_pop();
931 return expr;
934 static astnode *reduce_expression(astnode *expr);
937 * Handles remainder of fields in A.B.C.D . ..., where one or more fields may be indexed.
938 * @param expr Node of type DOT_NODE, INDEX_NODE or IDENTIFIER_NODE
940 static astnode *reduce_dot_recursive(astnode *expr)
942 astnode *term;
943 astnode *offset;
944 astnode *left;
945 astnode *right;
946 astnode *type;
947 symtab_entry *field;
948 symtab_entry *def;
949 astnode *index = NULL;
950 /* Get identifiers involved: 'right' is field in 'left' */
951 left = LHS(expr);
952 if (astnode_is_type(left, INDEX_NODE)) {
953 left = LHS(left); /* Need identifier */
955 right = RHS(expr);
956 if (astnode_is_type(right, DOT_NODE)) {
957 right = LHS(right); /* Need identifier */
959 if (astnode_is_type(right, INDEX_NODE)) {
960 index = RHS(right);
961 right = LHS(right); /* Need identifier */
963 /* Lookup 'right' in 'left's symbol table (on stack) */
964 field = symtab_lookup(right->ident);
965 /* Look up variable's type definition */
966 type = LHS(field->def);
967 /* Copy its offset */
968 offset = astnode_clone(field->field.offset, right->loc);
969 if (index != NULL) {
970 /* Create expression: identifier + sizeof(datatype) * index */
971 offset = astnode_create_arithmetic(
972 PLUS_OPERATOR,
973 offset,
974 astnode_create_arithmetic(
975 MUL_OPERATOR,
976 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
977 astnode_clone(index, index->loc),
978 index->loc
980 expr->loc
983 /* See if more subfields to process */
984 expr = RHS(expr);
985 if (astnode_is_type(expr, DOT_NODE)) {
986 /* Next field */
987 def = symtab_global_lookup(LHS(type)->ident);
988 symtab_push(def->symtab);
989 term = reduce_dot_recursive(expr);
990 symtab_pop();
991 /* Construct sum */
992 offset = astnode_create_arithmetic(
993 PLUS_OPERATOR,
994 offset,
995 term,
996 expr->loc
999 return offset;
1003 * Transforms A.B.C.D . ... to A + offset(B) + offset(C) + ...
1004 * No error checking, since validate_dotref() should have been called previously.
1005 * @param expr Node of type DOT_NODE
1007 static astnode *reduce_dot(astnode *expr)
1009 symtab_entry *father;
1010 symtab_entry *def;
1011 astnode *type;
1012 astnode *left;
1013 astnode *term1;
1014 astnode *term2;
1015 astnode *sum;
1016 astnode *index = NULL;
1017 /* Look up parent in global symbol table */
1018 left = LHS(expr); /* expr := left . right */
1019 if (astnode_is_type(left, INDEX_NODE)) {
1020 index = RHS(left);
1021 left = LHS(left); /* Need identifier */
1023 father = symtab_lookup(left->ident);
1024 /* Look up variable's type definition */
1025 type = LHS(father->def); /* DATATYPE_NODE */
1026 def = symtab_lookup(LHS(type)->ident);
1027 /* 1st term of sum is the leftmost structure identifier */
1028 term1 = astnode_clone(left, left->loc);
1029 if (index != NULL) {
1030 /* Create expression: identifier + sizeof(datatype) * index */
1031 term1 = astnode_create_arithmetic(
1032 PLUS_OPERATOR,
1033 term1,
1034 astnode_create_arithmetic(
1035 MUL_OPERATOR,
1036 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
1037 astnode_clone(index, index->loc),
1038 index->loc
1040 expr->loc
1043 /* Add offsets recursively */
1044 symtab_push(def->symtab);
1045 term2 = reduce_dot_recursive(expr);
1046 symtab_pop();
1047 /* Calculate final sum */
1048 sum = astnode_create_arithmetic(
1049 PLUS_OPERATOR,
1050 term1,
1051 term2,
1052 expr->loc
1054 sum = reduce_expression(sum);
1055 /* Replace dotted expression by sum */
1056 astnode_replace(expr, sum);
1057 astnode_finalize(expr);
1058 return sum;
1062 * Reduces MASK operation to a field mask.
1063 * @param mask A node of type MASK_NODE
1065 static astnode *reduce_mask(astnode *mask)
1067 symtab_entry *ns;
1068 symtab_entry *sym;
1069 astnode *c;
1070 astnode *namespace;
1071 astnode *symbol;
1072 astnode *expr;
1073 /* Child is a scope node, record::field */
1074 expr = LHS(mask);
1075 /* Look up the namespace */
1076 namespace = LHS(expr);
1077 ns = symtab_lookup(namespace->ident);
1078 if (ns != NULL) {
1079 /* Make sure it's a record */
1080 if (ns->type != RECORD_SYMBOL) {
1081 err(expr->loc, "`%s' is not a record");
1082 /* Replace by 0 */
1083 c = astnode_create_integer(0, expr->loc);
1084 astnode_replace(mask, c);
1085 astnode_finalize(mask);
1086 expr = c;
1088 else {
1089 /* Look up the local symbol */
1090 symtab_push(ns->symtab);
1091 symbol = RHS(expr);
1092 sym = symtab_lookup(symbol->ident);
1093 if (sym != NULL) {
1094 /* Calculate field mask */
1095 // mask = ((1 << width) - 1) << offset
1096 c = astnode_create_arithmetic(
1097 SHL_OPERATOR,
1098 astnode_create_arithmetic(
1099 MINUS_OPERATOR,
1100 astnode_create_arithmetic(
1101 SHL_OPERATOR,
1102 astnode_create_integer(1, expr->loc),
1103 astnode_clone(sym->field.size, expr->loc),
1104 expr->loc
1106 astnode_create_integer(1, expr->loc),
1107 expr->loc
1109 astnode_clone(sym->field.offset, expr->loc),
1110 expr->loc
1112 c = reduce_expression(c);
1113 astnode_replace(mask, c);
1114 astnode_finalize(mask);
1115 expr = c;
1117 symtab_pop();
1120 return expr;
1124 * Reduces identifier[expression] to identifier + sizeof(identifier type) * expression
1126 static astnode *reduce_index(astnode *expr)
1128 symtab_entry *e;
1129 astnode *c;
1130 astnode *type;
1131 astnode *id;
1132 astnode *index;
1133 id = LHS(expr);
1134 index = reduce_expression(RHS(expr));
1135 /* Lookup identifier */
1136 e = symtab_lookup(id->ident);
1137 /* Get its datatype */
1138 type = LHS(e->def);
1139 /* Create expression: identifier + sizeof(datatype) * index */
1140 c = astnode_create_arithmetic(
1141 PLUS_OPERATOR,
1142 astnode_clone(id, id->loc),
1143 astnode_create_arithmetic(
1144 MUL_OPERATOR,
1145 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
1146 astnode_clone(index, index->loc),
1147 index->loc
1149 expr->loc
1151 /* Replace index expression */
1152 astnode_replace(expr, c);
1153 astnode_finalize(expr);
1154 /* Return the new expression */
1155 return c;
1159 * Substitutes all identifiers that represent EQU defines with their
1160 * corresponding expression.
1161 * @param expr The expression whose defines to substitute
1163 static astnode *substitute_defines(astnode *expr)
1165 switch (astnode_get_type(expr)) {
1166 case ARITHMETIC_NODE:
1167 substitute_defines(LHS(expr));
1168 substitute_defines(RHS(expr));
1169 break;
1171 case IDENTIFIER_NODE:
1172 expr = substitute_ident(expr);
1173 break;
1175 case SIZEOF_NODE:
1176 expr = reduce_sizeof(expr);
1177 break;
1179 case MASK_NODE:
1180 expr = reduce_mask(expr);
1181 break;
1183 case INDEX_NODE:
1184 substitute_defines(LHS(expr));
1185 substitute_defines(RHS(expr));
1186 break;
1188 case DOT_NODE:
1189 substitute_defines(LHS(expr));
1190 substitute_defines(RHS(expr));
1191 break;
1193 default:
1194 /* Nada */
1195 break;
1197 return expr;
1203 static astnode *reduce_highlevel_constructs(astnode *expr)
1205 switch (astnode_get_type(expr)) {
1206 case ARITHMETIC_NODE:
1207 reduce_highlevel_constructs(LHS(expr));
1208 reduce_highlevel_constructs(RHS(expr));
1209 break;
1211 case SCOPE_NODE:
1212 expr = reduce_scope(expr);
1213 break;
1215 case DOT_NODE:
1216 expr = reduce_dot(expr);
1217 break;
1219 case INDEX_NODE:
1220 expr = reduce_index(expr);
1221 break;
1223 default:
1224 /* Nada */
1225 break;
1227 return expr;
1231 * Really reduces an expression.
1232 * @param expr Expression to attempt to reduce
1234 static astnode *reduce_expression_complete(astnode *expr)
1236 return astproc_fold_constants( reduce_highlevel_constructs( substitute_defines(expr) ) );
1240 * Reduces an expression.
1241 * It does two things:
1242 * 1. Substitute all equates by their value
1243 * 2. Folds constants in the resulting expression
1244 * If the expression is reduced, the original expression is replaced by the
1245 * new one, the original is finalized, and a pointer to the new expression
1246 * is returned.
1247 * If the expression is not reduced, the original pointer is returned.
1249 static astnode *reduce_expression(astnode *expr)
1251 return astproc_fold_constants( substitute_defines(expr) );
1255 * Reduces RECORD instance to a single byte (DB statement).
1256 * @param r Record's symbol table entry
1257 * @param expr Record initializer
1258 * @param flat List on which to append the reduced form
1260 static void reduce_record(symtab_entry *r, astnode *init, astnode *flat)
1262 ordered_field_list *list;
1263 symtab_entry *e;
1264 astnode *val;
1265 astnode *term;
1266 astnode *result;
1267 astnode *mask;
1268 astnode *repl;
1269 /* Validate initializer */
1270 if (!astnode_is_type(init, STRUC_NODE)) {
1271 err(init->loc, "record initializer expected");
1272 return;
1274 /* Go through fields */
1275 symtab_push(r->symtab);
1276 result = astnode_create_integer(0, init->loc);
1277 for (val = init->first_child, list = r->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1278 if (astnode_is_type(val, NULL_NODE)) {
1279 continue;
1281 if (astnode_is_type(val, STRUC_NODE)) {
1282 err(init->loc, "record field initializer expected");
1283 continue;
1285 /* Get field definition */
1286 e = list->entry;
1287 /* Calculate field mask */
1288 // mask = ((1 << width) - 1) << offset
1289 mask = astnode_create_arithmetic(
1290 SHL_OPERATOR,
1291 astnode_create_arithmetic(
1292 MINUS_OPERATOR,
1293 astnode_create_arithmetic(
1294 SHL_OPERATOR,
1295 astnode_create_integer(1, val->loc),
1296 astnode_clone(e->field.size, val->loc),
1297 val->loc
1299 astnode_create_integer(1, val->loc),
1300 val->loc
1302 astnode_clone(e->field.offset, val->loc),
1303 val->loc
1305 /* Shift val left e->field.offset bits, AND with mask */
1306 term = astnode_create_arithmetic(
1307 AND_OPERATOR,
1308 astnode_create_arithmetic(
1309 SHL_OPERATOR,
1310 astnode_clone(val, val->loc),
1311 astnode_clone(e->field.offset, val->loc),
1312 val->loc
1314 mask,
1315 val->loc
1317 /* OR the value with the result so far */
1318 result = astnode_create_arithmetic(
1319 OR_OPERATOR,
1320 result,
1321 term,
1322 val->loc
1324 result = reduce_expression(result);
1326 /* Determine reason for stopping loop */
1327 if (val != NULL) {
1328 err(init->loc, "too many field initializers");
1330 /* Make byte data node (packed record value) */
1331 repl = astnode_create_data(
1332 astnode_create_datatype(BYTE_DATATYPE, NULL, init->loc),
1333 result,
1334 init->loc
1336 /* Add to list */
1337 astnode_add_child(flat, repl);
1338 /* Restore old symbol table */
1339 symtab_pop();
1343 * Reduces ENUM instance to DB.
1344 * @param e Enumeration's symbol table entry
1345 * @param expr Expression
1346 * @param flat List on which to append the reduced form
1348 static void reduce_enum(symtab_entry *e, astnode *expr, astnode *list)
1350 symtab_entry *sym;
1351 astnode *repl;
1352 if (!astnode_is_type(expr, IDENTIFIER_NODE)) {
1353 err(expr->loc, "identifier expected");
1355 else {
1356 /* Look up the enumeration symbol */
1357 symtab_push(e->symtab);
1358 sym = symtab_lookup(expr->ident);
1359 symtab_pop();
1360 /* Make byte data node (symbol value) */
1361 repl = astnode_create_data(
1362 astnode_create_datatype(BYTE_DATATYPE, NULL, expr->loc),
1363 astnode_clone(sym->def, expr->loc),
1364 expr->loc
1366 /* Add to list */
1367 astnode_add_child(list, repl);
1371 static void flatten_struc_recursive(symtab_entry *s, astnode *init, astnode *flat);
1374 * Flattens a union initializer to a sequence of native data values.
1375 * Verify similar to flattening of structure, but only single field allowed.
1376 * @param s Union's symbol table definition
1377 * @param init Union initializer
1378 * @param flat List on which to append the flattened form
1380 static void flatten_union_recursive(symtab_entry *s, astnode *init, astnode *flat)
1382 astnode *fill;
1383 astnode *type;
1384 astnode *count;
1385 symtab_entry *e;
1386 symtab_entry *t;
1387 astnode *val;
1388 astnode *valvals;
1389 astnode *temp;
1390 ordered_field_list *list;
1391 int num;
1392 /* Validate initializer */
1393 if (!astnode_is_type(init, STRUC_NODE)) {
1394 err(init->loc, "union initializer expected");
1395 return;
1397 /* Go through fields */
1398 symtab_push(s->symtab);
1399 fill = astnode_clone(s->struc.size, flat->loc);
1400 for (val = init->first_child, list = s->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1401 if (astnode_is_type(val, NULL_NODE)) {
1402 continue;
1404 if (!astnode_equal(fill, s->struc.size)) {
1405 err(init->loc, "only one field of union can be initialized");
1406 continue;
1408 /* Get field definition */
1409 e = list->entry;
1410 /* Symbol definition is STORAGE_NODE w/ two children: type and count */
1411 type = LHS(e->def);
1412 count = RHS(e->def);
1413 /* Decide what to do based on field type and value */
1414 switch (type->datatype) {
1415 case BYTE_DATATYPE:
1416 case CHAR_DATATYPE:
1417 case WORD_DATATYPE:
1418 case DWORD_DATATYPE:
1419 if (astnode_is_type(val, STRUC_NODE)) {
1420 /* Handle multi-value array */
1421 temp = astnode_clone(val, val->loc);
1422 valvals = astnode_remove_children(temp);
1423 astnode_finalize(temp);
1424 astnode_add_child(flat,
1425 astnode_create_data(
1426 astnode_create_datatype(type->datatype, NULL, type->loc),
1427 valvals,
1428 val->loc
1431 num = astnode_get_child_count(val);
1432 } else {
1433 /* Output single value */
1434 astnode_add_child(flat,
1435 astnode_create_data(
1436 astnode_create_datatype(type->datatype, NULL, type->loc),
1437 astnode_clone(val, val->loc),
1438 val->loc
1441 num = astnode_is_type(val, STRING_NODE) ? strlen(val->string) : 1;
1443 if (num > count->integer) {
1444 err(val->loc, "initializer for field `%s' exceeds field size", e->id);
1446 /* Fill in remainder of field if necessary: count - 1 */
1447 else if (count->integer > num) {
1448 astnode_add_child(flat,
1449 astnode_create_storage(
1450 astnode_create_datatype(type->datatype, NULL, type->loc),
1451 astproc_fold_constants(
1452 astnode_create_arithmetic(
1453 MINUS_OPERATOR,
1454 astnode_clone(count, count->loc),
1455 astnode_create_integer(num, flat->loc),
1456 count->loc
1459 val->loc
1463 break;
1465 case USER_DATATYPE:
1466 /* Look up user type definition */
1467 t = symtab_global_lookup(LHS(type)->ident);
1468 switch (t->type) {
1469 case STRUC_SYMBOL:
1470 flatten_struc_recursive(t, val, flat);
1471 break;
1473 case UNION_SYMBOL:
1474 flatten_union_recursive(t, val, flat);
1475 break;
1477 case RECORD_SYMBOL:
1478 reduce_record(t, val, flat);
1479 break;
1481 case ENUM_SYMBOL:
1482 reduce_enum(t, val, flat);
1483 break;
1485 default:
1486 break;
1488 break;
1490 /* Decrease fill amount according to field size */
1491 fill = astproc_fold_constants(
1492 astnode_create_arithmetic(
1493 MINUS_OPERATOR,
1494 fill,
1495 astnode_clone(e->field.size, flat->loc),
1496 flat->loc
1500 /* Determine reason for stopping loop */
1501 if (val != NULL) {
1502 err(init->loc, "too many field initializers");
1504 if (fill->integer > 0) {
1505 /* Fill remainder of union with zeroes */
1506 astnode_add_child(flat,
1507 astnode_create_storage(
1508 astnode_create_datatype(BYTE_DATATYPE, NULL, flat->loc),
1509 fill,
1510 flat->loc
1514 symtab_pop();
1518 * Flattens a structure initializer to a sequence of native data values.
1519 * @param s Structure's symbol table definition
1520 * @param init Structure initializer
1521 * @param flat List on which to append the flattened form
1523 static void flatten_struc_recursive(symtab_entry *s, astnode *init, astnode *flat)
1525 astnode *fill;
1526 astnode *type;
1527 astnode *count;
1528 astnode *temp;
1529 symtab_entry *e;
1530 symtab_entry *t;
1531 astnode *val;
1532 astnode *valvals;
1533 ordered_field_list *list;
1534 int num;
1535 /* Validate initializer */
1536 if (!astnode_is_type(init, STRUC_NODE)) {
1537 err(init->loc, "structure initializer expected");
1538 return;
1540 /* Go through fields */
1541 symtab_push(s->symtab);
1542 fill = astnode_clone(s->struc.size, flat->loc);
1543 for (val = init->first_child, list = s->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1544 /* Get field definition */
1545 e = list->entry;
1546 /* Check if normal field or anonymous union */
1547 if (e->type == UNION_SYMBOL) {
1548 if (astnode_is_type(val, NULL_NODE)) {
1549 /* Output union size bytes to fill in field */
1550 astnode_add_child(flat,
1551 astnode_create_storage(
1552 astnode_create_datatype(BYTE_DATATYPE, NULL, val->loc),
1553 astnode_clone(e->struc.size, val->loc),
1554 val->loc
1557 } else {
1558 flatten_union_recursive(e, val, flat);
1559 /* Decrease fill amount according to union size */
1560 fill = astproc_fold_constants(
1561 astnode_create_arithmetic(
1562 MINUS_OPERATOR,
1563 fill,
1564 astnode_clone(e->struc.size, flat->loc),
1565 flat->loc
1569 } else {
1570 /* VAR_SYMBOL */
1571 /* Symbol definition is STORAGE_NODE w/ two children: type and count */
1572 type = LHS(e->def);
1573 count = RHS(e->def);
1574 /* Decide what to do based on field type and value */
1575 switch (type->datatype) {
1576 case BYTE_DATATYPE:
1577 case CHAR_DATATYPE:
1578 case WORD_DATATYPE:
1579 case DWORD_DATATYPE:
1580 if (astnode_is_type(val, NULL_NODE)) {
1581 /* Output field_size bytes to fill in field */
1582 astnode_add_child(flat,
1583 astnode_create_storage(
1584 astnode_create_datatype(type->datatype, NULL, type->loc),
1585 astnode_clone(count, count->loc),
1586 val->loc
1589 } else {
1590 if (astnode_is_type(val, STRUC_NODE)) {
1591 /* Handle multi-value array */
1592 temp = astnode_clone(val, val->loc);
1593 valvals = astnode_remove_children(temp);
1594 astnode_finalize(temp);
1595 astnode_add_child(flat,
1596 astnode_create_data(
1597 astnode_create_datatype(type->datatype, NULL, type->loc),
1598 valvals,
1599 val->loc
1602 num = astnode_get_child_count(val);
1603 } else {
1604 /* Output single value */
1605 astnode_add_child(flat,
1606 astnode_create_data(
1607 astnode_create_datatype(type->datatype, NULL, type->loc),
1608 astnode_clone(val, val->loc),
1609 val->loc
1612 num = astnode_is_type(val, STRING_NODE) ? strlen(val->string) : 1;
1614 if (astnode_is_type(count, INTEGER_NODE) && (count->integer < num)) {
1615 err(val->loc, "initializer for field `%s' exceeds field size", e->id);
1617 /* Fill in remainder of field if necessary: count - 1 */
1618 else if ( (astnode_is_type(count, INTEGER_NODE) && (count->integer > num))
1619 || !astnode_is_type(count, INTEGER_NODE) ) {
1620 astnode_add_child(flat,
1621 astnode_create_storage(
1622 astnode_create_datatype(type->datatype, NULL, flat->loc),
1623 astproc_fold_constants(
1624 astnode_create_arithmetic(
1625 MINUS_OPERATOR,
1626 astnode_clone(count, flat->loc),
1627 astnode_create_integer(num, flat->loc),
1628 flat->loc
1631 flat->loc
1636 break;
1638 case USER_DATATYPE:
1639 /* Look up user type definition */
1640 t = symtab_global_lookup(LHS(type)->ident);
1641 if (astnode_is_type(val, NULL_NODE)) {
1642 /* Output sizeof(type) bytes to fill in */
1643 astnode_add_child(flat,
1644 astnode_create_storage(
1645 astnode_create_datatype(BYTE_DATATYPE, NULL, val->loc),
1646 astnode_clone(t->struc.size, val->loc),
1647 val->loc
1650 } else {
1651 switch (t->type) {
1652 case STRUC_SYMBOL:
1653 flatten_struc_recursive(t, val, flat);
1654 break;
1656 case UNION_SYMBOL:
1657 flatten_union_recursive(t, val, flat);
1658 break;
1660 case RECORD_SYMBOL:
1661 reduce_record(t, val, flat);
1662 break;
1664 case ENUM_SYMBOL:
1665 reduce_enum(t, val, flat);
1666 break;
1668 default:
1669 break;
1672 break;
1674 /* Decrease fill amount according to field size */
1675 fill = astproc_fold_constants(
1676 astnode_create_arithmetic(
1677 MINUS_OPERATOR,
1678 fill,
1679 astnode_clone(e->field.size, flat->loc),
1680 flat->loc
1685 /* Determine reason for stopping loop */
1686 if (val != NULL) {
1687 err(init->loc, "too many field initializers");
1689 else if (list != NULL) {
1690 /* All fields not initialized; fill remainder of struc with zeroes */
1691 astnode_add_child(flat,
1692 astnode_create_storage(
1693 astnode_create_datatype(BYTE_DATATYPE, NULL, flat->loc),
1694 fill,
1695 flat->loc
1699 symtab_pop();
1703 * Converts data that is expressed in a high-level form (such as structure initializers)
1704 * to a simple sequence of bytes.
1705 * @param n The source node to flatten
1706 * @param type The type of data that n is an instance of
1707 * @param list List on which to append the resulting sequence of items (bytes/words/dwords)
1709 static void flatten_user_data(astnode *n, astnode *type, astnode *list)
1711 symtab_entry *def;
1712 /* Look up type definition */
1713 def = symtab_global_lookup(LHS(type)->ident);
1714 if (def != NULL) {
1715 switch (def->type) {
1716 case STRUC_SYMBOL:
1717 /* Flatten structure initializer to series of simple data statements */
1718 flatten_struc_recursive(def, n, list);
1719 break;
1721 case UNION_SYMBOL:
1722 /* Flatten union initializer to series of simple data statements */
1723 flatten_union_recursive(def, n, list);
1724 break;
1726 case RECORD_SYMBOL:
1727 reduce_record(def, n, list);
1728 break;
1730 case ENUM_SYMBOL:
1731 reduce_enum(def, n, list);
1732 break;
1734 default:
1735 break;
1740 /*---------------------------------------------------------------------------*/
1743 * Loads the character map specified by the node.
1744 * @param n Node of type CHARMAP_NODE
1746 static int load_charmap(astnode *n, void *arg, astnode **next)
1748 astnode *file;
1749 /* Get file descriptor */
1750 file = astnode_get_child(n, 0);
1751 /* Try to load the charmap */
1752 if (charmap_parse(file->file_path, charmap) == 0) {
1753 err(n->loc, "could not open `%s' for reading", file->file_path);
1755 return 0;
1759 * First-time processing of instruction node.
1760 * @param n Node of type INSTRUCTION_NODE
1761 * @param arg Not used
1763 static int process_instruction(astnode *n, void *arg, astnode **next)
1765 astnode *expr;
1766 if (in_dataseg) {
1767 err(n->loc, "instructions not allowed in data segment");
1768 /* Remove from AST */
1769 astnode_remove(n);
1770 astnode_finalize(n);
1772 else {
1773 /* The instruction operand */
1774 expr = astnode_get_child(n, 0);
1775 /* Substitute defines and fold constants */
1776 reduce_expression(expr);
1778 return 1;
1782 * First-time processing of data node.
1783 * @param n Node of type DATA_NODE
1784 * @param arg Not used
1786 static int process_data(astnode *n, void *arg, astnode **next)
1788 int j;
1789 int k;
1790 astnode *type;
1791 astnode *expr;
1792 astnode *list;
1793 astnode *stmts;
1794 type = astnode_get_child(n, 0); /* DATATYPE_NODE */
1795 if (in_dataseg) {
1796 err(n->loc, "value not allowed in data segment");
1797 /* Replace with storage node */
1798 astnode_replace(
1800 astnode_create_storage(
1801 astnode_create_datatype(BYTE_DATATYPE, NULL, n->loc),
1802 astnode_create_integer(1, n->loc),
1803 n->loc
1806 astnode_finalize(n);
1807 return 0;
1809 if (type->datatype == USER_DATATYPE) {
1810 /* Make sure the type exists */
1811 if (symtab_global_lookup(LHS(type)->ident) == NULL) {
1812 err(n->loc, "unknown type `%s'", LHS(type)->ident);
1813 /* Remove from AST */
1814 astnode_remove(n);
1815 astnode_finalize(n);
1816 return 0;
1817 } else {
1818 /* Attempt to reduce user data to native data */
1819 list = astnode_create(LIST_NODE, n->loc);
1820 for (expr = type->next_sibling; expr != NULL; expr = expr->next_sibling) {
1821 flatten_user_data(expr, type, list);
1823 /* Replace initializers with generated list */
1824 stmts = astnode_remove_children(list);
1825 astnode_replace(n, stmts);
1826 astnode_finalize(n);
1827 astnode_finalize(list);
1828 *next = stmts;
1831 /* Go through the list of data values, replacing defines and folding constants */
1832 for (j=1; j<astnode_get_child_count(n); j++) {
1833 expr = astnode_get_child(n, j);
1834 /* Substitute defines and fold constants */
1835 expr = reduce_expression(expr);
1836 /* If it's a string, replace by array of integers */
1837 /* (makes it easier to process later... favour regularity) */
1838 if (astnode_is_type(expr, STRING_NODE)) {
1839 astnode_remove_child_at(n, j); /* Remove string */
1840 for (k=strlen(expr->string)-1; k>=0; k--) {
1841 /* Check if we should map character from custom charmap */
1842 if (type->datatype == CHAR_DATATYPE) {
1843 expr->string[k] = charmap[(unsigned)expr->string[k]];
1845 /* Append character value to array */
1846 astnode_insert_child(n, astnode_create_integer((unsigned char)expr->string[k], n->loc), j);
1848 if (type->datatype == CHAR_DATATYPE) {
1849 /* It's normal byte array now */
1850 type->datatype = BYTE_DATATYPE;
1852 j += strlen(expr->string)-1;
1853 astnode_finalize(expr);
1856 return 1;
1860 * First-time processing of storage node.
1861 * @param n Node of type STORAGE_NODE
1862 * @param arg Not used
1864 static int process_storage(astnode *n, void *arg, astnode **next)
1866 int item_size;
1867 astnode *type;
1868 astnode *expr;
1869 astnode *new_expr;
1870 type = LHS(n);
1871 expr = RHS(n);
1872 /* If not BYTE_DATATYPE, multiply by word/dword-size */
1873 switch (type->datatype) {
1874 case BYTE_DATATYPE:
1875 case CHAR_DATATYPE: item_size = 1; break;
1876 case WORD_DATATYPE: item_size = 2; break;
1877 case DWORD_DATATYPE: item_size = 4; break;
1878 default: item_size = 1; break; // ### Hmmm...
1880 if (item_size != 1) {
1881 new_expr = astnode_create_arithmetic(
1882 MUL_OPERATOR,
1883 astnode_clone(expr, expr->loc),
1884 astnode_create_integer(item_size, expr->loc),
1885 expr->loc
1887 astnode_replace(expr, new_expr);
1888 astnode_finalize(expr);
1889 expr = new_expr;
1890 type->datatype = BYTE_DATATYPE;
1892 /* Substitute defines and fold constants */
1893 expr = reduce_expression(expr);
1894 // TODO: Validate range somewhere else than here please... ???
1895 if (astnode_is_type(expr, INTEGER_NODE)) {
1896 if ((expr->integer <= 0) || (expr->integer >= 0x10000)) {
1897 err(n->loc, "operand out of range");
1900 return 1;
1904 * Process EQU node.
1905 * @param n Node of type EQU_NODE
1906 * @param arg Not used
1908 static int process_equ(astnode *n, void *arg, astnode **next)
1910 symtab_entry *e;
1911 astnode *id;
1912 astnode *expr;
1913 /* The expression which describes the value */
1914 expr = astnode_clone(astnode_get_child(n, 1), n->loc);
1915 /* Substitute defines and fold constants */
1916 expr = reduce_expression(expr);
1917 /* The identifier which is being defined */
1918 id = astnode_get_child(n, 0);
1919 /* Look up in symbol table */
1920 e = symtab_lookup(id->ident);
1921 if (e == NULL) {
1922 /* Symbol is being defined */
1923 // TODO: Check that expression is a constant?
1924 /* Enter it in symbol table */
1925 symtab_enter(id->ident, CONSTANT_SYMBOL, expr, 0);
1926 } else {
1927 /* Symbol is being redefined */
1928 /* This is not allowed for EQU equate! */
1929 if (!astnode_equal((astnode *)(e->def), expr)) {
1930 warn(n->loc, "redefinition of `%s' is not identical; ignored", id->ident);
1933 /* Remove the equate node from the tree. */
1934 astnode_remove(n);
1935 astnode_finalize(n);
1936 return 0;
1940 * Process '=' node.
1941 * @param n Node of type ASSIGN_NODE
1942 * @param arg Not used
1944 static int process_assign(astnode *n, void *arg, astnode **next)
1946 symtab_entry *e;
1947 astnode *id;
1948 astnode *expr;
1949 /* If it's part of ENUM declaration, don't touch */
1950 if (astnode_has_ancestor_of_type(n, ENUM_DECL_NODE)) {
1951 return 0;
1953 /* Very similar to EQU, except symbol 1) can be
1954 redefined and 2) is volatile (see end of proc) */
1955 /* The expression which describes the value */
1956 expr = astnode_clone(astnode_get_child(n, 1), n->loc);
1957 /* Substitute defines and fold constants */
1958 expr = reduce_expression(expr);
1959 /* The identifier which is being (re)defined */
1960 id = astnode_get_child(n, 0);
1961 /* Look up in symbol table */
1962 e = symtab_lookup(id->ident);
1963 if (e == NULL) {
1964 /* Symbol is being defined for the first time */
1965 /* Note that the VOLATILE_FLAG is set */
1966 symtab_enter(id->ident, CONSTANT_SYMBOL, expr, VOLATILE_FLAG);
1967 } else {
1968 /* Symbol is being redefined */
1969 /* This is OK for ASSIGN equate, simply replace definition */
1970 // ### store a list of definitions
1971 expr->loc = e->def->loc;
1972 e->def = expr;
1974 /* Remove the equate node from the tree. */
1975 astnode_remove(n);
1976 astnode_finalize(n);
1977 return 0;
1981 * Process IFDEF-node.
1982 * @param n Node of type IFDEF_NODE
1983 * @param arg Not used
1985 static int process_ifdef(astnode *n, void *arg, astnode **next)
1987 symtab_entry *e;
1988 astnode *id;
1989 astnode *stmts;
1990 /* The identifier which is being tested */
1991 id = astnode_get_child(n, 0);
1992 e = symtab_lookup(id->ident);
1993 if (e != NULL) {
1994 /* Symbol is defined. */
1995 /* Replace IFDEF node by the true-branch statement list */
1996 stmts = astnode_remove_children( astnode_remove_child_at(n, 1));
1997 astnode_replace(n, stmts);
1998 *next = stmts;
1999 } else {
2000 /* Symbol is not defined. */
2001 /* Replace IFDEF node by the false-branch statement list (if any) */
2002 stmts = astnode_remove_children( astnode_remove_child_at(n, 2));
2003 if (stmts != NULL) {
2004 astnode_replace(n, stmts);
2005 *next = stmts;
2006 } else {
2007 astnode_remove(n);
2010 /* Discard the original node */
2011 astnode_finalize(n);
2012 return 0;
2016 * Process IFNDEF-node.
2017 * @param n Node of type IFNDEF_NODE
2018 * @param arg Not used
2020 static int process_ifndef(astnode *n, void *arg, astnode **next)
2022 symtab_entry *e;
2023 astnode *id;
2024 astnode *stmts;
2025 /* The identifier which is being tested */
2026 id = astnode_get_child(n, 0);
2027 e = symtab_lookup(id->ident);
2028 if (e == NULL) {
2029 /* Symbol is not defined. */
2030 /* Replace IFNDEF node by the true-branch statement list */
2031 stmts = astnode_remove_children( astnode_remove_child_at(n, 1));
2032 astnode_replace(n, stmts);
2033 *next = stmts;
2034 } else {
2035 /* Symbol is defined. */
2036 /* Replace IFNDEF node by the false-branch statement list, if any */
2037 stmts = astnode_remove_children( astnode_remove_child_at(n, 2));
2038 if (stmts != NULL) {
2039 astnode_replace(n, stmts);
2040 *next = stmts;
2041 } else {
2042 astnode_remove(n);
2045 /* Discard the original node */
2046 astnode_finalize(n);
2047 return 0;
2051 * Process IF-node.
2052 * @param n Node of type IF_NODE
2053 * @param arg Not used
2055 static int process_if(astnode *n, void *arg, astnode **next)
2057 astnode *expr;
2058 astnode *stmts;
2059 astnode *c;
2060 int ret = 0;
2061 /* IF_NODE has a list of CASE, DEFAULT nodes as children */
2062 for (c = astnode_get_first_child(n); c != NULL; c = astnode_get_next_sibling(c) ) {
2063 if (astnode_is_type(c, CASE_NODE)) {
2064 /* The expression which is being tested */
2065 expr = astnode_get_child(c, 0);
2066 /* Try to reduce expression to literal */
2067 expr = reduce_expression(expr);
2068 /* Resulting expression must be an integer literal,
2069 since this is static evaluation.
2070 In other words, it can't contain label references.
2072 if (astnode_is_type(expr, INTEGER_NODE)) {
2073 /* Non-zero is true, zero is false */
2074 if (expr->integer) {
2075 /* Replace IF node by the true-branch statement list */
2076 stmts = astnode_remove_children( astnode_remove_child_at(c, 1) );
2077 astnode_replace(n, stmts);
2078 astnode_finalize(n);
2079 *next = stmts;
2080 return ret;
2082 } else {
2083 /* Error, expression is not constant */
2084 err(expr->loc, "conditional expression does not evaluate to literal");
2086 } else { /* DEFAULT_NODE */
2087 /* Replace IF node by the false-branch statement list */
2088 stmts = astnode_remove_children(c);
2089 astnode_replace(n, stmts);
2090 astnode_finalize(n);
2091 *next = stmts;
2092 return ret;
2095 /* No match, remove IF node from AST */
2096 astnode_remove(n);
2097 astnode_finalize(n);
2098 return ret;
2102 * Process dataseg-node.
2103 * @param n Node of type DATASEG_NODE
2104 * @param arg Not used
2106 static int process_dataseg(astnode *n, void *arg, astnode **next)
2108 modifiers = n->modifiers;
2109 in_dataseg = 1; /* true */
2110 return 0;
2114 * Process codeseg-node.
2115 * @param n Node of type CODESEG_NODE
2116 * @param arg Not used
2118 static int process_codeseg(astnode *n, void *arg, astnode **next)
2120 modifiers = 0;
2121 in_dataseg = 0; /* false */
2122 return 0;
2126 * Process org-node.
2127 * @param n Node of type ORG_NODE
2128 * @param arg Not used
2130 static int process_org(astnode *n, void *arg, astnode **next)
2132 if (!xasm_args.pure_binary) {
2133 err(n->loc, "org directive can only be used when output format is pure 6502 binary");
2134 } else {
2135 astnode *addr = astnode_get_child(n, 0);
2136 addr = reduce_expression_complete(addr);
2137 if (astnode_is_type(addr, INTEGER_NODE)) {
2138 /* Range check */
2139 if ((addr->integer < 0) || (addr->integer >= 0x10000)) {
2140 err(n->loc, "org address out of 64K range");
2142 } else {
2143 err(n->loc, "org address does not evaluate to literal");
2144 /* Remove from AST */
2145 astnode_remove(n);
2146 astnode_finalize(n);
2149 return 0;
2153 * Process REPT node.
2154 * @param n Node of type REPT_NODE
2155 * @param arg Not used
2157 static int process_rept(astnode *n, void *arg, astnode **next)
2159 astnode *count;
2160 astnode *stmts;
2161 astnode *list;
2162 /* The repeat count */
2163 count = astnode_get_child(n, 0);
2164 /* Try to reduce count expression to literal */
2165 count = reduce_expression_complete(count);
2166 /* Resulting expression must be an integer literal,
2167 since this is static evaluation.
2169 if (astnode_is_type(count, INTEGER_NODE)) {
2170 if (count->integer < 0) {
2171 warn(n->loc, "REPT ignored; negative repeat count (%d)", count->integer);
2172 /* Remove from AST */
2173 astnode_remove(n);
2174 astnode_finalize(n);
2175 } else if (count->integer > 0) {
2176 /* Expand body <count> times */
2177 list = astnode_clone(astnode_get_child(n, 1), n->loc);
2178 stmts = astnode_remove_children(list);
2179 astnode_finalize(list);
2180 while (--count->integer > 0) {
2181 list = astnode_clone(astnode_get_child(n, 1), n->loc);
2182 astnode_add_sibling(stmts, astnode_remove_children(list) );
2183 astnode_finalize(list);
2185 astnode_replace(n, stmts);
2186 astnode_finalize(n);
2187 *next = stmts;
2188 } else {
2189 /* count == 0, remove from AST */
2190 astnode_remove(n);
2191 astnode_finalize(n);
2193 } else {
2194 err(n->loc, "repeat count does not evaluate to literal");
2195 /* Remove from AST */
2196 astnode_remove(n);
2197 astnode_finalize(n);
2199 return 0;
2203 * Process WHILE node.
2204 * @param n Node of type WHILE_NODE
2205 * @param arg Not used
2207 static int process_while(astnode *n, void *arg, astnode **next)
2209 astnode *expr;
2210 astnode *stmts;
2211 astnode *list;
2212 /* The boolean expression */
2213 expr = astnode_get_child(n, 0);
2214 /* Try to reduce expression to literal */
2215 expr = reduce_expression(astnode_clone(expr, expr->loc));
2216 /* Resulting expression must be an integer literal,
2217 since this is static evaluation.
2219 if (astnode_is_type(expr, INTEGER_NODE)) {
2220 /* Expand body if the expression is true */
2221 if (expr->integer) {
2222 list = astnode_clone(astnode_get_child(n, 1), n->loc);
2223 stmts = astnode_remove_children(list);
2224 astnode_finalize(list);
2225 astnode_replace(n, stmts);
2226 astnode_add_sibling(stmts, n); /* Clever huh? */
2227 *next = stmts;
2228 } else {
2229 /* Remove WHILE node from AST */
2230 astnode_remove(n);
2231 astnode_finalize(n);
2233 } else {
2234 err(n->loc, "while expression does not evaluate to literal");
2235 /* Remove WHILE node from AST */
2236 astnode_remove(n);
2237 astnode_finalize(n);
2239 astnode_finalize(expr);
2240 return 0;
2243 /*---------------------------------------------------------------------------*/
2246 * Enters a macro into the symbol table.
2247 * @param n Must be a node of type MACRO_DECL_NODE
2248 * @param arg Not used
2250 static int enter_macro(astnode *n, void *arg, astnode **next)
2252 astnode *id = astnode_get_child(n, 0); /* Child 0 is macro identifier */
2253 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2254 if (symtab_enter(id->ident, MACRO_SYMBOL, n, 0) == NULL) {
2255 /* ### This could be allowed, you know... */
2256 err(n->loc, "duplicate symbol `%s'", id->ident);
2258 /* Remove from AST */
2259 astnode_remove(n);
2260 // ### n is not finalized???
2261 return 0;
2265 * Enters a label into the symbol table.
2266 * @param n Must be a node of type LABEL_NODE
2268 static int enter_label(astnode *n, void *arg, astnode **next)
2270 symtab_entry *e;
2271 astnode *addr;
2272 /* Make sure it's unique first */
2273 if (symtab_lookup(n->ident)) {
2274 err(n->loc, "duplicate symbol `%s'", n->ident);
2275 /* Remove from AST */
2276 astnode_remove(n);
2277 astnode_finalize(n);
2278 } else {
2279 /* Enter it! */
2280 e = symtab_enter(n->ident, LABEL_SYMBOL, n, (in_dataseg ? DATA_FLAG : 0) | modifiers );
2281 /* Check if hardcoded address */
2282 addr = reduce_expression_complete(RHS(n));
2283 if (astnode_is_type(addr, INTEGER_NODE)) {
2284 /* Store it */
2285 e->address = addr->integer;
2286 e->flags |= ADDR_FLAG;
2287 } else if (!astnode_is_type(addr, CURRENT_PC_NODE)) {
2288 err(n->loc, "label address does not evaluate to literal");
2290 /* Increase namespace counter */
2291 label_count++;
2293 /* */
2294 return 0;
2298 * Enters a variable declaration in symbol table.
2299 * @param n Must be a node of type VAR_DECL_NODE
2301 static int enter_var(astnode *n, void *arg, astnode **next)
2303 astnode *id = LHS(n); /* Variable identifier */
2304 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2305 /* Make sure it's unique first */
2306 if (symtab_lookup(id->ident)) {
2307 err(n->loc, "duplicate symbol `%s'", id->ident);
2308 /* Remove from AST */
2309 astnode_remove(n);
2310 astnode_finalize(n);
2311 } else {
2312 /* Validate modifiers */
2313 if ((n->modifiers & ZEROPAGE_FLAG) && !in_dataseg) {
2314 warn(n->loc, "zeropage modifier has no effect in code segment");
2315 n->modifiers &= ~ZEROPAGE_FLAG;
2317 /* Enter it! */
2318 symtab_enter(id->ident, VAR_SYMBOL, astnode_clone(RHS(n), n->loc), (in_dataseg ? DATA_FLAG : 0) | n->modifiers | modifiers);
2320 /* */
2321 return 1;
2325 * Enters a procedure declaration in symbol table.
2326 * @param n Must be a node of type PROC_NODE
2328 static int enter_proc(astnode *n, void *arg, astnode **next)
2330 astnode *id;
2331 if (in_dataseg) {
2332 err(n->loc, "procedures not allowed in data segment");
2333 /* Remove from AST */
2334 astnode_remove(n);
2335 astnode_finalize(n);
2336 return 0;
2338 id = LHS(n); /* Procedure identifier */
2339 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2340 /* Make sure it's unique first */
2341 if (symtab_lookup(id->ident)) {
2342 err(n->loc, "duplicate symbol `%s'", id->ident);
2343 /* Remove from AST */
2344 astnode_remove(n);
2345 astnode_finalize(n);
2346 } else {
2347 /* Enter it! RHS(n) is the list of procedure statements */
2348 symtab_enter(id->ident, PROC_SYMBOL, RHS(n), (in_dataseg ? DATA_FLAG : 0) );
2349 /* Increase global namespace counter */
2350 label_count++;
2352 /* */
2353 return 1;
2357 * Enters a simple <identifier> <storage> structure member.
2358 * @param c Node of type VAR_DECL_NODE
2359 * @param offset Offset of this field
2360 * @param plist List of symbol table's entries
2361 * @param struc_id Structure identifier (for error messages)
2362 * @return New offset (old offset + size of this field)
2364 static astnode *enter_struc_atomic_field(astnode *c, astnode *offset, ordered_field_list ***plist, astnode *struc_id)
2366 astnode *field_id;
2367 astnode *field_data;
2368 astnode *field_size;
2369 symtab_entry *fe;
2370 /* c has two children: id and STORAGE_NODE */
2371 field_id = LHS(c);
2372 assert(astnode_get_type(field_id) == IDENTIFIER_NODE);
2373 field_data = RHS(c);
2374 reduce_expression(RHS(field_data));
2375 /* Validate the declaration -- no data initialized */
2376 if (astnode_is_type(field_data, DATA_NODE)) {
2377 err(c->loc, "data initialization not allowed here");
2378 return(offset);
2380 /* Try to enter field in structure's symbol table */
2381 fe = symtab_enter(
2382 field_id->ident,
2383 VAR_SYMBOL,
2384 astnode_clone(field_data, field_data->loc),
2387 if (fe == NULL) {
2388 err(c->loc, "duplicate symbol `%s' in structure `%s'", field_id->ident, struc_id->ident);
2389 return(offset);
2391 /* Add to ordered list of fields */
2392 (**plist) = malloc(sizeof(ordered_field_list));
2393 (**plist)->entry = fe;
2394 (**plist)->next = NULL;
2395 *plist = &((**plist)->next);
2396 /* Set field offset */
2397 fe->field.offset = astnode_clone(offset, offset->loc);
2398 /* Calculate field size in bytes: sizeof(datatype) * count */
2399 field_size = astnode_create_arithmetic(
2400 MUL_OPERATOR,
2401 astnode_create_sizeof(astnode_clone(LHS(field_data), field_data->loc), field_data->loc),
2402 astnode_clone(RHS(field_data), field_data->loc),
2403 field_data->loc
2405 field_size = reduce_expression(field_size);
2406 /* Set field size */
2407 fe->field.size = astnode_clone(field_size, field_size->loc);
2408 /* Add field size to total offset */
2409 offset = astnode_create_arithmetic(
2410 PLUS_OPERATOR,
2411 offset,
2412 field_size,
2413 offset->loc
2415 offset = reduce_expression(offset);
2416 return(offset);
2419 static void enter_union_fields(symtab_entry *, astnode *);
2422 * Attempts to enter an (anonymous) union's members into structure's symbol table.
2423 * @param n Node of type UNION_DECL_NODE
2424 * @param offset Current parent structure offset
2425 * @param plist Ordered list of parent structure's fields
2427 astnode *enter_struc_union_field(astnode *n, astnode *offset, ordered_field_list ***plist, astnode *struc_id)
2429 ordered_field_list *ls;
2430 symtab_entry *se;
2431 symtab_entry *fe;
2432 static int id = 0;
2433 char id_str[16];
2434 astnode *union_id;
2435 union_id = LHS(n);
2436 if (astnode_is_type(union_id, IDENTIFIER_NODE)) {
2437 err(n->loc, "anonymous union expected");
2438 return(offset);
2440 /* Put UNION in symbol table */
2441 sprintf(id_str, "%d", id++);
2442 se = symtab_enter(id_str, UNION_SYMBOL, n, 0);
2443 enter_union_fields(se, n);
2444 /* Add to ordered list of fields */
2445 (**plist) = malloc(sizeof(ordered_field_list));
2446 (**plist)->entry = se;
2447 (**plist)->next = NULL;
2448 *plist = &((**plist)->next);
2449 /* Add to parent structure as well, with same offsets */
2450 for (ls = se->struc.fields; ls != NULL; ls = ls->next) {
2451 /* Try to enter field in structure's symbol table */
2452 fe = symtab_enter(
2453 ls->entry->id,
2454 VAR_SYMBOL,
2455 astnode_clone(ls->entry->def, ls->entry->def->loc),
2458 if (fe == NULL) {
2459 err(ls->entry->def->loc, "duplicate symbol `%s' in structure `%s'", ls->entry->id, struc_id->ident);
2460 continue;
2462 /* Set field offset */
2463 fe->field.offset = astnode_clone(offset, offset->loc);
2464 /* Set field size */
2465 fe->field.size = astnode_clone(se->struc.size, offset->loc);
2467 /* Advance offset by size of union */
2468 offset = astnode_create_arithmetic(
2469 PLUS_OPERATOR,
2470 offset,
2471 astnode_clone(se->struc.size, offset->loc),
2472 offset->loc
2474 offset = reduce_expression(offset);
2475 return(offset);
2479 * Enters struc type into symbol table based on AST node.
2480 * - Creates a symbol table for the structure
2481 * - Validates and enters all its fields
2482 * - Calculates offset of each field in the structure, and total size
2483 * @param n Node of type STRUC_DECL_NODE
2485 static int enter_struc(astnode *n, void *arg, astnode **next)
2487 ordered_field_list **plist;
2488 symtab_entry *se;
2489 astnode *c;
2490 astnode *offset;
2491 astnode *struc_id = LHS(n); /* Child 0 is struc identifier */
2492 /* Put STRUC in symbol table */
2493 se = symtab_enter(struc_id->ident, STRUC_SYMBOL, n, 0);
2494 if (se == NULL) {
2495 err(n->loc, "duplicate symbol `%s'", struc_id->ident);
2496 } else {
2497 /* Put the fields of the structure in local symbol table */
2498 se->symtab = symtab_create();
2499 offset = astnode_create_integer(0, n->loc); /* offset = 0 */
2500 plist = &se->struc.fields;
2501 for (c = struc_id->next_sibling; c != NULL; c = c->next_sibling) {
2502 /* Check if it's a field declaration */
2503 if (astnode_is_type(c, VAR_DECL_NODE)) {
2504 offset = enter_struc_atomic_field(c, offset, &plist, struc_id);
2506 /* Check if (anonymous) union */
2507 else if (astnode_is_type(c, UNION_DECL_NODE)) {
2508 offset = enter_struc_union_field(c, offset, &plist, struc_id);
2509 } else {
2510 err(c->loc, "field declaration expected");
2511 continue;
2514 /* Store total size of structure */
2515 se->struc.size = offset;
2516 /* Restore previous symbol table */
2517 symtab_pop();
2519 /* Remove STRUC node from AST */
2520 // astnode_remove(n);
2521 // astnode_finalize(n);
2522 return 0;
2526 * Enters fields of union into its symbol table.
2528 static void enter_union_fields(symtab_entry *se, astnode *n)
2530 ordered_field_list **plist;
2531 astnode *c;
2532 astnode *field_id;
2533 astnode *field_data;
2534 astnode *field_size;
2535 symtab_entry *fe;
2537 se->symtab = symtab_create();
2538 se->struc.size = astnode_create_integer(0, n->loc);
2539 plist = &se->struc.fields;
2540 /* Process field declarations */
2541 for (c = RHS(n); c != NULL; c = c->next_sibling) {
2542 /* Make sure it's a field declaration */
2543 if (!astnode_is_type(c, VAR_DECL_NODE)) {
2544 err(c->loc, "field declaration expected");
2545 continue;
2547 /* c has two children: id and STORAGE_NODE */
2548 field_id = LHS(c);
2549 assert(astnode_get_type(field_id) == IDENTIFIER_NODE);
2550 field_data = RHS(c);
2551 reduce_expression(RHS(field_data));
2552 /* Validate the declaration -- no data initialized */
2553 if (astnode_is_type(field_data, DATA_NODE)) {
2554 err(c->loc, "data initialization not allowed here");
2555 continue;
2557 /* Calculate field size in bytes: sizeof(datatype) * count */
2558 field_size = astnode_create_arithmetic(
2559 MUL_OPERATOR,
2560 astnode_create_sizeof(astnode_clone(LHS(field_data), field_data->loc), field_data->loc),
2561 astnode_clone(RHS(field_data), field_data->loc),
2562 field_data->loc
2564 field_size = reduce_expression(field_size);
2565 /* Make sure field size is a constant */
2566 if (!astnode_is_type(field_size, INTEGER_NODE)) {
2567 err(c->loc, "union member must be of constant size");
2568 astnode_finalize(field_size);
2569 /* Use default size: 1 byte */
2570 field_size = astnode_create_integer(1, field_data->loc);
2572 /* Try to enter field in structure's symbol table */
2573 fe = symtab_enter(
2574 field_id->ident,
2575 VAR_SYMBOL,
2576 astnode_clone(field_data, field_data->loc),
2579 if (fe == NULL) {
2580 err(c->loc, "duplicate symbol `%s' in union `%s'", field_id->ident, se->id);
2581 astnode_finalize(field_size);
2582 continue;
2584 /* Add to ordered list of fields */
2585 (*plist) = malloc(sizeof(ordered_field_list));
2586 (*plist)->entry = fe;
2587 (*plist)->next = NULL;
2588 plist = &((*plist)->next);
2589 /* Set field offset (0 for all) and size */
2590 fe->field.offset = astnode_create_integer(0, n->loc);
2591 fe->field.size = astnode_clone(field_size, field_size->loc);
2592 /* See if field size of this member is largest so far */
2593 if (se->struc.size->integer < field_size->integer) {
2594 astnode_finalize(se->struc.size);
2595 se->struc.size = field_size;
2596 } else {
2597 astnode_finalize(field_size);
2600 symtab_pop();
2604 * Enters union type into symbol table based on AST node.
2605 * @param n Node of type UNION_DECL_NODE
2607 static int enter_union(astnode *n, void *arg, astnode **next)
2609 symtab_entry *se;
2610 astnode *union_id = astnode_get_child(n, 0); /* Child 0 is union identifier */
2611 /* Check for anonymous union */
2612 if (astnode_is_type(union_id, NULL_NODE)) {
2613 err(n->loc, "anonymous union not allowed in global scope");
2614 } else {
2615 /* Put UNION in symbol table */
2616 assert(astnode_get_type(union_id) == IDENTIFIER_NODE);
2617 se = symtab_enter(union_id->ident, UNION_SYMBOL, n, 0);
2618 if (se == NULL) {
2619 err(n->loc, "duplicate symbol `%s'", union_id->ident);
2620 } else {
2621 /* Put the fields of the union in local symbol table */
2622 enter_union_fields(se, n);
2625 /* Remove UNION node from AST */
2626 // astnode_remove(n);
2627 // astnode_finalize(n);
2628 return 0;
2632 * Enters enumerated type into symbol table based on AST node.
2633 * @param n Node of type ENUM_DECL_NODE
2635 static int enter_enum(astnode *n, void *arg, astnode **next)
2637 astnode *c;
2638 astnode *id;
2639 astnode *val;
2640 symtab_entry *se;
2641 astnode *enum_id = astnode_get_child(n, 0); /* Child 0 is enum identifier */
2642 /* Enter in global symbol table */
2643 assert(astnode_get_type(enum_id) == IDENTIFIER_NODE);
2644 se = symtab_enter(enum_id->ident, ENUM_SYMBOL, n, 0);
2645 if (se == NULL) {
2646 err(n->loc, "duplicate symbol `%s'", enum_id->ident);
2647 } else {
2648 /* Add all the enum symbols to its own symbol table */
2649 se->symtab = symtab_create();
2650 val = NULL;
2651 for (c = enum_id->next_sibling; c != NULL; c = c->next_sibling) {
2652 if (astnode_is_type(c, IDENTIFIER_NODE)) {
2653 id = c;
2654 if (val == NULL) {
2655 val = astnode_create_integer(0, c->loc);
2656 } else {
2657 val = astnode_create_integer(val->integer+1, c->loc);
2659 } else {
2660 id = LHS(c);
2661 val = reduce_expression_complete(astnode_clone(RHS(c), RHS(c)->loc));
2662 if (!astnode_is_type(val, INTEGER_NODE)) {
2663 err(c->loc, "initializer does not evaluate to integer literal");
2664 astnode_finalize(val);
2665 /* Use default value */
2666 val = astnode_create_integer(0, c->loc);
2669 if (symtab_enter(id->ident, CONSTANT_SYMBOL, val, 0) == NULL) {
2670 err(c->loc, "duplicate symbol `%s' in enumeration `%s'", id->ident, enum_id->ident);
2671 continue;
2674 symtab_pop();
2676 /* Remove ENUM node from AST */
2677 // astnode_remove(n);
2678 // astnode_finalize(n);
2679 return 0;
2683 * Enters record type into symbol table based on AST node.
2684 * @param n Node of type RECORD_DECL_NODE
2686 static int enter_record(astnode *n, void *arg, astnode **next)
2688 ordered_field_list **plist;
2689 astnode *c;
2690 astnode *field_id;
2691 astnode *field_width;
2692 int size;
2693 int offset;
2694 symtab_entry *se;
2695 symtab_entry *fe;
2696 astnode *record_id = astnode_get_child(n, 0); /* Child 0 is record identifier */
2697 assert(astnode_get_type(record_id) == IDENTIFIER_NODE);
2698 /* Enter in global symbol table */
2699 se = symtab_enter(record_id->ident, RECORD_SYMBOL, n, 0);
2700 if (se == NULL) {
2701 err(n->loc, "duplicate symbol `%s'", record_id->ident);
2703 else {
2704 /* Add all the record fields to record's own symbol table */
2705 se->symtab = symtab_create();
2706 offset = 8;
2707 plist = &se->struc.fields;
2708 for (c = record_id->next_sibling; c != NULL; c = c->next_sibling) {
2709 /* c has two children: field identifier and its width */
2710 field_id = LHS(c);
2711 field_width = astnode_clone(reduce_expression(RHS(c)), RHS(c)->loc);
2712 /* Validate the width -- must be positive integer literal */
2713 if (!astnode_is_type(field_width, INTEGER_NODE)) {
2714 err(c->loc, "record member `%s' is not of constant size", field_id->ident);
2715 continue;
2717 if ((field_width->integer <= 0) || (field_width->integer >= 8)) {
2718 err(c->loc, "width of record member `%s' is out of range (%d)", field_id->ident, field_width->integer);
2719 continue;
2721 /* Attempt to enter field in record's symbol table */
2722 fe = symtab_enter(field_id->ident, VAR_SYMBOL, c, 0);
2723 if (fe == NULL) {
2724 err(c->loc, "duplicate symbol `%s' in record `%s'", field_id->ident, record_id->ident);
2725 continue;
2727 /* Add to ordered list of fields */
2728 (*plist) = malloc(sizeof(ordered_field_list));
2729 (*plist)->entry = fe;
2730 (*plist)->next = NULL;
2731 plist = &((*plist)->next);
2732 /* Set field offset */
2733 offset = offset - field_width->integer;
2734 fe->field.offset = astnode_create_integer(offset, c->loc);
2735 /* Set field size (width) */
2736 fe->field.size = field_width;
2738 size = 8 - offset;
2739 if (size > 8) {
2740 err(n->loc, "size of record `%s' (%d) exceeds 8 bits", record_id->ident, size);
2741 } else {
2742 /* Set size of record (in bits) */
2743 se->struc.size = astnode_create_integer(size, n->loc);
2745 symtab_pop();
2747 /* Remove RECORD node from AST */
2748 // astnode_remove(n);
2749 // astnode_finalize(n);
2750 return 0;
2754 * Globalizes a local.
2755 * The node is morphed into its global equivalent (LABEL_NODE or IDENTIFIER_NODE).
2756 * @param n A node of type LOCAL_LABEL_NODE or LOCAL_ID_NODE
2757 * @param arg Pointer to namespace counter
2759 static int globalize_local(astnode *n, void *arg, astnode **next)
2761 char str[32];
2762 /* Make it global by appending namespace counter to the id */
2763 sprintf(str, "#%d", label_count);
2764 if (astnode_is_type(n, LOCAL_LABEL_NODE)) {
2765 /* Local label definition, use label field */
2766 n->label = realloc(n->label, strlen(n->label)+strlen(str)+1);
2767 strcat(n->label, str);
2768 /* This node is now a unique, global label */
2769 n->type = LABEL_NODE;
2770 /* Make sure it's unique */
2771 if (symtab_lookup(n->label)) {
2772 err(n->loc, "duplicate symbol `%s'", n->label);
2773 /* Remove from AST */
2774 astnode_remove(n);
2775 astnode_finalize(n);
2776 } else {
2777 /* Enter it in symbol table */
2778 symtab_enter(n->label, LABEL_SYMBOL, n, (in_dataseg ? DATA_FLAG : 0) );
2780 } else {
2781 /* Local label reference, use ident field */
2782 n->ident = realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2783 strcat(n->ident, str);
2784 /* This node is now a unique, global identifier */
2785 n->type = IDENTIFIER_NODE;
2787 return 1;
2791 * Tags symbols as extrn.
2792 * @param n A node of type EXTRN_NODE
2794 static int tag_extrn_symbols(astnode *n, void *arg, astnode **next)
2796 astnode *id;
2797 astnode *type;
2798 astnode *list;
2799 symtab_entry *e;
2800 /* Get symbol type specifier */
2801 type = astnode_get_child(n, 0);
2802 /* Go through the list of identifiers */
2803 list = astnode_get_child(n, 1);
2804 for (id=astnode_get_first_child(list); id != NULL; id=astnode_get_next_sibling(id) ) {
2805 /* Look up identifier in symbol table */
2806 e = symtab_lookup(id->ident);
2807 if (e != NULL) {
2808 if (!(e->flags & EXTRN_FLAG)) {
2809 /* Error, can't import a symbol that's defined locally! */
2810 // TODO: this is okay?
2811 err(n->loc, "`%s' declared as extrn but is defined locally", id->ident);
2814 else {
2815 // TODO: store external unit name
2816 switch (astnode_get_type(type)) {
2817 case DATATYPE_NODE:
2818 /* Put it in symbol table */
2819 symtab_enter(id->ident, VAR_SYMBOL, astnode_create_data(astnode_clone(type, n->loc), NULL, n->loc), EXTRN_FLAG);
2820 break;
2822 case INTEGER_NODE:
2823 /* type->integer is (LABEL|PROC)_SYMBOL */
2824 symtab_enter(id->ident, type->integer, NULL, EXTRN_FLAG);
2825 break;
2827 default:
2828 break;
2832 /* Remove extrn node from AST */
2833 astnode_remove(n);
2834 astnode_finalize(n);
2836 return 0;
2842 static int process_message(astnode *n, void *arg, astnode **next)
2844 astnode *mesg = reduce_expression_complete(LHS(n));
2845 if (astnode_is_type(mesg, STRING_NODE)) {
2846 printf("%s\n", mesg->string);
2848 else if (astnode_is_type(mesg, INTEGER_NODE)) {
2849 printf("%d\n", mesg->integer);
2851 else {
2852 err(mesg->loc, "string or integer argument expected");
2854 astnode_remove(n);
2855 astnode_finalize(n);
2856 return 0;
2862 static int process_warning(astnode *n, void *arg, astnode **next)
2864 astnode *mesg = reduce_expression_complete(LHS(n));
2865 if (astnode_is_type(mesg, STRING_NODE)) {
2866 warn(mesg->loc, mesg->string);
2868 else {
2869 err(mesg->loc, "string argument expected");
2871 astnode_remove(n);
2872 astnode_finalize(n);
2873 return 0;
2879 static int process_error(astnode *n, void *arg, astnode **next)
2881 astnode *mesg = reduce_expression_complete(LHS(n));
2882 if (astnode_is_type(mesg, STRING_NODE)) {
2883 err(mesg->loc, mesg->string);
2885 else {
2886 err(mesg->loc, "string argument expected");
2888 astnode_remove(n);
2889 astnode_finalize(n);
2890 return 0;
2894 * Processes a forward branch declaration.
2895 * @param n Node of type FORWARD_BRANCH_DECL_NODE
2896 * @param arg Not used
2898 static int process_forward_branch_decl(astnode *n, void *arg, astnode **next)
2900 astnode *l;
2901 int i;
2902 char str[32];
2903 /* Get branch info structure for label (+, ++, ...) */
2904 forward_branch_info *fwd = &forward_branch[strlen(n->ident)-1];
2905 /* Morph n to globally unique label */
2906 sprintf(str, "#%d", fwd->counter);
2907 n->label = (char *)realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2908 strcat(n->label, str);
2909 n->type = LABEL_NODE;
2910 symtab_enter(n->label, LABEL_SYMBOL, n, 0);
2911 /* Fix reference identifiers */
2912 for (i=0; i<fwd->index; i++) {
2913 l = fwd->refs[i];
2914 l->ident = (char *)realloc(l->ident, strlen(n->ident)+1);
2915 strcpy(l->ident, n->ident);
2917 /* Prepare for next declaration */
2918 fwd->index = 0;
2919 fwd->counter++;
2920 return 0;
2924 * Processes a backward branch declaration.
2925 * @param n Node of type BACKWARD_BRANCH_DECL_NODE
2926 * @param arg Not used
2928 static int process_backward_branch_decl(astnode *n, void *arg, astnode **next)
2930 char str[32];
2931 /* Get branch info */
2932 backward_branch_info *bwd = &backward_branch[strlen(n->ident)-1];
2933 bwd->decl = n;
2934 /* Morph n to globally unique label */
2935 sprintf(str, "#%d", bwd->counter);
2936 n->label = (char *)realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2937 strcat(n->label, str);
2938 n->type = LABEL_NODE;
2939 symtab_enter(n->label, LABEL_SYMBOL, n, 0);
2940 /* Prepare for next declaration */
2941 bwd->counter++;
2942 return 0;
2946 * Processes a forward branch label reference.
2947 * @param n Node of type FORWARD_BRANCH_NODE
2948 * @param arg Not used
2950 static int process_forward_branch(astnode *n, void *arg, astnode **next)
2952 /* Add n to proper forward_branch array */
2953 forward_branch_info *fwd = &forward_branch[strlen(n->ident)-1];
2954 fwd->refs[fwd->index++] = n;
2955 /* Change to identifier node */
2956 n->type = IDENTIFIER_NODE;
2957 return 0;
2961 * Processes a backward branch label reference.
2962 * @param n Node of type BACKWARD_BRANCH_NODE
2963 * @param arg Not used
2965 static int process_backward_branch(astnode *n, void *arg, astnode **next)
2967 /* Get branch info */
2968 backward_branch_info *bwd = &backward_branch[strlen(n->ident)-1];
2969 /* Make sure it's a valid reference */
2970 if (bwd->decl != NULL) {
2971 /* Fix n->ident */
2972 n->ident = (char *)realloc(n->ident, strlen(bwd->decl->ident)+1);
2973 strcpy(n->ident, bwd->decl->ident);
2975 /* Change to identifier node */
2976 n->type = IDENTIFIER_NODE;
2977 return 0;
2980 /*---------------------------------------------------------------------------*/
2982 static int is_field_ref(astnode *n)
2984 astnode *p = astnode_get_parent(n);
2985 /* Case 1: id.id */
2986 if (astnode_is_type(p, DOT_NODE)) return 1;
2987 /* Case 2: id.id[expr] */
2988 if (astnode_is_type(p, INDEX_NODE) && (n == LHS(p)) && astnode_is_type(astnode_get_parent(p), DOT_NODE) ) return 1;
2989 return 0;
2993 * Checks that the given identifier node is present in symbol table.
2994 * Issues error if it is not, and replaces with integer 0.
2995 * @param n A node of type IDENTIFIER_NODE
2997 static int validate_ref(astnode *n, void *arg, astnode **next)
2999 int i;
3000 symbol_ident_list list;
3001 symtab_entry *enum_def;
3002 if (is_field_ref(n)) {
3003 return 1; /* Validated by validate_dotref() */
3005 /* Look it up in symbol table */
3006 symtab_entry * e = symtab_lookup(n->ident);
3007 if (e == NULL) {
3008 /* This identifier is unknown */
3009 /* Maybe it is part of an enumeration */
3010 symtab_list_type(ENUM_SYMBOL, &list);
3011 for (i=0; i<list.size; i++) {
3012 enum_def = symtab_lookup(list.idents[i]);
3013 symtab_push(enum_def->symtab);
3014 e = symtab_lookup(n->ident);
3015 symtab_pop();
3016 if (e != NULL) {
3017 /* Found it */
3018 /* Replace id by SCOPE_NODE */
3019 astnode_replace(
3021 astnode_create_scope(
3022 astnode_create_identifier(enum_def->id, n->loc),
3023 astnode_clone(n, n->loc),
3024 n->loc
3027 astnode_finalize(n);
3028 break;
3031 symtab_list_finalize(&list);
3032 /* If still not found, error */
3033 if (e == NULL) {
3034 strtok(n->ident, "#"); /* Remove globalize junk */
3035 // err(n->loc, "unknown symbol `%s'", n->ident);
3036 /* Replace by integer 0 */
3037 //astnode_replace(n, astnode_create_integer(0, n->loc) );
3038 //astnode_finalize(n);
3039 warn(n->loc, "`%s' undeclared; assuming external label", n->ident);
3040 e = symtab_enter(n->ident, LABEL_SYMBOL, NULL, EXTRN_FLAG);
3043 assert(e);
3044 /* Increase reference count */
3045 e->ref_count++;
3046 return 1;
3050 * Validates top-level (not part of structure) indexed identifier.
3051 * @param n Node of type INDEX_NODE
3052 * @param arg Not used
3054 static int validate_index(astnode *n, void *arg, astnode **next)
3056 symtab_entry *e;
3057 astnode *id;
3058 astnode *type;
3059 if (is_field_ref(LHS(n))) {
3060 return 1; /* Validated by validate_dotref() */
3062 id = LHS(n);
3063 if (!astnode_is_type(id, IDENTIFIER_NODE)) {
3064 err(n->loc, "identifier expected");
3065 astnode_replace(n, astnode_create_integer(0, n->loc) );
3066 astnode_finalize(n);
3067 return 1;
3069 e = symtab_lookup(id->ident);
3070 if (e != NULL) {
3071 type = LHS(e->def);
3072 if (!astnode_is_type(type, DATATYPE_NODE)) {
3073 err(n->loc, "`%s' cannot be indexed", id->ident);
3074 astnode_replace(n, astnode_create_integer(0, n->loc) );
3075 astnode_finalize(n);
3076 } else {
3077 // TODO: bounds check
3078 reduce_index(n);
3080 } else {
3081 err(n->loc, "unknown symbol `%s'", id->ident);
3082 astnode_replace(n, astnode_create_integer(0, n->loc) );
3083 astnode_finalize(n);
3085 return 1;
3089 * Checks that A::B is valid.
3090 * If it's not valid it is replaced by integer 0.
3091 * @param n Node of type SCOPE_NODE
3093 static int validate_scoperef(astnode *n, void *arg, astnode **next)
3095 astnode *symbol;
3096 astnode *namespace = LHS(n);
3097 /* Look up namespace in global symbol table */
3098 symtab_entry * e = symtab_lookup(namespace->ident);
3099 if (e == NULL) {
3100 /* Error, this identifier is unknown */
3101 err(n->loc, "unknown namespace `%s'", namespace->ident);
3102 /* Replace by integer 0 */
3103 astnode_replace(n, astnode_create_integer(0, n->loc) );
3104 astnode_finalize(n);
3105 } else {
3106 /* Get symbol on right of :: operator */
3107 symbol = RHS(n);
3108 /* Namespace was found, check its type */
3109 switch (e->type) {
3110 case STRUC_SYMBOL:
3111 case UNION_SYMBOL:
3112 case RECORD_SYMBOL:
3113 case ENUM_SYMBOL:
3114 /* OK, check the symbol */
3115 symtab_push(e->symtab);
3116 e = symtab_lookup(symbol->ident);
3117 if (e == NULL) {
3118 /* Error, symbol is not in namespace */
3119 err(n->loc, "unknown symbol `%s' in namespace `%s'", symbol->ident, namespace->ident);
3120 /* Replace by integer 0 */
3121 astnode_replace(n, astnode_create_integer(0, n->loc) );
3122 astnode_finalize(n);
3124 symtab_pop();
3125 break;
3127 default:
3128 err(n->loc, "`%s' is not a namespace", namespace->ident);
3129 /* Replace by integer 0 */
3130 astnode_replace(n, astnode_create_integer(0, n->loc) );
3131 astnode_finalize(n);
3132 break;
3135 return 0;
3139 * Validates right part of dotted reference recursively.
3140 * Assumes that left part's symbol table is on stack.
3141 * @param n Node of type DOT_NODE
3143 static void validate_dotref_recursive(astnode *n, astnode *top)
3145 astnode *left;
3146 astnode *right;
3147 astnode *type;
3148 symtab_entry *field;
3149 symtab_entry *def;
3150 left = LHS(n);
3151 if (astnode_is_type(left, INDEX_NODE)) {
3152 left = LHS(left); /* Need identifier */
3154 right = RHS(n);
3155 if (astnode_is_type(right, DOT_NODE)) {
3156 right = LHS(right); /* Need identifier */
3158 if (astnode_is_type(right, INDEX_NODE)) {
3159 right = LHS(right); /* Need identifier */
3161 /* Lookup 'right' in 'left's symbol table */
3162 assert(astnode_get_type(right) == IDENTIFIER_NODE);
3163 field = symtab_lookup(right->ident);
3164 if (field == NULL) {
3165 /* Error, this symbol is unknown */
3166 err(n->loc, "`%s' is not a member of `%s'", right->ident, left->ident);
3167 /* Replace by integer 0 */
3168 astnode_replace(top, astnode_create_integer(0, top->loc) );
3169 astnode_finalize(top);
3170 } else {
3171 /* See if more subfields to process */
3172 n = RHS(n);
3173 if (astnode_is_type(n, DOT_NODE)) {
3174 /* Verify the variable's type -- should be user-defined */
3175 type = LHS(field->def);
3176 if ((type == NULL) || (type->datatype != USER_DATATYPE)) {
3177 err(n->loc, "member `%s' of `%s' is not a structure", right->ident, left->ident);
3178 /* Replace by integer 0 */
3179 astnode_replace(top, astnode_create_integer(0, top->loc) );
3180 astnode_finalize(top);
3181 } else {
3182 /* Look up variable's type definition and verify it's a structure */
3183 def = symtab_global_lookup(LHS(type)->ident);
3184 if (def == NULL) {
3185 err(n->loc, "member '%s' of '%s' is of unknown type (`%s')", right->ident, left->ident, LHS(type)->ident);
3186 /* Replace by integer 0 */
3187 astnode_replace(top, astnode_create_integer(0, top->loc) );
3188 astnode_finalize(top);
3189 } else if ( !((def->type == STRUC_SYMBOL) || (def->type == UNION_SYMBOL)) ) {
3190 err(n->loc, "member `%s' of `%s' is not a structure", right->ident, left->ident);
3191 /* Replace by integer 0 */
3192 astnode_replace(top, astnode_create_integer(0, top->loc) );
3193 astnode_finalize(top);
3194 } else {
3195 /* Next field */
3196 symtab_push(def->symtab);
3197 validate_dotref_recursive(n, top);
3198 symtab_pop();
3206 * Validates A.B.C.D. . ...
3207 * Replaces the whole thing with integer 0 if not.
3208 * @param n Node of type DOT_NODE
3210 static int validate_dotref(astnode *n, void *arg, astnode **next)
3212 symtab_entry *father;
3213 symtab_entry *def;
3214 astnode *type;
3215 astnode *left;
3216 if (astnode_has_ancestor_of_type(n, DOT_NODE)) {
3217 return 1; /* Already validated, since this function is recursive */
3219 /* Look up parent in global symbol table */
3220 left = LHS(n); /* n := left . right */
3221 if (astnode_is_type(left, INDEX_NODE)) {
3222 left = LHS(left); /* Need identifier */
3224 father = symtab_lookup(left->ident);
3225 if (father == NULL) {
3226 /* Error, this symbol is unknown */
3227 err(n->loc, "unknown symbol `%s'", left->ident);
3228 /* Replace by integer 0 */
3229 astnode_replace(n, astnode_create_integer(0, n->loc) );
3230 astnode_finalize(n);
3231 } else {
3232 /* Increase reference count */
3233 father->ref_count++;
3234 /* Verify the variable's type -- should be user-defined */
3235 type = LHS(father->def);
3236 if ((type == NULL) || (type->datatype != USER_DATATYPE)) {
3237 err(n->loc, "`%s' is not a structure", left->ident);
3238 /* Replace by integer 0 */
3239 astnode_replace(n, astnode_create_integer(0, n->loc) );
3240 astnode_finalize(n);
3241 } else {
3242 /* Look up variable's type definition and verify it's a structure */
3243 def = symtab_lookup(LHS(type)->ident);
3244 if (def == NULL) {
3245 err(n->loc, "'%s' is of unknown type (`%s')", left->ident, LHS(type)->ident);
3246 /* Replace by integer 0 */
3247 astnode_replace(n, astnode_create_integer(0, n->loc) );
3248 astnode_finalize(n);
3249 } else if ( !((def->type == STRUC_SYMBOL) || (def->type == UNION_SYMBOL)) ) {
3250 err(n->loc, "`%s' is not a structure", left->ident);
3251 /* Replace by integer 0 */
3252 astnode_replace(n, astnode_create_integer(0, n->loc) );
3253 astnode_finalize(n);
3254 } else {
3255 /* Verify fields recursively */
3256 symtab_push(def->symtab);
3257 validate_dotref_recursive(n, n);
3258 symtab_pop();
3262 return 1;
3265 /*---------------------------------------------------------------------------*/
3268 * Evaluates expressions involved in conditional assembly, and removes the
3269 * appropriate branches from the AST.
3270 * Does some other stuff too, such as substitute equates and fold constants.
3272 void astproc_first_pass(astnode *root)
3274 /* Table of callback functions for our purpose. */
3275 static astnodeprocmap map[] = {
3276 { LABEL_NODE, enter_label },
3277 { VAR_DECL_NODE, enter_var },
3278 { PROC_NODE, enter_proc },
3279 { STRUC_DECL_NODE, enter_struc },
3280 { UNION_DECL_NODE, enter_union },
3281 { ENUM_DECL_NODE, enter_enum },
3282 { RECORD_DECL_NODE, enter_record },
3283 { LOCAL_LABEL_NODE, globalize_local },
3284 { LOCAL_ID_NODE, globalize_local },
3285 { MACRO_DECL_NODE, enter_macro },
3286 { MACRO_NODE, expand_macro },
3287 { REPT_NODE, process_rept },
3288 { WHILE_NODE, process_while },
3289 { DATASEG_NODE, process_dataseg },
3290 { CODESEG_NODE, process_codeseg },
3291 { ORG_NODE, process_org },
3292 { CHARMAP_NODE, load_charmap },
3293 { INSTRUCTION_NODE, process_instruction },
3294 { DATA_NODE, process_data },
3295 { STORAGE_NODE, process_storage },
3296 { EQU_NODE, process_equ },
3297 { ASSIGN_NODE, process_assign },
3298 { IFDEF_NODE, process_ifdef },
3299 { IFNDEF_NODE, process_ifndef },
3300 { IF_NODE, process_if },
3301 { EXTRN_NODE, tag_extrn_symbols },
3302 { MESSAGE_NODE, process_message },
3303 { WARNING_NODE, process_warning },
3304 { ERROR_NODE, process_error },
3305 { FORWARD_BRANCH_DECL_NODE, process_forward_branch_decl },
3306 { BACKWARD_BRANCH_DECL_NODE, process_backward_branch_decl },
3307 { FORWARD_BRANCH_NODE, process_forward_branch },
3308 { BACKWARD_BRANCH_NODE, process_backward_branch },
3309 { 0, NULL }
3311 reset_charmap();
3312 branch_init();
3313 in_dataseg = 0; /* codeseg is default */
3314 /* Do the walk. */
3315 astproc_walk(root, NULL, map);
3316 /* Remove all the volatile constants from the symbol table */
3317 /* These are the ones defined with the '=' operator, whose identifiers should
3318 all have been replaced by their value in the syntax tree now. Since
3319 they're not referenced anywhere we can safely dispose of them.
3320 The EQUates on the other hand should be kept, since they will
3321 possibly be exported. */
3322 #ifdef ENABLE_BUGGY_THING // ### FIXME
3324 int i;
3325 symbol_ident_list list;
3326 symtab_entry *e;
3327 symtab_list_type(CONSTANT_SYMBOL, &list);
3328 for (i = 0; i < list.size; ++i) {
3329 e = symtab_lookup(list.idents[i]);
3330 if (e->flags & VOLATILE_FLAG) {
3331 symtab_remove(list.idents[i]);
3334 symtab_list_finalize(&list);
3336 #endif
3339 /*---------------------------------------------------------------------------*/
3342 * Tags labels as public.
3343 * @param n A node of type PUBLIC_NODE
3345 static int tag_public_symbols(astnode *n, void *arg, astnode **next)
3347 astnode *id;
3348 symtab_entry *e;
3349 /* Go through the list of identifiers */
3350 for (id=astnode_get_first_child(n); id != NULL; id = astnode_get_next_sibling(id) ) {
3351 /* Look up identifier in symbol table */
3352 e = symtab_lookup(id->ident);
3353 if (e != NULL) {
3354 /* Symbol exists. Set the proper flag unless ambiguous. */
3355 if (e->flags & EXTRN_FLAG) {
3356 err(n->loc, "`%s' already declared extrn", id->ident);
3357 } else {
3358 switch (e->type) {
3359 case LABEL_SYMBOL:
3360 case CONSTANT_SYMBOL:
3361 case VAR_SYMBOL:
3362 case PROC_SYMBOL:
3363 /* GO! */
3364 e->flags |= PUBLIC_FLAG;
3365 break;
3367 default:
3368 err(n->loc, "`%s' is of non-exportable type", id->ident);
3369 break;
3372 } else {
3373 /* Warning, can't export a symbol that's not defined. */
3374 warn(n->loc, "`%s' declared as public but is not defined", id->ident);
3377 /* Remove PUBLIC_NODE from AST */
3378 astnode_remove(n);
3379 astnode_finalize(n);
3381 return 0;
3385 * Sets alignment for a set of (data) labels.
3386 * @param n A node of type ALIGN_NODE
3388 static int tag_align_symbols(astnode *n, void *arg, astnode **next)
3390 int pow;
3391 astnode *id;
3392 astnode *idents;
3393 astnode *expr;
3394 symtab_entry *e;
3395 /* Go through the list of identifiers */
3396 idents = LHS(n);
3397 for (id=astnode_get_first_child(idents); id != NULL; id = astnode_get_next_sibling(id) ) {
3398 /* Look up identifier in symbol table */
3399 e = symtab_lookup(id->ident);
3400 if (e != NULL) {
3401 /* Symbol exists. Set the proper flag unless ambiguous. */
3402 if (!(e->flags & DATA_FLAG)) {
3403 err(n->loc, "cannot align a code symbol (`%s')", id->ident);
3404 } else {
3405 switch (e->type) {
3406 case LABEL_SYMBOL:
3407 case VAR_SYMBOL:
3408 expr = reduce_expression(RHS(n));
3409 if (!astnode_is_type(expr, INTEGER_NODE)) {
3410 err(n->loc, "alignment expression must be an integer literal");
3411 } else if ((expr->integer < 0) || (expr->integer >= 0x10000)) {
3412 err(n->loc, "alignment expression out of range");
3413 } else if (expr->integer > 1) {
3414 pow = 0;
3415 switch (expr->integer) {
3416 case 32768: pow++;
3417 case 16384: pow++;
3418 case 8192: pow++;
3419 case 4096: pow++;
3420 case 2048: pow++;
3421 case 1024: pow++;
3422 case 512: pow++;
3423 case 256: pow++;
3424 case 128: pow++;
3425 case 64: pow++;
3426 case 32: pow++;
3427 case 16: pow++;
3428 case 8: pow++;
3429 case 4: pow++;
3430 case 2: pow++;
3431 /* GO! */
3432 e->flags |= ALIGN_FLAG;
3433 e->align = pow;
3434 break;
3436 default:
3437 err(n->loc, "alignment expression must be a power of 2");
3438 break;
3441 break;
3443 default:
3444 err(n->loc, "`%s' cannot be aligned", id->ident);
3445 break;
3449 else {
3450 /* Warning, can't align a symbol that's not defined. */
3451 warn(n->loc, "alignment ignored for undefined symbol `%s'", id->ident);
3454 /* Remove ALIGN_NODE from AST */
3455 astnode_remove(n);
3456 astnode_finalize(n);
3458 return 0;
3461 /*---------------------------------------------------------------------------*/
3464 * Removes unused labels from a syntax tree (and symbol table).
3465 * Unused labels are labels that are defined but not referenced anywhere.
3466 * This function assumes that the reference counts have already been calculated.
3468 void remove_unused_labels()
3470 int i;
3471 char *id;
3472 astnode *n;
3473 symbol_ident_list list;
3474 symtab_list_type(LABEL_SYMBOL, &list);
3475 for (i=0; i<list.size; i++) {
3476 /* Look up label in symbol table */
3477 id = list.idents[i];
3478 symtab_entry * e = symtab_lookup(id);
3479 /* If reference count is zero, AND label isn't declared public, remove it. */
3480 if ((e->ref_count == 0) && ((e->flags & PUBLIC_FLAG) == 0)) {
3481 n = e->def;
3482 strtok(n->label, "#"); /* Remove globalize junk */
3483 warn(n->loc, "`%s' defined but not used", n->label);
3484 /* Remove label from AST */
3485 astnode_remove(n);
3486 astnode_finalize(n);
3487 //symtab_remove(n->label); ### FIXME leads to crash sometimes...
3490 symtab_list_finalize(&list);
3494 * If the storage is of user-defined type, replaces it with
3495 * .DSB sizeof(type) * count
3497 static int reduce_user_storage(astnode *n, void *arg, astnode **next)
3499 astnode *type;
3500 astnode *count;
3501 astnode *byte_storage;
3502 symtab_entry *e;
3503 type = LHS(n);
3504 if (type->datatype == USER_DATATYPE) {
3505 /* Look it up */
3506 e = symtab_lookup(LHS(type)->ident);
3507 if (e != NULL) {
3508 /* Replace by DSB */
3509 count = RHS(n);
3510 byte_storage = astnode_create_storage(
3511 astnode_create_datatype(BYTE_DATATYPE, NULL, type->loc),
3512 astnode_create_arithmetic(
3513 MUL_OPERATOR,
3514 astnode_create_sizeof(
3515 astnode_create_identifier(LHS(type)->ident, n->loc),
3516 n->loc
3518 astnode_clone(count, n->loc),
3519 n->loc
3521 n->loc
3523 astnode_replace(n, byte_storage);
3524 astnode_finalize(n);
3525 } else {
3526 err(n->loc, "unknown symbol `%s'", LHS(type)->ident);
3527 /* Remove from AST */
3528 astnode_remove(n);
3529 astnode_finalize(n);
3530 return 0;
3533 return 1;
3537 * Second major pass over AST.
3539 void astproc_second_pass(astnode *root)
3541 /* Table of callback functions for our purpose. */
3542 static astnodeprocmap map[] = {
3543 { IDENTIFIER_NODE, validate_ref },
3544 { SCOPE_NODE, validate_scoperef },
3545 { DOT_NODE, validate_dotref },
3546 { INDEX_NODE, validate_index },
3547 { PUBLIC_NODE, tag_public_symbols },
3548 { STORAGE_NODE, reduce_user_storage },
3549 { ALIGN_NODE, tag_align_symbols },
3550 { STRUC_DECL_NODE, noop },
3551 { UNION_DECL_NODE, noop },
3552 { ENUM_DECL_NODE, noop },
3553 { RECORD_DECL_NODE, noop },
3554 { 0, NULL }
3556 in_dataseg = 0; /* codeseg is default */
3557 /* Do the walk. */
3558 astproc_walk(root, NULL, map);
3559 /* */
3560 remove_unused_labels();
3563 /*---------------------------------------------------------------------------*/
3566 * Translates a single instruction.
3567 * @param n A node of type INSTRUCTION_NODE
3569 static int translate_instruction(astnode *n, void *arg, astnode **next)
3571 unsigned char c;
3572 /* Put the operand in final form */
3573 astnode *o = reduce_expression_complete( LHS(n) );
3574 assert(o == LHS(n));
3575 /* Convert (mnemonic, addressing mode) pair to opcode */
3576 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3577 /* Test if opcode is invalid */
3578 if (n->instr.opcode == 0xFF) {
3579 /* Check for the special cases */
3580 if ((n->instr.mnemonic == STX_MNEMONIC) && (n->instr.mode == ABSOLUTE_Y_MODE)) {
3581 /* Doesn't have absolute version, "scale down" to zeropage */
3582 n->instr.mode = ZEROPAGE_Y_MODE;
3583 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3584 } else if ((n->instr.mnemonic == STY_MNEMONIC) && (n->instr.mode == ABSOLUTE_X_MODE)) {
3585 /* Doesn't have absolute version, "scale down" to zeropage */
3586 n->instr.mode = ZEROPAGE_X_MODE;
3587 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3588 } else if (n->instr.mode == ABSOLUTE_MODE) {
3589 /* Check for relative addressing (these are parsed as absolute mode) */
3590 switch (n->instr.mnemonic) {
3591 case BCC_MNEMONIC:
3592 case BCS_MNEMONIC:
3593 case BEQ_MNEMONIC:
3594 case BMI_MNEMONIC:
3595 case BNE_MNEMONIC:
3596 case BPL_MNEMONIC:
3597 case BVC_MNEMONIC:
3598 case BVS_MNEMONIC:
3599 /* Fix addressing mode and opcode */
3600 n->instr.mode = RELATIVE_MODE;
3601 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3602 break;
3606 if (n->instr.opcode != 0xFF) {
3607 /* If the operand is a constant, see if we can "reduce" from
3608 absolute mode to zeropage mode */
3609 if ((astnode_is_type(o, INTEGER_NODE)) &&
3610 ((unsigned long)o->integer < 256) &&
3611 ((c = opcode_zp_equiv(n->instr.opcode)) != 0xFF)) {
3612 /* Switch to the zeromode version */
3613 n->instr.opcode = c;
3614 switch (n->instr.mode) {
3615 case ABSOLUTE_MODE: n->instr.mode = ZEROPAGE_MODE; break;
3616 case ABSOLUTE_X_MODE: n->instr.mode = ZEROPAGE_X_MODE;break;
3617 case ABSOLUTE_Y_MODE: n->instr.mode = ZEROPAGE_Y_MODE;break;
3618 default: /* Impossible to get here, right? */ break;
3621 /* If the operand is a constant, make sure it fits */
3622 if (astnode_is_type(o, INTEGER_NODE)) {
3623 switch (n->instr.mode) {
3624 case IMMEDIATE_MODE:
3625 case ZEROPAGE_MODE:
3626 case ZEROPAGE_X_MODE:
3627 case ZEROPAGE_Y_MODE:
3628 case PREINDEXED_INDIRECT_MODE:
3629 case POSTINDEXED_INDIRECT_MODE:
3630 /* Operand must fit in 8 bits */
3631 if (!IS_BYTE_VALUE(o->integer)) {
3632 warn(o->loc, "operand out of range; truncated");
3633 o->integer &= 0xFF;
3635 break;
3637 case ABSOLUTE_MODE:
3638 case ABSOLUTE_X_MODE:
3639 case ABSOLUTE_Y_MODE:
3640 case INDIRECT_MODE:
3641 /* Operand must fit in 8 bits */
3642 if ((unsigned long)o->integer >= 0x10000) {
3643 warn(o->loc, "operand out of range; truncated");
3644 o->integer &= 0xFFFF;
3646 break;
3648 case RELATIVE_MODE:
3649 /* Constant isn't allowed here is it? */
3650 break;
3652 default:
3653 break;
3656 else if (astnode_is_type(o, STRING_NODE)) {
3657 /* String operand doesn't make sense here */
3658 err(n->loc, "invalid operand");
3660 } else {
3661 /* opcode_get() returned 0xFF */
3662 err(n->loc, "invalid addressing mode");
3664 return 0;
3668 * ### Is this really such a good idea?
3670 static int maybe_merge_data(astnode *n, void *arg, astnode **next)
3672 astnode *temp;
3673 astnode *type;
3674 type = LHS(n);
3675 /* Only merge if no debugging, otherwise line information is lost. */
3676 if (!xasm_args.debug && astnode_is_type(*next, DATA_NODE) &&
3677 astnode_equal(type, LHS(*next)) ) {
3678 /* Merge ahead */
3679 temp = *next;
3680 astnode_finalize( astnode_remove_child_at(temp, 0) ); /* Remove datatype node */
3681 astnode_add_child(n, astnode_remove_children(temp) );
3682 astnode_finalize(temp);
3683 *next = n;
3684 } else {
3685 /* Reduce expressions to final form */
3686 for (n = n->first_child; n != NULL; n = temp->next_sibling) {
3687 temp = reduce_expression_complete(n);
3688 if (astnode_is_type(temp, INTEGER_NODE)) {
3689 /* Check that value fits according to datatype */
3690 switch (type->datatype) {
3691 case BYTE_DATATYPE:
3692 if (!IS_BYTE_VALUE(temp->integer)) {
3693 warn(temp->loc, "operand out of range; truncated");
3694 temp->integer &= 0xFF;
3696 break;
3698 case WORD_DATATYPE:
3699 if (!IS_WORD_VALUE(temp->integer)) {
3700 warn(temp->loc, "operand out of range; truncated");
3701 temp->integer &= 0xFFFF;
3703 break;
3705 case DWORD_DATATYPE:
3706 break;
3708 default:
3709 break;
3714 return 0;
3720 static int maybe_merge_storage(astnode *n, void *arg, astnode **next)
3722 astnode *temp;
3723 astnode *new_count;
3724 astnode *old_count;
3725 if (astnode_is_type(*next, STORAGE_NODE) &&
3726 astnode_equal(LHS(n), LHS(*next)) ) {
3727 /* Merge ahead */
3728 temp = *next;
3729 astnode_finalize( astnode_remove_child_at(temp, 0) ); /* Remove datatype node */
3730 old_count = RHS(n);
3731 /* Calculate new count */
3732 new_count = astnode_create_arithmetic(
3733 PLUS_OPERATOR,
3734 astnode_remove_child_at(temp, 0),
3735 astnode_clone(old_count, n->loc),
3736 n->loc
3738 new_count = reduce_expression_complete(new_count);
3739 astnode_replace(old_count, new_count);
3740 astnode_finalize(old_count);
3741 astnode_finalize(temp);
3742 *next = n;
3743 } else {
3744 reduce_expression_complete(RHS(n));
3746 return 0;
3750 * Replaces .proc by its label followed by statements.
3752 static int flatten_proc(astnode *n, void *arg, astnode **next)
3754 astnode *id = LHS(n);
3755 astnode *list = RHS(n);
3756 astnode_remove(id);
3757 id->type = LABEL_NODE;
3758 astnode_insert_child(list, id, 0);
3759 astnode *stmts = astnode_remove_children(list);
3760 astnode_replace(n, stmts);
3761 astnode_finalize(n);
3762 *next = stmts;
3763 return 0;
3769 static int flatten_var_decl(astnode *n, void *arg, astnode **next)
3771 astnode *stmts = LHS(n);
3772 astnode_remove_children(n);
3773 stmts->type = LABEL_NODE;
3774 astnode_replace(n, stmts);
3775 astnode_finalize(n);
3776 *next = stmts;
3777 return 0;
3781 * Third and final pass (if the output isn't pure 6502).
3782 * Translates instructions, merges data and storage nodes,
3783 * and reduces their operands to final form on the way.
3785 void astproc_third_pass(astnode *root)
3787 /* Table of callback functions for our purpose. */
3788 static astnodeprocmap map[] = {
3789 { INSTRUCTION_NODE, translate_instruction },
3790 { DATA_NODE, maybe_merge_data },
3791 { STORAGE_NODE, maybe_merge_storage },
3792 { VAR_DECL_NODE, flatten_var_decl },
3793 { PROC_NODE, flatten_proc },
3794 { STRUC_DECL_NODE, noop },
3795 { UNION_DECL_NODE, noop },
3796 { ENUM_DECL_NODE, noop },
3797 { RECORD_DECL_NODE, noop },
3798 { 0, NULL }
3800 in_dataseg = 0; /* codeseg is default */
3801 /* Do the walk. */
3802 astproc_walk(root, NULL, map);
3805 /*---------------------------------------------------------------------------*/
3808 * Evaluates the given expression, _without_ replacing it in the AST
3809 * (unlike astproc_reduce_expression() and friends).
3811 static astnode *eval_expression(astnode *expr)
3813 switch (astnode_get_type(expr)) {
3815 case ARITHMETIC_NODE: {
3816 astnode *lhs = eval_expression(LHS(expr));
3817 astnode *rhs = eval_expression(RHS(expr));
3818 switch (expr->oper) {
3819 /* Binary ops */
3820 case PLUS_OPERATOR:
3821 case MINUS_OPERATOR:
3822 case MUL_OPERATOR:
3823 case DIV_OPERATOR:
3824 case MOD_OPERATOR:
3825 case AND_OPERATOR:
3826 case OR_OPERATOR:
3827 case XOR_OPERATOR:
3828 case SHL_OPERATOR:
3829 case SHR_OPERATOR:
3830 case LT_OPERATOR:
3831 case GT_OPERATOR:
3832 case EQ_OPERATOR:
3833 case NE_OPERATOR:
3834 case LE_OPERATOR:
3835 case GE_OPERATOR:
3836 if (astnode_is_type(lhs, INTEGER_NODE)
3837 && astnode_is_type(rhs, INTEGER_NODE)) {
3838 /* Both sides are integer literals. */
3839 switch (expr->oper) {
3840 case PLUS_OPERATOR: return astnode_create_integer(lhs->integer + rhs->integer, expr->loc);
3841 case MINUS_OPERATOR: return astnode_create_integer(lhs->integer - rhs->integer, expr->loc);
3842 case MUL_OPERATOR: return astnode_create_integer(lhs->integer * rhs->integer, expr->loc);
3843 case DIV_OPERATOR: return astnode_create_integer(lhs->integer / rhs->integer, expr->loc);
3844 case MOD_OPERATOR: return astnode_create_integer(lhs->integer % rhs->integer, expr->loc);
3845 case AND_OPERATOR: return astnode_create_integer(lhs->integer & rhs->integer, expr->loc);
3846 case OR_OPERATOR: return astnode_create_integer(lhs->integer | rhs->integer, expr->loc);
3847 case XOR_OPERATOR: return astnode_create_integer(lhs->integer ^ rhs->integer, expr->loc);
3848 case SHL_OPERATOR: return astnode_create_integer(lhs->integer << rhs->integer, expr->loc);
3849 case SHR_OPERATOR: return astnode_create_integer(lhs->integer >> rhs->integer, expr->loc);
3850 case LT_OPERATOR: return astnode_create_integer(lhs->integer < rhs->integer, expr->loc);
3851 case GT_OPERATOR: return astnode_create_integer(lhs->integer > rhs->integer, expr->loc);
3852 case EQ_OPERATOR: return astnode_create_integer(lhs->integer == rhs->integer, expr->loc);
3853 case NE_OPERATOR: return astnode_create_integer(lhs->integer != rhs->integer, expr->loc);
3854 case LE_OPERATOR: return astnode_create_integer(lhs->integer <= rhs->integer, expr->loc);
3855 case GE_OPERATOR: return astnode_create_integer(lhs->integer >= rhs->integer, expr->loc);
3857 default: /* ### Error, actually */
3858 break;
3861 /* Use some mathematical identities... */
3862 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 0))
3863 && (expr->oper == PLUS_OPERATOR)) {
3864 /* 0+expr == expr */
3865 return astnode_clone(rhs, rhs->loc);
3866 } else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 0))
3867 && (expr->oper == PLUS_OPERATOR)) {
3868 /* expr+0 == expr */
3869 return astnode_clone(lhs, lhs->loc);
3870 } else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 1))
3871 && (expr->oper == MUL_OPERATOR)) {
3872 /* 1*expr == expr */
3873 return astnode_clone(rhs, rhs->loc);
3874 } else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 1))
3875 && ((expr->oper == MUL_OPERATOR) || (expr->oper == DIV_OPERATOR)) ) {
3876 /* expr*1 == expr */
3877 /* expr/1 == expr */
3878 return astnode_clone(lhs, lhs->loc);
3880 break;
3882 /* Unary ops */
3883 case NEG_OPERATOR:
3884 case NOT_OPERATOR:
3885 case LO_OPERATOR:
3886 case HI_OPERATOR:
3887 case UMINUS_OPERATOR:
3888 case BANK_OPERATOR:
3889 if (astnode_is_type(lhs, INTEGER_NODE)) {
3890 switch (expr->oper) {
3891 case NEG_OPERATOR: return astnode_create_integer(~lhs->integer, expr->loc);
3892 case NOT_OPERATOR: return astnode_create_integer(!lhs->integer, expr->loc);
3893 case LO_OPERATOR: return astnode_create_integer(lhs->integer & 0xFF, expr->loc);
3894 case HI_OPERATOR: return astnode_create_integer((lhs->integer >> 8) & 0xFF, expr->loc);
3895 case UMINUS_OPERATOR: return astnode_create_integer(-lhs->integer, expr->loc);
3896 default: break;
3899 break;
3900 } /* switch */
3901 } break;
3903 case INTEGER_NODE:
3904 return astnode_clone(expr, expr->loc);
3906 case IDENTIFIER_NODE: {
3907 symtab_entry *e = symtab_lookup(expr->ident);
3908 // ### assert(e->type == LABEL_SYMBOL);
3909 if (e->flags & ADDR_FLAG)
3910 return astnode_create_integer(e->address, expr->loc);
3911 } break;
3913 case CURRENT_PC_NODE:
3914 return astnode_create_integer(in_dataseg ? dataseg_pc : codeseg_pc, expr->loc);
3916 default:
3917 break;
3918 } /* switch */
3919 return 0;
3923 * Sets the address of the label to be the currently calculated PC.
3925 static int set_label_address(astnode *label, void *arg, astnode **next)
3927 symtab_entry *e = symtab_lookup(label->ident);
3928 // ### assert(e && (e->type == LABEL_SYMBOL));
3929 e->address = in_dataseg ? dataseg_pc : codeseg_pc;
3930 e->flags |= ADDR_FLAG;
3931 return 0;
3935 * Sets the current PC to the address specified by the ORG node.
3937 static int set_pc_from_org(astnode *org, void *arg, astnode **next)
3939 astnode *addr = LHS(org);
3940 assert(astnode_is_type(addr, INTEGER_NODE));
3941 if (in_dataseg)
3942 dataseg_pc = addr->integer;
3943 else
3944 codeseg_pc = addr->integer;
3945 return 0;
3949 * Ensures that the given symbol is defined.
3951 static int ensure_symbol_is_defined(astnode *id, void *arg, astnode **next)
3953 symtab_entry *e = symtab_lookup(id->ident);
3954 assert(e);
3955 if ((e->flags & EXTRN_FLAG) && !(e->flags & ERROR_UNDEFINED_FLAG)) {
3956 err(id->loc, "cannot generate pure binary because `%s' is not defined", id->ident);
3957 e->flags |= ERROR_UNDEFINED_FLAG;
3959 return 0;
3963 * Increments PC according to the size of the instruction.
3965 static int inc_pc_by_instruction(astnode *instr, void *arg, astnode **next)
3967 assert(!in_dataseg);
3968 if (LHS(instr)) {
3969 /* Has operand */
3970 unsigned char zp_op = opcode_zp_equiv(instr->instr.opcode);
3971 if (zp_op != 0xFF) {
3972 /* See if we can optimize this to a ZP-instruction */
3973 astnode *operand = eval_expression(LHS(instr));
3974 if (operand && astnode_is_type(operand, INTEGER_NODE)) {
3975 if ((operand->integer >= 0) && (operand->integer < 256)) {
3976 instr->instr.opcode = zp_op;
3978 astnode_finalize(operand);
3982 codeseg_pc += opcode_length(instr->instr.opcode);
3983 return 1;
3987 * Increments PC according to the size of the defined data.
3989 static int inc_pc_by_data(astnode *data, void *arg, astnode **next)
3991 astnode *type = LHS(data);
3992 int count = astnode_get_child_count(data) - 1;
3993 int nbytes;
3994 assert(!in_dataseg);
3995 switch (type->datatype) {
3996 case BYTE_DATATYPE: nbytes = count; break;
3997 case WORD_DATATYPE: nbytes = count * 2; break;
3998 case DWORD_DATATYPE: nbytes = count * 4; break;
3999 default:
4000 assert(0);
4001 break;
4003 codeseg_pc += nbytes;
4004 return 0;
4008 * Increments PC according to the size of the storage.
4010 static int inc_pc_by_storage(astnode *storage, void *arg, astnode **next)
4012 astnode *type = LHS(storage);
4013 assert(type->datatype == BYTE_DATATYPE);
4014 astnode *count = eval_expression(RHS(storage));
4015 if (count) {
4016 if (astnode_get_type(count) == INTEGER_NODE) {
4017 if (in_dataseg)
4018 dataseg_pc += count->integer;
4019 else
4020 codeseg_pc += count->integer;
4022 astnode_finalize(count);
4024 return 1;
4028 * This pass is only performed if the output format is pure 6502.
4029 * It ensures that it is actually possible to generate pure 6502
4030 * for this syntax tree (i.e. no external symbols).
4031 * Furthermore, it calculates the address of all labels, so that
4032 * everything is ready for the final output phase.
4034 void astproc_fourth_pass(astnode *root)
4036 int x;
4037 /* ### Should loop while there's a change in the address of
4038 one or more labels */
4039 for (x = 0; x < 2; ++x) {
4040 in_dataseg = 0; /* codeseg is default */
4041 dataseg_pc = 0;
4042 codeseg_pc = 0;
4043 /* Table of callback functions for our purpose. */
4044 static astnodeprocmap map[] = {
4045 { DATASEG_NODE, process_dataseg },
4046 { CODESEG_NODE, process_codeseg },
4047 { ORG_NODE, set_pc_from_org },
4048 { LABEL_NODE, set_label_address },
4049 { IDENTIFIER_NODE, ensure_symbol_is_defined },
4050 { INSTRUCTION_NODE, inc_pc_by_instruction },
4051 { DATA_NODE, inc_pc_by_data },
4052 { STORAGE_NODE, inc_pc_by_storage },
4053 { STRUC_DECL_NODE, noop },
4054 { UNION_DECL_NODE, noop },
4055 { ENUM_DECL_NODE, noop },
4056 { RECORD_DECL_NODE, noop },
4057 { 0, NULL }
4059 /* Do the walk. */
4060 astproc_walk(root, NULL, map);
4064 /*---------------------------------------------------------------------------*/
4067 * Writes an instruction.
4069 static int write_instruction(astnode *instr, void *arg, astnode **next)
4071 FILE *fp = (FILE *)arg;
4072 unsigned char op = instr->instr.opcode;
4073 int len = opcode_length(op);
4074 fputc(op, fp);
4075 if (len > 1) {
4076 /* Write operand */
4077 astnode *operand = eval_expression(LHS(instr));
4078 if(!astnode_is_type(operand, INTEGER_NODE)) {
4079 /* ### This is rather fatal, it should be a literal by this point */
4080 err(instr->loc, "operand does not evaluate to literal");
4081 } else {
4082 int value = operand->integer;
4083 if (len == 2) {
4084 /* Check if it's a relative jump */
4085 switch (op) {
4086 case 0x10:
4087 case 0x30:
4088 case 0x50:
4089 case 0x70:
4090 case 0x90:
4091 case 0xB0:
4092 case 0xD0:
4093 case 0xF0:
4094 /* Calculate difference between target and address of next instruction */
4095 value = value - (codeseg_pc + 2);
4096 if (!IS_BYTE_VALUE(value)) {
4097 err(operand->loc, "branch out of range");
4098 value &= 0xFF;
4100 break;
4102 default:
4103 if (!IS_BYTE_VALUE(value)) {
4104 warn(operand->loc, "operand out of range; truncated");
4105 value &= 0xFF;
4107 break;
4109 fputc((unsigned char)value, fp);
4110 } else {
4111 assert(len == 3);
4112 if (!IS_WORD_VALUE(value)) {
4113 warn(operand->loc, "operand out of range; truncated");
4114 value &= 0xFFFF;
4116 fputc((unsigned char)value, fp);
4117 fputc((unsigned char)(value >> 8), fp);
4120 astnode_finalize(operand);
4122 return 0;
4126 * Writes data.
4128 static int write_data(astnode *data, void *arg, astnode **next)
4130 FILE *fp = (FILE *)arg;
4131 astnode *type = LHS(data);
4132 astnode *expr;
4133 assert(!in_dataseg);
4134 for (expr = RHS(data); expr != NULL; expr = astnode_get_next_sibling(expr) ) {
4135 int value;
4136 astnode *e = eval_expression(expr);
4137 assert(e->type == INTEGER_NODE);
4138 value = e->integer;
4139 switch (type->datatype) {
4140 case BYTE_DATATYPE:
4141 if (!IS_BYTE_VALUE(value)) {
4142 warn(expr->loc, "operand out of range; truncated");
4143 value &= 0xFF;
4145 fputc((unsigned char)value, fp);
4146 codeseg_pc += 1;
4147 break;
4149 case WORD_DATATYPE:
4150 if (!IS_WORD_VALUE(value)) {
4151 warn(expr->loc, "operand out of range; truncated");
4152 value &= 0xFFFF;
4154 fputc((unsigned char)value, fp);
4155 fputc((unsigned char)(value >> 8), fp);
4156 codeseg_pc += 2;
4157 break;
4159 case DWORD_DATATYPE:
4160 fputc((unsigned char)value, fp);
4161 fputc((unsigned char)(value >> 8), fp);
4162 fputc((unsigned char)(value >> 16), fp);
4163 fputc((unsigned char)(value >> 24), fp);
4164 codeseg_pc += 4;
4165 break;
4167 default:
4168 assert(0);
4169 break;
4171 astnode_finalize(e);
4173 return 0;
4177 * This pass is only performed if the output format is pure 6502.
4178 * It writes the binary code.
4180 void astproc_fifth_pass(astnode *root)
4182 FILE *fp = fopen(xasm_args.output_file, "wb");
4183 if (!fp) {
4184 fprintf(stderr, "could not open '%s' for writing\n", xasm_args.output_file);
4185 ++err_count;
4186 return;
4188 /* Table of callback functions for our purpose. */
4189 static astnodeprocmap map[] = {
4190 { DATASEG_NODE, process_dataseg },
4191 { CODESEG_NODE, process_codeseg },
4192 { ORG_NODE, set_pc_from_org },
4193 { INSTRUCTION_NODE, write_instruction },
4194 { DATA_NODE, write_data },
4195 { STORAGE_NODE, inc_pc_by_storage },
4196 { STRUC_DECL_NODE, noop },
4197 { UNION_DECL_NODE, noop },
4198 { ENUM_DECL_NODE, noop },
4199 { RECORD_DECL_NODE, noop },
4200 { 0, NULL }
4202 in_dataseg = 0; /* codeseg is default */
4203 dataseg_pc = 0;
4204 codeseg_pc = 0;
4205 /* Do the walk. */
4206 astproc_walk(root, fp, map);
4207 fclose(fp);