update configure stuff
[xorcyst.git] / astproc.c
blobac8a2f56b09277d1046e0b359a6a6ecc969855b8
1 /*
2 * $Id: astproc.c,v 1.21 2007/11/11 22:35:22 khansen Exp $
3 * $Log: astproc.c,v $
4 * Revision 1.21 2007/11/11 22:35:22 khansen
5 * compile on mac
7 * Revision 1.20 2007/08/19 10:17:39 khansen
8 * allow symbols to be used without having been declared
10 * Revision 1.19 2007/08/12 18:58:12 khansen
11 * ability to generate pure 6502 binary (--pure-binary switch)
13 * Revision 1.18 2007/08/12 02:42:46 khansen
14 * prettify, const
16 * Revision 1.17 2007/08/09 22:06:10 khansen
17 * ability to pass in reference to local label as argument to macro
19 * Revision 1.16 2007/08/09 20:48:46 khansen
20 * disable buggy code that can cause crash
22 * Revision 1.15 2007/08/09 20:33:40 khansen
23 * progress
25 * Revision 1.14 2007/08/08 22:40:01 khansen
26 * improved symbol lookup, definitions must precede usage
28 * Revision 1.13 2007/07/22 13:33:26 khansen
29 * convert tabs to whitespaces
31 * Revision 1.12 2005/01/09 11:17:57 kenth
32 * xorcyst 1.4.5
33 * fixed bug in process_data(), merge_data()
34 * no longer truncation warning when fits in signed byte/word
36 * Revision 1.11 2005/01/05 02:28:13 kenth
37 * xorcyst 1.4.3
38 * support for anonymous unions
39 * fixed sizeof bug
41 * Revision 1.10 2004/12/29 21:44:41 kenth
42 * xorcyst 1.4.2
43 * static indexing, sizeof improved
45 * Revision 1.9 2004/12/25 02:22:35 kenth
46 * fixed bug in reduce_user_storage()
48 * Revision 1.8 2004/12/19 19:58:29 kenth
49 * xorcyst 1.4.0
51 * Revision 1.7 2004/12/18 16:57:39 kenth
52 * STORAGE_NODE(WORD/DWORD_DATATYPE) converts to BYTE
54 * Revision 1.6 2004/12/16 13:19:47 kenth
55 * xorcyst 1.3.5
57 * Revision 1.5 2004/12/14 01:49:05 kenth
58 * xorcyst 1.3.0
60 * Revision 1.4 2004/12/11 02:01:25 kenth
61 * added forward/backward branching
63 * Revision 1.3 2004/12/09 11:18:13 kenth
64 * added: warning, error node processing
66 * Revision 1.2 2004/12/06 04:52:24 kenth
67 * Major updates (xorcyst 1.1.0)
69 * Revision 1.1 2004/06/30 07:55:31 kenth
70 * Initial revision
74 /**
75 * (C) 2004 Kent Hansen
77 * The XORcyst is free software; you can redistribute it and/or modify
78 * it under the terms of the GNU General Public License as published by
79 * the Free Software Foundation; either version 2 of the License, or
80 * (at your option) any later version.
82 * The XORcyst is distributed in the hope that it will be useful,
83 * but WITHOUT ANY WARRANTY; without even the implied warranty of
84 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
85 * GNU General Public License for more details.
87 * You should have received a copy of the GNU General Public License
88 * along with The XORcyst; if not, write to the Free Software
89 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
92 /**
93 * This file contains functions that process the Abstract Syntax Tree (AST).
94 * After the assembly file has been parsed into an AST, a number of passes are
95 * made on it to process it and transform it. The functions here are
96 * concerned with things like
97 * - macro expansion
98 * - symbol table generation
99 * - equates substitution
100 * - constant folding
101 * - code and symbol validation
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "astproc.h"
#include "symtab.h"
#include "opcode.h"
#include "charmap.h"
#include "xasm.h"
/*
 * Range predicates for byte- and word-sized values.
 * A value "fits" if it is representable either as a signed or as an
 * unsigned quantity of the given width.
 * Each macro evaluates its argument more than once, so only pass
 * expressions without side effects.
 */
#define IS_SIGNED_BYTE_VALUE(v) (((v) >= -128) && ((v) <= 127))
#define IS_UNSIGNED_BYTE_VALUE(v) (((v) >= 0) && ((v) <= 255))
#define IS_BYTE_VALUE(v) (IS_SIGNED_BYTE_VALUE(v) || IS_UNSIGNED_BYTE_VALUE(v))

#define IS_SIGNED_WORD_VALUE(v) (((v) >= -32768) && ((v) <= 32767))
#define IS_UNSIGNED_WORD_VALUE(v) (((v) >= 0) && ((v) <= 65535))
#define IS_WORD_VALUE(v) (IS_SIGNED_WORD_VALUE(v) || IS_UNSIGNED_WORD_VALUE(v))
123 /*---------------------------------------------------------------------------*/
/** Number of errors issued during processing. */
static int err_count = 0;

/** Number of warnings issued during processing. */
static int warn_count = 0;

/** Keeps track of number of global labels encountered. */
static int label_count = 0;

/** Keeps track of whether statement is in dataseg or codeseg. */
static int in_dataseg = 0;

/** Default symbol modifiers, i.e. ZEROPAGE_FLAG, PUBLIC_FLAG */
static int modifiers = 0;

/* Used when we are outputting pure 6502 binary */
static int dataseg_pc;
static int codeseg_pc;
144 /*---------------------------------------------------------------------------*/
/**
 * Mapping from regular ASCII characters to custom character values.
 * Used to transform .char arrays to regular .db arrays.
 */
static unsigned char charmap[256];

/**
 * Resets the custom character map.
 * Every character code 0..255 is mapped to itself.
 */
static void reset_charmap(void)
{
    int code;
    for (code = 0; code < 256; code++) {
        /* Cast to the element type; going through plain char would rely on
           an implementation-defined conversion for codes above 127. */
        charmap[code] = (unsigned char)code;
    }
}
163 /*---------------------------------------------------------------------------*/
164 /* Forward/backward branching stuff */
166 struct tag_forward_branch_info {
167 astnode *refs[128];
168 int index; /* Index into refs */
169 int counter;
172 typedef struct tag_forward_branch_info forward_branch_info;
174 struct tag_backward_branch_info {
175 astnode *decl;
176 int counter;
179 typedef struct tag_backward_branch_info backward_branch_info;
181 #define BRANCH_MAX 8
183 static forward_branch_info forward_branch[BRANCH_MAX];
185 static backward_branch_info backward_branch[BRANCH_MAX];
188 * Zaps forward/backward branch data.
190 static void branch_init()
192 int i, j;
193 for (i=0; i<BRANCH_MAX; i++) {
194 for (j=0; j<128; j++) {
195 forward_branch[i].refs[j] = NULL;
197 forward_branch[i].index = 0;
198 forward_branch[i].counter = 0;
199 backward_branch[i].decl = NULL;
200 backward_branch[i].counter = 0;
204 /*---------------------------------------------------------------------------*/
207 * Issues an error.
208 * @param loc File location of error
209 * @param fmt printf-style format string
211 static void err(location loc, const char *fmt, ...)
213 va_list ap;
214 va_start(ap, fmt);
216 /* Print error message w/ location info */
217 fprintf(stderr, "error: %s:", loc.file);
218 LOCATION_PRINT(stderr, loc);
219 fprintf(stderr, ": ");
220 vfprintf(stderr, fmt, ap);
221 fprintf(stderr, "\n");
223 va_end(ap);
225 /* Increase total error count */
226 err_count++;
230 * Issues a warning.
231 * @param loc File location of warning
232 * @param fmt printf-style format string
234 static void warn(location loc, const char *fmt, ...)
236 va_list ap;
237 if (!xasm_args.no_warn) {
238 va_start(ap, fmt);
239 /* Print warning message w/ location info */
240 fprintf(stderr, "warning: %s:", loc.file);
241 LOCATION_PRINT(stderr, loc);
242 fprintf(stderr, ": ");
243 vfprintf(stderr, fmt, ap);
244 fprintf(stderr, "\n");
245 va_end(ap);
248 /* Increase total warning count */
249 warn_count++;
253 * Gets the number of errors encountered during processing.
254 * @return Number of errors
256 int astproc_err_count()
258 return err_count;
261 /*---------------------------------------------------------------------------*/
264 * Gets the processor function for a node type from a map.
265 * Used by astproc_walk().
266 * @param type The node type
267 * @param map A mapping from node types to processor functions
269 static astnodeproc astproc_node_type_to_proc(astnode_type type, const astnodeprocmap *map)
271 /* Try all map entries */
272 for (; map->proc != NULL; map += 1) {
273 if (map->type == type) {
274 return map->proc; /* Match */
277 /* No match */
278 return NULL;
281 /*---------------------------------------------------------------------------*/
284 * Walks an abstract syntax tree recursively.
285 * @param n Node to walk
286 * @param arg Optional argument to pass to processor function
287 * @param map Mapping of node types to processor functions
289 static void astproc_walk_recursive(astnode *n, void *arg, const astnodeprocmap *map, astnode **next)
291 astnode *c;
292 astnode *t;
293 if (n == NULL) { return; }
294 /* Process this node if it has a processor function */
295 astnodeproc p = astproc_node_type_to_proc(astnode_get_type(n), map);
296 if (p != NULL) {
297 if (!p(n, arg, next)) return; /* Don't walk children */
299 /* Walk the node's children recursively */
300 for (c=n->first_child; c != NULL; c = t) {
301 t = c->next_sibling; /* default next node */
302 astproc_walk_recursive(c, arg, map, &t);
307 * Generic tree walker function.
308 * @param n Root
309 * @param arg General-purpose argument passed to each node handler function
310 * @param map Array of (nodetype, handler function) tuples
312 void astproc_walk(astnode *n, void *arg, const astnodeprocmap *map)
314 astnode *dummy;
315 astproc_walk_recursive(n, arg, map, &dummy);
318 /*---------------------------------------------------------------------------*/
321 * Don't do any processing of this node or its children on this pass.
323 static int noop(astnode *n, void *arg, astnode **next)
325 return 0;
329 * Substitutes an identifier node with subst_expr if the id is equal to subst_id.
330 * @param n A node of type IDENTIFIER_NODE
331 * @param arg Array of length 2, containing (expr, id) pair
333 static int substitute_id(astnode *n, void *arg, astnode **next)
335 /* arg is array containing expression and identifier */
336 astnode **array = (astnode **)arg;
337 astnode *subst_expr = array[0];
338 astnode *subst_id = array[1];
339 /* Test if this node and the identifier to replace are equal */
340 if (astnode_equal(n, subst_id)) {
341 /* They're equal, replace it by expression. */
342 astnode *cl = astnode_clone(subst_expr, n->loc);
343 /* ### Generalize: traverse all children, set the flag */
344 if (astnode_get_type(cl) == LOCAL_ID_NODE) {
345 cl->flags |= 0x80; /* don't globalize it */
347 astnode_replace(n, cl);
348 astnode_finalize(n);
350 return 1;
354 * Substitutes expr for id in list.
355 * Used by macro expander to substitute a macro body parameter name with the
356 * actual expression used in the macro expansion.
357 * @param expr An expression
358 * @param id An identifier
359 * @param list A list of statements (macro body)
361 static void substitute_expr_for_id(astnode *expr, astnode *id, astnode *list)
363 /* Prepare argument to astproc_walk */
364 astnode *array[2];
365 array[0] = expr;
366 array[1] = id;
367 /* Table of callback functions for our purpose. */
368 static astnodeprocmap map[] = {
369 { IDENTIFIER_NODE, substitute_id },
370 { 0, NULL }
372 /* Do the walk. */
373 astproc_walk(list, array, map);
376 /*---------------------------------------------------------------------------*/
379 * Globalizes a macro expanded local.
380 * This is done simply by concatenating the local label identifier with the
381 * global macro invocation counter.
382 * @param n A node of type LOCAL_LABEL_NODE or LOCAL_ID_NODE
383 * @param arg Namespace counter (int)
385 static int globalize_macro_expanded_local(astnode *n, void *arg, astnode **next)
387 /* Only globalize if it's a reference to a label defined in the macro */
388 if (!(n->flags & 0x80)) {
389 char str[16];
390 int count;
391 /* Make it global by appending the macro expansion counter to the id */
392 count = (int)arg;
393 sprintf(str, "#%d", count);
394 if (astnode_is_type(n, LOCAL_LABEL_NODE)) {
395 /* LOCAL_LABEL_NODE, use label field */
396 n->label = realloc(n->label, strlen(n->label)+strlen(str)+1);
397 strcat(n->label, str);
398 } else {
399 /* LOCAL_ID_NODE, use ident field */
400 assert(astnode_is_type(n, LOCAL_ID_NODE));
401 n->ident = realloc(n->ident, strlen(n->ident)+strlen(str)+1);
402 strcat(n->ident, str);
405 /* */
406 return 1;
410 * Globalizes all locals in the body of a macro expansion.
411 * Used by the macro expander to ensure that local labels in macro expansions
412 * are unique.
413 * @param exp_body The expanded macro body
414 * @param count Unique macro namespace counter
416 static void globalize_macro_expanded_locals(astnode *exp_body, int count)
418 /* Table of callback functions for our purpose. */
419 static astnodeprocmap map[] = {
420 { LOCAL_ID_NODE, globalize_macro_expanded_local },
421 { LOCAL_LABEL_NODE, globalize_macro_expanded_local },
422 { 0, NULL }
424 /* Do the walk. */
425 astproc_walk(exp_body, (void *)count, map);
429 * Expands a macro; that is, replaces a macro invocation in the AST with the
430 * macro body. Substitutes parameter names for values.
431 * @param n Must be a node of type MACRO_NODE
432 * @param arg Not used
434 static int expand_macro(astnode *n, void *arg, astnode **next)
436 astnode *decl;
437 astnode *decl_body;
438 astnode *exp_body;
439 astnode *formals;
440 astnode *actuals;
441 astnode *id;
442 astnode *expr;
443 int i;
444 /* Keeps track of the current/total number of macro expansions */
445 static int count = 0;
446 /* Get the name of the macro to expand */
447 id = astnode_get_child(n, 0);
448 /* Look up its definition in symbol table */
449 symtab_entry *e = symtab_lookup(id->ident);
450 /* If it's not in the symbol table, error. */
451 if (e == NULL) {
452 err(n->loc, "unknown macro or directive `%s'", id->ident);
453 /* Remove from AST */
454 astnode_remove(n);
455 astnode_finalize(n);
456 return 0;
458 else if (e->type != MACRO_SYMBOL) {
459 err(n->loc, "cannot expand `%s'; not a macro", e->id);
460 /* Remove from AST */
461 astnode_remove(n);
462 astnode_finalize(n);
463 return 0;
465 else {
466 /* e->def has pointer to proper MACRO_DECL_NODE */
467 decl = (astnode *)e->def;
468 /* Get the lists of formals and actuals */
469 formals = astnode_get_child(decl, 1);
470 actuals = astnode_get_child(n, 1);
471 /* Verify that argument count is correct */
472 if (astnode_get_child_count(formals) != astnode_get_child_count(actuals)) {
473 err(n->loc, "macro `%s' does not take %d argument(s)", id->ident, astnode_get_child_count(actuals) );
474 /* Remove from AST */
475 astnode_remove(n);
476 astnode_finalize(n);
477 return 0;
479 /* Expand the body */
480 decl_body = astnode_get_child(decl, 2);
481 exp_body = astnode_clone(decl_body, n->loc);
482 /* Substitute actuals for formals */
483 for (i=0; i<astnode_get_child_count(actuals); i++) {
484 /* The id to substitute */
485 id = astnode_get_child(formals, i);
486 /* The expression to substitute it with */
487 expr = astnode_get_child(actuals, i);
488 /* Do it! */
489 substitute_expr_for_id(expr, id, exp_body);
491 /* Make locals a bit more global */
492 globalize_macro_expanded_locals(exp_body, count);
493 /* Replace MACRO_NODE by the macro body instance */
494 astnode_replace(n, astnode_get_child(exp_body, 0));
495 /* Discard the replaced node */
496 astnode_finalize(n);
497 /* Increase macro expansion counter */
498 count++;
499 /* Set next node to start of body */
500 *next = exp_body;
502 /* */
503 return 0;
506 /*---------------------------------------------------------------------------*/
509 * Does constant folding of expression.
510 * If the expression can be folded, the original expression is replaced by the
511 * new one, and the original expression is finalized.
512 * @param expr Expression
513 * @return Original expression, if couldn't fold, otherwise new, folded expression
515 astnode *astproc_fold_constants(astnode *expr)
517 astnode *folded;
518 astnode *lhs;
519 astnode *rhs;
520 if (expr == NULL) { return NULL; }
521 folded = NULL;
522 if (astnode_is_type(expr, ARITHMETIC_NODE)) {
523 /* Fold operands recursively */
524 lhs = astproc_fold_constants(LHS(expr));
525 rhs = astproc_fold_constants(RHS(expr));
526 switch (expr->oper) {
527 /* Binary ops */
528 case PLUS_OPERATOR:
529 case MINUS_OPERATOR:
530 case MUL_OPERATOR:
531 case DIV_OPERATOR:
532 case MOD_OPERATOR:
533 case AND_OPERATOR:
534 case OR_OPERATOR:
535 case XOR_OPERATOR:
536 case SHL_OPERATOR:
537 case SHR_OPERATOR:
538 case LT_OPERATOR:
539 case GT_OPERATOR:
540 case EQ_OPERATOR:
541 case NE_OPERATOR:
542 case LE_OPERATOR:
543 case GE_OPERATOR:
544 /* See if it can be folded */
545 if ( (astnode_is_type(lhs, INTEGER_NODE)) &&
546 (astnode_is_type(rhs, INTEGER_NODE)) ) {
547 /* Both sides are integer literals, so fold. */
548 switch (expr->oper) {
549 case PLUS_OPERATOR: folded = astnode_create_integer(lhs->integer + rhs->integer, expr->loc); break;
550 case MINUS_OPERATOR: folded = astnode_create_integer(lhs->integer - rhs->integer, expr->loc); break;
551 case MUL_OPERATOR: folded = astnode_create_integer(lhs->integer * rhs->integer, expr->loc); break;
552 case DIV_OPERATOR: folded = astnode_create_integer(lhs->integer / rhs->integer, expr->loc); break;
553 case MOD_OPERATOR: folded = astnode_create_integer(lhs->integer % rhs->integer, expr->loc); break;
554 case AND_OPERATOR: folded = astnode_create_integer(lhs->integer & rhs->integer, expr->loc); break;
555 case OR_OPERATOR: folded = astnode_create_integer(lhs->integer | rhs->integer, expr->loc); break;
556 case XOR_OPERATOR: folded = astnode_create_integer(lhs->integer ^ rhs->integer, expr->loc); break;
557 case SHL_OPERATOR: folded = astnode_create_integer(lhs->integer << rhs->integer, expr->loc); break;
558 case SHR_OPERATOR: folded = astnode_create_integer(lhs->integer >> rhs->integer, expr->loc); break;
559 case LT_OPERATOR: folded = astnode_create_integer(lhs->integer < rhs->integer, expr->loc); break;
560 case GT_OPERATOR: folded = astnode_create_integer(lhs->integer > rhs->integer, expr->loc); break;
561 case EQ_OPERATOR: folded = astnode_create_integer(lhs->integer == rhs->integer, expr->loc); break;
562 case NE_OPERATOR: folded = astnode_create_integer(lhs->integer != rhs->integer, expr->loc); break;
563 case LE_OPERATOR: folded = astnode_create_integer(lhs->integer <= rhs->integer, expr->loc); break;
564 case GE_OPERATOR: folded = astnode_create_integer(lhs->integer >= rhs->integer, expr->loc); break;
566 default: /* Error, actually */
567 folded = expr;
568 break;
570 if (folded != expr) {
571 /* Replace expression by folded one. */
572 astnode_replace(expr, folded);
573 astnode_finalize(expr);
574 return folded;
577 else if ( (astnode_is_type(lhs, STRING_NODE)) &&
578 (astnode_is_type(rhs, STRING_NODE)) ) {
579 /* Both sides are string literals. */
580 /* Folding is defined only for certain operators. */
581 switch (expr->oper) {
582 case PLUS_OPERATOR:
583 /* String concatenation. */
584 folded = astnode_create(STRING_NODE, expr->loc);
585 folded->string = (char *)malloc(strlen(lhs->string) + strlen(rhs->string) + 1);
586 if (folded->string != NULL) {
587 strcpy(folded->string, lhs->string);
588 strcat(folded->string, rhs->string);
590 break;
592 /* String comparison. */
593 case LT_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) < 0, expr->loc); break;
594 case GT_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) > 0, expr->loc); break;
595 case EQ_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) == 0, expr->loc); break;
596 case NE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) != 0, expr->loc); break;
597 case LE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) <= 0, expr->loc); break;
598 case GE_OPERATOR: folded = astnode_create_integer(strcmp(lhs->string, rhs->string) >= 0, expr->loc); break;
600 default:
601 folded = expr;
602 break;
604 if (folded != expr) {
605 /* Replace expression by folded one. */
606 astnode_replace(expr, folded);
607 astnode_finalize(expr);
608 return folded;
611 else if ((astnode_get_type(lhs) == STRING_NODE) &&
612 (astnode_get_type(rhs) == INTEGER_NODE) &&
613 (expr->oper == PLUS_OPERATOR)) {
614 /* Left side is string and right side is integer.
615 Result is a string. */
616 char str[32];
617 sprintf(str, "%d", rhs->integer);
618 folded = astnode_create(STRING_NODE, expr->loc);
619 folded->string = (char *)malloc(strlen(lhs->string) + strlen(str) + 1);
620 if (folded->string != NULL) {
621 strcpy(folded->string, lhs->string);
622 strcat(folded->string, str);
624 /* Replace expression by folded one. */
625 astnode_replace(expr, folded);
626 astnode_finalize(expr);
627 return folded;
629 else if ((astnode_get_type(rhs) == STRING_NODE) &&
630 (astnode_get_type(lhs) == INTEGER_NODE) &&
631 (expr->oper == PLUS_OPERATOR)) {
632 /* Left side is integer and right side is string.
633 Result is a string. */
634 char str[32];
635 sprintf(str, "%d", lhs->integer);
636 folded = astnode_create(STRING_NODE, expr->loc);
637 folded->string = (char *)malloc(strlen(str) + strlen(rhs->string) + 1);
638 if (folded->string != NULL) {
639 strcpy(folded->string, str);
640 strcat(folded->string, rhs->string);
642 /* Replace expression by folded one. */
643 astnode_replace(expr, folded);
644 astnode_finalize(expr);
645 return folded;
647 /* Use some mathematical identities... */
648 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 0))
649 && (expr->oper == PLUS_OPERATOR)) {
650 /* 0+expr == expr */
651 astnode_remove_child(expr, rhs);
652 astnode_replace(expr, rhs);
653 return rhs;
655 else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 0))
656 && (expr->oper == PLUS_OPERATOR)) {
657 /* expr+0 == expr */
658 astnode_remove_child(expr, lhs);
659 astnode_replace(expr, lhs);
660 return lhs;
662 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 1))
663 && (expr->oper == MUL_OPERATOR)) {
664 /* 1*expr == expr */
665 astnode_remove_child(expr, rhs);
666 astnode_replace(expr, rhs);
667 return rhs;
669 else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 1))
670 && ((expr->oper == MUL_OPERATOR) || (expr->oper == DIV_OPERATOR)) ) {
671 /* expr*1 == expr */
672 /* expr/1 == expr */
673 astnode_remove_child(expr, lhs);
674 astnode_replace(expr, lhs);
675 return lhs;
677 else {
678 /* No chance of folding this one. */
680 break;
682 /* Unary ops */
683 case NEG_OPERATOR:
684 case NOT_OPERATOR:
685 case LO_OPERATOR:
686 case HI_OPERATOR:
687 case UMINUS_OPERATOR:
688 case BANK_OPERATOR:
689 /* See if it can be folded */
690 if (astnode_is_type(lhs, INTEGER_NODE)) {
691 /* Fold it. */
692 switch (expr->oper) {
693 case NEG_OPERATOR: folded = astnode_create_integer(~lhs->integer, expr->loc); break;
694 case NOT_OPERATOR: folded = astnode_create_integer(!lhs->integer, expr->loc); break;
695 case LO_OPERATOR: folded = astnode_create_integer(lhs->integer & 0xFF, expr->loc); break;
696 case HI_OPERATOR: folded = astnode_create_integer((lhs->integer >> 8) & 0xFF, expr->loc); break;
697 case UMINUS_OPERATOR: folded = astnode_create_integer(-lhs->integer, expr->loc); break;
698 default: break;
700 /* Replace expression by folded one. */
701 astnode_replace(expr, folded);
702 astnode_finalize(expr);
703 return folded;
705 else {
706 /* Couldn't fold this one. */
708 break;
711 /* Couldn't fold it, return original expression */
712 return expr;
715 /*---------------------------------------------------------------------------*/
718 * Substitutes identifier if it has a constant definition in symbol table.
719 * @param expr Node of type IDENTIFIER_NODE
721 static astnode *substitute_ident(astnode *expr)
723 astnode *c;
724 symtab_entry *e;
725 /* Look it up in symbol table */
726 e = symtab_lookup(expr->ident);
727 if (e != NULL) {
728 /* Found it. Test if it's a define. */
729 if (e->type == CONSTANT_SYMBOL) {
730 /* This is a defined symbol that should be
731 replaced by the expression it stands for */
732 c = astnode_clone((astnode *)e->def, expr->loc);
733 astnode_replace(expr, c);
734 astnode_finalize(expr);
735 expr = c;
738 else {
739 /* Didn't find it in symbol table. */
741 return expr;
745 * Substitutes sizeof with proper constant.
746 * @param expr Node of type SIZEOF_NODE
748 static astnode *reduce_sizeof(astnode *expr)
750 int ok;
751 astnode *c;
752 astnode *id;
753 astnode *type;
754 astnode *count;
755 symtab_entry *e;
757 count = NULL;
758 if (astnode_is_type(LHS(expr), IDENTIFIER_NODE)) {
759 /* Identifier might be the name of a user-defined type, OR
760 it might be the name of a variable of a user-defined type */
761 type = NULL;
762 /* Look it up */
763 id = LHS(expr);
764 e = symtab_global_lookup(id->ident);
765 if (e != NULL) {
766 switch (e->type) {
767 case STRUC_SYMBOL:
768 case UNION_SYMBOL:
769 case RECORD_SYMBOL:
770 case ENUM_SYMBOL:
771 type = astnode_create_datatype(USER_DATATYPE, astnode_clone(id, id->loc), id->loc);
772 break;
774 case VAR_SYMBOL:
775 type = astnode_clone(LHS(e->def), id->loc);
776 if (astnode_is_type(e->def, STORAGE_NODE)) {
777 count = astnode_clone(RHS(e->def), id->loc);
779 else {
780 count = astnode_create_integer(astnode_get_child_count(e->def)-1, id->loc);
782 break;
784 default:
785 /* Can't take sizeof of this symbol type */
786 break;
789 if (type == NULL) {
790 /* Unknown */
791 type = astnode_create_datatype(USER_DATATYPE, astnode_clone(id, id->loc), id->loc);
793 /* Replace identifier by datatype node */
794 astnode_replace(id, type);
795 astnode_finalize(id);
797 type = LHS(expr);
798 switch (type->datatype) {
799 case BYTE_DATATYPE:
800 case CHAR_DATATYPE:
801 c = astnode_create_integer(1, expr->loc);
802 astnode_replace(expr, c);
803 astnode_finalize(expr);
804 expr = c;
805 break;
807 case WORD_DATATYPE:
808 c = astnode_create_integer(2, expr->loc);
809 astnode_replace(expr, c);
810 astnode_finalize(expr);
811 expr = c;
812 break;
814 case DWORD_DATATYPE:
815 c = astnode_create_integer(4, expr->loc);
816 astnode_replace(expr, c);
817 astnode_finalize(expr);
818 expr = c;
819 break;
821 case USER_DATATYPE:
822 /* Look up the data type in symbol table */
823 id = LHS(type);
824 e = symtab_global_lookup(id->ident);
825 ok = 0;
826 if (e != NULL) {
827 switch (e->type) {
828 case STRUC_SYMBOL:
829 case UNION_SYMBOL:
830 /* Datatype is defined, replace sizeof with proper expression */
831 c = astnode_clone((astnode *)(e->struc.size), ((astnode *)(e->struc.size))->loc);
832 astnode_replace(expr, c);
833 astnode_finalize(expr);
834 expr = c;
835 ok = 1;
836 break;
838 case RECORD_SYMBOL:
839 case ENUM_SYMBOL:
840 /* 1 byte */
841 c = astnode_create_integer(1, expr->loc);
842 astnode_replace(expr, c);
843 astnode_finalize(expr);
844 expr = c;
845 ok = 1;
846 break;
848 default:
849 /* Dunno the size of this symbol type */
850 break;
853 if (!ok) {
854 /* Datatype not defined, error */
855 err(expr->loc, "size of `%s' is unknown", id->ident);
856 /* Replace by 1 */
857 c = astnode_create_integer(1, expr->loc);
858 astnode_replace(expr, c);
859 astnode_finalize(expr);
860 return c;
862 break;
864 default:
865 err(expr->loc, "substitute_sizeof(): unknown type");
866 break;
868 if (count != NULL) {
869 c = astnode_create_arithmetic(
870 MUL_OPERATOR,
871 astnode_clone(expr, expr->loc),
872 count,
873 expr->loc
875 astnode_replace(expr, c);
876 astnode_finalize(expr);
877 expr = c;
879 return expr;
883 * Substitutes A::B with an expression.
884 * If A is a struct: substitute with offset of B
885 * If A is a union: substitute with 0
886 * If A is an enumeration: substitute with value for B
887 * @param expr Node of type SCOPE_NODE
889 static astnode *reduce_scope(astnode *expr)
891 symtab_entry *ns;
892 symtab_entry *sym;
893 astnode *c;
894 astnode *namespace;
895 astnode *symbol;
896 /* Look up the namespace */
897 namespace = LHS(expr);
898 ns = symtab_lookup(namespace->ident);
899 if (ns != NULL) {
900 /* Look up the local symbol */
901 symtab_push(ns->symtab);
902 symbol = RHS(expr);
903 sym = symtab_lookup(symbol->ident);
904 if (sym != NULL) {
905 /* See if we can replace it */
906 switch (ns->type) {
907 case STRUC_SYMBOL:
908 case UNION_SYMBOL:
909 case RECORD_SYMBOL:
910 /* Replace with field offset */
911 c = astnode_clone(sym->field.offset, sym->field.offset->loc);
912 astnode_replace(expr, c);
913 astnode_finalize(expr);
914 expr = c;
915 break;
917 case ENUM_SYMBOL:
918 /* Replace with enum entry value */
919 c = astnode_clone(sym->def, sym->def->loc);
920 astnode_replace(expr, c);
921 astnode_finalize(expr);
922 expr = c;
923 break;
925 default:
926 break;
929 symtab_pop();
931 return expr;
934 static astnode *reduce_expression(astnode *expr);
937 * Handles remainder of fields in A.B.C.D . ..., where one or more fields may be indexed.
938 * @param expr Node of type DOT_NODE, INDEX_NODE or IDENTIFIER_NODE
940 static astnode *reduce_dot_recursive(astnode *expr)
942 astnode *term;
943 astnode *offset;
944 astnode *left;
945 astnode *right;
946 astnode *type;
947 symtab_entry *field;
948 symtab_entry *def;
949 astnode *index = NULL;
950 /* Get identifiers involved: 'right' is field in 'left' */
951 left = LHS(expr);
952 if (astnode_is_type(left, INDEX_NODE)) {
953 left = LHS(left); /* Need identifier */
955 right = RHS(expr);
956 if (astnode_is_type(right, DOT_NODE)) {
957 right = LHS(right); /* Need identifier */
959 if (astnode_is_type(right, INDEX_NODE)) {
960 index = RHS(right);
961 right = LHS(right); /* Need identifier */
963 /* Lookup 'right' in 'left's symbol table (on stack) */
964 field = symtab_lookup(right->ident);
965 /* Look up variable's type definition */
966 type = LHS(field->def);
967 /* Copy its offset */
968 offset = astnode_clone(field->field.offset, right->loc);
969 if (index != NULL) {
970 /* Create expression: identifier + sizeof(datatype) * index */
971 offset = astnode_create_arithmetic(
972 PLUS_OPERATOR,
973 offset,
974 astnode_create_arithmetic(
975 MUL_OPERATOR,
976 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
977 astnode_clone(index, index->loc),
978 index->loc
980 expr->loc
983 /* See if more subfields to process */
984 expr = RHS(expr);
985 if (astnode_is_type(expr, DOT_NODE)) {
986 /* Next field */
987 def = symtab_global_lookup(LHS(type)->ident);
988 symtab_push(def->symtab);
989 term = reduce_dot_recursive(expr);
990 symtab_pop();
991 /* Construct sum */
992 offset = astnode_create_arithmetic(
993 PLUS_OPERATOR,
994 offset,
995 term,
996 expr->loc
999 return offset;
1003 * Transforms A.B.C.D . ... to A + offset(B) + offset(C) + ...
1004 * No error checking, since validate_dotref() should have been called previously.
1005 * @param expr Node of type DOT_NODE
1007 static astnode *reduce_dot(astnode *expr)
1009 symtab_entry *father;
1010 symtab_entry *def;
1011 astnode *type;
1012 astnode *left;
1013 astnode *term1;
1014 astnode *term2;
1015 astnode *sum;
1016 astnode *index = NULL;
1017 /* Look up parent in global symbol table */
1018 left = LHS(expr); /* expr := left . right */
1019 if (astnode_is_type(left, INDEX_NODE)) {
1020 index = RHS(left);
1021 left = LHS(left); /* Need identifier */
1023 father = symtab_lookup(left->ident);
1024 /* Look up variable's type definition */
1025 type = LHS(father->def); /* DATATYPE_NODE */
1026 def = symtab_lookup(LHS(type)->ident);
1027 /* 1st term of sum is the leftmost structure identifier */
1028 term1 = astnode_clone(left, left->loc);
1029 if (index != NULL) {
1030 /* Create expression: identifier + sizeof(datatype) * index */
1031 term1 = astnode_create_arithmetic(
1032 PLUS_OPERATOR,
1033 term1,
1034 astnode_create_arithmetic(
1035 MUL_OPERATOR,
1036 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
1037 astnode_clone(index, index->loc),
1038 index->loc
1040 expr->loc
1043 /* Add offsets recursively */
1044 symtab_push(def->symtab);
1045 term2 = reduce_dot_recursive(expr);
1046 symtab_pop();
1047 /* Calculate final sum */
1048 sum = astnode_create_arithmetic(
1049 PLUS_OPERATOR,
1050 term1,
1051 term2,
1052 expr->loc
1054 sum = reduce_expression(sum);
1055 /* Replace dotted expression by sum */
1056 astnode_replace(expr, sum);
1057 astnode_finalize(expr);
1058 return sum;
1062 * Reduces MASK operation to a field mask.
1063 * @param mask A node of type MASK_NODE
1065 static astnode *reduce_mask(astnode *mask)
1067 symtab_entry *ns;
1068 symtab_entry *sym;
1069 astnode *c;
1070 astnode *namespace;
1071 astnode *symbol;
1072 astnode *expr;
1073 /* Child is a scope node, record::field */
1074 expr = LHS(mask);
1075 /* Look up the namespace */
1076 namespace = LHS(expr);
1077 ns = symtab_lookup(namespace->ident);
1078 if (ns != NULL) {
1079 /* Make sure it's a record */
1080 if (ns->type != RECORD_SYMBOL) {
1081 err(expr->loc, "`%s' is not a record");
1082 /* Replace by 0 */
1083 c = astnode_create_integer(0, expr->loc);
1084 astnode_replace(mask, c);
1085 astnode_finalize(mask);
1086 expr = c;
1088 else {
1089 /* Look up the local symbol */
1090 symtab_push(ns->symtab);
1091 symbol = RHS(expr);
1092 sym = symtab_lookup(symbol->ident);
1093 if (sym != NULL) {
1094 /* Calculate field mask */
1095 // mask = ((1 << width) - 1) << offset
1096 c = astnode_create_arithmetic(
1097 SHL_OPERATOR,
1098 astnode_create_arithmetic(
1099 MINUS_OPERATOR,
1100 astnode_create_arithmetic(
1101 SHL_OPERATOR,
1102 astnode_create_integer(1, expr->loc),
1103 astnode_clone(sym->field.size, expr->loc),
1104 expr->loc
1106 astnode_create_integer(1, expr->loc),
1107 expr->loc
1109 astnode_clone(sym->field.offset, expr->loc),
1110 expr->loc
1112 c = reduce_expression(c);
1113 astnode_replace(mask, c);
1114 astnode_finalize(mask);
1115 expr = c;
1117 symtab_pop();
1120 return expr;
1124 * Reduces identifier[expression] to identifier + sizeof(identifier type) * expression
1126 static astnode *reduce_index(astnode *expr)
1128 symtab_entry *e;
1129 astnode *c;
1130 astnode *type;
1131 astnode *id;
1132 astnode *index;
1133 id = LHS(expr);
1134 index = reduce_expression(RHS(expr));
1135 /* Lookup identifier */
1136 e = symtab_lookup(id->ident);
1137 /* Get its datatype */
1138 type = LHS(e->def);
1139 /* Create expression: identifier + sizeof(datatype) * index */
1140 c = astnode_create_arithmetic(
1141 PLUS_OPERATOR,
1142 astnode_clone(id, id->loc),
1143 astnode_create_arithmetic(
1144 MUL_OPERATOR,
1145 astnode_create_sizeof(astnode_clone(type, type->loc), expr->loc),
1146 astnode_clone(index, index->loc),
1147 index->loc
1149 expr->loc
1151 /* Replace index expression */
1152 astnode_replace(expr, c);
1153 astnode_finalize(expr);
1154 /* Return the new expression */
1155 return c;
1159 * Substitutes all identifiers that represent EQU defines with their
1160 * corresponding expression.
1161 * @param expr The expression whose defines to substitute
1163 static astnode *substitute_defines(astnode *expr)
1165 switch (astnode_get_type(expr)) {
1166 case ARITHMETIC_NODE:
1167 substitute_defines(LHS(expr));
1168 substitute_defines(RHS(expr));
1169 break;
1171 case IDENTIFIER_NODE:
1172 expr = substitute_ident(expr);
1173 break;
1175 case SIZEOF_NODE:
1176 expr = reduce_sizeof(expr);
1177 break;
1179 case MASK_NODE:
1180 expr = reduce_mask(expr);
1181 break;
1183 case INDEX_NODE:
1184 substitute_defines(LHS(expr));
1185 substitute_defines(RHS(expr));
1186 break;
1188 case DOT_NODE:
1189 substitute_defines(LHS(expr));
1190 substitute_defines(RHS(expr));
1191 break;
1193 default:
1194 /* Nada */
1195 break;
1197 return expr;
1203 static astnode *reduce_highlevel_constructs(astnode *expr)
1205 switch (astnode_get_type(expr)) {
1206 case ARITHMETIC_NODE:
1207 reduce_highlevel_constructs(LHS(expr));
1208 reduce_highlevel_constructs(RHS(expr));
1209 break;
1211 case SCOPE_NODE:
1212 expr = reduce_scope(expr);
1213 break;
1215 case DOT_NODE:
1216 expr = reduce_dot(expr);
1217 break;
1219 case INDEX_NODE:
1220 expr = reduce_index(expr);
1221 break;
1223 default:
1224 /* Nada */
1225 break;
1227 return expr;
1231 * Really reduces an expression.
1232 * @param expr Expression to attempt to reduce
1234 static astnode *reduce_expression_complete(astnode *expr)
1236 return astproc_fold_constants( reduce_highlevel_constructs( substitute_defines(expr) ) );
1240 * Reduces an expression.
1241 * It does two things:
1242 * 1. Substitute all equates by their value
1243 * 2. Folds constants in the resulting expression
1244 * If the expression is reduced, the original expression is replaced by the
1245 * new one, the original is finalized, and a pointer to the new expression
1246 * is returned.
1247 * If the expression is not reduced, the original pointer is returned.
1249 static astnode *reduce_expression(astnode *expr)
1251 return astproc_fold_constants( substitute_defines(expr) );
1255 * Reduces RECORD instance to a single byte (DB statement).
1256 * @param r Record's symbol table entry
1257 * @param expr Record initializer
1258 * @param flat List on which to append the reduced form
1260 static void reduce_record(symtab_entry *r, astnode *init, astnode *flat)
1262 ordered_field_list *list;
1263 symtab_entry *e;
1264 astnode *val;
1265 astnode *term;
1266 astnode *result;
1267 astnode *mask;
1268 astnode *repl;
1269 /* Validate initializer */
1270 if (!astnode_is_type(init, STRUC_NODE)) {
1271 err(init->loc, "record initializer expected");
1272 return;
1274 /* Go through fields */
1275 symtab_push(r->symtab);
1276 result = astnode_create_integer(0, init->loc);
1277 for (val = init->first_child, list = r->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1278 if (astnode_is_type(val, NULL_NODE)) {
1279 continue;
1281 if (astnode_is_type(val, STRUC_NODE)) {
1282 err(init->loc, "record field initializer expected");
1283 continue;
1285 /* Get field definition */
1286 e = list->entry;
1287 /* Calculate field mask */
1288 // mask = ((1 << width) - 1) << offset
1289 mask = astnode_create_arithmetic(
1290 SHL_OPERATOR,
1291 astnode_create_arithmetic(
1292 MINUS_OPERATOR,
1293 astnode_create_arithmetic(
1294 SHL_OPERATOR,
1295 astnode_create_integer(1, val->loc),
1296 astnode_clone(e->field.size, val->loc),
1297 val->loc
1299 astnode_create_integer(1, val->loc),
1300 val->loc
1302 astnode_clone(e->field.offset, val->loc),
1303 val->loc
1305 /* Shift val left e->field.offset bits, AND with mask */
1306 term = astnode_create_arithmetic(
1307 AND_OPERATOR,
1308 astnode_create_arithmetic(
1309 SHL_OPERATOR,
1310 astnode_clone(val, val->loc),
1311 astnode_clone(e->field.offset, val->loc),
1312 val->loc
1314 mask,
1315 val->loc
1317 /* OR the value with the result so far */
1318 result = astnode_create_arithmetic(
1319 OR_OPERATOR,
1320 result,
1321 term,
1322 val->loc
1324 result = reduce_expression(result);
1326 /* Determine reason for stopping loop */
1327 if (val != NULL) {
1328 err(init->loc, "too many field initializers");
1330 /* Make byte data node (packed record value) */
1331 repl = astnode_create_data(
1332 astnode_create_datatype(BYTE_DATATYPE, NULL, init->loc),
1333 result,
1334 init->loc
1336 /* Add to list */
1337 astnode_add_child(flat, repl);
1338 /* Restore old symbol table */
1339 symtab_pop();
1343 * Reduces ENUM instance to DB.
1344 * @param e Enumeration's symbol table entry
1345 * @param expr Expression
1346 * @param flat List on which to append the reduced form
1348 static void reduce_enum(symtab_entry *e, astnode *expr, astnode *list)
1350 symtab_entry *sym;
1351 astnode *repl;
1352 if (!astnode_is_type(expr, IDENTIFIER_NODE)) {
1353 err(expr->loc, "identifier expected");
1355 else {
1356 /* Look up the enumeration symbol */
1357 symtab_push(e->symtab);
1358 sym = symtab_lookup(expr->ident);
1359 symtab_pop();
1360 /* Make byte data node (symbol value) */
1361 repl = astnode_create_data(
1362 astnode_create_datatype(BYTE_DATATYPE, NULL, expr->loc),
1363 astnode_clone(sym->def, expr->loc),
1364 expr->loc
1366 /* Add to list */
1367 astnode_add_child(list, repl);
1371 static void flatten_struc_recursive(symtab_entry *s, astnode *init, astnode *flat);
1374 * Flattens a union initializer to a sequence of native data values.
1375 * Verify similar to flattening of structure, but only single field allowed.
1376 * @param s Union's symbol table definition
1377 * @param init Union initializer
1378 * @param flat List on which to append the flattened form
1380 static void flatten_union_recursive(symtab_entry *s, astnode *init, astnode *flat)
1382 astnode *fill;
1383 astnode *type;
1384 astnode *count;
1385 symtab_entry *e;
1386 symtab_entry *t;
1387 astnode *val;
1388 astnode *valvals;
1389 astnode *temp;
1390 ordered_field_list *list;
1391 int num;
1392 /* Validate initializer */
1393 if (!astnode_is_type(init, STRUC_NODE)) {
1394 err(init->loc, "union initializer expected");
1395 return;
1397 /* Go through fields */
1398 symtab_push(s->symtab);
1399 fill = astnode_clone(s->struc.size, flat->loc);
1400 for (val = init->first_child, list = s->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1401 if (astnode_is_type(val, NULL_NODE)) {
1402 continue;
1404 if (!astnode_equal(fill, s->struc.size)) {
1405 err(init->loc, "only one field of union can be initialized");
1406 continue;
1408 /* Get field definition */
1409 e = list->entry;
1410 /* Symbol definition is STORAGE_NODE w/ two children: type and count */
1411 type = LHS(e->def);
1412 count = RHS(e->def);
1413 /* Decide what to do based on field type and value */
1414 switch (type->datatype) {
1415 case BYTE_DATATYPE:
1416 case CHAR_DATATYPE:
1417 case WORD_DATATYPE:
1418 case DWORD_DATATYPE:
1419 if (astnode_is_type(val, STRUC_NODE)) {
1420 /* Handle multi-value array */
1421 temp = astnode_clone(val, val->loc);
1422 valvals = astnode_remove_children(temp);
1423 astnode_finalize(temp);
1424 astnode_add_child(flat,
1425 astnode_create_data(
1426 astnode_create_datatype(type->datatype, NULL, type->loc),
1427 valvals,
1428 val->loc
1431 num = astnode_get_child_count(val);
1432 } else {
1433 /* Output single value */
1434 astnode_add_child(flat,
1435 astnode_create_data(
1436 astnode_create_datatype(type->datatype, NULL, type->loc),
1437 astnode_clone(val, val->loc),
1438 val->loc
1441 num = astnode_is_type(val, STRING_NODE) ? strlen(val->string) : 1;
1443 if (num > count->integer) {
1444 err(val->loc, "initializer for field `%s' exceeds field size", e->id);
1446 /* Fill in remainder of field if necessary: count - 1 */
1447 else if (count->integer > num) {
1448 astnode_add_child(flat,
1449 astnode_create_storage(
1450 astnode_create_datatype(type->datatype, NULL, type->loc),
1451 astproc_fold_constants(
1452 astnode_create_arithmetic(
1453 MINUS_OPERATOR,
1454 astnode_clone(count, count->loc),
1455 astnode_create_integer(num, flat->loc),
1456 count->loc
1459 val->loc
1463 break;
1465 case USER_DATATYPE:
1466 /* Look up user type definition */
1467 t = symtab_global_lookup(LHS(type)->ident);
1468 switch (t->type) {
1469 case STRUC_SYMBOL:
1470 flatten_struc_recursive(t, val, flat);
1471 break;
1473 case UNION_SYMBOL:
1474 flatten_union_recursive(t, val, flat);
1475 break;
1477 case RECORD_SYMBOL:
1478 reduce_record(t, val, flat);
1479 break;
1481 case ENUM_SYMBOL:
1482 reduce_enum(t, val, flat);
1483 break;
1485 default:
1486 break;
1488 break;
1490 /* Decrease fill amount according to field size */
1491 fill = astproc_fold_constants(
1492 astnode_create_arithmetic(
1493 MINUS_OPERATOR,
1494 fill,
1495 astnode_clone(e->field.size, flat->loc),
1496 flat->loc
1500 /* Determine reason for stopping loop */
1501 if (val != NULL) {
1502 err(init->loc, "too many field initializers");
1504 if (fill->integer > 0) {
1505 /* Fill remainder of union with zeroes */
1506 astnode_add_child(flat,
1507 astnode_create_storage(
1508 astnode_create_datatype(BYTE_DATATYPE, NULL, flat->loc),
1509 fill,
1510 flat->loc
1514 symtab_pop();
1518 * Flattens a structure initializer to a sequence of native data values.
1519 * @param s Structure's symbol table definition
1520 * @param init Structure initializer
1521 * @param flat List on which to append the flattened form
1523 static void flatten_struc_recursive(symtab_entry *s, astnode *init, astnode *flat)
1525 astnode *fill;
1526 astnode *type;
1527 astnode *count;
1528 astnode *temp;
1529 symtab_entry *e;
1530 symtab_entry *t;
1531 astnode *val;
1532 astnode *valvals;
1533 ordered_field_list *list;
1534 int num;
1535 /* Validate initializer */
1536 if (!astnode_is_type(init, STRUC_NODE)) {
1537 err(init->loc, "structure initializer expected");
1538 return;
1540 /* Go through fields */
1541 symtab_push(s->symtab);
1542 fill = astnode_clone(s->struc.size, flat->loc);
1543 for (val = init->first_child, list = s->struc.fields; (val != NULL) && (list != NULL); list = list->next, val = val->next_sibling) {
1544 /* Get field definition */
1545 e = list->entry;
1546 /* Check if normal field or anonymous union */
1547 if (e->type == UNION_SYMBOL) {
1548 if (astnode_is_type(val, NULL_NODE)) {
1549 /* Output union size bytes to fill in field */
1550 astnode_add_child(flat,
1551 astnode_create_storage(
1552 astnode_create_datatype(BYTE_DATATYPE, NULL, val->loc),
1553 astnode_clone(e->struc.size, val->loc),
1554 val->loc
1557 } else {
1558 flatten_union_recursive(e, val, flat);
1559 /* Decrease fill amount according to union size */
1560 fill = astproc_fold_constants(
1561 astnode_create_arithmetic(
1562 MINUS_OPERATOR,
1563 fill,
1564 astnode_clone(e->struc.size, flat->loc),
1565 flat->loc
1569 } else {
1570 /* VAR_SYMBOL */
1571 /* Symbol definition is STORAGE_NODE w/ two children: type and count */
1572 type = LHS(e->def);
1573 count = RHS(e->def);
1574 /* Decide what to do based on field type and value */
1575 switch (type->datatype) {
1576 case BYTE_DATATYPE:
1577 case CHAR_DATATYPE:
1578 case WORD_DATATYPE:
1579 case DWORD_DATATYPE:
1580 if (astnode_is_type(val, NULL_NODE)) {
1581 /* Output field_size bytes to fill in field */
1582 astnode_add_child(flat,
1583 astnode_create_storage(
1584 astnode_create_datatype(type->datatype, NULL, type->loc),
1585 astnode_clone(count, count->loc),
1586 val->loc
1589 } else {
1590 if (astnode_is_type(val, STRUC_NODE)) {
1591 /* Handle multi-value array */
1592 temp = astnode_clone(val, val->loc);
1593 valvals = astnode_remove_children(temp);
1594 astnode_finalize(temp);
1595 astnode_add_child(flat,
1596 astnode_create_data(
1597 astnode_create_datatype(type->datatype, NULL, type->loc),
1598 valvals,
1599 val->loc
1602 num = astnode_get_child_count(val);
1603 } else {
1604 /* Output single value */
1605 astnode_add_child(flat,
1606 astnode_create_data(
1607 astnode_create_datatype(type->datatype, NULL, type->loc),
1608 astnode_clone(val, val->loc),
1609 val->loc
1612 num = astnode_is_type(val, STRING_NODE) ? strlen(val->string) : 1;
1614 if (astnode_is_type(count, INTEGER_NODE) && (count->integer < num)) {
1615 err(val->loc, "initializer for field `%s' exceeds field size", e->id);
1617 /* Fill in remainder of field if necessary: count - 1 */
1618 else if ( (astnode_is_type(count, INTEGER_NODE) && (count->integer > num))
1619 || !astnode_is_type(count, INTEGER_NODE) ) {
1620 astnode_add_child(flat,
1621 astnode_create_storage(
1622 astnode_create_datatype(type->datatype, NULL, flat->loc),
1623 astproc_fold_constants(
1624 astnode_create_arithmetic(
1625 MINUS_OPERATOR,
1626 astnode_clone(count, flat->loc),
1627 astnode_create_integer(num, flat->loc),
1628 flat->loc
1631 flat->loc
1636 break;
1638 case USER_DATATYPE:
1639 /* Look up user type definition */
1640 t = symtab_global_lookup(LHS(type)->ident);
1641 if (astnode_is_type(val, NULL_NODE)) {
1642 /* Output sizeof(type) bytes to fill in */
1643 astnode_add_child(flat,
1644 astnode_create_storage(
1645 astnode_create_datatype(BYTE_DATATYPE, NULL, val->loc),
1646 astnode_clone(t->struc.size, val->loc),
1647 val->loc
1650 } else {
1651 switch (t->type) {
1652 case STRUC_SYMBOL:
1653 flatten_struc_recursive(t, val, flat);
1654 break;
1656 case UNION_SYMBOL:
1657 flatten_union_recursive(t, val, flat);
1658 break;
1660 case RECORD_SYMBOL:
1661 reduce_record(t, val, flat);
1662 break;
1664 case ENUM_SYMBOL:
1665 reduce_enum(t, val, flat);
1666 break;
1668 default:
1669 break;
1672 break;
1674 /* Decrease fill amount according to field size */
1675 fill = astproc_fold_constants(
1676 astnode_create_arithmetic(
1677 MINUS_OPERATOR,
1678 fill,
1679 astnode_clone(e->field.size, flat->loc),
1680 flat->loc
1685 /* Determine reason for stopping loop */
1686 if (val != NULL) {
1687 err(init->loc, "too many field initializers");
1689 else if (list != NULL) {
1690 /* All fields not initialized; fill remainder of struc with zeroes */
1691 astnode_add_child(flat,
1692 astnode_create_storage(
1693 astnode_create_datatype(BYTE_DATATYPE, NULL, flat->loc),
1694 fill,
1695 flat->loc
1699 symtab_pop();
1703 * Converts data that is expressed in a high-level form (such as structure initializers)
1704 * to a simple sequence of bytes.
1705 * @param n The source node to flatten
1706 * @param type The type of data that n is an instance of
1707 * @param list List on which to append the resulting sequence of items (bytes/words/dwords)
1709 static void flatten_user_data(astnode *n, astnode *type, astnode *list)
1711 symtab_entry *def;
1712 /* Look up type definition */
1713 def = symtab_global_lookup(LHS(type)->ident);
1714 if (def != NULL) {
1715 switch (def->type) {
1716 case STRUC_SYMBOL:
1717 /* Flatten structure initializer to series of simple data statements */
1718 flatten_struc_recursive(def, n, list);
1719 break;
1721 case UNION_SYMBOL:
1722 /* Flatten union initializer to series of simple data statements */
1723 flatten_union_recursive(def, n, list);
1724 break;
1726 case RECORD_SYMBOL:
1727 reduce_record(def, n, list);
1728 break;
1730 case ENUM_SYMBOL:
1731 reduce_enum(def, n, list);
1732 break;
1734 default:
1735 break;
1740 /*---------------------------------------------------------------------------*/
1743 * Loads the character map specified by the node.
1744 * @param n Node of type CHARMAP_NODE
1746 static int load_charmap(astnode *n, void *arg, astnode **next)
1748 /* TODO: should probably be done in the parsing phase (same path resolution as for INCSRC and INCBIN) */
1749 astnode *file;
1750 /* Get file descriptor */
1751 file = astnode_get_child(n, 0);
1752 /* Try to load the charmap */
1753 if (charmap_parse(file->file_path, charmap) == 0) {
1754 err(n->loc, "could not open `%s' for reading", file->file_path);
1756 return 0;
1760 * First-time processing of instruction node.
1761 * @param n Node of type INSTRUCTION_NODE
1762 * @param arg Not used
1764 static int process_instruction(astnode *n, void *arg, astnode **next)
1766 astnode *expr;
1767 if (in_dataseg) {
1768 err(n->loc, "instructions not allowed in data segment");
1769 /* Remove from AST */
1770 astnode_remove(n);
1771 astnode_finalize(n);
1773 else {
1774 /* The instruction operand */
1775 expr = astnode_get_child(n, 0);
1776 /* Substitute defines and fold constants */
1777 reduce_expression(expr);
1779 return 1;
1783 * First-time processing of data node.
1784 * @param n Node of type DATA_NODE
1785 * @param arg Not used
1787 static int process_data(astnode *n, void *arg, astnode **next)
1789 int j;
1790 int k;
1791 astnode *type;
1792 astnode *expr;
1793 astnode *list;
1794 astnode *stmts;
1795 type = astnode_get_child(n, 0); /* DATATYPE_NODE */
1796 if (in_dataseg) {
1797 err(n->loc, "value not allowed in data segment");
1798 /* Replace with storage node */
1799 astnode_replace(
1801 astnode_create_storage(
1802 astnode_create_datatype(BYTE_DATATYPE, NULL, n->loc),
1803 astnode_create_integer(1, n->loc),
1804 n->loc
1807 astnode_finalize(n);
1808 return 0;
1810 if (type->datatype == USER_DATATYPE) {
1811 /* Make sure the type exists */
1812 if (symtab_global_lookup(LHS(type)->ident) == NULL) {
1813 err(n->loc, "unknown type `%s'", LHS(type)->ident);
1814 /* Remove from AST */
1815 astnode_remove(n);
1816 astnode_finalize(n);
1817 return 0;
1818 } else {
1819 /* Attempt to reduce user data to native data */
1820 list = astnode_create(LIST_NODE, n->loc);
1821 for (expr = type->next_sibling; expr != NULL; expr = expr->next_sibling) {
1822 flatten_user_data(expr, type, list);
1824 /* Replace initializers with generated list */
1825 stmts = astnode_remove_children(list);
1826 astnode_replace(n, stmts);
1827 astnode_finalize(n);
1828 astnode_finalize(list);
1829 *next = stmts;
1832 /* Go through the list of data values, replacing defines and folding constants */
1833 for (j=1; j<astnode_get_child_count(n); j++) {
1834 expr = astnode_get_child(n, j);
1835 /* Substitute defines and fold constants */
1836 expr = reduce_expression(expr);
1837 /* If it's a string, replace by array of integers */
1838 /* (makes it easier to process later... favour regularity) */
1839 if (astnode_is_type(expr, STRING_NODE)) {
1840 astnode_remove_child_at(n, j); /* Remove string */
1841 for (k=strlen(expr->string)-1; k>=0; k--) {
1842 /* Check if we should map character from custom charmap */
1843 if (type->datatype == CHAR_DATATYPE) {
1844 expr->string[k] = charmap[(unsigned)expr->string[k]];
1846 /* Append character value to array */
1847 astnode_insert_child(n, astnode_create_integer((unsigned char)expr->string[k], n->loc), j);
1849 if (type->datatype == CHAR_DATATYPE) {
1850 /* It's normal byte array now */
1851 type->datatype = BYTE_DATATYPE;
1853 j += strlen(expr->string)-1;
1854 astnode_finalize(expr);
1857 return 1;
1861 * First-time processing of storage node.
1862 * @param n Node of type STORAGE_NODE
1863 * @param arg Not used
1865 static int process_storage(astnode *n, void *arg, astnode **next)
1867 int item_size;
1868 astnode *type;
1869 astnode *expr;
1870 astnode *new_expr;
1871 type = LHS(n);
1872 expr = RHS(n);
1873 /* If not BYTE_DATATYPE, multiply by word/dword-size */
1874 switch (type->datatype) {
1875 case BYTE_DATATYPE:
1876 case CHAR_DATATYPE: item_size = 1; break;
1877 case WORD_DATATYPE: item_size = 2; break;
1878 case DWORD_DATATYPE: item_size = 4; break;
1879 default: item_size = 1; break; // ### Hmmm...
1881 if (item_size != 1) {
1882 new_expr = astnode_create_arithmetic(
1883 MUL_OPERATOR,
1884 astnode_clone(expr, expr->loc),
1885 astnode_create_integer(item_size, expr->loc),
1886 expr->loc
1888 astnode_replace(expr, new_expr);
1889 astnode_finalize(expr);
1890 expr = new_expr;
1891 type->datatype = BYTE_DATATYPE;
1893 /* Substitute defines and fold constants */
1894 expr = reduce_expression(expr);
1895 // TODO: Validate range somewhere else than here please... ???
1896 if (astnode_is_type(expr, INTEGER_NODE)) {
1897 if ((expr->integer <= 0) || (expr->integer >= 0x10000)) {
1898 err(n->loc, "operand out of range");
1901 return 1;
1905 * Process EQU node.
1906 * @param n Node of type EQU_NODE
1907 * @param arg Not used
1909 static int process_equ(astnode *n, void *arg, astnode **next)
1911 symtab_entry *e;
1912 astnode *id;
1913 astnode *expr;
1914 /* The expression which describes the value */
1915 expr = astnode_clone(astnode_get_child(n, 1), n->loc);
1916 /* Substitute defines and fold constants */
1917 expr = reduce_expression(expr);
1918 /* The identifier which is being defined */
1919 id = astnode_get_child(n, 0);
1920 /* Look up in symbol table */
1921 e = symtab_lookup(id->ident);
1922 if (e == NULL) {
1923 /* Symbol is being defined */
1924 // TODO: Check that expression is a constant?
1925 /* Enter it in symbol table */
1926 symtab_enter(id->ident, CONSTANT_SYMBOL, expr, 0);
1927 } else {
1928 /* Symbol is being redefined */
1929 /* This is not allowed for EQU equate! */
1930 if (!astnode_equal((astnode *)(e->def), expr)) {
1931 warn(n->loc, "redefinition of `%s' is not identical; ignored", id->ident);
1934 /* Remove the equate node from the tree. */
1935 astnode_remove(n);
1936 astnode_finalize(n);
1937 return 0;
1941 * Process '=' node.
1942 * @param n Node of type ASSIGN_NODE
1943 * @param arg Not used
1945 static int process_assign(astnode *n, void *arg, astnode **next)
1947 symtab_entry *e;
1948 astnode *id;
1949 astnode *expr;
1950 /* If it's part of ENUM declaration, don't touch */
1951 if (astnode_has_ancestor_of_type(n, ENUM_DECL_NODE)) {
1952 return 0;
1954 /* Very similar to EQU, except symbol 1) can be
1955 redefined and 2) is volatile (see end of proc) */
1956 /* The expression which describes the value */
1957 expr = astnode_clone(astnode_get_child(n, 1), n->loc);
1958 /* Substitute defines and fold constants */
1959 expr = reduce_expression(expr);
1960 /* The identifier which is being (re)defined */
1961 id = astnode_get_child(n, 0);
1962 /* Look up in symbol table */
1963 e = symtab_lookup(id->ident);
1964 if (e == NULL) {
1965 /* Symbol is being defined for the first time */
1966 /* Note that the VOLATILE_FLAG is set */
1967 symtab_enter(id->ident, CONSTANT_SYMBOL, expr, VOLATILE_FLAG);
1968 } else {
1969 /* Symbol is being redefined */
1970 /* This is OK for ASSIGN equate, simply replace definition */
1971 // ### store a list of definitions
1972 expr->loc = e->def->loc;
1973 e->def = expr;
1975 /* Remove the equate node from the tree. */
1976 astnode_remove(n);
1977 astnode_finalize(n);
1978 return 0;
1982 * Process IFDEF-node.
1983 * @param n Node of type IFDEF_NODE
1984 * @param arg Not used
1986 static int process_ifdef(astnode *n, void *arg, astnode **next)
1988 symtab_entry *e;
1989 astnode *id;
1990 astnode *stmts;
1991 /* The identifier which is being tested */
1992 id = astnode_get_child(n, 0);
1993 e = symtab_lookup(id->ident);
1994 if (e != NULL) {
1995 /* Symbol is defined. */
1996 /* Replace IFDEF node by the true-branch statement list */
1997 stmts = astnode_remove_children( astnode_remove_child_at(n, 1));
1998 astnode_replace(n, stmts);
1999 *next = stmts;
2000 } else {
2001 /* Symbol is not defined. */
2002 /* Replace IFDEF node by the false-branch statement list (if any) */
2003 stmts = astnode_remove_children( astnode_remove_child_at(n, 2));
2004 if (stmts != NULL) {
2005 astnode_replace(n, stmts);
2006 *next = stmts;
2007 } else {
2008 astnode_remove(n);
2011 /* Discard the original node */
2012 astnode_finalize(n);
2013 return 0;
2017 * Process IFNDEF-node.
2018 * @param n Node of type IFNDEF_NODE
2019 * @param arg Not used
2021 static int process_ifndef(astnode *n, void *arg, astnode **next)
2023 symtab_entry *e;
2024 astnode *id;
2025 astnode *stmts;
2026 /* The identifier which is being tested */
2027 id = astnode_get_child(n, 0);
2028 e = symtab_lookup(id->ident);
2029 if (e == NULL) {
2030 /* Symbol is not defined. */
2031 /* Replace IFNDEF node by the true-branch statement list */
2032 stmts = astnode_remove_children( astnode_remove_child_at(n, 1));
2033 astnode_replace(n, stmts);
2034 *next = stmts;
2035 } else {
2036 /* Symbol is defined. */
2037 /* Replace IFNDEF node by the false-branch statement list, if any */
2038 stmts = astnode_remove_children( astnode_remove_child_at(n, 2));
2039 if (stmts != NULL) {
2040 astnode_replace(n, stmts);
2041 *next = stmts;
2042 } else {
2043 astnode_remove(n);
2046 /* Discard the original node */
2047 astnode_finalize(n);
2048 return 0;
2052 * Process IF-node.
2053 * @param n Node of type IF_NODE
2054 * @param arg Not used
2056 static int process_if(astnode *n, void *arg, astnode **next)
2058 astnode *expr;
2059 astnode *stmts;
2060 astnode *c;
2061 int ret = 0;
2062 /* IF_NODE has a list of CASE, DEFAULT nodes as children */
2063 for (c = astnode_get_first_child(n); c != NULL; c = astnode_get_next_sibling(c) ) {
2064 if (astnode_is_type(c, CASE_NODE)) {
2065 /* The expression which is being tested */
2066 expr = astnode_get_child(c, 0);
2067 /* Try to reduce expression to literal */
2068 expr = reduce_expression(expr);
2069 /* Resulting expression must be an integer literal,
2070 since this is static evaluation.
2071 In other words, it can't contain label references.
2073 if (astnode_is_type(expr, INTEGER_NODE)) {
2074 /* Non-zero is true, zero is false */
2075 if (expr->integer) {
2076 /* Replace IF node by the true-branch statement list */
2077 stmts = astnode_remove_children( astnode_remove_child_at(c, 1) );
2078 astnode_replace(n, stmts);
2079 astnode_finalize(n);
2080 *next = stmts;
2081 return ret;
2083 } else {
2084 /* Error, expression is not constant */
2085 err(expr->loc, "conditional expression does not evaluate to literal");
2087 } else { /* DEFAULT_NODE */
2088 /* Replace IF node by the false-branch statement list */
2089 stmts = astnode_remove_children(c);
2090 astnode_replace(n, stmts);
2091 astnode_finalize(n);
2092 *next = stmts;
2093 return ret;
2096 /* No match, remove IF node from AST */
2097 astnode_remove(n);
2098 astnode_finalize(n);
2099 return ret;
2103 * Process dataseg-node.
2104 * @param n Node of type DATASEG_NODE
2105 * @param arg Not used
2107 static int process_dataseg(astnode *n, void *arg, astnode **next)
2109 modifiers = n->modifiers;
2110 in_dataseg = 1; /* true */
2111 return 0;
2115 * Process codeseg-node.
2116 * @param n Node of type CODESEG_NODE
2117 * @param arg Not used
2119 static int process_codeseg(astnode *n, void *arg, astnode **next)
2121 modifiers = 0;
2122 in_dataseg = 0; /* false */
2123 return 0;
2127 * Process org-node.
2128 * @param n Node of type ORG_NODE
2129 * @param arg Not used
2131 static int process_org(astnode *n, void *arg, astnode **next)
2133 if (!xasm_args.pure_binary) {
2134 err(n->loc, "org directive can only be used when output format is pure 6502 binary");
2135 } else {
2136 astnode *addr = astnode_get_child(n, 0);
2137 addr = reduce_expression_complete(addr);
2138 if (astnode_is_type(addr, INTEGER_NODE)) {
2139 /* Range check */
2140 if ((addr->integer < 0) || (addr->integer >= 0x10000)) {
2141 err(n->loc, "org address out of 64K range");
2143 } else {
2144 err(n->loc, "org address does not evaluate to literal");
2145 /* Remove from AST */
2146 astnode_remove(n);
2147 astnode_finalize(n);
2150 return 0;
2154 * Process REPT node.
2155 * @param n Node of type REPT_NODE
2156 * @param arg Not used
2158 static int process_rept(astnode *n, void *arg, astnode **next)
2160 astnode *count;
2161 astnode *stmts;
2162 astnode *list;
2163 /* The repeat count */
2164 count = astnode_get_child(n, 0);
2165 /* Try to reduce count expression to literal */
2166 count = reduce_expression_complete(count);
2167 /* Resulting expression must be an integer literal,
2168 since this is static evaluation.
2170 if (astnode_is_type(count, INTEGER_NODE)) {
2171 if (count->integer < 0) {
2172 warn(n->loc, "REPT ignored; negative repeat count (%d)", count->integer);
2173 /* Remove from AST */
2174 astnode_remove(n);
2175 astnode_finalize(n);
2176 } else if (count->integer > 0) {
2177 /* Expand body <count> times */
2178 list = astnode_clone(astnode_get_child(n, 1), n->loc);
2179 stmts = astnode_remove_children(list);
2180 astnode_finalize(list);
2181 while (--count->integer > 0) {
2182 list = astnode_clone(astnode_get_child(n, 1), n->loc);
2183 astnode_add_sibling(stmts, astnode_remove_children(list) );
2184 astnode_finalize(list);
2186 astnode_replace(n, stmts);
2187 astnode_finalize(n);
2188 *next = stmts;
2189 } else {
2190 /* count == 0, remove from AST */
2191 astnode_remove(n);
2192 astnode_finalize(n);
2194 } else {
2195 err(n->loc, "repeat count does not evaluate to literal");
2196 /* Remove from AST */
2197 astnode_remove(n);
2198 astnode_finalize(n);
2200 return 0;
2204 * Process WHILE node.
2205 * @param n Node of type WHILE_NODE
2206 * @param arg Not used
2208 static int process_while(astnode *n, void *arg, astnode **next)
2210 astnode *expr;
2211 astnode *stmts;
2212 astnode *list;
2213 /* The boolean expression */
2214 expr = astnode_get_child(n, 0);
2215 /* Try to reduce expression to literal */
2216 expr = reduce_expression(astnode_clone(expr, expr->loc));
2217 /* Resulting expression must be an integer literal,
2218 since this is static evaluation.
2220 if (astnode_is_type(expr, INTEGER_NODE)) {
2221 /* Expand body if the expression is true */
2222 if (expr->integer) {
2223 list = astnode_clone(astnode_get_child(n, 1), n->loc);
2224 stmts = astnode_remove_children(list);
2225 astnode_finalize(list);
2226 astnode_replace(n, stmts);
2227 astnode_add_sibling(stmts, n); /* Clever huh? */
2228 *next = stmts;
2229 } else {
2230 /* Remove WHILE node from AST */
2231 astnode_remove(n);
2232 astnode_finalize(n);
2234 } else {
2235 err(n->loc, "while expression does not evaluate to literal");
2236 /* Remove WHILE node from AST */
2237 astnode_remove(n);
2238 astnode_finalize(n);
2240 astnode_finalize(expr);
2241 return 0;
2244 /*---------------------------------------------------------------------------*/
2247 * Enters a macro into the symbol table.
2248 * @param n Must be a node of type MACRO_DECL_NODE
2249 * @param arg Not used
2251 static int enter_macro(astnode *n, void *arg, astnode **next)
2253 astnode *id = astnode_get_child(n, 0); /* Child 0 is macro identifier */
2254 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2255 if (symtab_enter(id->ident, MACRO_SYMBOL, n, 0) == NULL) {
2256 /* ### This could be allowed, you know... */
2257 err(n->loc, "duplicate symbol `%s'", id->ident);
2259 /* Remove from AST */
2260 astnode_remove(n);
2261 // ### n is not finalized???
2262 return 0;
2266 * Enters a label into the symbol table.
2267 * @param n Must be a node of type LABEL_NODE
2269 static int enter_label(astnode *n, void *arg, astnode **next)
2271 symtab_entry *e;
2272 astnode *addr;
2273 /* Make sure it's unique first */
2274 if (symtab_lookup(n->ident)) {
2275 err(n->loc, "duplicate symbol `%s'", n->ident);
2276 /* Remove from AST */
2277 astnode_remove(n);
2278 astnode_finalize(n);
2279 } else {
2280 /* Enter it! */
2281 e = symtab_enter(n->ident, LABEL_SYMBOL, n, (in_dataseg ? DATA_FLAG : 0) | modifiers );
2282 /* Check if hardcoded address */
2283 addr = reduce_expression_complete(RHS(n));
2284 if (astnode_is_type(addr, INTEGER_NODE)) {
2285 /* Store it */
2286 e->address = addr->integer;
2287 e->flags |= ADDR_FLAG;
2288 } else if (!astnode_is_type(addr, CURRENT_PC_NODE)) {
2289 err(n->loc, "label address does not evaluate to literal");
2291 /* Increase namespace counter */
2292 label_count++;
2294 /* */
2295 return 0;
2299 * Enters a variable declaration in symbol table.
2300 * @param n Must be a node of type VAR_DECL_NODE
2302 static int enter_var(astnode *n, void *arg, astnode **next)
2304 astnode *id = LHS(n); /* Variable identifier */
2305 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2306 /* Make sure it's unique first */
2307 if (symtab_lookup(id->ident)) {
2308 err(n->loc, "duplicate symbol `%s'", id->ident);
2309 /* Remove from AST */
2310 astnode_remove(n);
2311 astnode_finalize(n);
2312 } else {
2313 /* Validate modifiers */
2314 if ((n->modifiers & ZEROPAGE_FLAG) && !in_dataseg) {
2315 warn(n->loc, "zeropage modifier has no effect in code segment");
2316 n->modifiers &= ~ZEROPAGE_FLAG;
2318 /* Enter it! */
2319 symtab_enter(id->ident, VAR_SYMBOL, astnode_clone(RHS(n), n->loc), (in_dataseg ? DATA_FLAG : 0) | n->modifiers | modifiers);
2321 /* */
2322 return 1;
2326 * Enters a procedure declaration in symbol table.
2327 * @param n Must be a node of type PROC_NODE
2329 static int enter_proc(astnode *n, void *arg, astnode **next)
2331 astnode *id;
2332 if (in_dataseg) {
2333 err(n->loc, "procedures not allowed in data segment");
2334 /* Remove from AST */
2335 astnode_remove(n);
2336 astnode_finalize(n);
2337 return 0;
2339 id = LHS(n); /* Procedure identifier */
2340 assert(astnode_get_type(id) == IDENTIFIER_NODE);
2341 /* Make sure it's unique first */
2342 if (symtab_lookup(id->ident)) {
2343 err(n->loc, "duplicate symbol `%s'", id->ident);
2344 /* Remove from AST */
2345 astnode_remove(n);
2346 astnode_finalize(n);
2347 } else {
2348 /* Enter it! RHS(n) is the list of procedure statements */
2349 symtab_enter(id->ident, PROC_SYMBOL, RHS(n), (in_dataseg ? DATA_FLAG : 0) );
2350 /* Increase global namespace counter */
2351 label_count++;
2353 /* */
2354 return 1;
2358 * Enters a simple <identifier> <storage> structure member.
2359 * @param c Node of type VAR_DECL_NODE
2360 * @param offset Offset of this field
2361 * @param plist List of symbol table's entries
2362 * @param struc_id Structure identifier (for error messages)
2363 * @return New offset (old offset + size of this field)
2365 static astnode *enter_struc_atomic_field(astnode *c, astnode *offset, ordered_field_list ***plist, astnode *struc_id)
2367 astnode *field_id;
2368 astnode *field_data;
2369 astnode *field_size;
2370 symtab_entry *fe;
2371 /* c has two children: id and STORAGE_NODE */
2372 field_id = LHS(c);
2373 assert(astnode_get_type(field_id) == IDENTIFIER_NODE);
2374 field_data = RHS(c);
2375 reduce_expression(RHS(field_data));
2376 /* Validate the declaration -- no data initialized */
2377 if (astnode_is_type(field_data, DATA_NODE)) {
2378 err(c->loc, "data initialization not allowed here");
2379 return(offset);
2381 /* Try to enter field in structure's symbol table */
2382 fe = symtab_enter(
2383 field_id->ident,
2384 VAR_SYMBOL,
2385 astnode_clone(field_data, field_data->loc),
2388 if (fe == NULL) {
2389 err(c->loc, "duplicate symbol `%s' in structure `%s'", field_id->ident, struc_id->ident);
2390 return(offset);
2392 /* Add to ordered list of fields */
2393 (**plist) = malloc(sizeof(ordered_field_list));
2394 (**plist)->entry = fe;
2395 (**plist)->next = NULL;
2396 *plist = &((**plist)->next);
2397 /* Set field offset */
2398 fe->field.offset = astnode_clone(offset, offset->loc);
2399 /* Calculate field size in bytes: sizeof(datatype) * count */
2400 field_size = astnode_create_arithmetic(
2401 MUL_OPERATOR,
2402 astnode_create_sizeof(astnode_clone(LHS(field_data), field_data->loc), field_data->loc),
2403 astnode_clone(RHS(field_data), field_data->loc),
2404 field_data->loc
2406 field_size = reduce_expression(field_size);
2407 /* Set field size */
2408 fe->field.size = astnode_clone(field_size, field_size->loc);
2409 /* Add field size to total offset */
2410 offset = astnode_create_arithmetic(
2411 PLUS_OPERATOR,
2412 offset,
2413 field_size,
2414 offset->loc
2416 offset = reduce_expression(offset);
2417 return(offset);
2420 static void enter_union_fields(symtab_entry *, astnode *);
2423 * Attempts to enter an (anonymous) union's members into structure's symbol table.
2424 * @param n Node of type UNION_DECL_NODE
2425 * @param offset Current parent structure offset
2426 * @param plist Ordered list of parent structure's fields
2428 astnode *enter_struc_union_field(astnode *n, astnode *offset, ordered_field_list ***plist, astnode *struc_id)
2430 ordered_field_list *ls;
2431 symtab_entry *se;
2432 symtab_entry *fe;
2433 static int id = 0;
2434 char id_str[16];
2435 astnode *union_id;
2436 union_id = LHS(n);
2437 if (astnode_is_type(union_id, IDENTIFIER_NODE)) {
2438 err(n->loc, "anonymous union expected");
2439 return(offset);
2441 /* Put UNION in symbol table */
2442 sprintf(id_str, "%d", id++);
2443 se = symtab_enter(id_str, UNION_SYMBOL, n, 0);
2444 enter_union_fields(se, n);
2445 /* Add to ordered list of fields */
2446 (**plist) = malloc(sizeof(ordered_field_list));
2447 (**plist)->entry = se;
2448 (**plist)->next = NULL;
2449 *plist = &((**plist)->next);
2450 /* Add to parent structure as well, with same offsets */
2451 for (ls = se->struc.fields; ls != NULL; ls = ls->next) {
2452 /* Try to enter field in structure's symbol table */
2453 fe = symtab_enter(
2454 ls->entry->id,
2455 VAR_SYMBOL,
2456 astnode_clone(ls->entry->def, ls->entry->def->loc),
2459 if (fe == NULL) {
2460 err(ls->entry->def->loc, "duplicate symbol `%s' in structure `%s'", ls->entry->id, struc_id->ident);
2461 continue;
2463 /* Set field offset */
2464 fe->field.offset = astnode_clone(offset, offset->loc);
2465 /* Set field size */
2466 fe->field.size = astnode_clone(se->struc.size, offset->loc);
2468 /* Advance offset by size of union */
2469 offset = astnode_create_arithmetic(
2470 PLUS_OPERATOR,
2471 offset,
2472 astnode_clone(se->struc.size, offset->loc),
2473 offset->loc
2475 offset = reduce_expression(offset);
2476 return(offset);
2480 * Enters struc type into symbol table based on AST node.
2481 * - Creates a symbol table for the structure
2482 * - Validates and enters all its fields
2483 * - Calculates offset of each field in the structure, and total size
2484 * @param n Node of type STRUC_DECL_NODE
2486 static int enter_struc(astnode *n, void *arg, astnode **next)
2488 ordered_field_list **plist;
2489 symtab_entry *se;
2490 astnode *c;
2491 astnode *offset;
2492 astnode *struc_id = LHS(n); /* Child 0 is struc identifier */
2493 /* Put STRUC in symbol table */
2494 se = symtab_enter(struc_id->ident, STRUC_SYMBOL, n, 0);
2495 if (se == NULL) {
2496 err(n->loc, "duplicate symbol `%s'", struc_id->ident);
2497 } else {
2498 /* Put the fields of the structure in local symbol table */
2499 se->symtab = symtab_create();
2500 offset = astnode_create_integer(0, n->loc); /* offset = 0 */
2501 plist = &se->struc.fields;
2502 for (c = struc_id->next_sibling; c != NULL; c = c->next_sibling) {
2503 /* Check if it's a field declaration */
2504 if (astnode_is_type(c, VAR_DECL_NODE)) {
2505 offset = enter_struc_atomic_field(c, offset, &plist, struc_id);
2507 /* Check if (anonymous) union */
2508 else if (astnode_is_type(c, UNION_DECL_NODE)) {
2509 offset = enter_struc_union_field(c, offset, &plist, struc_id);
2510 } else {
2511 err(c->loc, "field declaration expected");
2512 continue;
2515 /* Store total size of structure */
2516 se->struc.size = offset;
2517 /* Restore previous symbol table */
2518 symtab_pop();
2520 /* Remove STRUC node from AST */
2521 // astnode_remove(n);
2522 // astnode_finalize(n);
2523 return 0;
2527 * Enters fields of union into its symbol table.
2529 static void enter_union_fields(symtab_entry *se, astnode *n)
2531 ordered_field_list **plist;
2532 astnode *c;
2533 astnode *field_id;
2534 astnode *field_data;
2535 astnode *field_size;
2536 symtab_entry *fe;
2538 se->symtab = symtab_create();
2539 se->struc.size = astnode_create_integer(0, n->loc);
2540 plist = &se->struc.fields;
2541 /* Process field declarations */
2542 for (c = RHS(n); c != NULL; c = c->next_sibling) {
2543 /* Make sure it's a field declaration */
2544 if (!astnode_is_type(c, VAR_DECL_NODE)) {
2545 err(c->loc, "field declaration expected");
2546 continue;
2548 /* c has two children: id and STORAGE_NODE */
2549 field_id = LHS(c);
2550 assert(astnode_get_type(field_id) == IDENTIFIER_NODE);
2551 field_data = RHS(c);
2552 reduce_expression(RHS(field_data));
2553 /* Validate the declaration -- no data initialized */
2554 if (astnode_is_type(field_data, DATA_NODE)) {
2555 err(c->loc, "data initialization not allowed here");
2556 continue;
2558 /* Calculate field size in bytes: sizeof(datatype) * count */
2559 field_size = astnode_create_arithmetic(
2560 MUL_OPERATOR,
2561 astnode_create_sizeof(astnode_clone(LHS(field_data), field_data->loc), field_data->loc),
2562 astnode_clone(RHS(field_data), field_data->loc),
2563 field_data->loc
2565 field_size = reduce_expression(field_size);
2566 /* Make sure field size is a constant */
2567 if (!astnode_is_type(field_size, INTEGER_NODE)) {
2568 err(c->loc, "union member must be of constant size");
2569 astnode_finalize(field_size);
2570 /* Use default size: 1 byte */
2571 field_size = astnode_create_integer(1, field_data->loc);
2573 /* Try to enter field in structure's symbol table */
2574 fe = symtab_enter(
2575 field_id->ident,
2576 VAR_SYMBOL,
2577 astnode_clone(field_data, field_data->loc),
2580 if (fe == NULL) {
2581 err(c->loc, "duplicate symbol `%s' in union `%s'", field_id->ident, se->id);
2582 astnode_finalize(field_size);
2583 continue;
2585 /* Add to ordered list of fields */
2586 (*plist) = malloc(sizeof(ordered_field_list));
2587 (*plist)->entry = fe;
2588 (*plist)->next = NULL;
2589 plist = &((*plist)->next);
2590 /* Set field offset (0 for all) and size */
2591 fe->field.offset = astnode_create_integer(0, n->loc);
2592 fe->field.size = astnode_clone(field_size, field_size->loc);
2593 /* See if field size of this member is largest so far */
2594 if (se->struc.size->integer < field_size->integer) {
2595 astnode_finalize(se->struc.size);
2596 se->struc.size = field_size;
2597 } else {
2598 astnode_finalize(field_size);
2601 symtab_pop();
2605 * Enters union type into symbol table based on AST node.
2606 * @param n Node of type UNION_DECL_NODE
2608 static int enter_union(astnode *n, void *arg, astnode **next)
2610 symtab_entry *se;
2611 astnode *union_id = astnode_get_child(n, 0); /* Child 0 is union identifier */
2612 /* Check for anonymous union */
2613 if (astnode_is_type(union_id, NULL_NODE)) {
2614 err(n->loc, "anonymous union not allowed in global scope");
2615 } else {
2616 /* Put UNION in symbol table */
2617 assert(astnode_get_type(union_id) == IDENTIFIER_NODE);
2618 se = symtab_enter(union_id->ident, UNION_SYMBOL, n, 0);
2619 if (se == NULL) {
2620 err(n->loc, "duplicate symbol `%s'", union_id->ident);
2621 } else {
2622 /* Put the fields of the union in local symbol table */
2623 enter_union_fields(se, n);
2626 /* Remove UNION node from AST */
2627 // astnode_remove(n);
2628 // astnode_finalize(n);
2629 return 0;
2633 * Enters enumerated type into symbol table based on AST node.
2634 * @param n Node of type ENUM_DECL_NODE
2636 static int enter_enum(astnode *n, void *arg, astnode **next)
2638 astnode *c;
2639 astnode *id;
2640 astnode *val;
2641 symtab_entry *se;
2642 astnode *enum_id = astnode_get_child(n, 0); /* Child 0 is enum identifier */
2643 /* Enter in global symbol table */
2644 assert(astnode_get_type(enum_id) == IDENTIFIER_NODE);
2645 se = symtab_enter(enum_id->ident, ENUM_SYMBOL, n, 0);
2646 if (se == NULL) {
2647 err(n->loc, "duplicate symbol `%s'", enum_id->ident);
2648 } else {
2649 /* Add all the enum symbols to its own symbol table */
2650 se->symtab = symtab_create();
2651 val = NULL;
2652 for (c = enum_id->next_sibling; c != NULL; c = c->next_sibling) {
2653 if (astnode_is_type(c, IDENTIFIER_NODE)) {
2654 id = c;
2655 if (val == NULL) {
2656 val = astnode_create_integer(0, c->loc);
2657 } else {
2658 val = astnode_create_integer(val->integer+1, c->loc);
2660 } else {
2661 id = LHS(c);
2662 val = reduce_expression_complete(astnode_clone(RHS(c), RHS(c)->loc));
2663 if (!astnode_is_type(val, INTEGER_NODE)) {
2664 err(c->loc, "initializer does not evaluate to integer literal");
2665 astnode_finalize(val);
2666 /* Use default value */
2667 val = astnode_create_integer(0, c->loc);
2670 if (symtab_enter(id->ident, CONSTANT_SYMBOL, val, 0) == NULL) {
2671 err(c->loc, "duplicate symbol `%s' in enumeration `%s'", id->ident, enum_id->ident);
2672 continue;
2675 symtab_pop();
2677 /* Remove ENUM node from AST */
2678 // astnode_remove(n);
2679 // astnode_finalize(n);
2680 return 0;
2684 * Enters record type into symbol table based on AST node.
2685 * @param n Node of type RECORD_DECL_NODE
2687 static int enter_record(astnode *n, void *arg, astnode **next)
2689 ordered_field_list **plist;
2690 astnode *c;
2691 astnode *field_id;
2692 astnode *field_width;
2693 int size;
2694 int offset;
2695 symtab_entry *se;
2696 symtab_entry *fe;
2697 astnode *record_id = astnode_get_child(n, 0); /* Child 0 is record identifier */
2698 assert(astnode_get_type(record_id) == IDENTIFIER_NODE);
2699 /* Enter in global symbol table */
2700 se = symtab_enter(record_id->ident, RECORD_SYMBOL, n, 0);
2701 if (se == NULL) {
2702 err(n->loc, "duplicate symbol `%s'", record_id->ident);
2704 else {
2705 /* Add all the record fields to record's own symbol table */
2706 se->symtab = symtab_create();
2707 offset = 8;
2708 plist = &se->struc.fields;
2709 for (c = record_id->next_sibling; c != NULL; c = c->next_sibling) {
2710 /* c has two children: field identifier and its width */
2711 field_id = LHS(c);
2712 field_width = astnode_clone(reduce_expression(RHS(c)), RHS(c)->loc);
2713 /* Validate the width -- must be positive integer literal */
2714 if (!astnode_is_type(field_width, INTEGER_NODE)) {
2715 err(c->loc, "record member `%s' is not of constant size", field_id->ident);
2716 continue;
2718 if ((field_width->integer <= 0) || (field_width->integer >= 8)) {
2719 err(c->loc, "width of record member `%s' is out of range (%d)", field_id->ident, field_width->integer);
2720 continue;
2722 /* Attempt to enter field in record's symbol table */
2723 fe = symtab_enter(field_id->ident, VAR_SYMBOL, c, 0);
2724 if (fe == NULL) {
2725 err(c->loc, "duplicate symbol `%s' in record `%s'", field_id->ident, record_id->ident);
2726 continue;
2728 /* Add to ordered list of fields */
2729 (*plist) = malloc(sizeof(ordered_field_list));
2730 (*plist)->entry = fe;
2731 (*plist)->next = NULL;
2732 plist = &((*plist)->next);
2733 /* Set field offset */
2734 offset = offset - field_width->integer;
2735 fe->field.offset = astnode_create_integer(offset, c->loc);
2736 /* Set field size (width) */
2737 fe->field.size = field_width;
2739 size = 8 - offset;
2740 if (size > 8) {
2741 err(n->loc, "size of record `%s' (%d) exceeds 8 bits", record_id->ident, size);
2742 } else {
2743 /* Set size of record (in bits) */
2744 se->struc.size = astnode_create_integer(size, n->loc);
2746 symtab_pop();
2748 /* Remove RECORD node from AST */
2749 // astnode_remove(n);
2750 // astnode_finalize(n);
2751 return 0;
2755 * Globalizes a local.
2756 * The node is morphed into its global equivalent (LABEL_NODE or IDENTIFIER_NODE).
2757 * @param n A node of type LOCAL_LABEL_NODE or LOCAL_ID_NODE
2758 * @param arg Pointer to namespace counter
2760 static int globalize_local(astnode *n, void *arg, astnode **next)
2762 char str[32];
2763 /* Make it global by appending namespace counter to the id */
2764 sprintf(str, "#%d", label_count);
2765 if (astnode_is_type(n, LOCAL_LABEL_NODE)) {
2766 /* Local label definition, use label field */
2767 n->label = realloc(n->label, strlen(n->label)+strlen(str)+1);
2768 strcat(n->label, str);
2769 /* This node is now a unique, global label */
2770 n->type = LABEL_NODE;
2771 /* Make sure it's unique */
2772 if (symtab_lookup(n->label)) {
2773 err(n->loc, "duplicate symbol `%s'", n->label);
2774 /* Remove from AST */
2775 astnode_remove(n);
2776 astnode_finalize(n);
2777 } else {
2778 /* Enter it in symbol table */
2779 symtab_enter(n->label, LABEL_SYMBOL, n, (in_dataseg ? DATA_FLAG : 0) );
2781 } else {
2782 /* Local label reference, use ident field */
2783 n->ident = realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2784 strcat(n->ident, str);
2785 /* This node is now a unique, global identifier */
2786 n->type = IDENTIFIER_NODE;
2788 return 1;
2792 * Tags symbols as extrn.
2793 * @param n A node of type EXTRN_NODE
2795 static int tag_extrn_symbols(astnode *n, void *arg, astnode **next)
2797 astnode *id;
2798 astnode *type;
2799 astnode *list;
2800 symtab_entry *e;
2801 /* Get symbol type specifier */
2802 type = astnode_get_child(n, 0);
2803 /* Go through the list of identifiers */
2804 list = astnode_get_child(n, 1);
2805 for (id=astnode_get_first_child(list); id != NULL; id=astnode_get_next_sibling(id) ) {
2806 /* Look up identifier in symbol table */
2807 e = symtab_lookup(id->ident);
2808 if (e != NULL) {
2809 if (!(e->flags & EXTRN_FLAG)) {
2810 /* Error, can't import a symbol that's defined locally! */
2811 // TODO: this is okay?
2812 err(n->loc, "`%s' declared as extrn but is defined locally", id->ident);
2815 else {
2816 // TODO: store external unit name
2817 switch (astnode_get_type(type)) {
2818 case DATATYPE_NODE:
2819 /* Put it in symbol table */
2820 symtab_enter(id->ident, VAR_SYMBOL, astnode_create_data(astnode_clone(type, n->loc), NULL, n->loc), EXTRN_FLAG);
2821 break;
2823 case INTEGER_NODE:
2824 /* type->integer is (LABEL|PROC)_SYMBOL */
2825 symtab_enter(id->ident, type->integer, NULL, EXTRN_FLAG);
2826 break;
2828 default:
2829 break;
2833 /* Remove extrn node from AST */
2834 astnode_remove(n);
2835 astnode_finalize(n);
2837 return 0;
2843 static int process_message(astnode *n, void *arg, astnode **next)
2845 astnode *mesg = reduce_expression_complete(LHS(n));
2846 if (astnode_is_type(mesg, STRING_NODE)) {
2847 printf("%s\n", mesg->string);
2849 else if (astnode_is_type(mesg, INTEGER_NODE)) {
2850 printf("%d\n", mesg->integer);
2852 else {
2853 err(mesg->loc, "string or integer argument expected");
2855 astnode_remove(n);
2856 astnode_finalize(n);
2857 return 0;
2863 static int process_warning(astnode *n, void *arg, astnode **next)
2865 astnode *mesg = reduce_expression_complete(LHS(n));
2866 if (astnode_is_type(mesg, STRING_NODE)) {
2867 warn(mesg->loc, mesg->string);
2869 else {
2870 err(mesg->loc, "string argument expected");
2872 astnode_remove(n);
2873 astnode_finalize(n);
2874 return 0;
2880 static int process_error(astnode *n, void *arg, astnode **next)
2882 astnode *mesg = reduce_expression_complete(LHS(n));
2883 if (astnode_is_type(mesg, STRING_NODE)) {
2884 err(mesg->loc, mesg->string);
2886 else {
2887 err(mesg->loc, "string argument expected");
2889 astnode_remove(n);
2890 astnode_finalize(n);
2891 return 0;
2895 * Processes a forward branch declaration.
2896 * @param n Node of type FORWARD_BRANCH_DECL_NODE
2897 * @param arg Not used
2899 static int process_forward_branch_decl(astnode *n, void *arg, astnode **next)
2901 astnode *l;
2902 int i;
2903 char str[32];
2904 /* Get branch info structure for label (+, ++, ...) */
2905 forward_branch_info *fwd = &forward_branch[strlen(n->ident)-1];
2906 /* Morph n to globally unique label */
2907 sprintf(str, "#%d", fwd->counter);
2908 n->label = (char *)realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2909 strcat(n->label, str);
2910 n->type = LABEL_NODE;
2911 symtab_enter(n->label, LABEL_SYMBOL, n, 0);
2912 /* Fix reference identifiers */
2913 for (i=0; i<fwd->index; i++) {
2914 l = fwd->refs[i];
2915 l->ident = (char *)realloc(l->ident, strlen(n->ident)+1);
2916 strcpy(l->ident, n->ident);
2918 /* Prepare for next declaration */
2919 fwd->index = 0;
2920 fwd->counter++;
2921 return 0;
2925 * Processes a backward branch declaration.
2926 * @param n Node of type BACKWARD_BRANCH_DECL_NODE
2927 * @param arg Not used
2929 static int process_backward_branch_decl(astnode *n, void *arg, astnode **next)
2931 char str[32];
2932 /* Get branch info */
2933 backward_branch_info *bwd = &backward_branch[strlen(n->ident)-1];
2934 bwd->decl = n;
2935 /* Morph n to globally unique label */
2936 sprintf(str, "#%d", bwd->counter);
2937 n->label = (char *)realloc(n->ident, strlen(n->ident)+strlen(str)+1);
2938 strcat(n->label, str);
2939 n->type = LABEL_NODE;
2940 symtab_enter(n->label, LABEL_SYMBOL, n, 0);
2941 /* Prepare for next declaration */
2942 bwd->counter++;
2943 return 0;
2947 * Processes a forward branch label reference.
2948 * @param n Node of type FORWARD_BRANCH_NODE
2949 * @param arg Not used
2951 static int process_forward_branch(astnode *n, void *arg, astnode **next)
2953 /* Add n to proper forward_branch array */
2954 forward_branch_info *fwd = &forward_branch[strlen(n->ident)-1];
2955 fwd->refs[fwd->index++] = n;
2956 /* Change to identifier node */
2957 n->type = IDENTIFIER_NODE;
2958 return 0;
2962 * Processes a backward branch label reference.
2963 * @param n Node of type BACKWARD_BRANCH_NODE
2964 * @param arg Not used
2966 static int process_backward_branch(astnode *n, void *arg, astnode **next)
2968 /* Get branch info */
2969 backward_branch_info *bwd = &backward_branch[strlen(n->ident)-1];
2970 /* Make sure it's a valid reference */
2971 if (bwd->decl != NULL) {
2972 /* Fix n->ident */
2973 n->ident = (char *)realloc(n->ident, strlen(bwd->decl->ident)+1);
2974 strcpy(n->ident, bwd->decl->ident);
2976 /* Change to identifier node */
2977 n->type = IDENTIFIER_NODE;
2978 return 0;
2981 /*---------------------------------------------------------------------------*/
2983 static int is_field_ref(astnode *n)
2985 astnode *p = astnode_get_parent(n);
2986 /* Case 1: id.id */
2987 if (astnode_is_type(p, DOT_NODE)) return 1;
2988 /* Case 2: id.id[expr] */
2989 if (astnode_is_type(p, INDEX_NODE) && (n == LHS(p)) && astnode_is_type(astnode_get_parent(p), DOT_NODE) ) return 1;
2990 return 0;
2994 * Checks that the given identifier node is present in symbol table.
2995 * Issues error if it is not, and replaces with integer 0.
2996 * @param n A node of type IDENTIFIER_NODE
2998 static int validate_ref(astnode *n, void *arg, astnode **next)
3000 int i;
3001 symbol_ident_list list;
3002 symtab_entry *enum_def;
3003 if (is_field_ref(n)) {
3004 return 1; /* Validated by validate_dotref() */
3006 /* Look it up in symbol table */
3007 symtab_entry * e = symtab_lookup(n->ident);
3008 if (e == NULL) {
3009 /* This identifier is unknown */
3010 /* Maybe it is part of an enumeration */
3011 symtab_list_type(ENUM_SYMBOL, &list);
3012 for (i=0; i<list.size; i++) {
3013 enum_def = symtab_lookup(list.idents[i]);
3014 symtab_push(enum_def->symtab);
3015 e = symtab_lookup(n->ident);
3016 symtab_pop();
3017 if (e != NULL) {
3018 /* Found it */
3019 /* Replace id by SCOPE_NODE */
3020 astnode_replace(
3022 astnode_create_scope(
3023 astnode_create_identifier(enum_def->id, n->loc),
3024 astnode_clone(n, n->loc),
3025 n->loc
3028 astnode_finalize(n);
3029 break;
3032 symtab_list_finalize(&list);
3033 /* If still not found, error */
3034 if (e == NULL) {
3035 strtok(n->ident, "#"); /* Remove globalize junk */
3036 // err(n->loc, "unknown symbol `%s'", n->ident);
3037 /* Replace by integer 0 */
3038 //astnode_replace(n, astnode_create_integer(0, n->loc) );
3039 //astnode_finalize(n);
3040 warn(n->loc, "`%s' undeclared; assuming external label", n->ident);
3041 e = symtab_enter(n->ident, LABEL_SYMBOL, NULL, EXTRN_FLAG);
3044 assert(e);
3045 /* Increase reference count */
3046 e->ref_count++;
3047 return 1;
3051 * Validates top-level (not part of structure) indexed identifier.
3052 * @param n Node of type INDEX_NODE
3053 * @param arg Not used
3055 static int validate_index(astnode *n, void *arg, astnode **next)
3057 symtab_entry *e;
3058 astnode *id;
3059 astnode *type;
3060 if (is_field_ref(LHS(n))) {
3061 return 1; /* Validated by validate_dotref() */
3063 id = LHS(n);
3064 if (!astnode_is_type(id, IDENTIFIER_NODE)) {
3065 err(n->loc, "identifier expected");
3066 astnode_replace(n, astnode_create_integer(0, n->loc) );
3067 astnode_finalize(n);
3068 return 1;
3070 e = symtab_lookup(id->ident);
3071 if (e != NULL) {
3072 type = LHS(e->def);
3073 if (!astnode_is_type(type, DATATYPE_NODE)) {
3074 err(n->loc, "`%s' cannot be indexed", id->ident);
3075 astnode_replace(n, astnode_create_integer(0, n->loc) );
3076 astnode_finalize(n);
3077 } else {
3078 // TODO: bounds check
3079 reduce_index(n);
3081 } else {
3082 err(n->loc, "unknown symbol `%s'", id->ident);
3083 astnode_replace(n, astnode_create_integer(0, n->loc) );
3084 astnode_finalize(n);
3086 return 1;
3090 * Checks that A::B is valid.
3091 * If it's not valid it is replaced by integer 0.
3092 * @param n Node of type SCOPE_NODE
3094 static int validate_scoperef(astnode *n, void *arg, astnode **next)
3096 astnode *symbol;
3097 astnode *namespace = LHS(n);
3098 /* Look up namespace in global symbol table */
3099 symtab_entry * e = symtab_lookup(namespace->ident);
3100 if (e == NULL) {
3101 /* Error, this identifier is unknown */
3102 err(n->loc, "unknown namespace `%s'", namespace->ident);
3103 /* Replace by integer 0 */
3104 astnode_replace(n, astnode_create_integer(0, n->loc) );
3105 astnode_finalize(n);
3106 } else {
3107 /* Get symbol on right of :: operator */
3108 symbol = RHS(n);
3109 /* Namespace was found, check its type */
3110 switch (e->type) {
3111 case STRUC_SYMBOL:
3112 case UNION_SYMBOL:
3113 case RECORD_SYMBOL:
3114 case ENUM_SYMBOL:
3115 /* OK, check the symbol */
3116 symtab_push(e->symtab);
3117 e = symtab_lookup(symbol->ident);
3118 if (e == NULL) {
3119 /* Error, symbol is not in namespace */
3120 err(n->loc, "unknown symbol `%s' in namespace `%s'", symbol->ident, namespace->ident);
3121 /* Replace by integer 0 */
3122 astnode_replace(n, astnode_create_integer(0, n->loc) );
3123 astnode_finalize(n);
3125 symtab_pop();
3126 break;
3128 default:
3129 err(n->loc, "`%s' is not a namespace", namespace->ident);
3130 /* Replace by integer 0 */
3131 astnode_replace(n, astnode_create_integer(0, n->loc) );
3132 astnode_finalize(n);
3133 break;
3136 return 0;
3140 * Validates right part of dotted reference recursively.
3141 * Assumes that left part's symbol table is on stack.
3142 * @param n Node of type DOT_NODE
3144 static void validate_dotref_recursive(astnode *n, astnode *top)
3146 astnode *left;
3147 astnode *right;
3148 astnode *type;
3149 symtab_entry *field;
3150 symtab_entry *def;
3151 left = LHS(n);
3152 if (astnode_is_type(left, INDEX_NODE)) {
3153 left = LHS(left); /* Need identifier */
3155 right = RHS(n);
3156 if (astnode_is_type(right, DOT_NODE)) {
3157 right = LHS(right); /* Need identifier */
3159 if (astnode_is_type(right, INDEX_NODE)) {
3160 right = LHS(right); /* Need identifier */
3162 /* Lookup 'right' in 'left's symbol table */
3163 assert(astnode_get_type(right) == IDENTIFIER_NODE);
3164 field = symtab_lookup(right->ident);
3165 if (field == NULL) {
3166 /* Error, this symbol is unknown */
3167 err(n->loc, "`%s' is not a member of `%s'", right->ident, left->ident);
3168 /* Replace by integer 0 */
3169 astnode_replace(top, astnode_create_integer(0, top->loc) );
3170 astnode_finalize(top);
3171 } else {
3172 /* See if more subfields to process */
3173 n = RHS(n);
3174 if (astnode_is_type(n, DOT_NODE)) {
3175 /* Verify the variable's type -- should be user-defined */
3176 type = LHS(field->def);
3177 if ((type == NULL) || (type->datatype != USER_DATATYPE)) {
3178 err(n->loc, "member `%s' of `%s' is not a structure", right->ident, left->ident);
3179 /* Replace by integer 0 */
3180 astnode_replace(top, astnode_create_integer(0, top->loc) );
3181 astnode_finalize(top);
3182 } else {
3183 /* Look up variable's type definition and verify it's a structure */
3184 def = symtab_global_lookup(LHS(type)->ident);
3185 if (def == NULL) {
3186 err(n->loc, "member '%s' of '%s' is of unknown type (`%s')", right->ident, left->ident, LHS(type)->ident);
3187 /* Replace by integer 0 */
3188 astnode_replace(top, astnode_create_integer(0, top->loc) );
3189 astnode_finalize(top);
3190 } else if ( !((def->type == STRUC_SYMBOL) || (def->type == UNION_SYMBOL)) ) {
3191 err(n->loc, "member `%s' of `%s' is not a structure", right->ident, left->ident);
3192 /* Replace by integer 0 */
3193 astnode_replace(top, astnode_create_integer(0, top->loc) );
3194 astnode_finalize(top);
3195 } else {
3196 /* Next field */
3197 symtab_push(def->symtab);
3198 validate_dotref_recursive(n, top);
3199 symtab_pop();
3207 * Validates A.B.C.D. . ...
3208 * Replaces the whole thing with integer 0 if not.
3209 * @param n Node of type DOT_NODE
3211 static int validate_dotref(astnode *n, void *arg, astnode **next)
3213 symtab_entry *father;
3214 symtab_entry *def;
3215 astnode *type;
3216 astnode *left;
3217 if (astnode_has_ancestor_of_type(n, DOT_NODE)) {
3218 return 1; /* Already validated, since this function is recursive */
3220 /* Look up parent in global symbol table */
3221 left = LHS(n); /* n := left . right */
3222 if (astnode_is_type(left, INDEX_NODE)) {
3223 left = LHS(left); /* Need identifier */
3225 father = symtab_lookup(left->ident);
3226 if (father == NULL) {
3227 /* Error, this symbol is unknown */
3228 err(n->loc, "unknown symbol `%s'", left->ident);
3229 /* Replace by integer 0 */
3230 astnode_replace(n, astnode_create_integer(0, n->loc) );
3231 astnode_finalize(n);
3232 } else {
3233 /* Increase reference count */
3234 father->ref_count++;
3235 /* Verify the variable's type -- should be user-defined */
3236 type = LHS(father->def);
3237 if ((type == NULL) || (type->datatype != USER_DATATYPE)) {
3238 err(n->loc, "`%s' is not a structure", left->ident);
3239 /* Replace by integer 0 */
3240 astnode_replace(n, astnode_create_integer(0, n->loc) );
3241 astnode_finalize(n);
3242 } else {
3243 /* Look up variable's type definition and verify it's a structure */
3244 def = symtab_lookup(LHS(type)->ident);
3245 if (def == NULL) {
3246 err(n->loc, "'%s' is of unknown type (`%s')", left->ident, LHS(type)->ident);
3247 /* Replace by integer 0 */
3248 astnode_replace(n, astnode_create_integer(0, n->loc) );
3249 astnode_finalize(n);
3250 } else if ( !((def->type == STRUC_SYMBOL) || (def->type == UNION_SYMBOL)) ) {
3251 err(n->loc, "`%s' is not a structure", left->ident);
3252 /* Replace by integer 0 */
3253 astnode_replace(n, astnode_create_integer(0, n->loc) );
3254 astnode_finalize(n);
3255 } else {
3256 /* Verify fields recursively */
3257 symtab_push(def->symtab);
3258 validate_dotref_recursive(n, n);
3259 symtab_pop();
3263 return 1;
3266 /*---------------------------------------------------------------------------*/
3269 * Evaluates expressions involved in conditional assembly, and removes the
3270 * appropriate branches from the AST.
3271 * Does some other stuff too, such as substitute equates and fold constants.
3273 void astproc_first_pass(astnode *root)
3275 /* Table of callback functions for our purpose. */
3276 static astnodeprocmap map[] = {
3277 { LABEL_NODE, enter_label },
3278 { VAR_DECL_NODE, enter_var },
3279 { PROC_NODE, enter_proc },
3280 { STRUC_DECL_NODE, enter_struc },
3281 { UNION_DECL_NODE, enter_union },
3282 { ENUM_DECL_NODE, enter_enum },
3283 { RECORD_DECL_NODE, enter_record },
3284 { LOCAL_LABEL_NODE, globalize_local },
3285 { LOCAL_ID_NODE, globalize_local },
3286 { MACRO_DECL_NODE, enter_macro },
3287 { MACRO_NODE, expand_macro },
3288 { REPT_NODE, process_rept },
3289 { WHILE_NODE, process_while },
3290 { DATASEG_NODE, process_dataseg },
3291 { CODESEG_NODE, process_codeseg },
3292 { ORG_NODE, process_org },
3293 { CHARMAP_NODE, load_charmap },
3294 { INSTRUCTION_NODE, process_instruction },
3295 { DATA_NODE, process_data },
3296 { STORAGE_NODE, process_storage },
3297 { EQU_NODE, process_equ },
3298 { ASSIGN_NODE, process_assign },
3299 { IFDEF_NODE, process_ifdef },
3300 { IFNDEF_NODE, process_ifndef },
3301 { IF_NODE, process_if },
3302 { EXTRN_NODE, tag_extrn_symbols },
3303 { MESSAGE_NODE, process_message },
3304 { WARNING_NODE, process_warning },
3305 { ERROR_NODE, process_error },
3306 { FORWARD_BRANCH_DECL_NODE, process_forward_branch_decl },
3307 { BACKWARD_BRANCH_DECL_NODE, process_backward_branch_decl },
3308 { FORWARD_BRANCH_NODE, process_forward_branch },
3309 { BACKWARD_BRANCH_NODE, process_backward_branch },
3310 { 0, NULL }
3312 reset_charmap();
3313 branch_init();
3314 in_dataseg = 0; /* codeseg is default */
3315 /* Do the walk. */
3316 astproc_walk(root, NULL, map);
3317 /* Remove all the volatile constants from the symbol table */
3318 /* These are the ones defined with the '=' operator, whose identifiers should
3319 all have been replaced by their value in the syntax tree now. Since
3320 they're not referenced anywhere we can safely dispose of them.
3321 The EQUates on the other hand should be kept, since they will
3322 possibly be exported. */
3323 #ifdef ENABLE_BUGGY_THING // ### FIXME
3325 int i;
3326 symbol_ident_list list;
3327 symtab_entry *e;
3328 symtab_list_type(CONSTANT_SYMBOL, &list);
3329 for (i = 0; i < list.size; ++i) {
3330 e = symtab_lookup(list.idents[i]);
3331 if (e->flags & VOLATILE_FLAG) {
3332 symtab_remove(list.idents[i]);
3335 symtab_list_finalize(&list);
3337 #endif
3340 /*---------------------------------------------------------------------------*/
3343 * Tags labels as public.
3344 * @param n A node of type PUBLIC_NODE
3346 static int tag_public_symbols(astnode *n, void *arg, astnode **next)
3348 astnode *id;
3349 symtab_entry *e;
3350 /* Go through the list of identifiers */
3351 for (id=astnode_get_first_child(n); id != NULL; id = astnode_get_next_sibling(id) ) {
3352 /* Look up identifier in symbol table */
3353 e = symtab_lookup(id->ident);
3354 if (e != NULL) {
3355 /* Symbol exists. Set the proper flag unless ambiguous. */
3356 if (e->flags & EXTRN_FLAG) {
3357 err(n->loc, "`%s' already declared extrn", id->ident);
3358 } else {
3359 switch (e->type) {
3360 case LABEL_SYMBOL:
3361 case CONSTANT_SYMBOL:
3362 case VAR_SYMBOL:
3363 case PROC_SYMBOL:
3364 /* GO! */
3365 e->flags |= PUBLIC_FLAG;
3366 break;
3368 default:
3369 err(n->loc, "`%s' is of non-exportable type", id->ident);
3370 break;
3373 } else {
3374 /* Warning, can't export a symbol that's not defined. */
3375 warn(n->loc, "`%s' declared as public but is not defined", id->ident);
3378 /* Remove PUBLIC_NODE from AST */
3379 astnode_remove(n);
3380 astnode_finalize(n);
3382 return 0;
3386 * Sets alignment for a set of (data) labels.
3387 * @param n A node of type ALIGN_NODE
3389 static int tag_align_symbols(astnode *n, void *arg, astnode **next)
3391 int pow;
3392 astnode *id;
3393 astnode *idents;
3394 astnode *expr;
3395 symtab_entry *e;
3396 /* Go through the list of identifiers */
3397 idents = LHS(n);
3398 for (id=astnode_get_first_child(idents); id != NULL; id = astnode_get_next_sibling(id) ) {
3399 /* Look up identifier in symbol table */
3400 e = symtab_lookup(id->ident);
3401 if (e != NULL) {
3402 /* Symbol exists. Set the proper flag unless ambiguous. */
3403 if (!(e->flags & DATA_FLAG)) {
3404 err(n->loc, "cannot align a code symbol (`%s')", id->ident);
3405 } else {
3406 switch (e->type) {
3407 case LABEL_SYMBOL:
3408 case VAR_SYMBOL:
3409 expr = reduce_expression(RHS(n));
3410 if (!astnode_is_type(expr, INTEGER_NODE)) {
3411 err(n->loc, "alignment expression must be an integer literal");
3412 } else if ((expr->integer < 0) || (expr->integer >= 0x10000)) {
3413 err(n->loc, "alignment expression out of range");
3414 } else if (expr->integer > 1) {
3415 pow = 0;
3416 switch (expr->integer) {
3417 case 32768: pow++;
3418 case 16384: pow++;
3419 case 8192: pow++;
3420 case 4096: pow++;
3421 case 2048: pow++;
3422 case 1024: pow++;
3423 case 512: pow++;
3424 case 256: pow++;
3425 case 128: pow++;
3426 case 64: pow++;
3427 case 32: pow++;
3428 case 16: pow++;
3429 case 8: pow++;
3430 case 4: pow++;
3431 case 2: pow++;
3432 /* GO! */
3433 e->flags |= ALIGN_FLAG;
3434 e->align = pow;
3435 break;
3437 default:
3438 err(n->loc, "alignment expression must be a power of 2");
3439 break;
3442 break;
3444 default:
3445 err(n->loc, "`%s' cannot be aligned", id->ident);
3446 break;
3450 else {
3451 /* Warning, can't align a symbol that's not defined. */
3452 warn(n->loc, "alignment ignored for undefined symbol `%s'", id->ident);
3455 /* Remove ALIGN_NODE from AST */
3456 astnode_remove(n);
3457 astnode_finalize(n);
3459 return 0;
3462 /*---------------------------------------------------------------------------*/
3465 * Removes unused labels from a syntax tree (and symbol table).
3466 * Unused labels are labels that are defined but not referenced anywhere.
3467 * This function assumes that the reference counts have already been calculated.
3469 void remove_unused_labels()
3471 int i;
3472 char *id;
3473 astnode *n;
3474 symbol_ident_list list;
3475 symtab_list_type(LABEL_SYMBOL, &list);
3476 for (i=0; i<list.size; i++) {
3477 /* Look up label in symbol table */
3478 id = list.idents[i];
3479 symtab_entry * e = symtab_lookup(id);
3480 /* If reference count is zero, AND label isn't declared public, remove it. */
3481 if ((e->ref_count == 0) && ((e->flags & PUBLIC_FLAG) == 0)) {
3482 n = e->def;
3483 strtok(n->label, "#"); /* Remove globalize junk */
3484 warn(n->loc, "`%s' defined but not used", n->label);
3485 /* Remove label from AST */
3486 astnode_remove(n);
3487 astnode_finalize(n);
3488 //symtab_remove(n->label); ### FIXME leads to crash sometimes...
3491 symtab_list_finalize(&list);
3495 * If the storage is of user-defined type, replaces it with
3496 * .DSB sizeof(type) * count
3498 static int reduce_user_storage(astnode *n, void *arg, astnode **next)
3500 astnode *type;
3501 astnode *count;
3502 astnode *byte_storage;
3503 symtab_entry *e;
3504 type = LHS(n);
3505 if (type->datatype == USER_DATATYPE) {
3506 /* Look it up */
3507 e = symtab_lookup(LHS(type)->ident);
3508 if (e != NULL) {
3509 /* Replace by DSB */
3510 count = RHS(n);
3511 byte_storage = astnode_create_storage(
3512 astnode_create_datatype(BYTE_DATATYPE, NULL, type->loc),
3513 astnode_create_arithmetic(
3514 MUL_OPERATOR,
3515 astnode_create_sizeof(
3516 astnode_create_identifier(LHS(type)->ident, n->loc),
3517 n->loc
3519 astnode_clone(count, n->loc),
3520 n->loc
3522 n->loc
3524 astnode_replace(n, byte_storage);
3525 astnode_finalize(n);
3526 } else {
3527 err(n->loc, "unknown symbol `%s'", LHS(type)->ident);
3528 /* Remove from AST */
3529 astnode_remove(n);
3530 astnode_finalize(n);
3531 return 0;
3534 return 1;
3538 * Second major pass over AST.
3540 void astproc_second_pass(astnode *root)
3542 /* Table of callback functions for our purpose. */
3543 static astnodeprocmap map[] = {
3544 { IDENTIFIER_NODE, validate_ref },
3545 { SCOPE_NODE, validate_scoperef },
3546 { DOT_NODE, validate_dotref },
3547 { INDEX_NODE, validate_index },
3548 { PUBLIC_NODE, tag_public_symbols },
3549 { STORAGE_NODE, reduce_user_storage },
3550 { ALIGN_NODE, tag_align_symbols },
3551 { STRUC_DECL_NODE, noop },
3552 { UNION_DECL_NODE, noop },
3553 { ENUM_DECL_NODE, noop },
3554 { RECORD_DECL_NODE, noop },
3555 { 0, NULL }
3557 in_dataseg = 0; /* codeseg is default */
3558 /* Do the walk. */
3559 astproc_walk(root, NULL, map);
3560 /* */
3561 remove_unused_labels();
3564 /*---------------------------------------------------------------------------*/
3567 * Translates a single instruction.
3568 * @param n A node of type INSTRUCTION_NODE
3570 static int translate_instruction(astnode *n, void *arg, astnode **next)
3572 unsigned char c;
3573 /* Put the operand in final form */
3574 astnode *o = reduce_expression_complete( LHS(n) );
3575 assert(o == LHS(n));
3576 /* Convert (mnemonic, addressing mode) pair to opcode */
3577 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3578 /* Test if opcode is invalid */
3579 if (n->instr.opcode == 0xFF) {
3580 /* Check for the special cases */
3581 if ((n->instr.mnemonic == STX_MNEMONIC) && (n->instr.mode == ABSOLUTE_Y_MODE)) {
3582 /* Doesn't have absolute version, "scale down" to zeropage */
3583 n->instr.mode = ZEROPAGE_Y_MODE;
3584 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3585 } else if ((n->instr.mnemonic == STY_MNEMONIC) && (n->instr.mode == ABSOLUTE_X_MODE)) {
3586 /* Doesn't have absolute version, "scale down" to zeropage */
3587 n->instr.mode = ZEROPAGE_X_MODE;
3588 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3589 } else if (n->instr.mode == ABSOLUTE_MODE) {
3590 /* Check for relative addressing (these are parsed as absolute mode) */
3591 switch (n->instr.mnemonic) {
3592 case BCC_MNEMONIC:
3593 case BCS_MNEMONIC:
3594 case BEQ_MNEMONIC:
3595 case BMI_MNEMONIC:
3596 case BNE_MNEMONIC:
3597 case BPL_MNEMONIC:
3598 case BVC_MNEMONIC:
3599 case BVS_MNEMONIC:
3600 /* Fix addressing mode and opcode */
3601 n->instr.mode = RELATIVE_MODE;
3602 n->instr.opcode = opcode_get(n->instr.mnemonic, n->instr.mode);
3603 break;
3607 if (n->instr.opcode != 0xFF) {
3608 /* If the operand is a constant, see if we can "reduce" from
3609 absolute mode to zeropage mode */
3610 if ((astnode_is_type(o, INTEGER_NODE)) &&
3611 ((unsigned long)o->integer < 256) &&
3612 ((c = opcode_zp_equiv(n->instr.opcode)) != 0xFF)) {
3613 /* Switch to the zeromode version */
3614 n->instr.opcode = c;
3615 switch (n->instr.mode) {
3616 case ABSOLUTE_MODE: n->instr.mode = ZEROPAGE_MODE; break;
3617 case ABSOLUTE_X_MODE: n->instr.mode = ZEROPAGE_X_MODE;break;
3618 case ABSOLUTE_Y_MODE: n->instr.mode = ZEROPAGE_Y_MODE;break;
3619 default: /* Impossible to get here, right? */ break;
3622 /* If the operand is a constant, make sure it fits */
3623 if (astnode_is_type(o, INTEGER_NODE)) {
3624 switch (n->instr.mode) {
3625 case IMMEDIATE_MODE:
3626 case ZEROPAGE_MODE:
3627 case ZEROPAGE_X_MODE:
3628 case ZEROPAGE_Y_MODE:
3629 case PREINDEXED_INDIRECT_MODE:
3630 case POSTINDEXED_INDIRECT_MODE:
3631 /* Operand must fit in 8 bits */
3632 if (!IS_BYTE_VALUE(o->integer)) {
3633 warn(o->loc, "operand out of range; truncated");
3634 o->integer &= 0xFF;
3636 break;
3638 case ABSOLUTE_MODE:
3639 case ABSOLUTE_X_MODE:
3640 case ABSOLUTE_Y_MODE:
3641 case INDIRECT_MODE:
3642 /* Operand must fit in 8 bits */
3643 if ((unsigned long)o->integer >= 0x10000) {
3644 warn(o->loc, "operand out of range; truncated");
3645 o->integer &= 0xFFFF;
3647 break;
3649 case RELATIVE_MODE:
3650 /* Constant isn't allowed here is it? */
3651 break;
3653 default:
3654 break;
3657 else if (astnode_is_type(o, STRING_NODE)) {
3658 /* String operand doesn't make sense here */
3659 err(n->loc, "invalid operand");
3661 } else {
3662 /* opcode_get() returned 0xFF */
3663 err(n->loc, "invalid addressing mode");
3665 return 0;
3669 * ### Is this really such a good idea?
3671 static int maybe_merge_data(astnode *n, void *arg, astnode **next)
3673 astnode *temp;
3674 astnode *type;
3675 type = LHS(n);
3676 /* Only merge if no debugging, otherwise line information is lost. */
3677 if (!xasm_args.debug && astnode_is_type(*next, DATA_NODE) &&
3678 astnode_equal(type, LHS(*next)) ) {
3679 /* Merge ahead */
3680 temp = *next;
3681 astnode_finalize( astnode_remove_child_at(temp, 0) ); /* Remove datatype node */
3682 astnode_add_child(n, astnode_remove_children(temp) );
3683 astnode_finalize(temp);
3684 *next = n;
3685 } else {
3686 /* Reduce expressions to final form */
3687 for (n = n->first_child; n != NULL; n = temp->next_sibling) {
3688 temp = reduce_expression_complete(n);
3689 if (astnode_is_type(temp, INTEGER_NODE)) {
3690 /* Check that value fits according to datatype */
3691 switch (type->datatype) {
3692 case BYTE_DATATYPE:
3693 if (!IS_BYTE_VALUE(temp->integer)) {
3694 warn(temp->loc, "operand out of range; truncated");
3695 temp->integer &= 0xFF;
3697 break;
3699 case WORD_DATATYPE:
3700 if (!IS_WORD_VALUE(temp->integer)) {
3701 warn(temp->loc, "operand out of range; truncated");
3702 temp->integer &= 0xFFFF;
3704 break;
3706 case DWORD_DATATYPE:
3707 break;
3709 default:
3710 break;
3715 return 0;
3721 static int maybe_merge_storage(astnode *n, void *arg, astnode **next)
3723 astnode *temp;
3724 astnode *new_count;
3725 astnode *old_count;
3726 if (astnode_is_type(*next, STORAGE_NODE) &&
3727 astnode_equal(LHS(n), LHS(*next)) ) {
3728 /* Merge ahead */
3729 temp = *next;
3730 astnode_finalize( astnode_remove_child_at(temp, 0) ); /* Remove datatype node */
3731 old_count = RHS(n);
3732 /* Calculate new count */
3733 new_count = astnode_create_arithmetic(
3734 PLUS_OPERATOR,
3735 astnode_remove_child_at(temp, 0),
3736 astnode_clone(old_count, n->loc),
3737 n->loc
3739 new_count = reduce_expression_complete(new_count);
3740 astnode_replace(old_count, new_count);
3741 astnode_finalize(old_count);
3742 astnode_finalize(temp);
3743 *next = n;
3744 } else {
3745 reduce_expression_complete(RHS(n));
3747 return 0;
3751 * Replaces .proc by its label followed by statements.
3753 static int flatten_proc(astnode *n, void *arg, astnode **next)
3755 astnode *id = LHS(n);
3756 astnode *list = RHS(n);
3757 astnode_remove(id);
3758 id->type = LABEL_NODE;
3759 astnode_insert_child(list, id, 0);
3760 astnode *stmts = astnode_remove_children(list);
3761 astnode_replace(n, stmts);
3762 astnode_finalize(n);
3763 *next = stmts;
3764 return 0;
3770 static int flatten_var_decl(astnode *n, void *arg, astnode **next)
3772 astnode *stmts = LHS(n);
3773 astnode_remove_children(n);
3774 stmts->type = LABEL_NODE;
3775 astnode_replace(n, stmts);
3776 astnode_finalize(n);
3777 *next = stmts;
3778 return 0;
3782 * Third and final pass (if the output isn't pure 6502).
3783 * Translates instructions, merges data and storage nodes,
3784 * and reduces their operands to final form on the way.
3786 void astproc_third_pass(astnode *root)
3788 /* Table of callback functions for our purpose. */
3789 static astnodeprocmap map[] = {
3790 { INSTRUCTION_NODE, translate_instruction },
3791 { DATA_NODE, maybe_merge_data },
3792 { STORAGE_NODE, maybe_merge_storage },
3793 { VAR_DECL_NODE, flatten_var_decl },
3794 { PROC_NODE, flatten_proc },
3795 { STRUC_DECL_NODE, noop },
3796 { UNION_DECL_NODE, noop },
3797 { ENUM_DECL_NODE, noop },
3798 { RECORD_DECL_NODE, noop },
3799 { 0, NULL }
3801 in_dataseg = 0; /* codeseg is default */
3802 /* Do the walk. */
3803 astproc_walk(root, NULL, map);
3806 /*---------------------------------------------------------------------------*/
3809 * Evaluates the given expression, _without_ replacing it in the AST
3810 * (unlike astproc_reduce_expression() and friends).
3812 static astnode *eval_expression(astnode *expr)
3814 switch (astnode_get_type(expr)) {
3816 case ARITHMETIC_NODE: {
3817 astnode *lhs = eval_expression(LHS(expr));
3818 astnode *rhs = eval_expression(RHS(expr));
3819 switch (expr->oper) {
3820 /* Binary ops */
3821 case PLUS_OPERATOR:
3822 case MINUS_OPERATOR:
3823 case MUL_OPERATOR:
3824 case DIV_OPERATOR:
3825 case MOD_OPERATOR:
3826 case AND_OPERATOR:
3827 case OR_OPERATOR:
3828 case XOR_OPERATOR:
3829 case SHL_OPERATOR:
3830 case SHR_OPERATOR:
3831 case LT_OPERATOR:
3832 case GT_OPERATOR:
3833 case EQ_OPERATOR:
3834 case NE_OPERATOR:
3835 case LE_OPERATOR:
3836 case GE_OPERATOR:
3837 if (astnode_is_type(lhs, INTEGER_NODE)
3838 && astnode_is_type(rhs, INTEGER_NODE)) {
3839 /* Both sides are integer literals. */
3840 switch (expr->oper) {
3841 case PLUS_OPERATOR: return astnode_create_integer(lhs->integer + rhs->integer, expr->loc);
3842 case MINUS_OPERATOR: return astnode_create_integer(lhs->integer - rhs->integer, expr->loc);
3843 case MUL_OPERATOR: return astnode_create_integer(lhs->integer * rhs->integer, expr->loc);
3844 case DIV_OPERATOR: return astnode_create_integer(lhs->integer / rhs->integer, expr->loc);
3845 case MOD_OPERATOR: return astnode_create_integer(lhs->integer % rhs->integer, expr->loc);
3846 case AND_OPERATOR: return astnode_create_integer(lhs->integer & rhs->integer, expr->loc);
3847 case OR_OPERATOR: return astnode_create_integer(lhs->integer | rhs->integer, expr->loc);
3848 case XOR_OPERATOR: return astnode_create_integer(lhs->integer ^ rhs->integer, expr->loc);
3849 case SHL_OPERATOR: return astnode_create_integer(lhs->integer << rhs->integer, expr->loc);
3850 case SHR_OPERATOR: return astnode_create_integer(lhs->integer >> rhs->integer, expr->loc);
3851 case LT_OPERATOR: return astnode_create_integer(lhs->integer < rhs->integer, expr->loc);
3852 case GT_OPERATOR: return astnode_create_integer(lhs->integer > rhs->integer, expr->loc);
3853 case EQ_OPERATOR: return astnode_create_integer(lhs->integer == rhs->integer, expr->loc);
3854 case NE_OPERATOR: return astnode_create_integer(lhs->integer != rhs->integer, expr->loc);
3855 case LE_OPERATOR: return astnode_create_integer(lhs->integer <= rhs->integer, expr->loc);
3856 case GE_OPERATOR: return astnode_create_integer(lhs->integer >= rhs->integer, expr->loc);
3858 default: /* ### Error, actually */
3859 break;
3862 /* Use some mathematical identities... */
3863 else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 0))
3864 && (expr->oper == PLUS_OPERATOR)) {
3865 /* 0+expr == expr */
3866 return astnode_clone(rhs, rhs->loc);
3867 } else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 0))
3868 && (expr->oper == PLUS_OPERATOR)) {
3869 /* expr+0 == expr */
3870 return astnode_clone(lhs, lhs->loc);
3871 } else if ((astnode_is_type(lhs, INTEGER_NODE) && (lhs->integer == 1))
3872 && (expr->oper == MUL_OPERATOR)) {
3873 /* 1*expr == expr */
3874 return astnode_clone(rhs, rhs->loc);
3875 } else if ((astnode_is_type(rhs, INTEGER_NODE) && (rhs->integer == 1))
3876 && ((expr->oper == MUL_OPERATOR) || (expr->oper == DIV_OPERATOR)) ) {
3877 /* expr*1 == expr */
3878 /* expr/1 == expr */
3879 return astnode_clone(lhs, lhs->loc);
3881 break;
3883 /* Unary ops */
3884 case NEG_OPERATOR:
3885 case NOT_OPERATOR:
3886 case LO_OPERATOR:
3887 case HI_OPERATOR:
3888 case UMINUS_OPERATOR:
3889 case BANK_OPERATOR:
3890 if (astnode_is_type(lhs, INTEGER_NODE)) {
3891 switch (expr->oper) {
3892 case NEG_OPERATOR: return astnode_create_integer(~lhs->integer, expr->loc);
3893 case NOT_OPERATOR: return astnode_create_integer(!lhs->integer, expr->loc);
3894 case LO_OPERATOR: return astnode_create_integer(lhs->integer & 0xFF, expr->loc);
3895 case HI_OPERATOR: return astnode_create_integer((lhs->integer >> 8) & 0xFF, expr->loc);
3896 case UMINUS_OPERATOR: return astnode_create_integer(-lhs->integer, expr->loc);
3897 default: break;
3900 break;
3901 } /* switch */
3902 } break;
3904 case INTEGER_NODE:
3905 return astnode_clone(expr, expr->loc);
3907 case IDENTIFIER_NODE: {
3908 symtab_entry *e = symtab_lookup(expr->ident);
3909 // ### assert(e->type == LABEL_SYMBOL);
3910 if (e->flags & ADDR_FLAG)
3911 return astnode_create_integer(e->address, expr->loc);
3912 } break;
3914 case CURRENT_PC_NODE:
3915 return astnode_create_integer(in_dataseg ? dataseg_pc : codeseg_pc, expr->loc);
3917 default:
3918 break;
3919 } /* switch */
3920 return 0;
3924 * Sets the address of the label to be the currently calculated PC.
3926 static int set_label_address(astnode *label, void *arg, astnode **next)
3928 symtab_entry *e = symtab_lookup(label->ident);
3929 // ### assert(e && (e->type == LABEL_SYMBOL));
3930 e->address = in_dataseg ? dataseg_pc : codeseg_pc;
3931 e->flags |= ADDR_FLAG;
3932 return 0;
3936 * Sets the current PC to the address specified by the ORG node.
3938 static int set_pc_from_org(astnode *org, void *arg, astnode **next)
3940 astnode *addr = LHS(org);
3941 assert(astnode_is_type(addr, INTEGER_NODE));
3942 if (in_dataseg)
3943 dataseg_pc = addr->integer;
3944 else
3945 codeseg_pc = addr->integer;
3946 return 0;
3950 * Ensures that the given symbol is defined.
3952 static int ensure_symbol_is_defined(astnode *id, void *arg, astnode **next)
3954 symtab_entry *e = symtab_lookup(id->ident);
3955 assert(e);
3956 if ((e->flags & EXTRN_FLAG) && !(e->flags & ERROR_UNDEFINED_FLAG)) {
3957 err(id->loc, "cannot generate pure binary because `%s' is not defined", id->ident);
3958 e->flags |= ERROR_UNDEFINED_FLAG;
3960 return 0;
3964 * Increments PC according to the size of the instruction.
3966 static int inc_pc_by_instruction(astnode *instr, void *arg, astnode **next)
3968 assert(!in_dataseg);
3969 if (LHS(instr)) {
3970 /* Has operand */
3971 unsigned char zp_op = opcode_zp_equiv(instr->instr.opcode);
3972 if (zp_op != 0xFF) {
3973 /* See if we can optimize this to a ZP-instruction */
3974 astnode *operand = eval_expression(LHS(instr));
3975 if (operand && astnode_is_type(operand, INTEGER_NODE)) {
3976 if ((operand->integer >= 0) && (operand->integer < 256)) {
3977 instr->instr.opcode = zp_op;
3979 astnode_finalize(operand);
3983 codeseg_pc += opcode_length(instr->instr.opcode);
3984 return 1;
3988 * Increments PC according to the size of the defined data.
3990 static int inc_pc_by_data(astnode *data, void *arg, astnode **next)
3992 astnode *type = LHS(data);
3993 int count = astnode_get_child_count(data) - 1;
3994 int nbytes;
3995 assert(!in_dataseg);
3996 switch (type->datatype) {
3997 case BYTE_DATATYPE: nbytes = count; break;
3998 case WORD_DATATYPE: nbytes = count * 2; break;
3999 case DWORD_DATATYPE: nbytes = count * 4; break;
4000 default:
4001 assert(0);
4002 break;
4004 codeseg_pc += nbytes;
4005 return 0;
4009 * Increments PC according to the size of the included binary.
4011 static int inc_pc_by_binary(astnode *node, void *arg, astnode **next)
4013 assert(!in_dataseg);
4014 codeseg_pc += node->binary.size;
4015 return 0;
4019 * Increments PC according to the size of the storage.
4021 static int inc_pc_by_storage(astnode *storage, void *arg, astnode **next)
4023 astnode *type = LHS(storage);
4024 assert(type->datatype == BYTE_DATATYPE);
4025 astnode *count = eval_expression(RHS(storage));
4026 if (count) {
4027 if (astnode_get_type(count) == INTEGER_NODE) {
4028 if (in_dataseg)
4029 dataseg_pc += count->integer;
4030 else
4031 codeseg_pc += count->integer;
4033 astnode_finalize(count);
4035 return 1;
4039 * This pass is only performed if the output format is pure 6502.
4040 * It ensures that it is actually possible to generate pure 6502
4041 * for this syntax tree (i.e. no external symbols).
4042 * Furthermore, it calculates the address of all labels, so that
4043 * everything is ready for the final output phase.
4045 void astproc_fourth_pass(astnode *root)
4047 int x;
4048 /* ### Should loop while there's a change in the address of
4049 one or more labels */
4050 for (x = 0; x < 2; ++x) {
4051 in_dataseg = 0; /* codeseg is default */
4052 dataseg_pc = 0;
4053 codeseg_pc = 0;
4054 /* Table of callback functions for our purpose. */
4055 static astnodeprocmap map[] = {
4056 { DATASEG_NODE, process_dataseg },
4057 { CODESEG_NODE, process_codeseg },
4058 { ORG_NODE, set_pc_from_org },
4059 { LABEL_NODE, set_label_address },
4060 { IDENTIFIER_NODE, ensure_symbol_is_defined },
4061 { INSTRUCTION_NODE, inc_pc_by_instruction },
4062 { DATA_NODE, inc_pc_by_data },
4063 { STORAGE_NODE, inc_pc_by_storage },
4064 { BINARY_NODE, inc_pc_by_binary },
4065 { STRUC_DECL_NODE, noop },
4066 { UNION_DECL_NODE, noop },
4067 { ENUM_DECL_NODE, noop },
4068 { RECORD_DECL_NODE, noop },
4069 { 0, NULL }
4071 /* Do the walk. */
4072 astproc_walk(root, NULL, map);
4076 /*---------------------------------------------------------------------------*/
4079 * Writes an instruction.
4081 static int write_instruction(astnode *instr, void *arg, astnode **next)
4083 FILE *fp = (FILE *)arg;
4084 unsigned char op = instr->instr.opcode;
4085 int len = opcode_length(op);
4086 fputc(op, fp);
4087 if (len > 1) {
4088 /* Write operand */
4089 astnode *operand = eval_expression(LHS(instr));
4090 if(!astnode_is_type(operand, INTEGER_NODE)) {
4091 /* ### This is rather fatal, it should be a literal by this point */
4092 err(instr->loc, "operand does not evaluate to literal");
4093 } else {
4094 int value = operand->integer;
4095 if (len == 2) {
4096 /* Check if it's a relative jump */
4097 switch (op) {
4098 case 0x10:
4099 case 0x30:
4100 case 0x50:
4101 case 0x70:
4102 case 0x90:
4103 case 0xB0:
4104 case 0xD0:
4105 case 0xF0:
4106 /* Calculate difference between target and address of next instruction */
4107 value = value - (codeseg_pc + 2);
4108 if (!IS_BYTE_VALUE(value)) {
4109 err(operand->loc, "branch out of range");
4110 value &= 0xFF;
4112 break;
4114 default:
4115 if (!IS_BYTE_VALUE(value)) {
4116 warn(operand->loc, "operand out of range; truncated");
4117 value &= 0xFF;
4119 break;
4121 fputc((unsigned char)value, fp);
4122 } else {
4123 assert(len == 3);
4124 if (!IS_WORD_VALUE(value)) {
4125 warn(operand->loc, "operand out of range; truncated");
4126 value &= 0xFFFF;
4128 fputc((unsigned char)value, fp);
4129 fputc((unsigned char)(value >> 8), fp);
4132 astnode_finalize(operand);
4134 return 0;
4138 * Writes data.
4140 static int write_data(astnode *data, void *arg, astnode **next)
4142 FILE *fp = (FILE *)arg;
4143 astnode *type = LHS(data);
4144 astnode *expr;
4145 assert(!in_dataseg);
4146 for (expr = RHS(data); expr != NULL; expr = astnode_get_next_sibling(expr) ) {
4147 int value;
4148 astnode *e = eval_expression(expr);
4149 assert(e->type == INTEGER_NODE);
4150 value = e->integer;
4151 switch (type->datatype) {
4152 case BYTE_DATATYPE:
4153 if (!IS_BYTE_VALUE(value)) {
4154 warn(expr->loc, "operand out of range; truncated");
4155 value &= 0xFF;
4157 fputc((unsigned char)value, fp);
4158 codeseg_pc += 1;
4159 break;
4161 case WORD_DATATYPE:
4162 if (!IS_WORD_VALUE(value)) {
4163 warn(expr->loc, "operand out of range; truncated");
4164 value &= 0xFFFF;
4166 fputc((unsigned char)value, fp);
4167 fputc((unsigned char)(value >> 8), fp);
4168 codeseg_pc += 2;
4169 break;
4171 case DWORD_DATATYPE:
4172 fputc((unsigned char)value, fp);
4173 fputc((unsigned char)(value >> 8), fp);
4174 fputc((unsigned char)(value >> 16), fp);
4175 fputc((unsigned char)(value >> 24), fp);
4176 codeseg_pc += 4;
4177 break;
4179 default:
4180 assert(0);
4181 break;
4183 astnode_finalize(e);
4185 return 0;
4189 * Writes binary.
4191 static int write_binary(astnode *node, void *arg, astnode **next)
4193 FILE *fp = (FILE *)arg;
4194 fwrite(node->binary.data, 1, node->binary.size, fp);
4195 return 0;
4199 * This pass is only performed if the output format is pure 6502.
4200 * It writes the binary code.
4202 void astproc_fifth_pass(astnode *root)
4204 FILE *fp = fopen(xasm_args.output_file, "wb");
4205 if (!fp) {
4206 fprintf(stderr, "could not open '%s' for writing\n", xasm_args.output_file);
4207 ++err_count;
4208 return;
4210 /* Table of callback functions for our purpose. */
4211 static astnodeprocmap map[] = {
4212 { DATASEG_NODE, process_dataseg },
4213 { CODESEG_NODE, process_codeseg },
4214 { ORG_NODE, set_pc_from_org },
4215 { INSTRUCTION_NODE, write_instruction },
4216 { DATA_NODE, write_data },
4217 { STORAGE_NODE, inc_pc_by_storage },
4218 { BINARY_NODE, write_binary },
4219 { STRUC_DECL_NODE, noop },
4220 { UNION_DECL_NODE, noop },
4221 { ENUM_DECL_NODE, noop },
4222 { RECORD_DECL_NODE, noop },
4223 { 0, NULL }
4225 in_dataseg = 0; /* codeseg is default */
4226 dataseg_pc = 0;
4227 codeseg_pc = 0;
4228 /* Do the walk. */
4229 astproc_walk(root, fp, map);
4230 fclose(fp);