2 * $Id: codegen.c,v 1.10 2007/08/12 18:58:12 khansen Exp $
4 * Revision 1.10 2007/08/12 18:58:12 khansen
5 * ability to generate pure 6502 binary (--pure-binary switch)
7 * Revision 1.9 2007/08/09 20:33:48 khansen
10 * Revision 1.8 2007/08/07 22:42:17 khansen
11 * make sure a CMD_FILE command is output at the start of each segment
13 * Revision 1.7 2007/08/07 21:23:24 khansen
16 * Revision 1.6 2007/07/22 13:33:26 khansen
17 * convert tabs to whitespaces
19 * Revision 1.5 2004/12/19 19:58:37 kenth
22 * Revision 1.4 2004/12/18 16:58:52 kenth
23 * outputs file and line info if --debug
24 * CMD_DSW, CMD_DSD gone
26 * Revision 1.3 2004/12/14 01:49:14 kenth
29 * Revision 1.2 2004/12/06 04:52:36 kenth
32 * Revision 1.1 2004/06/30 07:55:38 kenth
38 * (C) 2004 Kent Hansen
40 * The XORcyst is free software; you can redistribute it and/or modify
41 * it under the terms of the GNU General Public License as published by
42 * the Free Software Foundation; either version 2 of the License, or
43 * (at your option) any later version.
45 * The XORcyst is distributed in the hope that it will be useful,
46 * but WITHOUT ANY WARRANTY; without even the implied warranty of
47 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
48 * GNU General Public License for more details.
50 * You should have received a copy of the GNU General Public License
51 * along with The XORcyst; if not, write to the Free Software
52 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
56 * This file contains functions for generating object code from an abstract
58 * The output of the assembler is not pure 6502 binary code. It is a file in
59 * a custom object format meant to be fed to the XORcyst linker. The object
60 * file contains information about the symbols it exports (that others may
61 * access), the symbols it imports (the linker will look for these in other
62 * units and resolve their values), and of course the 6502 instructions.
63 * Labels, data and instructions are encoded in a special bytecode language.
64 * All operands to instructions and directives which aren't constants are
65 * stored as expressions. (It is the linker's job to parse the expressions,
66 * compute their values and produce the actual 6502 binary.)
68 * The format of resulting file is:
69 * 1 word: magic number
71 * 1 word: number of exported constants
72 * ? bytes: exported constants descriptors
73 * 1 byte: number of units explicitly imported from
74 * ? bytes: names of units explicitly imported from
75 * 1 word: number of imported symbols (externals)
76 * ? bytes: imported symbols descriptors
77 * 24-bit word: size of data segment
78 * ? bytes: data segment bytecodes
79 * 24-bit word: size of code segment
80 * ? bytes: code segment bytecodes
81 * 1 word: number of expressions
82 * ? bytes: expression descriptors
95 /*---------------------------------------------------------------------------*/
97 /* Convenient macros to write integers in big-endian form. */
98 #define put_1(f, b) { fputc((unsigned char)(b), f); }
99 #define put_2(f, w) { put_1(f, (w) >> 8); put_1(f, w); }
100 #define put_3(f, t) { put_2(f, (t) >> 8); put_1(f, t); }
101 #define put_4(f, q) { put_2(f, (q) >> 16); put_2(f, q); }
103 /* Convenient macros to write length-prepended strings. */
104 #define put_str_8(f, s) { put_1(f, strlen(s)-1); fprintf(f, "%s", s); }
105 #define put_str_16(f, s) { put_2(f, strlen(s)-1); fprintf(f, "%s", s); }
106 #define put_str(f, s) { if (strlen(s) <= 0x100) { put_str_8(f, s); } else { put_str_16(f, s); } }
109 * Writes a sequence of bytes to file.
110 * @param f File handle
111 * @param bytes Array of bytes
112 * @param count Number of bytes
114 static void put_bytes(FILE *f
, const unsigned char *bytes
, int count
)
117 /* Write the bytes */
118 for (i
=0; i
<count
; i
++) {
123 /*---------------------------------------------------------------------------*/
125 #define BUF_MAX_SIZE 0x10000
126 /** Buffer used to collect binary data generated from processing consecutive code statements. */
127 static unsigned char buf
[BUF_MAX_SIZE
];
128 /** Current position in buffer */
129 static int buf_pos
= 0;
131 static FILE *buf_file
= NULL
;
133 #define buf_reset() { buf_pos = 0; }
136 * Flushes contents of <code>buf</code> to file.
137 * @param f File handle
139 static void buf_flush(FILE *f
)
142 if (buf_pos
<= 0x100) {
143 /* Use 8-bit form. */
145 put_1(f
, buf_pos
- 1);
146 put_bytes(f
, buf
, buf_pos
);
148 else if (buf_pos
<= 0x10000) {
149 /* Use 16-bit form. */
151 put_2(f
, buf_pos
- 1);
152 put_bytes(f
, buf
, buf_pos
);
155 /* Error, buffer overflow. */
156 fprintf(stderr
, "buf_flush: buffer overflow\n");
162 /** Appends single byte to buffer. */
163 #define buf_append(b) { if (buf_pos >= BUF_MAX_SIZE) buf_flush(buf_file); buf[buf_pos++] = (unsigned char)(b); }
165 /** Appends n bytes to buffer. */
166 static void buf_append_n(unsigned char *data
, int size
)
169 for (i
=0; i
<size
; i
++) {
174 /*---------------------------------------------------------------------------*/
177 * Converts an arithmetic operator (from ARITHMETIC_NODE) into a byte value.
178 * The bytecode language uses a different operator encoding which is why
179 * we need this function.
180 * @param op Operator to convert
181 * @return Bytecode-equivalent operator
183 static unsigned char operator_to_bytecode(arithmetic_operator op
)
186 case PLUS_OPERATOR
: return OP_PLUS
;
187 case MINUS_OPERATOR
: return OP_MINUS
;
188 case MUL_OPERATOR
: return OP_MUL
;
189 case DIV_OPERATOR
: return OP_DIV
;
190 case MOD_OPERATOR
: return OP_MOD
;
191 case AND_OPERATOR
: return OP_AND
;
192 case OR_OPERATOR
: return OP_OR
;
193 case XOR_OPERATOR
: return OP_XOR
;
194 case SHL_OPERATOR
: return OP_SHL
;
195 case SHR_OPERATOR
: return OP_SHR
;
196 case LT_OPERATOR
: return OP_LT
;
197 case GT_OPERATOR
: return OP_GT
;
198 case EQ_OPERATOR
: return OP_EQ
;
199 case NE_OPERATOR
: return OP_NE
;
200 case LE_OPERATOR
: return OP_LE
;
201 case GE_OPERATOR
: return OP_GE
;
202 case NEG_OPERATOR
: return OP_NEG
;
203 case NOT_OPERATOR
: return OP_NOT
;
204 case LO_OPERATOR
: return OP_LO
;
205 case HI_OPERATOR
: return OP_HI
;
206 case UMINUS_OPERATOR
: return OP_UMINUS
;
207 case BANK_OPERATOR
: return OP_BANK
;
210 fprintf(stderr
, "operator_to_bytecode(): bad operator\n");
215 /*---------------------------------------------------------------------------*/
217 /* Maximum number of expressions that the code generator can handle. */
218 #define MAX_EXPRS 16384
220 /* List of expressions involved in code statements. */
221 static astnode
*expr_list
[MAX_EXPRS
];
222 /* Number of expressions generated so far. */
223 static int expr_count
= 0;
226 * Registers an expression with the code generator.
227 * Unique 16-bit ID of the expression is returned, which may then be used to refer
228 * to the expression in bytecodes.
229 * @param expr Expression to register
230 * @return Unique ID of expression
232 static int register_expression(astnode
*expr
)
235 /* See if an equivalent expression already is registered */
236 for (i
=0; i
<expr_count
; i
++) {
237 if (astnode_equal(expr
, expr_list
[i
])) {
238 /* Return ID of equivalent expression. */
242 /* This is a new expression, add it to end of list if possible. */
243 if (expr_count
== MAX_EXPRS
) {
244 fprintf(stderr
, "register_expression(): buffer overflow\n");
248 expr_list
[expr_count
++] = expr
;
254 * Serializes an expression recursively.
255 * The result of serializing a leaf node is always (type, value).
256 * The result of serializing an operator node is always (operator, lhs, rhs).
257 * @param fp File handle
258 * @param expr Expression to serialize
260 static void put_expr_recursive(FILE *fp
, const astnode
*expr
)
265 if (expr
== NULL
) { return; }
266 // printf("put_expr_recursive(%s)\n", astnode_type_to_string(expr->type) );
267 switch (astnode_get_type(expr
)) {
269 v
= (unsigned long)expr
->integer
;
271 /* Write type */ put_1(fp
, INT_8
);
272 /* Write value */ put_1(fp
, v
);
274 else if (v
< 0x10000) {
275 /* Write type */ put_1(fp
, INT_16
);
276 /* Write value */ put_2(fp
, v
);
278 else if (v
< 0x1000000) {
279 /* Write type */ put_1(fp
, INT_24
);
280 /* Write value */ put_3(fp
, v
);
283 /* Write type */ put_1(fp
, INT_32
);
284 /* Write value */ put_4(fp
, v
);
290 if (strlen(s
) <= 0x100) {
291 /* Write type */ put_1(fp
, STR_8
);
292 /* Write string */ put_str_8(fp
, s
);
295 /* Write type */ put_1(fp
, STR_16
);
296 /* Write string */ put_str_16(fp
, s
);
300 case IDENTIFIER_NODE
:
302 e
= symtab_lookup(expr
->ident
);
304 put_1(fp
, (e
->flags
& EXTRN_FLAG
) ? EXTRN
: LOCAL
);
309 case ARITHMETIC_NODE
:
310 /* The operator goes first */
311 put_1(fp
, operator_to_bytecode(expr
->oper
) );
312 /* Then left-hand side */
313 put_expr_recursive(fp
, LHS(expr
));
314 /* Then right-hand side */
315 put_expr_recursive(fp
, RHS(expr
));
318 case CURRENT_PC_NODE
:
326 "put_expr_recursive(): unknown expression type (%s)\n",
327 astnode_type_to_string(astnode_get_type(expr
))
334 * Serializes an expression.
335 * The expression is written in postfix form so that it can be easily evaluated
336 * even as pure binary.
337 * @param fp File handle
338 * @param expr Expression to serialize
340 static void put_expr(FILE *fp
, const astnode
*expr
)
342 put_expr_recursive(fp
, expr
);
346 * Writes all expressions registered during code generation to file.
347 * @param fp File handle
349 static void put_expressions(FILE *fp
)
352 /* Write expression count */
353 put_2(fp
, expr_count
);
354 /* Write serialized expressions */
355 for (i
=0; i
<expr_count
; i
++) {
356 put_expr(fp
, expr_list
[i
]);
360 /*---------------------------------------------------------------------------*/
363 * Tests if two file location descriptors are equal.
365 static int locations_are_equal(const location
*l1
, const location
*l2
)
367 return ((l1
->first_line
== l2
->first_line
) && (strcmp(l1
->file
, l2
->file
) == 0));
370 /*---------------------------------------------------------------------------*/
373 * Writes a statement to file.
374 * @param fp File handle
377 static void put_statement(FILE *fp
, const astnode
*n
, location
*loc
)
386 static int tag
= 0; /* Used to give labels unique IDs */
388 /* See if we should embed file+line information */
389 if (xasm_args
.debug
&& !locations_are_equal(loc
, &n
->loc
) ) {
391 if (strcmp(loc
->file
, n
->loc
.file
) != 0) {
393 put_str_8(fp
, n
->loc
.file
);
395 if (loc
->first_line
!= n
->loc
.first_line
) {
396 line
= n
->loc
.first_line
;
397 if (line
== loc
->first_line
+ 1) {
398 put_1(fp
, CMD_LINE_INC
);
401 put_1(fp
, CMD_LINE8
);
404 else if (line
< 65536) {
405 put_1(fp
, CMD_LINE16
);
409 put_1(fp
, CMD_LINE24
);
417 switch (astnode_get_type(n
)) {
422 put_1(fp
, CMD_LABEL
);
423 /* Look it up in symbol table */
424 e
= symtab_lookup(n
->label
);
425 /* IMPORTANT: Tag label uniquely so we can refer to it in expressions */
427 /* Write flag byte */
429 flags
|= (e
->flags
& PUBLIC_FLAG
) ? LABEL_FLAG_EXPORT
: 0;
430 flags
|= (e
->flags
& ZEROPAGE_FLAG
) ? LABEL_FLAG_ZEROPAGE
: 0;
431 flags
|= (e
->flags
& ALIGN_FLAG
) ? LABEL_FLAG_ALIGN
: 0;
432 flags
|= (e
->flags
& ADDR_FLAG
) ? LABEL_FLAG_ADDR
: 0;
434 /* If exported, write name also */
435 if (e
->flags
& PUBLIC_FLAG
) {
436 put_str_8(fp
, e
->id
);
438 /* if alignment, write that too */
439 if (e
->flags
& ALIGN_FLAG
) {
442 /* if address, write that too */
443 if (e
->flags
& ADDR_FLAG
) {
444 put_2(fp
, e
->address
);
448 case INSTRUCTION_NODE
:
449 /* Get # of bytes such a 6502 instruction occupies */
450 len
= opcode_length(n
->instr
.opcode
);
452 /* Instruction has no operand, so append it to binary buffer. */
453 buf_append(n
->instr
.opcode
);
456 /* Instruction has an operand.
457 It may be a constant or something unresolved. */
458 expr
= astnode_get_child(n
, 0);
459 if (astnode_get_type(expr
) == INTEGER_NODE
) {
460 /* Instruction has constant operand, so append it to binary buffer. */
461 buf_append(n
->instr
.opcode
);
463 buf_append(expr
->integer
);
466 buf_append(expr
->integer
>> 8);
470 /* Instruction has unresolved operand. */
471 /* Flush binary buffer to file */
473 /* Output 4-byte sequence: CMD_INSTR [opcode] [expr-id] */
474 put_1(fp
, CMD_INSTR
);
475 put_1(fp
, n
->instr
.opcode
);
476 put_2(fp
, register_expression(expr
));
482 /* Append the binary to the buffer */
483 buf_append_n(n
->binary
.data
, n
->binary
.size
);
489 /* Go through all the elements of the data array */
490 for (expr
= RHS(n
); expr
!= NULL
; expr
= astnode_get_next_sibling(expr
) ) {
491 if (astnode_get_type(expr
) == INTEGER_NODE
) {
492 /* Constant, output as binary */
493 switch (type
->datatype
) {
495 buf_append(expr
->integer
);
499 /* Note: little-endian here (6502) */
500 buf_append(expr
->integer
);
501 buf_append(expr
->integer
>> 8);
505 /* Note: little-endian here (6502) */
506 buf_append(expr
->integer
);
507 buf_append(expr
->integer
>> 8);
508 buf_append(expr
->integer
>> 16);
509 buf_append(expr
->integer
>> 24);
518 /* Unresolved. Linker will handle it. */
519 /* Flush binary buffer to file */
521 /* Output 3-byte sequence: [type-cmd] [expr-id] */
522 switch (type
->datatype
) {
523 case BYTE_DATATYPE
: put_1(fp
, CMD_DB
); break;
524 case WORD_DATATYPE
: put_1(fp
, CMD_DW
); break;
525 case DWORD_DATATYPE
: put_1(fp
, CMD_DD
); break;
530 put_2(fp
, register_expression(expr
));
538 assert(type
->datatype
== BYTE_DATATYPE
);
539 /* Get expression which is the count */
542 if (astnode_get_type(expr
) == INTEGER_NODE
) {
543 /* Use the immediate form. */
544 /* Calculate the # of bytes. */
546 /* Select bytecode depending on whether count fits in 8 bits or not */
555 put_1(fp
, CMD_DSI16
);
561 /* Use the unresolved form. */
563 /* Write expression ID */
564 put_2(fp
, register_expression(expr
));
571 case STRUC_DECL_NODE
:
572 case UNION_DECL_NODE
:
574 case RECORD_DECL_NODE
:
580 fprintf(stderr
, "put_statement(%s): unsupported type\n", astnode_type_to_string(astnode_get_type(n
)));
585 /*---------------------------------------------------------------------------*/
588 * Writes the public constants to file.
589 * @param fp File handle
591 static void put_public_constants(FILE *fp
)
593 symbol_ident_list list
;
601 /* 16-bit count followed by (name, type, value) triplets */
603 symtab_list_type(CONSTANT_SYMBOL
, &list
);
604 /* Make one iteration to look them up and count them */
606 for (i
=0; i
<list
.size
; i
++) {
607 e
= symtab_lookup(list
.idents
[i
]);
608 if (e
->flags
& PUBLIC_FLAG
) {
615 for (i
=0; i
<list
.size
; i
++) {
616 e
= symtab_lookup(list
.idents
[i
]);
617 if (e
->flags
& PUBLIC_FLAG
) {
619 put_str_8(fp
, e
->id
);
620 /* Get the expression which represents the value */
621 expr
= (astnode
*)e
->def
;
622 /* Write the type and value */
623 switch (astnode_get_type(expr
)) {
625 v
= (unsigned long)expr
->integer
;
627 /* Write type */ put_1(fp
, INT_8
);
628 /* Write value */ put_1(fp
, v
);
630 else if (v
< 0x10000) {
631 /* Write type */ put_1(fp
, INT_16
);
632 /* Write value */ put_2(fp
, v
);
634 else if (v
< 0x1000000) {
635 /* Write type */ put_1(fp
, INT_24
);
636 /* Write value */ put_3(fp
, v
);
639 /* Write type */ put_1(fp
, INT_32
);
640 /* Write value */ put_4(fp
, v
);
646 if (strlen(s
) <= 0x100) {
647 /* Write type */ put_1(fp
, STR_8
);
648 /* Write value */ put_str_8(fp
, s
);
650 else if (strlen(s
) <= 0x10000) {
651 /* Write type */ put_1(fp
, STR_16
);
652 /* Write value */ put_str_16(fp
, s
);
655 fprintf(stderr
, "put_public_constants(): string constant too long\n");
660 fprintf(stderr
, "put_public_constants(): expression isn't a constant after all\n");
665 symtab_list_finalize(&list
);
669 * Writes the external symbol specifiers to file.
670 * @param fp File handle
672 static void put_externals(FILE *fp
)
674 symbol_ident_list list
;
679 /* number of units explicitly imported from */
680 put_1(fp
, 0x00); /* TODO */
681 /* List of unit names (TODO) */
683 /* 16-bit count followed by name list */
685 symtab_list_type(ANY_SYMBOL
, &list
);
686 /* One iteration to count them */
688 for (i
=0; i
<list
.size
; i
++) {
689 e
= symtab_lookup(list
.idents
[i
]);
690 if (e
->flags
& EXTRN_FLAG
) {
695 /* Another iteration to output them */
697 for (i
=0; i
<list
.size
; i
++) {
699 e
= symtab_lookup(list
.idents
[i
]);
700 if (e
->flags
& EXTRN_FLAG
) {
701 /* IMPORTANT: Set unique tag so we can refer to it in expressions */
702 /* (This probably shouldn't be done here though...???) */
704 /* Write unit (TODO) */
705 put_1(fp
, 0x00); /* 0 = Any unit */
707 put_str_8(fp
, list
.idents
[i
]);
710 symtab_list_finalize(&list
);
714 * Writes a segment to file.
715 * @param fp File handle
716 * @param root AST node which has the list of assembly statements as its children
717 * @param targetseg Specifies the type of segment (data or code)
719 static void put_segment(FILE *fp
, const astnode
*root
, int targetseg
)
722 int seg
; /* The type of segment we're in */
723 fpos_t size_pos
, end_pos
;
725 location loc
= { -1, -1, -1, -1, "" };
727 /* Write the size of the segment (backpatched later) */
728 fgetpos(fp
, &size_pos
);
729 put_3(fp
, 0xDECADE); /* 24-bit integer */
731 seg
= 2; /* Codeseg is default */
734 /* Step through all the nodes in the list */
735 for (n
= astnode_get_first_child(root
); n
!= NULL
; n
= astnode_get_next_sibling(n
) ) {
736 switch (astnode_get_type(n
)) {
738 seg
= 1; /* Now we're in a DATA segment */
742 seg
= 2; /* Now we're in a CODE segment */
746 /* Only output the statement if we're in the proper segment. */
747 if (seg
== targetseg
) {
748 put_statement(fp
, n
, &loc
);
757 fgetpos(fp
, &end_pos
); /* First save the current (end) position */
758 fsetpos(fp
, &size_pos
); /* Set position to where the size should be written */
759 put_3(fp
, end
- start
); /* Size is difference between end and start offset */
760 fsetpos(fp
, &end_pos
); /* Restore end position */
764 * Writes a sequence of parsed assembly statements to file.
765 * @param fp File handle
766 * @param root AST node which has the assembly statements as its children
768 static void put_bytecodes(FILE *fp
, const astnode
*root
)
770 /* Write data segment first */
771 put_segment(fp
, root
, 1);
772 /* Write code segment next */
773 put_segment(fp
, root
, 2);
776 /*---------------------------------------------------------------------------*/
779 * Writes an object file which captures all the information in a syntax tree.
780 * @param root Root of the syntax tree
781 * @param filename Name of the file to write to (object file)
783 void codegen_write(const astnode
*root
, const char *filename
)
785 /* Attempt to open file for writing */
786 FILE *fp
= fopen(filename
, "wb");
788 fprintf(stderr
, "codegen_write: could not open '%s' for writing\n", filename
);
789 /* ### increase error count */
796 /* Write magic number */
798 /* Write version (upper nibble=major, lower nibble=minor) */
799 put_1(fp
, A_VERSION
);
801 /* Write exported constants. */
802 put_public_constants(fp
);
804 /* Write imported symbols. */
807 /* Write bytecodes. */
808 put_bytecodes(fp
, root
);
810 /* Write expressions */