agssim: implement ptrstack
[rofl0r-agsutils.git] / Assembler.c
blob1175cfb56b20be4629b0dcbaf08689dfdd2eded7
1 #define _GNU_SOURCE
2 #include "File.h"
3 #include "ByteArray.h"
4 #include "MemGrow.h"
5 #include "Script_internal.h"
6 #include "List.h"
7 #include <ctype.h>
8 #include <stdio.h>
9 #include <assert.h>
10 #include <string.h>
11 #include <stdlib.h>
12 #include "Assembler.h"
/* One entry of the fixup table built by add_fixup() and emitted by
 * write_fixup_list(). */
struct fixup {
	/* fixup kind as passed to add_fixup() (FIXUP_STRING, FIXUP_DATADATA, ...) */
	int type;
	/* instruction number for code fixups; asm_data() stores the data
	 * position here for FIXUP_DATADATA entries */
	unsigned int offset;
};
/* A counted string as stored in the string table and the import list. */
struct string {
	/* number of characters in ptr, the terminating NUL not included */
	size_t len;
	/* NUL-terminated text */
	char *ptr;
};
/* A name bound to an instruction number. The same layout is used for label
 * definitions, label references and function references. */
struct label {
	/* heap copy of the name (created with strdup() by the add_* helpers) */
	char *name;
	/* position in the code array, counted in instructions/words */
	unsigned int insno;
};
/* A variable declared in the .data section, as recorded by asm_data(). */
struct variable {
	/* heap copy of the variable name */
	char *name;
	/* size in bytes chosen by asm_data(): 4 (int), 2 (short), 1 (char),
	 * N (char[N]) or 200 (string) */
	unsigned int vs;
	/* byte offset of the variable inside the global data block */
	unsigned int offset;
};
35 static int add_label(AS *a, char* name, size_t insno) {
36 struct label item = { .name = strdup(name), .insno = insno };
37 assert(item.name);
38 return List_add(a->label_list, &item);
41 static int get_label_offset(AS *a, char* name) {
42 struct label item;
43 size_t i = 0;
44 for(; i < List_size(a->label_list); i++) {
45 assert(List_get(a->label_list, i, &item));
46 if(!strcmp(item.name, name))
47 return item.insno;
49 assert(0);
50 return 0;
53 static int add_label_ref(AS *a, char * name, size_t insno) {
54 /* add reference to named label to a list. after the first pass
55 * over the code these locations have to be fixed with the offset
56 * of the label. */
57 struct label item = { .name = strdup(name), .insno = insno };
58 assert(item.name);
59 return List_add(a->label_ref_list, &item);
62 static int add_function_ref(AS *a, char* name, size_t insno) {
63 /* add reference to named function to a list. after the first pass
64 * over the code these locations have to be fixed with the offset
65 * of the label. */
66 struct label item = { .name = strdup(name), .insno = insno };
67 assert(item.name);
68 return List_add(a->function_ref_list, &item);
71 static int add_export(AS *a, int type, char* name, size_t offset) {
72 struct function_export item = { .fn = strdup(name), .instr = offset, .type = type};
73 assert(item.fn);
74 return List_add(a->export_list, &item);
77 static int add_fixup(AS *a, int type, size_t offset) {
78 struct fixup item = {.type = type, .offset = offset};
79 /* offset equals instruction number for non-DATADATA fixups */
80 return List_add(a->fixup_list, &item);
83 static int add_or_get_string(AS* a, char* str) {
84 /* return index of string in string table
85 * add to string table if not yet existing */
86 str++; /* leading '"' */
87 size_t l = strlen(str);
88 l--;
89 str[l] = 0; /* trailing '"' */
90 struct string item = {.ptr = str, .len = l }, iter;
91 size_t i = 0;
92 for(; i < List_size(a->string_list); i++) {
93 assert(List_get(a->string_list, i, &iter));
94 if(iter.len == item.len && !strcmp(iter.ptr, str)) {
95 return i;
98 item.ptr = strdup(str);
99 List_add(a->string_list, &item);
100 return List_size(a->string_list) -1;
103 static unsigned get_string_offset(AS *a, unsigned index) {
104 assert(index < List_size(a->string_list));
105 unsigned i = 0, ret = 0;
106 struct string item;
107 for(; i < index; i++) {
108 assert(List_get(a->string_list, i, &item));
109 ret += item.len + 1;
111 return ret;
114 static size_t get_string_section_length(AS* a) {
115 struct string item;;
116 size_t i = 0, l = 0;
117 for(; i < List_size(a->string_list); i++) {
118 assert(List_get(a->string_list, i, &item));
119 l += item.len + 1;
121 return l;
124 static int add_variable(AS *a, char* name, unsigned vs, size_t offset) {
125 struct variable item = { .name = strdup(name), .vs = vs, .offset = offset };
126 return List_add(a->variable_list, &item);
129 static int get_variable_offset(AS* a, char* name) {
130 /* return globaldata offset of named variable */
131 size_t i = 0;
132 struct variable item;
133 for(; i < List_size(a->variable_list); i++) {
134 assert(List_get(a->variable_list, i, &item));
135 if(!strcmp(item.name, name))
136 return item.offset;
138 assert(0);
139 return 0;
/* Scan `in` from the beginning for the first line that starts with '.'
 * followed by `name` (prefix match).
 * Returns the file offset of the line following that header, or -1 when no
 * such line exists or the stream cannot be rewound.
 * *lineno receives the number of fgets() reads done, i.e. the line number
 * of the header as long as no line exceeds the 1024 byte buffer. */
static ssize_t find_section(FILE* in, char* name, size_t *lineno) {
	char buf[1024];
	size_t off = 0, l = strlen(name);
	*lineno = 0;
	if(fseek(in, 0, SEEK_SET) != 0)
		return -1;
	while(fgets(buf, sizeof buf, in)) {
		*lineno = *lineno + 1;
		off += strlen(buf);
		/* strncmp stops at the end of the line, so bytes behind the
		 * terminator (stale or uninitialised) are never inspected */
		if(buf[0] == '.' && strncmp(buf + 1, name, l) == 0)
			return off;
	}
	return -1;
}
156 static int asm_data(AS* a) {
157 size_t lineno;
158 ssize_t start = find_section(a->in, "data", &lineno);
159 if(start == -1) return 1; // it is valid for .s file to only have .text
160 fseek(a->in, start, SEEK_SET);
161 char buf[1024];
162 size_t data_pos = 0;
163 while(fgets(buf, sizeof buf, a->in) && buf[0] != '.') {
164 if(buf[0] == '\n') continue;
165 char* p = buf, *pend = buf + sizeof buf, *var;
166 int exportflag = 0;
167 unsigned vs = 0;
168 if(*p == '#' || *p == ';') continue;
169 while(isspace(*p) && p < pend) p++;
170 if(!memcmp(p, "export", 6) && isspace(p[6])) {
171 p += 7;
172 exportflag = 1;
173 while(isspace(*p) && p < pend) p++;
175 if(memcmp(p, "int", 3) == 0)
176 vs = 4;
177 else if(memcmp(p, "short", 5) == 0)
178 vs = 2;
179 else if(memcmp(p, "char", 4) == 0) {
180 vs = 1;
181 if(p[4] == '[') {
182 vs = atoi(p+5);
183 char *q = p+5;
184 while(isdigit(*q) && q < pend) q++;
185 if(vs == 0 || *q != ']') {
186 dprintf(2, "error: expected number > 0 and ']' after '['\n");
187 return 0;
190 else vs = 1;
191 } else if(memcmp(p, "string", 6) == 0)
192 vs = 200;
193 else {
194 dprintf(2, "error: expected int, short, char, or string\n");
195 return 0;
197 while(!isspace(*p) && p < pend) p++;
198 while(isspace(*p) && p < pend) p++;
199 var = p;
200 while(!isspace(*p) && p < pend) p++;
201 *p = 0; p++;
202 assert(p < pend && *p == '=');
203 p++; while(isspace(*p) && p < pend) p++;
204 assert(p < pend);
205 int value;
207 if(*p == '.') {
208 p++;
209 if(memcmp(p, "data", 4) == 0) {
210 p += 4;
211 while(isspace(*p) && p < pend) p++;
212 assert(p < pend && *p == '+');
213 p++;
214 while(isspace(*p) && p < pend) p++;
215 value = atoi(p);
216 add_fixup(a, FIXUP_DATADATA, data_pos);
217 goto write_var;
218 } else {
219 dprintf(2, "error: expected \"data\"\n");
220 return 0;
222 } else {
223 value = atoi(p);
224 write_var:
225 switch (vs) {
226 default:
227 for(value = vs; value >= 10; value-=10)
228 ByteArray_writeMem(a->data, (void*)"\0\0\0\0\0\0\0\0\0\0", 10);
229 while(value--) ByteArray_writeUnsignedByte(a->data, 0);
230 break;
231 case 4:
232 ByteArray_writeInt(a->data, value);
233 break;
234 case 2:
235 ByteArray_writeShort(a->data, value);
236 break;
237 case 1:
238 ByteArray_writeUnsignedByte(a->data, value);
239 break;
242 if(exportflag) add_export(a, EXPORT_DATA, var, data_pos);
243 add_variable(a, var, vs, data_pos);
244 data_pos += vs;
246 return 1;
249 ssize_t get_import_index(AS* a, char* name, size_t len) {
250 size_t i;
251 struct string item;
252 for(i = 0; i < List_size(a->import_list); i++) {
253 assert(List_get(a->import_list, i, &item));
254 if(len == item.len && !strcmp(name, item.ptr)) return i;
256 return -1;
259 void add_import(AS *a, char* name) {
260 size_t l = strlen(name);
261 if(get_import_index(a, name, l) != -1) return;
262 struct string item;
263 item.ptr = strdup(name);
264 item.len = l;
265 List_add(a->import_list, &item);
268 static int find_export(AS *a, int type, char* name, unsigned *offset) {
269 struct function_export item;
270 size_t i;
271 for(i = 0; i < List_size(a->export_list); i++) {
272 assert(List_get(a->export_list, i, &item));
273 if(item.type == type && !strcmp(name, item.fn)) {
274 *offset = item.instr;
275 return 1;
278 return 0;
281 void generate_import_table(AS *a) {
282 size_t i;
283 struct label item;
284 unsigned off;
285 for(i = 0; i < List_size(a->function_ref_list); i++) {
286 assert(List_get(a->function_ref_list, i, &item));
287 if(!find_export(a, EXPORT_FUNCTION, item.name, &off))
288 add_import(a, item.name);
292 #include "ags_cpu.h"
294 int get_reg(char* regname) {
295 int i = AR_NULL + 1;
296 for(; i < AR_MAX; i++)
297 if(strcmp(regnames[i], regname) == 0)
298 return i;
299 return AR_NULL;
302 static size_t mnemolen[SCMD_MAX];
303 static int mnemolen_initdone = 0;
305 static void init_mnemolen(void) {
306 size_t i = 0;
307 for(; i< SCMD_MAX; i++)
308 mnemolen[i] = strlen(opcodes[i].mnemonic);
309 mnemolen_initdone = 1;
312 static unsigned find_insn(char* sym) {
313 if(!mnemolen_initdone) init_mnemolen();
314 size_t i = 0, l = strlen(sym);
315 for(; i< SCMD_MAX; i++)
316 if(l == mnemolen[i] && memcmp(sym, opcodes[i].mnemonic, l) == 0)
317 return i;
318 return 0;
321 #include "StringEscape.h"
/* Expects a pointer to the first char after an opening " in a string.
 * Trailing whitespace is skipped, the closing " is replaced by a NUL (the
 * input is modified), and the string is converted into convbuf through
 * unescape(), whose result is returned.
 * Returns (size_t) -1 when the text does not end in a closing ". */
static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t convbuflen) {
	size_t n = strlen(x);
	char* e = x + n;
	assert(n > 0 && e < end);
	if(n == 0) return (size_t) -1;
	e--;
	/* Stop at x: the byte in front of it is the opening quote, which must
	 * not be taken for the closing one when the remainder is whitespace. */
	while(e > x && isspace((unsigned char) *e)) e--;
	if(*e != '"') return (size_t) -1;
	*e = 0;
	return unescape(x, convbuf, convbuflen);
}
/* Isolate the argument starting at *p and return it as a NUL-terminated
 * string; returns 0 when a string argument lacks its closing quote.
 *
 * String argument ("..."): the unescaped text, re-wrapped in quotes, is
 * built in convbuf and convbuf is returned. *p is set to 0 afterwards.
 *
 * Any other argument: its last char is followed by a NUL written in place
 * of the ',' or whitespace delimiter, and *p is advanced to the start of
 * the next argument. */
static char* finalize_arg(char **p, char* pend, char* convbuf, size_t convbuflen) {
	if(**p == '"') {
		convbuf[0] = '"';
		size_t l = get_length_and_convert(*p + 1, pend, convbuf+1, convbuflen - 1);
		if(l == (size_t) -1) return 0;
		assert(l + 2 < convbuflen); /* room for the closing quote and the NUL */
		convbuf[l+1] = '"';
		convbuf[l+2] = 0;
		*p = 0; /* make it crash if its accessed again, since a string should always be the last arg */
		return convbuf;
	} else {
		char* ret = *p;
		/* chars go through unsigned char: negative values are undefined
		 * behaviour for the <ctype.h> functions */
		while(*p < pend && **p != ',' && !isspace((unsigned char) **p)) (*p)++;
		assert(*p < pend);
		**p = 0; (*p)++;
		while(*p < pend && isspace((unsigned char) **p)) (*p)++;
		assert(*p < pend);
		return ret;
	}
}
357 static int asm_strings(AS *a) {
358 /* add strings in .strings section, even when they're not used from .text */
359 size_t lineno;
360 ssize_t start = find_section(a->in, "strings", &lineno);
361 if(start == -1) return 1;
362 fseek(a->in, start, SEEK_SET);
363 char buf[1024];
364 while(fgets(buf, sizeof buf, a->in) && buf[0] != '.') {
365 char* p = buf;
366 if(*p == '#' || *p == ';') continue;
367 assert(*p == '"');
368 size_t l = strlen(p);
369 assert(l>1 && p[l-1] == '\n' && p[l-2] == '"');
370 p[l-1] = 0;
371 add_or_get_string(a, p);
373 return 1;
376 static int asm_text(AS *a) {
377 size_t lineno;
378 ssize_t start = find_section(a->in, "text", &lineno);
379 if(start == -1) return 1;
380 fseek(a->in, start, SEEK_SET);
381 char buf[1024];
382 char convbuf[sizeof(buf)]; /* to convert escaped string into non-escaped version */
383 size_t pos = 0;
384 while(fgets(buf, sizeof buf, a->in) && buf[0] != '.') {
385 lineno++;
386 char* p = buf, *pend = buf + sizeof buf;
387 if(*p == '#' || *p == ';') continue;
388 while(isspace(*p) && p < pend) p++;
389 assert(p < pend);
390 if(!*p) continue;
391 char* sym = p;
392 while(!isspace(*p) && p < pend) p++;
393 *p = 0; p++;
394 size_t l = strlen(sym);
395 if(l > 1 && sym[l-1] == ':') {
396 // functionstart or label
397 sym[l-1] = 0;
398 if(memcmp(sym, "label", 5) == 0)
399 add_label(a, sym, pos);
400 else {
401 add_export(a, EXPORT_FUNCTION, sym, pos);
402 ByteArray_writeUnsignedInt(a->code, SCMD_THISBASE);
403 ByteArray_writeUnsignedInt(a->code, pos);
404 pos+=2;
406 continue;
408 unsigned instr = find_insn(sym);
409 if(!instr) {
410 dprintf(2, "line %zu: error: unknown instruction '%s'\n", lineno, sym);
411 return 0;
413 if(instr == SCMD_THISBASE) continue; /* we emit this instruction ourselves when a new function starts. */
415 ByteArray_writeUnsignedInt(a->code, instr);
416 pos++;
417 size_t arg;
418 for(arg = 0; arg < opcodes[instr].argcount; arg++) {
419 sym = finalize_arg(&p, pend, convbuf, sizeof(convbuf));
420 if(sym == 0) {
421 dprintf(2, "line %zu: error: expected \"\n", lineno);
422 return 0;
424 int value = 0;
425 if(arg < opcodes[instr].regcount) {
426 value=get_reg(sym);
427 if(instr == SCMD_REGTOREG) {
428 /* fix reversed order of arguments */
429 int dst = value;
430 sym = p;
431 while(p < pend && *p != ',' && !isspace(*p)) p++;
432 assert(p < pend);
433 *p = 0;
434 value=get_reg(sym);
435 ByteArray_writeInt(a->code, value);
436 ByteArray_writeInt(a->code, dst);
437 pos += 2;
438 break;
440 } else {
441 switch(instr) {
442 case SCMD_LITTOREG:
443 /* immediate can be function name, string,
444 * variable name, stack fixup, or numeric value */
445 if(sym[0] == '"') {
446 value = get_string_offset(a, add_or_get_string(a, sym));
447 add_fixup(a, FIXUP_STRING, pos);
448 } else if(sym[0] == '@') {
449 value = get_variable_offset(a, sym+1);
450 add_fixup(a, FIXUP_GLOBALDATA, pos);
451 } else if(sym[0] == '.') {
452 if(memcmp(sym+1, "stack", 5)) {
453 dprintf(2, "error: expected stack\n");
454 return 0;
456 sym += 6;
457 while(isspace(*sym) && sym < pend) sym++;
458 assert(sym < pend && *sym == '+');
459 sym++;
460 while(isspace(*sym) && sym < pend) sym++;
461 add_fixup(a, FIXUP_STACK, pos);
462 value = atoi(sym);
463 } else if(isdigit(sym[0]) || sym[0] == '-') {
464 if(sym[0] == '-') assert(isdigit(sym[1]));
465 value = atoi(sym);
466 } else
467 add_function_ref(a, sym, pos);
468 break;
469 case SCMD_JMP: case SCMD_JZ: case SCMD_JNZ:
470 add_label_ref(a, sym, pos);
471 break;
472 default:
473 value = atoi(sym);
476 ByteArray_writeInt(a->code, value);
477 pos++;
481 size_t i;
482 struct label item;
483 for(i = 0; i < List_size(a->label_ref_list); i++) {
484 assert(List_get(a->label_ref_list, i, &item));
485 ByteArray_set_position(a->code, item.insno * 4);
486 int lbl = get_label_offset(a, item.name);
487 assert(lbl >= 0 && lbl < pos);
488 int label_insno = lbl - (item.insno+1); /* offset is calculated from next instruction */
489 ByteArray_writeInt(a->code, label_insno);
491 generate_import_table(a);
492 for(i = 0; i < List_size(a->function_ref_list); i++) {
493 assert(List_get(a->function_ref_list, i, &item));
494 ssize_t imp = get_import_index(a, item.name, strlen(item.name));
495 if(imp == -1) {
496 unsigned off;
497 assert(find_export(a, EXPORT_FUNCTION, item.name, &off));
498 imp = off;
499 add_fixup(a, FIXUP_FUNCTION, item.insno);
500 } else {
501 add_fixup(a, FIXUP_IMPORT, item.insno);
503 assert(imp != -1);
504 ByteArray_set_position(a->code, item.insno * 4);
505 ByteArray_writeInt(a->code, imp);
508 return 1;
510 #include "endianness.h"
/* Write `val` to `o` as 4 raw bytes. When IS_LITTLE_ENDIAN is not defined
 * the value is passed through byteswap32() first. */
static void write_int(FILE* o, int val) {
	int word = val;
#ifndef IS_LITTLE_ENDIAN
	word = byteswap32(word);
#endif
	fwrite(&word, 4, 1, o);
}
518 static int fixup_comparefunc(const void *a, const void* b) {
519 const struct fixup* fa = a, *fb = b;
520 if(fa->type == FIXUP_DATADATA && fb->type != FIXUP_DATADATA)
521 return -1;
522 if(fb->type == FIXUP_DATADATA && fa->type != FIXUP_DATADATA)
523 return 1;
524 if(fa->offset < fb->offset) return -1;
525 if(fa->offset == fb->offset) return 0;
526 return 1;
529 static void sort_fixup_list(AS* a) {
530 List_sort(a->fixup_list, fixup_comparefunc);
533 static void write_fixup_list(AS* a, FILE *o) {
534 struct fixup item;
535 size_t i;
536 for(i = 0; i < List_size(a->fixup_list); i++) {
537 assert(List_get(a->fixup_list, i, &item));
538 char type = item.type;
539 fwrite(&type, 1, 1, o);
541 for(i = 0; i < List_size(a->fixup_list); i++) {
542 assert(List_get(a->fixup_list, i, &item));
543 write_int(o, item.offset);
547 static void write_string_section(AS* a, FILE* o) {
548 struct string item;
549 size_t i = 0;
550 for(; i < List_size(a->string_list); i++) {
551 assert(List_get(a->string_list, i, &item));
552 fwrite(item.ptr, item.len + 1, 1, o);
556 static void write_import_section(AS* a, FILE* o) {
557 struct string item;
558 size_t i = 0;
559 for(; i < List_size(a->import_list); i++) {
560 assert(List_get(a->import_list, i, &item));
561 fwrite(item.ptr, item.len + 1, 1, o);
565 static void write_export_section(AS* a, FILE* o) {
566 struct function_export item;
567 size_t i = 0;
568 for(; i < List_size(a->export_list); i++) {
569 assert(List_get(a->export_list, i, &item));
570 fwrite(item.fn, strlen(item.fn) + 1, 1, o);
571 unsigned encoded = (item.type << 24) | (item.instr &0x00FFFFFF);
572 write_int(o, encoded);
576 static void write_sections_section(AS* a, FILE *o) {
577 //FIXME
580 static int write_object(AS *a, char *out) {
581 FILE *o;
582 if(!(o = fopen(out, "w"))) return 0;
583 fprintf(o, "SCOM");
584 write_int(o, 83); //version
585 write_int(o, ByteArray_get_length(a->data)); // globaldatasize
586 write_int(o, ByteArray_get_length(a->code) / 4); // codesize
587 write_int(o, get_string_section_length(a)); // stringssize
588 size_t l = ByteArray_get_length(a->data);
589 void *p;
590 if(l) {
591 p = mem_getptr(&a->data->source.mem, 0, l); // FIXME dont access directly, use some getter method
592 assert(p);
593 fwrite(p,l,1,o); // globaldata
595 l = ByteArray_get_length(a->code);
596 if(l) {
597 p = mem_getptr(&a->code->source.mem, 0, l);
598 assert(p);
599 fwrite(p,l,1,o); // code
601 write_string_section(a, o);
602 write_int(o, List_size(a->fixup_list));
603 sort_fixup_list(a);
604 write_fixup_list(a, o);
605 if(!List_size(a->import_list)) {
606 /* AGS declares object files with 0 imports as invalid */
607 add_import(a, "");
609 write_int(o, List_size(a->import_list));
610 write_import_section(a, o);
611 write_int(o, List_size(a->export_list));
612 write_export_section(a, o);
613 write_int(o, 0); // FIXME sectioncount
614 write_sections_section(a, o);
615 write_int(o, 0xbeefcafe); // magic end marker.
616 fclose(o);
617 return 1;
620 int AS_assemble(AS* a, char* out) {
621 if(!asm_data(a)) return 0;
622 if(!asm_text(a)) return 0;
623 // if(!asm_strings(a)) return 0; // emitting unneeded strings is not necessary
624 if(!write_object(a, out)) return 0;
625 return 1;
628 void AS_open_stream(AS* a, FILE* f) {
629 memset(a, 0, sizeof *a);
630 a->obj = &a->obj_b;
631 a->data = &a->data_b;
632 a->code = &a->code_b;
633 ByteArray_ctor(a->obj);
634 ByteArray_open_mem(a->obj, 0, 0);
635 ByteArray_ctor(a->data);
636 ByteArray_set_endian(a->data, BAE_LITTLE);
637 ByteArray_set_flags(a->data, BAF_CANGROW);
638 ByteArray_open_mem(a->data, 0, 0);
639 ByteArray_ctor(a->code);
640 ByteArray_set_endian(a->code, BAE_LITTLE);
641 ByteArray_set_flags(a->code, BAF_CANGROW);
642 ByteArray_open_mem(a->code, 0, 0);
644 a->export_list = &a->export_list_b;
645 a->fixup_list = &a->fixup_list_b;
646 a->string_list = &a->string_list_b;
647 a->label_list = &a->label_list_b;
648 a->label_ref_list = &a->label_ref_list_b;
649 a->function_ref_list = &a->function_ref_list_b;
650 a->variable_list = &a->variable_list_b;
651 a->import_list = &a->import_list_b;
653 List_init(a->export_list, sizeof(struct function_export));
654 List_init(a->fixup_list , sizeof(struct fixup));
655 List_init(a->string_list, sizeof(struct string));
656 List_init(a->label_list, sizeof(struct label));
657 List_init(a->label_ref_list, sizeof(struct label));
658 List_init(a->function_ref_list, sizeof(struct label));
659 List_init(a->variable_list, sizeof(struct variable));
660 List_init(a->import_list, sizeof(struct string));
662 a->in = f;
665 int AS_open(AS* a, char* fn) {
666 FILE *f = fopen(fn, "r");
667 if(!f) return 0;
668 AS_open_stream(a, f);
669 return 1;
673 void AS_close(AS* a) {
674 fclose(a->in);