agssim: implement signals
[rofl0r-agsutils.git] / Assembler.c
blobde28057bd92f5bcb5293a7442a3467737909eed0
1 #define _GNU_SOURCE
2 #include "File.h"
3 #include "ByteArray.h"
4 #include "MemGrow.h"
5 #include "Script_internal.h"
6 #include "List.h"
7 #include <ctype.h>
8 #include <stdio.h>
9 #include <assert.h>
10 #include <string.h>
11 #include <stdlib.h>
12 #include "Assembler.h"
/* One entry of the fixup table that is written into the object file. */
struct fixup {
	int type;        /* FIXUP_* kind */
	unsigned offset; /* instruction number; data offset for FIXUP_DATADATA */
};

/* A NUL-terminated string together with its cached length. */
struct string {
	size_t len; /* length without the terminating NUL */
	char* ptr;  /* the characters */
};

/* A named reference to a code location (label or function use). */
struct label {
	char* name;     /* referenced symbol name */
	unsigned insno; /* index of the code word that refers to the symbol */
};

/* A variable declared in the .data section. */
struct variable {
	char* name;      /* variable name */
	unsigned vs;     /* size of the variable in bytes */
	unsigned offset; /* byte offset inside the global data */
};
35 static int add_label(AS *a, char* name, size_t insno) {
36 char* tmp = strdup(name);
37 return hbmap_insert(a->label_map, tmp, (unsigned) insno) != -1;
40 static unsigned get_label_offset(AS *a, char* name) {
41 unsigned *ret = hbmap_get(a->label_map, name);
42 if(!ret) {
43 dprintf(2, "error: label '%s' not found\n", name);
44 if(strncmp(name, "label", 5)) dprintf(2, "hint: label names must start with 'label'\n");
45 exit(1);
47 return *ret;
50 static int add_label_ref(AS *a, char * name, size_t insno) {
51 /* add reference to named label to a list. after the first pass
52 * over the code these locations have to be fixed with the offset
53 * of the label. */
54 struct label item = { .name = strdup(name), .insno = insno };
55 assert(item.name);
56 return List_add(a->label_ref_list, &item);
59 static int add_function_ref(AS *a, char* name, size_t insno) {
60 /* add reference to named function to a list. after the first pass
61 * over the code these locations have to be fixed with the offset
62 * of the label. */
63 struct label item = { .name = strdup(name), .insno = insno };
64 assert(item.name);
65 return List_add(a->function_ref_list, &item);
68 static int add_export(AS *a, int type, char* name, size_t offset) {
69 struct export item = { .fn = strdup(name), .instr = offset, .type = type};
70 assert(item.fn);
71 return List_add(a->export_list, &item);
74 static int add_fixup(AS *a, int type, size_t offset) {
75 struct fixup item = {.type = type, .offset = offset};
76 /* offset equals instruction number for non-DATADATA fixups */
77 return List_add(a->fixup_list, &item);
80 static int add_or_get_string(AS* a, char* str) {
81 /* return index of string in string table
82 * add to string table if not yet existing */
83 str++; /* leading '"' */
84 size_t l = strlen(str);
85 l--;
86 str[l] = 0; /* trailing '"' */
87 struct string item = {.ptr = str, .len = l }, iter;
88 size_t i = 0;
89 for(; i < List_size(a->string_list); i++) {
90 assert(List_get(a->string_list, i, &iter));
91 if(iter.len == item.len && !strcmp(iter.ptr, str)) {
92 return i;
95 item.ptr = strdup(str);
96 List_add(a->string_list, &item);
97 return List_size(a->string_list) -1;
100 static unsigned get_string_offset(AS *a, unsigned index) {
101 assert(index < List_size(a->string_list));
102 unsigned i = 0, ret = 0;
103 struct string item;
104 for(; i < index; i++) {
105 assert(List_get(a->string_list, i, &item));
106 ret += item.len + 1;
108 return ret;
111 static size_t get_string_section_length(AS* a) {
112 struct string item;;
113 size_t i = 0, l = 0;
114 for(; i < List_size(a->string_list); i++) {
115 assert(List_get(a->string_list, i, &item));
116 l += item.len + 1;
118 return l;
121 static int add_variable(AS *a, char* name, unsigned vs, size_t offset) {
122 struct variable item = { .name = strdup(name), .vs = vs, .offset = offset };
123 return List_add(a->variable_list, &item);
126 static int get_variable_offset(AS* a, char* name) {
127 /* return globaldata offset of named variable */
128 size_t i = 0;
129 struct variable item;
130 for(; i < List_size(a->variable_list); i++) {
131 assert(List_get(a->variable_list, i, &item));
132 if(!strcmp(item.name, name))
133 return item.offset;
135 assert(0);
136 return 0;
/* Search `in` from the beginning for a line that starts with '.'
 * immediately followed by `name`.
 * Returns the number of characters read up to and including that line
 * (the position at which the section body starts), or -1 if there is no
 * such line. *lineno receives the number of lines read. */
static ssize_t find_section(FILE* in, char* name, size_t *lineno) {
	char buf[1024];
	size_t off = 0, l = strlen(name);
	*lineno = 0;
	fseek(in, 0, SEEK_SET);
	while(fgets(buf, sizeof buf, in)) {
		*lineno = *lineno +1;
		off += strlen(buf);
		/* strncmp stops at the line's NUL; memcmp could read the
		 * uninitialised remainder of buf on short lines */
		if(buf[0] == '.' && strncmp(name, buf + 1, l) == 0)
			return off;
	}
	return -1;
}
153 static int asm_data(AS* a) {
154 size_t lineno;
155 ssize_t start = find_section(a->in, "data", &lineno);
156 if(start == -1) return 1; // it is valid for .s file to only have .text
157 fseek(a->in, start, SEEK_SET);
158 char buf[1024];
159 size_t data_pos = 0;
160 while(fgets(buf, sizeof buf, a->in) && buf[0] != '.') {
161 if(buf[0] == '\n') continue;
162 char* p = buf, *pend = buf + sizeof buf, *var;
163 int exportflag = 0;
164 unsigned vs = 0;
165 if(*p == '#' || *p == ';') continue;
166 while(isspace(*p) && p < pend) p++;
167 if(!memcmp(p, "export", 6) && isspace(p[6])) {
168 p += 7;
169 exportflag = 1;
170 while(isspace(*p) && p < pend) p++;
172 if(memcmp(p, "int", 3) == 0)
173 vs = 4;
174 else if(memcmp(p, "short", 5) == 0)
175 vs = 2;
176 else if(memcmp(p, "char", 4) == 0) {
177 vs = 1;
178 if(p[4] == '[') {
179 vs = atoi(p+5);
180 char *q = p+5;
181 while(isdigit(*q) && q < pend) q++;
182 if(vs == 0 || *q != ']') {
183 dprintf(2, "error: expected number > 0 and ']' after '['\n");
184 return 0;
187 else vs = 1;
188 } else if(memcmp(p, "string", 6) == 0)
189 vs = 200;
190 else {
191 dprintf(2, "error: expected int, short, char, or string\n");
192 return 0;
194 while(!isspace(*p) && p < pend) p++;
195 while(isspace(*p) && p < pend) p++;
196 var = p;
197 while(!isspace(*p) && p < pend) p++;
198 *p = 0; p++;
199 assert(p < pend && *p == '=');
200 p++; while(isspace(*p) && p < pend) p++;
201 assert(p < pend);
202 int value;
204 if(*p == '.') {
205 p++;
206 if(memcmp(p, "data", 4) == 0) {
207 p += 4;
208 while(isspace(*p) && p < pend) p++;
209 assert(p < pend && *p == '+');
210 p++;
211 while(isspace(*p) && p < pend) p++;
212 value = atoi(p);
213 add_fixup(a, FIXUP_DATADATA, data_pos);
214 goto write_var;
215 } else {
216 dprintf(2, "error: expected \"data\"\n");
217 return 0;
219 } else {
220 value = atoi(p);
221 write_var:
222 switch (vs) {
223 default:
224 for(value = vs; value >= 10; value-=10)
225 ByteArray_writeMem(a->data, (void*)"\0\0\0\0\0\0\0\0\0\0", 10);
226 while(value--) ByteArray_writeUnsignedByte(a->data, 0);
227 break;
228 case 4:
229 ByteArray_writeInt(a->data, value);
230 break;
231 case 2:
232 ByteArray_writeShort(a->data, value);
233 break;
234 case 1:
235 ByteArray_writeUnsignedByte(a->data, value);
236 break;
239 if(exportflag) add_export(a, EXPORT_DATA, var, data_pos);
240 add_variable(a, var, vs, data_pos);
241 data_pos += vs;
243 return 1;
246 ssize_t get_import_index(AS* a, char* name, size_t len) {
247 size_t i;
248 struct string item;
249 for(i = 0; i < List_size(a->import_list); i++) {
250 assert(List_get(a->import_list, i, &item));
251 if(len == item.len && !strcmp(name, item.ptr)) return i;
253 return -1;
256 void add_import(AS *a, char* name) {
257 size_t l = strlen(name);
258 if(get_import_index(a, name, l) != -1) return;
259 struct string item;
260 item.ptr = strdup(name);
261 item.len = l;
262 List_add(a->import_list, &item);
265 static int find_export(AS *a, int type, char* name, unsigned *offset) {
266 struct export item;
267 size_t i;
268 for(i = 0; i < List_size(a->export_list); i++) {
269 assert(List_get(a->export_list, i, &item));
270 if(item.type == type && !strcmp(name, item.fn)) {
271 *offset = item.instr;
272 return 1;
275 return 0;
278 void generate_import_table(AS *a) {
279 size_t i;
280 struct label item;
281 unsigned off;
282 for(i = 0; i < List_size(a->function_ref_list); i++) {
283 assert(List_get(a->function_ref_list, i, &item));
284 if(!find_export(a, EXPORT_FUNCTION, item.name, &off))
285 add_import(a, item.name);
289 #include "ags_cpu.h"
291 int get_reg(char* regname) {
292 int i = AR_NULL + 1;
293 for(; i < AR_MAX; i++)
294 if(strcmp(regnames[i], regname) == 0)
295 return i;
296 return AR_NULL;
299 static size_t mnemolen[SCMD_MAX];
300 static int mnemolen_initdone = 0;
302 static void init_mnemolen(void) {
303 size_t i = 0;
304 for(; i< SCMD_MAX; i++)
305 mnemolen[i] = strlen(opcodes[i].mnemonic);
306 mnemolen_initdone = 1;
309 static unsigned find_insn(char* sym) {
310 if(!mnemolen_initdone) init_mnemolen();
311 size_t i = 0, l = strlen(sym);
312 for(; i< SCMD_MAX; i++)
313 if(l == mnemolen[i] && memcmp(sym, opcodes[i].mnemonic, l) == 0)
314 return i;
315 return 0;
318 #include "StringEscape.h"
/* expects a pointer to the first char after an opening " in a string,
 * converts the string into convbuf, and returns the length of that string.
 * the closing " is searched from the end of the line, ignoring trailing
 * whitespace, and is overwritten with NUL.
 * returns (size_t) -1 if there is no closing ". */
static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t convbuflen) {
	size_t result = 0;
	char* e = x + strlen(x);
	if(e == x) return (size_t) -1; /* nothing after the opening quote */
	assert(e < end && *e == 0);
	e--;
	/* never step before x: the opening quote must not be taken for the closing one */
	while(e > x && isspace((unsigned char) *e)) e--;
	if(*e != '"') return (size_t) -1;
	*e = 0;
	result = unescape(x, convbuf, convbuflen);
	return result;
}
/* sets the last char of the current arg to 0, and advances the pointer
 * to the start of the next arg.
 * a quoted string is unescaped into convbuf (quotes included) and convbuf
 * is returned; any other arg is terminated in place and returned.
 * returns 0 if a string arg has no closing quote. */
static char* finalize_arg(char **p, char* pend, char* convbuf, size_t convbuflen) {
	if(**p == '"') {
		convbuf[0] = '"';
		size_t l= get_length_and_convert(*p + 1, pend, convbuf+1, convbuflen - 1);
		if(l == (size_t) -1) return 0;
		assert(l + 2 < convbuflen);
		convbuf[l+1] = '"';
		convbuf[l+2] = 0;
		*p = 0; /* make it crash if its accessed again, since a string should always be the last arg */
		return convbuf;
	}

	char* ret = *p;
	/* the arg ends at ',', whitespace, or the end of the line */
	while(*p < pend && **p && **p != ',' && !isspace((unsigned char) **p)) (*p)++;
	assert(*p < pend);
	if(**p) {
		/* only step over a real separator, never over the line's NUL */
		**p = 0; (*p)++;
	}
	while(*p < pend && isspace((unsigned char) **p)) (*p)++;
	assert(*p < pend);
	return ret;
}
354 static int asm_strings(AS *a) {
355 /* add strings in .strings section, even when they're not used from .text */
356 size_t lineno;
357 ssize_t start = find_section(a->in, "strings", &lineno);
358 if(start == -1) return 1;
359 fseek(a->in, start, SEEK_SET);
360 char buf[1024];
361 while(fgets(buf, sizeof buf, a->in) && buf[0] != '.') {
362 char* p = buf;
363 if(*p == '#' || *p == ';') continue;
364 assert(*p == '"');
365 size_t l = strlen(p);
366 assert(l>1 && p[l-1] == '\n' && p[l-2] == '"');
367 p[l-1] = 0;
368 add_or_get_string(a, p);
370 return 1;
373 static int asm_text(AS *a) {
374 size_t lineno;
375 ssize_t start = find_section(a->in, "text", &lineno);
376 if(start == -1) return 1;
377 fseek(a->in, start, SEEK_SET);
378 char buf[1024];
379 char convbuf[sizeof(buf)]; /* to convert escaped string into non-escaped version */
380 size_t pos = 0;
381 while(fgets(buf, sizeof buf, a->in) && buf[0] != '.') {
382 lineno++;
383 char* p = buf, *pend = buf + sizeof buf;
384 if(*p == '#' || *p == ';') continue;
385 while(isspace(*p) && p < pend) p++;
386 assert(p < pend);
387 if(!*p) continue;
388 char* sym = p;
389 while(!isspace(*p) && p < pend) p++;
390 *p = 0; p++;
391 size_t l = strlen(sym);
392 if(l > 1 && sym[l-1] == ':') {
393 // functionstart or label
394 sym[l-1] = 0;
395 if(memcmp(sym, "label", 5) == 0)
396 add_label(a, sym, pos);
397 else {
398 add_export(a, EXPORT_FUNCTION, sym, pos);
399 ByteArray_writeUnsignedInt(a->code, SCMD_THISBASE);
400 ByteArray_writeUnsignedInt(a->code, pos);
401 pos+=2;
403 continue;
405 unsigned instr = find_insn(sym);
406 if(!instr) {
407 dprintf(2, "line %zu: error: unknown instruction '%s'\n", lineno, sym);
408 return 0;
410 if(instr == SCMD_THISBASE) continue; /* we emit this instruction ourselves when a new function starts. */
412 ByteArray_writeUnsignedInt(a->code, instr);
413 pos++;
414 size_t arg;
415 for(arg = 0; arg < opcodes[instr].argcount; arg++) {
416 sym = finalize_arg(&p, pend, convbuf, sizeof(convbuf));
417 if(sym == 0) {
418 dprintf(2, "line %zu: error: expected \"\n", lineno);
419 return 0;
421 int value = 0;
422 if(arg < opcodes[instr].regcount) {
423 value=get_reg(sym);
424 if(instr == SCMD_REGTOREG) {
425 /* fix reversed order of arguments */
426 int dst = value;
427 sym = p;
428 while(p < pend && *p != ',' && !isspace(*p)) p++;
429 assert(p < pend);
430 *p = 0;
431 value=get_reg(sym);
432 ByteArray_writeInt(a->code, value);
433 ByteArray_writeInt(a->code, dst);
434 pos += 2;
435 break;
437 } else {
438 switch(instr) {
439 case SCMD_LITTOREG:
440 /* immediate can be function name, string,
441 * variable name, stack fixup, or numeric value */
442 if(sym[0] == '"') {
443 value = get_string_offset(a, add_or_get_string(a, sym));
444 add_fixup(a, FIXUP_STRING, pos);
445 } else if(sym[0] == '@') {
446 value = get_variable_offset(a, sym+1);
447 add_fixup(a, FIXUP_GLOBALDATA, pos);
448 } else if(sym[0] == '.') {
449 if(memcmp(sym+1, "stack", 5)) {
450 dprintf(2, "error: expected stack\n");
451 return 0;
453 sym += 6;
454 while(isspace(*sym) && sym < pend) sym++;
455 assert(sym < pend && *sym == '+');
456 sym++;
457 while(isspace(*sym) && sym < pend) sym++;
458 add_fixup(a, FIXUP_STACK, pos);
459 value = atoi(sym);
460 } else if(isdigit(sym[0]) || sym[0] == '-') {
461 if(sym[0] == '-') assert(isdigit(sym[1]));
462 value = atoi(sym);
463 } else
464 add_function_ref(a, sym, pos);
465 break;
466 case SCMD_JMP: case SCMD_JZ: case SCMD_JNZ:
467 add_label_ref(a, sym, pos);
468 break;
469 default:
470 value = atoi(sym);
473 ByteArray_writeInt(a->code, value);
474 pos++;
478 size_t i;
479 struct label item;
480 for(i = 0; i < List_size(a->label_ref_list); i++) {
481 assert(List_get(a->label_ref_list, i, &item));
482 ByteArray_set_position(a->code, item.insno * 4);
483 int lbl = get_label_offset(a, item.name);
484 assert(lbl >= 0 && lbl < pos);
485 int label_insno = lbl - (item.insno+1); /* offset is calculated from next instruction */
486 ByteArray_writeInt(a->code, label_insno);
488 generate_import_table(a);
489 for(i = 0; i < List_size(a->function_ref_list); i++) {
490 assert(List_get(a->function_ref_list, i, &item));
491 ssize_t imp = get_import_index(a, item.name, strlen(item.name));
492 if(imp == -1) {
493 unsigned off;
494 assert(find_export(a, EXPORT_FUNCTION, item.name, &off));
495 imp = off;
496 add_fixup(a, FIXUP_FUNCTION, item.insno);
497 } else {
498 add_fixup(a, FIXUP_IMPORT, item.insno);
500 assert(imp != -1);
501 ByteArray_set_position(a->code, item.insno * 4);
502 ByteArray_writeInt(a->code, imp);
505 return 1;
507 #include "endianness.h"
/* Write `val` to `o` as 4 bytes. When IS_LITTLE_ENDIAN is not defined
 * the value is passed through byteswap32() first. */
static void write_int(FILE* o, int val) {
	int out = val;
#ifndef IS_LITTLE_ENDIAN
	out = byteswap32(out);
#endif
	fwrite(&out, 4, 1, o);
}
515 static int fixup_comparefunc(const void *a, const void* b) {
516 const struct fixup* fa = a, *fb = b;
517 if(fa->type == FIXUP_DATADATA && fb->type != FIXUP_DATADATA)
518 return -1;
519 if(fb->type == FIXUP_DATADATA && fa->type != FIXUP_DATADATA)
520 return 1;
521 if(fa->offset < fb->offset) return -1;
522 if(fa->offset == fb->offset) return 0;
523 return 1;
526 static void sort_fixup_list(AS* a) {
527 List_sort(a->fixup_list, fixup_comparefunc);
530 static void write_fixup_list(AS* a, FILE *o) {
531 struct fixup item;
532 size_t i;
533 for(i = 0; i < List_size(a->fixup_list); i++) {
534 assert(List_get(a->fixup_list, i, &item));
535 char type = item.type;
536 fwrite(&type, 1, 1, o);
538 for(i = 0; i < List_size(a->fixup_list); i++) {
539 assert(List_get(a->fixup_list, i, &item));
540 write_int(o, item.offset);
544 static void write_string_section(AS* a, FILE* o) {
545 struct string item;
546 size_t i = 0;
547 for(; i < List_size(a->string_list); i++) {
548 assert(List_get(a->string_list, i, &item));
549 fwrite(item.ptr, item.len + 1, 1, o);
553 static void write_import_section(AS* a, FILE* o) {
554 struct string item;
555 size_t i = 0;
556 for(; i < List_size(a->import_list); i++) {
557 assert(List_get(a->import_list, i, &item));
558 fwrite(item.ptr, item.len + 1, 1, o);
562 static void write_export_section(AS* a, FILE* o) {
563 struct export item;
564 size_t i = 0;
565 for(; i < List_size(a->export_list); i++) {
566 assert(List_get(a->export_list, i, &item));
567 fwrite(item.fn, strlen(item.fn) + 1, 1, o);
568 unsigned encoded = (item.type << 24) | (item.instr &0x00FFFFFF);
569 write_int(o, encoded);
573 static void write_sections_section(AS* a, FILE *o) {
574 //FIXME
577 static int write_object(AS *a, char *out) {
578 FILE *o;
579 if(!(o = fopen(out, "w"))) return 0;
580 fprintf(o, "SCOM");
581 write_int(o, 83); //version
582 write_int(o, ByteArray_get_length(a->data)); // globaldatasize
583 write_int(o, ByteArray_get_length(a->code) / 4); // codesize
584 write_int(o, get_string_section_length(a)); // stringssize
585 size_t l = ByteArray_get_length(a->data);
586 void *p;
587 if(l) {
588 p = mem_getptr(&a->data->source.mem, 0, l); // FIXME dont access directly, use some getter method
589 assert(p);
590 fwrite(p,l,1,o); // globaldata
592 l = ByteArray_get_length(a->code);
593 if(l) {
594 p = mem_getptr(&a->code->source.mem, 0, l);
595 assert(p);
596 fwrite(p,l,1,o); // code
598 write_string_section(a, o);
599 write_int(o, List_size(a->fixup_list));
600 sort_fixup_list(a);
601 write_fixup_list(a, o);
602 if(!List_size(a->import_list)) {
603 /* AGS declares object files with 0 imports as invalid */
604 add_import(a, "");
606 write_int(o, List_size(a->import_list));
607 write_import_section(a, o);
608 write_int(o, List_size(a->export_list));
609 write_export_section(a, o);
610 write_int(o, 0); // FIXME sectioncount
611 write_sections_section(a, o);
612 write_int(o, 0xbeefcafe); // magic end marker.
613 fclose(o);
614 return 1;
617 int AS_assemble(AS* a, char* out) {
618 if(!asm_data(a)) return 0;
619 if(!asm_text(a)) return 0;
620 // if(!asm_strings(a)) return 0; // emitting unneeded strings is not necessary
621 if(!write_object(a, out)) return 0;
622 return 1;
/* Comparison callback: `a` and `b` each point to a `char*`; the two
 * strings they refer to are compared with strcmp(). */
static int strptrcmp(const void *a, const void *b) {
	const char *lhs = *(const char * const *) a;
	const char *rhs = *(const char * const *) b;
	return strcmp(lhs, rhs);
}
/* Hash a NUL-terminated string; the result is limited to 28 bits. */
static unsigned string_hash(const char* s) {
	uint_fast32_t acc = 0;
	for(; *s; s++) {
		acc = 16*acc + *s;
		acc ^= (acc >> 24) & 0xf0;
	}
	return acc & 0xfffffff;
}
639 void AS_open_stream(AS* a, FILE* f) {
640 memset(a, 0, sizeof *a);
641 a->obj = &a->obj_b;
642 a->data = &a->data_b;
643 a->code = &a->code_b;
644 ByteArray_ctor(a->obj);
645 ByteArray_open_mem(a->obj, 0, 0);
646 ByteArray_ctor(a->data);
647 ByteArray_set_endian(a->data, BAE_LITTLE);
648 ByteArray_set_flags(a->data, BAF_CANGROW);
649 ByteArray_open_mem(a->data, 0, 0);
650 ByteArray_ctor(a->code);
651 ByteArray_set_endian(a->code, BAE_LITTLE);
652 ByteArray_set_flags(a->code, BAF_CANGROW);
653 ByteArray_open_mem(a->code, 0, 0);
655 a->export_list = &a->export_list_b;
656 a->fixup_list = &a->fixup_list_b;
657 a->string_list = &a->string_list_b;
658 a->label_ref_list = &a->label_ref_list_b;
659 a->function_ref_list = &a->function_ref_list_b;
660 a->variable_list = &a->variable_list_b;
661 a->import_list = &a->import_list_b;
662 a->label_map = (void*) &a->label_map_b;
664 List_init(a->export_list, sizeof(struct export));
665 List_init(a->fixup_list , sizeof(struct fixup));
666 List_init(a->string_list, sizeof(struct string));
667 List_init(a->label_ref_list, sizeof(struct label));
668 List_init(a->function_ref_list, sizeof(struct label));
669 List_init(a->variable_list, sizeof(struct variable));
670 List_init(a->import_list, sizeof(struct string));
671 hbmap_init(a->label_map, strptrcmp, string_hash);
673 a->in = f;
676 int AS_open(AS* a, char* fn) {
677 FILE *f = fopen(fn, "r");
678 if(!f) return 0;
679 AS_open_stream(a, f);
680 return 1;
684 void AS_close(AS* a) {
685 fclose(a->in);