agssim: don't accept wrong input when register name expected
[rofl0r-agsutils.git] / Assembler.c
blob679486457222cfbbb4c263c05723189856113e78
1 #define _GNU_SOURCE
2 #include "File.h"
3 #include "ByteArray.h"
4 #include "MemGrow.h"
5 #include "Script_internal.h"
6 #include "List.h"
7 #include <ctype.h>
8 #include <stdio.h>
9 #include <assert.h>
10 #include <string.h>
11 #include <stdlib.h>
12 #include "Assembler.h"
/* A single relocation record: `type` holds one of the FIXUP_* codes,
 * `offset` the location the relocation applies to (an instruction number
 * for everything except FIXUP_DATADATA entries). */
struct fixup {
	int type;
	unsigned offset;
};
/* Entry of the string and import tables: `ptr` is the NUL-terminated text,
 * `len` its length without the terminator. */
struct string {
	size_t len;
	char* ptr;
};
/* A pending reference to a named label or function: `insno` is the code
 * slot that has to be patched once the name has been resolved. */
struct label {
	char* name;
	unsigned insno;
};
/* A variable declared in the .data section: `vs` is its size in bytes,
 * `offset` its position inside the global data blob. */
struct variable {
	char* name;
	unsigned vs;
	unsigned offset;
};
35 static int add_label(AS *a, char* name, size_t insno) {
36 char* tmp = strdup(name);
37 return hbmap_insert(a->label_map, tmp, (unsigned) insno) != -1;
40 static unsigned get_label_offset(AS *a, char* name) {
41 return *hbmap_get(a->label_map, name);
44 static int add_label_ref(AS *a, char * name, size_t insno) {
45 /* add reference to named label to a list. after the first pass
46 * over the code these locations have to be fixed with the offset
47 * of the label. */
48 struct label item = { .name = strdup(name), .insno = insno };
49 assert(item.name);
50 return List_add(a->label_ref_list, &item);
53 static int add_function_ref(AS *a, char* name, size_t insno) {
54 /* add reference to named function to a list. after the first pass
55 * over the code these locations have to be fixed with the offset
56 * of the label. */
57 struct label item = { .name = strdup(name), .insno = insno };
58 assert(item.name);
59 return List_add(a->function_ref_list, &item);
62 static int add_export(AS *a, int type, char* name, size_t offset) {
63 struct export item = { .fn = strdup(name), .instr = offset, .type = type};
64 assert(item.fn);
65 return List_add(a->export_list, &item);
68 static int add_fixup(AS *a, int type, size_t offset) {
69 struct fixup item = {.type = type, .offset = offset};
70 /* offset equals instruction number for non-DATADATA fixups */
71 return List_add(a->fixup_list, &item);
74 static int add_or_get_string(AS* a, char* str) {
75 /* return index of string in string table
76 * add to string table if not yet existing */
77 str++; /* leading '"' */
78 size_t l = strlen(str);
79 l--;
80 str[l] = 0; /* trailing '"' */
81 struct string item = {.ptr = str, .len = l }, iter;
82 size_t i = 0;
83 for(; i < List_size(a->string_list); i++) {
84 assert(List_get(a->string_list, i, &iter));
85 if(iter.len == item.len && !strcmp(iter.ptr, str)) {
86 return i;
89 item.ptr = strdup(str);
90 List_add(a->string_list, &item);
91 return List_size(a->string_list) -1;
94 static unsigned get_string_offset(AS *a, unsigned index) {
95 assert(index < List_size(a->string_list));
96 unsigned i = 0, ret = 0;
97 struct string item;
98 for(; i < index; i++) {
99 assert(List_get(a->string_list, i, &item));
100 ret += item.len + 1;
102 return ret;
105 static size_t get_string_section_length(AS* a) {
106 struct string item;;
107 size_t i = 0, l = 0;
108 for(; i < List_size(a->string_list); i++) {
109 assert(List_get(a->string_list, i, &item));
110 l += item.len + 1;
112 return l;
115 static int add_variable(AS *a, char* name, unsigned vs, size_t offset) {
116 struct variable item = { .name = strdup(name), .vs = vs, .offset = offset };
117 return List_add(a->variable_list, &item);
120 static int get_variable_offset(AS* a, char* name) {
121 /* return globaldata offset of named variable */
122 size_t i = 0;
123 struct variable item;
124 for(; i < List_size(a->variable_list); i++) {
125 assert(List_get(a->variable_list, i, &item));
126 if(!strcmp(item.name, name))
127 return item.offset;
129 assert(0);
130 return 0;
/* Scan `in` from its beginning for a line that starts with '.' followed by
 * `name` (prefix match) and return the file offset of the line after it,
 * or -1 when no such line exists.
 *
 * `*lineno` receives the number of lines read, which on success is the
 * line number of the section header. Lines are read in chunks of at most
 * 1023 characters; a longer line is counted once per chunk. */
static ssize_t find_section(FILE* in, char* name, size_t *lineno) {
	char line[1024];
	size_t consumed = 0, namelen = strlen(name);

	*lineno = 0;
	fseek(in, 0, SEEK_SET);
	while(fgets(line, sizeof line, in)) {
		++*lineno;
		consumed += strlen(line);
		/* strncmp stops at the line's terminator, so a short line is never
		 * compared against stale bytes left behind in the buffer */
		if(line[0] == '.' && strncmp(line + 1, name, namelen) == 0)
			return consumed;
	}
	return -1;
}
147 static int asm_data(AS* a) {
148 size_t lineno;
149 ssize_t start = find_section(a->in, "data", &lineno);
150 if(start == -1) return 1; // it is valid for .s file to only have .text
151 fseek(a->in, start, SEEK_SET);
152 char buf[1024];
153 size_t data_pos = 0;
154 while(fgets(buf, sizeof buf, a->in) && buf[0] != '.') {
155 if(buf[0] == '\n') continue;
156 char* p = buf, *pend = buf + sizeof buf, *var;
157 int exportflag = 0;
158 unsigned vs = 0;
159 if(*p == '#' || *p == ';') continue;
160 while(isspace(*p) && p < pend) p++;
161 if(!memcmp(p, "export", 6) && isspace(p[6])) {
162 p += 7;
163 exportflag = 1;
164 while(isspace(*p) && p < pend) p++;
166 if(memcmp(p, "int", 3) == 0)
167 vs = 4;
168 else if(memcmp(p, "short", 5) == 0)
169 vs = 2;
170 else if(memcmp(p, "char", 4) == 0) {
171 vs = 1;
172 if(p[4] == '[') {
173 vs = atoi(p+5);
174 char *q = p+5;
175 while(isdigit(*q) && q < pend) q++;
176 if(vs == 0 || *q != ']') {
177 dprintf(2, "error: expected number > 0 and ']' after '['\n");
178 return 0;
181 else vs = 1;
182 } else if(memcmp(p, "string", 6) == 0)
183 vs = 200;
184 else {
185 dprintf(2, "error: expected int, short, char, or string\n");
186 return 0;
188 while(!isspace(*p) && p < pend) p++;
189 while(isspace(*p) && p < pend) p++;
190 var = p;
191 while(!isspace(*p) && p < pend) p++;
192 *p = 0; p++;
193 assert(p < pend && *p == '=');
194 p++; while(isspace(*p) && p < pend) p++;
195 assert(p < pend);
196 int value;
198 if(*p == '.') {
199 p++;
200 if(memcmp(p, "data", 4) == 0) {
201 p += 4;
202 while(isspace(*p) && p < pend) p++;
203 assert(p < pend && *p == '+');
204 p++;
205 while(isspace(*p) && p < pend) p++;
206 value = atoi(p);
207 add_fixup(a, FIXUP_DATADATA, data_pos);
208 goto write_var;
209 } else {
210 dprintf(2, "error: expected \"data\"\n");
211 return 0;
213 } else {
214 value = atoi(p);
215 write_var:
216 switch (vs) {
217 default:
218 for(value = vs; value >= 10; value-=10)
219 ByteArray_writeMem(a->data, (void*)"\0\0\0\0\0\0\0\0\0\0", 10);
220 while(value--) ByteArray_writeUnsignedByte(a->data, 0);
221 break;
222 case 4:
223 ByteArray_writeInt(a->data, value);
224 break;
225 case 2:
226 ByteArray_writeShort(a->data, value);
227 break;
228 case 1:
229 ByteArray_writeUnsignedByte(a->data, value);
230 break;
233 if(exportflag) add_export(a, EXPORT_DATA, var, data_pos);
234 add_variable(a, var, vs, data_pos);
235 data_pos += vs;
237 return 1;
240 ssize_t get_import_index(AS* a, char* name, size_t len) {
241 size_t i;
242 struct string item;
243 for(i = 0; i < List_size(a->import_list); i++) {
244 assert(List_get(a->import_list, i, &item));
245 if(len == item.len && !strcmp(name, item.ptr)) return i;
247 return -1;
250 void add_import(AS *a, char* name) {
251 size_t l = strlen(name);
252 if(get_import_index(a, name, l) != -1) return;
253 struct string item;
254 item.ptr = strdup(name);
255 item.len = l;
256 List_add(a->import_list, &item);
259 static int find_export(AS *a, int type, char* name, unsigned *offset) {
260 struct export item;
261 size_t i;
262 for(i = 0; i < List_size(a->export_list); i++) {
263 assert(List_get(a->export_list, i, &item));
264 if(item.type == type && !strcmp(name, item.fn)) {
265 *offset = item.instr;
266 return 1;
269 return 0;
272 void generate_import_table(AS *a) {
273 size_t i;
274 struct label item;
275 unsigned off;
276 for(i = 0; i < List_size(a->function_ref_list); i++) {
277 assert(List_get(a->function_ref_list, i, &item));
278 if(!find_export(a, EXPORT_FUNCTION, item.name, &off))
279 add_import(a, item.name);
283 #include "ags_cpu.h"
285 int get_reg(char* regname) {
286 int i = AR_NULL + 1;
287 for(; i < AR_MAX; i++)
288 if(strcmp(regnames[i], regname) == 0)
289 return i;
290 return AR_NULL;
293 static size_t mnemolen[SCMD_MAX];
294 static int mnemolen_initdone = 0;
296 static void init_mnemolen(void) {
297 size_t i = 0;
298 for(; i< SCMD_MAX; i++)
299 mnemolen[i] = strlen(opcodes[i].mnemonic);
300 mnemolen_initdone = 1;
303 static unsigned find_insn(char* sym) {
304 if(!mnemolen_initdone) init_mnemolen();
305 size_t i = 0, l = strlen(sym);
306 for(; i< SCMD_MAX; i++)
307 if(l == mnemolen[i] && memcmp(sym, opcodes[i].mnemonic, l) == 0)
308 return i;
309 return 0;
312 #include "StringEscape.h"
/* Expects `x` to point at the first character after the opening '"' of a
 * string literal that extends to the end of the line. Trailing whitespace
 * is ignored, the closing '"' is overwritten with NUL, and the unescaped
 * text is written to `convbuf` (capacity `convbuflen`).
 * Returns the value reported by unescape(), or (size_t) -1 when the
 * closing '"' is missing. */
static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t convbuflen) {
	char *e = x + strlen(x);
	assert(e < end);
	/* nothing at all after the opening quote: report it, don't abort */
	if(e == x) return (size_t) -1;
	e--;
	/* never step back beyond the start of the string contents */
	while(e > x && isspace((unsigned char) *e)) e--;
	if(*e != '"') return (size_t) -1;
	*e = 0;
	return unescape(x, convbuf, convbuflen);
}
/* Cut the argument starting at `*p` out of the line and return it.
 *
 * A string literal (leading '"') is unescaped into `convbuf`, re-wrapped
 * in double quotes and returned from there; `*p` is then set to NULL
 * because a string is always the last argument. 0 is returned when the
 * closing quote is missing.
 * Any other argument ends at the next ',' or whitespace character (or at
 * the end of the line); that character is overwritten with NUL and `*p`
 * is advanced to the start of the next argument. */
static char* finalize_arg(char **p, char* pend, char* convbuf, size_t convbuflen) {
	if(**p == '"') {
		size_t l;
		convbuf[0] = '"';
		l = get_length_and_convert(*p + 1, pend, convbuf+1, convbuflen - 1);
		if(l == (size_t) -1) return 0;
		/* both quotes and the terminator have to fit */
		assert(l + 2 < convbuflen);
		convbuf[l+1] = '"';
		convbuf[l+2] = 0;
		*p = 0; /* make it crash if its accessed again, since a string should always be the last arg */
		return convbuf;
	} else {
		char *arg = *p, *s = *p;
		/* also stop at the terminator so that stale bytes behind the
		 * current line are never mistaken for part of the argument */
		while(s < pend && *s && *s != ',' && !isspace((unsigned char) *s)) s++;
		assert(s < pend);
		if(*s) *s++ = 0;
		while(s < pend && isspace((unsigned char) *s)) s++;
		assert(s < pend);
		*p = s;
		return arg;
	}
}
348 static int asm_strings(AS *a) {
349 /* add strings in .strings section, even when they're not used from .text */
350 size_t lineno;
351 ssize_t start = find_section(a->in, "strings", &lineno);
352 if(start == -1) return 1;
353 fseek(a->in, start, SEEK_SET);
354 char buf[1024];
355 while(fgets(buf, sizeof buf, a->in) && buf[0] != '.') {
356 char* p = buf;
357 if(*p == '#' || *p == ';') continue;
358 assert(*p == '"');
359 size_t l = strlen(p);
360 assert(l>1 && p[l-1] == '\n' && p[l-2] == '"');
361 p[l-1] = 0;
362 add_or_get_string(a, p);
364 return 1;
367 static int asm_text(AS *a) {
368 size_t lineno;
369 ssize_t start = find_section(a->in, "text", &lineno);
370 if(start == -1) return 1;
371 fseek(a->in, start, SEEK_SET);
372 char buf[1024];
373 char convbuf[sizeof(buf)]; /* to convert escaped string into non-escaped version */
374 size_t pos = 0;
375 while(fgets(buf, sizeof buf, a->in) && buf[0] != '.') {
376 lineno++;
377 char* p = buf, *pend = buf + sizeof buf;
378 if(*p == '#' || *p == ';') continue;
379 while(isspace(*p) && p < pend) p++;
380 assert(p < pend);
381 if(!*p) continue;
382 char* sym = p;
383 while(!isspace(*p) && p < pend) p++;
384 *p = 0; p++;
385 size_t l = strlen(sym);
386 if(l > 1 && sym[l-1] == ':') {
387 // functionstart or label
388 sym[l-1] = 0;
389 if(memcmp(sym, "label", 5) == 0)
390 add_label(a, sym, pos);
391 else {
392 add_export(a, EXPORT_FUNCTION, sym, pos);
393 ByteArray_writeUnsignedInt(a->code, SCMD_THISBASE);
394 ByteArray_writeUnsignedInt(a->code, pos);
395 pos+=2;
397 continue;
399 unsigned instr = find_insn(sym);
400 if(!instr) {
401 dprintf(2, "line %zu: error: unknown instruction '%s'\n", lineno, sym);
402 return 0;
404 if(instr == SCMD_THISBASE) continue; /* we emit this instruction ourselves when a new function starts. */
406 ByteArray_writeUnsignedInt(a->code, instr);
407 pos++;
408 size_t arg;
409 for(arg = 0; arg < opcodes[instr].argcount; arg++) {
410 sym = finalize_arg(&p, pend, convbuf, sizeof(convbuf));
411 if(sym == 0) {
412 dprintf(2, "line %zu: error: expected \"\n", lineno);
413 return 0;
415 int value = 0;
416 if(arg < opcodes[instr].regcount) {
417 value=get_reg(sym);
418 if(instr == SCMD_REGTOREG) {
419 /* fix reversed order of arguments */
420 int dst = value;
421 sym = p;
422 while(p < pend && *p != ',' && !isspace(*p)) p++;
423 assert(p < pend);
424 *p = 0;
425 value=get_reg(sym);
426 ByteArray_writeInt(a->code, value);
427 ByteArray_writeInt(a->code, dst);
428 pos += 2;
429 break;
431 } else {
432 switch(instr) {
433 case SCMD_LITTOREG:
434 /* immediate can be function name, string,
435 * variable name, stack fixup, or numeric value */
436 if(sym[0] == '"') {
437 value = get_string_offset(a, add_or_get_string(a, sym));
438 add_fixup(a, FIXUP_STRING, pos);
439 } else if(sym[0] == '@') {
440 value = get_variable_offset(a, sym+1);
441 add_fixup(a, FIXUP_GLOBALDATA, pos);
442 } else if(sym[0] == '.') {
443 if(memcmp(sym+1, "stack", 5)) {
444 dprintf(2, "error: expected stack\n");
445 return 0;
447 sym += 6;
448 while(isspace(*sym) && sym < pend) sym++;
449 assert(sym < pend && *sym == '+');
450 sym++;
451 while(isspace(*sym) && sym < pend) sym++;
452 add_fixup(a, FIXUP_STACK, pos);
453 value = atoi(sym);
454 } else if(isdigit(sym[0]) || sym[0] == '-') {
455 if(sym[0] == '-') assert(isdigit(sym[1]));
456 value = atoi(sym);
457 } else
458 add_function_ref(a, sym, pos);
459 break;
460 case SCMD_JMP: case SCMD_JZ: case SCMD_JNZ:
461 add_label_ref(a, sym, pos);
462 break;
463 default:
464 value = atoi(sym);
467 ByteArray_writeInt(a->code, value);
468 pos++;
472 size_t i;
473 struct label item;
474 for(i = 0; i < List_size(a->label_ref_list); i++) {
475 assert(List_get(a->label_ref_list, i, &item));
476 ByteArray_set_position(a->code, item.insno * 4);
477 int lbl = get_label_offset(a, item.name);
478 assert(lbl >= 0 && lbl < pos);
479 int label_insno = lbl - (item.insno+1); /* offset is calculated from next instruction */
480 ByteArray_writeInt(a->code, label_insno);
482 generate_import_table(a);
483 for(i = 0; i < List_size(a->function_ref_list); i++) {
484 assert(List_get(a->function_ref_list, i, &item));
485 ssize_t imp = get_import_index(a, item.name, strlen(item.name));
486 if(imp == -1) {
487 unsigned off;
488 assert(find_export(a, EXPORT_FUNCTION, item.name, &off));
489 imp = off;
490 add_fixup(a, FIXUP_FUNCTION, item.insno);
491 } else {
492 add_fixup(a, FIXUP_IMPORT, item.insno);
494 assert(imp != -1);
495 ByteArray_set_position(a->code, item.insno * 4);
496 ByteArray_writeInt(a->code, imp);
499 return 1;
501 #include "endianness.h"
/* Write `val` to `o` as 4 raw bytes. When IS_LITTLE_ENDIAN is not defined
 * the value is passed through byteswap32() first. */
static void write_int(FILE* o, int val) {
	int raw = val;
#ifndef IS_LITTLE_ENDIAN
	raw = byteswap32(raw);
#endif
	fwrite(&raw, 4, 1, o);
}
509 static int fixup_comparefunc(const void *a, const void* b) {
510 const struct fixup* fa = a, *fb = b;
511 if(fa->type == FIXUP_DATADATA && fb->type != FIXUP_DATADATA)
512 return -1;
513 if(fb->type == FIXUP_DATADATA && fa->type != FIXUP_DATADATA)
514 return 1;
515 if(fa->offset < fb->offset) return -1;
516 if(fa->offset == fb->offset) return 0;
517 return 1;
520 static void sort_fixup_list(AS* a) {
521 List_sort(a->fixup_list, fixup_comparefunc);
524 static void write_fixup_list(AS* a, FILE *o) {
525 struct fixup item;
526 size_t i;
527 for(i = 0; i < List_size(a->fixup_list); i++) {
528 assert(List_get(a->fixup_list, i, &item));
529 char type = item.type;
530 fwrite(&type, 1, 1, o);
532 for(i = 0; i < List_size(a->fixup_list); i++) {
533 assert(List_get(a->fixup_list, i, &item));
534 write_int(o, item.offset);
538 static void write_string_section(AS* a, FILE* o) {
539 struct string item;
540 size_t i = 0;
541 for(; i < List_size(a->string_list); i++) {
542 assert(List_get(a->string_list, i, &item));
543 fwrite(item.ptr, item.len + 1, 1, o);
547 static void write_import_section(AS* a, FILE* o) {
548 struct string item;
549 size_t i = 0;
550 for(; i < List_size(a->import_list); i++) {
551 assert(List_get(a->import_list, i, &item));
552 fwrite(item.ptr, item.len + 1, 1, o);
556 static void write_export_section(AS* a, FILE* o) {
557 struct export item;
558 size_t i = 0;
559 for(; i < List_size(a->export_list); i++) {
560 assert(List_get(a->export_list, i, &item));
561 fwrite(item.fn, strlen(item.fn) + 1, 1, o);
562 unsigned encoded = (item.type << 24) | (item.instr &0x00FFFFFF);
563 write_int(o, encoded);
567 static void write_sections_section(AS* a, FILE *o) {
568 //FIXME
571 static int write_object(AS *a, char *out) {
572 FILE *o;
573 if(!(o = fopen(out, "w"))) return 0;
574 fprintf(o, "SCOM");
575 write_int(o, 83); //version
576 write_int(o, ByteArray_get_length(a->data)); // globaldatasize
577 write_int(o, ByteArray_get_length(a->code) / 4); // codesize
578 write_int(o, get_string_section_length(a)); // stringssize
579 size_t l = ByteArray_get_length(a->data);
580 void *p;
581 if(l) {
582 p = mem_getptr(&a->data->source.mem, 0, l); // FIXME dont access directly, use some getter method
583 assert(p);
584 fwrite(p,l,1,o); // globaldata
586 l = ByteArray_get_length(a->code);
587 if(l) {
588 p = mem_getptr(&a->code->source.mem, 0, l);
589 assert(p);
590 fwrite(p,l,1,o); // code
592 write_string_section(a, o);
593 write_int(o, List_size(a->fixup_list));
594 sort_fixup_list(a);
595 write_fixup_list(a, o);
596 if(!List_size(a->import_list)) {
597 /* AGS declares object files with 0 imports as invalid */
598 add_import(a, "");
600 write_int(o, List_size(a->import_list));
601 write_import_section(a, o);
602 write_int(o, List_size(a->export_list));
603 write_export_section(a, o);
604 write_int(o, 0); // FIXME sectioncount
605 write_sections_section(a, o);
606 write_int(o, 0xbeefcafe); // magic end marker.
607 fclose(o);
608 return 1;
611 int AS_assemble(AS* a, char* out) {
612 if(!asm_data(a)) return 0;
613 if(!asm_text(a)) return 0;
614 // if(!asm_strings(a)) return 0; // emitting unneeded strings is not necessary
615 if(!write_object(a, out)) return 0;
616 return 1;
/* Comparison callback for keys that are C strings: both arguments point
 * to a `char *`, and the strings they refer to are compared with strcmp. */
static int strptrcmp(const void *a, const void *b) {
	const char *lhs = *(const char * const *) a;
	const char *rhs = *(const char * const *) b;
	return strcmp(lhs, rhs);
}
/* Hash a C string: every character is added to sixteen times the running
 * value, after which bits 28..31 are folded back into bits 4..7.
 * The result is limited to 28 bits. */
static unsigned string_hash(const char* s) {
	uint_fast32_t h = 0;
	for(; *s; s++) {
		h = (h << 4) + *s;
		h ^= (h >> 24) & 0xf0;
	}
	return h & 0xfffffff;
}
633 void AS_open_stream(AS* a, FILE* f) {
634 memset(a, 0, sizeof *a);
635 a->obj = &a->obj_b;
636 a->data = &a->data_b;
637 a->code = &a->code_b;
638 ByteArray_ctor(a->obj);
639 ByteArray_open_mem(a->obj, 0, 0);
640 ByteArray_ctor(a->data);
641 ByteArray_set_endian(a->data, BAE_LITTLE);
642 ByteArray_set_flags(a->data, BAF_CANGROW);
643 ByteArray_open_mem(a->data, 0, 0);
644 ByteArray_ctor(a->code);
645 ByteArray_set_endian(a->code, BAE_LITTLE);
646 ByteArray_set_flags(a->code, BAF_CANGROW);
647 ByteArray_open_mem(a->code, 0, 0);
649 a->export_list = &a->export_list_b;
650 a->fixup_list = &a->fixup_list_b;
651 a->string_list = &a->string_list_b;
652 a->label_ref_list = &a->label_ref_list_b;
653 a->function_ref_list = &a->function_ref_list_b;
654 a->variable_list = &a->variable_list_b;
655 a->import_list = &a->import_list_b;
656 a->label_map = (void*) &a->label_map_b;
658 List_init(a->export_list, sizeof(struct export));
659 List_init(a->fixup_list , sizeof(struct fixup));
660 List_init(a->string_list, sizeof(struct string));
661 List_init(a->label_ref_list, sizeof(struct label));
662 List_init(a->function_ref_list, sizeof(struct label));
663 List_init(a->variable_list, sizeof(struct variable));
664 List_init(a->import_list, sizeof(struct string));
665 hbmap_init(a->label_map, strptrcmp, string_hash);
667 a->in = f;
670 int AS_open(AS* a, char* fn) {
671 FILE *f = fopen(fn, "r");
672 if(!f) return 0;
673 AS_open_stream(a, f);
674 return 1;
678 void AS_close(AS* a) {
679 fclose(a->in);