disassembler: fix bug omitting exported vars not used in module
[rofl0r-agsutils.git] / agssim.c
blob7aede101fa6424d94e4563ec7d01fcc1f9952168
1 #include <stdio.h>
2 #include <ctype.h>
3 #include <string.h>
4 #include <stdlib.h>
5 #include <assert.h>
7 #include "ags_cpu.h"
/* Describes how an instruction touches one of its register operands.
 * Note that RA_READWRITE is a flag of its own, not RA_READ | RA_WRITE. */
enum RegisterAccess {
	RA_NONE      = 0x0,	/* no register access recorded for the operand */
	RA_READ      = 0x1,	/* the register is read */
	RA_WRITE     = 0x2,	/* the register is written */
	RA_READWRITE = 0x4,	/* the register is read and then written */
};
/* Register access kinds of the first two operands of one opcode.
 * Both members hold an enum RegisterAccess value squeezed into one byte. */
struct regaccess_info {
	unsigned char ra_reg1;	/* enum RegisterAccess of operand 1 */
	unsigned char ra_reg2;	/* enum RegisterAccess of operand 2 */
} __attribute__((packed));
21 static const struct regaccess_info regaccess_info[] = {
22 [0] = {RA_NONE, RA_NONE},
23 [SCMD_ADD] = {RA_READWRITE, RA_NONE},
24 [SCMD_SUB] = {RA_READWRITE, RA_NONE},
25 [SCMD_REGTOREG] = {RA_READ, RA_WRITE},
26 [SCMD_WRITELIT] = {RA_NONE, RA_NONE}, // TODO
27 [SCMD_RET] = {RA_NONE, RA_NONE},
28 [SCMD_LITTOREG] = {RA_WRITE, RA_NONE},
29 [SCMD_MEMREAD] = {RA_WRITE, RA_NONE},
30 [SCMD_MEMWRITE] = {RA_READ, RA_NONE},
31 [SCMD_MULREG] = {RA_READWRITE, RA_READ},
32 [SCMD_DIVREG] = {RA_READWRITE, RA_READ},
33 [SCMD_ADDREG] = {RA_READWRITE, RA_READ},
34 [SCMD_SUBREG] = {RA_READWRITE, RA_READ},
35 [SCMD_BITAND] = {RA_READWRITE, RA_READ},
36 [SCMD_BITOR] = {RA_READWRITE, RA_READ},
37 [SCMD_ISEQUAL] = {RA_READWRITE, RA_READ},
38 [SCMD_NOTEQUAL] = {RA_READWRITE, RA_READ},
39 [SCMD_GREATER] = {RA_READWRITE, RA_READ},
40 [SCMD_LESSTHAN] = {RA_READWRITE, RA_READ},
41 [SCMD_GTE] = {RA_READWRITE, RA_READ},
42 [SCMD_LTE] = {RA_READWRITE, RA_READ},
43 [SCMD_AND] = {RA_READWRITE, RA_READ}, /*logical*/
44 [SCMD_OR] = {RA_READWRITE, RA_READ},
45 [SCMD_CALL] = {RA_READ, RA_NONE},
46 [SCMD_MEMREADB] = {RA_WRITE, RA_NONE},
47 [SCMD_MEMREADW] = {RA_WRITE, RA_NONE},
48 [SCMD_MEMWRITEB] = {RA_READ, RA_NONE},
49 [SCMD_MEMWRITEW] = {RA_READ, RA_NONE},
50 [SCMD_JZ] = {RA_READ, RA_NONE},
51 [SCMD_PUSHREG] = {RA_READ, RA_NONE},
52 [SCMD_POPREG] = {RA_WRITE, RA_NONE},
53 [SCMD_JMP] = {RA_READ, RA_NONE},
54 [SCMD_MUL] = {RA_READWRITE, RA_NONE},
55 [SCMD_CALLEXT] = {RA_READ, RA_NONE},
56 [SCMD_PUSHREAL] = {RA_READ, RA_NONE},
57 [SCMD_SUBREALSTACK] = {RA_READ, RA_NONE},
58 [SCMD_LINENUM] = {RA_NONE, RA_NONE},
59 [SCMD_CALLAS] = {RA_READ, RA_NONE},
60 [SCMD_THISBASE] = {RA_NONE, RA_NONE},
61 [SCMD_NUMFUNCARGS] = {RA_NONE, RA_NONE},
62 [SCMD_MODREG] = {RA_READWRITE, RA_READ},
63 [SCMD_XORREG] = {RA_READWRITE, RA_READ},
64 [SCMD_NOTREG] = {RA_READWRITE, RA_READ},
65 [SCMD_SHIFTLEFT] = {RA_READWRITE, RA_READ},
66 [SCMD_SHIFTRIGHT] = {RA_READWRITE, RA_READ},
67 [SCMD_CALLOBJ] = {RA_READ, RA_NONE},
68 [SCMD_CHECKBOUNDS] = {RA_READ, RA_NONE},
69 [SCMD_MEMWRITEPTR] = {RA_NONE, RA_NONE}, //TODO
70 [SCMD_MEMREADPTR] = {RA_NONE, RA_NONE}, //TODO
71 [SCMD_MEMZEROPTR] = {RA_NONE, RA_NONE},
72 [SCMD_MEMINITPTR] = {RA_NONE, RA_NONE}, //TODO
73 [SCMD_LOADSPOFFS] = {RA_NONE, RA_NONE},
74 [SCMD_CHECKNULL] = {RA_NONE, RA_NONE},
75 [SCMD_FADD] = {RA_READWRITE, RA_NONE},
76 [SCMD_FSUB] = {RA_READWRITE, RA_NONE},
77 [SCMD_FMULREG] = {RA_READWRITE, RA_READ},
78 [SCMD_FDIVREG] = {RA_READWRITE, RA_READ},
79 [SCMD_FADDREG] = {RA_READWRITE, RA_READ},
80 [SCMD_FSUBREG] = {RA_READWRITE, RA_READ},
81 [SCMD_FGREATER] = {RA_READWRITE, RA_READ},
82 [SCMD_FLESSTHAN] = {RA_READWRITE, RA_READ},
83 [SCMD_FGTE] = {RA_READWRITE, RA_READ},
84 [SCMD_FLTE] = {RA_READWRITE, RA_READ},
85 [SCMD_ZEROMEMORY] = {RA_NONE, RA_NONE},
86 [SCMD_CREATESTRING] = {RA_NONE, RA_NONE}, //TODO
87 [SCMD_STRINGSEQUAL] = {RA_READWRITE, RA_READ},
88 [SCMD_STRINGSNOTEQ] = {RA_READWRITE, RA_READ},
89 [SCMD_CHECKNULLREG] = {RA_NONE, RA_NONE}, //TODO
90 [SCMD_LOOPCHECKOFF] = {RA_NONE, RA_NONE},
91 [SCMD_MEMZEROPTRND] = {RA_NONE, RA_NONE},
92 [SCMD_JNZ] = {RA_NONE, RA_NONE},
93 [SCMD_DYNAMICBOUNDS] = {RA_NONE, RA_NONE}, //TODO
94 [SCMD_NEWARRAY] = {RA_NONE, RA_NONE}, //TODO
/* Classic two-argument maximum/minimum. Both macros evaluate their
 * arguments twice, so do not pass expressions with side effects.
 * Each macro has its own guard, so MIN is still provided when some header
 * defines only MAX. */
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
102 /* TODO: move duplicate code from Assembler.c into separate TU */
103 static int get_reg(char* regname) {
104 int i = AR_NULL + 1;
105 for(; i < AR_MAX; i++)
106 if(strcmp(regnames[i], regname) == 0)
107 return i;
108 return AR_NULL;
111 static size_t mnemolen[SCMD_MAX];
112 static int mnemolen_initdone = 0;
114 static void init_mnemolen(void) {
115 size_t i = 0;
116 for(; i< SCMD_MAX; i++)
117 mnemolen[i] = strlen(opcodes[i].mnemonic);
118 mnemolen_initdone = 1;
121 static unsigned find_insn(char* sym) {
122 if(!mnemolen_initdone) init_mnemolen();
123 size_t i = 0, l = strlen(sym);
124 for(; i< SCMD_MAX; i++)
125 if(l == mnemolen[i] && memcmp(sym, opcodes[i].mnemonic, l) == 0)
126 return i;
127 return 0;
130 #include "StringEscape.h"
/* Expects x to point at the first char after an opening " of a string that
 * lives in a NUL-terminated buffer ending at `end`.
 * Drops trailing whitespace and the closing " (the buffer is modified in
 * place), then passes the remaining text to unescape() together with
 * convbuf/convbuflen and returns whatever unescape() returns.
 * Returns (size_t) -1 if the text does not end in a closing ". */
static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t convbuflen) {
	char* e = x + strlen(x);
	assert(e < end && *e == 0);
	/* nothing after the opening quote: the closing quote is missing.
	 * this is malformed user input, so report it instead of asserting */
	if(e == x) return (size_t) -1;
	e--;
	/* never step back beyond x, otherwise the opening quote at x[-1]
	 * would be mistaken for the closing one */
	while(e > x && isspace((unsigned char) *e)) e--;
	if(*e != '"') return (size_t) -1;
	*e = 0;
	return unescape(x, convbuf, convbuflen);
}
/* Sets the char following the argument at *p to 0 and advances *p to the
 * start of the next argument.
 *
 * String argument (*p points at "): the text is converted with
 * get_length_and_convert() and stored in convbuf, wrapped in double quotes
 * again. Returns convbuf, or 0 if the closing quote is missing or the
 * result does not fit into convbuflen bytes. *p is set to NULL afterwards.
 *
 * Any other argument: it ends at the first ',' or whitespace or at the end
 * of the string. Returns a pointer to the argument inside the input buffer.
 * If the argument was the last thing in the buffer, *p is left on the
 * terminating NUL, so a further call yields an empty argument. */
static char* finalize_arg(char **p, char* pend, char* convbuf, size_t convbuflen) {
	if(**p == '"') {
		/* both quotes and the terminating NUL have to fit around the text */
		if(convbuflen < 3) return 0;
		convbuf[0] = '"';
		size_t l = get_length_and_convert(*p + 1, pend, convbuf+1, convbuflen - 3);
		if(l == (size_t) -1) return 0;
		if(l > convbuflen - 3) return 0;
		convbuf[l+1] = '"';
		convbuf[l+2] = 0;
		*p = 0; /* make it crash if its accessed again, since a string should always be the last arg */
		return convbuf;
	} else {
		char* ret = *p;
		/* also stop at NUL: a last line without newline must not make
		 * us scan stale bytes left over from a previous, longer line */
		while(*p < pend && **p && **p != ',' && !isspace((unsigned char) **p)) (*p)++;
		assert(*p < pend);
		if(**p) {
			**p = 0; (*p)++;
			while(*p < pend && isspace((unsigned char) **p)) (*p)++;
			assert(*p < pend);
		}
		return ret;
	}
}
/* The loaded program: a growable, flat array of ints (opcodes followed by
 * their operands). */
static struct text_segment {
	int *code;	/* heap buffer, 0 until something is appended */
	size_t len;	/* number of ints in use */
	size_t capa;	/* number of ints the buffer has room for */
} text;
/* What has happened to a register so far. The values are also used as
 * array indices, so they must not change. */
enum RegisterUsage {
	RU_NONE             = 0x0,	/* untouched */
	RU_READ             = 0x1,	/* read */
	RU_WRITE            = 0x2,	/* written */
	RU_WRITE_AFTER_READ = 0x4,	/* read first, then written */
};
180 static struct rval {
181 union {
182 int i;
183 float f;
185 enum RegisterUsage ru;
186 } registers[AR_MAX];
/* backing store of the simulated stack: 1000 slots of 4 bytes */
static unsigned char stack_mem[1000*4];
/* the stack is the only memory the simulator has */
#define memory stack_mem

/* Returns non-zero if the cnt bytes starting at byte offset index lie
 * completely inside memory, including a range that ends exactly at the end
 * of memory. Negative offsets or counts are never readable. */
static int canread(int index, int cnt) {
	const size_t size = sizeof(memory)/sizeof(memory[0]);
	if(index < 0 || cnt < 0) return 0;
	/* written as a subtraction so that index+cnt cannot overflow */
	return (size_t) index <= size && (size_t) cnt <= size - (size_t) index;
}
195 static void grow_text(size_t req) {
196 if(text.len + req > text.capa) {
197 text.code = realloc(text.code, (text.capa+1024)*sizeof(int));
198 text.capa += 1024;
202 static void append_code(int *code, size_t cnt) {
203 grow_text(cnt);
204 size_t i;
205 for(i = 0; i < cnt; i++) {
206 text.code[text.len++] = code[i];
210 static void vm_init() {
211 size_t i;
212 /* initialize registers to an easily recognisable junk value */
213 for(i = AR_NULL + 1; i < AR_MAX; i++) {
214 registers[i].i = 2222222222;
215 registers[i].ru = RU_NONE;
217 registers[AR_SP].i = 0;
218 registers[AR_NULL].i = 0;
/* Moves *eip forward by one int and returns the int it points at then. */
static inline int consume_int(int **eip) {
	int *next = *eip + 1;
	*eip = next;
	return *next;
}
226 static void change_reg_usage(int regno, enum RegisterAccess ra) {
227 enum RegisterUsage ru = registers[regno].ru;
228 switch(ra) {
229 case RA_READ:
230 if(ru == RU_NONE || ru == RU_READ) ru = RU_READ;
231 else if(ru == RU_WRITE);
232 else if(ru == RU_WRITE_AFTER_READ);
233 break;
234 case RA_WRITE:
235 if(ru == RU_NONE || ru == RU_WRITE) ru = RU_WRITE;
236 else if(ru == RU_READ) ru = RU_WRITE_AFTER_READ;
237 else if(ru == RU_WRITE_AFTER_READ);
238 break;
239 case RA_READWRITE:
240 if(ru == RU_NONE || ru == RU_READ) ru = RU_WRITE_AFTER_READ;
241 else if(ru == RU_WRITE);
242 else if(ru == RU_WRITE_AFTER_READ);
243 break;
245 registers[regno].ru = ru;
248 static void vm_update_register_usage(int *eip) {
249 const struct regaccess_info *ri = &regaccess_info[*eip];
250 if(ri->ra_reg1) change_reg_usage(eip[1], ri->ra_reg1);
251 if(ri->ra_reg2) change_reg_usage(eip[2], ri->ra_reg2);
254 static void write_mem(int off, int val) {
255 int *m = (void*) memory;
256 m[off/4] = val;
259 static int read_mem(int off) {
260 int *m = (void*) memory;
261 return m[off/4];
/* Returns the float whose bits are stored in the code slot at p.
 * The bits are copied out with memcpy, as reading an int object through a
 * float pointer is not allowed by the aliasing rules. */
static inline float code_float_at(const int *p) {
	float f;
	memcpy(&f, p, sizeof f);
	return f;
}

/* Accessors for the operands of the current instruction. They expect a
 * local `int *eip` pointing at the opcode; X is the operand number (1 is
 * the first operand). */
#define CODE_INT(X) eip[X]				/* operand X as int */
#define CODE_FLOAT(X) code_float_at(&eip[X])		/* operand X as float (value only) */
#define REGI(X) registers[CODE_INT(X)].i		/* int view of the register named by operand X */
#define REGF(X) registers[CODE_INT(X)].f		/* float view of the register named by operand X */
269 static void vm_step() {
270 /* we use register AR_NULL as instruction pointer */
271 int *eip = &text.code[registers[AR_NULL].i];
272 int eip_inc = 1 + opcodes[*eip].argcount;
273 int tmp;
274 vm_update_register_usage(eip);
276 switch(*eip) {
277 case SCMD_ADD:
278 REGI(1) += CODE_INT(2);
279 break;
280 case SCMD_SUB:
281 REGI(1) -= CODE_INT(2);
282 break;
283 case SCMD_REGTOREG:
284 REGI(2) = REGI(1);
285 break;
286 case SCMD_LITTOREG:
287 REGI(1) = CODE_INT(2);
288 break;
289 case SCMD_MULREG:
290 REGI(1) *= REGI(2);
291 break;
292 case SCMD_DIVREG:
293 REGI(1) /= REGI(2);
294 break;
295 case SCMD_ADDREG:
296 REGI(1) += REGI(2);
297 break;
298 case SCMD_SUBREG:
299 REGI(1) -= REGI(2);
300 break;
301 case SCMD_BITAND:
302 REGI(1) &= REGI(2);
303 break;
304 case SCMD_BITOR:
305 REGI(1) &= REGI(2);
306 break;
307 case SCMD_ISEQUAL:
308 REGI(1) = !!(REGI(1) == REGI(2));
309 break;
310 case SCMD_NOTEQUAL:
311 REGI(1) = !!(REGI(1) != REGI(2));
312 break;
313 case SCMD_GREATER:
314 REGI(1) = !!(REGI(1) > REGI(2));
315 break;
316 case SCMD_LESSTHAN:
317 REGI(1) = !!(REGI(1) < REGI(2));
318 break;
319 case SCMD_GTE:
320 REGI(1) = !!(REGI(1) >= REGI(2));
321 break;
322 case SCMD_LTE:
323 REGI(1) = !!(REGI(1) <= REGI(2));
324 break;
325 case SCMD_AND:
326 REGI(1) = !!(REGI(1) && REGI(2));
327 break;
328 case SCMD_OR:
329 REGI(1) = !!(REGI(1) || REGI(2));
330 break;
331 case SCMD_LOADSPOFFS:
332 registers[AR_MAR].i = registers[AR_SP].i - CODE_INT(1);
333 break;
334 case SCMD_PUSHREG:
335 write_mem(registers[AR_SP].i, REGI(1));
336 registers[AR_SP].i += 4;
337 break;
338 case SCMD_POPREG:
339 registers[AR_SP].i -= 4;
340 REGI(1) = read_mem(registers[AR_SP].i);
341 break;
342 case SCMD_MUL:
343 REGI(1) *= CODE_INT(2);
344 break;
345 case SCMD_THISBASE:
346 case SCMD_LINENUM:
347 break;
348 case SCMD_MODREG:
349 REGI(1) %= REGI(2);
350 break;
351 case SCMD_XORREG:
352 REGI(1) ^= REGI(2);
353 break;
354 case SCMD_NOTREG:
355 REGI(1) = !REGI(2);
356 break;
357 case SCMD_SHIFTLEFT:
358 REGI(1) <<= REGI(2);
359 break;
360 case SCMD_SHIFTRIGHT:
361 REGI(1) >>= REGI(2);
362 break;
363 case SCMD_FADD:
364 REGF(1) += CODE_FLOAT(2);
365 break;
366 case SCMD_FSUB:
367 REGF(1) -= CODE_FLOAT(2);
368 break;
369 case SCMD_FMULREG:
370 REGF(1) *= REGF(2);
371 break;
372 case SCMD_FDIVREG:
373 REGF(1) /= REGF(2);
374 break;
375 case SCMD_FADDREG:
376 REGF(1) += REGF(2);
377 break;
378 case SCMD_FSUBREG:
379 REGF(1) -= REGF(2);
380 break;
381 case SCMD_FGREATER:
382 REGI(1) = !!(REGF(1) > REGF(2));
383 break;
384 case SCMD_FLESSTHAN:
385 REGI(1) = !!(REGF(1) < REGF(2));
386 break;
387 case SCMD_FGTE:
388 REGI(1) = !!(REGF(1) >= REGF(2));
389 break;
390 case SCMD_FLTE:
391 REGI(1) = !!(REGF(1) <= REGF(2));
392 break;
393 case SCMD_MEMREAD:
394 tmp = 4;
395 goto mread;
396 case SCMD_MEMREADW:
397 tmp = 2;
398 goto mread;
399 case SCMD_MEMREADB:
400 tmp = 1;
401 mread:
402 if(canread(registers[AR_MAR].i, tmp)) {
403 int val = memory[registers[AR_MAR].i];
404 switch(tmp) {
405 case 4: REGI(1) = val; break;
406 case 2: REGI(1) = val & 0xffff; break;
407 case 1: REGI(1) = val & 0xff; break;
409 } else {
410 dprintf(2, "info: caught OOB memread\n");
412 break;
413 case SCMD_NEWARRAY:
414 case SCMD_DYNAMICBOUNDS:
415 case SCMD_JNZ:
416 case SCMD_MEMZEROPTRND:
417 case SCMD_LOOPCHECKOFF:
418 case SCMD_CHECKNULLREG:
419 case SCMD_STRINGSNOTEQ:
420 case SCMD_STRINGSEQUAL:
421 case SCMD_CREATESTRING:
422 case SCMD_ZEROMEMORY:
423 case SCMD_CHECKNULL:
424 case SCMD_MEMINITPTR:
425 case SCMD_MEMZEROPTR:
426 case SCMD_MEMREADPTR:
427 case SCMD_MEMWRITEPTR:
428 case SCMD_CHECKBOUNDS:
429 case SCMD_CALLOBJ:
430 case SCMD_NUMFUNCARGS:
431 case SCMD_CALLAS:
432 case SCMD_SUBREALSTACK:
433 case SCMD_PUSHREAL:
434 case SCMD_CALLEXT:
435 case SCMD_JMP:
436 case SCMD_JZ:
437 case SCMD_MEMWRITEW:
438 case SCMD_MEMWRITEB:
439 case SCMD_CALL:
440 case SCMD_MEMWRITE:
441 case SCMD_WRITELIT:
442 case SCMD_RET:
443 default:
444 dprintf(2, "info: %s not implemented yet\n", opcodes[*eip].mnemonic);
446 size_t i, l = opcodes[*eip].argcount;
447 for(i = 0; i < l; i++) ++(*eip);
449 break;
451 registers[AR_NULL].i += eip_inc;
/* Writes value as decimal text into out and hands out back, which makes the
 * call usable inside an expression. out must be big enough for any int
 * plus the terminating NUL. */
static inline char *int_to_str(int value, char* out) {
	char *dest = out;
	(void) sprintf(dest, "%d", value);
	return dest;
}
459 static void vm_state() {
460 static const char ru_strings[][3] = {
461 [RU_NONE] = {0},
462 [RU_READ] = {'R', 0},
463 [RU_WRITE] = {'W', 0},
464 [RU_WRITE_AFTER_READ] = {'R', 'W', 0},
466 size_t i;
467 for(i=0; i< AR_MAX; i++)
468 printf("%s: %2s %d\n", i == 0 ? "eip" : regnames[i], ru_strings[registers[i].ru], registers[i].i);
470 for( i = MIN(registers[AR_SP].i+2*4, sizeof(stack_mem)/4);
471 i >= MAX(registers[AR_SP].i-2*4, 0);
472 i-=4) {
473 printf("SL %s %3zu %d\n", i == registers[AR_SP].i ? ">" : " ", i, read_mem(i));
474 if(i == 0) break;
477 int *eip = &text.code[registers[AR_NULL].i];
478 char arg1buf[32], arg2buf[32];
479 const char *arg1 = opcodes[*eip].argcount == 0 ? "" : \
480 (opcodes[*eip].regcount > 0 ? regnames[eip[1]] : int_to_str(eip[1], arg1buf));
481 const char *arg2 = opcodes[*eip].argcount < 2 ? "" : \
482 (opcodes[*eip].regcount > 1 ? regnames[eip[2]] : int_to_str(eip[2], arg2buf));
483 printf(" > %s %s %s\n", opcodes[*eip].mnemonic, arg1, arg2);
486 void vm_run(void) {
487 while(1) {
488 int *eip = &text.code[registers[AR_NULL].i];
489 if(!*eip) break;
490 vm_step();
/* Runs one user command (the text following the '!') and prints the VM
 * state afterwards:
 *   s  single-step      r  run
 *   i  reset the VM     q  quit the program
 * Anything else only prints the state. */
static void execute_user_command(char *cmd) {
	/* all commands consist of exactly one letter */
	if(cmd[0] && !cmd[1]) {
		switch(cmd[0]) {
		case 's': vm_step(); break;
		case 'r': vm_run(); break;
		case 'i': vm_init(); break;
		case 'q': exit(0);
		default: break;
		}
	}
	vm_state();
}
508 int main(int argc, char** argv) {
509 if(argc != 1) {
510 dprintf(2,
511 "%s - simple ags vm simulator\n"
512 "implements the ALU and a small stack\n"
513 "useful to examine how a chunk of code modifies VM state\n"
514 "not implemented: memory access apart from stack, jumps, functions\n"
515 "supply the assembly code via stdin, then type one of the following\n"
516 "commands:\n"
517 "!i - reset VM state and IP\n"
518 "!s - single-step\n"
519 "!r - run\n"
520 , argv[0]);
521 return 1;
523 char buf[1024], *sym;
524 char convbuf[sizeof(buf)]; /* to convert escaped string into non-escaped version */
525 int lineno = 0;
526 vm_init();
527 while(fgets(buf, sizeof buf, stdin)) {
528 int code[3];
529 size_t pos = 0;
530 lineno++;
531 char* p = buf, *pend = buf + sizeof buf;
532 if(*p == '#' || *p == ';') continue;
533 if(*p == '!') {
534 char *n = strchr(p, '\n');
535 if(n) *n = 0;
536 execute_user_command(p+1);
537 continue;
539 while(isspace(*p) && p < pend) p++;
540 assert(p < pend);
541 if(!*p) continue;
542 char* sym = p;
543 while(!isspace(*p) && p < pend) p++;
544 *p = 0; p++;
545 size_t l = strlen(sym);
546 if(l > 1 && sym[l-1] == ':') {
547 // functionstart or label
548 sym[l-1] = 0;
549 // we currently ignore that
550 continue;
552 unsigned instr = find_insn(sym);
553 if(!instr) {
554 dprintf(2, "line %zu: error: unknown instruction '%s'\n", lineno, sym);
555 continue;
557 code[pos++] = instr;
558 size_t arg;
559 for(arg = 0; arg < opcodes[instr].argcount; arg++) {
560 sym = finalize_arg(&p, pend, convbuf, sizeof(convbuf));
561 if(sym == 0) {
562 dprintf(2, "line %zu: error: expected \"\n", lineno);
563 goto loop_footer;
565 int value = 0;
566 if(arg < opcodes[instr].regcount) {
567 value=get_reg(sym);
568 if(instr == SCMD_REGTOREG) {
569 /* fix reversed order of arguments */
570 int dst = value;
571 sym = p;
572 while(p < pend && *p != ',' && !isspace(*p)) p++;
573 assert(p < pend);
574 *p = 0;
575 value=get_reg(sym);
576 code[pos++] = value;
577 code[pos++] = dst;
578 break;
580 } else {
581 switch(instr) {
582 case SCMD_LITTOREG:
583 /* immediate can be function name, string,
584 * variable name, stack fixup, or numeric value */
585 if(sym[0] == '"') {
586 dprintf(2, "error: string handling not implemented\n");
587 goto loop_footer;
588 } else if(sym[0] == '@') {
589 dprintf(2, "error: global variable handling not implemented\n");
590 goto loop_footer;
591 } else if(sym[0] == '.') {
592 if(memcmp(sym+1, "stack", 5)) {
593 dprintf(2, "error: expected stack\n");
594 goto loop_footer;;
596 dprintf(2, "error: stack fixup not implemented\n");
597 goto loop_footer;
598 } else if(isdigit(sym[0]) || sym[0] == '-') {
599 if(sym[0] == '-') assert(isdigit(sym[1]));
600 value = atoi(sym);
601 } else {
602 dprintf(2, "error: function refs not implemented yet\n");
603 goto loop_footer;
605 break;
607 case SCMD_JMP: case SCMD_JZ: case SCMD_JNZ:
608 add_label_ref(a, sym, pos);
609 break;
611 default:
612 value = atoi(sym);
615 code[pos++] = value;
617 append_code(code, pos);
618 loop_footer: ;