agsoptimize: add some passes
[rofl0r-agsutils.git] / agssim.c
blob4b7d0aa440f6b4b131affb9047a0008f04dfdbb7
1 #include <stdio.h>
2 #include <ctype.h>
3 #include <string.h>
4 #include <stdlib.h>
5 #include <assert.h>
7 #include "ags_cpu.h"
/* How an instruction accesses one of its register operands.
 * RA_READWRITE is a flag value of its own, not RA_READ | RA_WRITE. */
enum RegisterAccess {
	RA_NONE      = 0x0,
	RA_READ      = 0x1,
	RA_WRITE     = 0x2,
	RA_READWRITE = 0x4,
};
/* Access pattern of an opcode's first and second operand.
 * Both members hold an enum RegisterAccess value, stored in one byte each. */
struct regaccess_info {
	unsigned char ra_reg1; /* enum RegisterAccess of operand 1 */
	unsigned char ra_reg2; /* enum RegisterAccess of operand 2 */
} __attribute__((packed));
21 static const struct regaccess_info regaccess_info[] = {
22 [0] = {RA_NONE, RA_NONE},
23 [SCMD_ADD] = {RA_READWRITE, RA_NONE},
24 [SCMD_SUB] = {RA_READWRITE, RA_NONE},
25 [SCMD_REGTOREG] = {RA_READ, RA_WRITE},
26 [SCMD_WRITELIT] = {RA_NONE, RA_NONE}, // TODO
27 [SCMD_RET] = {RA_NONE, RA_NONE},
28 [SCMD_LITTOREG] = {RA_WRITE, RA_NONE},
29 [SCMD_MEMREAD] = {RA_WRITE, RA_NONE},
30 [SCMD_MEMWRITE] = {RA_READ, RA_NONE},
31 [SCMD_MULREG] = {RA_READWRITE, RA_READ},
32 [SCMD_DIVREG] = {RA_READWRITE, RA_READ},
33 [SCMD_ADDREG] = {RA_READWRITE, RA_READ},
34 [SCMD_SUBREG] = {RA_READWRITE, RA_READ},
35 [SCMD_BITAND] = {RA_READWRITE, RA_READ},
36 [SCMD_BITOR] = {RA_READWRITE, RA_READ},
37 [SCMD_ISEQUAL] = {RA_READWRITE, RA_READ},
38 [SCMD_NOTEQUAL] = {RA_READWRITE, RA_READ},
39 [SCMD_GREATER] = {RA_READWRITE, RA_READ},
40 [SCMD_LESSTHAN] = {RA_READWRITE, RA_READ},
41 [SCMD_GTE] = {RA_READWRITE, RA_READ},
42 [SCMD_LTE] = {RA_READWRITE, RA_READ},
43 [SCMD_AND] = {RA_READWRITE, RA_READ}, /*logical*/
44 [SCMD_OR] = {RA_READWRITE, RA_READ},
45 [SCMD_CALL] = {RA_READ, RA_NONE},
46 [SCMD_MEMREADB] = {RA_WRITE, RA_NONE},
47 [SCMD_MEMREADW] = {RA_WRITE, RA_NONE},
48 [SCMD_MEMWRITEB] = {RA_READ, RA_NONE},
49 [SCMD_MEMWRITEW] = {RA_READ, RA_NONE},
50 [SCMD_JZ] = {RA_READ, RA_NONE},
51 [SCMD_PUSHREG] = {RA_READ, RA_NONE},
52 [SCMD_POPREG] = {RA_WRITE, RA_NONE},
53 [SCMD_JMP] = {RA_READ, RA_NONE},
54 [SCMD_MUL] = {RA_READWRITE, RA_NONE},
55 [SCMD_CALLEXT] = {RA_READ, RA_NONE},
56 [SCMD_PUSHREAL] = {RA_READ, RA_NONE},
57 [SCMD_SUBREALSTACK] = {RA_READ, RA_NONE},
58 [SCMD_LINENUM] = {RA_NONE, RA_NONE},
59 [SCMD_CALLAS] = {RA_READ, RA_NONE},
60 [SCMD_THISBASE] = {RA_NONE, RA_NONE},
61 [SCMD_NUMFUNCARGS] = {RA_NONE, RA_NONE},
62 [SCMD_MODREG] = {RA_READWRITE, RA_READ},
63 [SCMD_XORREG] = {RA_READWRITE, RA_READ},
64 [SCMD_NOTREG] = {RA_READWRITE, RA_READ},
65 [SCMD_SHIFTLEFT] = {RA_READWRITE, RA_READ},
66 [SCMD_SHIFTRIGHT] = {RA_READWRITE, RA_READ},
67 [SCMD_CALLOBJ] = {RA_READ, RA_NONE},
68 [SCMD_CHECKBOUNDS] = {RA_READ, RA_NONE},
69 [SCMD_MEMWRITEPTR] = {RA_NONE, RA_NONE}, //TODO
70 [SCMD_MEMREADPTR] = {RA_NONE, RA_NONE}, //TODO
71 [SCMD_MEMZEROPTR] = {RA_NONE, RA_NONE},
72 [SCMD_MEMINITPTR] = {RA_NONE, RA_NONE}, //TODO
73 [SCMD_LOADSPOFFS] = {RA_NONE, RA_NONE},
74 [SCMD_CHECKNULL] = {RA_NONE, RA_NONE},
75 [SCMD_FADD] = {RA_READWRITE, RA_NONE},
76 [SCMD_FSUB] = {RA_READWRITE, RA_NONE},
77 [SCMD_FMULREG] = {RA_READWRITE, RA_READ},
78 [SCMD_FDIVREG] = {RA_READWRITE, RA_READ},
79 [SCMD_FADDREG] = {RA_READWRITE, RA_READ},
80 [SCMD_FSUBREG] = {RA_READWRITE, RA_READ},
81 [SCMD_FGREATER] = {RA_READWRITE, RA_READ},
82 [SCMD_FLESSTHAN] = {RA_READWRITE, RA_READ},
83 [SCMD_FGTE] = {RA_READWRITE, RA_READ},
84 [SCMD_FLTE] = {RA_READWRITE, RA_READ},
85 [SCMD_ZEROMEMORY] = {RA_NONE, RA_NONE},
86 [SCMD_CREATESTRING] = {RA_NONE, RA_NONE}, //TODO
87 [SCMD_STRINGSEQUAL] = {RA_READWRITE, RA_READ},
88 [SCMD_STRINGSNOTEQ] = {RA_READWRITE, RA_READ},
89 [SCMD_CHECKNULLREG] = {RA_NONE, RA_NONE}, //TODO
90 [SCMD_LOOPCHECKOFF] = {RA_NONE, RA_NONE},
91 [SCMD_MEMZEROPTRND] = {RA_NONE, RA_NONE},
92 [SCMD_JNZ] = {RA_NONE, RA_NONE},
93 [SCMD_DYNAMICBOUNDS] = {RA_NONE, RA_NONE}, //TODO
94 [SCMD_NEWARRAY] = {RA_NONE, RA_NONE}, //TODO
/* Generic two-argument maximum/minimum. Each macro has its own guard so
 * that a header providing only one of them neither hides nor clashes with
 * the other. Arguments are evaluated more than once: do not pass
 * expressions with side effects. */
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
102 /* TODO: move duplicate code from Assembler.c into separate TU */
103 static int get_reg(char* regname) {
104 int i = AR_NULL + 1;
105 for(; i < AR_MAX; i++)
106 if(strcmp(regnames[i], regname) == 0)
107 return i;
108 return AR_NULL;
111 static size_t mnemolen[SCMD_MAX];
112 static int mnemolen_initdone = 0;
114 static void init_mnemolen(void) {
115 size_t i = 0;
116 for(; i< SCMD_MAX; i++)
117 mnemolen[i] = strlen(opcodes[i].mnemonic);
118 mnemolen_initdone = 1;
121 static unsigned find_insn(char* sym) {
122 if(!mnemolen_initdone) init_mnemolen();
123 size_t i = 0, l = strlen(sym);
124 for(; i< SCMD_MAX; i++)
125 if(l == mnemolen[i] && memcmp(sym, opcodes[i].mnemonic, l) == 0)
126 return i;
127 return 0;
130 #include "StringEscape.h"
/* expects a pointer to the first char after an opening " in a string.
 * cuts the string off at its closing " (whitespace following the closing "
 * is ignored), converts it into convbuf with unescape() and returns
 * unescape()'s result.
 * returns (size_t) -1 if the text does not end in a closing ". */
static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t convbuflen) {
	char* e = x + strlen(x);
	assert(e < end);
	/* walk back over trailing whitespace, but never past the start of the
	 * string contents: otherwise a blank remainder would make the opening
	 * quote at x[-1] pass for the closing one. */
	while(e > x && isspace((unsigned char) e[-1])) e--;
	if(e == x || e[-1] != '"') return (size_t) -1;
	e[-1] = 0;
	return unescape(x, convbuf, convbuflen);
}
/* sets the last char in arg to 0, and advances the pointer to the start of
 * the next arg.
 * For an arg starting with " the rest of the line is treated as a string:
 * it is converted into convbuf (wrapped in double quotes again), convbuf is
 * returned and *p is set to NULL, since a string should always be the last
 * arg. Returns 0 if the string has no closing " or does not fit convbuf.
 * For any other arg a pointer to the (now 0-terminated) arg is returned. */
static char* finalize_arg(char **p, char* pend, char* convbuf, size_t convbuflen) {
	if(**p == '"') {
		/* room for both quotes and the terminator is needed at least */
		if(convbuflen < 3) return 0;
		convbuf[0] = '"';
		size_t l = get_length_and_convert(*p + 1, pend, convbuf+1, convbuflen - 1);
		if(l == (size_t) -1 || l > convbuflen - 3) return 0;
		convbuf[l+1] = '"';
		convbuf[l+2] = 0;
		*p = 0; /* a string is the last arg; the caller must not read on */
		return convbuf;
	}
	char* ret = *p;
	/* stop at the line's 0 terminator too, so that a last arg without a
	 * trailing newline does not run into stale bytes behind it */
	while(*p < pend && **p && **p != ',' && !isspace((unsigned char) **p)) (*p)++;
	assert(*p < pend);
	if(**p) {
		**p = 0;
		(*p)++;
	}
	while(*p < pend && isspace((unsigned char) **p)) (*p)++;
	assert(*p < pend);
	return ret;
}
/* growable buffer holding the assembled program as a flat array of ints */
static struct text_segment {
	int *code;   /* heap storage, grown by grow_text() */
	size_t len;  /* number of ints in use */
	size_t capa; /* number of ints allocated */
} text = { 0 };
/* Accumulated usage state of a register since the last vm_init(). */
enum RegisterUsage {
	RU_NONE             = 0x0,
	RU_READ             = 0x1,
	RU_WRITE            = 0x2,
	RU_WRITE_AFTER_READ = 0x4,
};
180 static struct rval {
181 union {
182 int i;
183 float f;
185 enum RegisterUsage ru;
186 } registers[AR_MAX];
188 static int stack_mem[1000];
190 static void grow_text(size_t req) {
191 if(text.len + req > text.capa) {
192 text.code = realloc(text.code, (text.capa+1024)*sizeof(int));
193 text.capa += 1024;
197 static void append_code(int *code, size_t cnt) {
198 grow_text(cnt);
199 size_t i;
200 for(i = 0; i < cnt; i++) {
201 text.code[text.len++] = code[i];
205 static void vm_init() {
206 size_t i;
207 /* initialize registers to an easily recognisable junk value */
208 for(i = AR_NULL + 1; i < AR_MAX; i++) {
209 registers[i].i = 2222222222;
210 registers[i].ru = RU_NONE;
212 registers[AR_SP].i = (sizeof(stack_mem)/sizeof(stack_mem[0])) -1;
213 registers[AR_NULL].i = 0;
/* Advances *eip by one int and returns the int it then points to. */
static inline int consume_int(int **eip) {
	int *next = *eip + 1;
	*eip = next;
	return *next;
}
221 static void change_reg_usage(int regno, enum RegisterAccess ra) {
222 enum RegisterUsage ru = registers[regno].ru;
223 switch(ra) {
224 case RA_READ:
225 if(ru == RU_NONE || ru == RU_READ) ru = RU_READ;
226 else if(ru == RU_WRITE);
227 else if(ru == RU_WRITE_AFTER_READ);
228 break;
229 case RA_WRITE:
230 if(ru == RU_NONE || ru == RU_WRITE) ru = RU_WRITE;
231 else if(ru == RU_READ) ru = RU_WRITE_AFTER_READ;
232 else if(ru == RU_WRITE_AFTER_READ);
233 break;
234 case RA_READWRITE:
235 if(ru == RU_NONE || ru == RU_READ) ru = RU_WRITE_AFTER_READ;
236 else if(ru == RU_WRITE);
237 else if(ru == RU_WRITE_AFTER_READ);
238 break;
240 registers[regno].ru = ru;
243 static void vm_update_register_usage(int *eip) {
244 const struct regaccess_info *ri = &regaccess_info[*eip];
245 if(ri->ra_reg1) change_reg_usage(eip[1], ri->ra_reg1);
246 if(ri->ra_reg2) change_reg_usage(eip[2], ri->ra_reg2);
/* Operand accessors. They expect a local `int *eip` that points at the
 * opcode of the current instruction; operand X lives at eip[X]. */
#define CODE_INT(X) eip[X]
/* Returns the float whose bytes are stored in *slot. The bytes are copied
 * instead of read through a casted pointer, which would violate the
 * aliasing rules. Assumes sizeof(float) <= sizeof(int). */
static inline float code_float(const int *slot) {
	float f;
	memcpy(&f, slot, sizeof f);
	return f;
}
#define CODE_FLOAT(X) code_float(&eip[X])
/* int / float view of the register named by operand X */
#define REGI(X) registers[CODE_INT(X)].i
#define REGF(X) registers[CODE_INT(X)].f
254 static void vm_step() {
255 /* we use register AR_NULL as instruction pointer */
256 int *eip = &text.code[registers[AR_NULL].i];
257 int eip_inc = 1 + opcodes[*eip].argcount;
258 vm_update_register_usage(eip);
260 switch(*eip) {
261 case SCMD_ADD:
262 REGI(1) += CODE_INT(2);
263 break;
264 case SCMD_SUB:
265 REGI(1) -= CODE_INT(2);
266 break;
267 case SCMD_REGTOREG:
268 REGI(2) = REGI(1);
269 break;
270 case SCMD_LITTOREG:
271 REGI(1) = CODE_INT(2);
272 break;
273 case SCMD_MULREG:
274 REGI(1) *= REGI(2);
275 break;
276 case SCMD_DIVREG:
277 REGI(1) /= REGI(2);
278 break;
279 case SCMD_ADDREG:
280 REGI(1) += REGI(2);
281 break;
282 case SCMD_SUBREG:
283 REGI(1) -= REGI(2);
284 break;
285 case SCMD_BITAND:
286 REGI(1) &= REGI(2);
287 break;
288 case SCMD_BITOR:
289 REGI(1) &= REGI(2);
290 break;
291 case SCMD_ISEQUAL:
292 REGI(1) = !!(REGI(1) == REGI(2));
293 break;
294 case SCMD_NOTEQUAL:
295 REGI(1) = !!(REGI(1) != REGI(2));
296 break;
297 case SCMD_GREATER:
298 REGI(1) = !!(REGI(1) > REGI(2));
299 break;
300 case SCMD_LESSTHAN:
301 REGI(1) = !!(REGI(1) < REGI(2));
302 break;
303 case SCMD_GTE:
304 REGI(1) = !!(REGI(1) >= REGI(2));
305 break;
306 case SCMD_LTE:
307 REGI(1) = !!(REGI(1) <= REGI(2));
308 break;
309 case SCMD_AND:
310 REGI(1) = !!(REGI(1) && REGI(2));
311 break;
312 case SCMD_OR:
313 REGI(1) = !!(REGI(1) || REGI(2));
314 break;
315 case SCMD_PUSHREG:
316 stack_mem[--registers[AR_SP].i] = REGI(1);
317 break;
318 case SCMD_POPREG:
319 REGI(1) = stack_mem[registers[AR_SP].i++];
320 break;
321 case SCMD_MUL:
322 REGI(1) *= CODE_INT(2);
323 break;
324 case SCMD_THISBASE:
325 case SCMD_LINENUM:
326 break;
327 case SCMD_MODREG:
328 REGI(1) %= REGI(2);
329 break;
330 case SCMD_XORREG:
331 REGI(1) ^= REGI(2);
332 break;
333 case SCMD_NOTREG:
334 REGI(1) = !REGI(2);
335 break;
336 case SCMD_SHIFTLEFT:
337 REGI(1) <<= REGI(2);
338 break;
339 case SCMD_SHIFTRIGHT:
340 REGI(1) >>= REGI(2);
341 break;
342 case SCMD_FADD:
343 REGF(1) += CODE_FLOAT(2);
344 break;
345 case SCMD_FSUB:
346 REGF(1) -= CODE_FLOAT(2);
347 break;
348 case SCMD_FMULREG:
349 REGF(1) *= REGF(2);
350 break;
351 case SCMD_FDIVREG:
352 REGF(1) /= REGF(2);
353 break;
354 case SCMD_FADDREG:
355 REGF(1) += REGF(2);
356 break;
357 case SCMD_FSUBREG:
358 REGF(1) -= REGF(2);
359 break;
360 case SCMD_FGREATER:
361 REGI(1) = !!(REGF(1) > REGF(2));
362 break;
363 case SCMD_FLESSTHAN:
364 REGI(1) = !!(REGF(1) < REGF(2));
365 break;
366 case SCMD_FGTE:
367 REGI(1) = !!(REGF(1) >= REGF(2));
368 break;
369 case SCMD_FLTE:
370 REGI(1) = !!(REGF(1) <= REGF(2));
371 break;
372 case SCMD_NEWARRAY:
373 case SCMD_DYNAMICBOUNDS:
374 case SCMD_JNZ:
375 case SCMD_MEMZEROPTRND:
376 case SCMD_LOOPCHECKOFF:
377 case SCMD_CHECKNULLREG:
378 case SCMD_STRINGSNOTEQ:
379 case SCMD_STRINGSEQUAL:
380 case SCMD_CREATESTRING:
381 case SCMD_ZEROMEMORY:
382 case SCMD_CHECKNULL:
383 case SCMD_LOADSPOFFS:
384 case SCMD_MEMINITPTR:
385 case SCMD_MEMZEROPTR:
386 case SCMD_MEMREADPTR:
387 case SCMD_MEMWRITEPTR:
388 case SCMD_CHECKBOUNDS:
389 case SCMD_CALLOBJ:
390 case SCMD_NUMFUNCARGS:
391 case SCMD_CALLAS:
392 case SCMD_SUBREALSTACK:
393 case SCMD_PUSHREAL:
394 case SCMD_CALLEXT:
395 case SCMD_JMP:
396 case SCMD_JZ:
397 case SCMD_MEMWRITEW:
398 case SCMD_MEMWRITEB:
399 case SCMD_MEMREADW:
400 case SCMD_MEMREADB:
401 case SCMD_CALL:
402 case SCMD_MEMREAD:
403 case SCMD_MEMWRITE:
404 case SCMD_WRITELIT:
405 case SCMD_RET:
406 default:
407 dprintf(2, "info: %s not implemented yet\n", opcodes[*eip].mnemonic);
409 size_t i, l = opcodes[*eip].argcount;
410 for(i = 0; i < l; i++) ++(*eip);
412 break;
414 registers[AR_NULL].i += eip_inc;
/* Writes the decimal representation of value into out and returns out.
 * out must be large enough for the digits, a sign and the terminator. */
static inline char *int_to_str(int value, char* out) {
	char *dest = out;
	(void) sprintf(dest, "%d", value);
	return dest;
}
422 static void vm_state() {
423 static const char ru_strings[][3] = {
424 [RU_NONE] = {0},
425 [RU_READ] = {'R', 0},
426 [RU_WRITE] = {'W', 0},
427 [RU_WRITE_AFTER_READ] = {'R', 'W', 0},
429 size_t i;
430 for(i=0; i< AR_MAX; i++)
431 printf("%s: %2s %d\n", i == 0 ? "eip" : regnames[i], ru_strings[registers[i].ru], registers[i].i);
433 for(i=MIN(registers[AR_SP].i+2, 999); i >= MAX(0, registers[AR_SP].i-2); i--) {
434 printf("SL %s %3zu %d\n", i == registers[AR_SP].i ? ">" : " ", i, stack_mem[i]);
437 int *eip = &text.code[registers[AR_NULL].i];
438 char arg1buf[32], arg2buf[32];
439 const char *arg1 = opcodes[*eip].argcount == 0 ? "" : \
440 (opcodes[*eip].regcount > 0 ? regnames[eip[1]] : int_to_str(eip[1], arg1buf));
441 const char *arg2 = opcodes[*eip].argcount < 2 ? "" : \
442 (opcodes[*eip].regcount > 1 ? regnames[eip[2]] : int_to_str(eip[2], arg2buf));
443 printf(" > %s %s %s\n", opcodes[*eip].mnemonic, arg1, arg2);
446 void vm_run(void) {
447 while(1) {
448 int *eip = &text.code[registers[AR_NULL].i];
449 if(!*eip) break;
450 vm_step();
/* Runs a user command: "s" single-steps, "r" runs, "i" resets the VM.
 * Any other command does nothing. The VM state is printed in all cases. */
static void execute_user_command(char *cmd) {
	/* all known commands are exactly one character long */
	if(cmd[0] && !cmd[1]) {
		switch(cmd[0]) {
		case 's': vm_step(); break;
		case 'r': vm_run(); break;
		case 'i': vm_init(); break;
		default: break;
		}
	}
	vm_state();
}
466 int main(int argc, char** argv) {
467 if(argc != 1) {
468 dprintf(2,
469 "%s - simple ags vm simulator\n"
470 "implements the ALU and a small stack\n"
471 "useful to examine how a chunk of code modifies VM state\n"
472 "not implemented: memory access apart from stack, jumps, functions\n"
473 "supply the assembly code via stdin, then type one of the following\n"
474 "commands:\n"
475 "!i - reset VM state and IP\n"
476 "!s - single-step\n"
477 "!r - run\n"
478 , argv[0]);
479 return 1;
481 char buf[1024], *sym;
482 char convbuf[sizeof(buf)]; /* to convert escaped string into non-escaped version */
483 int lineno = 0;
484 vm_init();
485 while(fgets(buf, sizeof buf, stdin)) {
486 int code[3];
487 size_t pos = 0;
488 lineno++;
489 char* p = buf, *pend = buf + sizeof buf;
490 if(*p == '#' || *p == ';') continue;
491 if(*p == '!') {
492 char *n = strchr(p, '\n');
493 if(n) *n = 0;
494 execute_user_command(p+1);
495 continue;
497 while(isspace(*p) && p < pend) p++;
498 assert(p < pend);
499 if(!*p) continue;
500 char* sym = p;
501 while(!isspace(*p) && p < pend) p++;
502 *p = 0; p++;
503 size_t l = strlen(sym);
504 if(l > 1 && sym[l-1] == ':') {
505 // functionstart or label
506 sym[l-1] = 0;
507 // we currently ignore that
508 continue;
510 unsigned instr = find_insn(sym);
511 if(!instr) {
512 dprintf(2, "line %zu: error: unknown instruction '%s'\n", lineno, sym);
513 continue;
515 code[pos++] = instr;
516 size_t arg;
517 for(arg = 0; arg < opcodes[instr].argcount; arg++) {
518 sym = finalize_arg(&p, pend, convbuf, sizeof(convbuf));
519 if(sym == 0) {
520 dprintf(2, "line %zu: error: expected \"\n", lineno);
521 goto loop_footer;
523 int value = 0;
524 if(arg < opcodes[instr].regcount) {
525 value=get_reg(sym);
526 if(instr == SCMD_REGTOREG) {
527 /* fix reversed order of arguments */
528 int dst = value;
529 sym = p;
530 while(p < pend && *p != ',' && !isspace(*p)) p++;
531 assert(p < pend);
532 *p = 0;
533 value=get_reg(sym);
534 code[pos++] = value;
535 code[pos++] = dst;
536 break;
538 } else {
539 switch(instr) {
540 case SCMD_LITTOREG:
541 /* immediate can be function name, string,
542 * variable name, stack fixup, or numeric value */
543 if(sym[0] == '"') {
544 dprintf(2, "error: string handling not implemented\n");
545 goto loop_footer;
546 } else if(sym[0] == '@') {
547 dprintf(2, "error: global variable handling not implemented\n");
548 goto loop_footer;
549 } else if(sym[0] == '.') {
550 if(memcmp(sym+1, "stack", 5)) {
551 dprintf(2, "error: expected stack\n");
552 goto loop_footer;;
554 dprintf(2, "error: stack fixup not implemented\n");
555 goto loop_footer;
556 } else if(isdigit(sym[0]) || sym[0] == '-') {
557 if(sym[0] == '-') assert(isdigit(sym[1]));
558 value = atoi(sym);
559 } else {
560 dprintf(2, "error: function refs not implemented yet\n");
561 goto loop_footer;
563 break;
565 case SCMD_JMP: case SCMD_JZ: case SCMD_JNZ:
566 add_label_ref(a, sym, pos);
567 break;
569 default:
570 value = atoi(sym);
573 code[pos++] = value;
575 append_code(code, pos);
576 loop_footer: ;