agssim: don't accept wrong input when register name expected
[rofl0r-agsutils.git] / agssim.c
blob91cc6ab4ad1d8db7ddf2d54100f30e9eeb74b590
1 #include <stdio.h>
2 #include <ctype.h>
3 #include <string.h>
4 #include <stdlib.h>
5 #include <assert.h>
7 #include "ags_cpu.h"
8 #include "regusage.h"
9 #include "hbmap.h"
10 #include "version.h"
11 #define ADS ":::AGSSim " VERSION " by rofl0r:::"
13 #ifndef MAX
14 #define MAX(a, b) ((a) > (b) ? (a) : (b))
15 #endif
16 #ifndef MIN
17 #define MIN(a, b) ((a) < (b) ? (a) : (b))
18 #endif
20 #ifndef ARRAY_SIZE
21 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
22 #endif
/* growable buffer holding the assembled program, one int per
 * opcode or operand word */
static struct text_segment {
	int *code;   /* storage for the instruction words, (re)allocated by grow_text() */
	size_t len;  /* number of words currently in use */
	size_t capa; /* number of words allocated */
} text;
30 static void patch_label_offset(int old, int new) {
31 int *p = text.code, *pe = text.code + text.len;
32 while(p < pe) switch(*p) {
33 case SCMD_JMP: case SCMD_JZ: case SCMD_JNZ:
34 if(p[1] == old) p[1] = new;
35 /* fall-through */
36 default:
37 p += opcodes[*p].argcount + 1;
40 /* for label_map */
41 hbmap(char*, int, 32) *label_map;
42 static int *get_label_offset(char* name) {
43 return hbmap_get(label_map, name);
45 static int add_label(char* name, int insno) {
46 char* tmp = strdup(name);
47 int *old = get_label_offset(name);
48 if(old && (*old & (1<<31)))
49 patch_label_offset(*old, insno);
50 return hbmap_insert(label_map, tmp, insno) != -1;
/* comparison callback: A and B each point at a `char *`; the two
 * strings they refer to are compared with strcmp() */
static int strptrcmp(const void *a, const void *b) {
	const char *lhs = *(const char * const *) a;
	const char *rhs = *(const char * const *) b;
	return strcmp(lhs, rhs);
}
/* hash callback for label_map: shift-and-add over the bytes of S,
 * folding the high bits back in, result limited to 28 bits */
static unsigned string_hash(const char* s) {
	uint_fast32_t acc = 0;
	for(; *s; ++s) {
		acc = 16*acc + *s;
		acc ^= (acc >> 24) & 0xf0;
	}
	return acc & 0xfffffff;
}
65 static void init_labels() {
66 label_map = hbmap_new(strptrcmp, string_hash, 32);
69 /* TODO: move duplicate code from Assembler.c into separate TU */
70 static int get_reg(char* regname) {
71 int i = AR_NULL + 1;
72 for(; i < AR_MAX; i++)
73 if(strcmp(regnames[i], regname) == 0)
74 return i;
75 return AR_NULL;
78 static size_t mnemolen[SCMD_MAX];
79 static int mnemolen_initdone = 0;
81 static void init_mnemolen(void) {
82 size_t i = 0;
83 for(; i< SCMD_MAX; i++)
84 mnemolen[i] = strlen(opcodes[i].mnemonic);
85 mnemolen_initdone = 1;
88 static unsigned find_insn(char* sym) {
89 if(!mnemolen_initdone) init_mnemolen();
90 size_t i = 0, l = strlen(sym);
91 for(; i< SCMD_MAX; i++)
92 if(l == mnemolen[i] && memcmp(sym, opcodes[i].mnemonic, l) == 0)
93 return i;
94 return 0;
97 #include "StringEscape.h"
/* expects a pointer to the first char after an opening " in a string,
 * converts the string into convbuf, and returns the length of that string.
 * x:          NUL-terminated text following the opening quote; the closing
 *             quote is overwritten with 0
 * end:        one past the end of the buffer x lives in
 * convbuf:    receives the unescaped string
 * convbuflen: size of convbuf
 * returns (size_t) -1 if the last non-whitespace character is not a
 * closing quote (this includes an empty or all-whitespace remainder) */
static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t convbuflen) {
	char* e = x + strlen(x);
	assert(e < end);
	/* step back over trailing whitespace, but never in front of x,
	 * otherwise the opening quote would be taken for the closing one */
	while(e > x && isspace((unsigned char) e[-1])) e--;
	if(e == x || e[-1] != '"') return (size_t) -1;
	e[-1] = 0;
	return unescape(x, convbuf, convbuflen);
}
/* sets last char in arg to 0, and advances pointer till the next argstart.
 * p:       in/out cursor into the line buffer, positioned at the argument
 * pend:    one past the end of the line buffer
 * convbuf/convbuflen: scratch buffer used for quoted string arguments
 * returns the NUL-terminated argument; for a quoted string this is convbuf
 * holding the unescaped text wrapped in quotes. returns 0 if a quoted
 * string lacks its closing quote or does not fit into convbuf. */
static char* finalize_arg(char **p, char* pend, char* convbuf, size_t convbuflen) {
	if(**p == '"') {
		/* room is needed for both quotes and the terminator */
		if(convbuflen < 3) return 0;
		convbuf[0] = '"';
		size_t l= get_length_and_convert(*p + 1, pend, convbuf+1, convbuflen - 1);
		if(l == (size_t) -1 || l > convbuflen - 3) return 0;
		convbuf[l+1] = '"';
		convbuf[l+2] = 0;
		*p = 0; /* make it crash if its accessed again, since a string should always be the last arg */
		return convbuf;
	}
	char* ret = *p;
	/* the argument also ends at the line's NUL terminator; scanning
	 * past it would run into stale data from earlier lines */
	while(*p < pend && **p && **p != ',' && !isspace((unsigned char) **p)) (*p)++;
	assert(*p < pend);
	if(**p) {
		**p = 0; (*p)++;
		while(*p < pend && isspace((unsigned char) **p)) (*p)++;
		assert(*p < pend);
	}
	return ret;
}
133 static struct rval {
134 union {
135 int i;
136 float f;
138 enum RegisterUsage ru;
139 } registers[AR_MAX];
static unsigned char stack_mem[1000*4];
#define memory stack_mem

/* returns nonzero if the CNT bytes starting at byte offset INDEX lie
 * completely inside `memory`, 0 otherwise. An access that ends exactly
 * at the end of memory is valid. Negative arguments are rejected, and
 * the check is done without computing index+cnt so it cannot overflow. */
static int canread(int index, int cnt) {
	const size_t size = sizeof(memory)/sizeof(memory[0]);
	if(index < 0 || cnt < 0) return 0;
	if((size_t) index > size) return 0;
	return (size_t) cnt <= size - (size_t) index;
}
148 static void grow_text(size_t req) {
149 if(text.len + req > text.capa) {
150 text.code = realloc(text.code, (text.capa+1024)*sizeof(int));
151 text.capa += 1024;
155 static void append_code(int *code, size_t cnt) {
156 grow_text(cnt+1);
157 size_t i;
158 for(i = 0; i < cnt; i++) {
159 text.code[text.len++] = code[i];
161 text.code[text.len] = 0;
164 static void vm_reset_register_usage() {
165 size_t i;
166 for(i = AR_NULL + 1; i < AR_MAX; i++)
167 registers[i].ru = RU_NONE;
170 static void vm_init() {
171 size_t i;
172 /* initialize registers to an easily recognisable junk value */
173 for(i = AR_NULL + 1; i < AR_MAX; i++) {
174 registers[i].i = 2222222222;
176 vm_reset_register_usage();
177 registers[AR_SP].i = 0;
178 registers[AR_NULL].i = 0;
179 /* set up EIP so vm_state() doesn't crash */
180 grow_text(16);
181 /* put NULL insn as first instruction so VM doesn't execute
182 random garbage in mem */
183 text.code[0] = 0;
/* advance *EIP by one word and return the word it then points at */
static inline int consume_int(int **eip) {
	int *next = *eip + 1;
	*eip = next;
	return *next;
}
191 static void change_reg_usage(int regno, enum RegisterAccess ra) {
192 registers[regno].ru = get_reg_usage(regno, registers[regno].ru, ra);
195 static void vm_update_register_usage(int *eip) {
196 const struct regaccess_info *ri = &regaccess_info[*eip];
197 if(ri->ra_reg1) change_reg_usage(eip[1], ri->ra_reg1);
198 if(ri->ra_reg2) change_reg_usage(eip[2], ri->ra_reg2);
199 if(ri->ra_mar) change_reg_usage(AR_MAR, ri->ra_mar);
200 if(ri->ra_sp) change_reg_usage(AR_SP, ri->ra_sp);
203 static void write_mem1(int off, int val) {
204 unsigned char *m = (void*) memory;
205 m[off] = val&0xff;
207 static void write_mem2(int off, int val) {
208 unsigned short *m = (void*) memory;
209 m[off/2] = val&0xffff;
211 static void write_mem(int off, int val) {
212 int *m = (void*) memory;
213 m[off/4] = val;
216 static int read_mem(int off) {
217 int *m = (void*) memory;
218 return m[off/4];
221 #define CODE_INT(X) eip[X]
222 #define CODE_FLOAT(X) ((float*)eip)[X]
223 #define REGI(X) registers[CODE_INT(X)].i
224 #define REGF(X) registers[CODE_INT(X)].f
226 static int vm_step(int run_context) {
227 /* we use register AR_NULL as instruction pointer */
228 int *eip = &text.code[registers[AR_NULL].i];
229 int eip_inc = 1 + opcodes[*eip].argcount;
230 int tmp, val;
231 if(!run_context) vm_reset_register_usage();
232 vm_update_register_usage(eip);
234 switch(*eip) {
235 case 0:
236 /* don't modify IP */
237 dprintf(2, "no code at IP.\n");
238 return 0;
239 case SCMD_ADD:
240 REGI(1) += CODE_INT(2);
241 break;
242 case SCMD_SUB:
243 REGI(1) -= CODE_INT(2);
244 break;
245 case SCMD_REGTOREG:
246 REGI(2) = REGI(1);
247 break;
248 case SCMD_LITTOREG:
249 REGI(1) = CODE_INT(2);
250 break;
251 case SCMD_MULREG:
252 REGI(1) *= REGI(2);
253 break;
254 case SCMD_DIVREG:
255 REGI(1) /= REGI(2);
256 break;
257 case SCMD_ADDREG:
258 REGI(1) += REGI(2);
259 break;
260 case SCMD_SUBREG:
261 REGI(1) -= REGI(2);
262 break;
263 case SCMD_BITAND:
264 REGI(1) &= REGI(2);
265 break;
266 case SCMD_BITOR:
267 REGI(1) &= REGI(2);
268 break;
269 case SCMD_ISEQUAL:
270 REGI(1) = !!(REGI(1) == REGI(2));
271 break;
272 case SCMD_NOTEQUAL:
273 REGI(1) = !!(REGI(1) != REGI(2));
274 break;
275 case SCMD_GREATER:
276 REGI(1) = !!(REGI(1) > REGI(2));
277 break;
278 case SCMD_LESSTHAN:
279 REGI(1) = !!(REGI(1) < REGI(2));
280 break;
281 case SCMD_GTE:
282 REGI(1) = !!(REGI(1) >= REGI(2));
283 break;
284 case SCMD_LTE:
285 REGI(1) = !!(REGI(1) <= REGI(2));
286 break;
287 case SCMD_AND:
288 REGI(1) = !!(REGI(1) && REGI(2));
289 break;
290 case SCMD_OR:
291 REGI(1) = !!(REGI(1) || REGI(2));
292 break;
293 case SCMD_LOADSPOFFS:
294 registers[AR_MAR].i = registers[AR_SP].i - CODE_INT(1);
295 break;
296 case SCMD_PUSHREG:
297 write_mem(registers[AR_SP].i, REGI(1));
298 registers[AR_SP].i += 4;
299 break;
300 case SCMD_POPREG:
301 registers[AR_SP].i -= 4;
302 REGI(1) = read_mem(registers[AR_SP].i);
303 break;
304 case SCMD_MUL:
305 REGI(1) *= CODE_INT(2);
306 break;
307 case SCMD_THISBASE:
308 case SCMD_LINENUM:
309 break;
310 case SCMD_MODREG:
311 REGI(1) %= REGI(2);
312 break;
313 case SCMD_XORREG:
314 REGI(1) ^= REGI(2);
315 break;
316 case SCMD_NOTREG:
317 REGI(1) = !REGI(2);
318 break;
319 case SCMD_SHIFTLEFT:
320 REGI(1) <<= REGI(2);
321 break;
322 case SCMD_SHIFTRIGHT:
323 REGI(1) >>= REGI(2);
324 break;
325 case SCMD_FADD:
326 REGF(1) += CODE_FLOAT(2);
327 break;
328 case SCMD_FSUB:
329 REGF(1) -= CODE_FLOAT(2);
330 break;
331 case SCMD_FMULREG:
332 REGF(1) *= REGF(2);
333 break;
334 case SCMD_FDIVREG:
335 REGF(1) /= REGF(2);
336 break;
337 case SCMD_FADDREG:
338 REGF(1) += REGF(2);
339 break;
340 case SCMD_FSUBREG:
341 REGF(1) -= REGF(2);
342 break;
343 case SCMD_FGREATER:
344 REGI(1) = !!(REGF(1) > REGF(2));
345 break;
346 case SCMD_FLESSTHAN:
347 REGI(1) = !!(REGF(1) < REGF(2));
348 break;
349 case SCMD_FGTE:
350 REGI(1) = !!(REGF(1) >= REGF(2));
351 break;
352 case SCMD_FLTE:
353 REGI(1) = !!(REGF(1) <= REGF(2));
354 break;
355 case SCMD_WRITELIT:
356 tmp = CODE_INT(1);
357 if(tmp <= 0 || tmp > 4 || tmp == 3) {
358 dprintf(2, "invalid memcpy use\n");
359 break;
361 val = CODE_INT(2);
362 goto mwrite;
363 case SCMD_MEMWRITE:
364 tmp = 4;
365 val = REGI(1);
366 goto mwrite;
367 case SCMD_MEMWRITEW:
368 tmp = 2;
369 val = REGI(1);
370 goto mwrite;
371 case SCMD_MEMWRITEB:
372 tmp = 1;
373 val = REGI(1);
374 mwrite:
375 if(canread(registers[AR_MAR].i, tmp)) {
376 switch(tmp) {
377 case 4: write_mem (registers[AR_MAR].i, val); break;
378 case 2: write_mem2(registers[AR_MAR].i, val); break;
379 case 1: write_mem1(registers[AR_MAR].i, val); break;
381 } else {
382 dprintf(2, "info: caught OOB memwrite\n");
384 break;
385 case SCMD_MEMREAD:
386 tmp = 4;
387 goto mread;
388 case SCMD_MEMREADW:
389 tmp = 2;
390 goto mread;
391 case SCMD_MEMREADB:
392 tmp = 1;
393 mread:
394 if(canread(registers[AR_MAR].i, tmp)) {
395 int val = memory[registers[AR_MAR].i];
396 switch(tmp) {
397 case 4: REGI(1) = val; break;
398 case 2: REGI(1) = val & 0xffff; break;
399 case 1: REGI(1) = val & 0xff; break;
401 } else {
402 dprintf(2, "info: caught OOB memread\n");
404 break;
405 case SCMD_JZ:
406 if(registers[AR_AX].i == 0) goto jump;
407 break;
408 case SCMD_JNZ:
409 if(registers[AR_AX].i == 0) break;
410 /* fall through */
411 case SCMD_JMP:
412 jump:
413 tmp = CODE_INT(1);
414 if((tmp & (1<<31)) == 0) {
415 registers[AR_NULL].i = CODE_INT(1);
416 } else {
417 dprintf(2, "error: jump target lacks definition\n");
418 return 0;
420 eip_inc = 0;
421 break;
422 case SCMD_NEWARRAY:
423 case SCMD_DYNAMICBOUNDS:
424 case SCMD_MEMZEROPTRND:
425 case SCMD_LOOPCHECKOFF:
426 case SCMD_CHECKNULLREG:
427 case SCMD_STRINGSNOTEQ:
428 case SCMD_STRINGSEQUAL:
429 case SCMD_CREATESTRING:
430 case SCMD_ZEROMEMORY:
431 case SCMD_CHECKNULL:
432 case SCMD_MEMINITPTR:
433 case SCMD_MEMZEROPTR:
434 case SCMD_MEMREADPTR:
435 case SCMD_MEMWRITEPTR:
436 case SCMD_CHECKBOUNDS:
437 case SCMD_CALLOBJ:
438 case SCMD_NUMFUNCARGS:
439 case SCMD_CALLAS:
440 case SCMD_SUBREALSTACK:
441 case SCMD_PUSHREAL:
442 case SCMD_CALLEXT:
443 case SCMD_CALL:
444 case SCMD_RET:
445 default:
446 dprintf(2, "info: %s not implemented yet\n", opcodes[*eip].mnemonic);
448 size_t i, l = opcodes[*eip].argcount;
449 for(i = 0; i < l; i++) ++(*eip);
451 break;
453 registers[AR_NULL].i += eip_inc;
454 return 1;
/* format VALUE as decimal text into OUT and return OUT; the caller
 * provides a buffer large enough for the digits, sign and terminator */
static inline char *int_to_str(int value, char* out) {
	(void) sprintf(out, "%d", value);
	return out;
}
462 static void vm_state() {
463 static const char ru_strings[][3] = {
464 [RU_NONE] = {0},
465 [RU_READ] = {'R', 0},
466 [RU_WRITE] = {'W', 0},
467 [RU_WRITE_AFTER_READ] = {'R', 'W', 0},
469 static const char regorder[] = {
470 0, AR_MAR, AR_OP, AR_SP, -1,
471 AR_AX, AR_BX, AR_CX, AR_DX, -1, -1};
472 size_t i, j;
473 for(j=0; j < ARRAY_SIZE(regorder)-1; ++j) {
474 i = regorder[j];
475 if(i == -1) printf("\n");
476 else {
477 printf("%-3s: %-2s %-11d", i == 0 ? "eip" : regnames[i], ru_strings[registers[i].ru], registers[i].i);
478 if(regorder[j+1] != -1) printf(" ");
482 for( i = MIN(registers[AR_SP].i+2*4, sizeof(stack_mem)/4);
483 i >= MAX(registers[AR_SP].i-2*4, 0);
484 i-=4) {
485 printf("SL %s %3zu %d\n", i == registers[AR_SP].i ? ">" : " ", i, read_mem(i));
486 if(i == 0) break;
489 int *eip = &text.code[registers[AR_NULL].i];
490 char arg1buf[32], arg2buf[32];
491 const char *arg1 = opcodes[*eip].argcount == 0 ? "" : \
492 (opcodes[*eip].regcount > 0 ? regnames[eip[1]] : int_to_str(eip[1], arg1buf));
493 const char *arg2 = opcodes[*eip].argcount < 2 ? "" : \
494 (opcodes[*eip].regcount > 1 ? regnames[eip[2]] : int_to_str(eip[2], arg2buf));
495 printf(" > %s %s %s\n", opcodes[*eip].mnemonic, arg1, arg2);
498 void vm_run(void) {
499 while(1) {
500 int *eip = &text.code[registers[AR_NULL].i];
501 if(!*eip) break;
502 if(!vm_step(1)) break;
/* write the help text to file descriptor FD, with A0 as the program
 * name shown in the first line; always returns 1 */
static int usage(int fd, char *a0) {
	static const char help_text[] =
		" - simple ags vm simulator\n"
		"implements the ALU and a small stack\n"
		"useful to examine how a chunk of code modifies VM state\n"
		"not implemented: memory access apart from stack, jumps, functions\n"
		"supply the assembly code via stdin, then type one of the following\n"
		"commands:\n"
		"!i - reset VM state and IP\n"
		"!s - single-step\n"
		"!r - run\n";
	dprintf(fd, "%s%s", a0, help_text);
	return 1;
}
/* command executed most recently; 0 while none has been run yet */
static int lastcommand;

/* commands the user can issue; 0 is deliberately not a valid command */
enum UserCommand {
	UC_STEP = 1,
	UC_RUN  = 2,
	UC_INIT = 3,
	UC_QUIT = 4,
	UC_HELP = 5,
};
529 static void execute_user_command_i(int uc) {
530 switch(uc) {
531 case UC_STEP: vm_step(0); break;
532 case UC_RUN : vm_run(); break;
533 case UC_INIT: vm_init(); break;
534 case UC_QUIT: exit(0); break;
535 case UC_HELP: usage(1, "agssim"); break;
537 lastcommand = uc;
538 vm_state();
540 static void execute_user_command(char *cmd) {
541 int uc = 0;
542 if(0) ;
543 else if(!strcmp(cmd, "s")) uc = UC_STEP;
544 else if(!strcmp(cmd, "r")) uc = UC_RUN;
545 else if(!strcmp(cmd, "i")) uc = UC_INIT;
546 else if(!strcmp(cmd, "q")) uc = UC_QUIT;
547 else if(!strcmp(cmd, "h")) uc = UC_HELP;
548 else {
549 dprintf(2, "unknown command\n");
550 return;
552 execute_user_command_i(uc);
555 int main(int argc, char** argv) {
556 if(argc != 1) return usage(2, argv[0]);
557 char buf[1024], *sym;
558 char convbuf[sizeof(buf)]; /* to convert escaped string into non-escaped version */
559 int lineno = 0;
560 init_labels();
561 vm_init();
562 printf(ADS " - type !h for help\n");
563 while(fgets(buf, sizeof buf, stdin)) {
564 int code[4];
565 size_t pos = 0;
566 lineno++;
567 char* p = buf, *pend = buf + sizeof buf;
568 if(*p == '\n' && lastcommand) {
569 execute_user_command_i(lastcommand);
570 continue;
572 if(*p == '#' || *p == ';') continue;
573 if(*p == '!') {
574 char *n = strchr(p, '\n');
575 if(n) *n = 0;
576 execute_user_command(p+1);
577 continue;
579 while(isspace(*p) && p < pend) p++;
580 assert(p < pend);
581 if(!*p) continue;
582 char* sym = p;
583 while(!isspace(*p) && p < pend) p++;
584 *p = 0; p++;
585 size_t l = strlen(sym);
586 if(l > 1 && sym[l-1] == ':') {
587 // functionstart or label
588 sym[l-1] = 0;
589 int *loff = get_label_offset(sym);
590 if(loff) dprintf(2, "warning: label %s overwritten\n");
591 add_label(sym, text.len);
592 continue;
594 unsigned instr = find_insn(sym);
595 if(!instr) {
596 dprintf(2, "line %zu: error: unknown instruction '%s'\n", lineno, sym);
597 continue;
599 code[pos++] = instr;
600 size_t arg;
601 for(arg = 0; arg < opcodes[instr].argcount; arg++) {
602 sym = finalize_arg(&p, pend, convbuf, sizeof(convbuf));
603 if(sym == 0) {
604 dprintf(2, "line %zu: error: expected \"\n", lineno);
605 goto loop_footer;
607 int value = 0;
608 if(arg < opcodes[instr].regcount) {
609 value=get_reg(sym);
610 if(value == AR_NULL) {
611 needreg_err:
612 dprintf(2, "line %zu: error: expected register name!\n", lineno);
613 goto loop_footer;
615 if(instr == SCMD_REGTOREG) {
616 /* fix reversed order of arguments */
617 int dst = value;
618 sym = p;
619 while(p < pend && *p != ',' && !isspace(*p)) p++;
620 assert(p < pend);
621 *p = 0;
622 value=get_reg(sym);
623 if(value == AR_NULL) goto needreg_err;
624 code[pos++] = value;
625 code[pos++] = dst;
626 break;
628 } else {
629 switch(instr) {
630 case SCMD_LITTOREG:
631 /* immediate can be function name, string,
632 * variable name, stack fixup, or numeric value */
633 if(sym[0] == '"') {
634 dprintf(2, "error: string handling not implemented\n");
635 goto loop_footer;
636 } else if(sym[0] == '@') {
637 dprintf(2, "error: global variable handling not implemented\n");
638 goto loop_footer;
639 } else if(sym[0] == '.') {
640 if(memcmp(sym+1, "stack", 5)) {
641 dprintf(2, "error: expected stack\n");
642 goto loop_footer;;
644 dprintf(2, "error: stack fixup not implemented\n");
645 goto loop_footer;
646 } else if(isdigit(sym[0]) || sym[0] == '-') {
647 if(sym[0] == '-') assert(isdigit(sym[1]));
648 value = atoi(sym);
649 } else {
650 goto label_ref;
652 break;
653 case SCMD_JMP: case SCMD_JZ: case SCMD_JNZ: {
654 label_ref:;
655 unsigned *loff = get_label_offset(sym);
656 if(!loff) {
657 add_label(sym, (unsigned)text.len | (1<<31));
658 loff = get_label_offset(sym);
660 value = *loff;
661 } break;
662 default:
663 value = atoi(sym);
666 code[pos++] = value;
668 append_code(code, pos);
669 loop_footer: ;