agssim: catch potential OOB access in change_reg_usage
[rofl0r-agsutils.git] / agssim.c
blob687d6239f8fc960642bb2408d42be253cea97e5f
1 #include <stdio.h>
2 #include <ctype.h>
3 #include <string.h>
4 #include <stdlib.h>
5 #include <assert.h>
6 #include <errno.h>
8 #include "ags_cpu.h"
9 #include "regusage.h"
10 #include "hbmap.h"
11 #include "version.h"
12 #define ADS ":::AGSSim " VERSION " by rofl0r:::"
14 #ifndef MAX
15 #define MAX(a, b) ((a) > (b) ? (a) : (b))
16 #endif
17 #ifndef MIN
18 #define MIN(a, b) ((a) < (b) ? (a) : (b))
19 #endif
21 #ifndef ARRAY_SIZE
22 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
23 #endif
25 #define ALIGN(X, A) ((X+(A-1)) & -(A))
27 #define BREAKPOINT_FLAG (1<<31)
28 #define OPCODE_MASK (~(BREAKPOINT_FLAG))
30 static int interactive;
/* The VM's single flat memory image together with the sizes of its parts. */
struct mem {
	unsigned char *mem; /* base of the (re)allocated image */
	size_t capa;        /* number of bytes currently allocated at `mem` */
	size_t ltext;       /* bytes of code appended so far (see append_code) */
	size_t lstack;      /* stack size; stays 0 until vm_init_stack() succeeded */
	size_t lheap;       /* heap size; only read, never assigned in this file */
};
static struct mem mem;
40 #define memory (mem.mem)
41 #define text memory
42 #define text_end ALIGN(mem.ltext, 4096)
43 #define stack_mem (mem.mem+text_end)
44 #define heap_mem (mem.mem+text_end+mem.lstack)
46 struct label_ref {
47 char *name;
48 unsigned insoff;
50 tglist(struct label_ref) *label_refs;
51 static void add_label_ref(char *name, unsigned insoff) {
52 struct label_ref new = {.name = strdup(name), .insoff = insoff};
53 tglist_add(label_refs, new);
55 static void resolve_label(char* name, unsigned insoff) {
56 size_t i;
57 for(i=0; i<tglist_getsize(label_refs); ) {
58 struct label_ref *l = &tglist_get(label_refs, i);
59 if(!strcmp(l->name, name)) {
60 free(l->name);
61 memcpy(text+l->insoff, &insoff, 4);
62 tglist_delete(label_refs, i);
63 } else ++i;
66 /* label_map */
67 hbmap(char*, unsigned, 32) *label_map;
68 static unsigned *get_label_offset(char* name) {
69 return hbmap_get(label_map, name);
71 static int add_label(char* name, int insoff) {
72 char* tmp = strdup(name);
73 return hbmap_insert(label_map, tmp, insoff) != -1;
/* Comparison callback: `a` and `b` each point at a `char *`; the two strings
 * they refer to are ordered with strcmp(). */
static int strptrcmp(const void *a, const void *b) {
	const char *lhs = *(const char * const *) a;
	const char *rhs = *(const char * const *) b;
	return strcmp(lhs, rhs);
}
/* Hash callback for label_map: folds each character into a shift-and-xor
 * accumulator and returns the low 28 bits. */
static unsigned string_hash(const char* s) {
	uint_fast32_t acc = 0;
	for(; *s; ++s) {
		acc = acc * 16 + *s;
		acc ^= (acc >> 24) & 0xf0;
	}
	return acc & 0xfffffff;
}
88 static void init_labels() {
89 label_map = hbmap_new(strptrcmp, string_hash, 32);
90 label_refs = tglist_new();
93 /* TODO: move duplicate code from Assembler.c into separate TU */
94 static int get_reg(char* regname) {
95 int i = AR_NULL + 1;
96 for(; i < AR_MAX; i++)
97 if(strcmp(regnames[i], regname) == 0)
98 return i;
99 return AR_NULL;
102 static size_t mnemolen[SCMD_MAX];
103 static int mnemolen_initdone = 0;
105 static void init_mnemolen(void) {
106 size_t i = 0;
107 for(; i< SCMD_MAX; i++)
108 mnemolen[i] = strlen(opcodes[i].mnemonic);
109 mnemolen_initdone = 1;
112 static unsigned find_insn(char* sym) {
113 if(!mnemolen_initdone) init_mnemolen();
114 size_t i = 0, l = strlen(sym);
115 for(; i< SCMD_MAX; i++)
116 if(l == mnemolen[i] && memcmp(sym, opcodes[i].mnemonic, l) == 0)
117 return i;
118 return 0;
121 #include "StringEscape.h"
/* Expects `x` to point at the first char after an opening " of a string
 * literal that runs to the end of the NUL-terminated line. Trailing
 * whitespace is skipped, the closing " is overwritten with NUL, and the
 * body is passed through unescape() into convbuf.
 * `end` is one past the buffer holding `x`; it is only used for a sanity check.
 * Returns the value of unescape(), or (size_t) -1 when no closing " exists. */
static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t convbuflen) {
	char *e = x + strlen(x);
	assert(e < end && *e == 0);
	/* walk back over trailing whitespace without ever leaving the string
	 * body; otherwise the opening quote at x[-1] could be mistaken for the
	 * closing one */
	while(e > x && isspace((unsigned char) e[-1])) e--;
	if(e == x || e[-1] != '"') return (size_t) -1;
	e[-1] = 0;
	return unescape(x, convbuf, convbuflen);
}
/* Terminates the argument starting at *p and advances *p to the start of the
 * next argument.
 * - A string argument (starting with ") is unescaped into convbuf, wrapped in
 *   double quotes again, and convbuf is returned. *p is then set to a null
 *   pointer on purpose: a string must be the last argument, so any further
 *   access should crash visibly.
 * - Any other argument is cut at the first ',' or whitespace and returned
 *   in place.
 * Returns 0 when a string has no closing quote or does not fit convbuf. */
static char* finalize_arg(char **p, char* pend, char* convbuf, size_t convbuflen) {
	if(**p == '"') {
		/* room for both quotes and the terminator is the bare minimum */
		if(convbuflen < 3) return 0;
		convbuf[0] = '"';
		size_t l = get_length_and_convert(*p + 1, pend, convbuf+1, convbuflen - 1);
		if(l == (size_t) -1 || l > convbuflen - 3) return 0;
		convbuf[l+1] = '"';
		convbuf[l+2] = 0;
		*p = 0; /* make it crash if its accessed again, since a string should always be the last arg */
		return convbuf;
	}
	char *ret = *p;
	/* stop at the line terminator too, so stale bytes behind it are never
	 * taken for part of the argument */
	while(*p < pend && **p && **p != ',' && !isspace((unsigned char) **p)) (*p)++;
	assert(*p < pend);
	if(**p) {
		**p = 0; (*p)++;
		while(*p < pend && isspace((unsigned char) **p)) (*p)++;
		assert(*p < pend);
	}
	return ret;
}
157 static struct rval {
158 union {
159 int i;
160 float f;
162 enum RegisterUsage ru;
163 } registers[AR_MAX];
165 static int canread(int index, int cnt) {
166 return index >= 0 && index+cnt < mem.capa;
168 static int canwrite(int index, int cnt) {
169 return index >= text_end && index+cnt < mem.capa;
172 #define ALIGN(X, A) ((X+(A-1)) & -(A))
174 static int vm_init_stack(unsigned size) {
175 if(mem.lstack) return 1;
176 unsigned want = ALIGN(size, 4096);
177 unsigned char *p = realloc(mem.mem, mem.capa+want);
178 if(!p) {
179 dprintf(2, "error: could not allocate stack!\n");
180 return 0;
182 mem.mem = p;
183 mem.lstack = want;
184 mem.capa += want;
185 registers[AR_SP].i = text_end;
186 return 1;
189 static int grow_text(size_t req) {
190 /* add 4 more slots than strictly necessary so we can access
191 * at least 1 full-length insn past text end without crash */
192 req += 4*sizeof(int);
193 size_t need = mem.ltext + req;
194 if(need > mem.capa-mem.lheap-mem.lstack) {
195 if(mem.lstack) {
196 dprintf(2, "error: cannot enlarge text segment once execution started!\n");
197 return 0;
199 size_t want = ALIGN(need, 4096);
200 unsigned char *p = realloc(mem.mem, want);
201 if(!p) {
202 dprintf(2, "error: allocating memory failed!\n");
203 return 0;
205 mem.mem = p;
206 mem.capa = want;
208 return 1;
211 static int append_code(int *code, size_t cnt) {
212 if(!grow_text((cnt+1)*4)) return 0;
213 size_t i;
214 for(i = 0; i < cnt; i++) {
215 memcpy(text+mem.ltext, &code[i], 4);
216 mem.ltext += 4;
218 memcpy(text+mem.ltext, "\0\0\0\0", 4);
219 return 1;
222 static void vm_reset_register_usage() {
223 size_t i;
224 for(i = AR_NULL + 1; i < AR_MAX; i++)
225 registers[i].ru = RU_NONE;
228 static void vm_init() {
229 size_t i;
230 /* initialize registers to an easily recognisable junk value */
231 for(i = AR_NULL + 1; i < AR_MAX; i++) {
232 registers[i].i = 2222222222;
234 vm_reset_register_usage();
235 registers[AR_SP].i = -1;
236 registers[AR_NULL].i = 0;
237 int was_null = text == 0;
238 /* set up EIP so vm_state() doesn't crash */
239 grow_text(16);
240 /* put NULL insn as first instruction so VM doesn't execute
241 random garbage in mem */
242 if(was_null) memcpy(text, "\0\0\0\0", 4);
/* Advances *eip by one word and returns the word it then points at. */
static inline int consume_int(int **eip) {
	int *next = *eip + 1;
	*eip = next;
	return *next;
}
250 static void change_reg_usage(int regno, enum RegisterAccess ra) {
251 if(regno >= AR_MAX) {
252 vm_signal(VM_SIGSEGV, 0);
253 return;
255 registers[regno].ru = get_reg_usage(regno, registers[regno].ru, ra);
258 static void vm_update_register_usage(int *eip) {
259 const struct regaccess_info *ri = &regaccess_info[*eip];
260 if(ri->ra_reg1) change_reg_usage(eip[1], ri->ra_reg1);
261 if(ri->ra_reg2) change_reg_usage(eip[2], ri->ra_reg2);
262 if(ri->ra_mar) change_reg_usage(AR_MAR, ri->ra_mar);
263 if(ri->ra_sp) change_reg_usage(AR_SP, ri->ra_sp);
266 static void write_mem1(int off, int val) {
267 unsigned char *m = memory+off;
268 *m = val&0xff;
270 static void write_mem2(int off, int val) {
271 unsigned short *m = (void*) (memory+off);
272 *m = val&0xffff;
274 static void write_mem(int off, int val) {
275 int *m = (void*) (memory+off);
276 *m = val;
279 static int read_mem(int off) {
280 int ret;
281 memcpy(&ret, memory+off, 4);
282 return ret;
285 static int vm_push(int value) {
286 if(!canwrite(registers[AR_SP].i, 4)) return 0;
287 write_mem(registers[AR_SP].i, value);
288 registers[AR_SP].i += 4;
289 return 1;
292 static int vm_pop(int *value) {
293 if((int) registers[AR_SP].i >= 4) {
294 registers[AR_SP].i -= 4;
295 *value = read_mem(registers[AR_SP].i);
296 return 1;
298 return 0;
301 static int vm_syscall(void) {
302 int ret,
303 scno = registers[AR_AX].i,
304 arg1 = registers[AR_BX].i,
305 arg2 = registers[AR_CX].i,
306 arg3 = registers[AR_DX].i;
307 /* we follow linux x86_64 syscall numbers for simplicity */
308 switch(scno) {
309 case 0: /* SYS_read (fd, buf, size) */
310 /* fall-through */
311 case 1: /* SYS_write (fd, buf, size) */
312 if(!canread(arg2, arg3)) return -EFAULT;
313 if(scno == 0)
314 ret = read(arg1, ((char*)memory)+arg2, arg3);
315 else
316 ret = write(arg1, ((char*)memory)+arg2, arg3);
317 if(ret == -1) return -errno;
318 return ret;
319 case 60: /* SYS_exit (exitcode) */
320 exit(arg1);
321 default: return -ENOSYS;
325 static int label_check() {
326 if(tglist_getsize(label_refs)) {
327 dprintf(2, "error: unresolved label refs!\n");
328 size_t i; struct label_ref *l;
329 for(i=0; i<tglist_getsize(label_refs); ++i) {
330 l = &tglist_get(label_refs, i);
331 dprintf(2, "%s@%u\n", l->name, l->insoff);
333 return 0;
335 return 1;
338 #define EIP registers[AR_NULL].i
340 #define VM_SIGILL 1
341 #define VM_SIGSEGV 2
342 #define VM_SIGABRT 3
343 static int vm_return;
344 static void vm_signal(int sig, int param) {
345 switch(sig) {
346 case VM_SIGILL:
347 dprintf(2, "illegal instruction at IP %u\n", EIP);
348 break;
349 case VM_SIGSEGV:
350 dprintf(2, "segmentation fault: invalid access at %u\n", EIP);
351 break;
352 case VM_SIGABRT:
353 dprintf(2, "aborted (assertlte check failed at IP %u)\n", EIP);
354 break;
355 default:
356 dprintf(2, "unknown signal\n");
358 vm_return = 1;
361 #define CODE_INT(X) eip[X]
362 #define CODE_FLOAT(X) ((float*)eip)[X]
363 #define REGI(X) registers[CODE_INT(X)].i
364 #define REGF(X) registers[CODE_INT(X)].f
366 static int vm_step(int run_context) {
367 /* we use register AR_NULL as instruction pointer */
368 int *eip = (void*)(text + EIP);
369 unsigned op = *eip;
370 if(interactive) {
371 // breakpoints can be set only in interactive mode
372 op &= OPCODE_MASK;
373 if(op >= SCMD_MAX) {
374 vm_signal(VM_SIGILL, 0);
375 return 0;
378 if(*eip & BREAKPOINT_FLAG) {
379 *eip &= ~BREAKPOINT_FLAG;
380 return 0;
382 if(!run_context) vm_reset_register_usage();
383 vm_update_register_usage(eip);
384 } else if(op >= SCMD_MAX) {
385 vm_signal(VM_SIGILL, 0);
386 return 0;
388 int eip_inc = 1 + opcodes[op].argcount;
389 int tmp, val;
391 switch(op) {
392 case 0:
393 /* don't modify IP */
394 if(!run_context)
395 dprintf(2, "no code at IP %u.\n", EIP);
396 return 0;
397 case SCMD_ADD:
398 REGI(1) += CODE_INT(2);
399 break;
400 case SCMD_SUB:
401 REGI(1) -= CODE_INT(2);
402 break;
403 case SCMD_REGTOREG:
404 REGI(2) = REGI(1);
405 break;
406 case SCMD_LITTOREG:
407 REGI(1) = CODE_INT(2);
408 break;
409 case SCMD_MULREG:
410 REGI(1) *= REGI(2);
411 break;
412 case SCMD_DIVREG:
413 REGI(1) /= REGI(2);
414 break;
415 case SCMD_ADDREG:
416 REGI(1) += REGI(2);
417 break;
418 case SCMD_SUBREG:
419 REGI(1) -= REGI(2);
420 break;
421 case SCMD_BITAND:
422 REGI(1) &= REGI(2);
423 break;
424 case SCMD_BITOR:
425 REGI(1) |= REGI(2);
426 break;
427 case SCMD_ISEQUAL:
428 REGI(1) = !!(REGI(1) == REGI(2));
429 break;
430 case SCMD_NOTEQUAL:
431 REGI(1) = !!(REGI(1) != REGI(2));
432 break;
433 case SCMD_GREATER:
434 REGI(1) = !!(REGI(1) > REGI(2));
435 break;
436 case SCMD_LESSTHAN:
437 REGI(1) = !!(REGI(1) < REGI(2));
438 break;
439 case SCMD_GTE:
440 REGI(1) = !!(REGI(1) >= REGI(2));
441 break;
442 case SCMD_LTE:
443 REGI(1) = !!(REGI(1) <= REGI(2));
444 break;
445 case SCMD_AND:
446 REGI(1) = !!(REGI(1) && REGI(2));
447 break;
448 case SCMD_OR:
449 REGI(1) = !!(REGI(1) || REGI(2));
450 break;
451 case SCMD_LOADSPOFFS:
452 registers[AR_MAR].i = registers[AR_SP].i - CODE_INT(1);
453 break;
454 case SCMD_PUSHREG:
455 if(!vm_push(REGI(1))) goto oob;
456 break;
457 case SCMD_POPREG:
458 if(!vm_pop(&REGI(1))) goto oob;
459 break;
460 case SCMD_MUL:
461 REGI(1) *= CODE_INT(2);
462 break;
463 case SCMD_THISBASE:
464 case SCMD_LINENUM:
465 break;
466 case SCMD_MODREG:
467 REGI(1) %= REGI(2);
468 break;
469 case SCMD_XORREG:
470 REGI(1) ^= REGI(2);
471 break;
472 case SCMD_NOTREG:
473 REGI(1) = !REGI(2);
474 break;
475 case SCMD_SHIFTLEFT:
476 REGI(1) <<= REGI(2);
477 break;
478 case SCMD_SHIFTRIGHT:
479 REGI(1) >>= REGI(2);
480 break;
481 case SCMD_FADD:
482 REGF(1) += CODE_FLOAT(2);
483 break;
484 case SCMD_FSUB:
485 REGF(1) -= CODE_FLOAT(2);
486 break;
487 case SCMD_FMULREG:
488 REGF(1) *= REGF(2);
489 break;
490 case SCMD_FDIVREG:
491 REGF(1) /= REGF(2);
492 break;
493 case SCMD_FADDREG:
494 REGF(1) += REGF(2);
495 break;
496 case SCMD_FSUBREG:
497 REGF(1) -= REGF(2);
498 break;
499 case SCMD_FGREATER:
500 REGI(1) = !!(REGF(1) > REGF(2));
501 break;
502 case SCMD_FLESSTHAN:
503 REGI(1) = !!(REGF(1) < REGF(2));
504 break;
505 case SCMD_FGTE:
506 REGI(1) = !!(REGF(1) >= REGF(2));
507 break;
508 case SCMD_FLTE:
509 REGI(1) = !!(REGF(1) <= REGF(2));
510 break;
511 case SCMD_ZEROMEMORY:
512 tmp = CODE_INT(1);
513 if(canwrite(registers[AR_MAR].i, tmp)) {
514 memset(((char*)memory)+registers[AR_MAR].i,0,tmp);
515 } else goto oob;
516 break;
517 case SCMD_WRITELIT:
518 tmp = CODE_INT(1);
519 if(tmp <= 0 || tmp > 4 || tmp == 3) {
520 dprintf(2, "VM: invalid memcpy use at IP %u\n", EIP);
521 break;
523 val = CODE_INT(2);
524 goto mwrite;
525 case SCMD_MEMWRITE:
526 tmp = 4;
527 val = REGI(1);
528 goto mwrite;
529 case SCMD_MEMWRITEW:
530 tmp = 2;
531 val = REGI(1);
532 goto mwrite;
533 case SCMD_MEMWRITEB:
534 tmp = 1;
535 val = REGI(1);
536 mwrite:
537 if(canwrite(registers[AR_MAR].i, tmp)) {
538 switch(tmp) {
539 case 4: write_mem (registers[AR_MAR].i, val); break;
540 case 2: write_mem2(registers[AR_MAR].i, val); break;
541 case 1: write_mem1(registers[AR_MAR].i, val); break;
543 } else {
544 oob:
545 vm_signal(VM_SIGSEGV, 0);
546 return 0;
548 break;
549 case SCMD_MEMREAD:
550 tmp = 4;
551 goto mread;
552 case SCMD_MEMREADW:
553 tmp = 2;
554 goto mread;
555 case SCMD_MEMREADB:
556 tmp = 1;
557 mread:
558 if(canread(registers[AR_MAR].i, tmp)) {
559 int val;
560 memcpy(&val, memory+registers[AR_MAR].i, 4);
561 switch(tmp) {
562 case 4: REGI(1) = val; break;
563 case 2: REGI(1) = val & 0xffff; break;
564 case 1: REGI(1) = val & 0xff; break;
566 } else goto oob;
567 break;
568 case SCMD_JZ:
569 if(registers[AR_AX].i == 0) goto jump;
570 break;
571 case SCMD_JNZ:
572 if(registers[AR_AX].i == 0) break;
573 /* fall through */
574 case SCMD_JMP:
575 jump:
576 tmp = CODE_INT(1);
577 jump_tmp:
578 if((unsigned)tmp < text_end && !(tmp&3))
579 registers[AR_NULL].i = tmp;
580 else {
581 vm_signal(VM_SIGSEGV, tmp);
582 return 0;
584 eip_inc = 0;
585 break;
586 case SCMD_CALL:
587 if(!vm_push(registers[AR_NULL].i + eip_inc*4)) goto oob;
588 tmp = REGI(1);
589 goto jump_tmp;
590 case SCMD_RET:
591 registers[AR_SP].i -= 4;
592 tmp = read_mem(registers[AR_SP].i);
593 goto jump_tmp;
594 case SCMD_CALLAS:
595 /* we re-purpose "callscr" mnemonic to mean syscall,
596 as it is unused in ags-emitted bytecode.
597 using it is unportable, it works only in agssim.
598 the register arg for callscr instruction is ignored.
599 the arguments are passed in regs ax,bx,cx,dx,op
600 in this order, where the first arg is the syscall
601 number. return value is put in ax. */
602 registers[AR_AX].i = vm_syscall();
603 break;
604 case SCMD_CHECKBOUNDS:
605 if(REGI(1) > CODE_INT(2)) vm_signal(VM_SIGABRT, 0);
606 break;
607 case SCMD_NEWARRAY:
608 case SCMD_DYNAMICBOUNDS:
609 case SCMD_MEMZEROPTRND:
610 case SCMD_LOOPCHECKOFF:
611 case SCMD_CHECKNULLREG:
612 case SCMD_STRINGSNOTEQ:
613 case SCMD_STRINGSEQUAL:
614 case SCMD_CREATESTRING:
615 case SCMD_CHECKNULL:
616 case SCMD_MEMINITPTR:
617 case SCMD_MEMZEROPTR:
618 case SCMD_MEMREADPTR:
619 case SCMD_MEMWRITEPTR:
620 case SCMD_CALLOBJ:
621 case SCMD_NUMFUNCARGS:
622 case SCMD_SUBREALSTACK:
623 case SCMD_PUSHREAL:
624 case SCMD_CALLEXT:
625 dprintf(2, "info: %s not implemented yet\n", opcodes[*eip].mnemonic);
627 size_t i, l = opcodes[*eip].argcount;
628 for(i = 0; i < l; i++) ++(*eip);
630 break;
631 default:
632 vm_signal(VM_SIGILL, 0);
633 return 0;
635 registers[AR_NULL].i += eip_inc*4;
636 return 1;
/* Formats `value` as decimal text into `out` and returns `out`.
 * The caller has to provide a buffer large enough for any int. */
static inline char *int_to_str(int value, char* out) {
	(void) sprintf(out, "%d", value);
	return out;
}
644 static int* get_next_ip(int *eip, int off) {
645 int *ret = eip, i, op;
646 for(i=0; i<off; ++i) {
647 op = *ret & OPCODE_MASK;
648 if(op < SCMD_MAX)
649 ret+=1+opcodes[op].argcount;
650 else
651 ++ret;
653 return ret;
656 static const char *get_regname(unsigned regno) {
657 if(regno < AR_MAX) return regnames[regno];
658 return "INVALID";
661 static void vm_state() {
662 if(!interactive) return;
663 static const char ru_strings[][3] = {
664 [RU_NONE] = {0},
665 [RU_READ] = {'R', 0},
666 [RU_WRITE] = {'W', 0},
667 [RU_WRITE_AFTER_READ] = {'R', 'W', 0},
669 static const char regorder[] = {
670 0, AR_MAR, AR_OP, AR_SP, -1,
671 AR_AX, AR_BX, AR_CX, AR_DX, -1, -1};
672 size_t i, j;
673 for(j=0; j < ARRAY_SIZE(regorder)-1; ++j) {
674 i = regorder[j];
675 if(i == -1) printf("\n");
676 else {
677 printf("%-3s: %-2s %-11d", i == 0 ? "eip" : regnames[i], ru_strings[registers[i].ru], registers[i].i);
678 if(regorder[j+1] != -1) printf(" ");
681 char stackview[5][24];
682 stackview[2][0] = 0;
683 stackview[3][0] = 0;
684 stackview[4][0] = 0;
685 for(j=0,i = MIN(registers[AR_SP].i+2*4, text_end+mem.lstack);
686 i >= MAX(registers[AR_SP].i-2*4, text_end);
687 i-=4, ++j) {
688 sprintf(stackview[j],
689 "SL %s %3zu %d", i == registers[AR_SP].i ? ">" : " ", i, read_mem(i));
690 if(i <= 0) break;
692 int *eip = (void*)(text + registers[AR_NULL].i), wasnull = 0;
693 for(i = 0; i<5; i++) {
694 char a1b[32], a2b[32], a3b[32], inst[48];
695 if(i > 1) {
696 int *nip = get_next_ip(eip, i-2),
697 op = *nip & OPCODE_MASK;
698 if(op < SCMD_MAX) {
699 const char *arg1 = opcodes[op].argcount == 0 ? "" : \
700 (opcodes[op].regcount > 0 ? get_regname(nip[1]) : int_to_str(nip[1], a1b));
701 const char *arg2 = opcodes[op].argcount < 2 ? "" : \
702 (opcodes[op].regcount > 1 ? get_regname(nip[2]) : int_to_str(nip[2], a2b));
703 const char *arg3 = opcodes[op].argcount < 3 ? "" : \
704 (opcodes[op].regcount > 2 ? get_regname(nip[3]) : int_to_str(nip[2], a3b));
705 if(op == SCMD_REGTOREG) {
706 const char* tmp = arg1;
707 arg1 = arg2; arg2 = tmp;
709 if(!wasnull)
710 sprintf(inst, " %s %s %s %s", i==2?">":" ", opcodes[op].mnemonic, arg1, arg2);
711 else inst[0] = 0;
712 } else {
713 sprintf(inst, "%d", *nip);
715 if(!op) wasnull = 1;
716 } else inst[0] = 0;
717 printf("%-52s %s\n", inst, stackview[i]);
/* Runs instructions until vm_step() asks to stop. Refuses to start while
 * label references are unresolved. */
void vm_run(void) {
	if(!label_check()) return;
	while(vm_step(1)) {
		/* keep stepping */
	}
}
/* Writes the help text to file descriptor `fd`, with `a0` as the program
 * name. Always returns 1 so callers can use it as an exit status. */
static int usage(int fd, char *a0) {
	static const char help[] =
		"%s [OPTIONS] [file.s] - simple ags vm simulator\n"
		"implements the ALU and a small stack\n"
		"useful to examine how a chunk of code modifies VM state\n"
		"OPTIONS:\n"
		"-i : interpreter mode - don't print anything, run and exit\n"
		"by default, mode is interactive, sporting the following commands:\n"
		"!i - reset VM state and IP\n"
		"!s - single-step\n"
		"!n - step-over\n"
		"!r - run\n"
		"!b ADDR - set a breakpoint on ADDR (address or label)\n";
	dprintf(fd, help, a0);
	return 1;
}
/* command most recently run through execute_user_command_i(); main() repeats
 * it when an empty line is entered */
static int lastcommand;

/* interactive commands; 0 is left free to mean "no command yet" */
enum UserCommand {
	UC_STEP = 1,
	UC_NEXT = 2, /* step-over */
	UC_BP   = 3,
	UC_RUN  = 4,
	UC_INIT = 5,
	UC_QUIT = 6,
	UC_HELP = 7,
};
755 static void execute_user_command_i(int uc, char* param) {
756 switch(uc) {
757 case UC_STEP: if(label_check()) vm_step(0); break;
758 case UC_BP: {
759 int addr, *ptr;
760 if(isdigit(param[0]))
761 addr = atoi(param);
762 else {
763 ptr = get_label_offset(param);
764 if(!ptr) {
765 dprintf(2, "label %s not found!\n", param);
766 return;
768 addr = *ptr;
770 if(addr >= text_end) {
771 dprintf(2, "breakpoint offset %d out of bounds\n", addr);
772 return;
774 int insn;
775 memcpy(&insn, text+addr, 4);
776 insn |= BREAKPOINT_FLAG;
777 memcpy(text+addr, &insn, 4);
779 return;
780 case UC_NEXT: *get_next_ip((void*)(text+EIP), 1) |= BREAKPOINT_FLAG;
781 /* fall-through */
782 case UC_RUN : vm_run(); break;
783 case UC_INIT: vm_init(); break;
784 case UC_QUIT: exit(0); break;
785 case UC_HELP: usage(1, "agssim"); break;
787 lastcommand = uc;
788 vm_state();
790 static void execute_user_command(char *cmd) {
791 if(!vm_init_stack(16384)) return;
792 int uc = 0;
793 char *param = cmd;
794 while(!isspace(*param)) param++;
795 while(isspace(*param)) param++;
796 if(0) ;
797 else if(!strcmp(cmd, "s")) uc = UC_STEP;
798 else if(!strcmp(cmd, "r")) uc = UC_RUN;
799 else if(!strcmp(cmd, "i")) uc = UC_INIT;
800 else if(!strcmp(cmd, "q")) uc = UC_QUIT;
801 else if(!strcmp(cmd, "h")) uc = UC_HELP;
802 else if(!strcmp(cmd, "n")) uc = UC_NEXT;
803 else if(*cmd == 'b') uc = UC_BP;
804 else {
805 dprintf(2, "unknown command\n");
806 return;
808 execute_user_command_i(uc, param);
811 int main(int argc, char** argv) {
812 int c;
813 interactive = 1;
814 FILE *in = stdin;
815 while((c = getopt(argc, argv, "i")) != EOF) switch(c) {
816 case 'i': interactive = 0; break;
817 default: return usage(2, argv[0]);
819 if(argv[optind]) in = fopen(argv[optind], "r");
820 if(!in) {
821 dprintf(2, "error opening %s\n", argv[optind]);
822 return 1;
824 char buf[1024], *sym;
825 char convbuf[sizeof(buf)]; /* to convert escaped string into non-escaped version */
826 int lineno = 0;
827 init_labels();
828 vm_init();
829 if(interactive) printf(ADS " - type !h for help\n");
830 mainloop:
831 while(fgets(buf, sizeof buf, in)) {
832 int code[4];
833 size_t pos = 0;
834 lineno++;
835 char* p = buf, *pend = buf + sizeof buf;
836 if(*p == '\n' && lastcommand) {
837 execute_user_command_i(lastcommand, "");
838 continue;
840 if(*p == '#' || *p == ';') continue;
841 if(*p == '!') {
842 char *n = strchr(p, '\n');
843 if(n) *n = 0;
844 execute_user_command(p+1);
845 continue;
847 while(isspace(*p) && p < pend) p++;
848 assert(p < pend);
849 if(!*p) continue;
850 char* sym = p;
851 while(!isspace(*p) && p < pend) p++;
852 *p = 0; p++;
853 size_t l = strlen(sym);
854 if(l > 1 && sym[l-1] == ':') {
855 // functionstart or label
856 sym[l-1] = 0;
857 resolve_label(sym, mem.ltext);
858 unsigned *loff = get_label_offset(sym);
859 if(loff) dprintf(2, "warning: label %s overwritten\n", sym);
860 add_label(sym, mem.ltext);
861 continue;
863 unsigned instr = find_insn(sym);
864 if(!instr) {
865 dprintf(2, "line %zu: error: unknown instruction '%s'\n", lineno, sym);
866 continue;
868 code[pos++] = instr;
869 size_t arg;
870 for(arg = 0; arg < opcodes[instr].argcount; arg++) {
871 sym = finalize_arg(&p, pend, convbuf, sizeof(convbuf));
872 if(sym == 0) {
873 dprintf(2, "line %zu: error: expected \"\n", lineno);
874 goto loop_footer;
876 int value = 0;
877 if(arg < opcodes[instr].regcount) {
878 value=get_reg(sym);
879 if(value == AR_NULL) {
880 needreg_err:
881 dprintf(2, "line %zu: error: expected register name!\n", lineno);
882 goto loop_footer;
884 if(instr == SCMD_REGTOREG) {
885 /* fix reversed order of arguments */
886 int dst = value;
887 sym = p;
888 while(p < pend && *p != ',' && !isspace(*p)) p++;
889 assert(p < pend);
890 *p = 0;
891 value=get_reg(sym);
892 if(value == AR_NULL) goto needreg_err;
893 code[pos++] = value;
894 code[pos++] = dst;
895 break;
897 } else {
898 switch(instr) {
899 case SCMD_LITTOREG:
900 /* immediate can be function name, string,
901 * variable name, stack fixup, or numeric value */
902 if(sym[0] == '"') {
903 size_t l = strlen(sym)-1, tl = mem.ltext;
904 if(!append_code((int[2]){SCMD_JMP, tl+8+ALIGN(l, 4)}, 2)) goto loop_footer;
905 char*p = sym+1;
906 --l;
907 while((ssize_t)l > 0) {
908 int x = 0;
909 memcpy(&x, p, l>=4?4:l);
910 if(!append_code(&x, 1)) goto loop_footer;
911 l -= 4;
912 p += 4;
914 value = tl+8;
915 } else if(sym[0] == '@') {
916 dprintf(2, "error: global variable handling not implemented\n");
917 goto loop_footer;
918 } else if(sym[0] == '.') {
919 if(memcmp(sym+1, "stack", 5)) {
920 dprintf(2, "error: expected stack\n");
921 goto loop_footer;;
923 dprintf(2, "error: stack fixup not implemented\n");
924 goto loop_footer;
925 } else if(isdigit(sym[0]) || sym[0] == '-') {
926 if(sym[0] == '-') assert(isdigit(sym[1]));
927 value = atoi(sym);
928 } else {
929 goto label_ref;
931 break;
932 case SCMD_JMP: case SCMD_JZ: case SCMD_JNZ: {
933 label_ref:;
934 unsigned *loff = get_label_offset(sym);
935 if(!loff) {
936 add_label_ref(sym, mem.ltext+pos*4);
937 value = -1;
938 } else value = *loff;
939 } break;
940 default:
941 if(!isdigit(sym[0])) {
942 dprintf(2, "line %zu: error: expected number\n", lineno);
943 goto loop_footer;
945 value = atoi(sym);
948 code[pos++] = value;
950 append_code(code, pos);
951 loop_footer: ;
953 if(!interactive) execute_user_command("r");
954 else if(in != stdin) {
955 in = stdin;
956 goto mainloop;
958 return vm_return;