/* Banner string; printed at startup when running interactively.
 * VERSION is not defined in this part of the file and must come from
 * elsewhere (header or compiler command line). */
#define ADS ":::AGSSim " VERSION " by rofl0r:::"
/* Larger / smaller of two values.
 * NOTE: each argument is evaluated twice - no side effects in arguments. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
/* Number of elements of a real array (gives a wrong result for pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Round X up to the next multiple of A; A must be a power of two.
 * NOTE: X and A are not parenthesized inside the expansion, so pass only
 * simple operands (identifier, constant, member access). The replacement
 * list is deliberately left untouched: this macro is defined a second time
 * further down in this file, and C only permits a redefinition that is
 * token-identical. */
#define ALIGN(X, A) ((X+(A-1)) & -(A))
/* Bit OR-ed into an instruction word to mark a breakpoint on it.
 * Written as 1u<<31: shifting 1 into the sign bit of a signed int is
 * undefined behaviour. */
#define BREAKPOINT_FLAG (1u<<31)
/* Mask that strips the breakpoint bit, leaving the plain opcode. */
#define OPCODE_MASK (~(BREAKPOINT_FLAG))
30 static int interactive
;
/* Layout helpers for the VM memory buffer described by the global `mem`. */
/* Base pointer of the buffer. */
#define memory (mem.mem)
/* Offset just past the text segment: mem.ltext rounded up to 4096. */
#define text_end ALIGN(mem.ltext, 4096)
/* Address of the first byte after the text segment (start of the stack). */
#define stack_mem (mem.mem+text_end)
/* Address mem.lstack bytes after stack_mem (start of the heap). */
#define heap_mem (mem.mem+text_end+mem.lstack)
50 tglist(struct label_ref
) *label_refs
;
51 static void add_label_ref(char *name
, unsigned insoff
) {
52 struct label_ref
new = {.name
= strdup(name
), .insoff
= insoff
};
53 tglist_add(label_refs
, new);
55 static void resolve_label(char* name
, unsigned insoff
) {
57 for(i
=0; i
<tglist_getsize(label_refs
); ) {
58 struct label_ref
*l
= &tglist_get(label_refs
, i
);
59 if(!strcmp(l
->name
, name
)) {
61 memcpy(text
+l
->insoff
, &insoff
, 4);
62 tglist_delete(label_refs
, i
);
67 hbmap(char*, unsigned, 32) *label_map
;
68 static unsigned *get_label_offset(char* name
) {
69 return hbmap_get(label_map
, name
);
71 static int add_label(char* name
, int insoff
) {
72 char* tmp
= strdup(name
);
73 return hbmap_insert(label_map
, tmp
, insoff
) != -1;
/*
 * Comparison callback for containers that hold `char*` keys.
 * a and b each point to a string pointer; the result is strcmp() of the
 * two strings they point to (<0, 0, >0).
 */
static int strptrcmp(const void *a, const void *b) {
	const char *const *lhs = a;
	const char *const *rhs = b;
	return strcmp(*lhs, *rhs);
}
80 static unsigned string_hash(const char* s
) {
88 static void init_labels() {
89 label_map
= hbmap_new(strptrcmp
, string_hash
, 32);
90 label_refs
= tglist_new();
93 /* TODO: move duplicate code from Assembler.c into separate TU */
94 static int get_reg(char* regname
) {
96 for(; i
< AR_MAX
; i
++)
97 if(strcmp(regnames
[i
], regname
) == 0)
102 static size_t mnemolen
[SCMD_MAX
];
103 static int mnemolen_initdone
= 0;
105 static void init_mnemolen(void) {
107 for(; i
< SCMD_MAX
; i
++)
108 mnemolen
[i
] = strlen(opcodes
[i
].mnemonic
);
109 mnemolen_initdone
= 1;
112 static unsigned find_insn(char* sym
) {
113 if(!mnemolen_initdone
) init_mnemolen();
114 size_t i
= 0, l
= strlen(sym
);
115 for(; i
< SCMD_MAX
; i
++)
116 if(l
== mnemolen
[i
] && memcmp(sym
, opcodes
[i
].mnemonic
, l
) == 0)
121 #include "StringEscape.h"
122 /* expects a pointer to the first char after an opening " in a string,
123 * converts the string into convbuf, and returns the length of that string */
124 static size_t get_length_and_convert(char* x
, char* end
, char* convbuf
, size_t convbuflen
) {
126 char* e
= x
+ strlen(x
);
127 assert(e
> x
&& e
< end
&& *e
== 0);
129 while(isspace(*e
)) e
--;
130 if(*e
!= '"') return (size_t) -1;
132 result
= unescape(x
, convbuf
, convbuflen
);
136 /* sets last char in arg to 0, and advances pointer till the next argstart */
137 static char* finalize_arg(char **p
, char* pend
, char* convbuf
, size_t convbuflen
) {
140 size_t l
= get_length_and_convert(*p
+ 1, pend
, convbuf
+1, convbuflen
- 1);
141 if(l
== (size_t) -1) return 0;
144 *p
= 0; /* make it crash if its accessed again, since a string should always be the last arg */
148 while(*p
< pend
&& **p
!= ',' && !isspace(**p
)) (*p
)++;
151 while(*p
< pend
&& isspace(**p
)) (*p
)++;
162 enum RegisterUsage ru
;
165 static int canread(int index
, int cnt
) {
166 return index
>= 0 && index
+cnt
< mem
.capa
;
168 static int canwrite(int index
, int cnt
) {
169 return index
>= text_end
&& index
+cnt
< mem
.capa
;
/* Round X up to the next multiple of A; A must be a power of two.
 * ALIGN is already defined near the top of this file with unparenthesized
 * arguments (ALIGN(1 << 1, 4) would expand to 1 << 1+3). Drop that
 * definition first - C does not allow redefining a macro with a different
 * replacement list - and install a fully parenthesized one. */
#undef ALIGN
#define ALIGN(X, A) (((X) + ((A) - 1)) & -(A))
174 static int vm_init_stack(unsigned size
) {
175 if(mem
.lstack
) return 1;
176 unsigned want
= ALIGN(size
, 4096);
177 unsigned char *p
= realloc(mem
.mem
, mem
.capa
+want
);
179 dprintf(2, "error: could not allocate stack!\n");
185 registers
[AR_SP
].i
= text_end
;
189 static int grow_text(size_t req
) {
190 /* add 4 more slots than strictly necessary so we can access
191 * at least 1 full-length insn past text end without crash */
192 req
+= 4*sizeof(int);
193 size_t need
= mem
.ltext
+ req
;
194 if(need
> mem
.capa
-mem
.lheap
-mem
.lstack
) {
196 dprintf(2, "error: cannot enlarge text segment once execution started!\n");
199 size_t want
= ALIGN(need
, 4096);
200 unsigned char *p
= realloc(mem
.mem
, want
);
202 dprintf(2, "error: allocating memory failed!\n");
211 static int append_code(int *code
, size_t cnt
) {
212 if(!grow_text((cnt
+1)*4)) return 0;
214 for(i
= 0; i
< cnt
; i
++) {
215 memcpy(text
+mem
.ltext
, &code
[i
], 4);
218 memcpy(text
+mem
.ltext
, "\0\0\0\0", 4);
222 static void vm_reset_register_usage() {
224 for(i
= AR_NULL
+ 1; i
< AR_MAX
; i
++)
225 registers
[i
].ru
= RU_NONE
;
228 static void vm_init() {
230 /* initialize registers to an easily recognisable junk value */
231 for(i
= AR_NULL
+ 1; i
< AR_MAX
; i
++) {
232 registers
[i
].i
= 2222222222;
234 vm_reset_register_usage();
235 registers
[AR_SP
].i
= -1;
236 registers
[AR_NULL
].i
= 0;
237 int was_null
= text
== 0;
238 /* set up EIP so vm_state() doesn't crash */
240 /* put NULL insn as first instruction so VM doesn't execute
241 random garbage in mem */
242 if(was_null
) memcpy(text
, "\0\0\0\0", 4);
245 static inline int consume_int(int **eip
) {
250 static void change_reg_usage(int regno
, enum RegisterAccess ra
) {
251 if(regno
>= AR_MAX
) {
252 vm_signal(VM_SIGSEGV
, 0);
255 registers
[regno
].ru
= get_reg_usage(regno
, registers
[regno
].ru
, ra
);
258 static void vm_update_register_usage(int *eip
) {
259 const struct regaccess_info
*ri
= ®access_info
[*eip
];
260 if(ri
->ra_reg1
) change_reg_usage(eip
[1], ri
->ra_reg1
);
261 if(ri
->ra_reg2
) change_reg_usage(eip
[2], ri
->ra_reg2
);
262 if(ri
->ra_mar
) change_reg_usage(AR_MAR
, ri
->ra_mar
);
263 if(ri
->ra_sp
) change_reg_usage(AR_SP
, ri
->ra_sp
);
266 static void write_mem1(int off
, int val
) {
267 unsigned char *m
= memory
+off
;
270 static void write_mem2(int off
, int val
) {
271 unsigned short *m
= (void*) (memory
+off
);
274 static void write_mem(int off
, int val
) {
275 int *m
= (void*) (memory
+off
);
279 static int read_mem(int off
) {
281 memcpy(&ret
, memory
+off
, 4);
285 static int vm_push(int value
) {
286 if(!canwrite(registers
[AR_SP
].i
, 4)) return 0;
287 write_mem(registers
[AR_SP
].i
, value
);
288 registers
[AR_SP
].i
+= 4;
292 static int vm_pop(int *value
) {
293 if((int) registers
[AR_SP
].i
>= 4) {
294 registers
[AR_SP
].i
-= 4;
295 *value
= read_mem(registers
[AR_SP
].i
);
301 static int vm_syscall(void) {
303 scno
= registers
[AR_AX
].i
,
304 arg1
= registers
[AR_BX
].i
,
305 arg2
= registers
[AR_CX
].i
,
306 arg3
= registers
[AR_DX
].i
;
307 /* we follow linux x86_64 syscall numbers for simplicity */
309 case 0: /* SYS_read (fd, buf, size) */
311 case 1: /* SYS_write (fd, buf, size) */
312 if(!canread(arg2
, arg3
)) return -EFAULT
;
314 ret
= read(arg1
, ((char*)memory
)+arg2
, arg3
);
316 ret
= write(arg1
, ((char*)memory
)+arg2
, arg3
);
317 if(ret
== -1) return -errno
;
319 case 60: /* SYS_exit (exitcode) */
321 default: return -ENOSYS
;
325 static int label_check() {
326 if(tglist_getsize(label_refs
)) {
327 dprintf(2, "error: unresolved label refs!\n");
328 size_t i
; struct label_ref
*l
;
329 for(i
=0; i
<tglist_getsize(label_refs
); ++i
) {
330 l
= &tglist_get(label_refs
, i
);
331 dprintf(2, "%s@%u\n", l
->name
, l
->insoff
);
338 #define EIP registers[AR_NULL].i
343 static int vm_return
;
344 static void vm_signal(int sig
, int param
) {
347 dprintf(2, "illegal instruction at IP %u\n", EIP
);
350 dprintf(2, "segmentation fault: invalid access at %u\n", EIP
);
353 dprintf(2, "aborted (assertlte check failed at IP %u)\n", EIP
);
356 dprintf(2, "unknown signal\n");
/* Operand accessors for the instruction being executed. All of them rely
 * on a variable `int *eip` in the expanding scope that points at the
 * opcode word; X is the operand position (1 = first operand). */
/* Operand X as an integer. */
#define CODE_INT(X) eip[X]
/* Operand X with its word reinterpreted as a float. */
#define CODE_FLOAT(X) ((float*)eip)[X]
/* Integer / float view of the register whose number is operand X. */
#define REGI(X) registers[CODE_INT(X)].i
#define REGF(X) registers[CODE_INT(X)].f
366 static int vm_step(int run_context
) {
367 /* we use register AR_NULL as instruction pointer */
368 int *eip
= (void*)(text
+ EIP
);
371 // breakpoints can be set only in interactive mode
374 vm_signal(VM_SIGILL
, 0);
378 if(*eip
& BREAKPOINT_FLAG
) {
379 *eip
&= ~BREAKPOINT_FLAG
;
382 if(!run_context
) vm_reset_register_usage();
383 vm_update_register_usage(eip
);
384 } else if(op
>= SCMD_MAX
) {
385 vm_signal(VM_SIGILL
, 0);
388 int eip_inc
= 1 + opcodes
[op
].argcount
;
393 /* don't modify IP */
395 dprintf(2, "no code at IP %u.\n", EIP
);
398 REGI(1) += CODE_INT(2);
401 REGI(1) -= CODE_INT(2);
407 REGI(1) = CODE_INT(2);
428 REGI(1) = !!(REGI(1) == REGI(2));
431 REGI(1) = !!(REGI(1) != REGI(2));
434 REGI(1) = !!(REGI(1) > REGI(2));
437 REGI(1) = !!(REGI(1) < REGI(2));
440 REGI(1) = !!(REGI(1) >= REGI(2));
443 REGI(1) = !!(REGI(1) <= REGI(2));
446 REGI(1) = !!(REGI(1) && REGI(2));
449 REGI(1) = !!(REGI(1) || REGI(2));
451 case SCMD_LOADSPOFFS
:
452 registers
[AR_MAR
].i
= registers
[AR_SP
].i
- CODE_INT(1);
455 if(!vm_push(REGI(1))) goto oob
;
458 if(!vm_pop(®I(1))) goto oob
;
461 REGI(1) *= CODE_INT(2);
478 case SCMD_SHIFTRIGHT
:
482 REGF(1) += CODE_FLOAT(2);
485 REGF(1) -= CODE_FLOAT(2);
500 REGI(1) = !!(REGF(1) > REGF(2));
503 REGI(1) = !!(REGF(1) < REGF(2));
506 REGI(1) = !!(REGF(1) >= REGF(2));
509 REGI(1) = !!(REGF(1) <= REGF(2));
511 case SCMD_ZEROMEMORY
:
513 if(canwrite(registers
[AR_MAR
].i
, tmp
)) {
514 memset(((char*)memory
)+registers
[AR_MAR
].i
,0,tmp
);
519 if(tmp
<= 0 || tmp
> 4 || tmp
== 3) {
520 dprintf(2, "VM: invalid memcpy use at IP %u\n", EIP
);
537 if(canwrite(registers
[AR_MAR
].i
, tmp
)) {
539 case 4: write_mem (registers
[AR_MAR
].i
, val
); break;
540 case 2: write_mem2(registers
[AR_MAR
].i
, val
); break;
541 case 1: write_mem1(registers
[AR_MAR
].i
, val
); break;
545 vm_signal(VM_SIGSEGV
, 0);
558 if(canread(registers
[AR_MAR
].i
, tmp
)) {
560 memcpy(&val
, memory
+registers
[AR_MAR
].i
, 4);
562 case 4: REGI(1) = val
; break;
563 case 2: REGI(1) = val
& 0xffff; break;
564 case 1: REGI(1) = val
& 0xff; break;
569 if(registers
[AR_AX
].i
== 0) goto jump
;
572 if(registers
[AR_AX
].i
== 0) break;
578 if((unsigned)tmp
< text_end
&& !(tmp
&3))
579 registers
[AR_NULL
].i
= tmp
;
581 vm_signal(VM_SIGSEGV
, tmp
);
587 if(!vm_push(registers
[AR_NULL
].i
+ eip_inc
*4)) goto oob
;
591 registers
[AR_SP
].i
-= 4;
592 tmp
= read_mem(registers
[AR_SP
].i
);
595 /* we re-purpose "callscr" mnemonic to mean syscall,
596 as it is unused in ags-emitted bytecode.
597 using it is unportable, it works only in agssim.
598 the register arg for callscr instruction is ignored.
599 the arguments are passed in regs ax,bx,cx,dx,op
600 in this order, where the first arg is the syscall
601 number. return value is put in ax. */
602 registers
[AR_AX
].i
= vm_syscall();
604 case SCMD_CHECKBOUNDS
:
605 if(REGI(1) > CODE_INT(2)) vm_signal(VM_SIGABRT
, 0);
608 case SCMD_DYNAMICBOUNDS
:
609 case SCMD_MEMZEROPTRND
:
610 case SCMD_LOOPCHECKOFF
:
611 case SCMD_CHECKNULLREG
:
612 case SCMD_STRINGSNOTEQ
:
613 case SCMD_STRINGSEQUAL
:
614 case SCMD_CREATESTRING
:
616 case SCMD_MEMINITPTR
:
617 case SCMD_MEMZEROPTR
:
618 case SCMD_MEMREADPTR
:
619 case SCMD_MEMWRITEPTR
:
621 case SCMD_NUMFUNCARGS
:
622 case SCMD_SUBREALSTACK
:
625 dprintf(2, "info: %s not implemented yet\n", opcodes
[*eip
].mnemonic
);
627 size_t i
, l
= opcodes
[*eip
].argcount
;
628 for(i
= 0; i
< l
; i
++) ++(*eip
);
632 vm_signal(VM_SIGILL
, 0);
635 registers
[AR_NULL
].i
+= eip_inc
*4;
639 static inline char *int_to_str(int value
, char* out
) {
640 sprintf(out
, "%d", value
);
644 static int* get_next_ip(int *eip
, int off
) {
645 int *ret
= eip
, i
, op
;
646 for(i
=0; i
<off
; ++i
) {
647 op
= *ret
& OPCODE_MASK
;
649 ret
+=1+opcodes
[op
].argcount
;
656 static const char *get_regname(unsigned regno
) {
657 if(regno
< AR_MAX
) return regnames
[regno
];
661 static void vm_state() {
662 if(!interactive
) return;
663 static const char ru_strings
[][3] = {
665 [RU_READ
] = {'R', 0},
666 [RU_WRITE
] = {'W', 0},
667 [RU_WRITE_AFTER_READ
] = {'R', 'W', 0},
669 static const char regorder
[] = {
670 0, AR_MAR
, AR_OP
, AR_SP
, -1,
671 AR_AX
, AR_BX
, AR_CX
, AR_DX
, -1, -1};
673 for(j
=0; j
< ARRAY_SIZE(regorder
)-1; ++j
) {
675 if(i
== -1) printf("\n");
677 printf("%-3s: %-2s %-11d", i
== 0 ? "eip" : regnames
[i
], ru_strings
[registers
[i
].ru
], registers
[i
].i
);
678 if(regorder
[j
+1] != -1) printf(" ");
681 char stackview
[5][24];
685 for(j
=0,i
= MIN(registers
[AR_SP
].i
+2*4, text_end
+mem
.lstack
);
686 i
>= MAX(registers
[AR_SP
].i
-2*4, text_end
);
688 sprintf(stackview
[j
],
689 "SL %s %3zu %d", i
== registers
[AR_SP
].i
? ">" : " ", i
, read_mem(i
));
692 int *eip
= (void*)(text
+ registers
[AR_NULL
].i
), wasnull
= 0;
693 for(i
= 0; i
<5; i
++) {
694 char a1b
[32], a2b
[32], a3b
[32], inst
[48];
696 int *nip
= get_next_ip(eip
, i
-2),
697 op
= *nip
& OPCODE_MASK
;
699 const char *arg1
= opcodes
[op
].argcount
== 0 ? "" : \
700 (opcodes
[op
].regcount
> 0 ? get_regname(nip
[1]) : int_to_str(nip
[1], a1b
));
701 const char *arg2
= opcodes
[op
].argcount
< 2 ? "" : \
702 (opcodes
[op
].regcount
> 1 ? get_regname(nip
[2]) : int_to_str(nip
[2], a2b
));
703 const char *arg3
= opcodes
[op
].argcount
< 3 ? "" : \
704 (opcodes
[op
].regcount
> 2 ? get_regname(nip
[3]) : int_to_str(nip
[2], a3b
));
705 if(op
== SCMD_REGTOREG
) {
706 const char* tmp
= arg1
;
707 arg1
= arg2
; arg2
= tmp
;
710 sprintf(inst
, " %s %s %s %s", i
==2?">":" ", opcodes
[op
].mnemonic
, arg1
, arg2
);
713 sprintf(inst
, "%d", *nip
);
717 printf("%-52s %s\n", inst
, stackview
[i
]);
722 if(!label_check()) return;
724 if(!vm_step(1)) break;
728 static int usage(int fd
, char *a0
) {
730 "%s [OPTIONS] [file.s] - simple ags vm simulator\n"
731 "implements the ALU and a small stack\n"
732 "useful to examine how a chunk of code modifies VM state\n"
734 "-i : interpreter mode - don't print anything, run and exit\n"
735 "by default, mode is interactive, sporting the following commands:\n"
736 "!i - reset VM state and IP\n"
740 "!b ADDR - set a breakpoint on ADDR (address or label)\n"
745 static int lastcommand
;
748 UC_NEXT
, /* step-over */
755 static void execute_user_command_i(int uc
, char* param
) {
757 case UC_STEP
: if(label_check()) vm_step(0); break;
760 if(isdigit(param
[0]))
763 ptr
= get_label_offset(param
);
765 dprintf(2, "label %s not found!\n", param
);
770 if(addr
>= text_end
) {
771 dprintf(2, "breakpoint offset %d out of bounds\n", addr
);
775 memcpy(&insn
, text
+addr
, 4);
776 insn
|= BREAKPOINT_FLAG
;
777 memcpy(text
+addr
, &insn
, 4);
780 case UC_NEXT
: *get_next_ip((void*)(text
+EIP
), 1) |= BREAKPOINT_FLAG
;
782 case UC_RUN
: vm_run(); break;
783 case UC_INIT
: vm_init(); break;
784 case UC_QUIT
: exit(0); break;
785 case UC_HELP
: usage(1, "agssim"); break;
790 static void execute_user_command(char *cmd
) {
791 if(!vm_init_stack(16384)) return;
794 while(!isspace(*param
)) param
++;
795 while(isspace(*param
)) param
++;
797 else if(!strcmp(cmd
, "s")) uc
= UC_STEP
;
798 else if(!strcmp(cmd
, "r")) uc
= UC_RUN
;
799 else if(!strcmp(cmd
, "i")) uc
= UC_INIT
;
800 else if(!strcmp(cmd
, "q")) uc
= UC_QUIT
;
801 else if(!strcmp(cmd
, "h")) uc
= UC_HELP
;
802 else if(!strcmp(cmd
, "n")) uc
= UC_NEXT
;
803 else if(*cmd
== 'b') uc
= UC_BP
;
805 dprintf(2, "unknown command\n");
808 execute_user_command_i(uc
, param
);
811 int main(int argc
, char** argv
) {
815 while((c
= getopt(argc
, argv
, "i")) != EOF
) switch(c
) {
816 case 'i': interactive
= 0; break;
817 default: return usage(2, argv
[0]);
819 if(argv
[optind
]) in
= fopen(argv
[optind
], "r");
821 dprintf(2, "error opening %s\n", argv
[optind
]);
824 char buf
[1024], *sym
;
825 char convbuf
[sizeof(buf
)]; /* to convert escaped string into non-escaped version */
829 if(interactive
) printf(ADS
" - type !h for help\n");
831 while(fgets(buf
, sizeof buf
, in
)) {
835 char* p
= buf
, *pend
= buf
+ sizeof buf
;
836 if(*p
== '\n' && lastcommand
) {
837 execute_user_command_i(lastcommand
, "");
840 if(*p
== '#' || *p
== ';') continue;
842 char *n
= strchr(p
, '\n');
844 execute_user_command(p
+1);
847 while(isspace(*p
) && p
< pend
) p
++;
851 while(!isspace(*p
) && p
< pend
) p
++;
853 size_t l
= strlen(sym
);
854 if(l
> 1 && sym
[l
-1] == ':') {
855 // functionstart or label
857 resolve_label(sym
, mem
.ltext
);
858 unsigned *loff
= get_label_offset(sym
);
859 if(loff
) dprintf(2, "warning: label %s overwritten\n", sym
);
860 add_label(sym
, mem
.ltext
);
863 unsigned instr
= find_insn(sym
);
865 dprintf(2, "line %zu: error: unknown instruction '%s'\n", lineno
, sym
);
870 for(arg
= 0; arg
< opcodes
[instr
].argcount
; arg
++) {
871 sym
= finalize_arg(&p
, pend
, convbuf
, sizeof(convbuf
));
873 dprintf(2, "line %zu: error: expected \"\n", lineno
);
877 if(arg
< opcodes
[instr
].regcount
) {
879 if(value
== AR_NULL
) {
881 dprintf(2, "line %zu: error: expected register name!\n", lineno
);
884 if(instr
== SCMD_REGTOREG
) {
885 /* fix reversed order of arguments */
888 while(p
< pend
&& *p
!= ',' && !isspace(*p
)) p
++;
892 if(value
== AR_NULL
) goto needreg_err
;
900 /* immediate can be function name, string,
901 * variable name, stack fixup, or numeric value */
903 size_t l
= strlen(sym
)-1, tl
= mem
.ltext
;
904 if(!append_code((int[2]){SCMD_JMP
, tl
+8+ALIGN(l
, 4)}, 2)) goto loop_footer
;
907 while((ssize_t
)l
> 0) {
909 memcpy(&x
, p
, l
>=4?4:l
);
910 if(!append_code(&x
, 1)) goto loop_footer
;
915 } else if(sym
[0] == '@') {
916 dprintf(2, "error: global variable handling not implemented\n");
918 } else if(sym
[0] == '.') {
919 if(memcmp(sym
+1, "stack", 5)) {
920 dprintf(2, "error: expected stack\n");
923 dprintf(2, "error: stack fixup not implemented\n");
925 } else if(isdigit(sym
[0]) || sym
[0] == '-') {
926 if(sym
[0] == '-') assert(isdigit(sym
[1]));
932 case SCMD_JMP
: case SCMD_JZ
: case SCMD_JNZ
: {
934 unsigned *loff
= get_label_offset(sym
);
936 add_label_ref(sym
, mem
.ltext
+pos
*4);
938 } else value
= *loff
;
941 if(!isdigit(sym
[0])) {
942 dprintf(2, "line %zu: error: expected number\n", lineno
);
950 append_code(code
, pos
);
953 if(!interactive
) execute_user_command("r");
954 else if(in
!= stdin
) {