12 #define ADS ":::AGSSim " VERSION " by rofl0r:::"
15 #define MAX(a, b) ((a) > (b) ? (a) : (b))
18 #define MIN(a, b) ((a) < (b) ? (a) : (b))
22 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
25 static int interactive
;
27 static struct text_segment
{
37 tglist(struct label_ref
) *label_refs
;
38 static void add_label_ref(char *name
, unsigned insno
) {
39 struct label_ref
new = {.name
= strdup(name
), .insno
= insno
};
40 tglist_add(label_refs
, new);
42 static void resolve_label(char* name
, unsigned insno
) {
44 for(i
=0; i
<tglist_getsize(label_refs
); ) {
45 struct label_ref
*l
= &tglist_get(label_refs
, i
);
46 if(!strcmp(l
->name
, name
)) {
48 text
.code
[l
->insno
] = insno
;
49 tglist_delete(label_refs
, i
);
54 hbmap(char*, unsigned, 32) *label_map
;
55 static unsigned *get_label_offset(char* name
) {
56 return hbmap_get(label_map
, name
);
58 static int add_label(char* name
, int insno
) {
59 char* tmp
= strdup(name
);
60 return hbmap_insert(label_map
, tmp
, insno
) != -1;
/* Comparison callback: `a` and `b` each point to a C-string pointer;
 * the result is strcmp() of the two strings they refer to. */
static int strptrcmp(const void *a, const void *b) {
	const char *lhs = *(const char * const *) a;
	const char *rhs = *(const char * const *) b;
	return strcmp(lhs, rhs);
}
67 static unsigned string_hash(const char* s
) {
75 static void init_labels() {
76 label_map
= hbmap_new(strptrcmp
, string_hash
, 32);
77 label_refs
= tglist_new();
80 /* TODO: move duplicate code from Assembler.c into separate TU */
81 static int get_reg(char* regname
) {
83 for(; i
< AR_MAX
; i
++)
84 if(strcmp(regnames
[i
], regname
) == 0)
89 static size_t mnemolen
[SCMD_MAX
];
90 static int mnemolen_initdone
= 0;
92 static void init_mnemolen(void) {
94 for(; i
< SCMD_MAX
; i
++)
95 mnemolen
[i
] = strlen(opcodes
[i
].mnemonic
);
96 mnemolen_initdone
= 1;
99 static unsigned find_insn(char* sym
) {
100 if(!mnemolen_initdone
) init_mnemolen();
101 size_t i
= 0, l
= strlen(sym
);
102 for(; i
< SCMD_MAX
; i
++)
103 if(l
== mnemolen
[i
] && memcmp(sym
, opcodes
[i
].mnemonic
, l
) == 0)
108 #include "StringEscape.h"
109 /* expects a pointer to the first char after an opening " in a string,
110 * converts the string into convbuf, and returns the length of that string */
111 static size_t get_length_and_convert(char* x
, char* end
, char* convbuf
, size_t convbuflen
) {
113 char* e
= x
+ strlen(x
);
114 assert(e
> x
&& e
< end
&& *e
== 0);
116 while(isspace(*e
)) e
--;
117 if(*e
!= '"') return (size_t) -1;
119 result
= unescape(x
, convbuf
, convbuflen
);
123 /* sets last char in arg to 0, and advances pointer till the next argstart */
124 static char* finalize_arg(char **p
, char* pend
, char* convbuf
, size_t convbuflen
) {
127 size_t l
= get_length_and_convert(*p
+ 1, pend
, convbuf
+1, convbuflen
- 1);
128 if(l
== (size_t) -1) return 0;
131 *p
= 0; /* make it crash if its accessed again, since a string should always be the last arg */
135 while(*p
< pend
&& **p
!= ',' && !isspace(**p
)) (*p
)++;
138 while(*p
< pend
&& isspace(**p
)) (*p
)++;
149 enum RegisterUsage ru
;
/* byte-addressed backing store of the simulated stack (1000*4 bytes) */
static unsigned char stack_mem[1000*4];
#define memory stack_mem

/* Return nonzero iff the `cnt` bytes starting at byte offset `index` lie
 * completely inside `memory`.
 * Negative offsets and negative counts are rejected (a negative count would
 * otherwise turn into a huge length once handed to read()/write()).
 * The check never computes index+cnt, so it cannot overflow and does not
 * depend on signed/unsigned conversion. A range ending exactly at the end
 * of `memory` is valid. */
static int canread(int index, int cnt) {
	const size_t size = sizeof(memory)/sizeof(memory[0]);
	if(index < 0 || cnt < 0) return 0;
	if((size_t) index > size) return 0;
	return (size_t) cnt <= size - (size_t) index;
}
159 static void grow_text(size_t req
) {
160 if(text
.len
+ req
> text
.capa
) {
161 text
.code
= realloc(text
.code
, (text
.capa
+1024)*sizeof(int));
166 static void append_code(int *code
, size_t cnt
) {
169 for(i
= 0; i
< cnt
; i
++) {
170 text
.code
[text
.len
++] = code
[i
];
172 text
.code
[text
.len
] = 0;
175 static void vm_reset_register_usage() {
177 for(i
= AR_NULL
+ 1; i
< AR_MAX
; i
++)
178 registers
[i
].ru
= RU_NONE
;
181 static void vm_init() {
183 /* initialize registers to an easily recognisable junk value */
184 for(i
= AR_NULL
+ 1; i
< AR_MAX
; i
++) {
185 registers
[i
].i
= 2222222222;
187 vm_reset_register_usage();
188 registers
[AR_SP
].i
= 0;
189 registers
[AR_NULL
].i
= 0;
190 int was_null
= text
.code
== 0;
191 /* set up EIP so vm_state() doesn't crash */
193 /* put NULL insn as first instruction so VM doesn't execute
194 random garbage in mem */
195 if(was_null
) text
.code
[0] = 0;
198 static inline int consume_int(int **eip
) {
203 static void change_reg_usage(int regno
, enum RegisterAccess ra
) {
204 registers
[regno
].ru
= get_reg_usage(regno
, registers
[regno
].ru
, ra
);
207 static void vm_update_register_usage(int *eip
) {
208 const struct regaccess_info
*ri
= ®access_info
[*eip
];
209 if(ri
->ra_reg1
) change_reg_usage(eip
[1], ri
->ra_reg1
);
210 if(ri
->ra_reg2
) change_reg_usage(eip
[2], ri
->ra_reg2
);
211 if(ri
->ra_mar
) change_reg_usage(AR_MAR
, ri
->ra_mar
);
212 if(ri
->ra_sp
) change_reg_usage(AR_SP
, ri
->ra_sp
);
215 static void write_mem1(int off
, int val
) {
216 unsigned char *m
= (void*) memory
;
219 static void write_mem2(int off
, int val
) {
220 unsigned short *m
= (void*) memory
;
221 m
[off
/2] = val
&0xffff;
223 static void write_mem(int off
, int val
) {
224 int *m
= (void*) memory
;
228 static int read_mem(int off
) {
229 int *m
= (void*) memory
;
233 static int vm_push(int value
) {
234 if(!canread(registers
[AR_SP
].i
, 4)) return 0;
235 write_mem(registers
[AR_SP
].i
, value
);
236 registers
[AR_SP
].i
+= 4;
240 static int vm_pop(int *value
) {
241 if((int) registers
[AR_SP
].i
>= 4) {
242 registers
[AR_SP
].i
-= 4;
243 *value
= read_mem(registers
[AR_SP
].i
);
249 static int vm_syscall(int scno
) {
250 int ret
, arg1
, arg2
, arg3
;
251 /* we follow linux x86_64 syscall numbers for simplicity */
253 case 0: /* SYS_read (fd, buf, size) */
255 case 1: /* SYS_write (fd, buf, size) */
256 if(!vm_pop(&arg1
) || !vm_pop(&arg2
) || !vm_pop(&arg3
)) return -EINVAL
;
257 if(!canread(arg2
, arg3
)) return -EFAULT
;
259 ret
= read(arg1
, ((char*)memory
)+arg2
, arg3
);
261 ret
= write(arg1
, ((char*)memory
)+arg2
, arg3
);
262 if(ret
== -1) return -errno
;
264 case 60: /* SYS_exit (exitcode) */
265 if(!vm_pop(&arg1
)) arg1
= 1;
267 default: return -ENOSYS
;
271 static int label_check() {
272 if(tglist_getsize(label_refs
)) {
273 dprintf(2, "error: unresolved label refs!\n");
274 size_t i
; struct label_ref
*l
;
275 for(i
=0; i
<tglist_getsize(label_refs
); ++i
) {
276 l
= &tglist_get(label_refs
, i
);
277 dprintf(2, "%s@%u\n", l
->name
, l
->insno
);
284 #define CODE_INT(X) eip[X]
285 #define CODE_FLOAT(X) ((float*)eip)[X]
286 #define REGI(X) registers[CODE_INT(X)].i
287 #define REGF(X) registers[CODE_INT(X)].f
289 static int vm_step(int run_context
) {
290 if(!run_context
&& label_check()) return 0;
291 /* we use register AR_NULL as instruction pointer */
292 #define EIP registers[AR_NULL].i
293 int *eip
= &text
.code
[EIP
];
294 int eip_inc
= 1 + opcodes
[*eip
].argcount
;
296 if(!run_context
) vm_reset_register_usage();
297 vm_update_register_usage(eip
);
301 /* don't modify IP */
302 dprintf(2, "no code at IP %u.\n", EIP
);
305 REGI(1) += CODE_INT(2);
308 REGI(1) -= CODE_INT(2);
314 REGI(1) = CODE_INT(2);
335 REGI(1) = !!(REGI(1) == REGI(2));
338 REGI(1) = !!(REGI(1) != REGI(2));
341 REGI(1) = !!(REGI(1) > REGI(2));
344 REGI(1) = !!(REGI(1) < REGI(2));
347 REGI(1) = !!(REGI(1) >= REGI(2));
350 REGI(1) = !!(REGI(1) <= REGI(2));
353 REGI(1) = !!(REGI(1) && REGI(2));
356 REGI(1) = !!(REGI(1) || REGI(2));
358 case SCMD_LOADSPOFFS
:
359 registers
[AR_MAR
].i
= registers
[AR_SP
].i
- CODE_INT(1);
362 if(!vm_push(REGI(1))) goto oob
;
365 if(!vm_pop(®I(1))) goto oob
;
368 REGI(1) *= CODE_INT(2);
385 case SCMD_SHIFTRIGHT
:
389 REGF(1) += CODE_FLOAT(2);
392 REGF(1) -= CODE_FLOAT(2);
407 REGI(1) = !!(REGF(1) > REGF(2));
410 REGI(1) = !!(REGF(1) < REGF(2));
413 REGI(1) = !!(REGF(1) >= REGF(2));
416 REGI(1) = !!(REGF(1) <= REGF(2));
420 if(tmp
<= 0 || tmp
> 4 || tmp
== 3) {
421 dprintf(2, "VM: invalid memcpy use at IP %u\n", EIP
);
438 if(canread(registers
[AR_MAR
].i
, tmp
)) {
440 case 4: write_mem (registers
[AR_MAR
].i
, val
); break;
441 case 2: write_mem2(registers
[AR_MAR
].i
, val
); break;
442 case 1: write_mem1(registers
[AR_MAR
].i
, val
); break;
446 dprintf(2, "info: caught OOB access at IP %u\n", EIP
);
458 if(canread(registers
[AR_MAR
].i
, tmp
)) {
459 int val
= memory
[registers
[AR_MAR
].i
];
461 case 4: REGI(1) = val
; break;
462 case 2: REGI(1) = val
& 0xffff; break;
463 case 1: REGI(1) = val
& 0xff; break;
468 if(registers
[AR_AX
].i
== 0) goto jump
;
471 if(registers
[AR_AX
].i
== 0) break;
477 if((unsigned)tmp
<= text
.len
)
478 registers
[AR_NULL
].i
= tmp
;
479 else dprintf(2, "error: caught invalid jump to %u at IP %u\n", tmp
, EIP
);
483 if(!vm_push(registers
[AR_NULL
].i
+ eip_inc
)) goto oob
;
487 registers
[AR_SP
].i
-= 4;
488 tmp
= read_mem(registers
[AR_SP
].i
);
491 /* we re-purpose "callscr" mnemonic to mean syscall,
492 as it is unused in ags-emitted bytecode.
493 using it is unportable, it works only in agssim.
494 syscall number is passed in reg, arguments on the stack. */
495 registers
[AR_AX
].i
= vm_syscall(REGI(1));
498 case SCMD_DYNAMICBOUNDS
:
499 case SCMD_MEMZEROPTRND
:
500 case SCMD_LOOPCHECKOFF
:
501 case SCMD_CHECKNULLREG
:
502 case SCMD_STRINGSNOTEQ
:
503 case SCMD_STRINGSEQUAL
:
504 case SCMD_CREATESTRING
:
505 case SCMD_ZEROMEMORY
:
507 case SCMD_MEMINITPTR
:
508 case SCMD_MEMZEROPTR
:
509 case SCMD_MEMREADPTR
:
510 case SCMD_MEMWRITEPTR
:
511 case SCMD_CHECKBOUNDS
:
513 case SCMD_NUMFUNCARGS
:
514 case SCMD_SUBREALSTACK
:
518 dprintf(2, "info: %s not implemented yet\n", opcodes
[*eip
].mnemonic
);
520 size_t i
, l
= opcodes
[*eip
].argcount
;
521 for(i
= 0; i
< l
; i
++) ++(*eip
);
525 registers
[AR_NULL
].i
+= eip_inc
;
529 static inline char *int_to_str(int value
, char* out
) {
530 sprintf(out
, "%d", value
);
534 static void vm_state() {
535 if(!interactive
) return;
536 static const char ru_strings
[][3] = {
538 [RU_READ
] = {'R', 0},
539 [RU_WRITE
] = {'W', 0},
540 [RU_WRITE_AFTER_READ
] = {'R', 'W', 0},
542 static const char regorder
[] = {
543 0, AR_MAR
, AR_OP
, AR_SP
, -1,
544 AR_AX
, AR_BX
, AR_CX
, AR_DX
, -1, -1};
546 for(j
=0; j
< ARRAY_SIZE(regorder
)-1; ++j
) {
548 if(i
== -1) printf("\n");
550 printf("%-3s: %-2s %-11d", i
== 0 ? "eip" : regnames
[i
], ru_strings
[registers
[i
].ru
], registers
[i
].i
);
551 if(regorder
[j
+1] != -1) printf(" ");
555 for( i
= MIN(registers
[AR_SP
].i
+2*4, sizeof(stack_mem
)/4);
556 i
>= MAX(registers
[AR_SP
].i
-2*4, 0);
558 printf("SL %s %3zu %d\n", i
== registers
[AR_SP
].i
? ">" : " ", i
, read_mem(i
));
562 int *eip
= &text
.code
[registers
[AR_NULL
].i
];
563 char arg1buf
[32], arg2buf
[32];
564 const char *arg1
= opcodes
[*eip
].argcount
== 0 ? "" : \
565 (opcodes
[*eip
].regcount
> 0 ? regnames
[eip
[1]] : int_to_str(eip
[1], arg1buf
));
566 const char *arg2
= opcodes
[*eip
].argcount
< 2 ? "" : \
567 (opcodes
[*eip
].regcount
> 1 ? regnames
[eip
[2]] : int_to_str(eip
[2], arg2buf
));
568 printf(" > %s %s %s\n", opcodes
[*eip
].mnemonic
, arg1
, arg2
);
572 if(!label_check()) return;
574 int *eip
= &text
.code
[registers
[AR_NULL
].i
];
576 if(!vm_step(1)) break;
580 static int usage(int fd
, char *a0
) {
582 "%s [OPTIONS] [file.s] - simple ags vm simulator\n"
583 "implements the ALU and a small stack\n"
584 "useful to examine how a chunk of code modifies VM state\n"
586 "-i : interpreter mode - don't print anything, run and exit\n"
587 "by default, mode is interactive, sporting the following commands:\n"
588 "!i - reset VM state and IP\n"
595 static int lastcommand
;
603 static void execute_user_command_i(int uc
) {
605 case UC_STEP
: vm_step(0); break;
606 case UC_RUN
: vm_run(); break;
607 case UC_INIT
: vm_init(); break;
608 case UC_QUIT
: exit(0); break;
609 case UC_HELP
: usage(1, "agssim"); break;
614 static void execute_user_command(char *cmd
) {
617 else if(!strcmp(cmd
, "s")) uc
= UC_STEP
;
618 else if(!strcmp(cmd
, "r")) uc
= UC_RUN
;
619 else if(!strcmp(cmd
, "i")) uc
= UC_INIT
;
620 else if(!strcmp(cmd
, "q")) uc
= UC_QUIT
;
621 else if(!strcmp(cmd
, "h")) uc
= UC_HELP
;
623 dprintf(2, "unknown command\n");
626 execute_user_command_i(uc
);
629 int main(int argc
, char** argv
) {
633 while((c
= getopt(argc
, argv
, "i")) != EOF
) switch(c
) {
634 case 'i': interactive
= 0; break;
635 default: return usage(2, argv
[0]);
637 if(argv
[optind
]) in
= fopen(argv
[optind
], "r");
639 dprintf(2, "error opening %s\n", argv
[optind
]);
642 char buf
[1024], *sym
;
643 char convbuf
[sizeof(buf
)]; /* to convert escaped string into non-escaped version */
647 if(interactive
) printf(ADS
" - type !h for help\n");
648 while(fgets(buf
, sizeof buf
, in
)) {
652 char* p
= buf
, *pend
= buf
+ sizeof buf
;
653 if(*p
== '\n' && lastcommand
) {
654 execute_user_command_i(lastcommand
);
657 if(*p
== '#' || *p
== ';') continue;
659 char *n
= strchr(p
, '\n');
661 execute_user_command(p
+1);
664 while(isspace(*p
) && p
< pend
) p
++;
668 while(!isspace(*p
) && p
< pend
) p
++;
670 size_t l
= strlen(sym
);
671 if(l
> 1 && sym
[l
-1] == ':') {
672 // functionstart or label
674 resolve_label(sym
, text
.len
);
675 unsigned *loff
= get_label_offset(sym
);
676 if(loff
) dprintf(2, "warning: label %s overwritten\n");
677 add_label(sym
, text
.len
);
680 unsigned instr
= find_insn(sym
);
682 dprintf(2, "line %zu: error: unknown instruction '%s'\n", lineno
, sym
);
687 for(arg
= 0; arg
< opcodes
[instr
].argcount
; arg
++) {
688 sym
= finalize_arg(&p
, pend
, convbuf
, sizeof(convbuf
));
690 dprintf(2, "line %zu: error: expected \"\n", lineno
);
694 if(arg
< opcodes
[instr
].regcount
) {
696 if(value
== AR_NULL
) {
698 dprintf(2, "line %zu: error: expected register name!\n", lineno
);
701 if(instr
== SCMD_REGTOREG
) {
702 /* fix reversed order of arguments */
705 while(p
< pend
&& *p
!= ',' && !isspace(*p
)) p
++;
709 if(value
== AR_NULL
) goto needreg_err
;
717 /* immediate can be function name, string,
718 * variable name, stack fixup, or numeric value */
720 dprintf(2, "error: string handling not implemented\n");
722 } else if(sym
[0] == '@') {
723 dprintf(2, "error: global variable handling not implemented\n");
725 } else if(sym
[0] == '.') {
726 if(memcmp(sym
+1, "stack", 5)) {
727 dprintf(2, "error: expected stack\n");
730 dprintf(2, "error: stack fixup not implemented\n");
732 } else if(isdigit(sym
[0]) || sym
[0] == '-') {
733 if(sym
[0] == '-') assert(isdigit(sym
[1]));
739 case SCMD_JMP
: case SCMD_JZ
: case SCMD_JNZ
: {
741 unsigned *loff
= get_label_offset(sym
);
743 add_label_ref(sym
, text
.len
+pos
);
745 } else value
= *loff
;
748 if(!isdigit(sym
[0])) {
749 dprintf(2, "error: expected number\n");
757 append_code(code
, pos
);
760 if(!interactive
) execute_user_command("r");