/* Banner string; VERSION is not defined in this part of the file and is
   expected to come from elsewhere (build flags or another header). */
#define ADS ":::AGSSim " VERSION " by rofl0r:::"

/* Classic min/max macros. Both evaluate their arguments more than once,
   so do not pass expressions with side effects. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Element count of a real array (not of a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/* The top bit of a code word marks a breakpoint; the remaining bits hold
   the opcode. The constant is unsigned because shifting a signed 1 into
   the sign bit (1<<31) is undefined behaviour in C. */
#define BREAKPOINT_FLAG (1u<<31)
#define OPCODE_MASK (~(BREAKPOINT_FLAG))
28 static int interactive
;
30 static struct text_segment
{
40 tglist(struct label_ref
) *label_refs
;
41 static void add_label_ref(char *name
, unsigned insno
) {
42 struct label_ref
new = {.name
= strdup(name
), .insno
= insno
};
43 tglist_add(label_refs
, new);
45 static void resolve_label(char* name
, unsigned insno
) {
47 for(i
=0; i
<tglist_getsize(label_refs
); ) {
48 struct label_ref
*l
= &tglist_get(label_refs
, i
);
49 if(!strcmp(l
->name
, name
)) {
51 text
.code
[l
->insno
] = insno
;
52 tglist_delete(label_refs
, i
);
57 hbmap(char*, unsigned, 32) *label_map
;
58 static unsigned *get_label_offset(char* name
) {
59 return hbmap_get(label_map
, name
);
61 static int add_label(char* name
, int insno
) {
62 char* tmp
= strdup(name
);
63 return hbmap_insert(label_map
, tmp
, insno
) != -1;
/* Comparator for keys that are C strings stored by pointer: `a` and `b`
   each point at a `char *`, and the strings they refer to are compared
   with strcmp. Passed to hbmap_new as the key comparison callback. */
static int strptrcmp(const void *a, const void *b) {
	const char *lhs = *(const char * const *) a;
	const char *rhs = *(const char * const *) b;
	return strcmp(lhs, rhs);
}
70 static unsigned string_hash(const char* s
) {
78 static void init_labels() {
79 label_map
= hbmap_new(strptrcmp
, string_hash
, 32);
80 label_refs
= tglist_new();
83 /* TODO: move duplicate code from Assembler.c into separate TU */
84 static int get_reg(char* regname
) {
86 for(; i
< AR_MAX
; i
++)
87 if(strcmp(regnames
[i
], regname
) == 0)
92 static size_t mnemolen
[SCMD_MAX
];
93 static int mnemolen_initdone
= 0;
95 static void init_mnemolen(void) {
97 for(; i
< SCMD_MAX
; i
++)
98 mnemolen
[i
] = strlen(opcodes
[i
].mnemonic
);
99 mnemolen_initdone
= 1;
102 static unsigned find_insn(char* sym
) {
103 if(!mnemolen_initdone
) init_mnemolen();
104 size_t i
= 0, l
= strlen(sym
);
105 for(; i
< SCMD_MAX
; i
++)
106 if(l
== mnemolen
[i
] && memcmp(sym
, opcodes
[i
].mnemonic
, l
) == 0)
111 #include "StringEscape.h"
/* expects a pointer to the first char after an opening " in a string,
 * converts the string into convbuf, and returns the length of that string */
114 static size_t get_length_and_convert(char* x
, char* end
, char* convbuf
, size_t convbuflen
) {
116 char* e
= x
+ strlen(x
);
117 assert(e
> x
&& e
< end
&& *e
== 0);
119 while(isspace(*e
)) e
--;
120 if(*e
!= '"') return (size_t) -1;
122 result
= unescape(x
, convbuf
, convbuflen
);
/* sets last char in arg to 0, and advances pointer till the next argstart */
127 static char* finalize_arg(char **p
, char* pend
, char* convbuf
, size_t convbuflen
) {
130 size_t l
= get_length_and_convert(*p
+ 1, pend
, convbuf
+1, convbuflen
- 1);
131 if(l
== (size_t) -1) return 0;
134 *p
= 0; /* make it crash if its accessed again, since a string should always be the last arg */
138 while(*p
< pend
&& **p
!= ',' && !isspace(**p
)) (*p
)++;
141 while(*p
< pend
&& isspace(**p
)) (*p
)++;
152 enum RegisterUsage ru
;
/* Byte array backing the VM's stack/memory (4000 bytes). */
static unsigned char stack_mem[1000*4];
#define memory stack_mem

/* Bounds check for an access of `cnt` bytes starting at byte `index`.
 *
 * Returns 1 when the range is acceptable, 0 otherwise. Negative values for
 * either argument are rejected. A negative `cnt` used to slip through
 * whenever index+cnt stayed non-negative, and the syscall path then passed
 * it on as a huge size to read()/write().
 *
 * NOTE(review): the comparison is deliberately kept strict (`<`), as in the
 * original, so a range ending exactly at the end of the buffer is still
 * refused.
 */
static int canread(int index, int cnt) {
	const size_t size = sizeof(memory)/sizeof(memory[0]);
	if(index < 0 || cnt < 0) return 0;
	/* both operands are non-negative ints, so the size_t sum cannot wrap */
	return (size_t) index + (size_t) cnt < size;
}
162 static void grow_text(size_t req
) {
163 if(text
.len
+ req
> text
.capa
) {
164 text
.code
= realloc(text
.code
, (text
.capa
+1024)*sizeof(int));
169 static void append_code(int *code
, size_t cnt
) {
172 for(i
= 0; i
< cnt
; i
++) {
173 text
.code
[text
.len
++] = code
[i
];
175 text
.code
[text
.len
] = 0;
178 static void vm_reset_register_usage() {
180 for(i
= AR_NULL
+ 1; i
< AR_MAX
; i
++)
181 registers
[i
].ru
= RU_NONE
;
184 static void vm_init() {
186 /* initialize registers to an easily recognisable junk value */
187 for(i
= AR_NULL
+ 1; i
< AR_MAX
; i
++) {
188 registers
[i
].i
= 2222222222;
190 vm_reset_register_usage();
191 registers
[AR_SP
].i
= 0;
192 registers
[AR_NULL
].i
= 0;
193 int was_null
= text
.code
== 0;
194 /* set up EIP so vm_state() doesn't crash */
196 /* put NULL insn as first instruction so VM doesn't execute
197 random garbage in mem */
198 if(was_null
) text
.code
[0] = 0;
201 static inline int consume_int(int **eip
) {
206 static void change_reg_usage(int regno
, enum RegisterAccess ra
) {
207 registers
[regno
].ru
= get_reg_usage(regno
, registers
[regno
].ru
, ra
);
210 static void vm_update_register_usage(int *eip
) {
211 const struct regaccess_info
*ri
= ®access_info
[*eip
];
212 if(ri
->ra_reg1
) change_reg_usage(eip
[1], ri
->ra_reg1
);
213 if(ri
->ra_reg2
) change_reg_usage(eip
[2], ri
->ra_reg2
);
214 if(ri
->ra_mar
) change_reg_usage(AR_MAR
, ri
->ra_mar
);
215 if(ri
->ra_sp
) change_reg_usage(AR_SP
, ri
->ra_sp
);
218 static void write_mem1(int off
, int val
) {
219 unsigned char *m
= (void*) memory
;
222 static void write_mem2(int off
, int val
) {
223 unsigned short *m
= (void*) memory
;
224 m
[off
/2] = val
&0xffff;
226 static void write_mem(int off
, int val
) {
227 int *m
= (void*) memory
;
231 static int read_mem(int off
) {
232 int *m
= (void*) memory
;
236 static int vm_push(int value
) {
237 if(!canread(registers
[AR_SP
].i
, 4)) return 0;
238 write_mem(registers
[AR_SP
].i
, value
);
239 registers
[AR_SP
].i
+= 4;
243 static int vm_pop(int *value
) {
244 if((int) registers
[AR_SP
].i
>= 4) {
245 registers
[AR_SP
].i
-= 4;
246 *value
= read_mem(registers
[AR_SP
].i
);
252 static int vm_syscall(int scno
) {
253 int ret
, arg1
, arg2
, arg3
;
254 /* we follow linux x86_64 syscall numbers for simplicity */
256 case 0: /* SYS_read (fd, buf, size) */
258 case 1: /* SYS_write (fd, buf, size) */
259 if(!vm_pop(&arg1
) || !vm_pop(&arg2
) || !vm_pop(&arg3
)) return -EINVAL
;
260 if(!canread(arg2
, arg3
)) return -EFAULT
;
262 ret
= read(arg1
, ((char*)memory
)+arg2
, arg3
);
264 ret
= write(arg1
, ((char*)memory
)+arg2
, arg3
);
265 if(ret
== -1) return -errno
;
267 case 60: /* SYS_exit (exitcode) */
268 if(!vm_pop(&arg1
)) arg1
= 1;
270 default: return -ENOSYS
;
274 static int label_check() {
275 if(tglist_getsize(label_refs
)) {
276 dprintf(2, "error: unresolved label refs!\n");
277 size_t i
; struct label_ref
*l
;
278 for(i
=0; i
<tglist_getsize(label_refs
); ++i
) {
279 l
= &tglist_get(label_refs
, i
);
280 dprintf(2, "%s@%u\n", l
->name
, l
->insno
);
287 #define CODE_INT(X) eip[X]
288 #define CODE_FLOAT(X) ((float*)eip)[X]
289 #define REGI(X) registers[CODE_INT(X)].i
290 #define REGF(X) registers[CODE_INT(X)].f
292 static int vm_step(int run_context
) {
293 /* we use register AR_NULL as instruction pointer */
294 #define EIP registers[AR_NULL].i
295 int *eip
= &text
.code
[EIP
];
296 int eip_inc
= 1 + opcodes
[*eip
&OPCODE_MASK
].argcount
;
299 if(*eip
& BREAKPOINT_FLAG
) {
300 *eip
&= ~BREAKPOINT_FLAG
;
303 if(!run_context
) vm_reset_register_usage();
304 vm_update_register_usage(eip
);
309 /* don't modify IP */
310 dprintf(2, "no code at IP %u.\n", EIP
);
313 REGI(1) += CODE_INT(2);
316 REGI(1) -= CODE_INT(2);
322 REGI(1) = CODE_INT(2);
343 REGI(1) = !!(REGI(1) == REGI(2));
346 REGI(1) = !!(REGI(1) != REGI(2));
349 REGI(1) = !!(REGI(1) > REGI(2));
352 REGI(1) = !!(REGI(1) < REGI(2));
355 REGI(1) = !!(REGI(1) >= REGI(2));
358 REGI(1) = !!(REGI(1) <= REGI(2));
361 REGI(1) = !!(REGI(1) && REGI(2));
364 REGI(1) = !!(REGI(1) || REGI(2));
366 case SCMD_LOADSPOFFS
:
367 registers
[AR_MAR
].i
= registers
[AR_SP
].i
- CODE_INT(1);
370 if(!vm_push(REGI(1))) goto oob
;
373 if(!vm_pop(®I(1))) goto oob
;
376 REGI(1) *= CODE_INT(2);
393 case SCMD_SHIFTRIGHT
:
397 REGF(1) += CODE_FLOAT(2);
400 REGF(1) -= CODE_FLOAT(2);
415 REGI(1) = !!(REGF(1) > REGF(2));
418 REGI(1) = !!(REGF(1) < REGF(2));
421 REGI(1) = !!(REGF(1) >= REGF(2));
424 REGI(1) = !!(REGF(1) <= REGF(2));
428 if(tmp
<= 0 || tmp
> 4 || tmp
== 3) {
429 dprintf(2, "VM: invalid memcpy use at IP %u\n", EIP
);
446 if(canread(registers
[AR_MAR
].i
, tmp
)) {
448 case 4: write_mem (registers
[AR_MAR
].i
, val
); break;
449 case 2: write_mem2(registers
[AR_MAR
].i
, val
); break;
450 case 1: write_mem1(registers
[AR_MAR
].i
, val
); break;
454 dprintf(2, "info: caught OOB access at IP %u\n", EIP
);
466 if(canread(registers
[AR_MAR
].i
, tmp
)) {
467 int val
= memory
[registers
[AR_MAR
].i
];
469 case 4: REGI(1) = val
; break;
470 case 2: REGI(1) = val
& 0xffff; break;
471 case 1: REGI(1) = val
& 0xff; break;
476 if(registers
[AR_AX
].i
== 0) goto jump
;
479 if(registers
[AR_AX
].i
== 0) break;
485 if((unsigned)tmp
<= text
.len
)
486 registers
[AR_NULL
].i
= tmp
;
487 else dprintf(2, "error: caught invalid jump to %u at IP %u\n", tmp
, EIP
);
491 if(!vm_push(registers
[AR_NULL
].i
+ eip_inc
)) goto oob
;
495 registers
[AR_SP
].i
-= 4;
496 tmp
= read_mem(registers
[AR_SP
].i
);
499 /* we re-purpose "callscr" mnemonic to mean syscall,
500 as it is unused in ags-emitted bytecode.
501 using it is unportable, it works only in agssim.
502 syscall number is passed in reg, arguments on the stack. */
503 registers
[AR_AX
].i
= vm_syscall(REGI(1));
506 case SCMD_DYNAMICBOUNDS
:
507 case SCMD_MEMZEROPTRND
:
508 case SCMD_LOOPCHECKOFF
:
509 case SCMD_CHECKNULLREG
:
510 case SCMD_STRINGSNOTEQ
:
511 case SCMD_STRINGSEQUAL
:
512 case SCMD_CREATESTRING
:
513 case SCMD_ZEROMEMORY
:
515 case SCMD_MEMINITPTR
:
516 case SCMD_MEMZEROPTR
:
517 case SCMD_MEMREADPTR
:
518 case SCMD_MEMWRITEPTR
:
519 case SCMD_CHECKBOUNDS
:
521 case SCMD_NUMFUNCARGS
:
522 case SCMD_SUBREALSTACK
:
526 dprintf(2, "info: %s not implemented yet\n", opcodes
[*eip
].mnemonic
);
528 size_t i
, l
= opcodes
[*eip
].argcount
;
529 for(i
= 0; i
< l
; i
++) ++(*eip
);
533 registers
[AR_NULL
].i
+= eip_inc
;
537 static inline char *int_to_str(int value
, char* out
) {
538 sprintf(out
, "%d", value
);
542 static int* get_next_ip(int *eip
, int off
) {
544 for(i
=0; i
<off
; ++i
) ret
+=1+opcodes
[*ret
].argcount
;
548 static void vm_state() {
549 if(!interactive
) return;
550 static const char ru_strings
[][3] = {
552 [RU_READ
] = {'R', 0},
553 [RU_WRITE
] = {'W', 0},
554 [RU_WRITE_AFTER_READ
] = {'R', 'W', 0},
556 static const char regorder
[] = {
557 0, AR_MAR
, AR_OP
, AR_SP
, -1,
558 AR_AX
, AR_BX
, AR_CX
, AR_DX
, -1, -1};
560 for(j
=0; j
< ARRAY_SIZE(regorder
)-1; ++j
) {
562 if(i
== -1) printf("\n");
564 printf("%-3s: %-2s %-11d", i
== 0 ? "eip" : regnames
[i
], ru_strings
[registers
[i
].ru
], registers
[i
].i
);
565 if(regorder
[j
+1] != -1) printf(" ");
568 char stackview
[5][24];
571 for(j
=0,i
= MIN(registers
[AR_SP
].i
+2*4, sizeof(stack_mem
)/4);
572 i
>= MAX(registers
[AR_SP
].i
-2*4, 0);
574 sprintf(stackview
[j
],
575 "SL %s %3zu %d", i
== registers
[AR_SP
].i
? ">" : " ", i
, read_mem(i
));
578 int *eip
= &text
.code
[registers
[AR_NULL
].i
], wasnull
= 0;
579 for(i
= 0; i
<5; i
++) {
580 char a1b
[32], a2b
[32], a3b
[32], inst
[48];
582 int *nip
= get_next_ip(eip
, i
-2),
583 op
= *nip
& OPCODE_MASK
;
584 const char *arg1
= opcodes
[*nip
].argcount
== 0 ? "" : \
585 (opcodes
[op
].regcount
> 0 ? regnames
[nip
[1]] : int_to_str(nip
[1], a1b
));
586 const char *arg2
= opcodes
[*nip
].argcount
< 2 ? "" : \
587 (opcodes
[op
].regcount
> 1 ? regnames
[nip
[2]] : int_to_str(nip
[2], a2b
));
588 const char *arg3
= opcodes
[*nip
].argcount
< 3 ? "" : \
589 (opcodes
[op
].regcount
> 2 ? regnames
[nip
[3]] : int_to_str(nip
[2], a3b
));
591 sprintf(inst
, " %s %s %s %s", i
==2?">":" ", opcodes
[op
].mnemonic
, arg1
, arg2
);
595 printf("%-52s %s\n", inst
, stackview
[i
]);
600 if(!label_check()) return;
602 int *eip
= &text
.code
[registers
[AR_NULL
].i
];
604 if(!vm_step(1)) break;
608 static int usage(int fd
, char *a0
) {
610 "%s [OPTIONS] [file.s] - simple ags vm simulator\n"
611 "implements the ALU and a small stack\n"
612 "useful to examine how a chunk of code modifies VM state\n"
614 "-i : interpreter mode - don't print anything, run and exit\n"
615 "by default, mode is interactive, sporting the following commands:\n"
616 "!i - reset VM state and IP\n"
624 static int lastcommand
;
627 UC_NEXT
, /* step-over */
633 static void execute_user_command_i(int uc
) {
635 case UC_STEP
: if(label_check()) vm_step(0); break;
636 case UC_NEXT
: *get_next_ip(&text
.code
[EIP
], 1) |= BREAKPOINT_FLAG
;
638 case UC_RUN
: vm_run(); break;
639 case UC_INIT
: vm_init(); break;
640 case UC_QUIT
: exit(0); break;
641 case UC_HELP
: usage(1, "agssim"); break;
646 static void execute_user_command(char *cmd
) {
649 else if(!strcmp(cmd
, "s")) uc
= UC_STEP
;
650 else if(!strcmp(cmd
, "r")) uc
= UC_RUN
;
651 else if(!strcmp(cmd
, "i")) uc
= UC_INIT
;
652 else if(!strcmp(cmd
, "q")) uc
= UC_QUIT
;
653 else if(!strcmp(cmd
, "h")) uc
= UC_HELP
;
654 else if(!strcmp(cmd
, "n")) uc
= UC_NEXT
;
656 dprintf(2, "unknown command\n");
659 execute_user_command_i(uc
);
662 int main(int argc
, char** argv
) {
666 while((c
= getopt(argc
, argv
, "i")) != EOF
) switch(c
) {
667 case 'i': interactive
= 0; break;
668 default: return usage(2, argv
[0]);
670 if(argv
[optind
]) in
= fopen(argv
[optind
], "r");
672 dprintf(2, "error opening %s\n", argv
[optind
]);
675 char buf
[1024], *sym
;
676 char convbuf
[sizeof(buf
)]; /* to convert escaped string into non-escaped version */
680 if(interactive
) printf(ADS
" - type !h for help\n");
682 while(fgets(buf
, sizeof buf
, in
)) {
686 char* p
= buf
, *pend
= buf
+ sizeof buf
;
687 if(*p
== '\n' && lastcommand
) {
688 execute_user_command_i(lastcommand
);
691 if(*p
== '#' || *p
== ';') continue;
693 char *n
= strchr(p
, '\n');
695 execute_user_command(p
+1);
698 while(isspace(*p
) && p
< pend
) p
++;
702 while(!isspace(*p
) && p
< pend
) p
++;
704 size_t l
= strlen(sym
);
705 if(l
> 1 && sym
[l
-1] == ':') {
706 // functionstart or label
708 resolve_label(sym
, text
.len
);
709 unsigned *loff
= get_label_offset(sym
);
710 if(loff
) dprintf(2, "warning: label %s overwritten\n");
711 add_label(sym
, text
.len
);
714 unsigned instr
= find_insn(sym
);
716 dprintf(2, "line %zu: error: unknown instruction '%s'\n", lineno
, sym
);
721 for(arg
= 0; arg
< opcodes
[instr
].argcount
; arg
++) {
722 sym
= finalize_arg(&p
, pend
, convbuf
, sizeof(convbuf
));
724 dprintf(2, "line %zu: error: expected \"\n", lineno
);
728 if(arg
< opcodes
[instr
].regcount
) {
730 if(value
== AR_NULL
) {
732 dprintf(2, "line %zu: error: expected register name!\n", lineno
);
735 if(instr
== SCMD_REGTOREG
) {
736 /* fix reversed order of arguments */
739 while(p
< pend
&& *p
!= ',' && !isspace(*p
)) p
++;
743 if(value
== AR_NULL
) goto needreg_err
;
751 /* immediate can be function name, string,
752 * variable name, stack fixup, or numeric value */
754 dprintf(2, "error: string handling not implemented\n");
756 } else if(sym
[0] == '@') {
757 dprintf(2, "error: global variable handling not implemented\n");
759 } else if(sym
[0] == '.') {
760 if(memcmp(sym
+1, "stack", 5)) {
761 dprintf(2, "error: expected stack\n");
764 dprintf(2, "error: stack fixup not implemented\n");
766 } else if(isdigit(sym
[0]) || sym
[0] == '-') {
767 if(sym
[0] == '-') assert(isdigit(sym
[1]));
773 case SCMD_JMP
: case SCMD_JZ
: case SCMD_JNZ
: {
775 unsigned *loff
= get_label_offset(sym
);
777 add_label_ref(sym
, text
.len
+pos
);
779 } else value
= *loff
;
782 if(!isdigit(sym
[0])) {
783 dprintf(2, "error: expected number\n");
791 append_code(code
, pos
);
794 if(!interactive
) execute_user_command("r");
795 else if(in
!= stdin
) {