/*
 * experimental risc-like x86_64 port of neatcc
 * [neatcc.git] / x86.c
 * blob b03a2faeec5328732e25cea697fa0c39703c1ed3
 */
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include "gen.h"
5 #include "out.h"
6 #include "tok.h"
/* variable location: where a tmp's value currently lives (struct tmp.loc) */
#define LOC_REG		0x01	/* in the register numbered tmp->addr */
#define LOC_MEM		0x02	/* spilled to the frame at REG_FP + tmp->addr */
#define LOC_NUM		0x04	/* the constant tmp->addr */
#define LOC_SYM		0x08	/* symbol tmp->sym plus tmp->off */
#define LOC_LOCAL	0x10	/* frame slot at REG_FP + tmp->addr + tmp->off */

/* special registers */
#define REG_FP		R_RBP	/* frame pointer */
#define REG_SP		R_RSP	/* stack pointer */
#define REG_RET		R_RAX	/* function return value */
#define REG_FORK	R_RAX	/* result of a conditional expression */

/* registers */
#define R_RAX		0x00
#define R_RCX		0x01
#define R_RDX		0x02
#define R_RBX		0x03
#define R_RSP		0x04
#define R_RBP		0x05
#define R_RSI		0x06
#define R_RDI		0x07
/* x86_64 registers */
#define R_R8		0x08
#define R_R9		0x09
#define R_R10		0x0a
#define R_R11		0x0b
#define R_R12		0x0c
#define R_R13		0x0d
#define R_R14		0x0e
#define R_R15		0x0f

#define N_REGS		16
#define N_ARGS		ARRAY_SIZE(argregs)
#define N_TMPS		ARRAY_SIZE(tmpregs)

/* register bitmasks; bit n stands for register number n */
#define R_TMPS		0xffcf	/* every register except RSP and RBP */
#define R_ARGS		0x03c6	/* RCX, RDX, RSI, RDI, R8 and R9 */
#define R_SAVED		0xf008	/* RBX and R12..R15 */

#define MIN(a, b)		((a) < (b) ? (a) : (b))
/* round x up to a multiple of a; a must be a power of two */
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

void err(char *msg);
50 static char cs[SECSIZE]; /* code segment */
51 static int cslen;
52 static char ds[SECSIZE]; /* data segment */
53 static int dslen;
54 static long bsslen; /* bss segment size */
56 static long sp; /* stack pointer offset from R_RBP */
57 static long sp_max; /* maximum stack pointer offset */
58 static long sp_tmp; /* sp for the first tmp on the stack */
59 static long func_beg; /* function address in CS */
60 static long func_fpsub; /* stack pointer sub address in CS */
61 static int func_argc; /* # of args */
62 static int func_vararg; /* vararg function */
64 #define TMP(i) (((i) < ntmp) ? &tmps[ntmp - 1 - (i)] : NULL)
66 static struct tmp {
67 long addr;
68 char sym[NAMELEN];
69 long off; /* offset from a symbol or a local */
70 unsigned loc; /* variable location */
71 unsigned bt; /* type of address; zero when not a pointer */
72 } tmps[MAXTMP];
73 static int ntmp;
/* arch-specific functions */

/* data movement */
static void i_ldr(int l, int rd, int rn, int off, int bt);
static void i_mov(int rd, int rn, int bt);
static void i_num(int rd, long n);
static void i_sym(int rd, char *sym, int off);

/* register-register arithmetic */
static void i_add(int op, int rd, int rn, int rm);
static void i_shl(int op, int rd, int rm, int rs);
static void i_mul(int rd, int rn, int rm);
static void i_div(int op, int rd, int rn, int rm);
static void i_cmp(int rn, int rm);

/* arithmetic with immediates */
static int i_decodeable(long imm);
static void i_add_imm(int op, int rd, int rn, long n);
static void i_shl_imm(int op, int rd, int rn, long n);
static void i_cmp_imm(int rn, long n);
static void i_add_anyimm(int rd, int rn, long n);

/* unary operations and extensions */
static void i_set(int op, int rd);
static void i_neg(int rd);
static void i_not(int rd);
static void i_lnot(int rd);
static void i_zx(int rd, int bits);
static void i_sx(int rd, int bits);

/* branches and calls */
static void i_b(void);
static void i_bz(int rn, int z);
static void i_b_fill(long src, long dst);
static void i_call(char *sym, int off);
static void i_call_reg(int rd);

/* function entry and exit */
static void i_prolog(void);
static void i_epilog(void);
104 static struct tmp *regs[N_REGS];
105 static int tmpregs[] = {R_RAX, R_RDI, R_RSI, R_RDX, R_RCX, R_R8, R_R9,
106 R_R10, R_R11};
107 static int argregs[] = {R_RDI, R_RSI, R_RDX, R_RCX, R_R8, R_R9};
/* labels and jmps */
#define MAXJMPS		(1 << 14)

static long labels[MAXJMPS];	/* labels[id] is the code offset of label id */
static int nlabels;
static long jmp_loc[MAXJMPS];	/* code offset of each jump's 4-byte displacement */
static int jmp_goal[MAXJMPS];	/* label id each recorded jump targets */
static int njmps;		/* number of recorded jumps */
118 void o_label(int id)
120 if (id > nlabels)
121 nlabels = id + 1;
122 labels[id] = cslen;
125 static void jmp_add(int id)
127 if (njmps >= MAXJMPS)
128 err("nomem: MAXJMPS reached!\n");
129 jmp_loc[njmps] = cslen - 4;
130 jmp_goal[njmps] = id;
131 njmps++;
134 static void jmp_fill(void)
136 int i;
137 for (i = 0; i < njmps; i++)
138 i_b_fill(jmp_loc[i], labels[jmp_goal[i]]);
/* generating code */

/* store the low l bytes of n at s, least significant byte first */
static void putint(char *s, long n, int l)
{
	for (; l--; s++, n >>= 8)
		*s = n;
}
151 static void os(void *s, int n)
153 while (n--)
154 cs[cslen++] = *(char *) (s++);
157 static void oi(long n, int l)
159 while (l--) {
160 cs[cslen++] = n;
161 n >>= 8;
165 static long sp_push(int size)
167 sp += size;
168 if (sp > sp_max)
169 sp_max = sp;
170 return sp;
173 static void tmp_mem(struct tmp *tmp)
175 int src = tmp->addr;
176 if (tmp->loc != LOC_REG)
177 return;
178 if (sp_tmp == -1)
179 sp_tmp = sp;
180 tmp->addr = -sp_push(LONGSZ);
181 i_ldr(0, src, REG_FP, tmp->addr, LONGSZ);
182 regs[src] = NULL;
183 tmp->loc = LOC_MEM;
186 static void num_cast(struct tmp *t, unsigned bt)
188 if (!(bt & BT_SIGNED) && BT_SZ(bt) != LONGSZ)
189 t->addr &= ((1l << (long) (BT_SZ(bt) * 8)) - 1);
190 if (bt & BT_SIGNED && BT_SZ(bt) != LONGSZ &&
191 t->addr > (1l << (BT_SZ(bt) * 8 - 1)))
192 t->addr = -((1l << (BT_SZ(bt) * 8)) - t->addr);
195 static void tmp_reg(struct tmp *tmp, int dst, int deref)
197 int bt = tmp->bt;
198 if (!tmp->bt)
199 deref = 0;
200 if (deref)
201 tmp->bt = 0;
202 if (tmp->loc == LOC_NUM) {
203 i_num(dst, tmp->addr);
204 tmp->addr = dst;
205 regs[dst] = tmp;
206 tmp->loc = LOC_REG;
208 if (tmp->loc == LOC_SYM) {
209 i_sym(dst, tmp->sym, tmp->off);
210 tmp->addr = dst;
211 regs[dst] = tmp;
212 tmp->loc = LOC_REG;
214 if (tmp->loc == LOC_REG) {
215 if (deref)
216 i_ldr(1, dst, tmp->addr, 0, bt);
217 else if (dst != tmp->addr)
218 i_mov(dst, tmp->addr, LONGSZ);
219 regs[tmp->addr] = NULL;
221 if (tmp->loc == LOC_LOCAL) {
222 if (deref)
223 i_ldr(1, dst, REG_FP, tmp->addr + tmp->off, bt);
224 else
225 i_add_anyimm(dst, REG_FP, tmp->addr + tmp->off);
227 if (tmp->loc == LOC_MEM) {
228 i_ldr(1, dst, REG_FP, tmp->addr, LONGSZ);
229 if (deref)
230 i_ldr(1, dst, dst, 0, bt);
232 tmp->addr = dst;
233 regs[dst] = tmp;
234 tmp->loc = LOC_REG;
237 static void reg_free(int reg)
239 int i;
240 if (!regs[reg])
241 return;
242 for (i = 0; i < N_TMPS; i++)
243 if (!regs[tmpregs[i]]) {
244 tmp_reg(regs[reg], tmpregs[i], 0);
245 return;
247 tmp_mem(regs[reg]);
250 static void reg_for(int reg, struct tmp *t)
252 if (regs[reg] && regs[reg] != t)
253 reg_free(reg);
/* place t in reg without dereferencing it, evicting any other occupant */
static void tmp_mv(struct tmp *t, int reg)
{
	reg_for(reg, t);
	tmp_reg(t, reg, 0);
}
/* place the value of t in reg, dereferencing it if it is an address */
static void tmp_to(struct tmp *t, int reg)
{
	reg_for(reg, t);
	tmp_reg(t, reg, 1);
}
268 static void tmp_drop(int n)
270 int i;
271 for (i = ntmp - n; i < ntmp; i++)
272 if (tmps[i].loc == LOC_REG)
273 regs[tmps[i].addr] = NULL;
274 ntmp -= n;
/* load the value of the topmost tmp into reg and remove it from the stack */
static void tmp_pop(int reg)
{
	tmp_to(TMP(0), reg);
	tmp_drop(1);
}
284 static struct tmp *tmp_new(void)
286 return &tmps[ntmp++];
289 static void tmp_push(int reg)
291 struct tmp *t = tmp_new();
292 t->addr = reg;
293 t->bt = 0;
294 t->loc = LOC_REG;
295 regs[reg] = t;
298 void o_local(long addr)
300 struct tmp *t = tmp_new();
301 t->addr = -addr;
302 t->loc = LOC_LOCAL;
303 t->bt = 0;
304 t->off = 0;
307 void o_num(long num)
309 struct tmp *t = tmp_new();
310 t->addr = num;
311 t->bt = 0;
312 t->loc = LOC_NUM;
315 void o_sym(char *name)
317 struct tmp *t = tmp_new();
318 strcpy(t->sym, name);
319 t->loc = LOC_SYM;
320 t->bt = 0;
321 t->off = 0;
324 void o_tmpdrop(int n)
326 if (n == -1 || n > ntmp)
327 n = ntmp;
328 tmp_drop(n);
329 if (!ntmp) {
330 if (sp_tmp != -1)
331 sp = sp_tmp;
332 sp_tmp = -1;
336 /* make sure tmps remain intact after a conditional expression */
337 void o_fork(void)
339 int i;
340 for (i = 0; i < ntmp - 1; i++)
341 tmp_mem(&tmps[i]);
344 void o_forkpush(void)
346 tmp_pop(REG_FORK);
349 void o_forkjoin(void)
351 tmp_push(REG_FORK);
354 void o_tmpswap(void)
356 struct tmp *t1 = TMP(0);
357 struct tmp *t2 = TMP(1);
358 struct tmp t;
359 memcpy(&t, t1, sizeof(t));
360 memcpy(t1, t2, sizeof(t));
361 memcpy(t2, &t, sizeof(t));
362 if (t1->loc == LOC_REG)
363 regs[t1->addr] = t1;
364 if (t2->loc == LOC_REG)
365 regs[t2->addr] = t2;
368 static int reg_get(int mask)
370 int i;
371 for (i = 0; i < N_TMPS; i++)
372 if ((1 << tmpregs[i]) & mask && !regs[tmpregs[i]])
373 return tmpregs[i];
374 for (i = 0; i < N_TMPS; i++)
375 if ((1 << tmpregs[i]) & mask) {
376 reg_free(tmpregs[i]);
377 return tmpregs[i];
379 return 0;
382 static int reg_fortmp(struct tmp *t, int notmask)
384 if (t->loc == LOC_REG && !(notmask & (1 << t->addr)))
385 return t->addr;
386 return reg_get(~notmask);
389 static void tmp_copy(struct tmp *t1)
391 struct tmp *t2 = tmp_new();
392 memcpy(t2, t1, sizeof(*t1));
393 if (!(t1->loc & (LOC_REG | LOC_MEM)))
394 return;
395 if (t1->loc == LOC_MEM) {
396 tmp_mv(t2, reg_get(~0));
397 } else if (t1->loc == LOC_REG) {
398 t2->addr = reg_fortmp(t2, 1 << t1->addr);
399 i_mov(t2->addr, t1->addr, LONGSZ);
400 regs[t2->addr] = t2;
/* duplicate the topmost tmp */
void o_tmpcopy(void)
{
	struct tmp *top = TMP(0);
	tmp_copy(top);
}
409 void o_cast(unsigned bt)
411 struct tmp *t = TMP(0);
412 if (!t->bt && t->loc == LOC_NUM) {
413 num_cast(t, bt);
414 return;
416 if (BT_SZ(bt) != LONGSZ) {
417 int reg = reg_fortmp(t, 0);
418 tmp_to(t, reg);
419 if (bt & BT_SIGNED)
420 i_sx(reg, BT_SZ(bt) * 8);
421 else
422 i_zx(reg, BT_SZ(bt) * 8);
426 void o_func_beg(char *name, int argc, int global, int vararg)
428 out_sym(name, (global ? OUT_GLOB : 0) | OUT_CS, cslen, 0);
429 func_argc = argc;
430 func_vararg = vararg;
431 i_prolog();
432 sp = 0;
433 sp_max = sp;
434 ntmp = 0;
435 sp_tmp = -1;
436 nlabels = 0;
437 njmps = 0;
438 memset(regs, 0, sizeof(regs));
441 void o_deref(unsigned bt)
443 struct tmp *t = TMP(0);
444 if (t->bt)
445 tmp_to(t, reg_fortmp(t, 0));
446 t->bt = bt;
/* load the value of the topmost tmp into a register */
void o_load(void)
{
	struct tmp *t = TMP(0);
	int reg = reg_fortmp(t, 0);
	tmp_to(t, reg);
}
/* predicates on a tmp that is a plain value, not a pending dereference */
#define TMP_NUM(t)	((t)->loc == LOC_NUM && !(t)->bt)	/* a constant */
#define LOCAL_PTR(t)	((t)->loc == LOC_LOCAL && !(t)->bt)	/* address of a local */
#define SYM_PTR(t)	((t)->loc == LOC_SYM && !(t)->bt)	/* address of a symbol */
459 int o_popnum(long *c)
461 struct tmp *t = TMP(0);
462 if (!TMP_NUM(t))
463 return 1;
464 *c = t->addr;
465 tmp_drop(1);
466 return 0;
469 void o_ret(int rets)
471 if (rets)
472 tmp_pop(REG_RET);
473 else
474 i_num(REG_RET, 0);
475 o_jmp(0);
/* finish the function: bind label 0 here, patch all jumps, emit the epilogue */
void o_func_end(void)
{
	o_label(0);
	jmp_fill();
	i_epilog();
}
485 long o_mklocal(int sz)
487 return sp_push(ALIGN(sz, LONGSZ));
490 void o_rmlocal(long addr, int sz)
492 sp = addr - ALIGN(sz, LONGSZ);
495 long o_arg2loc(int i)
497 return -LONGSZ * (i + 2);
500 void o_assign(unsigned bt)
502 struct tmp *t1 = TMP(0);
503 struct tmp *t2 = TMP(1);
504 int r1 = reg_fortmp(t1, 0);
505 int r2 = reg_fortmp(t2, 1 << r1);
506 int off = 0;
507 tmp_to(t1, r1);
508 if (t2->bt)
509 tmp_to(t2, r2);
510 if (t2->loc == LOC_LOCAL) {
511 r2 = REG_FP;
512 off = t2->addr + t2->off;
513 } else {
514 tmp_to(t2, r2);
516 i_ldr(0, r1, r2, off, bt);
517 tmp_drop(2);
518 tmp_push(r1);
521 static long cu(int op, long i)
523 switch (op & 0xff) {
524 case O_NEG:
525 return -i;
526 case O_NOT:
527 return ~i;
528 case O_LNOT:
529 return !i;
531 return 0;
534 static int c_uop(int op)
536 struct tmp *t1 = TMP(0);
537 if (!TMP_NUM(t1))
538 return 1;
539 tmp_drop(1);
540 o_num(cu(op, t1->addr));
541 return 0;
544 static long cb(int op, long a, long b)
546 switch (op & 0xff) {
547 case O_ADD:
548 return a + b;
549 case O_SUB:
550 return a - b;
551 case O_AND:
552 return a & b;
553 case O_OR:
554 return a | b;
555 case O_XOR:
556 return a ^ b;
557 case O_MUL:
558 return a * b;
559 case O_DIV:
560 return a / b;
561 case O_MOD:
562 return a % b;
563 case O_SHL:
564 return a << b;
565 case O_SHR:
566 if (op & O_SIGNED)
567 return a >> b;
568 else
569 return (unsigned long) a >> b;
570 case O_LT:
571 return a < b;
572 case O_GT:
573 return a > b;
574 case O_LE:
575 return a <= b;
576 case O_GE:
577 return a >= b;
578 case O_EQ:
579 return a == b;
580 case O_NEQ:
581 return a != b;
583 return 0;
586 static int c_bop(int op)
588 struct tmp *t1 = TMP(0);
589 struct tmp *t2 = TMP(1);
590 int locs = LOCAL_PTR(t1) + LOCAL_PTR(t2);
591 int syms = SYM_PTR(t1) + SYM_PTR(t2);
592 int nums = TMP_NUM(t1) + TMP_NUM(t2);
593 if (syms + locs == 2 || syms + nums + locs != 2)
594 return 1;
595 if (nums == 1)
596 if ((op & 0xff) != O_ADD && ((op & 0xff) != O_SUB || TMP_NUM(t2)))
597 return 1;
598 if (nums == 1) {
599 long o1 = TMP_NUM(t1) ? t1->addr : t1->off;
600 long o2 = TMP_NUM(t2) ? t2->addr : t2->off;
601 long ret = cb(op, o2, o1);
602 if (!TMP_NUM(t1))
603 o_tmpswap();
604 t2->off = ret;
605 tmp_drop(1);
606 } else {
607 long ret = cb(op, t2->addr, t1->addr);
608 tmp_drop(2);
609 o_num(ret);
611 return 0;
614 void o_uop(int op)
616 int r1 = (op & 0xff) == O_LNOT ? R_RAX : reg_fortmp(TMP(0), 0);
617 if (!c_uop(op))
618 return;
619 tmp_to(TMP(0), r1);
620 switch (op & 0xff) {
621 case O_NEG:
622 i_neg(r1);
623 break;
624 case O_NOT:
625 i_not(r1);
626 break;
627 case O_LNOT:
628 i_lnot(r1);
629 break;
/*
 * Pop both operands of a binary operation into registers: *r2
 * receives the topmost tmp (restricted to mask1), *r1 the one below
 * it (restricted to mask2 and different from *r2).
 */
static void bin_regs(int *r1, int *r2, int mask1, int mask2)
{
	struct tmp *rhs = TMP(0);
	struct tmp *lhs = TMP(1);
	int rr = reg_fortmp(rhs, ~mask1);
	*r2 = rr;
	tmp_to(rhs, rr);
	*r1 = reg_fortmp(lhs, ~mask2 | (1 << rr));
	tmp_pop(*r2);
	tmp_pop(*r1);
}
644 static int bop_imm(int *r1, long *n, int swap)
646 struct tmp *t1 = TMP(0);
647 struct tmp *t2 = TMP(1);
648 if (!TMP_NUM(t1) && (!swap || !TMP_NUM(t2)))
649 return 1;
650 *n = TMP_NUM(t1) ? t1->addr : t2->addr;
651 if (!i_decodeable(*n))
652 return 1;
653 if (!TMP_NUM(t1))
654 o_tmpswap();
655 *r1 = reg_fortmp(t2, 0);
656 tmp_drop(1);
657 tmp_pop(*r1);
658 return 0;
661 static void bin_add(int op)
663 int r1, r2;
664 long n;
665 if (!bop_imm(&r1, &n, (op & 0xff) != O_SUB)) {
666 i_add_imm(op, r1, r1, n);
667 } else {
668 bin_regs(&r1, &r2, R_TMPS, R_TMPS);
669 i_add(op, r1, r1, r2);
671 tmp_push(r1);
674 static void bin_shx(int op)
676 int r1, r2;
677 long n;
678 if (!bop_imm(&r1, &n, 0)) {
679 i_shl_imm(op, r1, r1, n);
680 } else {
681 bin_regs(&r1, &r2, 1 << R_RCX, R_TMPS);
682 i_shl(op, r1, r1, r2);
684 tmp_push(r1);
/*
 * Base-2 logarithm of n if n is a power of two, -1 if it is not.
 * For n == 0 the number of bits in unsigned long is returned; callers
 * treat zero separately.
 *
 * The bit probe uses an unsigned long one: the original '1u << i' is
 * only 32 bits wide, so probing bits 32 and above was undefined and
 * large powers of two were misclassified.  The final test avoids
 * shifting n by the full width of the type.
 */
static int log2a(unsigned long n)
{
	int bits = (int) (sizeof(n) * 8);
	int i;
	for (i = 0; i < bits; i++)
		if (n & (1ul << i))
			break;
	if (i == bits)
		return i;
	/* a power of two has nothing above its lowest set bit */
	return (n >> i) == 1 ? i : -1;
}
698 /* optimized version of mul/div/mod for powers of two */
699 static int mul_2(int op)
701 struct tmp *t1 = TMP(0);
702 struct tmp *t2 = TMP(1);
703 long n;
704 int r2;
705 int p;
706 if ((op & 0xff) == O_MUL && t2->loc == LOC_NUM && !t2->bt)
707 o_tmpswap();
708 if (t1->loc != LOC_NUM || t1->bt)
709 return 1;
710 n = t1->addr;
711 p = log2a(n);
712 if (n && p == -1)
713 return 1;
714 if ((op & 0xff) == O_MUL) {
715 tmp_drop(1);
716 if (n == 1)
717 return 0;
718 if (n == 0) {
719 tmp_drop(1);
720 o_num(0);
721 return 0;
723 r2 = reg_fortmp(t2, 0);
724 tmp_to(t2, r2);
725 i_shl_imm(O_SHL, r2, r2, p);
726 return 0;
728 if (op == O_DIV) {
729 tmp_drop(1);
730 if (n == 1)
731 return 0;
732 r2 = reg_fortmp(t2, 0);
733 tmp_to(t2, r2);
734 i_shl_imm((op & O_SIGNED) | O_SHR, r2, r2, p);
735 return 0;
737 if (op == O_MOD) {
738 tmp_drop(1);
739 if (n == 1) {
740 tmp_drop(1);
741 o_num(0);
742 return 0;
744 r2 = reg_fortmp(t2, 0);
745 tmp_to(t2, r2);
746 i_zx(r2, p);
747 return 0;
749 return 1;
752 static void mulop(int *r1, int *r2, int rop)
754 struct tmp *t1 = TMP(0);
755 struct tmp *t2 = TMP(1);
756 if (t1->loc & LOC_REG && t1->addr != R_RAX && t1->addr != R_RDX)
757 rop = t1->addr;
758 tmp_to(t1, rop);
759 tmp_to(t2, R_RAX);
760 if (rop != R_RDX)
761 reg_free(R_RDX);
762 tmp_drop(2);
763 *r1 = rop;
764 *r2 = R_RAX;
767 static void bin_mul(int op)
769 int r1, r2;
770 if (!mul_2(op))
771 return;
772 mulop(&r1, &r2, (op & 0xff) == O_MUL ? R_RDX : R_RCX);
773 if ((op & 0xff) == O_MUL) {
774 i_mul(R_RAX, r1, r2);
775 tmp_push(R_RAX);
777 if ((op & 0xff) == O_DIV) {
778 i_div(op, R_RAX, r1, r2);
779 tmp_push(R_RAX);
781 if ((op & 0xff) == O_MOD) {
782 i_div(op, R_RDX, r1, r2);
783 tmp_push(R_RDX);
787 static void bin_cmp(int op)
789 int r1, r2;
790 long n;
791 if (!bop_imm(&r1, &n, (op & 0xff) == O_EQ || (op & 0xff) == O_NEQ)) {
792 i_cmp_imm(r1, n);
793 } else {
794 bin_regs(&r1, &r2, R_TMPS, R_TMPS);
795 i_cmp(r1, r2);
797 r1 = R_RAX;
798 reg_free(r1);
799 i_set(op, r1);
800 tmp_push(r1);
/*
 * Apply binary operator op to the two topmost tmps.  Constant
 * operands are folded when possible; otherwise the operator group in
 * bits 4..7 of op selects the code generator.
 */
void o_bop(int op)
{
	if (!c_bop(op))
		return;
	switch (op & 0xf0) {
	case 0x00:
		bin_add(op);
		break;
	case 0x10:
		bin_shx(op);
		break;
	case 0x20:
		bin_mul(op);
		break;
	case 0x30:
		bin_cmp(op);
		break;
	}
}
817 void o_memcpy(void)
819 struct tmp *t0 = TMP(0);
820 struct tmp *t1 = TMP(1);
821 struct tmp *t2 = TMP(2);
822 tmp_to(t0, R_RCX);
823 tmp_to(t1, R_RSI);
824 tmp_to(t2, R_RDI);
825 os("\xfc\xf3\xa4", 3); /* cld; rep movs */
826 tmp_drop(2);
829 void o_memset(void)
831 struct tmp *t0 = TMP(0);
832 struct tmp *t1 = TMP(1);
833 struct tmp *t2 = TMP(2);
834 tmp_to(t0, R_RCX);
835 tmp_to(t1, R_RAX);
836 tmp_to(t2, R_RDI);
837 os("\xfc\xf3\xaa", 3); /* cld; rep stosb */
838 tmp_drop(2);
/* pop the topmost tmp and branch to label id if it is zero (z) or nonzero (!z) */
static void jxz(int id, int z)
{
	struct tmp *cond = TMP(0);
	int r = reg_fortmp(cond, 0);
	tmp_pop(r);
	i_bz(r, z);
	jmp_add(id);
}
/* pop the topmost tmp and jump to label id if it is zero */
void o_jz(int id)
{
	jxz(id, 1);
}
/* pop the topmost tmp and jump to label id if it is not zero */
void o_jnz(int id)
{
	jxz(id, 0);
}
/* unconditional jump to label id; the target is filled in by jmp_fill() */
void o_jmp(int id)
{
	i_b();
	jmp_add(id);
}
865 void o_call(int argc, int rets)
867 struct tmp *t;
868 int i;
869 int aregs = MIN(N_ARGS, argc);
870 for (i = 0; i < N_TMPS; i++)
871 if (regs[tmpregs[i]] && regs[tmpregs[i]] - tmps < ntmp - argc)
872 tmp_mem(regs[tmpregs[i]]);
873 if (argc > aregs) {
874 sp_push(LONGSZ * (argc - aregs));
875 for (i = argc - 1; i >= aregs; --i) {
876 int reg = reg_fortmp(TMP(0), 0);
877 tmp_pop(reg);
878 i_ldr(0, reg, REG_SP, (i - aregs) * LONGSZ, LONGSZ);
881 for (i = aregs - 1; i >= 0; --i)
882 tmp_to(TMP(aregs - i - 1), argregs[i]);
883 tmp_drop(aregs);
884 t = TMP(0);
885 if (t->loc == LOC_SYM && !t->bt) {
886 i_call(t->sym, t->off);
887 tmp_drop(1);
888 } else {
889 int reg = reg_fortmp(t, 0);
890 tmp_pop(reg);
891 i_call_reg(reg);
893 if (rets)
894 tmp_push(REG_RET);
897 void o_mkbss(char *name, int size, int global)
899 out_sym(name, OUT_BSS | (global ? OUT_GLOB : 0), bsslen, size);
900 bsslen += ALIGN(size, OUT_ALIGNMENT);
903 #define MAXDATS (1 << 10)
904 static char dat_names[MAXDATS][NAMELEN];
905 static int dat_offs[MAXDATS];
906 static int ndats;
908 void *o_mkdat(char *name, int size, int global)
910 void *addr = ds + dslen;
911 int idx = ndats++;
912 if (idx >= MAXDATS)
913 err("nomem: MAXDATS reached!\n");
914 strcpy(dat_names[idx], name);
915 dat_offs[idx] = dslen;
916 out_sym(name, OUT_DS | (global ? OUT_GLOB : 0), dslen, size);
917 dslen += ALIGN(size, OUT_ALIGNMENT);
918 return addr;
921 static int dat_off(char *name)
923 int i;
924 for (i = 0; i < ndats; i++)
925 if (!strcmp(name, dat_names[i]))
926 return dat_offs[i];
927 return 0;
930 void o_datset(char *name, int off, unsigned bt)
932 struct tmp *t = TMP(0);
933 int sym_off = dat_off(name) + off;
934 if (t->loc == LOC_NUM && !t->bt) {
935 num_cast(t, bt);
936 memcpy(ds + sym_off, &t->addr, BT_SZ(bt));
938 if (t->loc == LOC_SYM && !t->bt) {
939 out_rel(t->sym, OUT_DS, sym_off);
940 memcpy(ds + sym_off, &t->off, BT_SZ(bt));
942 tmp_drop(1);
945 void o_write(int fd)
947 out_write(fd, cs, cslen, ds, dslen);
/* X86 arch specific functions */

/* primary opcode bytes */
#define I_MOV		0x89
#define I_MOVI		0xc7
#define I_MOVIR		0xb8
#define I_MOVR		0x8b
#define I_MOVSXD	0x63
#define I_SHX		0xd3
#define I_CMP		0x3b
#define I_TST		0x85
#define I_LEA		0x8d
#define I_NOT		0xf7
#define I_CALL		0xff
#define I_MUL		0xf7
#define I_XOR		0x33
#define I_TEST		0x85
#define I_CQO		0x99
#define I_PUSH		0x50
#define I_POP		0x58

/* two-byte opcodes: bit 16 marks them, O2() is emitted before O1() */
#define OP2(o2, o1)		(0x010000 | ((o2) << 8) | (o1))
#define O2(op)			(((op) >> 8) & 0xff)
#define O1(op)			((op) & 0xff)
#define MODRM(m, r1, r2)	((m) << 6 | (r1) << 3 | (r2))
#define REX(r1, r2)		(0x48 | (((r1) & 8) >> 1) | (((r2) & 8) >> 3))

/* for optimizing cmp + jmp */
/* true when the 7 bytes written by i_set() are the last code emitted */
#define OPT_ISCMP()		(last_set + 7 == cslen)
/* the condition byte of that set instruction */
#define OPT_CCOND()		(cs[last_set + 1])

static long last_set = -1;	/* code offset of the last i_set(), or -1 */
/*
 * Emit the prefixes and opcode byte(s) of an instruction whose operand
 * size is BT_SZ(bt): the 0x66 prefix for 2 bytes, a REX prefix when
 * the size is 8 or 1 or when r1/r2 are numbered 8 or above, and the
 * opcode with its low bit cleared for 1-byte operands.
 */
static void op_x(int op, int r1, int r2, int bt)
{
	int sz = BT_SZ(bt);
	int rex = 0;
	switch (sz) {
	case 8:
		rex |= 8;
		break;
	case 1:
		rex |= 0x40;
		break;
	}
	if (r1 & 0x8)
		rex |= 4;
	if (r2 & 0x8)
		rex |= 1;
	if (sz == 2)
		oi(0x66, 1);
	if (rex)
		oi(rex | 0x40, 1);
	if (op & 0x10000)
		oi(O2(op), 1);
	if (sz == 1)
		oi(O1(op) & ~0x1, 1);
	else
		oi(O1(op), 1);
}
1003 #define op_mr op_rm
1005 /* op_*(): r=reg, m=mem, i=imm, s=sym */
1006 static void op_rm(int op, int src, int base, int off, int bt)
1008 int dis = off == (char) off ? 1 : 4;
1009 int mod = dis == 4 ? 2 : 1;
1010 if (!off && (base & 7) != R_RBP)
1011 mod = 0;
1012 op_x(op, src, base, bt);
1013 oi(MODRM(mod, src & 0x07, base & 0x07), 1);
1014 if ((base & 7) == R_RSP)
1015 oi(0x24, 1);
1016 if (mod)
1017 oi(off, dis);
/* emit op with two register operands: src in the reg field, dst in the r/m field */
static void op_rr(int op, int src, int dst, int bt)
{
	int modrm = MODRM(3, src & 0x07, dst & 0x07);
	op_x(op, src, dst, bt);
	oi(modrm, 1);
}
1026 #define movrx_bt(bt) (((bt) == 4) ? 4 : LONGSZ)
1028 static int movrx_op(int bt, int mov)
1030 int sz = BT_SZ(bt);
1031 if (sz == 4)
1032 return bt & BT_SIGNED ? I_MOVSXD : mov;
1033 if (sz == 2)
1034 return OP2(0x0f, bt & BT_SIGNED ? 0xbf : 0xb7);
1035 if (sz == 1)
1036 return OP2(0x0f, bt & BT_SIGNED ? 0xbe : 0xb6);
1037 return mov;
1040 static void mov_r2r(int r1, int r2, unsigned bt)
1042 if (r1 != r2 || BT_SZ(bt) != LONGSZ)
1043 op_rr(movrx_op(bt, I_MOV), r1, r2, movrx_bt(bt));
1046 static void mov_m2r(int dst, int base, int off, int bt)
1048 op_rm(movrx_op(bt, I_MOVR), dst, base, off, movrx_bt(bt));
1051 static void i_zx(int rd, int bits)
1053 if (bits & 0x07) {
1054 i_shl_imm(O_SHL, rd, rd, LONGSZ * 8 - bits);
1055 i_shl_imm(O_SHR, rd, rd, LONGSZ * 8 - bits);
1056 } else {
1057 mov_r2r(rd, rd, bits >> 3);
1061 static void i_sx(int rd, int bits)
1063 mov_r2r(rd, rd, BT_SIGNED | (bits >> 3));
1066 static void i_add(int op, int rd, int rn, int rm)
1068 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
1069 static int rx[] = {0003, 0053, 0043, 0013, 0063};
1070 if (rn != rd)
1071 die("this is cisc!\n");
1072 op_rr(rx[op & 0x0f], rd, rm, LONGSZ);
/*
 * rd = rd <op> n for O_ADD, O_SUB, O_AND, O_OR and O_XOR with a
 * one-byte immediate; rn must be the same as rd.
 */
static void i_add_imm(int op, int rd, int rn, long n)
{
	/* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
	static int rx[] = {0xc0, 0xe8, 0xe0, 0xc8, 0xf0};
	unsigned char s[4];
	s[0] = REX(0, rd);
	s[1] = 0x83;
	s[2] = rx[op & 0x0f] | (rd & 7);
	s[3] = n & 0xff;
	if (rn != rd)
		die("this is cisc!\n");
	os((void *) s, 4);
}
/* whether imm fits the one-byte signed immediate used by the *_imm encoders */
static int i_decodeable(long imm)
{
	return !(imm > 127 || imm < -128);
}
1090 static void i_num(int rd, long n)
1092 if (!n) {
1093 op_rr(I_XOR, rd, rd, 4);
1094 return;
1096 if (n < 0 && -n <= 0xffffffff) {
1097 op_rr(I_MOVI, 0, rd, LONGSZ);
1098 oi(n, 4);
1099 } else {
1100 int len = 8;
1101 if (n > 0 && n <= 0xffffffff)
1102 len = 4;
1103 op_x(I_MOVIR + (rd & 7), 0, rd, len);
1104 oi(n, len);
1108 static void i_add_anyimm(int rd, int rn, long n)
1110 op_rm(I_LEA, rd, rn, n, LONGSZ);
1113 static void i_mul(int rd, int rn, int rm)
1115 if (rn != R_RDX)
1116 i_num(R_RDX, 0);
1117 op_rr(I_MUL, 4, rn, LONGSZ);
1120 static void i_div(int op, int rd, int rn, int rm)
1122 if (rn != R_RDX) {
1123 if (op & O_SIGNED)
1124 op_x(I_CQO, R_RAX, R_RDX, LONGSZ);
1125 else
1126 i_num(R_RDX, 0);
1128 op_rr(I_MUL, op & O_SIGNED ? 7 : 6, rn, LONGSZ);
1131 static void i_tst(int rn, int rm)
1133 op_rr(I_TST, rn, rm, LONGSZ);
1136 static void i_cmp(int rn, int rm)
1138 op_rr(I_CMP, rn, rm, LONGSZ);
/*
 * Compare rn with the one-byte immediate n.
 * NOTE(review): rn is or-ed into 0xf8 without the "& 7" the sibling
 * encoders use; for register numbers 0..15 the resulting byte is the
 * same because bit 3 is already set in 0xf8.
 */
static void i_cmp_imm(int rn, long n)
{
	unsigned char s[4];
	s[0] = REX(0, rn);
	s[1] = 0x83;
	s[2] = 0xf8 | rn;
	s[3] = n & 0xff;
	os(s, 4);
}
1147 static void i_set(int op, int rd)
1149 /* lt, gt, le, ge, eq, neq */
1150 static int ucond[] = {0x92, 0x97, 0x96, 0x93, 0x94, 0x95};
1151 static int scond[] = {0x9c, 0x9f, 0x9e, 0x9d, 0x94, 0x95};
1152 int cond = op & O_SIGNED ? scond[op & 0x0f] : ucond[op & 0x0f];
1153 char set[] = "\x0f\x00\xc0";
1154 if (rd != R_RAX)
1155 die("set works only with R_RAX\n");
1156 set[1] = cond;
1157 last_set = cslen;
1158 os(set, 3); /* setl al */
1159 os("\x48\x0f\xb6\xc0", 4); /* movzx rax, al */
1162 static void i_shl(int op, int rd, int rm, int rs)
1164 int sm = 4;
1165 if ((op & 0x0f) == 1)
1166 sm = op & O_SIGNED ? 7 : 5;
1167 if (rd != rm)
1168 die("this is cisc!\n");
1169 op_rr(I_SHX, sm, rd, LONGSZ);
1172 static void i_shl_imm(int op, int rd, int rn, long n)
1174 int sm = (op & 0x1) ? (op & O_SIGNED ? 0xf8 : 0xe8) : 0xe0 ;
1175 char s[4] = {REX(0, rn), 0xc1, sm | (rn & 7), n & 0xff};
1176 if (rd != rn)
1177 die("this is cisc!\n");
1178 os(s, 4);
1181 static void i_mov(int rd, int rn, int bt)
1183 op_rr(movrx_op(bt, I_MOVR), rd, rn, movrx_bt(bt));
1186 static void i_ldr(int l, int rd, int rn, int off, int bt)
1188 if (l)
1189 mov_m2r(rd, rn, off, bt);
1190 else
1191 op_rm(I_MOV, rd, rn, off, bt);
1194 static void i_sym(int rd, char *sym, int off)
1196 op_x(I_MOVIR + (rd & 7), 0, rd, LONGSZ);
1197 out_rel(sym, OUT_CS, cslen);
1198 oi(off, 8);
1201 static void i_neg(int rd)
1203 op_rr(I_NOT, 3, rd, LONGSZ);
1206 static void i_not(int rd)
1208 op_rr(I_NOT, 2, rd, LONGSZ);
1211 static void i_lnot(int rd)
1213 if (OPT_ISCMP()) {
1214 cs[last_set + 1] ^= 0x01;
1215 } else {
1216 char cmp[] = "\x00\x83\xf8\x00";
1217 cmp[0] = REX(0, rd);
1218 cmp[2] |= rd & 7;
1219 os(cmp, 4); /* cmp rax, 0 */
1220 i_set(O_EQ, rd);
1224 static void jx(int x, long addr)
1226 char op[2] = {0x0f};
1227 op[1] = x;
1228 os(op, 2); /* jx $addr */
1229 oi(addr - cslen - 4, 4);
1232 static void i_bz(int rn, int z)
1234 if (OPT_ISCMP()) {
1235 int cond = OPT_CCOND();
1236 cslen = last_set;
1237 jx((!z ? cond : cond ^ 0x01) & ~0x10, 0);
1238 last_set = -1;
1239 } else {
1240 i_tst(rn, rn);
1241 jx(z ? 0x84 : 0x85, 0);
/* unconditional jump with a zero 4-byte displacement that is patched later */
static void i_b(void)
{
	os("\xe9", 1);		/* jmp $addr */
	oi(0, 4);
}
1251 static void i_b_fill(long src, long dst)
1253 putint((void *) (cs + src), (dst - src) - 4, 4);
1256 static void i_call_reg(int rd)
1258 op_rr(I_CALL, 2, rd, LONGSZ);
1261 static void i_call(char *sym, int off)
1263 os("\xe8", 1); /* call $x */
1264 out_rel(sym, OUT_CS | OUT_REL, cslen);
1265 oi(-4 + off, 4);
1268 static void i_push(int reg)
1270 op_x(I_PUSH | (reg & 0x7), 0, reg, 4);
1273 static void i_pop(int reg)
1275 op_x(I_POP | (reg & 0x7), 0, reg, 4);
1278 static void i_saveargs(void)
1280 int i;
1281 int saved = func_vararg ? N_ARGS : MIN(N_ARGS, func_argc);
1282 os("\x58", 1); /* pop rax */
1283 for (i = saved - 1; i >= 0; i--)
1284 i_push(argregs[i]);
1285 os("\x50", 1); /* push rax */
1288 static void i_prolog(void)
1290 last_set = -1;
1291 i_saveargs();
1292 os("\x55", 1); /* push rbp */
1293 os("\x48\x89\xe5", 3); /* mov rbp, rsp */
1294 os("\x48\x81\xec", 3); /* sub rsp, $xxx */
1295 func_fpsub = cslen;
1296 oi(0, 4);
1299 static void i_epilog(void)
1301 int saved = func_vararg ? N_ARGS : MIN(N_ARGS, func_argc);
1302 int diff = ALIGN(sp_max, 16) + (saved & 1 ? 8 : 0);
1303 if (diff) {
1304 putint(cs + func_fpsub, diff, 4);
1306 os("\xc9", 1); /* leave */
1307 if (saved) {
1308 os("\xc2", 1); /* ret n */
1309 oi(saved * LONGSZ, 2);
1310 } else {
1311 os("\xc3", 1); /* ret */