gen: complain if MAXJMPS is reached
[neatcc/cc.git] / x86.c
blob1613452b724c95c0a85c2e2c28ad98263f97a16f
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include "gen.h"
5 #include "out.h"
6 #include "tok.h"
/* variable location (values stored in struct tmp's loc field) */
#define LOC_REG		0x01
#define LOC_MEM		0x02
#define LOC_NUM		0x04
#define LOC_SYM		0x08
#define LOC_LOCAL	0x10

/* special registers */
#define REG_FP		R_RBP
#define REG_SP		R_RSP
#define REG_RET		R_RAX
#define REG_FORK	R_RAX

/* registers; the values are the x86 register numbers used in ModRM bytes */
#define R_RAX		0x00
#define R_RCX		0x01
#define R_RDX		0x02
#define R_RBX		0x03
#define R_RSP		0x04
#define R_RBP		0x05
#define R_RSI		0x06
#define R_RDI		0x07

#define N_REGS		8
#define N_ARGS		0
#define N_TMPS		ARRAY_SIZE(tmpregs)
#define R_TMPS		0x00cf	/* bits 0-3, 6, 7: RAX RCX RDX RBX RSI RDI */
#define R_ARGS		0x0000
#define R_SAVED		0x00c8	/* bits 3, 6, 7: RBX RSI RDI */
#define R_BYTEREGS	(1 << R_RAX | 1 << R_RDX | 1 << R_RCX)

/* note: both macros evaluate their arguments more than once */
#define MIN(a, b)	((a) < (b) ? (a) : (b))
/* round x up to a multiple of a; a must be a power of two */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))
void err(char *msg);
43 static char cs[SECSIZE]; /* code segment */
44 static int cslen;
45 static char ds[SECSIZE]; /* data segment */
46 static int dslen;
47 static long bsslen; /* bss segment size */
49 static long sp; /* stack pointer offset from R_RBP */
50 static long sp_max; /* maximum stack pointer offset */
51 static long sp_tmp; /* sp for the first tmp on the stack */
52 static long func_fpsub; /* stack pointer sub address in CS */
54 #define TMP(i) (((i) < ntmp) ? &tmps[ntmp - 1 - (i)] : NULL)
56 static struct tmp {
57 long addr;
58 char sym[NAMELEN];
59 long off; /* offset from a symbol or a local */
60 unsigned loc; /* variable location */
61 unsigned bt; /* type of address; zero when not a pointer */
62 } tmps[MAXTMP];
63 static int ntmp;
65 /* arch-specific functions */
66 static void i_ldr(int l, int rd, int rn, int off, int bt);
67 static void i_mov(int rd, int rn, int bt);
68 static void i_add(int op, int rd, int rn, int rm);
69 static void i_shl(int op, int rd, int rm, int rs);
70 static void i_mul(int rd, int rn, int rm);
71 static void i_div(int op, int rd, int rn, int rm);
72 static void i_cmp(int rn, int rm);
73 static int i_decodeable(long imm);
74 static void i_add_imm(int op, int rd, int rn, long n);
75 static void i_shl_imm(int op, int rd, int rn, long n);
76 static void i_cmp_imm(int rn, long n);
77 static void i_add_anyimm(int rd, int rn, long n);
78 static void i_num(int rd, long n);
79 static void i_sym(int rd, char *sym, int off);
80 static void i_set(int op, int rd);
81 static void i_neg(int rd);
82 static void i_not(int rd);
83 static void i_lnot(int rd);
84 static void i_zx(int rd, int bits);
85 static void i_sx(int rd, int bits);
86 static void i_b(void);
87 static void i_bz(int rn, int z);
88 static void i_b_fill(long src, long dst);
89 static void i_call(char *sym, int off);
90 static void i_call_reg(int rd);
91 static void i_prolog(void);
92 static void i_epilog(void);
94 static struct tmp *regs[N_REGS];
95 static int tmpregs[] = {R_RAX, R_RSI, R_RDI, R_RBX, R_RDX, R_RCX};
/* labels and jmps */
#define MAXJMPS		(1 << 14)

static long labels[MAXJMPS];	/* code offset of each label, indexed by id */
static int nlabels;
static long jmp_loc[MAXJMPS];	/* code offset of each jump's 4-byte operand */
static int jmp_goal[MAXJMPS];	/* label id each recorded jump targets */
static int njmps;		/* number of recorded jumps */
106 void o_label(int id)
108 if (id > nlabels)
109 nlabels = id + 1;
110 labels[id] = cslen;
113 static void jmp_add(int id)
115 if (njmps >= MAXJMPS)
116 err("nomem: MAXJMPS reached!\n");
117 jmp_loc[njmps] = cslen - 4;
118 jmp_goal[njmps] = id;
119 njmps++;
122 static void jmp_fill(void)
124 int i;
125 for (i = 0; i < njmps; i++)
126 i_b_fill(jmp_loc[i], labels[jmp_goal[i]]);
129 /* generating code */
/* store the l low-order bytes of n at s, least significant byte first */
static void putint(char *s, long n, int l)
{
	for (; l; l--, n >>= 8)
		*s++ = n;
}
139 static void os(void *s, int n)
141 while (n--)
142 cs[cslen++] = *(char *) (s++);
145 static void oi(long n, int l)
147 while (l--) {
148 cs[cslen++] = n;
149 n >>= 8;
153 static long sp_push(int size)
155 sp += size;
156 if (sp > sp_max)
157 sp_max = sp;
158 return sp;
161 static void tmp_mem(struct tmp *tmp)
163 int src = tmp->addr;
164 if (tmp->loc != LOC_REG)
165 return;
166 if (sp_tmp == -1)
167 sp_tmp = sp;
168 tmp->addr = -sp_push(LONGSZ);
169 i_ldr(0, src, REG_FP, tmp->addr, LONGSZ);
170 regs[src] = NULL;
171 tmp->loc = LOC_MEM;
174 static void num_cast(struct tmp *t, unsigned bt)
176 if (!(bt & BT_SIGNED) && BT_SZ(bt) != LONGSZ)
177 t->addr &= ((1l << (long) (BT_SZ(bt) * 8)) - 1);
178 if (bt & BT_SIGNED && BT_SZ(bt) != LONGSZ &&
179 t->addr > (1l << (BT_SZ(bt) * 8 - 1)))
180 t->addr = -((1l << (BT_SZ(bt) * 8)) - t->addr);
183 static void tmp_reg(struct tmp *tmp, int dst, int deref)
185 int bt = tmp->bt;
186 if (!tmp->bt)
187 deref = 0;
188 if (deref)
189 tmp->bt = 0;
190 if (tmp->loc == LOC_NUM) {
191 i_num(dst, tmp->addr);
192 tmp->addr = dst;
193 regs[dst] = tmp;
194 tmp->loc = LOC_REG;
196 if (tmp->loc == LOC_SYM) {
197 i_sym(dst, tmp->sym, tmp->off);
198 tmp->addr = dst;
199 regs[dst] = tmp;
200 tmp->loc = LOC_REG;
202 if (tmp->loc == LOC_REG) {
203 if (deref)
204 i_ldr(1, dst, tmp->addr, 0, bt);
205 else if (dst != tmp->addr)
206 i_mov(dst, tmp->addr, LONGSZ);
207 regs[tmp->addr] = NULL;
209 if (tmp->loc == LOC_LOCAL) {
210 if (deref)
211 i_ldr(1, dst, REG_FP, tmp->addr + tmp->off, bt);
212 else
213 i_add_anyimm(dst, REG_FP, tmp->addr + tmp->off);
215 if (tmp->loc == LOC_MEM) {
216 i_ldr(1, dst, REG_FP, tmp->addr, LONGSZ);
217 if (deref)
218 i_ldr(1, dst, dst, 0, bt);
220 tmp->addr = dst;
221 regs[dst] = tmp;
222 tmp->loc = LOC_REG;
225 static void reg_free(int reg)
227 int i;
228 if (!regs[reg])
229 return;
230 for (i = 0; i < N_TMPS; i++)
231 if (!regs[tmpregs[i]]) {
232 tmp_reg(regs[reg], tmpregs[i], 0);
233 return;
235 tmp_mem(regs[reg]);
238 static void reg_for(int reg, struct tmp *t)
240 if (regs[reg] && regs[reg] != t)
241 reg_free(reg);
/* move t into reg without dereferencing it */
static void tmp_mv(struct tmp *t, int reg)
{
	reg_for(reg, t);
	tmp_reg(t, reg, 0);
}
/* load the value of t into reg, dereferencing it if it is an address */
static void tmp_to(struct tmp *t, int reg)
{
	reg_for(reg, t);
	tmp_reg(t, reg, 1);
}
256 static void tmp_drop(int n)
258 int i;
259 for (i = ntmp - n; i < ntmp; i++)
260 if (tmps[i].loc == LOC_REG)
261 regs[tmps[i].addr] = NULL;
262 ntmp -= n;
/* load the top tmp's value into reg and remove it from the tmp stack */
static void tmp_pop(int reg)
{
	tmp_to(TMP(0), reg);
	tmp_drop(1);
}
272 static struct tmp *tmp_new(void)
274 return &tmps[ntmp++];
277 static void tmp_push(int reg)
279 struct tmp *t = tmp_new();
280 t->addr = reg;
281 t->bt = 0;
282 t->loc = LOC_REG;
283 regs[reg] = t;
286 void o_local(long addr)
288 struct tmp *t = tmp_new();
289 t->addr = -addr;
290 t->loc = LOC_LOCAL;
291 t->bt = 0;
292 t->off = 0;
295 void o_num(long num)
297 struct tmp *t = tmp_new();
298 t->addr = num;
299 t->bt = 0;
300 t->loc = LOC_NUM;
303 void o_sym(char *name)
305 struct tmp *t = tmp_new();
306 strcpy(t->sym, name);
307 t->loc = LOC_SYM;
308 t->bt = 0;
309 t->off = 0;
312 void o_tmpdrop(int n)
314 if (n == -1 || n > ntmp)
315 n = ntmp;
316 tmp_drop(n);
317 if (!ntmp) {
318 if (sp_tmp != -1)
319 sp = sp_tmp;
320 sp_tmp = -1;
324 /* make sure tmps remain intact after a conditional expression */
325 void o_fork(void)
327 int i;
328 for (i = 0; i < ntmp - 1; i++)
329 tmp_mem(&tmps[i]);
332 void o_forkpush(void)
334 tmp_pop(REG_FORK);
337 void o_forkjoin(void)
339 tmp_push(REG_FORK);
342 void o_tmpswap(void)
344 struct tmp *t1 = TMP(0);
345 struct tmp *t2 = TMP(1);
346 struct tmp t;
347 memcpy(&t, t1, sizeof(t));
348 memcpy(t1, t2, sizeof(t));
349 memcpy(t2, &t, sizeof(t));
350 if (t1->loc == LOC_REG)
351 regs[t1->addr] = t1;
352 if (t2->loc == LOC_REG)
353 regs[t2->addr] = t2;
356 static int reg_get(int mask)
358 int i;
359 for (i = 0; i < N_TMPS; i++)
360 if ((1 << tmpregs[i]) & mask && !regs[tmpregs[i]])
361 return tmpregs[i];
362 for (i = 0; i < N_TMPS; i++)
363 if ((1 << tmpregs[i]) & mask) {
364 reg_free(tmpregs[i]);
365 return tmpregs[i];
367 return 0;
370 static int reg_fortmp(struct tmp *t, int notmask)
372 if (t->loc == LOC_REG && !(notmask & (1 << t->addr)))
373 return t->addr;
374 return reg_get(~notmask);
377 static void tmp_copy(struct tmp *t1)
379 struct tmp *t2 = tmp_new();
380 memcpy(t2, t1, sizeof(*t1));
381 if (!(t1->loc & (LOC_REG | LOC_MEM)))
382 return;
383 if (t1->loc == LOC_MEM) {
384 tmp_mv(t2, reg_get(~0));
385 } else if (t1->loc == LOC_REG) {
386 t2->addr = reg_fortmp(t2, 1 << t1->addr);
387 i_mov(t2->addr, t1->addr, LONGSZ);
388 regs[t2->addr] = t2;
/* duplicate the tmp on top of the stack */
void o_tmpcopy(void)
{
	tmp_copy(TMP(0));
}
397 void o_cast(unsigned bt)
399 struct tmp *t = TMP(0);
400 if (!t->bt && t->loc == LOC_NUM) {
401 num_cast(t, bt);
402 return;
404 if (BT_SZ(bt) != LONGSZ) {
405 int reg = reg_fortmp(t, BT_SZ(bt) > 1 ? 0 : ~R_BYTEREGS);
406 tmp_to(t, reg);
407 if (bt & BT_SIGNED)
408 i_sx(reg, BT_SZ(bt) * 8);
409 else
410 i_zx(reg, BT_SZ(bt) * 8);
414 void o_func_beg(char *name, int argc, int global, int vararg)
416 out_sym(name, (global ? OUT_GLOB : 0) | OUT_CS, cslen, 0);
417 i_prolog();
418 sp = 3 * LONGSZ;
419 sp_max = sp;
420 ntmp = 0;
421 sp_tmp = -1;
422 nlabels = 0;
423 njmps = 0;
424 memset(regs, 0, sizeof(regs));
427 void o_deref(unsigned bt)
429 struct tmp *t = TMP(0);
430 if (t->bt)
431 tmp_to(t, reg_fortmp(t, 0));
432 t->bt = bt;
/* force the value of the top tmp into a register */
void o_load(void)
{
	struct tmp *top = TMP(0);
	int reg = reg_fortmp(top, 0);

	tmp_to(top, reg);
}
/* predicates on a tmp that is a plain value (bt == 0), not an address */
#define TMP_NUM(t)	((t)->loc == LOC_NUM && !(t)->bt)	/* constant */
#define LOCAL_PTR(t)	((t)->loc == LOC_LOCAL && !(t)->bt)	/* address of a local */
#define SYM_PTR(t)	((t)->loc == LOC_SYM && !(t)->bt)	/* address of a symbol */
445 int o_popnum(long *c)
447 struct tmp *t = TMP(0);
448 if (!TMP_NUM(t))
449 return 1;
450 *c = t->addr;
451 tmp_drop(1);
452 return 0;
455 void o_ret(int rets)
457 if (rets)
458 tmp_pop(REG_RET);
459 else
460 i_num(REG_RET, 0);
461 o_jmp(0);
/* finish a function: define label 0, patch all jumps, emit the epilogue */
void o_func_end(void)
{
	o_label(0);
	jmp_fill();
	i_epilog();
}
471 long o_mklocal(int sz)
473 return sp_push(ALIGN(sz, LONGSZ));
476 void o_rmlocal(long addr, int sz)
478 sp = addr - ALIGN(sz, LONGSZ);
481 long o_arg2loc(int i)
483 return -LONGSZ * (i + 2);
486 void o_assign(unsigned bt)
488 struct tmp *t1 = TMP(0);
489 struct tmp *t2 = TMP(1);
490 int r1 = reg_fortmp(t1, BT_SZ(bt) > 1 ? 0 : ~R_BYTEREGS);
491 int r2 = reg_fortmp(t2, 1 << r1);
492 int off = 0;
493 tmp_to(t1, r1);
494 if (t2->bt)
495 tmp_to(t2, r2);
496 if (t2->loc == LOC_LOCAL) {
497 r2 = REG_FP;
498 off = t2->addr + t2->off;
499 } else {
500 tmp_to(t2, r2);
502 i_ldr(0, r1, r2, off, bt);
503 tmp_drop(2);
504 tmp_push(r1);
507 static long cu(int op, long i)
509 switch (op & 0xff) {
510 case O_NEG:
511 return -i;
512 case O_NOT:
513 return ~i;
514 case O_LNOT:
515 return !i;
517 return 0;
520 static int c_uop(int op)
522 struct tmp *t1 = TMP(0);
523 if (!TMP_NUM(t1))
524 return 1;
525 tmp_drop(1);
526 o_num(cu(op, t1->addr));
527 return 0;
530 static long cb(int op, long a, long b)
532 switch (op & 0xff) {
533 case O_ADD:
534 return a + b;
535 case O_SUB:
536 return a - b;
537 case O_AND:
538 return a & b;
539 case O_OR:
540 return a | b;
541 case O_XOR:
542 return a ^ b;
543 case O_MUL:
544 return a * b;
545 case O_DIV:
546 return a / b;
547 case O_MOD:
548 return a % b;
549 case O_SHL:
550 return a << b;
551 case O_SHR:
552 if (op & O_SIGNED)
553 return a >> b;
554 else
555 return (unsigned long) a >> b;
556 case O_LT:
557 return a < b;
558 case O_GT:
559 return a > b;
560 case O_LE:
561 return a <= b;
562 case O_GE:
563 return a >= b;
564 case O_EQ:
565 return a == b;
566 case O_NEQ:
567 return a != b;
569 return 0;
572 static int c_bop(int op)
574 struct tmp *t1 = TMP(0);
575 struct tmp *t2 = TMP(1);
576 int locs = LOCAL_PTR(t1) + LOCAL_PTR(t2);
577 int syms = SYM_PTR(t1) + SYM_PTR(t2);
578 int nums = TMP_NUM(t1) + TMP_NUM(t2);
579 if (syms + locs == 2 || syms + nums + locs != 2)
580 return 1;
581 if (nums == 1)
582 if ((op & 0xff) != O_ADD && ((op & 0xff) != O_SUB || TMP_NUM(t2)))
583 return 1;
584 if (nums == 1) {
585 long o1 = TMP_NUM(t1) ? t1->addr : t1->off;
586 long o2 = TMP_NUM(t2) ? t2->addr : t2->off;
587 long ret = cb(op, o2, o1);
588 if (!TMP_NUM(t1))
589 o_tmpswap();
590 t2->off = ret;
591 tmp_drop(1);
592 } else {
593 long ret = cb(op, t2->addr, t1->addr);
594 tmp_drop(2);
595 o_num(ret);
597 return 0;
600 void o_uop(int op)
602 int r1 = (op & 0xff) == O_LNOT ? R_RAX : reg_fortmp(TMP(0), 0);
603 if (!c_uop(op))
604 return;
605 tmp_to(TMP(0), r1);
606 switch (op & 0xff) {
607 case O_NEG:
608 i_neg(r1);
609 break;
610 case O_NOT:
611 i_not(r1);
612 break;
613 case O_LNOT:
614 i_lnot(r1);
615 break;
/*
 * Pop both operands of a binary operation into registers.  The top
 * tmp goes to a register from mask1 (returned in *r2); the one below
 * goes to a different register from mask2 (returned in *r1).
 */
static void bin_regs(int *r1, int *r2, int mask1, int mask2)
{
	struct tmp *top = TMP(0);
	struct tmp *below = TMP(1);
	int top_reg = reg_fortmp(top, ~mask1);
	int below_reg;

	tmp_to(top, top_reg);
	below_reg = reg_fortmp(below, ~mask2 | (1 << top_reg));
	tmp_pop(top_reg);
	tmp_pop(below_reg);
	*r2 = top_reg;
	*r1 = below_reg;
}
630 static int bop_imm(int *r1, long *n, int swap)
632 struct tmp *t1 = TMP(0);
633 struct tmp *t2 = TMP(1);
634 if (!TMP_NUM(t1) && (!swap || !TMP_NUM(t2)))
635 return 1;
636 *n = TMP_NUM(t1) ? t1->addr : t2->addr;
637 if (!i_decodeable(*n))
638 return 1;
639 if (!TMP_NUM(t1))
640 o_tmpswap();
641 *r1 = reg_fortmp(t2, 0);
642 tmp_drop(1);
643 tmp_pop(*r1);
644 return 0;
647 static void bin_add(int op)
649 int r1, r2;
650 long n;
651 if (!bop_imm(&r1, &n, (op & 0xff) != O_SUB)) {
652 i_add_imm(op, r1, r1, n);
653 } else {
654 bin_regs(&r1, &r2, R_TMPS, R_TMPS);
655 i_add(op, r1, r1, r2);
657 tmp_push(r1);
660 static void bin_shx(int op)
662 int r1, r2;
663 long n;
664 if (!bop_imm(&r1, &n, 0)) {
665 i_shl_imm(op, r1, r1, n);
666 } else {
667 bin_regs(&r1, &r2, 1 << R_RCX, R_TMPS);
668 i_shl(op, r1, r1, r2);
670 tmp_push(r1);
673 static int log2a(unsigned long n)
675 int i = 0;
676 for (i = 0; i < LONGSZ * 8; i++)
677 if (n & (1u << i))
678 break;
679 if (i == LONGSZ * 8 || !(n >> (i + 1)))
680 return i;
681 return -1;
684 /* optimized version of mul/div/mod for powers of two */
685 static int mul_2(int op)
687 struct tmp *t1 = TMP(0);
688 struct tmp *t2 = TMP(1);
689 long n;
690 int r2;
691 int p;
692 if ((op & 0xff) == O_MUL && t2->loc == LOC_NUM && !t2->bt)
693 o_tmpswap();
694 if (t1->loc != LOC_NUM || t1->bt)
695 return 1;
696 n = t1->addr;
697 p = log2a(n);
698 if (n && p == -1)
699 return 1;
700 if ((op & 0xff) == O_MUL) {
701 tmp_drop(1);
702 if (n == 1)
703 return 0;
704 if (n == 0) {
705 tmp_drop(1);
706 o_num(0);
707 return 0;
709 r2 = reg_fortmp(t2, 0);
710 tmp_to(t2, r2);
711 i_shl_imm(O_SHL, r2, r2, p);
712 return 0;
714 if (op == O_DIV) {
715 tmp_drop(1);
716 if (n == 1)
717 return 0;
718 r2 = reg_fortmp(t2, 0);
719 tmp_to(t2, r2);
720 i_shl_imm((op & O_SIGNED) | O_SHR, r2, r2, p);
721 return 0;
723 if (op == O_MOD) {
724 tmp_drop(1);
725 if (n == 1) {
726 tmp_drop(1);
727 o_num(0);
728 return 0;
730 r2 = reg_fortmp(t2, 0);
731 tmp_to(t2, r2);
732 i_zx(r2, p);
733 return 0;
735 return 1;
738 static void mulop(int *r1, int *r2, int rop)
740 struct tmp *t1 = TMP(0);
741 struct tmp *t2 = TMP(1);
742 if (t1->loc & LOC_REG && t1->addr != R_RAX && t1->addr != R_RDX)
743 rop = t1->addr;
744 tmp_to(t1, rop);
745 tmp_to(t2, R_RAX);
746 if (rop != R_RDX)
747 reg_free(R_RDX);
748 tmp_drop(2);
749 *r1 = rop;
750 *r2 = R_RAX;
753 static void bin_mul(int op)
755 int r1, r2;
756 if (!mul_2(op))
757 return;
758 mulop(&r1, &r2, (op & 0xff) == O_MUL ? R_RDX : R_RCX);
759 if ((op & 0xff) == O_MUL) {
760 i_mul(R_RAX, r1, r2);
761 tmp_push(R_RAX);
763 if ((op & 0xff) == O_DIV) {
764 i_div(op, R_RAX, r1, r2);
765 tmp_push(R_RAX);
767 if ((op & 0xff) == O_MOD) {
768 i_div(op, R_RDX, r1, r2);
769 tmp_push(R_RDX);
773 static void bin_cmp(int op)
775 int r1, r2;
776 long n;
777 if (!bop_imm(&r1, &n, (op & 0xff) == O_EQ || (op & 0xff) == O_NEQ)) {
778 i_cmp_imm(r1, n);
779 } else {
780 bin_regs(&r1, &r2, R_TMPS, R_TMPS);
781 i_cmp(r1, r2);
783 r1 = R_RAX;
784 reg_free(r1);
785 i_set(op, r1);
786 tmp_push(r1);
/*
 * Apply binary operator op to the two topmost tmps, folding
 * constants when possible.  Bits 4-7 of op select the operator group.
 */
void o_bop(int op)
{
	if (!c_bop(op))
		return;
	switch (op & 0xf0) {
	case 0x00:
		bin_add(op);
		break;
	case 0x10:
		bin_shx(op);
		break;
	case 0x20:
		bin_mul(op);
		break;
	case 0x30:
		bin_cmp(op);
		break;
	}
}
803 void o_memcpy(void)
805 struct tmp *t0 = TMP(0);
806 struct tmp *t1 = TMP(1);
807 struct tmp *t2 = TMP(2);
808 tmp_to(t0, R_RCX);
809 tmp_to(t1, R_RSI);
810 tmp_to(t2, R_RDI);
811 os("\xfc\xf3\xa4", 3); /* cld; rep movs */
812 tmp_drop(2);
815 void o_memset(void)
817 struct tmp *t0 = TMP(0);
818 struct tmp *t1 = TMP(1);
819 struct tmp *t2 = TMP(2);
820 tmp_to(t0, R_RCX);
821 tmp_to(t1, R_RAX);
822 tmp_to(t2, R_RDI);
823 os("\xfc\xf3\xaa", 3); /* cld; rep stosb */
824 tmp_drop(2);
/* pop the top tmp and branch to label id if it is zero (z) or nonzero (!z) */
static void jxz(int id, int z)
{
	int reg = reg_fortmp(TMP(0), 0);

	tmp_pop(reg);
	i_bz(reg, z);
	jmp_add(id);
}
/* pop the top tmp and jump to label id if it is zero */
void o_jz(int id)
{
	jxz(id, 1);
}
/* pop the top tmp and jump to label id if it is nonzero */
void o_jnz(int id)
{
	jxz(id, 0);
}
/* emit an unconditional jump to label id; the target is patched later */
void o_jmp(int id)
{
	i_b();
	jmp_add(id);
}
851 void o_call(int argc, int rets)
853 struct tmp *t;
854 int i;
855 for (i = 0; i < N_TMPS; i++)
856 if (regs[tmpregs[i]] && regs[tmpregs[i]] - tmps < ntmp - argc)
857 tmp_mem(regs[tmpregs[i]]);
858 sp_push(LONGSZ * argc);
859 for (i = argc - 1; i >= 0; --i) {
860 int reg = reg_fortmp(TMP(0), 0);
861 tmp_pop(reg);
862 i_ldr(0, reg, REG_SP, i * LONGSZ, LONGSZ);
864 t = TMP(0);
865 if (t->loc == LOC_SYM && !t->bt) {
866 i_call(t->sym, t->off);
867 tmp_drop(1);
868 } else {
869 int reg = reg_fortmp(t, 0);
870 tmp_pop(reg);
871 i_call_reg(reg);
873 if (rets)
874 tmp_push(REG_RET);
877 void o_mkbss(char *name, int size, int global)
879 out_sym(name, OUT_BSS | (global ? OUT_GLOB : 0), bsslen, size);
880 bsslen += ALIGN(size, OUT_ALIGNMENT);
883 #define MAXDATS (1 << 10)
884 static char dat_names[MAXDATS][NAMELEN];
885 static int dat_offs[MAXDATS];
886 static int ndats;
888 void *o_mkdat(char *name, int size, int global)
890 void *addr = ds + dslen;
891 int idx = ndats++;
892 if (idx >= MAXDATS)
893 err("nomem: MAXDATS reached!\n");
894 strcpy(dat_names[idx], name);
895 dat_offs[idx] = dslen;
896 out_sym(name, OUT_DS | (global ? OUT_GLOB : 0), dslen, size);
897 dslen += ALIGN(size, OUT_ALIGNMENT);
898 return addr;
901 static int dat_off(char *name)
903 int i;
904 for (i = 0; i < ndats; i++)
905 if (!strcmp(name, dat_names[i]))
906 return dat_offs[i];
907 return 0;
910 void o_datset(char *name, int off, unsigned bt)
912 struct tmp *t = TMP(0);
913 int sym_off = dat_off(name) + off;
914 if (t->loc == LOC_NUM && !t->bt) {
915 num_cast(t, bt);
916 memcpy(ds + sym_off, &t->addr, BT_SZ(bt));
918 if (t->loc == LOC_SYM && !t->bt) {
919 out_rel(t->sym, OUT_DS, sym_off);
920 memcpy(ds + sym_off, &t->off, BT_SZ(bt));
922 tmp_drop(1);
925 void o_write(int fd)
927 out_write(fd, cs, cslen, ds, dslen);
/* X86 arch specific functions */

/* opcode bytes */
#define I_MOV		0x89
#define I_MOVI		0xc7
#define I_MOVIR		0xb8
#define I_MOVR		0x8b
#define I_SHX		0xd3
#define I_CMP		0x3b
#define I_TST		0x85
#define I_LEA		0x8d
#define I_NOT		0xf7
#define I_CALL		0xff
#define I_MUL		0xf7
#define I_XOR		0x33
#define I_TEST		0x85
#define I_CQO		0x99
#define I_PUSH		0x50
#define I_POP		0x58

/* two-byte opcodes: bit 16 flags them, o2 is emitted before o1 */
#define OP2(o2, o1)	(0x010000 | ((o2) << 8) | (o1))
#define O2(op)		(((op) >> 8) & 0xff)
#define O1(op)		((op) & 0xff)
#define MODRM(m, r1, r2)	((m) << 6 | (r1) << 3 | (r2))

/* for optimizing cmp + jmp */
/* true when the six bytes emitted by i_set() are the last ones in cs */
#define OPT_ISCMP()	(last_set + 6 == cslen)
/* the condition byte of that set instruction */
#define OPT_CCOND()	(cs[last_set + 1])

static long last_set = -1;	/* code offset of the last i_set(), or -1 */
/*
 * Emit the prefix and opcode bytes of op for operand type bt: a 0x66
 * prefix for 2-byte operands, the leading byte of a two-byte opcode,
 * and the opcode itself with bit 0 cleared for 1-byte operands.
 * r1 and r2 are not used.
 */
static void op_x(int op, int r1, int r2, int bt)
{
	int sz = BT_SZ(bt);
	int opcode = O1(op);

	if (sz == 2)
		oi(0x66, 1);
	if (op & 0x10000)
		oi(O2(op), 1);
	if (sz == 1)
		opcode &= ~0x1;
	oi(opcode, 1);
}
970 #define op_mr op_rm
972 /* op_*(): r=reg, m=mem, i=imm, s=sym */
973 static void op_rm(int op, int src, int base, int off, int bt)
975 int dis = off == (char) off ? 1 : 4;
976 int mod = dis == 4 ? 2 : 1;
977 if (!off && (base & 7) != R_RBP)
978 mod = 0;
979 op_x(op, src, base, bt);
980 oi(MODRM(mod, src & 0x07, base & 0x07), 1);
981 if ((base & 7) == R_RSP)
982 oi(0x24, 1);
983 if (mod)
984 oi(off, dis);
/* emit op with a register-direct ModRM byte: src in reg, dst in r/m */
static void op_rr(int op, int src, int dst, int bt)
{
	int modrm = MODRM(3, src & 0x07, dst & 0x07);

	op_x(op, src, dst, bt);
	oi(modrm, 1);
}
993 #define movrx_bt(bt) (LONGSZ)
995 static int movrx_op(int bt, int mov)
997 int sz = BT_SZ(bt);
998 if (sz == 2)
999 return OP2(0x0f, bt & BT_SIGNED ? 0xbf : 0xb7);
1000 if (sz == 1)
1001 return OP2(0x0f, bt & BT_SIGNED ? 0xbe : 0xb6);
1002 return mov;
1005 static void mov_r2r(int r1, int r2, unsigned bt)
1007 if (r1 != r2 || BT_SZ(bt) != LONGSZ)
1008 op_rr(movrx_op(bt, I_MOV), r1, r2, movrx_bt(bt));
1011 static void mov_m2r(int dst, int base, int off, int bt)
1013 op_rm(movrx_op(bt, I_MOVR), dst, base, off, movrx_bt(bt));
1016 static void i_zx(int rd, int bits)
1018 if (bits & 0x07) {
1019 i_shl_imm(O_SHL, rd, rd, LONGSZ * 8 - bits);
1020 i_shl_imm(O_SHR, rd, rd, LONGSZ * 8 - bits);
1021 } else {
1022 mov_r2r(rd, rd, bits >> 3);
1026 static void i_sx(int rd, int bits)
1028 mov_r2r(rd, rd, BT_SIGNED | (bits >> 3));
1031 static void i_add(int op, int rd, int rn, int rm)
1033 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
1034 static int rx[] = {0003, 0053, 0043, 0013, 0063};
1035 if (rn != rd)
1036 die("this is cisc!\n");
1037 op_rr(rx[op & 0x0f], rd, rm, LONGSZ);
/*
 * Emit rd = rd <op> n with a one-byte immediate (opcode 0x83) for
 * O_ADD, O_SUB, O_AND, O_OR and O_XOR.  rn must equal rd.
 */
static void i_add_imm(int op, int rd, int rn, long n)
{
	/* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
	static int rx[] = {0xc0, 0xe8, 0xe0, 0xc8, 0xf0};
	unsigned char insn[3];

	insn[0] = 0x83;
	insn[1] = rx[op & 0x0f] | rd;
	insn[2] = n & 0xff;
	if (rd != rn)
		die("this is cisc!\n");
	os((void *) insn, 3);
}
/* can imm be encoded as a sign-extended one-byte immediate? */
static int i_decodeable(long imm)
{
	if (imm > 127)
		return 0;
	return imm >= -128;
}
1055 static void i_num(int rd, long n)
1057 if (!n) {
1058 op_rr(I_XOR, rd, rd, 4);
1059 } else {
1060 op_x(I_MOVIR + (rd & 7), 0, rd, LONGSZ);
1061 oi(n, LONGSZ);
1065 static void i_add_anyimm(int rd, int rn, long n)
1067 op_rm(I_LEA, rd, rn, n, LONGSZ);
1070 static void i_mul(int rd, int rn, int rm)
1072 if (rn != R_RDX)
1073 i_num(R_RDX, 0);
1074 op_rr(I_MUL, 4, rn, LONGSZ);
1077 static void i_div(int op, int rd, int rn, int rm)
1079 if (rn != R_RDX) {
1080 if (op & O_SIGNED)
1081 op_x(I_CQO, R_RAX, R_RDX, LONGSZ);
1082 else
1083 i_num(R_RDX, 0);
1085 op_rr(I_MUL, op & O_SIGNED ? 7 : 6, rn, LONGSZ);
1088 static void i_tst(int rn, int rm)
1090 op_rr(I_TST, rn, rm, LONGSZ);
1093 static void i_cmp(int rn, int rm)
1095 op_rr(I_CMP, rn, rm, LONGSZ);
/* compare rn with the one-byte immediate n (opcode 0x83 /7) */
static void i_cmp_imm(int rn, long n)
{
	unsigned char insn[3];

	insn[0] = 0x83;
	insn[1] = 0xf8 | rn;
	insn[2] = n & 0xff;
	os(insn, 3);
}
1104 static void i_set(int op, int rd)
1106 /* lt, gt, le, ge, eq, neq */
1107 static int ucond[] = {0x92, 0x97, 0x96, 0x93, 0x94, 0x95};
1108 static int scond[] = {0x9c, 0x9f, 0x9e, 0x9d, 0x94, 0x95};
1109 int cond = op & O_SIGNED ? scond[op & 0x0f] : ucond[op & 0x0f];
1110 char set[] = "\x0f\x00\xc0";
1111 if (rd != R_RAX)
1112 die("set works only with R_RAX\n");
1113 set[1] = cond;
1114 last_set = cslen;
1115 os(set, 3); /* setl al */
1116 os("\x0f\xb6\xc0", 3); /* movzbl eax, al */
1119 static void i_shl(int op, int rd, int rm, int rs)
1121 int sm = 4;
1122 if ((op & 0x0f) == 1)
1123 sm = op & O_SIGNED ? 7 : 5;
1124 if (rd != rm)
1125 die("this is cisc!\n");
1126 op_rr(I_SHX, sm, rd, LONGSZ);
1129 static void i_shl_imm(int op, int rd, int rn, long n)
1131 int sm = (op & 0x1) ? (op & O_SIGNED ? 0xf8 : 0xe8) : 0xe0 ;
1132 char s[3] = {0xc1, sm | rn, n & 0xff};
1133 if (rd != rn)
1134 die("this is cisc!\n");
1135 os(s, 3);
1138 static void i_mov(int rd, int rn, int bt)
1140 op_rr(movrx_op(bt, I_MOVR), rd, rn, movrx_bt(bt));
1143 static void i_ldr(int l, int rd, int rn, int off, int bt)
1145 if (l)
1146 mov_m2r(rd, rn, off, bt);
1147 else
1148 op_rm(I_MOV, rd, rn, off, bt);
1151 static void i_sym(int rd, char *sym, int off)
1153 op_x(I_MOVIR + (rd & 7), 0, rd, LONGSZ);
1154 out_rel(sym, OUT_CS, cslen);
1155 oi(off, LONGSZ);
1158 static void i_neg(int rd)
1160 op_rr(I_NOT, 3, rd, LONGSZ);
1163 static void i_not(int rd)
1165 op_rr(I_NOT, 2, rd, LONGSZ);
1168 static void i_lnot(int rd)
1170 if (OPT_ISCMP()) {
1171 cs[last_set + 1] ^= 0x01;
1172 } else {
1173 char cmp[] = "\x83\xf8\x00";
1174 cmp[1] |= rd;
1175 os(cmp, 3); /* cmp eax, 0 */
1176 i_set(O_EQ, rd);
1180 static void jx(int x, long addr)
1182 char op[2] = {0x0f};
1183 op[1] = x;
1184 os(op, 2); /* jx $addr */
1185 oi(addr - cslen - 4, 4);
1188 static void i_bz(int rn, int z)
1190 if (OPT_ISCMP()) {
1191 int cond = OPT_CCOND();
1192 cslen = last_set;
1193 jx((!z ? cond : cond ^ 0x01) & ~0x10, 0);
1194 last_set = -1;
1195 } else {
1196 i_tst(rn, rn);
1197 jx(z ? 0x84 : 0x85, 0);
/* emit an unconditional jump with a zero 4-byte operand to be patched */
static void i_b(void)
{
	os("\xe9", 1);		/* jmp $addr */
	oi(0, 4);
}
1207 static void i_b_fill(long src, long dst)
1209 putint((void *) (cs + src), (dst - src) - 4, 4);
1212 static void i_call_reg(int rd)
1214 op_rr(I_CALL, 2, rd, LONGSZ);
1217 static void i_call(char *sym, int off)
1219 os("\xe8", 1); /* call $x */
1220 out_rel(sym, OUT_CS | OUT_REL, cslen);
1221 oi(-4 + off, 4);
1224 static void i_prolog(void)
1226 last_set = -1;
1227 os("\x55", 1); /* push rbp */
1228 os("\x89\xe5", 2); /* mov rbp, rsp */
1229 os("\x53\x56\x57", 3); /* push rbx; push rsi; push rdi */
1230 os("\x81\xec", 2); /* sub rsp, $xxx */
1231 func_fpsub = cslen;
1232 oi(0, 4);
1235 static void i_epilog(void)
1237 int diff = ALIGN(sp_max - 3 * LONGSZ, LONGSZ);
1238 if (diff) {
1239 os("\x81\xc4", 2); /* add $xxx, %esp */
1240 oi(diff, 4);
1241 putint(cs + func_fpsub, diff, 4);
1243 os("\x5f\x5e\x5b", 3); /* pop edi; pop esi; pop ebx */
1244 os("\xc9\xc3", 2); /* leave; ret; */