ncc: define __i386__ for x86 builds
[neatcc.git] / x86.c
bloba4969780d48fa8e3f97b69e2fe40b5a57f5f2477
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include "gen.h"
5 #include "out.h"
6 #include "tok.h"
/* where the value of a temporary currently lives (struct tmp.loc) */
#define LOC_REG		0x01	/* addr is a register number */
#define LOC_MEM		0x02	/* addr is a frame offset of a spilled value */
#define LOC_NUM		0x04	/* addr is the constant itself */
#define LOC_SYM		0x08	/* sym + off names the value */
#define LOC_LOCAL	0x10	/* addr + off is a frame offset of a local */

/* x86 register numbers, as used in ModR/M bytes */
#define R_RAX		0x00
#define R_RCX		0x01
#define R_RDX		0x02
#define R_RBX		0x03
#define R_RSP		0x04
#define R_RBP		0x05
#define R_RSI		0x06
#define R_RDI		0x07

/* registers with a fixed role */
#define REG_FP		R_RBP
#define REG_SP		R_RSP
#define REG_RET		R_RAX
#define REG_FORK	R_RAX

#define N_REGS		8
#define N_ARGS		0
#define N_TMPS		ARRAY_SIZE(tmpregs)
/* register sets, one bit per register number */
#define R_TMPS		(1 << R_RAX | 1 << R_RCX | 1 << R_RDX | \
			 1 << R_RBX | 1 << R_RSI | 1 << R_RDI)
#define R_ARGS		0x0000
#define R_SAVED		(1 << R_RBX | 1 << R_RSI | 1 << R_RDI)
#define R_BYTEREGS	(1 << R_RAX | 1 << R_RDX | 1 << R_RCX)

#define MIN(a, b)	((a) < (b) ? (a) : (b))
/* round x up to a multiple of a; a must be a power of two */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))
42 static char cs[SECSIZE]; /* code segment */
43 static int cslen;
44 static char ds[SECSIZE]; /* data segment */
45 static int dslen;
46 static long bsslen; /* bss segment size */
48 static long sp; /* stack pointer offset from R_RBP */
49 static long sp_max; /* maximum stack pointer offset */
50 static long sp_tmp; /* sp for the first tmp on the stack */
51 static long func_fpsub; /* stack pointer sub address in CS */
53 #define TMP(i) (((i) < ntmp) ? &tmps[ntmp - 1 - (i)] : NULL)
55 static struct tmp {
56 long addr;
57 char sym[NAMELEN];
58 long off; /* offset from a symbol or a local */
59 unsigned loc; /* variable location */
60 unsigned bt; /* type of address; zero when not a pointer */
61 } tmps[MAXTMP];
62 static int ntmp;
/* arch-specific functions, defined at the end of this file */

/* moves between registers, memory, constants and symbols */
static void i_ldr(int load, int rd, int base, int off, int bt);
static void i_mov(int rd, int rn, int bt);
static void i_num(int rd, long n);
static void i_sym(int rd, char *sym, int off);

/* register-register arithmetic */
static void i_add(int op, int rd, int rn, int rm);
static void i_shl(int op, int rd, int rm, int rs);
static void i_mul(int rd, int rn, int rm);
static void i_div(int op, int rd, int rn, int rm);
static void i_cmp(int rn, int rm);

/* arithmetic with an immediate operand */
static int i_decodeable(long imm);
static void i_add_imm(int op, int rd, int rn, long n);
static void i_shl_imm(int op, int rd, int rn, long n);
static void i_cmp_imm(int rn, long n);
static void i_add_anyimm(int rd, int rn, long n);

/* unary operations and conversions */
static void i_set(int op, int rd);
static void i_neg(int rd);
static void i_not(int rd);
static void i_lnot(int rd);
static void i_zx(int rd, int bits);
static void i_sx(int rd, int bits);

/* branches and calls */
static void i_b(long addr);
static void i_b_fill(long *dst, int diff);
static void i_b_if(long addr, int rn, int z);
static void i_call(char *sym, int off);
static void i_call_reg(int rd);

/* function entry and exit */
static void i_prolog(void);
static void i_epilog(void);
93 static struct tmp *regs[N_REGS];
94 static int tmpregs[] = {R_RAX, R_RSI, R_RDI, R_RBX, R_RDX, R_RCX};
96 #define MAXRET (1 << 8)
98 static long ret[MAXRET];
99 static int nret;
/*
 * Store the low l bytes of n at s, least significant byte first.
 *
 * The value is shifted as an unsigned quantity and written through an
 * unsigned char pointer, so the result does not depend on how the host
 * compiler right-shifts negative numbers or narrows to plain char.
 * Nothing is written when l is not positive.
 */
static void putint(char *s, long n, int l)
{
	unsigned char *dst = (unsigned char *) s;
	unsigned long v = (unsigned long) n;
	while (l-- > 0) {
		*dst++ = v & 0xff;
		v >>= 8;
	}
}
109 static void os(void *s, int n)
111 while (n--)
112 cs[cslen++] = *(char *) (s++);
115 static void oi(long n, int l)
117 while (l--) {
118 cs[cslen++] = n;
119 n >>= 8;
123 static long sp_push(int size)
125 sp += size;
126 if (sp > sp_max)
127 sp_max = sp;
128 return sp;
131 static void tmp_mem(struct tmp *tmp)
133 int src = tmp->addr;
134 if (tmp->loc != LOC_REG)
135 return;
136 if (sp_tmp == -1)
137 sp_tmp = sp;
138 tmp->addr = -sp_push(LONGSZ);
139 i_ldr(0, src, REG_FP, tmp->addr, LONGSZ);
140 regs[src] = NULL;
141 tmp->loc = LOC_MEM;
144 static void num_cast(struct tmp *t, unsigned bt)
146 if (!(bt & BT_SIGNED) && BT_SZ(bt) != LONGSZ)
147 t->addr &= ((1l << (long) (BT_SZ(bt) * 8)) - 1);
148 if (bt & BT_SIGNED && BT_SZ(bt) != LONGSZ &&
149 t->addr > (1l << (BT_SZ(bt) * 8 - 1)))
150 t->addr = -((1l << (BT_SZ(bt) * 8)) - t->addr);
153 static void tmp_reg(struct tmp *tmp, int dst, int deref)
155 int bt = tmp->bt;
156 if (!tmp->bt)
157 deref = 0;
158 if (deref)
159 tmp->bt = 0;
160 if (tmp->loc == LOC_NUM) {
161 i_num(dst, tmp->addr);
162 tmp->addr = dst;
163 regs[dst] = tmp;
164 tmp->loc = LOC_REG;
166 if (tmp->loc == LOC_SYM) {
167 i_sym(dst, tmp->sym, tmp->off);
168 tmp->addr = dst;
169 regs[dst] = tmp;
170 tmp->loc = LOC_REG;
172 if (tmp->loc == LOC_REG) {
173 if (deref)
174 i_ldr(1, dst, tmp->addr, 0, bt);
175 else if (dst != tmp->addr)
176 i_mov(dst, tmp->addr, LONGSZ);
177 regs[tmp->addr] = NULL;
179 if (tmp->loc == LOC_LOCAL) {
180 if (deref)
181 i_ldr(1, dst, REG_FP, tmp->addr + tmp->off, bt);
182 else
183 i_add_anyimm(dst, REG_FP, tmp->addr + tmp->off);
185 if (tmp->loc == LOC_MEM) {
186 i_ldr(1, dst, REG_FP, tmp->addr, LONGSZ);
187 if (deref)
188 i_ldr(1, dst, dst, 0, bt);
190 tmp->addr = dst;
191 regs[dst] = tmp;
192 tmp->loc = LOC_REG;
195 static void reg_free(int reg)
197 int i;
198 if (!regs[reg])
199 return;
200 for (i = 0; i < N_TMPS; i++)
201 if (!regs[tmpregs[i]]) {
202 tmp_reg(regs[reg], tmpregs[i], 0);
203 return;
205 tmp_mem(regs[reg]);
208 static void reg_for(int reg, struct tmp *t)
210 if (regs[reg] && regs[reg] != t)
211 reg_free(reg);
214 static void tmp_mv(struct tmp *t, int reg)
216 reg_for(reg, t);
217 tmp_reg(t, reg, 0);
220 static void tmp_to(struct tmp *t, int reg)
222 reg_for(reg, t);
223 tmp_reg(t, reg, 1);
226 static void tmp_drop(int n)
228 int i;
229 for (i = ntmp - n; i < ntmp; i++)
230 if (tmps[i].loc == LOC_REG)
231 regs[tmps[i].addr] = NULL;
232 ntmp -= n;
/* load the value of the topmost temporary into reg and remove it */
static void tmp_pop(int reg)
{
	tmp_to(TMP(0), reg);
	tmp_drop(1);
}
242 static struct tmp *tmp_new(void)
244 return &tmps[ntmp++];
247 static void tmp_push(int reg)
249 struct tmp *t = tmp_new();
250 t->addr = reg;
251 t->bt = 0;
252 t->loc = LOC_REG;
253 regs[reg] = t;
256 void o_local(long addr)
258 struct tmp *t = tmp_new();
259 t->addr = -addr;
260 t->loc = LOC_LOCAL;
261 t->bt = 0;
262 t->off = 0;
265 void o_num(long num)
267 struct tmp *t = tmp_new();
268 t->addr = num;
269 t->bt = 0;
270 t->loc = LOC_NUM;
273 void o_sym(char *name)
275 struct tmp *t = tmp_new();
276 strcpy(t->sym, name);
277 t->loc = LOC_SYM;
278 t->bt = 0;
279 t->off = 0;
282 void o_tmpdrop(int n)
284 if (n == -1 || n > ntmp)
285 n = ntmp;
286 tmp_drop(n);
287 if (!ntmp) {
288 if (sp_tmp != -1)
289 sp = sp_tmp;
290 sp_tmp = -1;
294 /* make sure tmps remain intact after a conditional expression */
295 void o_fork(void)
297 int i;
298 for (i = 0; i < ntmp - 1; i++)
299 tmp_mem(&tmps[i]);
302 void o_forkpush(void)
304 tmp_pop(REG_FORK);
307 void o_forkjoin(void)
309 tmp_push(REG_FORK);
312 void o_tmpswap(void)
314 struct tmp *t1 = TMP(0);
315 struct tmp *t2 = TMP(1);
316 struct tmp t;
317 memcpy(&t, t1, sizeof(t));
318 memcpy(t1, t2, sizeof(t));
319 memcpy(t2, &t, sizeof(t));
320 if (t1->loc == LOC_REG)
321 regs[t1->addr] = t1;
322 if (t2->loc == LOC_REG)
323 regs[t2->addr] = t2;
326 static int reg_get(int mask)
328 int i;
329 for (i = 0; i < N_TMPS; i++)
330 if ((1 << tmpregs[i]) & mask && !regs[tmpregs[i]])
331 return tmpregs[i];
332 for (i = 0; i < N_TMPS; i++)
333 if ((1 << tmpregs[i]) & mask) {
334 reg_free(tmpregs[i]);
335 return tmpregs[i];
337 return 0;
340 static int reg_fortmp(struct tmp *t, int notmask)
342 if (t->loc == LOC_REG && !(notmask & (1 << t->addr)))
343 return t->addr;
344 return reg_get(~notmask);
347 static void tmp_copy(struct tmp *t1)
349 struct tmp *t2 = tmp_new();
350 memcpy(t2, t1, sizeof(*t1));
351 if (!(t1->loc & (LOC_REG | LOC_MEM)))
352 return;
353 if (t1->loc == LOC_MEM) {
354 tmp_mv(t2, reg_get(~0));
355 } else if (t1->loc == LOC_REG) {
356 t2->addr = reg_fortmp(t2, 1 << t1->addr);
357 i_mov(t2->addr, t1->addr, LONGSZ);
358 regs[t2->addr] = t2;
362 void o_tmpcopy(void)
364 tmp_copy(TMP(0));
367 void o_cast(unsigned bt)
369 struct tmp *t = TMP(0);
370 if (!t->bt && t->loc == LOC_NUM) {
371 num_cast(t, bt);
372 return;
374 if (BT_SZ(bt) != LONGSZ) {
375 int reg = reg_fortmp(t, BT_SZ(bt) > 1 ? 0 : ~R_BYTEREGS);
376 tmp_to(t, reg);
377 if (bt & BT_SIGNED)
378 i_sx(reg, BT_SZ(bt) * 8);
379 else
380 i_zx(reg, BT_SZ(bt) * 8);
384 void o_func_beg(char *name, int argc, int global, int vararg)
386 out_sym(name, (global ? OUT_GLOB : 0) | OUT_CS, cslen, 0);
387 i_prolog();
388 sp = 3 * LONGSZ;
389 sp_max = sp;
390 ntmp = 0;
391 sp_tmp = -1;
392 nret = 0;
393 memset(regs, 0, sizeof(regs));
396 void o_deref(unsigned bt)
398 struct tmp *t = TMP(0);
399 if (t->bt)
400 tmp_to(t, reg_fortmp(t, 0));
401 t->bt = bt;
404 void o_load(void)
406 struct tmp *t = TMP(0);
407 tmp_to(t, reg_fortmp(t, 0));
410 #define TMP_NUM(t) ((t)->loc == LOC_NUM && !(t)->bt)
411 #define LOCAL_PTR(t) ((t)->loc == LOC_LOCAL && !(t)->bt)
412 #define SYM_PTR(t) ((t)->loc == LOC_SYM && !(t)->bt)
414 int o_popnum(long *c)
416 struct tmp *t = TMP(0);
417 if (!TMP_NUM(t))
418 return 1;
419 *c = t->addr;
420 tmp_drop(1);
421 return 0;
424 void o_ret(int rets)
426 if (rets)
427 tmp_pop(REG_RET);
428 else
429 i_num(REG_RET, 0);
430 ret[nret++] = o_jmp(0);
433 void o_func_end(void)
435 int i;
436 for (i = 0; i < nret; i++)
437 o_filljmp(ret[i]);
438 i_epilog();
441 long o_mklocal(int sz)
443 return sp_push(ALIGN(sz, LONGSZ));
446 void o_rmlocal(long addr, int sz)
448 sp = addr - ALIGN(sz, LONGSZ);
451 long o_arg2loc(int i)
453 return -LONGSZ * (i + 2);
456 void o_assign(unsigned bt)
458 struct tmp *t1 = TMP(0);
459 struct tmp *t2 = TMP(1);
460 int r1 = reg_fortmp(t1, BT_SZ(bt) > 1 ? 0 : ~R_BYTEREGS);
461 int r2 = reg_fortmp(t2, 1 << r1);
462 int off = 0;
463 tmp_to(t1, r1);
464 if (t2->bt)
465 tmp_to(t2, r2);
466 if (t2->loc == LOC_LOCAL) {
467 r2 = REG_FP;
468 off = t2->addr + t2->off;
469 } else {
470 tmp_to(t2, r2);
472 i_ldr(0, r1, r2, off, bt);
473 tmp_drop(2);
474 tmp_push(r1);
477 static long cu(int op, long i)
479 switch (op & 0xff) {
480 case O_NEG:
481 return -i;
482 case O_NOT:
483 return ~i;
484 case O_LNOT:
485 return !i;
487 return 0;
490 static int c_uop(int op)
492 struct tmp *t1 = TMP(0);
493 if (!TMP_NUM(t1))
494 return 1;
495 tmp_drop(1);
496 o_num(cu(op, t1->addr));
497 return 0;
500 static long cb(int op, long a, long b)
502 switch (op & 0xff) {
503 case O_ADD:
504 return a + b;
505 case O_SUB:
506 return a - b;
507 case O_AND:
508 return a & b;
509 case O_OR:
510 return a | b;
511 case O_XOR:
512 return a ^ b;
513 case O_MUL:
514 return a * b;
515 case O_DIV:
516 return a / b;
517 case O_MOD:
518 return a % b;
519 case O_SHL:
520 return a << b;
521 case O_SHR:
522 if (op & O_SIGNED)
523 return a >> b;
524 else
525 return (unsigned long) a >> b;
526 case O_LT:
527 return a < b;
528 case O_GT:
529 return a > b;
530 case O_LE:
531 return a <= b;
532 case O_GE:
533 return a >= b;
534 case O_EQ:
535 return a == b;
536 case O_NEQ:
537 return a != b;
539 return 0;
542 static int c_bop(int op)
544 struct tmp *t1 = TMP(0);
545 struct tmp *t2 = TMP(1);
546 int locs = LOCAL_PTR(t1) + LOCAL_PTR(t2);
547 int syms = SYM_PTR(t1) + SYM_PTR(t2);
548 int nums = TMP_NUM(t1) + TMP_NUM(t2);
549 if (syms + locs == 2 || syms + nums + locs != 2)
550 return 1;
551 if (nums == 1)
552 if ((op & 0xff) != O_ADD && ((op & 0xff) != O_SUB || TMP_NUM(t2)))
553 return 1;
554 if (nums == 1) {
555 long o1 = TMP_NUM(t1) ? t1->addr : t1->off;
556 long o2 = TMP_NUM(t2) ? t2->addr : t2->off;
557 long ret = cb(op, o2, o1);
558 if (!TMP_NUM(t1))
559 o_tmpswap();
560 t2->off = ret;
561 tmp_drop(1);
562 } else {
563 long ret = cb(op, t2->addr, t1->addr);
564 tmp_drop(2);
565 o_num(ret);
567 return 0;
570 void o_uop(int op)
572 int r1 = (op & 0xff) == O_LNOT ? R_RAX : reg_fortmp(TMP(0), 0);
573 if (!c_uop(op))
574 return;
575 tmp_to(TMP(0), r1);
576 switch (op & 0xff) {
577 case O_NEG:
578 i_neg(r1);
579 break;
580 case O_NOT:
581 i_not(r1);
582 break;
583 case O_LNOT:
584 i_lnot(r1);
585 break;
/*
 * Pop the two topmost temporaries into registers for a binary
 * operation.  *r2 receives the register of the topmost one (chosen
 * from mask1), *r1 that of the one below it (chosen from mask2 and
 * different from *r2).
 */
static void bin_regs(int *r1, int *r2, int mask1, int mask2)
{
	struct tmp *rhs = TMP(0);
	struct tmp *lhs = TMP(1);
	int rreg, lreg;
	rreg = reg_fortmp(rhs, ~mask1);
	*r2 = rreg;
	tmp_to(rhs, rreg);
	lreg = reg_fortmp(lhs, ~mask2 | (1 << rreg));
	*r1 = lreg;
	tmp_pop(rreg);
	tmp_pop(lreg);
}
600 static int bop_imm(int *r1, long *n, int swap)
602 struct tmp *t1 = TMP(0);
603 struct tmp *t2 = TMP(1);
604 if (!TMP_NUM(t1) && (!swap || !TMP_NUM(t2)))
605 return 1;
606 *n = TMP_NUM(t1) ? t1->addr : t2->addr;
607 if (!i_decodeable(*n))
608 return 1;
609 if (!TMP_NUM(t1))
610 o_tmpswap();
611 *r1 = reg_fortmp(t2, 0);
612 tmp_drop(1);
613 tmp_pop(*r1);
614 return 0;
617 static void bin_add(int op)
619 int r1, r2;
620 long n;
621 if (!bop_imm(&r1, &n, (op & 0xff) != O_SUB)) {
622 i_add_imm(op, r1, r1, n);
623 } else {
624 bin_regs(&r1, &r2, R_TMPS, R_TMPS);
625 i_add(op, r1, r1, r2);
627 tmp_push(r1);
630 static void bin_shx(int op)
632 int r1, r2;
633 long n;
634 if (!bop_imm(&r1, &n, 0)) {
635 i_shl_imm(op, r1, r1, n);
636 } else {
637 bin_regs(&r1, &r2, 1 << R_RCX, R_TMPS);
638 i_shl(op, r1, r1, r2);
640 tmp_push(r1);
643 static int log2a(unsigned long n)
645 int i = 0;
646 for (i = 0; i < LONGSZ * 8; i++)
647 if (n & (1u << i))
648 break;
649 if (i == LONGSZ * 8 || !(n >> (i + 1)))
650 return i;
651 return -1;
654 /* optimized version of mul/div/mod for powers of two */
655 static int mul_2(int op)
657 struct tmp *t1 = TMP(0);
658 struct tmp *t2 = TMP(1);
659 long n;
660 int r2;
661 int p;
662 if ((op & 0xff) == O_MUL && t2->loc == LOC_NUM && !t2->bt)
663 o_tmpswap();
664 if (t1->loc != LOC_NUM || t1->bt)
665 return 1;
666 n = t1->addr;
667 p = log2a(n);
668 if (n && p == -1)
669 return 1;
670 if ((op & 0xff) == O_MUL) {
671 tmp_drop(1);
672 if (n == 1)
673 return 0;
674 if (n == 0) {
675 tmp_drop(1);
676 o_num(0);
677 return 0;
679 r2 = reg_fortmp(t2, 0);
680 tmp_to(t2, r2);
681 i_shl_imm(O_SHL, r2, r2, p);
682 return 0;
684 if (op == O_DIV) {
685 tmp_drop(1);
686 if (n == 1)
687 return 0;
688 r2 = reg_fortmp(t2, 0);
689 tmp_to(t2, r2);
690 i_shl_imm((op & O_SIGNED) | O_SHR, r2, r2, p);
691 return 0;
693 if (op == O_MOD) {
694 tmp_drop(1);
695 if (n == 1) {
696 tmp_drop(1);
697 o_num(0);
698 return 0;
700 r2 = reg_fortmp(t2, 0);
701 tmp_to(t2, r2);
702 i_zx(r2, p);
703 return 0;
705 return 1;
708 static void mulop(int *r1, int *r2, int rop)
710 struct tmp *t1 = TMP(0);
711 struct tmp *t2 = TMP(1);
712 if (t1->loc & LOC_REG && t1->addr != R_RAX && t1->addr != R_RDX)
713 rop = t1->addr;
714 tmp_to(t1, rop);
715 tmp_to(t2, R_RAX);
716 if (rop != R_RDX)
717 reg_free(R_RDX);
718 tmp_drop(2);
719 *r1 = rop;
720 *r2 = R_RAX;
723 static void bin_mul(int op)
725 int r1, r2;
726 if (!mul_2(op))
727 return;
728 mulop(&r1, &r2, (op & 0xff) == O_MUL ? R_RDX : R_RCX);
729 if ((op & 0xff) == O_MUL) {
730 i_mul(R_RAX, r1, r2);
731 tmp_push(R_RAX);
733 if ((op & 0xff) == O_DIV) {
734 i_div(op, R_RAX, r1, r2);
735 tmp_push(R_RAX);
737 if ((op & 0xff) == O_MOD) {
738 i_div(op, R_RDX, r1, r2);
739 tmp_push(R_RDX);
743 static void bin_cmp(int op)
745 int r1, r2;
746 long n;
747 if (!bop_imm(&r1, &n, (op & 0xff) == O_EQ || (op & 0xff) == O_NEQ)) {
748 i_cmp_imm(r1, n);
749 } else {
750 bin_regs(&r1, &r2, R_TMPS, R_TMPS);
751 i_cmp(r1, r2);
753 r1 = R_RAX;
754 reg_free(r1);
755 i_set(op, r1);
756 tmp_push(r1);
/*
 * Apply the binary operation op to the two topmost temporaries.
 * Constant operands are folded; otherwise bits 4-7 of op select the
 * group of the operation.
 */
void o_bop(int op)
{
	if (!c_bop(op))
		return;
	switch (op & 0xf0) {
	case 0x00:
		bin_add(op);
		break;
	case 0x10:
		bin_shx(op);
		break;
	case 0x20:
		bin_mul(op);
		break;
	case 0x30:
		bin_cmp(op);
		break;
	}
}
773 void o_memcpy(void)
775 struct tmp *t0 = TMP(0);
776 struct tmp *t1 = TMP(1);
777 struct tmp *t2 = TMP(2);
778 tmp_to(t0, R_RCX);
779 tmp_to(t1, R_RSI);
780 tmp_to(t2, R_RDI);
781 os("\xfc\xf3\xa4", 3); /* cld; rep movs */
782 tmp_drop(2);
785 void o_memset(void)
787 struct tmp *t0 = TMP(0);
788 struct tmp *t1 = TMP(1);
789 struct tmp *t2 = TMP(2);
790 tmp_to(t0, R_RCX);
791 tmp_to(t1, R_RAX);
792 tmp_to(t2, R_RDI);
793 os("\xfc\xf3\xaa", 3); /* cld; rep stosb */
794 tmp_drop(2);
797 long o_mklabel(void)
799 return cslen;
802 static long jxz(long addr, int z)
804 int r = reg_fortmp(TMP(0), 0);
805 tmp_pop(r);
806 i_b_if(addr, r, z);
807 return cslen - 4;
810 long o_jz(long addr)
812 return jxz(addr, 1);
815 long o_jnz(long addr)
817 return jxz(addr, 0);
820 long o_jmp(long addr)
822 i_b(addr);
823 return cslen - 4;
826 void o_filljmp2(long addr, long jmpdst)
828 i_b_fill((void *) cs + addr, jmpdst - addr);
831 void o_filljmp(long addr)
833 o_filljmp2(addr, cslen);
836 void o_call(int argc, int rets)
838 struct tmp *t;
839 int i;
840 for (i = 0; i < N_TMPS; i++)
841 if (regs[tmpregs[i]] && regs[tmpregs[i]] - tmps < ntmp - argc)
842 tmp_mem(regs[tmpregs[i]]);
843 sp_push(LONGSZ * argc);
844 for (i = argc - 1; i >= 0; --i) {
845 int reg = reg_fortmp(TMP(0), 0);
846 tmp_pop(reg);
847 i_ldr(0, reg, REG_SP, i * LONGSZ, LONGSZ);
849 t = TMP(0);
850 if (t->loc == LOC_SYM && !t->bt) {
851 i_call(t->sym, t->off);
852 tmp_drop(1);
853 } else {
854 int reg = reg_fortmp(t, 0);
855 tmp_pop(reg);
856 i_call_reg(reg);
858 if (rets)
859 tmp_push(REG_RET);
862 void o_mkbss(char *name, int size, int global)
864 out_sym(name, OUT_BSS | (global ? OUT_GLOB : 0), bsslen, size);
865 bsslen += ALIGN(size, OUT_ALIGNMENT);
868 #define MAXDATS (1 << 10)
869 static char dat_names[MAXDATS][NAMELEN];
870 static int dat_offs[MAXDATS];
871 static int ndats;
873 void err(char *msg);
874 void *o_mkdat(char *name, int size, int global)
876 void *addr = ds + dslen;
877 int idx = ndats++;
878 if (idx >= MAXDATS)
879 err("nomem: MAXDATS reached!\n");
880 strcpy(dat_names[idx], name);
881 dat_offs[idx] = dslen;
882 out_sym(name, OUT_DS | (global ? OUT_GLOB : 0), dslen, size);
883 dslen += ALIGN(size, OUT_ALIGNMENT);
884 return addr;
887 static int dat_off(char *name)
889 int i;
890 for (i = 0; i < ndats; i++)
891 if (!strcmp(name, dat_names[i]))
892 return dat_offs[i];
893 return 0;
896 void o_datset(char *name, int off, unsigned bt)
898 struct tmp *t = TMP(0);
899 int sym_off = dat_off(name) + off;
900 if (t->loc == LOC_NUM && !t->bt) {
901 num_cast(t, bt);
902 memcpy(ds + sym_off, &t->addr, BT_SZ(bt));
904 if (t->loc == LOC_SYM && !t->bt) {
905 out_rel(t->sym, OUT_DS, sym_off);
906 memcpy(ds + sym_off, &t->off, BT_SZ(bt));
908 tmp_drop(1);
911 void o_write(int fd)
913 out_write(fd, cs, cslen, ds, dslen);
/* X86 arch specific functions */

/* opcode bytes */
#define I_MOV		0x89
#define I_MOVI		0xc7
#define I_MOVIR		0xb8
#define I_MOVR		0x8b
#define I_SHX		0xd3
#define I_CMP		0x3b
#define I_TST		0x85
#define I_LEA		0x8d
#define I_NOT		0xf7
#define I_CALL		0xff
#define I_MUL		0xf7
#define I_XOR		0x33
#define I_TEST		0x85
#define I_CQO		0x99
#define I_PUSH		0x50
#define I_POP		0x58

/* two-byte opcodes: bit 16 marks them, o2 is emitted before o1 */
#define OP2(o2, o1)		(0x010000 | ((o2) << 8) | (o1))
#define O2(op)			(((op) >> 8) & 0xff)
#define O1(op)			((op) & 0xff)
#define MODRM(m, r1, r2)	((m) << 6 | (r1) << 3 | (r2))

/* for optimizing cmp + jmp */
/* true when the 6 bytes emitted by i_set() are the last code emitted */
#define OPT_ISCMP()		(last_set + 6 == cslen)
/* condition byte of that set instruction */
#define OPT_CCOND()		(cs[last_set + 1])

/* code offset of the most recent i_set(), or -1 */
static long last_set = -1;
/*
 * Emit the opcode part of an instruction for operand type bt: a 0x66
 * prefix for 2-byte operands, the leading byte of a two-byte opcode,
 * and the opcode itself with bit 0 cleared for 1-byte operands.
 * r1 and r2 are not used here.
 */
static void op_x(int op, int r1, int r2, int bt)
{
	int size = BT_SZ(bt);
	int opcode = O1(op);
	if (size == 2)
		oi(0x66, 1);
	if (op & 0x10000)
		oi(O2(op), 1);
	if (size == 1)
		opcode &= ~0x1;
	oi(opcode, 1);
}
956 #define op_mr op_rm
958 /* op_*(): r=reg, m=mem, i=imm, s=sym */
959 static void op_rm(int op, int src, int base, int off, int bt)
961 int dis = off == (char) off ? 1 : 4;
962 int mod = dis == 4 ? 2 : 1;
963 if (!off && (base & 7) != R_RBP)
964 mod = 0;
965 op_x(op, src, base, bt);
966 oi(MODRM(mod, src & 0x07, base & 0x07), 1);
967 if ((base & 7) == R_RSP)
968 oi(0x24, 1);
969 if (mod)
970 oi(off, dis);
/* emit op with two register operands: src in the reg field, dst in the r/m field */
static void op_rr(int op, int src, int dst, int bt)
{
	int modrm = MODRM(3, src & 0x07, dst & 0x07);
	op_x(op, src, dst, bt);
	oi(modrm, 1);
}
979 #define movrx_bt(bt) (LONGSZ)
981 static int movrx_op(int bt, int mov)
983 int sz = BT_SZ(bt);
984 if (sz == 2)
985 return OP2(0x0f, bt & BT_SIGNED ? 0xbf : 0xb7);
986 if (sz == 1)
987 return OP2(0x0f, bt & BT_SIGNED ? 0xbe : 0xb6);
988 return mov;
991 static void mov_r2r(int r1, int r2, unsigned bt)
993 if (r1 != r2 || BT_SZ(bt) != LONGSZ)
994 op_rr(movrx_op(bt, I_MOV), r1, r2, movrx_bt(bt));
997 static void mov_m2r(int dst, int base, int off, int bt)
999 op_rm(movrx_op(bt, I_MOVR), dst, base, off, movrx_bt(bt));
1002 static void i_zx(int rd, int bits)
1004 if (bits & 0x07) {
1005 i_shl_imm(O_SHL, rd, rd, LONGSZ * 8 - bits);
1006 i_shl_imm(O_SHR, rd, rd, LONGSZ * 8 - bits);
1007 } else {
1008 mov_r2r(rd, rd, bits >> 3);
1012 static void i_sx(int rd, int bits)
1014 mov_r2r(rd, rd, BT_SIGNED | (bits >> 3));
1017 static void i_add(int op, int rd, int rn, int rm)
1019 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
1020 static int rx[] = {0003, 0053, 0043, 0013, 0063};
1021 if (rn != rd)
1022 die("this is cisc!\n");
1023 op_rr(rx[op & 0x0f], rd, rm, LONGSZ);
1026 static void i_add_imm(int op, int rd, int rn, long n)
1028 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
1029 static int rx[] = {0xc0, 0xe8, 0xe0, 0xc8, 0xf0};
1030 unsigned char s[3] = {0x83, rx[op & 0x0f] | rd, n & 0xff};
1031 if (rn != rd)
1032 die("this is cisc!\n");
1033 os((void *) s, 3);
/* nonzero when imm fits in the one-byte signed immediate of the *_imm() forms */
static int i_decodeable(long imm)
{
	return -128 <= imm && imm <= 127;
}
1041 static void i_num(int rd, long n)
1043 if (!n) {
1044 op_rr(I_XOR, rd, rd, 4);
1045 } else {
1046 op_x(I_MOVIR + (rd & 7), 0, rd, LONGSZ);
1047 oi(n, LONGSZ);
1051 static void i_add_anyimm(int rd, int rn, long n)
1053 op_rm(I_LEA, rd, rn, n, LONGSZ);
1056 static void i_mul(int rd, int rn, int rm)
1058 if (rn != R_RDX)
1059 i_num(R_RDX, 0);
1060 op_rr(I_MUL, 4, rn, LONGSZ);
1063 static void i_div(int op, int rd, int rn, int rm)
1065 if (rn != R_RDX) {
1066 if (op & O_SIGNED)
1067 op_x(I_CQO, R_RAX, R_RDX, LONGSZ);
1068 else
1069 i_num(R_RDX, 0);
1071 op_rr(I_MUL, op & O_SIGNED ? 7 : 6, rn, LONGSZ);
1074 static void i_tst(int rn, int rm)
1076 op_rr(I_TST, rn, rm, LONGSZ);
1079 static void i_cmp(int rn, int rm)
1081 op_rr(I_CMP, rn, rm, LONGSZ);
1084 static void i_cmp_imm(int rn, long n)
1086 unsigned char s[3] = {0x83, 0xf8 | rn, n & 0xff};
1087 os(s, 3);
1090 static void i_set(int op, int rd)
1092 /* lt, gt, le, ge, eq, neq */
1093 static int ucond[] = {0x92, 0x97, 0x96, 0x93, 0x94, 0x95};
1094 static int scond[] = {0x9c, 0x9f, 0x9e, 0x9d, 0x94, 0x95};
1095 int cond = op & O_SIGNED ? scond[op & 0x0f] : ucond[op & 0x0f];
1096 char set[] = "\x0f\x00\xc0";
1097 if (rd != R_RAX)
1098 die("set works only with R_RAX\n");
1099 set[1] = cond;
1100 last_set = cslen;
1101 os(set, 3); /* setl al */
1102 os("\x0f\xb6\xc0", 3); /* movzbl eax, al */
1105 static void i_shl(int op, int rd, int rm, int rs)
1107 int sm = 4;
1108 if ((op & 0x0f) == 1)
1109 sm = op & O_SIGNED ? 7 : 5;
1110 if (rd != rm)
1111 die("this is cisc!\n");
1112 op_rr(I_SHX, sm, rd, LONGSZ);
1115 static void i_shl_imm(int op, int rd, int rn, long n)
1117 int sm = (op & 0x1) ? (op & O_SIGNED ? 0xf8 : 0xe8) : 0xe0 ;
1118 char s[3] = {0xc1, sm | rn, n & 0xff};
1119 if (rd != rn)
1120 die("this is cisc!\n");
1121 os(s, 3);
1124 static void i_mov(int rd, int rn, int bt)
1126 op_rr(movrx_op(bt, I_MOVR), rd, rn, movrx_bt(bt));
1129 static void i_ldr(int l, int rd, int rn, int off, int bt)
1131 if (l)
1132 mov_m2r(rd, rn, off, bt);
1133 else
1134 op_rm(I_MOV, rd, rn, off, bt);
1137 static void i_sym(int rd, char *sym, int off)
1139 op_x(I_MOVIR + (rd & 7), 0, rd, LONGSZ);
1140 out_rel(sym, OUT_CS, cslen);
1141 oi(off, LONGSZ);
1144 static void i_neg(int rd)
1146 op_rr(I_NOT, 3, rd, LONGSZ);
1149 static void i_not(int rd)
1151 op_rr(I_NOT, 2, rd, LONGSZ);
1154 static void i_lnot(int rd)
1156 if (OPT_ISCMP()) {
1157 cs[last_set + 1] ^= 0x01;
1158 } else {
1159 char cmp[] = "\x83\xf8\x00";
1160 cmp[1] |= rd;
1161 os(cmp, 3); /* cmp eax, 0 */
1162 i_set(O_EQ, rd);
1166 static void jx(int x, long addr)
1168 char op[2] = {0x0f};
1169 op[1] = x;
1170 os(op, 2); /* jx $addr */
1171 oi(addr - cslen - 4, 4);
1174 static void i_b_if(long addr, int rn, int z)
1176 if (OPT_ISCMP()) {
1177 int cond = OPT_CCOND();
1178 cslen = last_set;
1179 jx((!z ? cond : cond ^ 0x01) & ~0x10, addr);
1180 last_set = -1;
1181 } else {
1182 i_tst(rn, rn);
1183 jx(z ? 0x84 : 0x85, addr);
1187 static void i_b(long addr)
1189 os("\xe9", 1); /* jmp $addr */
1190 oi(addr - cslen - 4, 4);
1193 static void i_b_fill(long *dst, int diff)
1195 putint((void *) dst, diff - 4, 4);
1198 static void i_call_reg(int rd)
1200 op_rr(I_CALL, 2, rd, LONGSZ);
1203 static void i_call(char *sym, int off)
1205 os("\xe8", 1); /* call $x */
1206 out_rel(sym, OUT_CS | OUT_REL, cslen);
1207 oi(-4 + off, 4);
1210 static void i_prolog(void)
1212 last_set = -1;
1213 os("\x55", 1); /* push rbp */
1214 os("\x89\xe5", 2); /* mov rbp, rsp */
1215 os("\x53\x56\x57", 3); /* push rbx; push rsi; push rdi */
1216 os("\x81\xec", 2); /* sub rsp, $xxx */
1217 func_fpsub = cslen;
1218 oi(0, 4);
1221 static void i_epilog(void)
1223 int diff = ALIGN(sp_max - 3 * LONGSZ, LONGSZ);
1224 if (diff) {
1225 os("\x81\xc4", 2); /* add $xxx, %esp */
1226 oi(diff, 4);
1227 putint(cs + func_fpsub, diff, 4);
1229 os("\x5f\x5e\x5b", 3); /* pop edi; pop esi; pop ebx */
1230 os("\xc9\xc3", 2); /* leave; ret; */