From 7d0f092898fc1d7138aaff4a1b2186674481498a Mon Sep 17 00:00:00 2001 From: Ali Gholami Rudi Date: Fri, 23 Dec 2011 11:25:08 +0330 Subject: [PATCH] extract architecture independent gen.c This is actually the next version of neatcc. Previously, code generation was done independently for each architecture. That included handling different instructions, optimizing instructions using immediates, constant inlining, handling jumps, locals, stack and temporaries, in addition to a separate optimization patch for each architecture, which was separated to make the complexity more manageable. I was satisfied with the design when there was only one architecture, but with three architectures and an optimization patch for each, things started to look awful. This patch extracts the architecture-independent gen.c from the old code generation files. The new layout of neatcc is: 1. cpp.c preprocessing 2. tok.c tokenizing 3. ncc.c parsing 4. gen.c architecture independent code generation and optimization 5. x86.c architecture dependent code generation 6. out.c output executable file It has many benefits: * Architecture-dependent files are much shorter and easier to maintain, so adding new architectures or tweaking instructions for old architectures is much easier now. * All architectures are maintained on the same branch; this makes distributing neatcc as a single package much easier and without depending on git. * The architecture-independent gen.c now leaves a lot of room for optimizations. neatcc's two-pass optimization, which used to be a hack and was left as a separate patch, is now an effective optimization method. A lot of improvements are possible. 
--- Makefile | 12 +- README | 8 +- arm.c | 2060 ++++++++++++++++++++------------------------------------------ arm.h | 18 + cpp.c | 70 ++- gen.c | 1080 ++++++++++++++++++++++++++++++++ gen.h | 109 +++- ncc.c | 45 +- neatcc | 2 +- out.c | 184 ++++-- out.h | 6 +- tok.h | 1 + x64.c | 595 ++++++++++++++++++ x64.h | 21 + x86.c | 1857 +++++++++++++++++-------------------------------------- x86.h | 18 + 16 files changed, 3270 insertions(+), 2816 deletions(-) rewrite arm.c (69%) create mode 100644 arm.h create mode 100644 gen.c create mode 100644 x64.c create mode 100644 x64.h rewrite x86.c (81%) create mode 100644 x86.h diff --git a/Makefile b/Makefile index 5c59fef..0d06a4f 100644 --- a/Makefile +++ b/Makefile @@ -2,9 +2,13 @@ #ARCH = -DNEATCC_ARM #GEN = arm.o -# for x86 build -ARCH = -DNEATCC_X86 -GEN = x86.o +# for x86 build +#ARCH = -DNEATCC_X86 +#GEN = x86.o + +# for x86_64 build +ARCH = -DNEATCC_X64 +GEN = x64.o CC = cc CFLAGS = -Wall -O2 $(ARCH) @@ -13,7 +17,7 @@ LDFLAGS = all: ncc npp .c.o: $(CC) -c $(CFLAGS) $< -ncc: ncc.o tok.o $(GEN) out.o cpp.o tab.o +ncc: ncc.o tok.o $(GEN) out.o cpp.o tab.o gen.o $(CC) -o $@ $^ $(LDFLAGS) npp: npp.o cpp.o tab.o $(CC) -o $@ $^ $(LDFLAGS) diff --git a/README b/README index f5207df..6059dad 100644 --- a/README +++ b/README @@ -1,15 +1,15 @@ NEATCC ====== -neatcc is a simple ARM/x86 C compiler. I wrote it to complete my -development toolchain (see http://litcave.berlios.de/): +neatcc is a simple ARM/x86(_64) C compiler. I wrote it to complete my +development toolchain (see http://litcave.rudi.ir/): * cc: git://repo.or.cz/neatcc.git * ld: git://repo.or.cz/ld.git * as: git://repo.or.cz/neatas.git * libc: git://repo.or.cz/neatlibc.git -* gdb: http://litcave.berlios.de/coredump.tar.gz - http://litcave.berlios.de/elfloc.tar.gz +* gdb: http://litcave.rudi.ir/coredump.tar.gz + http://litcave.rudi.ir/elfloc.tar.gz neatcc supports a large subset of C language but lacks some of the features that I didn't use much myself. 
The most important missing diff --git a/arm.c b/arm.c dissimilarity index 69% index 10ec29b..242d808 100644 --- a/arm.c +++ b/arm.c @@ -1,1393 +1,667 @@ -#include -#include -#include -#include "gen.h" -#include "out.h" -#include "tok.h" - -#define LOC_REG 0x01 -#define LOC_MEM 0x02 -#define LOC_NUM 0x04 -#define LOC_SYM 0x08 -#define LOC_LOCAL 0x10 - -#define NREGS 16 - -#define REG_PC 15 /* program counter */ -#define REG_LR 14 /* link register */ -#define REG_SP 13 /* stack pointer */ -#define REG_TMP 12 /* temporary register */ -#define REG_FP 11 /* frame pointer register */ -#define REG_DP 10 /* data pointer register */ -#define REG_RET 0 /* returned value register */ -#define REG_FORK 0 /* result of conditional branches */ - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) -void err(char *msg); - -static char cs[SECSIZE]; /* code segment */ -static int cslen; -static char ds[SECSIZE]; /* data segment */ -static int dslen; -static long bsslen; /* bss segment size */ - -static long sp; -static long func_beg; -static long maxsp; - -#define TMP(i) (((i) < ntmp) ? 
&tmps[ntmp - 1 - (i)] : NULL) - -static struct tmp { - long addr; - char sym[NAMELEN]; - long off; /* offset from a symbol or a local */ - unsigned loc; /* variable location */ - unsigned bt; /* type of address; zero when not a pointer */ -} tmps[MAXTMP]; -static int ntmp; - -static int tmpsp; - -/* arch-specific functions */ -static void i_ldr(int l, int rd, int rn, int off, int bt); -static void i_mov(int rd, int rn); -static void i_add(int op, int rd, int rn, int rm); -static void i_shl(int op, int rd, int rm, int rs); -static void i_mul(int rd, int rn, int rm); -static void i_cmp(int rn, int rm); -static int i_decodeable(long imm); -static void i_add_imm(int op, int rd, int rn, long n); -static void i_shl_imm(int op, int rd, int rn, long n); -static void i_cmp_imm(int rn, long n); -static void i_add_anyimm(int rd, int rn, long n); -static void i_num(int rd, long n); -static void i_sym(int rd, char *sym, int off); -static void i_set(int op, int rd); -static void i_neg(int rd); -static void i_not(int rd); -static void i_lnot(int rd); -static void i_zx(int rd, int bits); -static void i_sx(int rd, int bits); -static void i_b(void); -static void i_bz(int rn, int z); -static void i_b_fill(long src, long dst); -static void i_memcpy(int rd, int rs, int rn); -static void i_memset(int rd, int rs, int rn); -static void i_call(char *sym, int off); -static void i_call_reg(int rd); -static void i_prolog(void); -static void i_epilog(void); - -static struct tmp *regs[NREGS]; -static int tmpregs[] = {4, 5, 6, 7, 8, 9, 0, 1, 2, 3}; -static int argregs[] = {0, 1, 2, 3}; - -/* labels and jmps */ -#define MAXJMPS (1 << 14) - -static long labels[MAXJMPS]; -static int nlabels; -static long jmp_loc[MAXJMPS]; -static int jmp_goal[MAXJMPS]; -static int njmps; - -void o_label(int id) -{ - if (id > nlabels) - nlabels = id + 1; - labels[id] = cslen; -} - -static void jmp_add(int id) -{ - if (njmps >= MAXJMPS) - err("nomem: MAXJMPS reached!\n"); - jmp_loc[njmps] = cslen - 4; - 
jmp_goal[njmps] = id; - njmps++; -} - -static void jmp_fill(void) -{ - int i; - for (i = 0; i < njmps; i++) - i_b_fill(jmp_loc[i], labels[jmp_goal[i]]); -} - -/* output div/mod functions */ -static int putdiv = 0; - -/* generating code */ - -static void os(void *s, int n) -{ - memcpy(cs + cslen, s, n); - cslen += n; -} - -static void oi(long n) -{ - *(int *) (cs + cslen) = n; - cslen += 4; -} - -static long sp_push(int size) -{ - sp += size; - if (sp > maxsp) - maxsp = sp; - return sp; -} - -static void tmp_mem(struct tmp *tmp) -{ - int src = tmp->addr; - if (!(tmp->loc == LOC_REG)) - return; - if (tmpsp == -1) - tmpsp = sp; - tmp->addr = -sp_push(LONGSZ); - i_ldr(0, src, REG_FP, tmp->addr, LONGSZ); - regs[src] = NULL; - tmp->loc = LOC_MEM; -} - -static void num_cast(struct tmp *t, unsigned bt) -{ - if (!(bt & BT_SIGNED) && BT_SZ(bt) != LONGSZ) - t->addr &= ((1l << (long) (BT_SZ(bt) * 8)) - 1); - if (bt & BT_SIGNED && BT_SZ(bt) != LONGSZ && - t->addr > (1l << (BT_SZ(bt) * 8 - 1))) - t->addr = -((1l << (BT_SZ(bt) * 8)) - t->addr); -} - -static void tmp_reg(struct tmp *tmp, int dst, int deref) -{ - int bt = tmp->bt; - if (!tmp->bt) - deref = 0; - if (deref) - tmp->bt = 0; - if (tmp->loc == LOC_NUM) { - i_num(dst, tmp->addr); - tmp->addr = dst; - regs[dst] = tmp; - tmp->loc = LOC_REG; - } - if (tmp->loc == LOC_SYM) { - i_sym(dst, tmp->sym, tmp->off); - tmp->addr = dst; - regs[dst] = tmp; - tmp->loc = LOC_REG; - } - if (tmp->loc == LOC_REG) { - if (deref) - i_ldr(1, dst, tmp->addr, 0, bt); - else if (dst != tmp->addr) - i_mov(dst, tmp->addr); - regs[tmp->addr] = NULL; - } - if (tmp->loc == LOC_LOCAL) { - if (deref) - i_ldr(1, dst, REG_FP, tmp->addr + tmp->off, bt); - else - i_add_anyimm(dst, REG_FP, tmp->addr + tmp->off); - } - if (tmp->loc == LOC_MEM) { - i_ldr(1, dst, REG_FP, tmp->addr, LONGSZ); - if (deref) - i_ldr(1, dst, dst, 0, bt); - } - tmp->addr = dst; - regs[dst] = tmp; - tmp->loc = LOC_REG; -} - -static void reg_free(int reg) -{ - int i; - if (!regs[reg]) - 
return; - for (i = 0; i < ARRAY_SIZE(tmpregs); i++) - if (!regs[tmpregs[i]]) { - tmp_reg(regs[reg], tmpregs[i], 0); - return; - } - tmp_mem(regs[reg]); -} - -static void reg_for(int reg, struct tmp *t) -{ - if (regs[reg] && regs[reg] != t) - reg_free(reg); -} - -static void tmp_mv(struct tmp *t, int reg) -{ - reg_for(reg, t); - tmp_reg(t, reg, 0); -} - -static void tmp_to(struct tmp *t, int reg) -{ - reg_for(reg, t); - tmp_reg(t, reg, 1); -} - -static void tmp_drop(int n) -{ - int i; - for (i = ntmp - n; i < ntmp; i++) - if (tmps[i].loc == LOC_REG) - regs[tmps[i].addr] = NULL; - ntmp -= n; -} - -static void tmp_pop(int reg) -{ - struct tmp *t = TMP(0); - tmp_to(t, reg); - tmp_drop(1); -} - -static struct tmp *tmp_new(void) -{ - return &tmps[ntmp++]; -} - -static void tmp_push(int reg) -{ - struct tmp *t = tmp_new(); - t->addr = reg; - t->bt = 0; - t->loc = LOC_REG; - regs[reg] = t; -} - -void o_local(long addr) -{ - struct tmp *t = tmp_new(); - t->addr = -addr; - t->loc = LOC_LOCAL; - t->bt = 0; - t->off = 0; -} - -void o_num(long num) -{ - struct tmp *t = tmp_new(); - t->addr = num; - t->bt = 0; - t->loc = LOC_NUM; -} - -void o_sym(char *name) -{ - struct tmp *t = tmp_new(); - strcpy(t->sym, name); - t->loc = LOC_SYM; - t->bt = 0; - t->off = 0; -} - -void o_tmpdrop(int n) -{ - if (n == -1 || n > ntmp) - n = ntmp; - tmp_drop(n); - if (!ntmp) { - if (tmpsp != -1) - sp = tmpsp; - tmpsp = -1; - } -} - -/* make sure tmps remain intact after a conditional expression */ -void o_fork(void) -{ - int i; - for (i = 0; i < ntmp - 1; i++) - tmp_mem(&tmps[i]); -} - -void o_forkpush(void) -{ - tmp_pop(REG_FORK); -} - -void o_forkjoin(void) -{ - tmp_push(REG_FORK); -} - -void o_tmpswap(void) -{ - struct tmp *t1 = TMP(0); - struct tmp *t2 = TMP(1); - struct tmp t; - memcpy(&t, t1, sizeof(t)); - memcpy(t1, t2, sizeof(t)); - memcpy(t2, &t, sizeof(t)); - if (t1->loc == LOC_REG) - regs[t1->addr] = t1; - if (t2->loc == LOC_REG) - regs[t2->addr] = t2; -} - -static int reg_get(int mask) 
-{ - int i; - for (i = 0; i < ARRAY_SIZE(tmpregs); i++) - if ((1 << tmpregs[i]) & mask && !regs[tmpregs[i]]) - return tmpregs[i]; - for (i = 0; i < ARRAY_SIZE(tmpregs); i++) - if ((1 << tmpregs[i]) & mask) { - reg_free(tmpregs[i]); - return tmpregs[i]; - } - return 0; -} - -static int reg_fortmp(struct tmp *t, int notmask) -{ - if (t->loc == LOC_REG && !(notmask & (1 << t->addr))) - return t->addr; - return reg_get(~notmask); -} - -static void tmp_copy(struct tmp *t1) -{ - struct tmp *t2 = tmp_new(); - memcpy(t2, t1, sizeof(*t1)); - if (!(t1->loc & (LOC_REG | LOC_MEM))) - return; - if (t1->loc == LOC_MEM) { - tmp_mv(t2, reg_get(~0)); - } else if (t1->loc == LOC_REG) { - t2->addr = reg_fortmp(t2, 1 << t1->addr); - i_mov(t2->addr, t1->addr); - regs[t2->addr] = t2; - } -} - -void o_tmpcopy(void) -{ - tmp_copy(TMP(0)); -} - -void o_cast(unsigned bt) -{ - struct tmp *t = TMP(0); - if (!t->bt && t->loc == LOC_NUM) { - num_cast(t, bt); - return; - } - if (BT_SZ(bt) != LONGSZ) { - int reg = reg_fortmp(t, 0); - tmp_to(t, reg); - if (bt & BT_SIGNED) - i_sx(reg, BT_SZ(bt) * 8); - else - i_zx(reg, BT_SZ(bt) * 8); - } -} - -void o_func_beg(char *name, int argc, int global, int vararg) -{ - out_sym(name, (global ? 
OUT_GLOB : 0) | OUT_CS, cslen, 0); - i_prolog(); - sp = 0; - maxsp = sp; - ntmp = 0; - tmpsp = -1; - nlabels = 0; - njmps = 0; - memset(regs, 0, sizeof(regs)); -} - -void o_deref(unsigned bt) -{ - struct tmp *t = TMP(0); - if (t->bt) - tmp_to(t, reg_fortmp(t, 0)); - t->bt = bt; -} - -void o_load(void) -{ - struct tmp *t = TMP(0); - tmp_to(t, reg_fortmp(t, 0)); -} - -#define TMP_NUM(t) ((t)->loc == LOC_NUM && !(t)->bt) -#define LOCAL_PTR(t) ((t)->loc == LOC_LOCAL && !(t)->bt) -#define SYM_PTR(t) ((t)->loc == LOC_SYM && !(t)->bt) - -int o_popnum(long *c) -{ - struct tmp *t = TMP(0); - if (!TMP_NUM(t)) - return 1; - *c = t->addr; - tmp_drop(1); - return 0; -} - -void o_ret(int rets) -{ - if (rets) - tmp_pop(REG_RET); - else - i_num(REG_RET, 0); - o_jmp(0); -} - -void o_func_end(void) -{ - o_label(0); - jmp_fill(); - i_epilog(); -} - -long o_mklocal(int size) -{ - return sp_push(ALIGN(size, LONGSZ)); -} - -void o_rmlocal(long addr, int sz) -{ - sp = addr - sz; -} - -long o_arg2loc(int i) -{ - return -(10 + i) << 2; -} - -void o_assign(unsigned bt) -{ - struct tmp *t1 = TMP(0); - struct tmp *t2 = TMP(1); - int r1 = reg_fortmp(t1, 0); - int r2 = reg_fortmp(t2, 1 << r1); - int off = 0; - tmp_to(t1, r1); - if (t2->bt) - tmp_to(t2, r2); - if (t2->loc == LOC_LOCAL) { - r2 = REG_FP; - off = t2->addr + t2->off; - } else { - tmp_to(t2, r2); - } - tmp_drop(2); - i_ldr(0, r1, r2, off, bt); - tmp_push(r1); -} - -static long cu(int op, long i) -{ - switch (op & 0xff) { - case O_NEG: - return -i; - case O_NOT: - return ~i; - case O_LNOT: - return !i; - } - return 0; -} - -static int c_uop(int op) -{ - struct tmp *t1 = TMP(0); - if (!TMP_NUM(t1)) - return 1; - tmp_drop(1); - o_num(cu(op, t1->addr)); - return 0; -} - -static long cb(int op, long a, long b) -{ - switch (op & 0xff) { - case O_ADD: - return a + b; - case O_SUB: - return a - b; - case O_AND: - return a & b; - case O_OR: - return a | b; - case O_XOR: - return a ^ b; - case O_MUL: - return a * b; - case O_DIV: - return a / 
b; - case O_MOD: - return a % b; - case O_SHL: - return a << b; - case O_SHR: - if (op & O_SIGNED) - return a >> b; - else - return (unsigned long) a >> b; - case O_LT: - return a < b; - case O_GT: - return a > b; - case O_LE: - return a <= b; - case O_GE: - return a >= b; - case O_EQ: - return a == b; - case O_NEQ: - return a != b; - } - return 0; -} - -static int c_bop(int op) -{ - struct tmp *t1 = TMP(0); - struct tmp *t2 = TMP(1); - int locals = LOCAL_PTR(t1) + LOCAL_PTR(t2); - int syms = SYM_PTR(t1) + SYM_PTR(t2); - int nums = TMP_NUM(t1) + TMP_NUM(t2); - if (syms + locals == 2 || syms + nums + locals != 2) - return 1; - if (nums == 1) - if ((op & 0xff) != O_ADD && ((op & 0xff) != O_SUB || TMP_NUM(t2))) - return 1; - if (nums == 1) { - long o1 = TMP_NUM(t1) ? t1->addr : t1->off; - long o2 = TMP_NUM(t2) ? t2->addr : t2->off; - long ret = cb(op, o2, o1); - if (!TMP_NUM(t1)) - o_tmpswap(); - t2->off = ret; - tmp_drop(1); - } else { - long ret = cb(op, t2->addr, t1->addr); - tmp_drop(2); - o_num(ret); - } - return 0; -} - -void o_uop(int op) -{ - int r1 = reg_fortmp(TMP(0), 0); - if (!c_uop(op)) - return; - tmp_to(TMP(0), r1); - switch (op & 0xff) { - case O_NEG: - i_neg(r1); - break; - case O_NOT: - i_not(r1); - break; - case O_LNOT: - i_lnot(r1); - break; - } -} - -static void bin_regs(int *r1, int *r2) -{ - struct tmp *t2 = TMP(0); - struct tmp *t1 = TMP(1); - *r2 = reg_fortmp(t2, 0); - tmp_to(t2, *r2); - *r1 = reg_fortmp(t1, 1 << *r2); - tmp_pop(*r2); - tmp_pop(*r1); -} - -static int bop_imm(int *r1, long *n, int swap) -{ - struct tmp *t1 = TMP(0); - struct tmp *t2 = TMP(1); - if (!TMP_NUM(t1) && (!swap || !TMP_NUM(t2))) - return 1; - *n = TMP_NUM(t1) ? 
t1->addr : t2->addr; - if (!i_decodeable(*n)) - return 1; - if (!TMP_NUM(t1)) - o_tmpswap(); - *r1 = reg_fortmp(t2, 0); - tmp_drop(1); - tmp_pop(*r1); - return 0; -} - -static void bin_add(int op) -{ - int r1, r2; - long n; - if (!bop_imm(&r1, &n, (op & 0xff) != O_SUB)) { - i_add_imm(op, r1, r1, n); - } else { - bin_regs(&r1, &r2); - i_add(op, r1, r1, r2); - } - tmp_push(r1); -} - -static void bin_shx(int op) -{ - int r1, r2; - long n; - if (!bop_imm(&r1, &n, 0)) { - i_shl_imm(op, r1, r1, n); - } else { - bin_regs(&r1, &r2); - i_shl(op, r1, r1, r2); - } - tmp_push(r1); -} - -static int log2a(unsigned long n) -{ - int i = 0; - for (i = 0; i < LONGSZ * 8; i++) - if (n & (1u << i)) - break; - if (i == LONGSZ * 8 || !(n >> (i + 1))) - return i; - return -1; -} - -/* optimized version of mul/div/mod for powers of two */ -static int mul_2(int op) -{ - struct tmp *t1 = TMP(0); - struct tmp *t2 = TMP(1); - long n; - int r2; - int p; - if ((op & 0xff) == O_MUL && t2->loc == LOC_NUM && !t2->bt) - o_tmpswap(); - if (t1->loc != LOC_NUM || t1->bt) - return 1; - n = t1->addr; - p = log2a(n); - if (n && p == -1) - return 1; - if ((op & 0xff) == O_MUL) { - tmp_drop(1); - if (n == 1) - return 0; - if (n == 0) { - tmp_drop(1); - o_num(0); - return 0; - } - r2 = reg_fortmp(t2, 0); - tmp_to(t2, r2); - i_shl_imm(O_SHL, r2, r2, p); - return 0; - } - if (op == O_DIV) { - tmp_drop(1); - if (n == 1) - return 0; - r2 = reg_fortmp(t2, 0); - tmp_to(t2, r2); - i_shl_imm((op & O_SIGNED) | O_SHR, r2, r2, p); - return 0; - } - if (op == O_MOD) { - tmp_drop(1); - if (n == 1) { - tmp_drop(1); - o_num(0); - return 0; - } - r2 = reg_fortmp(t2, 0); - tmp_to(t2, r2); - i_zx(r2, p); - return 0; - } - return 1; -} - -static void bin_div(int op) -{ - struct tmp *t2 = TMP(0); - struct tmp *t1 = TMP(1); - char *func; - int i; - putdiv = 1; - if ((op & 0xff) == O_DIV) - func = op & O_SIGNED ? "__divdi3" : "__udivdi3"; - else - func = op & O_SIGNED ? 
"__moddi3" : "__umoddi3"; - for (i = 0; i < ARRAY_SIZE(argregs); i++) - if (regs[argregs[i]] && regs[argregs[i]] - tmps < ntmp - 2) - tmp_mem(regs[argregs[i]]); - tmp_to(t1, argregs[0]); - tmp_to(t2, argregs[1]); - tmp_drop(2); - i_call(func, 0); - tmp_push(REG_RET); -} - -static void bin_mul(int op) -{ - int r1, r2; - if (!mul_2(op)) - return; - if ((op & 0xff) == O_DIV || (op & 0xff) == O_MOD) { - bin_div(op); - } else { - bin_regs(&r1, &r2); - i_mul(r1, r1, r2); - tmp_push(r1); - } -} - -static void bin_cmp(int op) -{ - int r1, r2; - long n; - if (!bop_imm(&r1, &n, (op & 0xff) == O_EQ || (op & 0xff) == O_NEQ)) { - i_cmp_imm(r1, n); - } else { - bin_regs(&r1, &r2); - i_cmp(r1, r2); - } - i_set(op, r1); - tmp_push(r1); -} - -void o_bop(int op) -{ - if (!c_bop(op)) - return; - if ((op & 0xf0) == 0x00) - bin_add(op); - if ((op & 0xf0) == 0x10) - bin_shx(op); - if ((op & 0xf0) == 0x20) - bin_mul(op); - if ((op & 0xf0) == 0x30) - bin_cmp(op); -} - -static void load_regs2(int *r0, int *r1, int *r2) -{ - struct tmp *t0 = TMP(0); - struct tmp *t1 = TMP(1); - struct tmp *t2 = TMP(2); - *r0 = reg_fortmp(t0, 0); - *r1 = reg_fortmp(t1, 1 << *r0); - *r2 = reg_fortmp(t2, (1 << *r0) | (1 << *r1)); - tmp_to(t0, *r0); - tmp_to(t1, *r1); - tmp_to(t2, *r2); -} - -void o_memcpy(void) -{ - int rd, rs, rn; - load_regs2(&rn, &rs, &rd); - i_memcpy(rd, rs, rn); - tmp_drop(2); -} - -void o_memset(void) -{ - int rd, rs, rn; - load_regs2(&rn, &rs, &rd); - i_memset(rd, rs, rn); - tmp_drop(2); -} - -static void jxz(int id, int z) -{ - int r = reg_fortmp(TMP(0), 0); - tmp_pop(r); - i_bz(r, z); - jmp_add(id); -} - -void o_jz(int id) -{ - jxz(id, 1); -} - -void o_jnz(int id) -{ - jxz(id, 0); -} - -void o_jmp(int id) -{ - i_b(); - jmp_add(id); -} - -void o_call(int argc, int rets) -{ - struct tmp *t; - int i; - int aregs = MIN(ARRAY_SIZE(argregs), argc); - for (i = 0; i < ARRAY_SIZE(argregs); i++) - if (regs[argregs[i]] && regs[argregs[i]] - tmps < ntmp - argc) - tmp_mem(regs[argregs[i]]); - if 
(argc > aregs) { - sp_push(LONGSZ * (argc - aregs)); - for (i = argc - 1; i >= aregs; --i) { - int reg = reg_fortmp(TMP(0), 0); - tmp_pop(reg); - i_ldr(0, reg, REG_SP, (i - aregs) * LONGSZ, LONGSZ); - } - } - for (i = aregs - 1; i >= 0; --i) - tmp_to(TMP(aregs - i - 1), argregs[i]); - tmp_drop(aregs); - t = TMP(0); - if (t->loc == LOC_SYM && !t->bt) { - i_call(t->sym, t->off); - tmp_drop(1); - } else { - int reg = t->loc == LOC_REG ? t->addr : REG_TMP; - tmp_pop(reg); - i_call_reg(reg); - } - if (rets) - tmp_push(REG_RET); -} - -void o_mkbss(char *name, int size, int global) -{ - out_sym(name, OUT_BSS | (global ? OUT_GLOB : 0), bsslen, size); - bsslen += ALIGN(size, OUT_ALIGNMENT); -} - -#define MAXDATS (1 << 10) -static char dat_names[MAXDATS][NAMELEN]; -static int dat_offs[MAXDATS]; -static int ndats; - -void *o_mkdat(char *name, int size, int global) -{ - void *addr = ds + dslen; - int idx = ndats++; - if (idx >= MAXDATS) - err("nomem: MAXDATS reached!\n"); - strcpy(dat_names[idx], name); - dat_offs[idx] = dslen; - out_sym(name, OUT_DS | (global ? 
OUT_GLOB : 0), dslen, size); - dslen += ALIGN(size, OUT_ALIGNMENT); - return addr; -} - -static int dat_off(char *name) -{ - int i; - for (i = 0; i < ndats; i++) - if (!strcmp(name, dat_names[i])) - return dat_offs[i]; - return 0; -} - -void o_datset(char *name, int off, unsigned bt) -{ - struct tmp *t = TMP(0); - int sym_off = dat_off(name) + off; - if (t->loc == LOC_NUM && !t->bt) { - num_cast(t, bt); - memcpy(ds + sym_off, &t->addr, BT_SZ(bt)); - } - if (t->loc == LOC_SYM && !t->bt) { - out_rel(t->sym, OUT_DS, sym_off); - memcpy(ds + sym_off, &t->off, BT_SZ(bt)); - } - tmp_drop(1); -} - -/* compiled division functions; div.s contains the source */ -static int udivdi3[] = { - 0xe3a02000, 0xe3a03000, 0xe1110001, 0x0a00000a, - 0xe1b0c211, 0xe2822001, 0x5afffffc, 0xe3a0c001, - 0xe2522001, 0x4a000004, 0xe1500211, 0x3afffffb, - 0xe0400211, 0xe083321c, 0xeafffff8, 0xe1a01000, - 0xe1a00003, 0xe1a0f00e, -}; -static int umoddi3[] = { - 0xe92d4000, 0xebffffeb, 0xe1a00001, 0xe8bd8000, -}; -static int divdi3[] = { - 0xe92d4030, 0xe1a04000, 0xe1a05001, 0xe1100000, - 0x42600000, 0xe1110001, 0x42611000, 0xebffffe1, - 0xe1340005, 0x42600000, 0xe1140004, 0x42611000, - 0xe8bd8030, -}; -static int moddi3[] = { - 0xe92d4000, 0xebfffff0, 0xe1a00001, 0xe8bd8000, -}; - -void o_write(int fd) -{ - if (putdiv) { - out_sym("__udivdi3", OUT_CS, cslen, 0); - os(udivdi3, sizeof(udivdi3)); - out_sym("__umoddi3", OUT_CS, cslen, 0); - os(umoddi3, sizeof(umoddi3)); - out_sym("__divdi3", OUT_CS, cslen, 0); - os(divdi3, sizeof(divdi3)); - out_sym("__moddi3", OUT_CS, cslen, 0); - os(moddi3, sizeof(moddi3)); - } - out_write(fd, cs, cslen, ds, dslen); -} - -/* ARM arch specific functions */ - -#define I_AND 0x00 -#define I_EOR 0x01 -#define I_SUB 0x02 -#define I_RSB 0x03 -#define I_ADD 0x04 -#define I_TST 0x08 -#define I_CMP 0x0a -#define I_ORR 0x0c -#define I_MOV 0x0d -#define I_MVN 0x0f - -/* for optimizing cmp + bcc */ -#define OPT_ISCMP() (last_cmp + 12 == cslen && last_set + 4 == cslen) -#define 
OPT_CCOND() (*(unsigned int *) ((void *) cs + last_set) >> 28) - -static long last_cmp = -1; -static long last_set = -1; - -#define MAXNUMS 1024 - -/* data pool */ -static long num_offs[MAXNUMS]; /* data immediate value */ -static char num_names[MAXNUMS][NAMELEN]; /* relocation data symbol name */ -static int nums; - -static int pool_find(char *name, int off) -{ - int i; - for (i = 0; i < nums; i++) - if (!strcmp(name, num_names[i]) && off == num_offs[i]) - return i; - return -1; -} - -static int pool_num(long num) -{ - int idx = pool_find("", num); - if (idx < 0) { - idx = nums++; - num_offs[idx] = num; - num_names[idx][0] = '\0'; - } - return idx << 2; -} - -static int pool_reloc(char *name, long off) -{ - int idx = pool_find(name, off); - if (idx < 0) { - idx = nums++; - num_offs[idx] = off; - strcpy(num_names[idx], name); - } - return idx << 2; -} - -static void pool_write(void) -{ - int i; - for (i = 0; i < nums; i++) { - if (num_names[i]) - out_rel(num_names[i], OUT_CS, cslen); - oi(num_offs[i]); - } -} - -/* - * data processing: - * +---------------------------------------+ - * |COND|00|I| op |S| Rn | Rd | operand2 | - * +---------------------------------------+ - * - * S: set condition code - * Rn: first operand - * Rd: destination operand - * - * I=0 operand2=| shift | Rm | - * I=1 operand2=|rota| imm | - */ -#define ADD(op, rd, rn, s, i, cond) \ - (((cond) << 28) | ((i) << 25) | ((s) << 20) | \ - ((op) << 21) | ((rn) << 16) | ((rd) << 12)) - -static int add_encimm(unsigned n) -{ - int i = 0; - while (i < 12 && (n >> ((4 + i) << 1))) - i++; - return (n >> (i << 1)) | (((16 - i) & 0x0f) << 8); -} - -static unsigned add_decimm(int n) -{ - int rot = (16 - ((n >> 8) & 0x0f)) & 0x0f; - return (n & 0xff) << (rot << 1); -} - -static int add_rndimm(unsigned n) -{ - int rot = (n >> 8) & 0x0f; - int num = n & 0xff; - if (rot == 0) - return n; - if (num == 0xff) { - num = 0; - rot = (rot + 12) & 0x0f; - } - return ((num + 1) & 0xff) | (rot << 8); -} - -static int 
opcode_add(int op) -{ - /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */ - static int rx[] = {I_ADD, I_SUB, I_AND, I_ORR, I_EOR}; - return rx[op & 0x0f]; -} - -static void i_add(int op, int rd, int rn, int rm) -{ - oi(ADD(opcode_add(op), rd, rn, 0, 0, 14) | rm); -} - -static int i_decodeable(long imm) -{ - return add_decimm(add_encimm(imm)) == imm; -} - -static void i_add_imm(int op, int rd, int rn, long n) -{ - oi(ADD(opcode_add(op), rd, rn, 0, 1, 14) | add_encimm(n)); -} - -static void i_num(int rd, long n) -{ - int enc = add_encimm(n); - if (n == add_decimm(enc)) { - oi(ADD(I_MOV, rd, 0, 0, 1, 14) | enc); - return; - } - enc = add_encimm(-n - 1); - if (~n == add_decimm(enc)) { - oi(ADD(I_MVN, rd, 0, 0, 1, 14) | enc); - return; - } - i_ldr(1, rd, REG_DP, pool_num(n), LONGSZ); -} - -static void i_add_anyimm(int rd, int rn, long n) -{ - int neg = n < 0; - int imm = add_encimm(neg ? -n : n); - if (imm == add_decimm(neg ? -n : n)) { - oi(ADD(neg ? I_SUB : I_ADD, rd, rn, 0, 1, 14) | imm); - } else { - i_num(rd, n); - i_add(O_ADD, rd, rd, rn); - } -} - -/* - * multiply - * +----------------------------------------+ - * |COND|000000|A|S| Rd | Rn | Rs |1001| Rm | - * +----------------------------------------+ - * - * Rd: destination - * A: accumulate - * C: set condition codes - * - * I=0 operand2=| shift | Rm | - * I=1 operand2=|rota| imm | - */ -#define MUL(rd, rn, rs) \ - ((14 << 28) | ((rd) << 16) | ((0) << 12) | ((rn) << 8) | ((9) << 4) | (rm)) - -static void i_mul(int rd, int rn, int rm) -{ - oi(MUL(rd, rn, rm)); -} - -static int opcode_set(int op) -{ - /* lt, gt, le, ge, eq, neq */ - static int ucond[] = {3, 8, 9, 2, 0, 1}; - static int scond[] = {11, 12, 13, 10, 0, 1}; - return op & O_SIGNED ? 
scond[op & 0x0f] : ucond[op & 0x0f]; -} - -static void i_tst(int rn, int rm) -{ - oi(ADD(I_TST, 0, rn, 1, 0, 14) | rm); -} - -static void i_cmp(int rn, int rm) -{ - last_cmp = cslen; - oi(ADD(I_CMP, 0, rn, 1, 0, 14) | rm); -} - -static void i_cmp_imm(int rn, long n) -{ - last_cmp = cslen; - oi(ADD(I_CMP, 0, rn, 1, 1, 14) | add_encimm(n)); -} - -static void i_set(int cond, int rd) -{ - oi(ADD(I_MOV, rd, 0, 0, 1, 14)); - last_set = cslen; - oi(ADD(I_MOV, rd, 0, 0, 1, opcode_set(cond)) | 1); -} - -#define SM_LSL 0 -#define SM_LSR 1 -#define SM_ASR 2 - -static int opcode_shl(int op) -{ - if (op & 0x0f) - return op & O_SIGNED ? SM_ASR : SM_LSR; - return SM_LSL; -} - -static void i_shl(int op, int rd, int rm, int rs) -{ - int sm = opcode_shl(op); - oi(ADD(I_MOV, rd, 0, 0, 0, 14) | (rs << 8) | (sm << 5) | (1 << 4) | rm); -} - -static void i_shl_imm(int op, int rd, int rn, long n) -{ - int sm = opcode_shl(op); - oi(ADD(I_MOV, rd, 0, 0, 0, 14) | (n << 7) | (sm << 5) | rn); -} - -static void i_mov(int rd, int rn) -{ - oi(ADD(I_MOV, rd, 0, 0, 0, 14) | rn); -} - -/* - * single data transfer: - * +------------------------------------------+ - * |COND|01|I|P|U|B|W|L| Rn | Rd | offset | - * +------------------------------------------+ - * - * I: immediate/offset - * P: post/pre indexing - * U: down/up - * B: byte/word - * W: writeback - * L: store/load - * Rn: base register - * Rd: source/destination register - * - * I=0 offset=| immediate | - * I=1 offset=| shift | Rm | - * - * halfword and signed data transfer - * +----------------------------------------------+ - * |COND|000|P|U|0|W|L| Rn | Rd |0000|1|S|H|1| Rm | - * +----------------------------------------------+ - * - * +----------------------------------------------+ - * |COND|000|P|U|1|W|L| Rn | Rd |off1|1|S|H|1|off2| - * +----------------------------------------------+ - * - * S: singed - * H: halfword - */ -#define LDR(l, rd, rn, b, u, p, w) \ - ((14 << 28) | (1 << 26) | ((p) << 24) | ((b) << 22) | ((u) << 23) | \ - 
((w) << 21) | ((l) << 20) | ((rn) << 16) | ((rd) << 12)) -#define LDRH(l, rd, rn, s, h, u, i) \ - ((14 << 28) | (1 << 24) | ((u) << 23) | ((i) << 22) | ((l) << 20) | \ - ((rn) << 16) | ((rd) << 12) | ((s) << 6) | ((h) << 5) | (9 << 4)) - -static void i_ldr(int l, int rd, int rn, int off, int bt) -{ - int b = BT_SZ(bt) == 1; - int h = BT_SZ(bt) == 2; - int s = l && (bt & BT_SIGNED); - int half = h || (b && s); - int maximm = half ? 0x100 : 0x1000; - int neg = off < 0; - if (neg) - off = -off; - while (off >= maximm) { - int imm = add_encimm(off); - oi(ADD(neg ? I_SUB : I_ADD, REG_TMP, rn, 0, 1, 14) | imm); - rn = REG_TMP; - off -= add_decimm(imm); - } - if (!half) - oi(LDR(l, rd, rn, b, !neg, 1, 0) | off); - else - oi(LDRH(l, rd, rn, s, h, !neg, 1) | - ((off & 0xf0) << 4) | (off & 0x0f)); -} - -static void i_sym(int rd, char *sym, int off) -{ - int doff = pool_reloc(sym, off); - i_ldr(1, rd, REG_DP, doff, LONGSZ); -} - -static void i_neg(int rd) -{ - oi(ADD(I_RSB, rd, rd, 0, 1, 14)); -} - -static void i_not(int rd) -{ - oi(ADD(I_MVN, rd, 0, 0, 0, 14) | rd); -} - -static int cond_nots[] = {1, 0, 3, 2, -1, -1, -1, -1, 9, 8, 11, 10, 13, 12, -1}; - -static void i_lnot(int rd) -{ - if (OPT_ISCMP()) { - unsigned int *lset = (void *) cs + last_set; - int cond = cond_nots[OPT_CCOND()]; - *lset = (*lset & 0x0fffffff) | (cond << 28); - return; - } - i_tst(rd, rd); - i_set(O_EQ, rd); -} - -/* rd = rd & ((1 << bits) - 1) */ -static void i_zx(int rd, int bits) -{ - if (bits <= 8) { - oi(ADD(I_AND, rd, rd, 0, 1, 14) | add_encimm((1 << bits) - 1)); - } else { - i_shl_imm(O_SHL, rd, rd, 32 - bits); - i_shl_imm(O_SHR, rd, rd, 32 - bits); - } -} - -static void i_sx(int rd, int bits) -{ - i_shl_imm(O_SHL, rd, rd, 32 - bits); - i_shl_imm(O_SIGNED | O_SHR, rd, rd, 32 - bits); -} - -/* - * branch: - * +-----------------------------------+ - * |COND|101|L| offset | - * +-----------------------------------+ - * - * L: link - */ -#define BL(cond, l, o) (((cond) << 28) | (5 << 25) | ((l) << 
24) | \ - ((((o) - 8) >> 2) & 0x00ffffff)) - -static void i_b(void) -{ - oi(BL(14, 0, 0)); -} - -static void i_bz(int rn, int z) -{ - if (OPT_ISCMP()) { - int cond = OPT_CCOND(); - cslen = last_cmp + 4; - last_set = -1; - oi(BL(z ? cond_nots[cond] : cond, 0, 0)); - return; - } - i_tst(rn, rn); - oi(BL(z ? 0 : 1, 0, 0)); -} - -static void i_b_fill(long src, long dst) -{ - long diff = dst - src - 8; - long *m = (long *) (cs + src); - *m = (*m & 0xff000000) | ((diff >> 2) & 0x00ffffff); -} - -static void i_memcpy(int rd, int rs, int rn) -{ - oi(ADD(I_SUB, rn, rn, 1, 1, 14) | 1); - oi(BL(4, 0, 16)); - oi(LDR(1, REG_TMP, rs, 1, 1, 0, 0) | 1); - oi(LDR(0, REG_TMP, rd, 1, 1, 0, 0) | 1); - oi(BL(14, 0, -16)); -} - -static void i_memset(int rd, int rs, int rn) -{ - oi(ADD(I_SUB, rn, rn, 1, 1, 14) | 1); - oi(BL(4, 0, 12)); - oi(LDR(0, rs, rd, 1, 1, 0, 0) | 1); - oi(BL(14, 0, -12)); -} - -static void i_call_reg(int rd) -{ - i_mov(REG_LR, REG_PC); - i_mov(REG_PC, rd); -} - -static void i_call(char *sym, int off) -{ - out_rel(sym, OUT_CS | OUT_REL24, cslen); - oi(BL(14, 1, off)); -} - -static void i_prolog(void) -{ - func_beg = cslen; - last_cmp = -1; - last_set = -1; - nums = 0; - oi(0xe1a0c00d); /* mov r12, sp */ - oi(0xe92d000f); /* stmfd sp!, {r0-r3} */ - oi(0xe92d5ff0); /* stmfd sp!, {r0-r11, r12, lr} */ - oi(0xe1a0b00d); /* mov fp, sp */ - oi(0xe24dd000); /* sub sp, sp, xx */ - oi(0xe28fa000); /* add dp, pc, xx */ -} - -static void i_epilog(void) -{ - int dpoff; - oi(0xe89baff0); /* ldmfd fp, {r4-r11, sp, pc} */ - dpoff = add_decimm(add_rndimm(add_encimm(cslen - func_beg - 28))); - cslen = func_beg + dpoff + 28; - maxsp = ALIGN(maxsp, 8); - maxsp = add_decimm(add_rndimm(add_encimm(maxsp))); - /* fill stack sub: sp = sp - xx */ - *(long *) (cs + func_beg + 16) |= add_encimm(maxsp); - /* fill data ptr addition: dp = pc + xx */ - *(long *) (cs + func_beg + 20) |= add_encimm(dpoff); - pool_write(); -} +#include +#include "tok.h" +#include "gen.h" +#include "out.h" + +#define 
MIN(a, b) ((a) < (b) ? (a) : (b)) +#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) +#define oi4(i) oi((i), 4) + +#define REG_DP 10 /* data pointer register */ +#define REG_TMP 12 /* temporary register */ +#define REG_LR 14 /* link register */ +#define REG_PC 15 /* program counter */ + +#define I_AND 0x00 +#define I_EOR 0x01 +#define I_SUB 0x02 +#define I_RSB 0x03 +#define I_ADD 0x04 +#define I_TST 0x08 +#define I_CMP 0x0a +#define I_ORR 0x0c +#define I_MOV 0x0d +#define I_MVN 0x0f + +int tmpregs[] = {4, 5, 6, 7, 8, 9, 3, 2, 1, 0}; +int argregs[] = {0, 1, 2, 3}; + +/* compiled division functions; div.s contains the source */ +static int udivdi3[] = { + 0xe3a02000, 0xe3a03000, 0xe1110001, 0x0a00000a, + 0xe1b0c211, 0xe2822001, 0x5afffffc, 0xe3a0c001, + 0xe2522001, 0x4a000004, 0xe1500211, 0x3afffffb, + 0xe0400211, 0xe083321c, 0xeafffff8, 0xe1a01000, + 0xe1a00003, 0xe1a0f00e, +}; +static int umoddi3[] = { + 0xe92d4000, 0xebffffeb, 0xe1a00001, 0xe8bd8000, +}; +static int divdi3[] = { + 0xe92d4030, 0xe1a04000, 0xe1a05001, 0xe1100000, + 0x42600000, 0xe1110001, 0x42611000, 0xebffffe1, + 0xe1340005, 0x42600000, 0xe1140004, 0x42611000, + 0xe8bd8030, +}; +static int moddi3[] = { + 0xe92d4000, 0xebfffff0, 0xe1a00001, 0xe8bd8000, +}; + +/* output div/mod functions */ +static int putdiv = 0; + +static void insert_spsub(void); + +static void i_div(char *func) +{ + putdiv = 1; + insert_spsub(); + i_call(func, 0); +} + +void i_done(void) +{ + if (putdiv) { + out_sym("__udivdi3", OUT_CS, cslen, 0); + os(udivdi3, sizeof(udivdi3)); + out_sym("__umoddi3", OUT_CS, cslen, 0); + os(umoddi3, sizeof(umoddi3)); + out_sym("__divdi3", OUT_CS, cslen, 0); + os(divdi3, sizeof(divdi3)); + out_sym("__moddi3", OUT_CS, cslen, 0); + os(moddi3, sizeof(moddi3)); + } +} + +/* for optimizing cmp + bcc */ +#define OPT_ISCMP() (last_cmp + 12 == cslen && last_set + 4 == cslen) +#define OPT_CCOND() (*(unsigned int *) ((void *) cs + last_set) >> 28) + +static long last_cmp = -1; +static long last_set = -1; + 
+#define MAXNUMS 1024 + +/* data pool */ +static long num_offs[MAXNUMS]; /* data immediate value */ +static char num_names[MAXNUMS][NAMELEN]; /* relocation data symbol name */ +static int nums; + +static int pool_find(char *name, int off) +{ + int i; + for (i = 0; i < nums; i++) + if (!strcmp(name, num_names[i]) && off == num_offs[i]) + return i; + return -1; +} + +static int pool_num(long num) +{ + int idx = pool_find("", num); + if (idx < 0) { + idx = nums++; + num_offs[idx] = num; + num_names[idx][0] = '\0'; + } + return idx << 2; +} + +static int pool_reloc(char *name, long off) +{ + int idx = pool_find(name, off); + if (idx < 0) { + idx = nums++; + num_offs[idx] = off; + strcpy(num_names[idx], name); + } + return idx << 2; +} + +static void pool_write(void) +{ + int i; + for (i = 0; i < nums; i++) { + if (num_names[i] && !pass1) + out_rel(num_names[i], OUT_CS, cslen); + oi4(num_offs[i]); + } +} + +/* + * data processing: + * +---------------------------------------+ + * |COND|00|I| op |S| Rn | Rd | operand2 | + * +---------------------------------------+ + * + * S: set condition code + * Rn: first operand + * Rd: destination operand + * + * I=0 operand2=| shift | Rm | + * I=1 operand2=|rota| imm | + */ +#define ADD(op, rd, rn, s, i, cond) \ + (((cond) << 28) | ((i) << 25) | ((s) << 20) | \ + ((op) << 21) | ((rn) << 16) | ((rd) << 12)) + +static int add_encimm(unsigned n) +{ + int i = 0; + while (i < 12 && (n >> ((4 + i) << 1))) + i++; + return (n >> (i << 1)) | (((16 - i) & 0x0f) << 8); +} + +static unsigned add_decimm(int n) +{ + int rot = (16 - ((n >> 8) & 0x0f)) & 0x0f; + return (n & 0xff) << (rot << 1); +} + +static int add_rndimm(unsigned n) +{ + int rot = (n >> 8) & 0x0f; + int num = n & 0xff; + if (rot == 0) + return n; + if (num == 0xff) { + num = 0; + rot = (rot + 12) & 0x0f; + } + return ((num + 1) & 0xff) | (rot << 8); +} + +static int opcode_add(int op) +{ + /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */ + static int rx[] = {I_ADD, I_SUB, I_AND, 
I_ORR, I_EOR}; + return rx[op & 0x0f]; +} + +static void i_add(int op, int rd, int rn, int rm) +{ + oi4(ADD(opcode_add(op), rd, rn, 0, 0, 14) | rm); +} + +int i_imm(int op, long imm) +{ + return (op & 0xf0) != 0x20 && add_decimm(add_encimm(imm)) == imm; +} + +static void i_add_imm(int op, int rd, int rn, long n) +{ + oi4(ADD(opcode_add(op), rd, rn, 0, 1, 14) | add_encimm(n)); +} + +static void i_ldr(int l, int rd, int rn, int off, int bt); + +void i_num(int rd, long n) +{ + int enc = add_encimm(n); + if (n == add_decimm(enc)) { + oi4(ADD(I_MOV, rd, 0, 0, 1, 14) | enc); + return; + } + enc = add_encimm(-n - 1); + if (~n == add_decimm(enc)) { + oi4(ADD(I_MVN, rd, 0, 0, 1, 14) | enc); + return; + } + i_ldr(1, rd, REG_DP, pool_num(n), LONGSZ); +} + +static void i_add_anyimm(int rd, int rn, long n) +{ + int neg = n < 0; + int imm = add_encimm(neg ? -n : n); + if (imm == add_decimm(neg ? -n : n)) { + oi4(ADD(neg ? I_SUB : I_ADD, rd, rn, 0, 1, 14) | imm); + } else { + i_num(rd, n); + i_add(O_ADD, rd, rd, rn); + } +} + +/* + * multiply + * +----------------------------------------+ + * |COND|000000|A|S| Rd | Rn | Rs |1001| Rm | + * +----------------------------------------+ + * + * Rd: destination + * A: accumulate + * S: set condition codes + * + * I=0 operand2=| shift | Rm | + * I=1 operand2=|rota| imm | + */ +#define MUL(rd, rn, rs) \ + ((14 << 28) | ((rd) << 16) | ((0) << 12) | ((rn) << 8) | ((9) << 4) | (rm)) + +static void i_mul(int rd, int rn, int rm) +{ + oi4(MUL(rd, rn, rm)); +} + +static int opcode_set(int op) +{ + /* lt, gt, le, ge, eq, neq */ + static int ucond[] = {3, 8, 9, 2, 0, 1}; + static int scond[] = {11, 12, 13, 10, 0, 1}; + return op & O_SIGNED ? 
scond[op & 0x0f] : ucond[op & 0x0f]; +} + +static void i_tst(int rn, int rm) +{ + oi4(ADD(I_TST, 0, rn, 1, 0, 14) | rm); +} + +static void i_cmp(int rn, int rm) +{ + last_cmp = cslen; + oi4(ADD(I_CMP, 0, rn, 1, 0, 14) | rm); +} + +static void i_cmp_imm(int rn, long n) +{ + last_cmp = cslen; + oi4(ADD(I_CMP, 0, rn, 1, 1, 14) | add_encimm(n)); +} + +static void i_set(int cond, int rd) +{ + oi4(ADD(I_MOV, rd, 0, 0, 1, 14)); + last_set = cslen; + oi4(ADD(I_MOV, rd, 0, 0, 1, opcode_set(cond)) | 1); +} + +#define SM_LSL 0 +#define SM_LSR 1 +#define SM_ASR 2 + +static int opcode_shl(int op) +{ + if (op & 0x0f) + return op & O_SIGNED ? SM_ASR : SM_LSR; + return SM_LSL; +} + +static void i_shl(int op, int rd, int rm, int rs) +{ + int sm = opcode_shl(op); + oi4(ADD(I_MOV, rd, 0, 0, 0, 14) | (rs << 8) | (sm << 5) | (1 << 4) | rm); +} + +static void i_shl_imm(int op, int rd, int rn, long n) +{ + int sm = opcode_shl(op); + oi4(ADD(I_MOV, rd, 0, 0, 0, 14) | (n << 7) | (sm << 5) | rn); +} + +void i_mov(int rd, int rn) +{ + oi4(ADD(I_MOV, rd, 0, 0, 0, 14) | rn); +} + +/* + * single data transfer: + * +------------------------------------------+ + * |COND|01|I|P|U|B|W|L| Rn | Rd | offset | + * +------------------------------------------+ + * + * I: immediate/offset + * P: post/pre indexing + * U: down/up + * B: byte/word + * W: writeback + * L: store/load + * Rn: base register + * Rd: source/destination register + * + * I=0 offset=| immediate | + * I=1 offset=| shift | Rm | + * + * halfword and signed data transfer + * +----------------------------------------------+ + * |COND|000|P|U|0|W|L| Rn | Rd |0000|1|S|H|1| Rm | + * +----------------------------------------------+ + * + * +----------------------------------------------+ + * |COND|000|P|U|1|W|L| Rn | Rd |off1|1|S|H|1|off2| + * +----------------------------------------------+ + * + * S: signed + * H: halfword + */ +#define LDR(l, rd, rn, b, u, p, w) \ + ((14 << 28) | (1 << 26) | ((p) << 24) | ((b) << 22) | ((u) << 23) | \ + 
((w) << 21) | ((l) << 20) | ((rn) << 16) | ((rd) << 12)) +#define LDRH(l, rd, rn, s, h, u, i) \ + ((14 << 28) | (1 << 24) | ((u) << 23) | ((i) << 22) | ((l) << 20) | \ + ((rn) << 16) | ((rd) << 12) | ((s) << 6) | ((h) << 5) | (9 << 4)) + +static void i_ldr(int l, int rd, int rn, int off, int bt) +{ + int b = BT_SZ(bt) == 1; + int h = BT_SZ(bt) == 2; + int s = l && (bt & BT_SIGNED); + int half = h || (b && s); + int maximm = half ? 0x100 : 0x1000; + int neg = off < 0; + if (neg) + off = -off; + while (off >= maximm) { + int imm = add_encimm(off); + oi4(ADD(neg ? I_SUB : I_ADD, REG_TMP, rn, 0, 1, 14) | imm); + rn = REG_TMP; + off -= add_decimm(imm); + } + if (!half) + oi4(LDR(l, rd, rn, b, !neg, 1, 0) | off); + else + oi4(LDRH(l, rd, rn, s, h, !neg, 1) | + ((off & 0xf0) << 4) | (off & 0x0f)); +} + +void i_load(int rd, int rn, int off, int bt) +{ + i_ldr(1, rd, rn, off, bt); +} + +void i_save(int rd, int rn, int off, int bt) +{ + i_ldr(0, rd, rn, off, bt); +} + +void i_sym(int rd, char *sym, int off) +{ + int doff = pool_reloc(sym, off); + i_ldr(1, rd, REG_DP, doff, LONGSZ); +} + +static void i_neg(int rd, int r1) +{ + oi4(ADD(I_RSB, rd, r1, 0, 1, 14)); +} + +static void i_not(int rd, int r1) +{ + oi4(ADD(I_MVN, rd, 0, 0, 0, 14) | r1); +} + +static int cond_nots[] = {1, 0, 3, 2, -1, -1, -1, -1, 9, 8, 11, 10, 13, 12, -1}; + +static void i_lnot(int rd, int r1) +{ + if (OPT_ISCMP()) { + unsigned int *lset = (void *) cs + last_set; + int cond = cond_nots[OPT_CCOND()]; + *lset = (*lset & 0x0fffffff) | (cond << 28); + return; + } + i_tst(r1, r1); + i_set(O_EQ, rd); +} + +/* rd = rd & ((1 << bits) - 1) */ +static void i_zx(int rd, int r1, int bits) +{ + if (bits <= 8) { + oi4(ADD(I_AND, rd, r1, 0, 1, 14) | add_encimm((1 << bits) - 1)); + } else { + i_shl_imm(O_SHL, rd, r1, 32 - bits); + i_shl_imm(O_SHR, rd, rd, 32 - bits); + } +} + +static void i_sx(int rd, int r1, int bits) +{ + i_shl_imm(O_SHL, rd, r1, 32 - bits); + i_shl_imm(O_SIGNED | O_SHR, rd, rd, 32 - bits); +} + +/* 
+ * branch: + * +-----------------------------------+ + * |COND|101|L| offset | + * +-----------------------------------+ + * + * L: link + */ +#define BL(cond, l, o) (((cond) << 28) | (5 << 25) | ((l) << 24) | \ + ((((o) - 8) >> 2) & 0x00ffffff)) +void i_jmp(int rn, int jc, int nbytes) +{ + if (!nbytes) + return; + if (rn < 0) { + oi4(BL(14, 0, 0)); + return; + } + if (OPT_ISCMP()) { + int cond = OPT_CCOND(); + cslen = last_cmp + 4; + last_set = -1; + oi4(BL(jc ? cond_nots[cond] : cond, 0, 0)); + return; + } + i_tst(rn, rn); + oi4(BL(jc ? 0 : 1, 0, 0)); +} + +long i_fill(long src, long dst, int nbytes) +{ + long *d = (void *) cs + src - 4; + if (!nbytes) + return 0; + *d = (*d & 0xff000000) | (((dst - src - 4) >> 2) & 0x00ffffff); + return dst - src; +} + +void i_memcpy(int rd, int rs, int rn) +{ + oi4(ADD(I_SUB, rn, rn, 1, 1, 14) | 1); + oi4(BL(4, 0, 16)); + oi4(LDR(1, REG_TMP, rs, 1, 1, 0, 0) | 1); + oi4(LDR(0, REG_TMP, rd, 1, 1, 0, 0) | 1); + oi4(BL(14, 0, -16)); +} + +void i_memset(int rd, int rs, int rn) +{ + oi4(ADD(I_SUB, rn, rn, 1, 1, 14) | 1); + oi4(BL(4, 0, 12)); + oi4(LDR(0, rs, rd, 1, 1, 0, 0) | 1); + oi4(BL(14, 0, -12)); +} + +void i_call_reg(int rd) +{ + i_mov(REG_LR, REG_PC); + i_mov(REG_PC, rd); +} + +void i_call(char *sym, int off) +{ + if (!pass1) + out_rel(sym, OUT_CS | OUT_RLREL | OUT_RL24, cslen); + oi4(BL(14, 1, off)); +} + +void i_reg(int op, int *rd, int *r1, int *r2, int *tmp) +{ + *rd = R_TMPS; + *r1 = R_TMPS; + *r2 = (op & O_IMM || (op & 0xf0) == 0x40) ? 0 : R_TMPS; + *tmp = 0; + if ((op & 0xff) == O_DIV || (op & 0xff) == O_MOD) { + *rd = 1 << REG_RET; + *r1 = 1 << argregs[0]; + *r2 = 1 << argregs[1]; + *tmp = R_TMPS & ~R_SAVED; + } +} + +void i_op(int op, int rd, int r1, int r2) +{ + if ((op & 0xf0) == 0x00) + i_add(op, rd, r1, r2); + if ((op & 0xf0) == 0x10) + i_shl(op, rd, r1, r2); + if ((op & 0xf0) == 0x20) { + if ((op & 0xff) == O_MUL) + i_mul(rd, r1, r2); + if ((op & 0xff) == O_DIV) + i_div(op & O_SIGNED ? 
"__divdi3" : "__udivdi3"); + if ((op & 0xff) == O_MOD) + i_div(op & O_SIGNED ? "__moddi3" : "__umoddi3"); + return; + } + if ((op & 0xf0) == 0x30) { + i_cmp(r1, r2); + i_set(op, rd); + return; + } + if ((op & 0xf0) == 0x40) { /* uop */ + if ((op & 0xff) == O_NEG) + i_neg(rd, r1); + if ((op & 0xff) == O_NOT) + i_not(rd, r1); + if ((op & 0xff) == O_LNOT) + i_lnot(rd, r1); + return; + } +} + +void i_op_imm(int op, int rd, int r1, long n) +{ + if ((op & 0xf0) == 0x00) { + if (i_imm(O_ADD, n)) + i_add_imm(op, rd, r1, n); + else + i_add_anyimm(rd, r1, n); + } + if ((op & 0xf0) == 0x10) /* shl */ + i_shl_imm(op, rd, r1, n); + if ((op & 0xf0) == 0x30) { /* imm */ + i_cmp_imm(r1, n); + i_set(op, rd); + } + if ((op & 0xf0) == 0x50) { /* etc */ + if ((op & 0xff) == O_ZX) + i_zx(rd, r1, n); + if ((op & 0xff) == O_SX) + i_sx(rd, r1, n); + if ((op & 0xff) == O_MOV) + i_mov(rd, r1); + } +} + +static int func_argc; +static int func_varg; +static int func_spsub; +static int func_sargs; +static int func_sregs; +static int func_initfp; +static int func_initdp = 1; +static int spsub_addr; +static int dpadd_addr; + +static int saved_regs(int args) +{ + int n = 2; + int i; + for (i = 0; i < N_REGS; i++) { + if ((1 << i) & func_sregs) + n++; + if (args && (1 << i) & func_sargs) + n++; + } + return n; +} + +int i_args(void) +{ + return saved_regs(0) * LONGSZ; +} + +int i_sp(void) +{ + return 0; +} + +static int plain_function(void) +{ + return !func_initfp && !func_spsub && !func_initdp && !func_varg && + !func_sargs && !func_sregs && func_argc <= N_ARGS; +} + +static void insert_spsub(void) +{ + if (!func_spsub) { + func_spsub = 1; + spsub_addr = cslen; + oi4(0xe24dd000); /* sub sp, sp, xx */ + } +} + +void i_prolog(int argc, int varg, int sargs, int sregs, int initfp, int spsub) +{ + last_set = -1; + nums = 0; + func_argc = argc; + func_varg = varg; + func_sargs = sargs; + func_sregs = sregs; + func_initfp = initfp; + func_spsub = 0; + if (plain_function()) + return; + if (initfp) + 
func_sregs |= 1 << REG_FP; + if (func_initdp) + func_sregs |= 1 << REG_DP; + /* stack should remain 8-aligned */ + if (saved_regs(1) & 0x1) + func_sregs |= 8; + oi4(0xe1a0c00d); /* mov r12, sp */ + if (func_sargs) + oi4(0xe92d0000 | func_sargs); /* stmfd sp!, {r0-r3} */ + oi4(0xe92d5000 | func_sregs); /* stmfd sp!, {r0-r11, r12, lr} */ + if (func_initfp) + oi4(0xe1a0b00d); /* mov fp, sp */ + if (spsub) + insert_spsub(); + if (func_initdp) { + dpadd_addr = cslen; + oi4(0xe28fa000); /* add dp, pc, xx */ + } +} + +void i_epilog(int sp_max) +{ + sp_max = -sp_max; + if (plain_function()) { + oi4(0xe1a0f00e); /* mov pc, lr */ + return; + } + if (func_initfp) + oi4(0xe89ba000 | func_sregs);/* ldmfd fp, {r4-r11, sp, pc} */ + if (!func_initfp) + oi4(0xe89da000 | func_sregs);/* ldmfd sp, {r4-r11, sp, pc} */ + if (func_initdp) { + int dpoff = cslen - dpadd_addr - 8; + dpoff = add_decimm(add_rndimm(add_encimm(dpoff))); + cslen = dpadd_addr + dpoff + 8; + /* fill data ptr addition: dp = pc + xx */ + *(long *) (cs + dpadd_addr) |= add_encimm(dpoff); + } + if (func_initfp && func_spsub) { + sp_max = ALIGN(sp_max, 8); + sp_max = add_decimm(add_rndimm(add_encimm(sp_max))); + /* fill stack sub: sp = sp - xx */ + *(long *) (cs + spsub_addr) |= add_encimm(sp_max); + } + pool_write(); +} diff --git a/arm.h b/arm.h new file mode 100644 index 0000000..ae54ed4 --- /dev/null +++ b/arm.h @@ -0,0 +1,18 @@ +#define LONGSZ 4 /* word size */ +#define I_ARCH "__arm__" + +#define N_REGS 16 /* number of registers */ +#define N_ARGS 4 /* number of arg registers */ +#define N_TMPS 10 /* number of tmp registers */ +#define R_TMPS 0x03ff /* mask of tmp registers */ +#define R_ARGS 0x000f /* mask of arg registers */ +#define R_SAVED 0x0ff0 /* mask of callee-saved registers */ + +#define R_CALL R_TMPS /* mask of regs that can hold call dst */ +#define R_BYTE R_TMPS /* mask of regs that can perform byte-wide instructions */ + +/* special registers */ +#define REG_FP 11 /* frame pointer register */ 
+#define REG_SP 13 /* stack pointer register */ +#define REG_RET 0 /* returned value register */ +#define REG_FORK 0 /* result of conditional branches */ diff --git a/cpp.c b/cpp.c index ae8d8c5..9a56f99 100644 --- a/cpp.c +++ b/cpp.c @@ -52,6 +52,7 @@ static struct buf { int arg_buf; /* the bufs index of the owning macro */ } bufs[MAXBUFS]; static int nbufs; +static int bufs_limit = 1; /* cpp_read() limit; useful in cpp_eval() */ void die(char *fmt, ...) { @@ -148,10 +149,12 @@ int cpp_init(char *path) return include_file(path); } -static void jumpws(void) +static int jumpws(void) { + int old = cur; while (cur < len && isspace(buf[cur])) cur++; + return cur == old; } static void read_word(char *dst) @@ -262,7 +265,7 @@ static void readarg(char *s) { int depth = 0; int beg = cur; - while (cur < len && (depth || buf[cur] != ',' && buf[cur] != ')')) { + while (cur < len && (depth || (buf[cur] != ',' && buf[cur] != ')'))) { if (!jumpstr() || !jumpcomment()) continue; switch (buf[cur++]) { @@ -347,16 +350,18 @@ static long evalexpr(void); static int cpp_eval(void) { - int bufid; - int ret; char evalbuf[BUFSIZE]; + int old_limit; + int ret, nr; read_tilleol(evalbuf); buf_new(BUF_EVAL, evalbuf, strlen(evalbuf)); - bufid = nbufs; elen = 0; ecur = 0; - while (bufid < nbufs || (bufid == nbufs && cur < len)) - elen += cpp_read(ebuf + elen); + old_limit = bufs_limit; + bufs_limit = nbufs; + while ((nr = cpp_read(ebuf + elen)) >= 0) + elen += nr; + bufs_limit = old_limit; ret = evalexpr(); buf_pop(); return ret; @@ -442,7 +447,7 @@ static int cpp_cmd(void) file[e - s] = '\0'; cur += e - s + 2; if (include_find(file, *e == '>') == -1) - die("cannot include <%s>\n", file); + err("cannot include <%s>\n", file); return 0; } return 1; @@ -471,12 +476,10 @@ static int buf_arg_find(char *name) return -1; } -static void macro_expand(void) +static void macro_expand(char *name) { - char name[NAMELEN]; struct macro *m; int mbuf; - read_word(name); if ((mbuf = buf_arg_find(name)) >= 0) { 
int arg = macro_arg(bufs[mbuf].macro, name); char *dat = bufs[mbuf].args[arg]; @@ -522,11 +525,16 @@ static int buf_expanding(char *macro) return 0; } +/* return 1 for plain macros and arguments and 2 for function macros */ static int expandable(char *word) { + int i; if (buf_arg_find(word) >= 0) return 1; - return !buf_expanding(word) && macro_find(word) != -1; + if (buf_expanding(word)) + return 0; + i = macro_find(word); + return i >= 0 ? macros[i].isfunc + 1 : 0; } void cpp_define(char *name, char *def) @@ -541,21 +549,23 @@ void cpp_define(char *name, char *def) buf_pop(); } -static int seen_macro; +static int seen_macro; /* seen a macro; 2 if a function macro */ +static char seen_name[NAMELEN]; /* the name of the last macro */ static int hunk_off; static int hunk_len; int cpp_read(char *s) { - int old; - if (seen_macro) { + int old, end; + int jump_name = 0; + if (seen_macro == 1) { + macro_expand(seen_name); seen_macro = 0; - macro_expand(); } if (cur == len) { struct buf *cbuf = &bufs[nbufs - 1]; - if (nbufs < 2) + if (nbufs < bufs_limit + 1) return -1; if (cbuf->type == BUF_FILE) free(buf); @@ -566,18 +576,28 @@ int cpp_read(char *s) if (!cpp_cmd()) return 0; while (cur < len) { + if (!jumpws()) + continue; if (buf[cur] == '#') break; if (!jumpcomment()) continue; + if (seen_macro == 2) { + if (buf[cur] == '(') + macro_expand(seen_name); + seen_macro = 0; + old = cur; + continue; + } if (!jumpstr()) continue; if (isalpha(buf[cur]) || buf[cur] == '_') { char word[NAMELEN]; read_word(word); - if (expandable(word)) { - cur -= strlen(word); - seen_macro = 1; + seen_macro = expandable(word); + if (seen_macro) { + strcpy(seen_name, word); + jump_name = 1; break; } if (buf_iseval() && !strcmp("defined", word)) { @@ -597,13 +617,15 @@ int cpp_read(char *s) } cur++; } - memcpy(s, buf + old, cur - old); - s[cur - old] = '\0'; + /* macros are expanded later; ignore its name */ + end = jump_name ? 
cur - strlen(seen_name) : cur; + memcpy(s, buf + old, end - old); + s[end - old] = '\0'; if (!buf_iseval()) { hunk_off += hunk_len; - hunk_len = cur - old; + hunk_len = end - old; } - return cur - old; + return end - old; } /* preprocessor constant expression evaluation */ diff --git a/gen.c b/gen.c new file mode 100644 index 0000000..8850986 --- /dev/null +++ b/gen.c @@ -0,0 +1,1080 @@ +#include +#include +#include +#include "gen.h" +#include "out.h" +#include "tok.h" + +/* variable location */ +#define LOC_REG 0x01 +#define LOC_MEM 0x02 +#define LOC_NUM 0x04 +#define LOC_SYM 0x08 +#define LOC_LOCAL 0x10 + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) + +char cs[SECSIZE]; /* code segment */ +int cslen; +static char ds[SECSIZE]; /* data segment */ +static int dslen; +static long bsslen; /* bss segment size */ + +static long sp; /* stack pointer offset from R_RBP */ +static long sp_max; /* maximum stack pointer offset */ +static long sp_tmp; /* sp for the first tmp on the stack */ + +#define TMP(i) (((i) < ntmp) ? 
&tmps[ntmp - 1 - (i)] : NULL) + +static struct tmp { + long addr; + char sym[NAMELEN]; + long off; /* offset from a symbol or a local */ + unsigned loc; /* variable location */ + unsigned bt; /* type of address; zero when not a pointer */ + int id; /* local variable id */ +} tmps[MAXTMP]; +static int ntmp; + +static struct tmp *regs[N_REGS]; + +#define MAXLOCALS (1 << 10) +static struct local { + int loc; + int sz; + int n_addr; /* # of address accesses */ + int n_access; /* # of accesses */ +} locals[MAXLOCALS]; +static int nlocals; + +/* function info */ +static int func_beg; +static int func_argc; +static int func_varg; + +/* function statistics */ +int pass1; /* collect statistics; 1st pass */ +static int stat_calls; /* # of function calls */ +static int stat_tmps; /* # of stack temporaries */ +static int stat_regs; /* mask of used registers */ + +/* optimization info */ +static int pass2; /* use the collected statistics in the 1st pass */ +static int tmp_mask; /* registers that can be used for tmps */ +static int opt_sargs; /* saved args */ +static int opt_isreg[MAXLOCALS];/* is a register allocated to a local? */ +static int opt_lreg[MAXLOCALS]; /* registers allocated to locals */ +static int opt_lregs; /* mask of registers allocated to locals */ + +#define TMP_ISLREG(t) (!(t)->bt && (t)->loc == LOC_LOCAL && opt_isreg[(t)->id]) +#define TMP_LREG(t) (opt_lreg[(t)->id]) + +/* labels and jmps */ +#define MAXJMPS (1 << 14) + +static long labels[MAXJMPS]; +static int nlabels; +static long jmp_loc[MAXJMPS]; +static int jmp_goal[MAXJMPS]; +static int jmp_len[MAXJMPS]; +static int njmps; + +void o_label(int id) +{ + if (id > nlabels) + nlabels = id + 1; + labels[id] = cslen; +} + +/* the number of bytes needed for holding jmp displacement */ +static int jmp_sz(int id) +{ + long n = jmp_len[id] > 0 ? jmp_len[id] : -jmp_len[id]; + if (!pass2) + return 4; + if (n < 0x70) + return n == 0 ? 0 : 1; + return n < 0x7000 ? 
2 : 4; +} + +static void jmp_add(int id, int rn, int z) +{ + if (njmps >= MAXJMPS) + err("nomem: MAXJMPS reached!\n"); + i_jmp(rn, z, jmp_sz(njmps)); + jmp_loc[njmps] = cslen; + jmp_goal[njmps] = id; + njmps++; +} + +static void jmp_fill(void) +{ + int i; + for (i = 0; i < njmps; i++) + jmp_len[i] = i_fill(jmp_loc[i], labels[jmp_goal[i]], jmp_sz(i)); +} + +/* generating code */ + +void os(void *s, int n) +{ + while (n--) + cs[cslen++] = *(char *) (s++); +} + +void oi(long n, int l) +{ + while (l--) { + cs[cslen++] = n; + n >>= 8; + } +} + +static long sp_push(int sz) +{ + sp -= ALIGN(sz, LONGSZ); + if (sp < sp_max) + sp_max = sp; + return sp; +} + +static void tmp_mem(struct tmp *tmp) +{ + int src = tmp->addr; + if (tmp->loc != LOC_REG || (1 << src) & opt_lregs) + return; + if (sp_tmp == -1) + sp_tmp = sp; + tmp->addr = sp_push(LONGSZ); + i_save(src, REG_FP, tmp->addr, LONGSZ); + stat_tmps++; + regs[src] = NULL; + tmp->loc = LOC_MEM; +} + +static void num_cast(struct tmp *t, unsigned bt) +{ + if (!(bt & BT_SIGNED) && BT_SZ(bt) != LONGSZ) + t->addr &= ((1l << (long) (BT_SZ(bt) * 8)) - 1); + if (bt & BT_SIGNED && BT_SZ(bt) != LONGSZ && + t->addr > (1l << (BT_SZ(bt) * 8 - 1))) + t->addr = -((1l << (BT_SZ(bt) * 8)) - t->addr); +} + +static void tmp_reg(struct tmp *tmp, int dst, int deref) +{ + int bt = tmp->bt; + if (!tmp->bt) + deref = 0; + if (deref) + tmp->bt = 0; + if (tmp->loc == LOC_NUM) { + i_num(dst, tmp->addr); + tmp->addr = dst; + regs[dst] = tmp; + tmp->loc = LOC_REG; + } + if (tmp->loc == LOC_SYM) { + i_sym(dst, tmp->sym, tmp->off); + tmp->addr = dst; + regs[dst] = tmp; + tmp->loc = LOC_REG; + } + if (tmp->loc == LOC_REG) { + if (deref) + i_load(dst, tmp->addr, 0, bt); + else if (dst != tmp->addr) + i_mov(dst, tmp->addr); + regs[tmp->addr] = NULL; + } + if (tmp->loc == LOC_LOCAL) { + if (deref) + locals[tmp->id].n_access++; + else + locals[tmp->id].n_addr++; + if (deref) + i_load(dst, REG_FP, tmp->addr + tmp->off, bt); + else + i_op_imm(O_ADD, dst, REG_FP, 
tmp->addr + tmp->off); + } + if (tmp->loc == LOC_MEM) { + i_load(dst, REG_FP, tmp->addr, LONGSZ); + if (deref) + i_load(dst, dst, 0, bt); + } + tmp->addr = dst; + stat_regs |= 1 << dst; + regs[dst] = tmp; + tmp->loc = LOC_REG; +} + +/* empty the given register, but never touch the registers in rsrvd mask */ +static void reg_free(int reg, int rsrvd) +{ + int i; + if (!regs[reg]) + return; + rsrvd |= ~tmp_mask; + for (i = 0; i < N_TMPS; i++) + if (!regs[tmpregs[i]] && ~rsrvd & (1 << tmpregs[i])) { + tmp_reg(regs[reg], tmpregs[i], 0); + return; + } + tmp_mem(regs[reg]); +} + +static void reg_for(int reg, struct tmp *t) +{ + if (regs[reg] && regs[reg] != t) + reg_free(reg, 0); +} + +static void tmp_mv(struct tmp *t, int reg) +{ + reg_for(reg, t); + tmp_reg(t, reg, 0); +} + +static void tmp_to(struct tmp *t, int reg) +{ + reg_for(reg, t); + if (t->loc == LOC_LOCAL && TMP_ISLREG(t)) { + t->loc = LOC_REG; + t->addr = TMP_LREG(t); + t->bt = 0; + } + tmp_reg(t, reg, 1); +} + +static void tmp_drop(int n) +{ + int i; + for (i = ntmp - n; i < ntmp; i++) + if (tmps[i].loc == LOC_REG) + regs[tmps[i].addr] = NULL; + ntmp -= n; +} + +static void tmp_pop(int reg) +{ + struct tmp *t = TMP(0); + tmp_to(t, reg); + tmp_drop(1); +} + +static struct tmp *tmp_new(void) +{ + return &tmps[ntmp++]; +} + +static void tmp_push(int reg) +{ + struct tmp *t = tmp_new(); + t->addr = reg; + t->bt = 0; + stat_regs |= 1 << reg; + t->loc = LOC_REG; + regs[reg] = t; +} + +void o_local(long addr) +{ + struct tmp *t = tmp_new(); + t->addr = locals[addr].loc; + t->id = addr; + t->loc = LOC_LOCAL; + t->bt = 0; + t->off = 0; +} + +void o_num(long num) +{ + struct tmp *t = tmp_new(); + t->addr = num; + t->bt = 0; + t->loc = LOC_NUM; +} + +void o_sym(char *name) +{ + struct tmp *t = tmp_new(); + strcpy(t->sym, name); + t->loc = LOC_SYM; + t->bt = 0; + t->off = 0; +} + +void o_tmpdrop(int n) +{ + if (n == -1 || n > ntmp) + n = ntmp; + tmp_drop(n); + if (!ntmp) { + if (sp_tmp != -1) + sp = sp_tmp; + sp_tmp = 
-1; + } +} + +/* make sure tmps remain intact after a conditional expression */ +void o_fork(void) +{ + int i; + for (i = 0; i < ntmp - 1; i++) + tmp_mem(&tmps[i]); +} + +void o_forkpush(void) +{ + tmp_pop(REG_FORK); +} + +void o_forkjoin(void) +{ + tmp_push(REG_FORK); +} + +void o_tmpswap(void) +{ + struct tmp *t1 = TMP(0); + struct tmp *t2 = TMP(1); + struct tmp t; + memcpy(&t, t1, sizeof(t)); + memcpy(t1, t2, sizeof(t)); + memcpy(t2, &t, sizeof(t)); + if (t1->loc == LOC_REG) + regs[t1->addr] = t1; + if (t2->loc == LOC_REG) + regs[t2->addr] = t2; +} + +static int reg_get(int mask) +{ + int i; + mask &= tmp_mask; + for (i = 0; i < N_TMPS; i++) + if ((1 << tmpregs[i]) & mask && !regs[tmpregs[i]]) { + stat_regs |= 1 << tmpregs[i]; + return tmpregs[i]; + } + for (i = 0; i < N_TMPS; i++) + if ((1 << tmpregs[i]) & mask) { + reg_free(tmpregs[i], 0); + stat_regs |= 1 << tmpregs[i]; + return tmpregs[i]; + } + die("reg_get: out of registers!\n"); + return 0; +} + +static int reg_tmp(struct tmp *t, int mask, int readonly) +{ + if (t->loc == LOC_REG && (mask & (1 << t->addr))) + if (!(opt_lregs & (1 << t->addr)) || (readonly && !t->bt)) + return t->addr; + return reg_get(mask); +} + +static int reg_tmpn(struct tmp *t, int notmask, int readonly) +{ + if (t->loc == LOC_REG && !(notmask & (1 << t->addr))) + if (!(opt_lregs & (1 << t->addr)) || (readonly && !t->bt)) + return t->addr; + return reg_get(~notmask); +} + +static void tmp_copy(struct tmp *t1) +{ + struct tmp *t2 = tmp_new(); + memcpy(t2, t1, sizeof(*t1)); + if (!(t1->loc & (LOC_REG | LOC_MEM))) + return; + if (t1->loc == LOC_MEM) { + tmp_mv(t2, reg_get(R_TMPS)); + } else if (t1->loc == LOC_REG) { + t2->addr = reg_tmpn(t2, 1 << t1->addr, 0); + i_mov(t2->addr, t1->addr); + regs[t2->addr] = t2; + stat_regs |= 1 << t2->addr; + } +} + +void o_tmpcopy(void) +{ + tmp_copy(TMP(0)); +} + +void o_deref(unsigned bt) +{ + struct tmp *t = TMP(0); + if (TMP_ISLREG(t)) { + t->loc = LOC_REG; + t->addr = TMP_LREG(t); + } else { + if 
(t->bt) + tmp_to(t, reg_tmp(t, R_TMPS, 0)); + t->bt = bt; + } +} + +void o_load(void) +{ + struct tmp *t = TMP(0); + tmp_to(t, reg_tmp(t, R_TMPS, 0)); +} + +#define TMP_NUM(t) ((t)->loc == LOC_NUM && !(t)->bt) +#define LOCAL_PTR(t) ((t)->loc == LOC_LOCAL && !(t)->bt) +#define SYM_PTR(t) ((t)->loc == LOC_SYM && !(t)->bt) + +int o_popnum(long *c) +{ + struct tmp *t = TMP(0); + if (!TMP_NUM(t)) + return 1; + *c = t->addr; + tmp_drop(1); + return 0; +} + +void o_ret(int rets) +{ + if (rets) + tmp_pop(REG_RET); + else + i_num(REG_RET, 0); + o_jmp(0); +} + +long o_mklocal(int sz) +{ + locals[nlocals].loc = sp_push(ALIGN(sz, LONGSZ)); + locals[nlocals].sz = sz; + return nlocals++; +} + +void o_rmlocal(long addr, int sz) +{ +} + +long o_arg2loc(int i) +{ + return i; +} + +#define MOVXX(bt) ((BT_SZ(bt) == LONGSZ ? O_MOV : ((bt) & BT_SIGNED ? O_SX : O_ZX))) + +void o_assign(unsigned bt) +{ + struct tmp *t1 = TMP(0); + struct tmp *t2 = TMP(1); + int r1 = reg_tmp(t1, BT_SZ(bt) > 1 ? R_TMPS : R_BYTE, 1); + int r2 = reg_tmpn(t2, 1 << r1, 1); + int off = 0; + tmp_to(t1, r1); + if (TMP_ISLREG(t2)) { + i_op_imm(MOVXX(bt), TMP_LREG(t2), r1, BT_SZ(bt) * 8); + goto done; + } + if (t2->bt) + tmp_to(t2, r2); + if (t2->loc == LOC_LOCAL) { + r2 = REG_FP; + off = t2->addr + t2->off; + locals[t2->id].n_access++; + } else { + tmp_to(t2, r2); + } + i_save(r1, r2, off, bt); +done: + tmp_drop(2); + tmp_push(r1); +} + +static long cu(int op, long i) +{ + switch (op & 0xff) { + case O_NEG: + return -i; + case O_NOT: + return ~i; + case O_LNOT: + return !i; + } + return 0; +} + +static int c_uop(int op) +{ + struct tmp *t1 = TMP(0); + if (!TMP_NUM(t1)) + return 1; + tmp_drop(1); + o_num(cu(op, t1->addr)); + return 0; +} + +static long cb(int op, long a, long b) +{ + switch (op & 0xff) { + case O_ADD: + return a + b; + case O_SUB: + return a - b; + case O_AND: + return a & b; + case O_OR: + return a | b; + case O_XOR: + return a ^ b; + case O_MUL: + return a * b; + case O_DIV: + return a / b; + 
case O_MOD: + return a % b; + case O_SHL: + return a << b; + case O_SHR: + if (op & O_SIGNED) + return a >> b; + else + return (unsigned long) a >> b; + case O_LT: + return a < b; + case O_GT: + return a > b; + case O_LE: + return a <= b; + case O_GE: + return a >= b; + case O_EQ: + return a == b; + case O_NEQ: + return a != b; + } + return 0; +} + +static int c_bop(int op) +{ + struct tmp *t1 = TMP(0); + struct tmp *t2 = TMP(1); + int locs = LOCAL_PTR(t1) + LOCAL_PTR(t2); + int syms = SYM_PTR(t1) + SYM_PTR(t2); + int nums = TMP_NUM(t1) + TMP_NUM(t2); + if (syms + locs == 2 || syms + nums + locs != 2) + return 1; + if (nums == 1) + if ((op & 0xff) != O_ADD && ((op & 0xff) != O_SUB || TMP_NUM(t2))) + return 1; + if (nums == 1) { + long o1 = TMP_NUM(t1) ? t1->addr : t1->off; + long o2 = TMP_NUM(t2) ? t2->addr : t2->off; + long ret = cb(op, o2, o1); + if (!TMP_NUM(t1)) + o_tmpswap(); + if (t2->loc == LOC_LOCAL) + locals[t2->id].n_addr++; + t2->off = ret; + tmp_drop(1); + } else { + long ret = cb(op, t2->addr, t1->addr); + tmp_drop(2); + o_num(ret); + } + return 0; +} + +/* allocate registers for the given binary or unary instruction */ +static void regs2(int op, int *rd, int *r1, int *r2) +{ + int md, m1, m2, mt; + int all = 0; + int i; + i_reg(op, &md, &m1, &m2, &mt); + if (m2) { + struct tmp *t2 = TMP(0); + *r2 = reg_tmp(t2, m2, 1); + tmp_to(t2, *r2); + all |= (1 << *r2); + } + if (m1) { + struct tmp *t1 = TMP(m2 ? 1 : 0); + *r1 = reg_tmp(t1, m1 & ~all, md ? 1 : 0); + tmp_to(t1, *r1); + all |= (1 << *r1); + } + if (md) { + if (m2 && md & tmp_mask & (1 << *r2)) + *rd = *r2; + else if (m1 && md & tmp_mask & (1 << *r1)) + *rd = *r1; + else + *rd = reg_get(md & ~all); + all |= (1 << *rd); + } else { + *rd = *r1; + } + if (mt & ~all) { + for (i = 0; i < N_TMPS; i++) + if (mt & ~all & (1 << tmpregs[i])) + reg_free(tmpregs[i], all | mt); + } + stat_regs |= mt; + tmp_drop(m2 ? 
2 : 1); +} + +/* allocate registers for a 3 operand instruction */ +static void regs3(int op, int *r0, int *r1, int *r2) +{ + int m0, m1, m2, mt; + struct tmp *t0 = TMP(2); + struct tmp *t1 = TMP(1); + struct tmp *t2 = TMP(0); + int all = 0; + int i; + i_reg(op, &m0, &m1, &m2, &mt); + if (m2) { + *r2 = reg_tmp(t2, m2, 1); + tmp_to(t2, *r2); + all |= (1 << *r2); + } + if (m1) { + *r1 = reg_tmp(t1, m1 & ~(1 << *r2), 1); + tmp_to(t1, *r1); + all |= (1 << *r1); + } + if (m0) { + *r0 = reg_tmp(t0, m0 & ~((1 << *r2) | (1 << *r1)), 1); + tmp_to(t0, *r0); + all |= (1 << *r0); + } + if (mt & ~all) { + for (i = 0; i < N_TMPS; i++) + if (mt & ~all & (1 << tmpregs[i])) + reg_free(tmpregs[i], all | mt); + } + stat_regs |= mt; + tmp_drop(3); +} + +static void op_imm(int op, long n) +{ + int rd, r1, r2; + regs2(op | O_IMM, &rd, &r1, &r2); + i_op_imm(op | O_IMM, rd, r1, n); + tmp_push(rd); +} + +void o_uop(int op) +{ + int rd, r1, r2; + if (!c_uop(op)) + return; + regs2(op, &rd, &r1, &r2); + i_op(op, rd, r1, r2); + tmp_push(rd); +} + +static int bop_imm(int op, long *n, int swap) +{ + struct tmp *t1 = TMP(0); + struct tmp *t2 = TMP(1); + if (!TMP_NUM(t1) && (!swap || !TMP_NUM(t2))) + return 1; + *n = TMP_NUM(t1) ? 
t1->addr : t2->addr; + if (!i_imm(op, *n)) + return 1; + if (!TMP_NUM(t1)) + o_tmpswap(); + tmp_drop(1); + return 0; +} + +static void bin_op(int op, int swap) +{ + int rd, r1, r2; + long n; + if (!bop_imm(op, &n, swap)) { + regs2(op | O_IMM, &rd, &r1, &r2); + i_op_imm(op, rd, r1, n); + } else { + regs2(op, &rd, &r1, &r2); + i_op(op, rd, r1, r2); + } + tmp_push(rd); +} + +/* return log2(n) if n is a power of two, LONGSZ * 8 if n is zero and -1 otherwise */ +static int log2a(unsigned long n) +{ + int i = 0; + for (i = 0; i < LONGSZ * 8; i++) + if (n & (1ul << i)) /* 1ul: i can exceed the width of int */ + break; + if (i == LONGSZ * 8 || !((n >> i) >> 1)) /* two shifts: i + 1 can equal the width of long */ + return i; + return -1; +} + +/* optimized version of mul/div/mod for powers of two */ +static int mul_2(int op) +{ + struct tmp *t1 = TMP(0); + struct tmp *t2 = TMP(1); + long n; + int p; + if ((op & 0xff) == O_MUL && t2->loc == LOC_NUM && !t2->bt) + o_tmpswap(); + if (t1->loc != LOC_NUM || t1->bt) + return 1; + n = t1->addr; + p = log2a(n); + if (n && p == -1) + return 1; + if ((op & 0xff) == O_MUL) { + tmp_drop(1); + if (n == 1) + return 0; + if (n == 0) { + tmp_drop(1); + o_num(0); + return 0; + } + op_imm(O_SHL, p); + return 0; + } + if (op == O_DIV) { + tmp_drop(1); + if (n == 1) + return 0; + op_imm((op & O_SIGNED) | O_SHR, p); + return 0; + } + if (op == O_MOD) { + tmp_drop(1); + if (n == 1) { + tmp_drop(1); + o_num(0); + return 0; + } + op_imm(O_ZX, p); + return 0; + } + return 1; +} + +void o_bop(int op) +{ + if (!c_bop(op)) + return; + if ((op & 0xf0) == 0x00) /* add */ + bin_op(op, (op & 0xff) != O_SUB); + if ((op & 0xf0) == 0x10) /* shx */ + bin_op(op, 0); + if ((op & 0xf0) == 0x20) { /* mul */ + if (!mul_2(op)) + return; + bin_op(op, (op & 0xff) == O_MUL); + } + if ((op & 0xf0) == 0x30) + bin_op(op, (op & 0xff) == O_EQ || (op & 0xff) == O_NEQ); +} + +void o_memcpy(void) +{ + int r0, r1, r2; + regs3(O_MCPY, &r0, &r1, &r2); + i_memcpy(r0, r1, r2); +} + +void o_memset(void) +{ + int r0, r1, r2; + regs3(O_MSET, &r0, &r1, &r2); + i_memset(r0, r1, r2); +} + +void o_cast(unsigned bt) +{ + struct tmp *t = TMP(0); + if (!t->bt && t->loc 
== LOC_NUM) { + num_cast(t, bt); + return; + } + if (BT_SZ(bt) != LONGSZ) + op_imm(MOVXX(bt), BT_SZ(bt) * 8); +} + +static void jxz(int id, int z) +{ + int r = reg_tmp(TMP(0), R_TMPS, 1); + tmp_pop(r); + jmp_add(id, r, z); +} + +void o_jz(int id) +{ + jxz(id, 1); +} + +void o_jnz(int id) +{ + jxz(id, 0); +} + +void o_jmp(int id) +{ + jmp_add(id, -1, 0); +} + +void o_call(int argc, int rets) +{ + struct tmp *t; + int i; + int aregs = MIN(N_ARGS, argc); + for (i = 0; i < N_TMPS; i++) + if (regs[tmpregs[i]] && regs[tmpregs[i]] - tmps < ntmp - argc) + tmp_mem(regs[tmpregs[i]]); + if (argc > aregs) { + sp_push(LONGSZ * (argc - aregs)); + for (i = argc - 1; i >= aregs; --i) { + int reg = reg_tmp(TMP(0), R_TMPS, 1); + tmp_pop(reg); + i_save(reg, REG_SP, (i - aregs) * LONGSZ, LONGSZ); + } + } + for (i = aregs - 1; i >= 0; --i) + tmp_to(TMP(aregs - i - 1), argregs[i]); + tmp_drop(aregs); + t = TMP(0); + if (t->loc == LOC_SYM && !t->bt) { + i_call(t->sym, t->off); + tmp_drop(1); + } else { + int reg = reg_tmp(t, R_TMPS, 1); + tmp_pop(reg); + i_call_reg(reg); + } + if (rets) + tmp_push(REG_RET); + stat_calls++; +} + +void o_mkbss(char *name, int size, int global) +{ + if (pass1) + return; + out_sym(name, OUT_BSS | (global ? OUT_GLOB : 0), bsslen, size); + bsslen += ALIGN(size, OUT_ALIGNMENT); +} + +#define MAXDATS (1 << 10) +static char dat_names[MAXDATS][NAMELEN]; +static int dat_offs[MAXDATS]; +static int ndats; + +void *o_mkdat(char *name, int size, int global) +{ + void *addr = ds + dslen; + int idx; + if (pass1) + return addr; + idx = ndats++; + if (idx >= MAXDATS) + err("nomem: MAXDATS reached!\n"); + strcpy(dat_names[idx], name); + dat_offs[idx] = dslen; + out_sym(name, OUT_DS | (global ? 
OUT_GLOB : 0), dslen, size); + dslen += ALIGN(size, OUT_ALIGNMENT); + return addr; +} + +static int dat_off(char *name) +{ + int i; + for (i = 0; i < ndats; i++) + if (!strcmp(name, dat_names[i])) + return dat_offs[i]; + return 0; +} + +void o_datset(char *name, int off, unsigned bt) +{ + struct tmp *t = TMP(0); + int sym_off = dat_off(name) + off; + if (pass1) { + tmp_drop(1); + return; + } + if (t->loc == LOC_NUM && !t->bt) { + num_cast(t, bt); + memcpy(ds + sym_off, &t->addr, BT_SZ(bt)); + } + if (t->loc == LOC_SYM && !t->bt) { + out_rel(t->sym, OUT_DS, sym_off); + memcpy(ds + sym_off, &t->off, BT_SZ(bt)); + } + tmp_drop(1); +} + +void o_write(int fd) +{ + i_done(); + out_write(fd, cs, cslen, ds, dslen); +} + +static void opt_reset(void) +{ + int i; + memset(opt_isreg, 0, sizeof(opt_isreg)); + opt_sargs = func_varg ? R_ARGS : 0; + for (i = MIN(func_argc, N_ARGS) - 1; i >= 0; --i) + opt_sargs |= 1 << argregs[i]; + tmp_mask = N_TMPS > 6 ? R_TMPS & ~R_SAVED : R_TMPS; + opt_lregs = 0; + pass1 = 0; + pass2 = 0; +} + +static void func_reset(void) +{ + int i; + int argaddr = 0; + memset(regs, 0, sizeof(regs)); + memset(locals, 0, sizeof(*locals) * nlocals); + sp = i_sp(); + sp_max = sp; + ntmp = 0; + sp_tmp = -1; + nlabels = 0; + njmps = 0; + nlocals = 0; + stat_calls = 0; + stat_tmps = 0; + stat_regs = 1 << REG_RET; + for (i = 0; i < func_argc; i++) { + locals[nlocals].loc = i_args() + argaddr; + locals[nlocals].sz = LONGSZ; + nlocals++; + if (i >= N_ARGS || opt_sargs & (1 << argregs[i])) + argaddr += LONGSZ; + } +} + +void o_func_beg(char *name, int argc, int global, int varg) +{ + func_argc = argc; + func_varg = varg; + func_beg = cslen; + out_sym(name, (global ? 
OUT_GLOB : 0) | OUT_CS, cslen, 0); + opt_reset(); + i_prolog(argc, varg, opt_sargs, tmp_mask & R_SAVED, 1, 1); + func_reset(); +} + +/* sort locals for register allocation based on the number of accesses */ +static int *sortedlocals(void) +{ + static int ord[MAXLOCALS]; + int i, j; + for (i = 0; i < nlocals; i++) { + for (j = i - 1; j >= 0; j--) { + if (locals[i].n_access <= locals[ord[j]].n_access) + break; + ord[j + 1] = ord[j]; + } + ord[j + 1] = i; + } + return ord; +} + +/* assign locals to registers */ +static int locals2regs(int leaf) +{ + int *ord = sortedlocals(); + int nlocregs = 0; + int idx = 0; + int i; + /* letting arguments stay in their registers for leaf functions */ + if (!func_varg && leaf) { + for (i = 0; i < MIN(func_argc, N_ARGS); i++) { + if (locals[i].sz > LONGSZ || (1 << argregs[i]) & stat_regs) + continue; + if (locals[i].n_access && !locals[i].n_addr) { + opt_isreg[i] = 1; + opt_lreg[i] = argregs[i]; + opt_sargs &= ~(1 << argregs[i]); + opt_lregs |= (1 << argregs[i]); + nlocregs++; + } + } + } + /* try finding a register for each local */ + for (i = 0; i < nlocals; i++) { + int l = ord[i]; + int nmask = (leaf ? 0 : ~R_SAVED) | stat_regs | opt_lregs; + if (opt_isreg[l] || (func_varg && l < func_argc)) + continue; + /* find a free register */ + while (idx < N_TMPS && ((1 << tmpregs[idx]) & nmask)) + idx++; + if (idx >= N_TMPS) + break; + if (locals[l].sz > LONGSZ || locals[l].n_addr) + continue; + if (locals[l].n_access > (leaf ? 
0 : 1)) { + opt_isreg[l] = 1; + opt_lreg[l] = tmpregs[idx]; + opt_lregs |= 1 << tmpregs[idx]; + if (l < MIN(N_ARGS, func_argc)) + opt_sargs &= ~(1 << argregs[l]); + nlocregs++; + idx++; + } + } + return nlocregs; +} + +void o_pass1(void) +{ + pass1 = 1; +} + +void o_pass2(void) +{ + int locregs, leaf; + int initfp, subsp, sregs; + int i; + o_label(0); + jmp_fill(); + leaf = !stat_calls; + cslen = func_beg; + locregs = locals2regs(leaf); + subsp = nlocals > locregs || !leaf; + initfp = subsp || stat_tmps || func_argc > N_ARGS; + sregs = (opt_lregs | stat_regs) & R_SAVED; + tmp_mask = stat_regs; + pass1 = 0; + pass2 = 1; + if (!func_varg) + for (i = 0; i < func_argc; i++) + if (i < N_ARGS && (locals[i].n_access + locals[i].n_addr) == 0) + opt_sargs &= ~(1 << argregs[i]); + i_prolog(func_argc, func_varg, opt_sargs, sregs, initfp, subsp); + func_reset(); + for (i = 0; i < MIN(func_argc, N_ARGS); i++) + if (opt_isreg[i] && opt_lreg[i] != argregs[i]) + i_mov(opt_lreg[i], argregs[i]); + for (i = N_ARGS; i < func_argc; i++) + if (opt_isreg[i]) + i_load(opt_lreg[i], REG_FP, locals[i].loc, LONGSZ); +} + +void o_func_end(void) +{ + o_label(0); + jmp_fill(); + i_epilog(sp_max); +} diff --git a/gen.h b/gen.h index 6061633..df4605b 100644 --- a/gen.h +++ b/gen.h @@ -1,6 +1,5 @@ #define SECSIZE (1 << 18) #define MAXTMP (1 << 12) -#define LONGSZ 8 /* basic types */ #define BT_SZMASK 0x00ff @@ -8,7 +7,7 @@ #define BT_SZ(bt) ((bt) & BT_SZMASK) #define O_SIGNED 0x100 -/* binary operations for o_bop() */ +/* binary instructions for o_bop() */ #define O_ADD 0x00 #define O_SUB 0x01 #define O_AND 0x02 @@ -25,7 +24,7 @@ #define O_GE 0x33 #define O_EQ 0x34 #define O_NEQ 0x35 -/* unary operations for o_uop() */ +/* unary instructions for o_uop() */ #define O_NEG 0x40 #define O_NOT 0x41 #define O_LNOT 0x42 @@ -71,3 +70,107 @@ void o_func_beg(char *name, int argc, int global, int vararg); void o_func_end(void); /* output */ void o_write(int fd); +/* passes */ +void o_pass1(void); +void 
o_pass2(void); + +/* + * neatcc architecture-dependent code-generation interface + * + * To make maintaining three different architectures easier and to unify the + * optimization patch, I've extracted gen.c from x86.c and arm.c. The i_*() + * functions are now the low level architecture-specific code generation + * entry points. The differences between RISC and CISC architectures, + * actually the annoying asymmetry in CISC architecture, made this interface + * a bit more complex than it could have ideally been. Nevertheless, the + * benefits of extracting gen.c and the cleaner design, especially with the + * presence of the optimization patch, are worth the added complexity. + * + * I tried to make the interface as small as possible. I'll describe the + * key functions and macros here. Overall, there were many challenges for + * extracting gen.c including: + * + Different register sets; caller/callee saved and argument registers + * + CISC-style instructions that work on limited registers and parameters + * + Different instruction formats and immediate value limitations + * + Producing epilog, prolog, and local variable addresses when optimizing + * + * Instructions: + * + i_reg(): The mask of allowed registers for each operand of an instruction. + * If md is zero, we assume the destination register should be equal to the + * first register, as in CISC architectures. m2 can be zero which means + * the instruction doesn't have three operands. mt denotes the mask of + * registers that may lose their contents after the instruction. + * + i_load(), i_save(), i_mov(), i_num(), i_sym(): The names are clear. + * + i_imm(): Specifies if the given immediate can be encoded for the given + * instruction. + * + i_jmp(), i_fill(): Branching instructions. If rn >= 0, the branch is + * a conditional branch: jump only if the register rn is zero (or nonzero if + * jc is nonzero). 
nbytes specifies the number of bytes necessary for + * holding the jump distance; useful if the architecture supports short + * branching instructions. i_fill() actually fills the jump at src in + * the code segment. It returns the number of bytes jumped. + * + i_args(): The offset of the first argument from the frame pointer. + * It is probably positive. + * + i_sp(): The offset of the first local from the frame pointer. + * It is probably negative. + * + tmpregs: Registers that can be used for holding temporaries. + * + argregs: Registers for holding the first N_ARGS arguments. + * + * There are a few other macros defined in arch headers. See x64.h as + * an example. + * + */ +#ifdef NEATCC_ARM +#include "arm.h" +#endif +#ifdef NEATCC_X64 +#include "x64.h" +#endif +#ifdef NEATCC_X86 +#include "x86.h" +#endif + +/* intermediate instructions */ +#define O_IMM 0x200 /* mask for immediate instructions */ +#define O_MSET 0x51 /* memset() */ +#define O_MCPY 0x52 /* memcpy() */ +#define O_MOV 0x53 /* mov */ +#define O_SX 0x54 /* sign extend */ +#define O_ZX 0x55 /* zero extend */ + +void i_load(int rd, int rn, int off, int bt); +void i_save(int rd, int rn, int off, int bt); +void i_mov(int rd, int rn); +void i_reg(int op, int *md, int *m1, int *m2, int *mt); +void i_op(int op, int rd, int r1, int r2); +int i_imm(int op, long imm); +void i_op_imm(int op, int rd, int r1, long n); + +void i_num(int rd, long n); +void i_sym(int rd, char *sym, int off); + +void i_jmp(int rn, int jc, int nbytes); +long i_fill(long src, long dst, int nbytes); + +void i_call(char *sym, int off); +void i_call_reg(int rd); +void i_memset(int r0, int r1, int r2); +void i_memcpy(int r0, int r1, int r2); + +int i_args(void); /* the address of the first arg relative to fp */ +int i_sp(void); /* the address of the first local relative to fp */ + +void i_prolog(int argc, int varg, int sargs, int sregs, int initfp, int subsp); +void i_epilog(int sp_max); +void i_done(void); + +extern int tmpregs[]; +extern 
int argregs[]; + +/* code segment text */ +extern char cs[]; /* code segment */ +extern int cslen; /* code segment length */ +extern int pass1; /* first pass */ + +void os(void *s, int n); +void oi(long n, int l); diff --git a/ncc.c b/ncc.c index c3ed157..3849639 100644 --- a/ncc.c +++ b/ncc.c @@ -1,12 +1,13 @@ /* * neatcc - a small and simple C compiler * - * Copyright (C) 2010-2011 Ali Gholami Rudi + * Copyright (C) 2010-2012 Ali Gholami Rudi * * This program is released under GNU GPL version 2. */ #include #include +#include #include #include #include @@ -52,6 +53,7 @@ static int nogen; /* don't generate code */ #define TYPE_BT(t) ((t)->ptr ? LONGSZ : (t)->bt) #define TYPE_SZ(t) ((t)->ptr ? LONGSZ : (t)->bt & BT_SZMASK) +#define TYPE_VOID(t) (!(t)->bt && !(t)->flags && !(t)->ptr) /* type->flag values */ #define T_ARRAY 0x01 @@ -101,8 +103,13 @@ static void ts_pop(struct type *type) *type = ts[nts]; } -void err(char *msg) +void err(char *fmt, ...) { + va_list ap; + char msg[512]; + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); /* bounded: a long argument must not overflow msg */ + va_end(ap); die("%s: %s", cpp_loc(tok_addr()), msg); } @@ -272,6 +279,7 @@ static struct name *struct_field(int id, char *name) if (!strcmp(name, si->fields[i].name)) return &si->fields[i]; err("field not found\n"); + return NULL; } /* return t's size */ @@ -337,7 +345,7 @@ static unsigned bt_op(unsigned bt1, unsigned bt2) { unsigned s1 = BT_SZ(bt1); unsigned s2 = BT_SZ(bt2); - return (bt1 | bt2) & BT_SIGNED | (s1 > s2 ? s1 : s2); + return ((bt1 | bt2) & BT_SIGNED) | (s1 > s2 ? 
s1 : s2); } static void ts_binop(int op) @@ -523,7 +531,7 @@ static void readprimary(void) return; } if (tok_see() != '(') - err("unknown symbol\n"); + err("unknown symbol <%s>\n", name); global_add(&unkn); ts_push_bt(LONGSZ); o_sym(unkn.name); @@ -1037,11 +1045,13 @@ static void readcexpr(void) if (readcexpr_const()) { int l_fail = LABEL(); int l_end = LABEL(); + struct type ret; o_jz(l_fail); readcexpr(); /* both branches yield the same type; so ignore the first */ - ts_pop_de(NULL); - o_forkpush(); + ts_pop_de(&ret); + if (!TYPE_VOID(&ret)) + o_forkpush(); o_jmp(l_end); tok_expect(':'); @@ -1049,8 +1059,10 @@ static void readcexpr(void) readcexpr(); /* making sure t->addr == 0 on both branches */ ts_de(1); - o_forkpush(); - o_forkjoin(); + if (!TYPE_VOID(&ret)) { + o_forkpush(); + o_forkjoin(); + } o_label(l_end); } ncexpr--; @@ -1229,7 +1241,7 @@ static char func_name[NAMELEN]; static void localdef(void *data, struct name *name, unsigned flags) { struct type *t = &name->type; - if ((flags & F_EXTERN) || (t->flags & T_FUNC) && !t->ptr) { + if ((flags & F_EXTERN) || ((t->flags & T_FUNC) && !t->ptr)) { global_add(name); return; } @@ -1503,6 +1515,7 @@ static void readstmt(void) static void readfunc(struct name *name, int flags) { struct funcinfo *fi = &funcs[name->type.id]; + long beg = tok_addr(); int i; strcpy(func_name, fi->name); o_func_beg(func_name, fi->nargs, F_GLOBAL(flags), fi->varg); @@ -1511,6 +1524,12 @@ static void readfunc(struct name *name, int flags) strcpy(arg.name, fi->argnames[i]); local_add(&arg); } + /* first pass: collecting statistics */ + o_pass1(); + readstmt(); + tok_jump(beg); + /* second pass: generating code */ + o_pass2(); readstmt(); o_func_end(); func_name[0] = '\0'; @@ -1540,11 +1559,7 @@ static void compat_macros(void) { cpp_define("__STDC__", ""); cpp_define("__linux__", ""); -#ifdef NEATCC_ARM - cpp_define("__arm__", ""); -#else - cpp_define("__x86_64__", ""); -#endif + cpp_define(I_ARCH, ""); /* ignored keywords */ 
cpp_define("const", ""); @@ -1910,7 +1925,7 @@ static int initsize(void) long addr = tok_addr(); int n = 0; if (tok_jmp('=')) - err("array size unspecified"); + return 0; if (!tok_jmp(TOK_STR)) { n = tok_str(NULL); tok_jump(addr); diff --git a/neatcc b/neatcc index d09e148..1f83899 100755 --- a/neatcc +++ b/neatcc @@ -1,7 +1,7 @@ #!/bin/sh NEAT="/path/to/neatlibc" CC="/path/to/neatcc/ncc" -LD="/path/to/neatld/ld" +LD="/path/to/neatld/nld" CPPFLAGS="-Dfloat=long -Ddouble=long -D__extension__=" diff --git a/out.c b/out.c index e9da0fc..a5571eb 100644 --- a/out.c +++ b/out.c @@ -2,31 +2,60 @@ #include #include #include "out.h" +#include "gen.h" #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) #define MAXSYMS (1 << 12) -#define MAXRELA (1 << 12) +#define MAXREL (1 << 12) #define SEC_TEXT 1 -#define SEC_RELA 2 +#define SEC_REL 2 #define SEC_SYMS 3 #define SEC_SYMSTR 4 #define SEC_DAT 5 -#define SEC_DATRELA 6 +#define SEC_DATREL 6 #define SEC_BSS 7 #define NSECS 8 -static Elf64_Ehdr ehdr; -static Elf64_Shdr shdr[NSECS]; -static Elf64_Sym syms[MAXSYMS]; +/* simplifed elf struct and macro names */ +#if LONGSZ == 8 +# define USERELA 1 +# define Elf_Ehdr Elf64_Ehdr +# define Elf_Shdr Elf64_Shdr +# define Elf_Sym Elf64_Sym +# define Elf_Rel Elf64_Rela +# define ELF_ST_INFO ELF64_ST_INFO +# define ELF_ST_BIND ELF64_ST_BIND +# define ELF_R_SYM ELF64_R_SYM +# define ELF_R_TYPE ELF64_R_TYPE +# define ELF_R_INFO ELF64_R_INFO +#else +# define USERELA 0 +# define Elf_Ehdr Elf32_Ehdr +# define Elf_Shdr Elf32_Shdr +# define Elf_Sym Elf32_Sym +# define Elf_Rel Elf32_Rel +# define ELF_ST_INFO ELF32_ST_INFO +# define ELF_ST_BIND ELF32_ST_BIND +# define ELF_R_SYM ELF32_R_SYM +# define ELF_R_TYPE ELF32_R_TYPE +# define ELF_R_INFO ELF32_R_INFO +#endif + +static Elf_Ehdr ehdr; +static Elf_Shdr shdr[NSECS]; +static Elf_Sym syms[MAXSYMS]; static int nsyms = 1; static char symstr[MAXSYMS * 8]; static int nsymstr = 1; -static Elf64_Rela datrela[MAXRELA]; -static int ndatrela; -static 
Elf64_Rela rela[MAXRELA]; -static int nrela; +static Elf_Rel dsrels[MAXREL]; +static int ndsrels; +static Elf_Rel rels[MAXREL]; +static int nrels; + +static int rel_type(int flags); +static void ehdr_init(Elf_Ehdr *ehdr); static int symstr_add(char *name) { @@ -45,27 +74,27 @@ static int sym_find(char *name) return -1; } -static Elf64_Sym *put_sym(char *name) +static Elf_Sym *put_sym(char *name) { int found = sym_find(name); - Elf64_Sym *sym = found != -1 ? &syms[found] : &syms[nsyms++]; + Elf_Sym *sym = found != -1 ? &syms[found] : &syms[nsyms++]; if (found >= 0) return sym; sym->st_name = symstr_add(name); sym->st_shndx = SHN_UNDEF; - sym->st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC); + sym->st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC); return sym; } -#define SYMLOCAL(i) (ELF64_ST_BIND(syms[i].st_info) == STB_LOCAL) +#define SYMLOCAL(i) (ELF_ST_BIND(syms[i].st_info) == STB_LOCAL) -static void mvrela(int *mv, Elf64_Rela *rela, int nrela) +static void mvrela(int *mv, Elf_Rel *rels, int nrels) { int i; - for (i = 0; i < nrela; i++) { - int sym = ELF64_R_SYM(rela[i].r_info); - int type = ELF64_R_TYPE(rela[i].r_info); - rela[i].r_info = ELF64_R_INFO(mv[sym], type); + for (i = 0; i < nrels; i++) { + int sym = ELF_R_SYM(rels[i].r_info); + int type = ELF_R_TYPE(rels[i].r_info); + rels[i].r_info = ELF_R_INFO(mv[sym], type); } } @@ -79,7 +108,7 @@ static int syms_sort(void) i = 1; j = nsyms - 1; while (1) { - Elf64_Sym t; + Elf_Sym t; while (i < j && SYMLOCAL(i)) i++; while (j >= i && !SYMLOCAL(j)) @@ -93,8 +122,8 @@ static int syms_sort(void) mv[j] = i; } glob_beg = j + 1; - mvrela(mv, rela, nrela); - mvrela(mv, datrela, ndatrela); + mvrela(mv, rels, nrels); + mvrela(mv, dsrels, ndsrels); return glob_beg; } @@ -104,7 +133,7 @@ void out_init(int flags) void out_sym(char *name, int flags, int off, int len) { - Elf64_Sym *sym = put_sym(name); + Elf_Sym *sym = put_sym(name); int type = (flags & OUT_CS) ? STT_FUNC : STT_OBJECT; int bind = (flags & OUT_GLOB) ? 
STB_GLOBAL : STB_LOCAL; if (flags & OUT_CS) @@ -113,33 +142,28 @@ void out_sym(char *name, int flags, int off, int len) sym->st_shndx = SEC_DAT; if (flags & OUT_BSS) sym->st_shndx = SEC_BSS; - sym->st_info = ELF64_ST_INFO(bind, type); + sym->st_info = ELF_ST_INFO(bind, type); sym->st_value = off; sym->st_size = len; } -static int rel_type(int flags) -{ - return flags & OUT_REL ? R_X86_64_PC32 : R_X86_64_64; -} - static void out_csrel(int idx, int off, int flags) { - Elf64_Rela *r = &rela[nrela++]; + Elf_Rel *r = &rels[nrels++]; r->r_offset = off; - r->r_info = ELF64_R_INFO(idx, rel_type(flags)); + r->r_info = ELF_R_INFO(idx, rel_type(flags)); } static void out_dsrel(int idx, int off, int flags) { - Elf64_Rela *r = &datrela[ndatrela++]; + Elf_Rel *r = &dsrels[ndsrels++]; r->r_offset = off; - r->r_info = ELF64_R_INFO(idx, rel_type(flags)); + r->r_info = ELF_R_INFO(idx, rel_type(flags)); } void out_rel(char *name, int flags, int off) { - Elf64_Sym *sym = put_sym(name); + Elf_Sym *sym = put_sym(name); int idx = sym - syms; if (flags & OUT_DS) out_dsrel(idx, off, flags); @@ -162,30 +186,30 @@ static int bss_len(void) void out_write(int fd, char *cs, int cslen, char *ds, int dslen) { - Elf64_Shdr *text_shdr = &shdr[SEC_TEXT]; - Elf64_Shdr *rela_shdr = &shdr[SEC_RELA]; - Elf64_Shdr *symstr_shdr = &shdr[SEC_SYMSTR]; - Elf64_Shdr *syms_shdr = &shdr[SEC_SYMS]; - Elf64_Shdr *dat_shdr = &shdr[SEC_DAT]; - Elf64_Shdr *datrela_shdr = &shdr[SEC_DATRELA]; - Elf64_Shdr *bss_shdr = &shdr[SEC_BSS]; + Elf_Shdr *text_shdr = &shdr[SEC_TEXT]; + Elf_Shdr *rela_shdr = &shdr[SEC_REL]; + Elf_Shdr *symstr_shdr = &shdr[SEC_SYMSTR]; + Elf_Shdr *syms_shdr = &shdr[SEC_SYMS]; + Elf_Shdr *dat_shdr = &shdr[SEC_DAT]; + Elf_Shdr *datrel_shdr = &shdr[SEC_DATREL]; + Elf_Shdr *bss_shdr = &shdr[SEC_BSS]; unsigned long offset = sizeof(ehdr); /* workaround for the idiotic gnuld; use neatld instead! 
*/ text_shdr->sh_name = symstr_add(".cs"); - rela_shdr->sh_name = symstr_add(".rel.cs"); + rela_shdr->sh_name = symstr_add(USERELA ? ".rela.cs" : ".rels.cs"); dat_shdr->sh_name = symstr_add(".ds"); - datrela_shdr->sh_name = symstr_add(".rel.ds"); + datrel_shdr->sh_name = symstr_add(USERELA ? ".rela.ds" : ".rels.ds"); ehdr.e_ident[0] = 0x7f; ehdr.e_ident[1] = 'E'; ehdr.e_ident[2] = 'L'; ehdr.e_ident[3] = 'F'; - ehdr.e_ident[4] = ELFCLASS64; + ehdr.e_ident[4] = LONGSZ == 8 ? ELFCLASS64 : ELFCLASS32; ehdr.e_ident[5] = ELFDATA2LSB; ehdr.e_ident[6] = EV_CURRENT; ehdr.e_type = ET_REL; - ehdr.e_machine = EM_X86_64; + ehdr_init(&ehdr); ehdr.e_version = EV_CURRENT; ehdr.e_ehsize = sizeof(ehdr); ehdr.e_shentsize = sizeof(shdr[0]); @@ -202,12 +226,12 @@ void out_write(int fd, char *cs, int cslen, char *ds, int dslen) text_shdr->sh_addralign = OUT_ALIGNMENT; offset += text_shdr->sh_size; - rela_shdr->sh_type = SHT_RELA; + rela_shdr->sh_type = USERELA ? SHT_RELA : SHT_REL; rela_shdr->sh_link = SEC_SYMS; rela_shdr->sh_info = SEC_TEXT; rela_shdr->sh_offset = offset; - rela_shdr->sh_size = nrela * sizeof(rela[0]); - rela_shdr->sh_entsize = sizeof(rela[0]); + rela_shdr->sh_size = nrels * sizeof(rels[0]); + rela_shdr->sh_entsize = sizeof(rels[0]); offset += rela_shdr->sh_size; syms_shdr->sh_type = SHT_SYMTAB; @@ -226,13 +250,13 @@ void out_write(int fd, char *cs, int cslen, char *ds, int dslen) dat_shdr->sh_addralign = OUT_ALIGNMENT; offset += dat_shdr->sh_size; - datrela_shdr->sh_type = SHT_RELA; - datrela_shdr->sh_offset = offset; - datrela_shdr->sh_size = ndatrela * sizeof(datrela[0]); - datrela_shdr->sh_entsize = sizeof(datrela[0]); - datrela_shdr->sh_link = SEC_SYMS; - datrela_shdr->sh_info = SEC_DAT; - offset += datrela_shdr->sh_size; + datrel_shdr->sh_type = USERELA ? 
SHT_RELA : SHT_REL; + datrel_shdr->sh_offset = offset; + datrel_shdr->sh_size = ndsrels * sizeof(dsrels[0]); + datrel_shdr->sh_entsize = sizeof(dsrels[0]); + datrel_shdr->sh_link = SEC_SYMS; + datrel_shdr->sh_info = SEC_DAT; + offset += datrel_shdr->sh_size; bss_shdr->sh_type = SHT_NOBITS; bss_shdr->sh_flags = SHF_ALLOC | SHF_WRITE; @@ -250,9 +274,55 @@ void out_write(int fd, char *cs, int cslen, char *ds, int dslen) write(fd, &ehdr, sizeof(ehdr)); write(fd, shdr, NSECS * sizeof(shdr[0])); write(fd, cs, cslen); - write(fd, rela, nrela * sizeof(rela[0])); + write(fd, rels, nrels * sizeof(rels[0])); write(fd, syms, nsyms * sizeof(syms[0])); write(fd, ds, dslen); - write(fd, datrela, ndatrela * sizeof(datrela[0])); + write(fd, dsrels, ndsrels * sizeof(dsrels[0])); write(fd, symstr, nsymstr); } + +/* architecture dependent functions */ + +#ifdef NEATCC_ARM +static void ehdr_init(Elf_Ehdr *ehdr) +{ + ehdr->e_machine = EM_ARM; + ehdr->e_flags = EF_ARM_EABI_VER4; +} + +static int rel_type(int flags) +{ + if (flags & OUT_RL24) + return R_ARM_PC24; + return flags & OUT_RLREL ? R_ARM_REL32 : R_ARM_ABS32; + +} +#endif + +#ifdef NEATCC_X64 +static void ehdr_init(Elf_Ehdr *ehdr) +{ + ehdr->e_machine = EM_X86_64; +} + +static int rel_type(int flags) +{ + if (flags & OUT_RLREL) + return R_X86_64_PC32; + if (flags & OUT_RL32) + return flags & OUT_RLSX ? R_X86_64_32S : R_X86_64_32; + return R_X86_64_64; +} +#endif + +#ifdef NEATCC_X86 +static void ehdr_init(Elf_Ehdr *ehdr) +{ + ehdr->e_machine = EM_386; +} + +static int rel_type(int flags) +{ + return flags & OUT_RLREL ? 
R_386_PC32 : R_386_32; +} +#endif diff --git a/out.h b/out.h index f199dbf..3eba142 100644 --- a/out.h +++ b/out.h @@ -4,8 +4,10 @@ #define OUT_GLOB 0x0010 /* global symbol */ -#define OUT_REL 0x0100 /* relative relocation */ -#define OUT_REL24 0x0200 /* 24-bit relative relocation */ +#define OUT_RLREL 0x0020 /* relative relocation */ +#define OUT_RLSX 0x0040 /* sign extend relocation */ +#define OUT_RL24 0x0400 /* 3-byte relocation */ +#define OUT_RL32 0x0800 /* 4-byte relocation */ #define OUT_ALIGNMENT 16 /* amount of section alignment */ diff --git a/tok.h b/tok.h index faac4af..ea546cb 100644 --- a/tok.h +++ b/tok.h @@ -54,3 +54,4 @@ char *cpp_loc(long addr); int cpp_read(char *s); void die(char *msg, ...); +void err(char *fmt, ...); diff --git a/x64.c b/x64.c new file mode 100644 index 0000000..f0e7141 --- /dev/null +++ b/x64.c @@ -0,0 +1,595 @@ +#include "tok.h" +#include "gen.h" +#include "out.h" + +/* registers */ +#define R_RAX 0x00 +#define R_RCX 0x01 +#define R_RDX 0x02 +#define R_RBX 0x03 +#define R_RSP 0x04 +#define R_RBP 0x05 +#define R_RSI 0x06 +#define R_RDI 0x07 +/* x86_64 registers */ +#define R_R8 0x08 +#define R_R9 0x09 +#define R_R10 0x0a +#define R_R11 0x0b +#define R_R12 0x0c +#define R_R13 0x0d +#define R_R14 0x0e +#define R_R15 0x0f + +/* x86 opcodes */ +#define I_MOV 0x89 +#define I_MOVI 0xc7 +#define I_MOVIR 0xb8 +#define I_MOVR 0x8b +#define I_MOVSXD 0x63 +#define I_SHX 0xd3 +#define I_CMP 0x3b +#define I_TST 0x85 +#define I_LEA 0x8d +#define I_NOT 0xf7 +#define I_CALL 0xff +#define I_MUL 0xf7 +#define I_XOR 0x33 +#define I_TEST 0x85 +#define I_CQO 0x99 +#define I_PUSH 0x50 +#define I_POP 0x58 + +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) +#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) + +int tmpregs[] = {0, 7, 6, 2, 1, 8, 9, 10, 11, 3, 12, 13, 14, 15}; +int argregs[] = {7, 6, 2, 1, 8, 9}; + +#define OP2(o2, o1) (0x010000 | ((o2) << 8) | (o1)) +#define O2(op) (((op) >> 8) & 0xff) +#define O1(op) ((op) & 0xff) +#define MODRM(m, r1, r2) ((m) << 6 | (r1) << 3 | (r2)) +#define REX(r1, r2) (0x48 | (((r1) & 8) >> 1) | (((r2) & 8) >> 3)) + +static void putint(char *s, long n, int l) +{ + while (l--) { + *s++ = n; + n >>= 8; + } +} + +static void op_x(int op, int r1, int r2, int bt) +{ + int sz = BT_SZ(bt); + int rex = 0; + if (sz == 8) + rex |= 8; + if (sz == 1) + rex |= 0x40; + if (r1 & 0x8) + rex |= 4; + if (r2 & 0x8) + rex |= 1; + if (sz == 2) + oi(0x66, 1); + if (rex) + oi(rex | 0x40, 1); + if (op & 0x10000) + oi(O2(op), 1); + oi(sz == 1 ? O1(op) & ~0x1 : O1(op), 1); +} + +#define op_mr op_rm + +/* op_*(): r=reg, m=mem, i=imm, s=sym */ +static void op_rm(int op, int src, int base, int off, int bt) +{ + int dis = off == (char) off ? 1 : 4; + int mod = dis == 4 ? 2 : 1; + if (!off && (base & 7) != R_RBP) + mod = 0; + op_x(op, src, base, bt); + oi(MODRM(mod, src & 0x07, base & 0x07), 1); + if ((base & 7) == R_RSP) + oi(0x24, 1); + if (mod) + oi(off, dis); +} + +static void op_rr(int op, int src, int dst, int bt) +{ + op_x(op, src, dst, bt); + oi(MODRM(3, src & 0x07, dst & 0x07), 1); +} + +#define movrx_bt(bt) (((bt) == 4) ? 4 : LONGSZ) + +static int movrx_op(int bt, int mov) +{ + int sz = BT_SZ(bt); + if (sz == 4) + return bt & BT_SIGNED ? I_MOVSXD : mov; + if (sz == 2) + return OP2(0x0f, bt & BT_SIGNED ? 0xbf : 0xb7); + if (sz == 1) + return OP2(0x0f, bt & BT_SIGNED ? 
0xbe : 0xb6); + return mov; +} + +static void mov_r2r(int rd, int r1, unsigned bt) +{ + if (rd != r1 || BT_SZ(bt) != LONGSZ) + op_rr(movrx_op(bt, I_MOVR), rd, r1, movrx_bt(bt)); +} + +static void mov_m2r(int dst, int base, int off, int bt) +{ + op_rm(movrx_op(bt, I_MOVR), dst, base, off, movrx_bt(bt)); +} + +int i_imm(int op, long imm) +{ + if ((op & 0xf0) == 0x20) + return 0; + return imm <= 127 && imm >= -128; +} + +static void i_push(int reg) +{ + op_x(I_PUSH | (reg & 0x7), 0, reg, 4); +} + +static void i_pop(int reg) +{ + op_x(I_POP | (reg & 0x7), 0, reg, 4); +} + +void i_mov(int rd, int rn) +{ + op_rr(movrx_op(LONGSZ, I_MOVR), rd, rn, movrx_bt(LONGSZ)); +} + +void i_load(int rd, int rn, int off, int bt) +{ + mov_m2r(rd, rn, off, bt); +} + +void i_save(int rd, int rn, int off, int bt) +{ + op_rm(I_MOV, rd, rn, off, bt); +} + +void i_reg(int op, int *rd, int *r1, int *r2, int *tmp) +{ + *rd = 0; + *r1 = R_TMPS; + *r2 = op & O_IMM ? 0 : R_TMPS; + *tmp = 0; + if ((op & 0xf0) == 0x00) /* add */ + return; + if ((op & 0xf0) == 0x10) { /* shl */ + if (~op & O_IMM) { + *r2 = 1 << R_RCX; + *r1 = R_TMPS & ~*r2; + } + return; + } + if ((op & 0xf0) == 0x20) { /* mul */ + *rd = (op & 0xff) == O_MOD ? 
(1 << R_RDX) : (1 << R_RAX); + *r1 = (1 << R_RAX); + *r2 = R_TMPS & ~*rd & ~*r1; + if ((op & 0xff) == O_DIV) + *r2 &= ~(1 << R_RDX); + *tmp = (1 << R_RDX) | (1 << R_RAX); + return; + } + if ((op & 0xf0) == 0x30) { /* cmp */ + *rd = 1 << R_RAX; + return; + } + if ((op & 0xf0) == 0x40) { /* uop */ + *r2 = 0; + if ((op & 0xff) == O_LNOT) + *r1 = 1 << R_RAX; + return; + } + if ((op & 0xf0) == 0x50) { /* etc */ + if (op == O_MSET) { + *rd = 1 << R_RDI; + *r1 = 1 << R_RAX; + *r2 = 1 << R_RCX; + } + if (op == O_MCPY) { + *rd = 1 << R_RDI; + *r1 = 1 << R_RSI; + *r2 = 1 << R_RCX; + } + if (op == O_SX || op == O_ZX || op == O_MOV) { + *rd = R_TMPS; + *r2 = 0; + } + return; + } +} + +static void i_add(int op, int rd, int r1, int r2) +{ + /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */ + static int rx[] = {0003, 0053, 0043, 0013, 0063}; + op_rr(rx[op & 0x0f], rd, r2, LONGSZ); +} + +static void i_add_imm(int op, int rd, int rn, long n) +{ + /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */ + static int rx[] = {0xc0, 0xe8, 0xe0, 0xc8, 0xf0}; + unsigned char s[4] = {REX(0, rd), 0x83, rx[op & 0x0f] | (rd & 7), n & 0xff}; + os((void *) s, 4); +} + +/* load the constant n into register rd using the shortest suitable mov form */ +void i_num(int rd, long n) +{ + if (!n) { + op_rr(I_XOR, rd, rd, 4); + return; + } + if (n < 0 && n >= -0x7fffffffL - 1) { /* only these fit the sign-extended imm32 form */ + op_rr(I_MOVI, 0, rd, LONGSZ); + oi(n, 4); + } else { + int len = 8; + if (n > 0 && n <= 0xffffffff) + len = 4; + op_x(I_MOVIR + (rd & 7), 0, rd, len); + oi(n, len); + } +} + +static void i_mul(int rd, int r1, int r2) +{ + if (r2 != R_RDX) + i_num(R_RDX, 0); + op_rr(I_MUL, 4, r2, LONGSZ); +} + +static void i_div(int op, int rd, int r1, int r2) +{ + if (r2 != R_RDX) { + if (op & O_SIGNED) + op_x(I_CQO, R_RAX, R_RDX, LONGSZ); + else + i_num(R_RDX, 0); + } + op_rr(I_MUL, op & O_SIGNED ? 
7 : 6, r2, LONGSZ); +} + +static void i_tst(int rn, int rm) +{ + op_rr(I_TST, rn, rm, LONGSZ); +} + +static void i_cmp(int rn, int rm) +{ + op_rr(I_CMP, rn, rm, LONGSZ); +} + +static void i_cmp_imm(int rn, long n) +{ + unsigned char s[4] = {REX(0, rn), 0x83, 0xf8 | rn, n & 0xff}; + os(s, 4); +} + +static void i_shl(int op, int rd, int r1, int rs) +{ + int sm = 4; + if ((op & 0x0f) == 1) + sm = op & O_SIGNED ? 7 : 5; + op_rr(I_SHX, sm, rd, LONGSZ); +} + +static void i_shl_imm(int op, int rd, int rn, long n) +{ + int sm = (op & 0x1) ? (op & O_SIGNED ? 0xf8 : 0xe8) : 0xe0 ; + char s[4] = {REX(0, rn), 0xc1, sm | (rn & 7), n & 0xff}; + os(s, 4); +} + +void i_sym(int rd, char *sym, int off) +{ + int sz = X64_ABS_RL & OUT_RL32 ? 4 : LONGSZ; + if (X64_ABS_RL & OUT_RLSX) + op_rr(I_MOVI, 0, rd, sz); + else + op_x(I_MOVIR + (rd & 7), 0, rd, sz); + if (!pass1) + out_rel(sym, OUT_CS | X64_ABS_RL, cslen); + oi(off, sz); +} + +static void i_neg(int rd) +{ + op_rr(I_NOT, 3, rd, LONGSZ); +} + +static void i_not(int rd) +{ + op_rr(I_NOT, 2, rd, LONGSZ); +} + +/* for optimizing cmp + tst + jmp to cmp + jmp */ +#define OPT_ISCMP() (last_set >= 0 && last_set + 7 == cslen) +#define OPT_CCOND() (cs[last_set + 1]) + +static long last_set = -1; + +static void i_set(int op, int rd) +{ + /* lt, gt, le, ge, eq, neq */ + static int ucond[] = {0x92, 0x97, 0x96, 0x93, 0x94, 0x95}; + static int scond[] = {0x9c, 0x9f, 0x9e, 0x9d, 0x94, 0x95}; + int cond = op & O_SIGNED ? 
scond[op & 0x0f] : ucond[op & 0x0f]; + char set[] = "\x0f\x00\xc0"; + set[1] = cond; + last_set = cslen; + os(set, 3); /* setl al */ + os("\x48\x0f\xb6\xc0", 4); /* movzx rax, al */ +} + +static void i_lnot(int rd) +{ + if (OPT_ISCMP()) { + cs[last_set + 1] ^= 0x01; + } else { + char cmp[] = "\x00\x83\xf8\x00"; + cmp[0] = REX(0, rd); + cmp[2] |= rd & 7; + os(cmp, 4); /* cmp rax, 0 */ + i_set(O_EQ, rd); + } +} + +static void jx(int x, int nbytes) +{ + char op[2] = {0x0f}; + if (nbytes == 1) { + op[0] = 0x70 | (x & 0x0f); + os(op, 1); /* jx $addr */ + } else { + op[1] = x; + os(op, 2); /* jx $addr */ + } + oi(0, nbytes); +} + +void i_jmp(int rn, int z, int nbytes) +{ + if (!nbytes) + return; + if (nbytes > 1) + nbytes = 4; + if (rn >= 0) { + if (OPT_ISCMP()) { + int cond = OPT_CCOND(); + cslen = last_set; + jx((!z ? cond : cond ^ 0x01) & ~0x10, nbytes); + last_set = -1; + } else { + i_tst(rn, rn); + jx(z ? 0x84 : 0x85, nbytes); + } + } else { + os(nbytes == 1 ? "\xeb" : "\xe9", 1); /* jmp $addr */ + oi(0, nbytes); + } +} + +long i_fill(long src, long dst, int nbytes) +{ + if (!nbytes) + return 0; + if (nbytes > 1) + nbytes = 4; + putint((void *) (cs + src - nbytes), dst - src, nbytes); + return dst - src; +} + +static void i_zx(int rd, int r1, int bits) +{ + if (bits & 0x07) { + i_shl_imm(O_SHL, rd, rd, LONGSZ * 8 - bits); + i_shl_imm(O_SHR, rd, rd, LONGSZ * 8 - bits); + } else { + mov_r2r(rd, r1, bits >> 3); + } +} + +static void i_sx(int rd, int r1, int bits) +{ + mov_r2r(rd, r1, BT_SIGNED | (bits >> 3)); +} + +void i_op(int op, int rd, int r1, int r2) +{ + if ((op & 0xf0) == 0x00) + i_add(op, r1, r1, r2); + if ((op & 0xf0) == 0x10) + i_shl(op, r1, r1, r2); + if ((op & 0xf0) == 0x20) { + if ((op & 0xff) == O_MUL) + i_mul(R_RAX, r1, r2); + if ((op & 0xff) == O_DIV) + i_div(op, R_RAX, r1, r2); + if ((op & 0xff) == O_MOD) + i_div(op, R_RDX, r1, r2); + return; + } + if ((op & 0xf0) == 0x30) { + i_cmp(r1, r2); + i_set(op, rd); + return; + } + if ((op & 0xf0) == 0x40) { 
/* uop */ + if ((op & 0xff) == O_NEG) + i_neg(r1); + if ((op & 0xff) == O_NOT) + i_not(r1); + if ((op & 0xff) == O_LNOT) + i_lnot(r1); + return; + } +} + +static void i_add_anyimm(int rd, int rn, long n) +{ + op_rm(I_LEA, rd, rn, n, LONGSZ); +} + +void i_op_imm(int op, int rd, int r1, long n) +{ + if ((op & 0xf0) == 0x00) { /* add */ + if (rd == r1 && i_imm(O_ADD, n)) + i_add_imm(op, rd, r1, n); + else + i_add_anyimm(rd, r1, n); /* NOTE(review): lea only adds; presumably reached just for O_ADD -- confirm in gen.c */ + } + if ((op & 0xf0) == 0x10) /* shl */ + i_shl_imm(op, rd, r1, n); + if ((op & 0xf0) == 0x20) /* mul */ + die("mul/imm not implemented"); + if ((op & 0xf0) == 0x30) { /* cmp */ + i_cmp_imm(r1, n); + i_set(op, rd); + } + if ((op & 0xf0) == 0x50) { /* etc */ + if ((op & 0xff) == O_ZX) + i_zx(rd, r1, n); + if ((op & 0xff) == O_SX) + i_sx(rd, r1, n); + if ((op & 0xff) == O_MOV) + i_mov(rd, r1); + } +} + +void i_memcpy(int r0, int r1, int r2) +{ + os("\xfc\xf3\xa4", 3); /* cld; rep movsb */ +} + +void i_memset(int r0, int r1, int r2) +{ + os("\xfc\xf3\xaa", 3); /* cld; rep stosb */ +} + +void i_call_reg(int rd) +{ + op_rr(I_CALL, 2, rd, LONGSZ); +} + +void i_call(char *sym, int off) +{ + os("\xe8", 1); /* call $x */ + if (!pass1) + out_rel(sym, OUT_CS | OUT_RLREL, cslen); + oi(-4 + off, 4); +} + +static int func_argc; +static int func_varg; +static int func_spsub; +static int func_sargs; +static int func_sregs; +static int func_initfp; +static int spsub_addr; + +int i_args(void) +{ + return 16; +} + +int i_sp(void) +{ + int i; + int n = 0; + for (i = 0; i < N_TMPS; i++) + if ((1 << tmpregs[i]) & func_sregs) + n += 8; + return -n; +} + +static void i_saveargs(void) +{ + int i; + os("\x58", 1); /* pop rax (return address) */ + for (i = N_ARGS - 1; i >= 0; i--) + if ((1 << argregs[i]) & func_sargs) + i_push(argregs[i]); + os("\x50", 1); /* push rax (return address back on top) */ +} + +void i_prolog(int argc, int varg, int sargs, int sregs, int initfp, int subsp) +{ + int i; + last_set = -1; + func_argc = argc; + func_varg = varg; + func_sargs = sargs; + func_sregs = sregs; + func_initfp =
initfp; + func_spsub = subsp; + if (func_sargs) + i_saveargs(); + if (initfp) { + os("\x55", 1); /* push rbp */ + os("\x48\x89\xe5", 3); /* mov rbp, rsp */ + } + if (func_sregs) { + for (i = N_TMPS - 1; i >= 0; i--) + if ((1 << tmpregs[i]) & func_sregs) + i_push(tmpregs[i]); + } + if (func_spsub) { + os("\x48\x81\xec", 3); /* sub rsp, $xxx (imm32 patched in i_epilog) */ + spsub_addr = cslen; + oi(0, 4); + } +} + +void i_epilog(int sp_max) +{ + int diff; + int nsregs = 0; + int nsargs = 0; + int i; + for (i = 0; i < N_TMPS; i++) + if ((1 << tmpregs[i]) & func_sregs) + nsregs++; + for (i = 0; i < N_ARGS; i++) + if ((1 << argregs[i]) & func_sargs) + nsargs++; + diff = ALIGN(-sp_max - nsregs * LONGSZ, 16); + /* forcing 16-byte alignment */ + diff = (nsregs + nsargs) & 1 ? diff + LONGSZ : diff; + if (func_spsub && diff) { + i_add_anyimm(R_RSP, R_RBP, -nsregs * LONGSZ); /* lea rsp, [rbp - nsregs * LONGSZ] */ + putint(cs + spsub_addr, diff, 4); /* fill in the prolog's sub rsp immediate */ + } + if (func_sregs) { + for (i = 0; i < N_TMPS; i++) + if ((1 << tmpregs[i]) & func_sregs) + i_pop(tmpregs[i]); + } + if (func_initfp) + os("\xc9", 1); /* leave */ + if (func_sargs) { + os("\xc2", 1); /* ret n */ + oi(nsargs * LONGSZ, 2); + } else { + os("\xc3", 1); /* ret */ + } +} + +void i_done(void) +{ +} diff --git a/x64.h b/x64.h new file mode 100644 index 0000000..0157312 --- /dev/null +++ b/x64.h @@ -0,0 +1,21 @@ +#define LONGSZ 8 /* word size */ +#define I_ARCH "__x86_64__" + +#define N_REGS 16 /* number of registers */ +#define N_ARGS 6 /* number of arg registers */ +#define N_TMPS 14 /* number of tmp registers */ +#define R_TMPS 0xffcf /* mask of tmp registers */ +#define R_ARGS 0x03c6 /* mask of arg registers */ +#define R_SAVED 0xf008 /* mask of callee-saved registers */ + +#define R_CALL 0x0001 /* mask of regs that can hold call dst */ +#define R_BYTE R_TMPS /* mask of regs that can perform byte-wide instructions */ + +/* special registers */ +#define REG_FP 5 /* frame pointer register */ +#define REG_SP 4 /* stack pointer register */ +#define REG_RET 0 /* returned value register
*/ +#define REG_FORK 0 /* result of conditional branches */ + +/* memory model */ +#define X64_ABS_RL (OUT_RL32) diff --git a/x86.c b/x86.c dissimilarity index 81% index b03a2fa..4fd0b22 100644 --- a/x86.c +++ b/x86.c @@ -1,1313 +1,544 @@ -#include -#include -#include -#include "gen.h" -#include "out.h" -#include "tok.h" - -/* variable location */ -#define LOC_REG 0x01 -#define LOC_MEM 0x02 -#define LOC_NUM 0x04 -#define LOC_SYM 0x08 -#define LOC_LOCAL 0x10 - -/* special registers */ -#define REG_FP R_RBP -#define REG_SP R_RSP -#define REG_RET R_RAX -#define REG_FORK R_RAX - -/* registers */ -#define R_RAX 0x00 -#define R_RCX 0x01 -#define R_RDX 0x02 -#define R_RBX 0x03 -#define R_RSP 0x04 -#define R_RBP 0x05 -#define R_RSI 0x06 -#define R_RDI 0x07 -/* x86_64 registers */ -#define R_R8 0x08 -#define R_R9 0x09 -#define R_R10 0x0a -#define R_R11 0x0b -#define R_R12 0x0c -#define R_R13 0x0d -#define R_R14 0x0e -#define R_R15 0x0f -#define N_REGS 16 -#define N_ARGS ARRAY_SIZE(argregs) -#define N_TMPS ARRAY_SIZE(tmpregs) -#define R_TMPS 0xffcf -#define R_ARGS 0x03c6 -#define R_SAVED 0xf008 - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) -void err(char *msg); - -static char cs[SECSIZE]; /* code segment */ -static int cslen; -static char ds[SECSIZE]; /* data segment */ -static int dslen; -static long bsslen; /* bss segment size */ - -static long sp; /* stack pointer offset from R_RBP */ -static long sp_max; /* maximum stack pointer offset */ -static long sp_tmp; /* sp for the first tmp on the stack */ -static long func_beg; /* function address in CS */ -static long func_fpsub; /* stack pointer sub address in CS */ -static int func_argc; /* # of args */ -static int func_vararg; /* vararg function */ - -#define TMP(i) (((i) < ntmp) ? 
&tmps[ntmp - 1 - (i)] : NULL) - -static struct tmp { - long addr; - char sym[NAMELEN]; - long off; /* offset from a symbol or a local */ - unsigned loc; /* variable location */ - unsigned bt; /* type of address; zero when not a pointer */ -} tmps[MAXTMP]; -static int ntmp; - -/* arch-specific functions */ -static void i_ldr(int l, int rd, int rn, int off, int bt); -static void i_mov(int rd, int rn, int bt); -static void i_add(int op, int rd, int rn, int rm); -static void i_shl(int op, int rd, int rm, int rs); -static void i_mul(int rd, int rn, int rm); -static void i_div(int op, int rd, int rn, int rm); -static void i_cmp(int rn, int rm); -static int i_decodeable(long imm); -static void i_add_imm(int op, int rd, int rn, long n); -static void i_shl_imm(int op, int rd, int rn, long n); -static void i_cmp_imm(int rn, long n); -static void i_add_anyimm(int rd, int rn, long n); -static void i_num(int rd, long n); -static void i_sym(int rd, char *sym, int off); -static void i_set(int op, int rd); -static void i_neg(int rd); -static void i_not(int rd); -static void i_lnot(int rd); -static void i_zx(int rd, int bits); -static void i_sx(int rd, int bits); -static void i_b(void); -static void i_bz(int rn, int z); -static void i_b_fill(long src, long dst); -static void i_call(char *sym, int off); -static void i_call_reg(int rd); -static void i_prolog(void); -static void i_epilog(void); - -static struct tmp *regs[N_REGS]; -static int tmpregs[] = {R_RAX, R_RDI, R_RSI, R_RDX, R_RCX, R_R8, R_R9, - R_R10, R_R11}; -static int argregs[] = {R_RDI, R_RSI, R_RDX, R_RCX, R_R8, R_R9}; - -/* labels and jmps */ -#define MAXJMPS (1 << 14) - -static long labels[MAXJMPS]; -static int nlabels; -static long jmp_loc[MAXJMPS]; -static int jmp_goal[MAXJMPS]; -static int njmps; - -void o_label(int id) -{ - if (id > nlabels) - nlabels = id + 1; - labels[id] = cslen; -} - -static void jmp_add(int id) -{ - if (njmps >= MAXJMPS) - err("nomem: MAXJMPS reached!\n"); - jmp_loc[njmps] = cslen - 4; - 
jmp_goal[njmps] = id; - njmps++; -} - -static void jmp_fill(void) -{ - int i; - for (i = 0; i < njmps; i++) - i_b_fill(jmp_loc[i], labels[jmp_goal[i]]); -} - -/* generating code */ - -static void putint(char *s, long n, int l) -{ - while (l--) { - *s++ = n; - n >>= 8; - } -} - -static void os(void *s, int n) -{ - while (n--) - cs[cslen++] = *(char *) (s++); -} - -static void oi(long n, int l) -{ - while (l--) { - cs[cslen++] = n; - n >>= 8; - } -} - -static long sp_push(int size) -{ - sp += size; - if (sp > sp_max) - sp_max = sp; - return sp; -} - -static void tmp_mem(struct tmp *tmp) -{ - int src = tmp->addr; - if (tmp->loc != LOC_REG) - return; - if (sp_tmp == -1) - sp_tmp = sp; - tmp->addr = -sp_push(LONGSZ); - i_ldr(0, src, REG_FP, tmp->addr, LONGSZ); - regs[src] = NULL; - tmp->loc = LOC_MEM; -} - -static void num_cast(struct tmp *t, unsigned bt) -{ - if (!(bt & BT_SIGNED) && BT_SZ(bt) != LONGSZ) - t->addr &= ((1l << (long) (BT_SZ(bt) * 8)) - 1); - if (bt & BT_SIGNED && BT_SZ(bt) != LONGSZ && - t->addr > (1l << (BT_SZ(bt) * 8 - 1))) - t->addr = -((1l << (BT_SZ(bt) * 8)) - t->addr); -} - -static void tmp_reg(struct tmp *tmp, int dst, int deref) -{ - int bt = tmp->bt; - if (!tmp->bt) - deref = 0; - if (deref) - tmp->bt = 0; - if (tmp->loc == LOC_NUM) { - i_num(dst, tmp->addr); - tmp->addr = dst; - regs[dst] = tmp; - tmp->loc = LOC_REG; - } - if (tmp->loc == LOC_SYM) { - i_sym(dst, tmp->sym, tmp->off); - tmp->addr = dst; - regs[dst] = tmp; - tmp->loc = LOC_REG; - } - if (tmp->loc == LOC_REG) { - if (deref) - i_ldr(1, dst, tmp->addr, 0, bt); - else if (dst != tmp->addr) - i_mov(dst, tmp->addr, LONGSZ); - regs[tmp->addr] = NULL; - } - if (tmp->loc == LOC_LOCAL) { - if (deref) - i_ldr(1, dst, REG_FP, tmp->addr + tmp->off, bt); - else - i_add_anyimm(dst, REG_FP, tmp->addr + tmp->off); - } - if (tmp->loc == LOC_MEM) { - i_ldr(1, dst, REG_FP, tmp->addr, LONGSZ); - if (deref) - i_ldr(1, dst, dst, 0, bt); - } - tmp->addr = dst; - regs[dst] = tmp; - tmp->loc = LOC_REG; -} 
- -static void reg_free(int reg) -{ - int i; - if (!regs[reg]) - return; - for (i = 0; i < N_TMPS; i++) - if (!regs[tmpregs[i]]) { - tmp_reg(regs[reg], tmpregs[i], 0); - return; - } - tmp_mem(regs[reg]); -} - -static void reg_for(int reg, struct tmp *t) -{ - if (regs[reg] && regs[reg] != t) - reg_free(reg); -} - -static void tmp_mv(struct tmp *t, int reg) -{ - reg_for(reg, t); - tmp_reg(t, reg, 0); -} - -static void tmp_to(struct tmp *t, int reg) -{ - reg_for(reg, t); - tmp_reg(t, reg, 1); -} - -static void tmp_drop(int n) -{ - int i; - for (i = ntmp - n; i < ntmp; i++) - if (tmps[i].loc == LOC_REG) - regs[tmps[i].addr] = NULL; - ntmp -= n; -} - -static void tmp_pop(int reg) -{ - struct tmp *t = TMP(0); - tmp_to(t, reg); - tmp_drop(1); -} - -static struct tmp *tmp_new(void) -{ - return &tmps[ntmp++]; -} - -static void tmp_push(int reg) -{ - struct tmp *t = tmp_new(); - t->addr = reg; - t->bt = 0; - t->loc = LOC_REG; - regs[reg] = t; -} - -void o_local(long addr) -{ - struct tmp *t = tmp_new(); - t->addr = -addr; - t->loc = LOC_LOCAL; - t->bt = 0; - t->off = 0; -} - -void o_num(long num) -{ - struct tmp *t = tmp_new(); - t->addr = num; - t->bt = 0; - t->loc = LOC_NUM; -} - -void o_sym(char *name) -{ - struct tmp *t = tmp_new(); - strcpy(t->sym, name); - t->loc = LOC_SYM; - t->bt = 0; - t->off = 0; -} - -void o_tmpdrop(int n) -{ - if (n == -1 || n > ntmp) - n = ntmp; - tmp_drop(n); - if (!ntmp) { - if (sp_tmp != -1) - sp = sp_tmp; - sp_tmp = -1; - } -} - -/* make sure tmps remain intact after a conditional expression */ -void o_fork(void) -{ - int i; - for (i = 0; i < ntmp - 1; i++) - tmp_mem(&tmps[i]); -} - -void o_forkpush(void) -{ - tmp_pop(REG_FORK); -} - -void o_forkjoin(void) -{ - tmp_push(REG_FORK); -} - -void o_tmpswap(void) -{ - struct tmp *t1 = TMP(0); - struct tmp *t2 = TMP(1); - struct tmp t; - memcpy(&t, t1, sizeof(t)); - memcpy(t1, t2, sizeof(t)); - memcpy(t2, &t, sizeof(t)); - if (t1->loc == LOC_REG) - regs[t1->addr] = t1; - if (t2->loc == LOC_REG) - 
regs[t2->addr] = t2; -} - -static int reg_get(int mask) -{ - int i; - for (i = 0; i < N_TMPS; i++) - if ((1 << tmpregs[i]) & mask && !regs[tmpregs[i]]) - return tmpregs[i]; - for (i = 0; i < N_TMPS; i++) - if ((1 << tmpregs[i]) & mask) { - reg_free(tmpregs[i]); - return tmpregs[i]; - } - return 0; -} - -static int reg_fortmp(struct tmp *t, int notmask) -{ - if (t->loc == LOC_REG && !(notmask & (1 << t->addr))) - return t->addr; - return reg_get(~notmask); -} - -static void tmp_copy(struct tmp *t1) -{ - struct tmp *t2 = tmp_new(); - memcpy(t2, t1, sizeof(*t1)); - if (!(t1->loc & (LOC_REG | LOC_MEM))) - return; - if (t1->loc == LOC_MEM) { - tmp_mv(t2, reg_get(~0)); - } else if (t1->loc == LOC_REG) { - t2->addr = reg_fortmp(t2, 1 << t1->addr); - i_mov(t2->addr, t1->addr, LONGSZ); - regs[t2->addr] = t2; - } -} - -void o_tmpcopy(void) -{ - tmp_copy(TMP(0)); -} - -void o_cast(unsigned bt) -{ - struct tmp *t = TMP(0); - if (!t->bt && t->loc == LOC_NUM) { - num_cast(t, bt); - return; - } - if (BT_SZ(bt) != LONGSZ) { - int reg = reg_fortmp(t, 0); - tmp_to(t, reg); - if (bt & BT_SIGNED) - i_sx(reg, BT_SZ(bt) * 8); - else - i_zx(reg, BT_SZ(bt) * 8); - } -} - -void o_func_beg(char *name, int argc, int global, int vararg) -{ - out_sym(name, (global ? 
OUT_GLOB : 0) | OUT_CS, cslen, 0); - func_argc = argc; - func_vararg = vararg; - i_prolog(); - sp = 0; - sp_max = sp; - ntmp = 0; - sp_tmp = -1; - nlabels = 0; - njmps = 0; - memset(regs, 0, sizeof(regs)); -} - -void o_deref(unsigned bt) -{ - struct tmp *t = TMP(0); - if (t->bt) - tmp_to(t, reg_fortmp(t, 0)); - t->bt = bt; -} - -void o_load(void) -{ - struct tmp *t = TMP(0); - tmp_to(t, reg_fortmp(t, 0)); -} - -#define TMP_NUM(t) ((t)->loc == LOC_NUM && !(t)->bt) -#define LOCAL_PTR(t) ((t)->loc == LOC_LOCAL && !(t)->bt) -#define SYM_PTR(t) ((t)->loc == LOC_SYM && !(t)->bt) - -int o_popnum(long *c) -{ - struct tmp *t = TMP(0); - if (!TMP_NUM(t)) - return 1; - *c = t->addr; - tmp_drop(1); - return 0; -} - -void o_ret(int rets) -{ - if (rets) - tmp_pop(REG_RET); - else - i_num(REG_RET, 0); - o_jmp(0); -} - -void o_func_end(void) -{ - o_label(0); - jmp_fill(); - i_epilog(); -} - -long o_mklocal(int sz) -{ - return sp_push(ALIGN(sz, LONGSZ)); -} - -void o_rmlocal(long addr, int sz) -{ - sp = addr - ALIGN(sz, LONGSZ); -} - -long o_arg2loc(int i) -{ - return -LONGSZ * (i + 2); -} - -void o_assign(unsigned bt) -{ - struct tmp *t1 = TMP(0); - struct tmp *t2 = TMP(1); - int r1 = reg_fortmp(t1, 0); - int r2 = reg_fortmp(t2, 1 << r1); - int off = 0; - tmp_to(t1, r1); - if (t2->bt) - tmp_to(t2, r2); - if (t2->loc == LOC_LOCAL) { - r2 = REG_FP; - off = t2->addr + t2->off; - } else { - tmp_to(t2, r2); - } - i_ldr(0, r1, r2, off, bt); - tmp_drop(2); - tmp_push(r1); -} - -static long cu(int op, long i) -{ - switch (op & 0xff) { - case O_NEG: - return -i; - case O_NOT: - return ~i; - case O_LNOT: - return !i; - } - return 0; -} - -static int c_uop(int op) -{ - struct tmp *t1 = TMP(0); - if (!TMP_NUM(t1)) - return 1; - tmp_drop(1); - o_num(cu(op, t1->addr)); - return 0; -} - -static long cb(int op, long a, long b) -{ - switch (op & 0xff) { - case O_ADD: - return a + b; - case O_SUB: - return a - b; - case O_AND: - return a & b; - case O_OR: - return a | b; - case O_XOR: - return a ^ 
b; - case O_MUL: - return a * b; - case O_DIV: - return a / b; - case O_MOD: - return a % b; - case O_SHL: - return a << b; - case O_SHR: - if (op & O_SIGNED) - return a >> b; - else - return (unsigned long) a >> b; - case O_LT: - return a < b; - case O_GT: - return a > b; - case O_LE: - return a <= b; - case O_GE: - return a >= b; - case O_EQ: - return a == b; - case O_NEQ: - return a != b; - } - return 0; -} - -static int c_bop(int op) -{ - struct tmp *t1 = TMP(0); - struct tmp *t2 = TMP(1); - int locs = LOCAL_PTR(t1) + LOCAL_PTR(t2); - int syms = SYM_PTR(t1) + SYM_PTR(t2); - int nums = TMP_NUM(t1) + TMP_NUM(t2); - if (syms + locs == 2 || syms + nums + locs != 2) - return 1; - if (nums == 1) - if ((op & 0xff) != O_ADD && ((op & 0xff) != O_SUB || TMP_NUM(t2))) - return 1; - if (nums == 1) { - long o1 = TMP_NUM(t1) ? t1->addr : t1->off; - long o2 = TMP_NUM(t2) ? t2->addr : t2->off; - long ret = cb(op, o2, o1); - if (!TMP_NUM(t1)) - o_tmpswap(); - t2->off = ret; - tmp_drop(1); - } else { - long ret = cb(op, t2->addr, t1->addr); - tmp_drop(2); - o_num(ret); - } - return 0; -} - -void o_uop(int op) -{ - int r1 = (op & 0xff) == O_LNOT ? R_RAX : reg_fortmp(TMP(0), 0); - if (!c_uop(op)) - return; - tmp_to(TMP(0), r1); - switch (op & 0xff) { - case O_NEG: - i_neg(r1); - break; - case O_NOT: - i_not(r1); - break; - case O_LNOT: - i_lnot(r1); - break; - } -} - -static void bin_regs(int *r1, int *r2, int mask1, int mask2) -{ - struct tmp *t2 = TMP(0); - struct tmp *t1 = TMP(1); - *r2 = reg_fortmp(t2, ~mask1); - tmp_to(t2, *r2); - *r1 = reg_fortmp(t1, ~mask2 | (1 << *r2)); - tmp_pop(*r2); - tmp_pop(*r1); -} - -static int bop_imm(int *r1, long *n, int swap) -{ - struct tmp *t1 = TMP(0); - struct tmp *t2 = TMP(1); - if (!TMP_NUM(t1) && (!swap || !TMP_NUM(t2))) - return 1; - *n = TMP_NUM(t1) ? 
t1->addr : t2->addr; - if (!i_decodeable(*n)) - return 1; - if (!TMP_NUM(t1)) - o_tmpswap(); - *r1 = reg_fortmp(t2, 0); - tmp_drop(1); - tmp_pop(*r1); - return 0; -} - -static void bin_add(int op) -{ - int r1, r2; - long n; - if (!bop_imm(&r1, &n, (op & 0xff) != O_SUB)) { - i_add_imm(op, r1, r1, n); - } else { - bin_regs(&r1, &r2, R_TMPS, R_TMPS); - i_add(op, r1, r1, r2); - } - tmp_push(r1); -} - -static void bin_shx(int op) -{ - int r1, r2; - long n; - if (!bop_imm(&r1, &n, 0)) { - i_shl_imm(op, r1, r1, n); - } else { - bin_regs(&r1, &r2, 1 << R_RCX, R_TMPS); - i_shl(op, r1, r1, r2); - } - tmp_push(r1); -} - -static int log2a(unsigned long n) -{ - int i = 0; - for (i = 0; i < LONGSZ * 8; i++) - if (n & (1u << i)) - break; - if (i == LONGSZ * 8 || !(n >> (i + 1))) - return i; - return -1; -} - -/* optimized version of mul/div/mod for powers of two */ -static int mul_2(int op) -{ - struct tmp *t1 = TMP(0); - struct tmp *t2 = TMP(1); - long n; - int r2; - int p; - if ((op & 0xff) == O_MUL && t2->loc == LOC_NUM && !t2->bt) - o_tmpswap(); - if (t1->loc != LOC_NUM || t1->bt) - return 1; - n = t1->addr; - p = log2a(n); - if (n && p == -1) - return 1; - if ((op & 0xff) == O_MUL) { - tmp_drop(1); - if (n == 1) - return 0; - if (n == 0) { - tmp_drop(1); - o_num(0); - return 0; - } - r2 = reg_fortmp(t2, 0); - tmp_to(t2, r2); - i_shl_imm(O_SHL, r2, r2, p); - return 0; - } - if (op == O_DIV) { - tmp_drop(1); - if (n == 1) - return 0; - r2 = reg_fortmp(t2, 0); - tmp_to(t2, r2); - i_shl_imm((op & O_SIGNED) | O_SHR, r2, r2, p); - return 0; - } - if (op == O_MOD) { - tmp_drop(1); - if (n == 1) { - tmp_drop(1); - o_num(0); - return 0; - } - r2 = reg_fortmp(t2, 0); - tmp_to(t2, r2); - i_zx(r2, p); - return 0; - } - return 1; -} - -static void mulop(int *r1, int *r2, int rop) -{ - struct tmp *t1 = TMP(0); - struct tmp *t2 = TMP(1); - if (t1->loc & LOC_REG && t1->addr != R_RAX && t1->addr != R_RDX) - rop = t1->addr; - tmp_to(t1, rop); - tmp_to(t2, R_RAX); - if (rop != R_RDX) - 
reg_free(R_RDX); - tmp_drop(2); - *r1 = rop; - *r2 = R_RAX; -} - -static void bin_mul(int op) -{ - int r1, r2; - if (!mul_2(op)) - return; - mulop(&r1, &r2, (op & 0xff) == O_MUL ? R_RDX : R_RCX); - if ((op & 0xff) == O_MUL) { - i_mul(R_RAX, r1, r2); - tmp_push(R_RAX); - } - if ((op & 0xff) == O_DIV) { - i_div(op, R_RAX, r1, r2); - tmp_push(R_RAX); - } - if ((op & 0xff) == O_MOD) { - i_div(op, R_RDX, r1, r2); - tmp_push(R_RDX); - } -} - -static void bin_cmp(int op) -{ - int r1, r2; - long n; - if (!bop_imm(&r1, &n, (op & 0xff) == O_EQ || (op & 0xff) == O_NEQ)) { - i_cmp_imm(r1, n); - } else { - bin_regs(&r1, &r2, R_TMPS, R_TMPS); - i_cmp(r1, r2); - } - r1 = R_RAX; - reg_free(r1); - i_set(op, r1); - tmp_push(r1); -} - -void o_bop(int op) -{ - if (!c_bop(op)) - return; - if ((op & 0xf0) == 0x00) - bin_add(op); - if ((op & 0xf0) == 0x10) - bin_shx(op); - if ((op & 0xf0) == 0x20) - bin_mul(op); - if ((op & 0xf0) == 0x30) - bin_cmp(op); -} - -void o_memcpy(void) -{ - struct tmp *t0 = TMP(0); - struct tmp *t1 = TMP(1); - struct tmp *t2 = TMP(2); - tmp_to(t0, R_RCX); - tmp_to(t1, R_RSI); - tmp_to(t2, R_RDI); - os("\xfc\xf3\xa4", 3); /* cld; rep movs */ - tmp_drop(2); -} - -void o_memset(void) -{ - struct tmp *t0 = TMP(0); - struct tmp *t1 = TMP(1); - struct tmp *t2 = TMP(2); - tmp_to(t0, R_RCX); - tmp_to(t1, R_RAX); - tmp_to(t2, R_RDI); - os("\xfc\xf3\xaa", 3); /* cld; rep stosb */ - tmp_drop(2); -} - -static void jxz(int id, int z) -{ - int r = reg_fortmp(TMP(0), 0); - tmp_pop(r); - i_bz(r, z); - jmp_add(id); -} - -void o_jz(int id) -{ - jxz(id, 1); -} - -void o_jnz(int id) -{ - jxz(id, 0); -} - -void o_jmp(int id) -{ - i_b(); - jmp_add(id); -} - -void o_call(int argc, int rets) -{ - struct tmp *t; - int i; - int aregs = MIN(N_ARGS, argc); - for (i = 0; i < N_TMPS; i++) - if (regs[tmpregs[i]] && regs[tmpregs[i]] - tmps < ntmp - argc) - tmp_mem(regs[tmpregs[i]]); - if (argc > aregs) { - sp_push(LONGSZ * (argc - aregs)); - for (i = argc - 1; i >= aregs; --i) { - int reg = 
reg_fortmp(TMP(0), 0); - tmp_pop(reg); - i_ldr(0, reg, REG_SP, (i - aregs) * LONGSZ, LONGSZ); - } - } - for (i = aregs - 1; i >= 0; --i) - tmp_to(TMP(aregs - i - 1), argregs[i]); - tmp_drop(aregs); - t = TMP(0); - if (t->loc == LOC_SYM && !t->bt) { - i_call(t->sym, t->off); - tmp_drop(1); - } else { - int reg = reg_fortmp(t, 0); - tmp_pop(reg); - i_call_reg(reg); - } - if (rets) - tmp_push(REG_RET); -} - -void o_mkbss(char *name, int size, int global) -{ - out_sym(name, OUT_BSS | (global ? OUT_GLOB : 0), bsslen, size); - bsslen += ALIGN(size, OUT_ALIGNMENT); -} - -#define MAXDATS (1 << 10) -static char dat_names[MAXDATS][NAMELEN]; -static int dat_offs[MAXDATS]; -static int ndats; - -void *o_mkdat(char *name, int size, int global) -{ - void *addr = ds + dslen; - int idx = ndats++; - if (idx >= MAXDATS) - err("nomem: MAXDATS reached!\n"); - strcpy(dat_names[idx], name); - dat_offs[idx] = dslen; - out_sym(name, OUT_DS | (global ? OUT_GLOB : 0), dslen, size); - dslen += ALIGN(size, OUT_ALIGNMENT); - return addr; -} - -static int dat_off(char *name) -{ - int i; - for (i = 0; i < ndats; i++) - if (!strcmp(name, dat_names[i])) - return dat_offs[i]; - return 0; -} - -void o_datset(char *name, int off, unsigned bt) -{ - struct tmp *t = TMP(0); - int sym_off = dat_off(name) + off; - if (t->loc == LOC_NUM && !t->bt) { - num_cast(t, bt); - memcpy(ds + sym_off, &t->addr, BT_SZ(bt)); - } - if (t->loc == LOC_SYM && !t->bt) { - out_rel(t->sym, OUT_DS, sym_off); - memcpy(ds + sym_off, &t->off, BT_SZ(bt)); - } - tmp_drop(1); -} - -void o_write(int fd) -{ - out_write(fd, cs, cslen, ds, dslen); -} - -/* X86 arch specific functions */ - -#define I_MOV 0x89 -#define I_MOVI 0xc7 -#define I_MOVIR 0xb8 -#define I_MOVR 0x8b -#define I_MOVSXD 0x63 -#define I_SHX 0xd3 -#define I_CMP 0x3b -#define I_TST 0x85 -#define I_LEA 0x8d -#define I_NOT 0xf7 -#define I_CALL 0xff -#define I_MUL 0xf7 -#define I_XOR 0x33 -#define I_TEST 0x85 -#define I_CQO 0x99 -#define I_PUSH 0x50 -#define I_POP 0x58 - 
-#define OP2(o2, o1) (0x010000 | ((o2) << 8) | (o1)) -#define O2(op) (((op) >> 8) & 0xff) -#define O1(op) ((op) & 0xff) -#define MODRM(m, r1, r2) ((m) << 6 | (r1) << 3 | (r2)) -#define REX(r1, r2) (0x48 | (((r1) & 8) >> 1) | (((r2) & 8) >> 3)) - -/* for optimizing cmp + jmp */ -#define OPT_ISCMP() (last_set + 7 == cslen) -#define OPT_CCOND() (cs[last_set + 1]) - -static long last_set = -1; - -static void op_x(int op, int r1, int r2, int bt) -{ - int sz = BT_SZ(bt); - int rex = 0; - if (sz == 8) - rex |= 8; - if (sz == 1) - rex |= 0x40; - if (r1 & 0x8) - rex |= 4; - if (r2 & 0x8) - rex |= 1; - if (sz == 2) - oi(0x66, 1); - if (rex) - oi(rex | 0x40, 1); - if (op & 0x10000) - oi(O2(op), 1); - oi(sz == 1 ? O1(op) & ~0x1 : O1(op), 1); -} - -#define op_mr op_rm - -/* op_*(): r=reg, m=mem, i=imm, s=sym */ -static void op_rm(int op, int src, int base, int off, int bt) -{ - int dis = off == (char) off ? 1 : 4; - int mod = dis == 4 ? 2 : 1; - if (!off && (base & 7) != R_RBP) - mod = 0; - op_x(op, src, base, bt); - oi(MODRM(mod, src & 0x07, base & 0x07), 1); - if ((base & 7) == R_RSP) - oi(0x24, 1); - if (mod) - oi(off, dis); -} - -static void op_rr(int op, int src, int dst, int bt) -{ - op_x(op, src, dst, bt); - oi(MODRM(3, src & 0x07, dst & 0x07), 1); -} - -#define movrx_bt(bt) (((bt) == 4) ? 4 : LONGSZ) - -static int movrx_op(int bt, int mov) -{ - int sz = BT_SZ(bt); - if (sz == 4) - return bt & BT_SIGNED ? I_MOVSXD : mov; - if (sz == 2) - return OP2(0x0f, bt & BT_SIGNED ? 0xbf : 0xb7); - if (sz == 1) - return OP2(0x0f, bt & BT_SIGNED ? 
0xbe : 0xb6); - return mov; -} - -static void mov_r2r(int r1, int r2, unsigned bt) -{ - if (r1 != r2 || BT_SZ(bt) != LONGSZ) - op_rr(movrx_op(bt, I_MOV), r1, r2, movrx_bt(bt)); -} - -static void mov_m2r(int dst, int base, int off, int bt) -{ - op_rm(movrx_op(bt, I_MOVR), dst, base, off, movrx_bt(bt)); -} - -static void i_zx(int rd, int bits) -{ - if (bits & 0x07) { - i_shl_imm(O_SHL, rd, rd, LONGSZ * 8 - bits); - i_shl_imm(O_SHR, rd, rd, LONGSZ * 8 - bits); - } else { - mov_r2r(rd, rd, bits >> 3); - } -} - -static void i_sx(int rd, int bits) -{ - mov_r2r(rd, rd, BT_SIGNED | (bits >> 3)); -} - -static void i_add(int op, int rd, int rn, int rm) -{ - /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */ - static int rx[] = {0003, 0053, 0043, 0013, 0063}; - if (rn != rd) - die("this is cisc!\n"); - op_rr(rx[op & 0x0f], rd, rm, LONGSZ); -} - -static void i_add_imm(int op, int rd, int rn, long n) -{ - /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */ - static int rx[] = {0xc0, 0xe8, 0xe0, 0xc8, 0xf0}; - unsigned char s[4] = {REX(0, rd), 0x83, rx[op & 0x0f] | (rd & 7), n & 0xff}; - if (rn != rd) - die("this is cisc!\n"); - os((void *) s, 4); -} - -static int i_decodeable(long imm) -{ - return imm <= 127 && imm >= -128; -} - -static void i_num(int rd, long n) -{ - if (!n) { - op_rr(I_XOR, rd, rd, 4); - return; - } - if (n < 0 && -n <= 0xffffffff) { - op_rr(I_MOVI, 0, rd, LONGSZ); - oi(n, 4); - } else { - int len = 8; - if (n > 0 && n <= 0xffffffff) - len = 4; - op_x(I_MOVIR + (rd & 7), 0, rd, len); - oi(n, len); - } -} - -static void i_add_anyimm(int rd, int rn, long n) -{ - op_rm(I_LEA, rd, rn, n, LONGSZ); -} - -static void i_mul(int rd, int rn, int rm) -{ - if (rn != R_RDX) - i_num(R_RDX, 0); - op_rr(I_MUL, 4, rn, LONGSZ); -} - -static void i_div(int op, int rd, int rn, int rm) -{ - if (rn != R_RDX) { - if (op & O_SIGNED) - op_x(I_CQO, R_RAX, R_RDX, LONGSZ); - else - i_num(R_RDX, 0); - } - op_rr(I_MUL, op & O_SIGNED ? 
7 : 6, rn, LONGSZ); -} - -static void i_tst(int rn, int rm) -{ - op_rr(I_TST, rn, rm, LONGSZ); -} - -static void i_cmp(int rn, int rm) -{ - op_rr(I_CMP, rn, rm, LONGSZ); -} - -static void i_cmp_imm(int rn, long n) -{ - unsigned char s[4] = {REX(0, rn), 0x83, 0xf8 | rn, n & 0xff}; - os(s, 4); -} - -static void i_set(int op, int rd) -{ - /* lt, gt, le, ge, eq, neq */ - static int ucond[] = {0x92, 0x97, 0x96, 0x93, 0x94, 0x95}; - static int scond[] = {0x9c, 0x9f, 0x9e, 0x9d, 0x94, 0x95}; - int cond = op & O_SIGNED ? scond[op & 0x0f] : ucond[op & 0x0f]; - char set[] = "\x0f\x00\xc0"; - if (rd != R_RAX) - die("set works only with R_RAX\n"); - set[1] = cond; - last_set = cslen; - os(set, 3); /* setl al */ - os("\x48\x0f\xb6\xc0", 4); /* movzx rax, al */ -} - -static void i_shl(int op, int rd, int rm, int rs) -{ - int sm = 4; - if ((op & 0x0f) == 1) - sm = op & O_SIGNED ? 7 : 5; - if (rd != rm) - die("this is cisc!\n"); - op_rr(I_SHX, sm, rd, LONGSZ); -} - -static void i_shl_imm(int op, int rd, int rn, long n) -{ - int sm = (op & 0x1) ? (op & O_SIGNED ? 
0xf8 : 0xe8) : 0xe0 ; - char s[4] = {REX(0, rn), 0xc1, sm | (rn & 7), n & 0xff}; - if (rd != rn) - die("this is cisc!\n"); - os(s, 4); -} - -static void i_mov(int rd, int rn, int bt) -{ - op_rr(movrx_op(bt, I_MOVR), rd, rn, movrx_bt(bt)); -} - -static void i_ldr(int l, int rd, int rn, int off, int bt) -{ - if (l) - mov_m2r(rd, rn, off, bt); - else - op_rm(I_MOV, rd, rn, off, bt); -} - -static void i_sym(int rd, char *sym, int off) -{ - op_x(I_MOVIR + (rd & 7), 0, rd, LONGSZ); - out_rel(sym, OUT_CS, cslen); - oi(off, 8); -} - -static void i_neg(int rd) -{ - op_rr(I_NOT, 3, rd, LONGSZ); -} - -static void i_not(int rd) -{ - op_rr(I_NOT, 2, rd, LONGSZ); -} - -static void i_lnot(int rd) -{ - if (OPT_ISCMP()) { - cs[last_set + 1] ^= 0x01; - } else { - char cmp[] = "\x00\x83\xf8\x00"; - cmp[0] = REX(0, rd); - cmp[2] |= rd & 7; - os(cmp, 4); /* cmp rax, 0 */ - i_set(O_EQ, rd); - } -} - -static void jx(int x, long addr) -{ - char op[2] = {0x0f}; - op[1] = x; - os(op, 2); /* jx $addr */ - oi(addr - cslen - 4, 4); -} - -static void i_bz(int rn, int z) -{ - if (OPT_ISCMP()) { - int cond = OPT_CCOND(); - cslen = last_set; - jx((!z ? cond : cond ^ 0x01) & ~0x10, 0); - last_set = -1; - } else { - i_tst(rn, rn); - jx(z ? 0x84 : 0x85, 0); - } -} - -static void i_b(void) -{ - os("\xe9", 1); /* jmp $addr */ - oi(0, 4); -} - -static void i_b_fill(long src, long dst) -{ - putint((void *) (cs + src), (dst - src) - 4, 4); -} - -static void i_call_reg(int rd) -{ - op_rr(I_CALL, 2, rd, LONGSZ); -} - -static void i_call(char *sym, int off) -{ - os("\xe8", 1); /* call $x */ - out_rel(sym, OUT_CS | OUT_REL, cslen); - oi(-4 + off, 4); -} - -static void i_push(int reg) -{ - op_x(I_PUSH | (reg & 0x7), 0, reg, 4); -} - -static void i_pop(int reg) -{ - op_x(I_POP | (reg & 0x7), 0, reg, 4); -} - -static void i_saveargs(void) -{ - int i; - int saved = func_vararg ? 
N_ARGS : MIN(N_ARGS, func_argc); - os("\x58", 1); /* pop rax */ - for (i = saved - 1; i >= 0; i--) - i_push(argregs[i]); - os("\x50", 1); /* push rax */ -} - -static void i_prolog(void) -{ - last_set = -1; - i_saveargs(); - os("\x55", 1); /* push rbp */ - os("\x48\x89\xe5", 3); /* mov rbp, rsp */ - os("\x48\x81\xec", 3); /* sub rsp, $xxx */ - func_fpsub = cslen; - oi(0, 4); -} - -static void i_epilog(void) -{ - int saved = func_vararg ? N_ARGS : MIN(N_ARGS, func_argc); - int diff = ALIGN(sp_max, 16) + (saved & 1 ? 8 : 0); - if (diff) { - putint(cs + func_fpsub, diff, 4); - } - os("\xc9", 1); /* leave */ - if (saved) { - os("\xc2", 1); /* ret n */ - oi(saved * LONGSZ, 2); - } else { - os("\xc3", 1); /* ret */ - } -} +#include "tok.h" +#include "gen.h" +#include "out.h" + +/* registers */ +#define R_RAX 0x00 +#define R_RCX 0x01 +#define R_RDX 0x02 +#define R_RBX 0x03 +#define R_RSP 0x04 +#define R_RBP 0x05 +#define R_RSI 0x06 +#define R_RDI 0x07 + +/* x86 opcodes */ +#define I_MOV 0x89 +#define I_MOVI 0xc7 +#define I_MOVIR 0xb8 +#define I_MOVR 0x8b +#define I_MOVSXD 0x63 +#define I_SHX 0xd3 +#define I_CMP 0x3b +#define I_TST 0x85 +#define I_LEA 0x8d +#define I_NOT 0xf7 +#define I_CALL 0xff +#define I_MUL 0xf7 +#define I_XOR 0x33 +#define I_TEST 0x85 +#define I_CQO 0x99 +#define I_PUSH 0x50 +#define I_POP 0x58 + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) + +int tmpregs[] = {0, 1, 2, 6, 7, 3}; +int argregs[] = {0}; + +#define OP2(o2, o1) (0x010000 | ((o2) << 8) | (o1)) +#define O2(op) (((op) >> 8) & 0xff) +#define O1(op) ((op) & 0xff) +#define MODRM(m, r1, r2) ((m) << 6 | (r1) << 3 | (r2)) + +static void putint(char *s, long n, int l) +{ + while (l--) { + *s++ = n; + n >>= 8; + } +} + +static void op_x(int op, int r1, int r2, int bt) +{ + int sz = BT_SZ(bt); + if (sz == 2) + oi(0x66, 1); + if (op & 0x10000) + oi(O2(op), 1); + oi(sz == 1 ? 
O1(op) & ~0x1 : O1(op), 1); +} + +#define op_mr op_rm + +/* op_*(): r=reg, m=mem, i=imm, s=sym */ +static void op_rm(int op, int src, int base, int off, int bt) +{ + int dis = off == (char) off ? 1 : 4; + int mod = dis == 4 ? 2 : 1; + if (!off && (base & 7) != R_RBP) + mod = 0; + op_x(op, src, base, bt); + oi(MODRM(mod, src & 0x07, base & 0x07), 1); + if ((base & 7) == R_RSP) + oi(0x24, 1); + if (mod) + oi(off, dis); +} + +static void op_rr(int op, int src, int dst, int bt) +{ + op_x(op, src, dst, bt); + oi(MODRM(3, src & 0x07, dst & 0x07), 1); +} + +#define movrx_bt(bt) (LONGSZ) + +static int movrx_op(int bt, int mov) +{ + int sz = BT_SZ(bt); + if (sz == 2) + return OP2(0x0f, bt & BT_SIGNED ? 0xbf : 0xb7); + if (sz == 1) + return OP2(0x0f, bt & BT_SIGNED ? 0xbe : 0xb6); + return mov; +} + +static void mov_r2r(int rd, int r1, unsigned bt) +{ + if (rd != r1 || BT_SZ(bt) != LONGSZ) + op_rr(movrx_op(bt, I_MOVR), rd, r1, movrx_bt(bt)); +} + +static void mov_m2r(int dst, int base, int off, int bt) +{ + op_rm(movrx_op(bt, I_MOVR), dst, base, off, movrx_bt(bt)); +} + +int i_imm(int op, long imm) +{ + if ((op & 0xf0) == 0x20) + return 0; + return imm <= 127 && imm >= -128; +} + +static void i_push(int reg) +{ + op_x(I_PUSH | (reg & 0x7), 0, reg, 4); +} + +static void i_pop(int reg) +{ + op_x(I_POP | (reg & 0x7), 0, reg, 4); +} + +void i_mov(int rd, int rn) +{ + op_rr(movrx_op(LONGSZ, I_MOVR), rd, rn, movrx_bt(LONGSZ)); +} + +void i_load(int rd, int rn, int off, int bt) +{ + mov_m2r(rd, rn, off, bt); +} + +void i_save(int rd, int rn, int off, int bt) +{ + op_rm(I_MOV, rd, rn, off, bt); +} + +void i_reg(int op, int *rd, int *r1, int *r2, int *tmp) +{ + *rd = 0; + *r1 = R_TMPS; + *r2 = op & O_IMM ? 0 : R_TMPS; + *tmp = 0; + if ((op & 0xf0) == 0x00) /* add */ + return; + if ((op & 0xf0) == 0x10) { /* shl */ + if (~op & O_IMM) { + *r2 = 1 << R_RCX; + *r1 = R_TMPS & ~*r2; + } + return; + } + if ((op & 0xf0) == 0x20) { /* mul */ + *rd = (op & 0xff) == O_MOD ? 
(1 << R_RDX) : (1 << R_RAX); + *r1 = (1 << R_RAX); + *r2 = R_TMPS & ~*rd & ~*r1; + if ((op & 0xff) == O_DIV) + *r2 &= ~(1 << R_RDX); + *tmp = (1 << R_RDX) | (1 << R_RAX); + return; + } + if ((op & 0xf0) == 0x30) { /* cmp */ + *rd = 1 << R_RAX; + return; + } + if ((op & 0xf0) == 0x40) { /* uop */ + *r2 = 0; + if ((op & 0xff) == O_LNOT) + *r1 = 1 << R_RAX; + return; + } + if ((op & 0xf0) == 0x50) { /* etc */ + if (op == O_MSET) { + *rd = 1 << R_RDI; + *r1 = 1 << R_RAX; + *r2 = 1 << R_RCX; + } + if (op == O_MCPY) { + *rd = 1 << R_RDI; + *r1 = 1 << R_RSI; + *r2 = 1 << R_RCX; + } + if (op == O_SX || op == O_ZX) { + *rd = R_TMPS; + *r1 = R_BYTE; + *r2 = 0; + } + if (op == O_MOV) { + *rd = R_TMPS; + *r2 = 0; + } + return; + } +} + +static void i_add(int op, int rd, int r1, int r2) +{ + /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */ + static int rx[] = {0003, 0053, 0043, 0013, 0063}; + op_rr(rx[op & 0x0f], rd, r2, LONGSZ); +} + +static void i_add_imm(int op, int rd, int rn, long n) +{ + /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */ + static int rx[] = {0xc0, 0xe8, 0xe0, 0xc8, 0xf0}; + unsigned char s[3] = {0x83, rx[op & 0x0f] | rd, n & 0xff}; + os((void *) s, 3); +} + +void i_num(int rd, long n) +{ + if (!n) { + op_rr(I_XOR, rd, rd, 4); + return; + } else { + op_x(I_MOVIR + (rd & 7), 0, rd, LONGSZ); + oi(n, LONGSZ); + } +} + +static void i_mul(int rd, int r1, int r2) +{ + if (r2 != R_RDX) + i_num(R_RDX, 0); + op_rr(I_MUL, 4, r2, LONGSZ); +} + +static void i_div(int op, int rd, int r1, int r2) +{ + if (r2 != R_RDX) { + if (op & O_SIGNED) + op_x(I_CQO, R_RAX, R_RDX, LONGSZ); + else + i_num(R_RDX, 0); + } + op_rr(I_MUL, op & O_SIGNED ? 
7 : 6, r2, LONGSZ); +} + +static void i_tst(int rn, int rm) +{ + op_rr(I_TST, rn, rm, LONGSZ); +} + +static void i_cmp(int rn, int rm) +{ + op_rr(I_CMP, rn, rm, LONGSZ); +} + +static void i_cmp_imm(int rn, long n) +{ + unsigned char s[3] = {0x83, 0xf8 | rn, n & 0xff}; + os(s, 3); +} + +static void i_shl(int op, int rd, int r1, int rs) +{ + int sm = 4; + if ((op & 0x0f) == 1) + sm = op & O_SIGNED ? 7 : 5; + op_rr(I_SHX, sm, rd, LONGSZ); +} + +static void i_shl_imm(int op, int rd, int rn, long n) +{ + int sm = (op & 0x1) ? (op & O_SIGNED ? 0xf8 : 0xe8) : 0xe0; + char s[3] = {0xc1, sm | rn, n & 0xff}; + os(s, 3); +} + +void i_sym(int rd, char *sym, int off) +{ + op_x(I_MOVIR + (rd & 7), 0, rd, LONGSZ); + if (!pass1) + out_rel(sym, OUT_CS, cslen); + oi(off, LONGSZ); +} + +static void i_neg(int rd) +{ + op_rr(I_NOT, 3, rd, LONGSZ); +} + +static void i_not(int rd) +{ + op_rr(I_NOT, 2, rd, LONGSZ); +} + +/* for optimizing cmp + tst + jmp to cmp + jmp */ +#define OPT_ISCMP() (last_set >= 0 && last_set + 6 == cslen) +#define OPT_CCOND() (cs[last_set + 1]) + +static long last_set = -1; + +static void i_set(int op, int rd) +{ + /* lt, gt, le, ge, eq, neq */ + static int ucond[] = {0x92, 0x97, 0x96, 0x93, 0x94, 0x95}; + static int scond[] = {0x9c, 0x9f, 0x9e, 0x9d, 0x94, 0x95}; + int cond = op & O_SIGNED ? 
scond[op & 0x0f] : ucond[op & 0x0f]; + char set[] = "\x0f\x00\xc0"; + set[1] = cond; + last_set = cslen; + os(set, 3); /* setl al */ + os("\x0f\xb6\xc0", 3); /* movzx rax, al */ +} + +static void i_lnot(int rd) +{ + if (OPT_ISCMP()) { + cs[last_set + 1] ^= 0x01; + } else { + char cmp[] = "\x83\xf8\x00"; + cmp[1] |= rd; + os(cmp, 3); /* cmp eax, 0 */ + i_set(O_EQ, rd); + } +} + +static void jx(int x, int nbytes) +{ + char op[2] = {0x0f}; + if (nbytes == 1) { + op[0] = 0x70 | (x & 0x0f); + os(op, 1); /* jx $addr */ + } else { + op[1] = x; + os(op, 2); /* jx $addr */ + } + oi(0, nbytes); +} + +void i_jmp(int rn, int z, int nbytes) +{ + if (!nbytes) + return; + if (nbytes > 1) + nbytes = 4; + if (rn >= 0) { + if (OPT_ISCMP()) { + int cond = OPT_CCOND(); + cslen = last_set; + jx((!z ? cond : cond ^ 0x01) & ~0x10, nbytes); + last_set = -1; + } else { + i_tst(rn, rn); + jx(z ? 0x84 : 0x85, nbytes); + } + } else { + os(nbytes == 1 ? "\xeb" : "\xe9", 1); /* jmp $addr */ + oi(0, nbytes); + } +} + +long i_fill(long src, long dst, int nbytes) +{ + if (!nbytes) + return 0; + if (nbytes > 1) + nbytes = 4; + putint((void *) (cs + src - nbytes), dst - src, nbytes); + return dst - src; +} + +static void i_zx(int rd, int r1, int bits) +{ + if (bits & 0x07) { + i_shl_imm(O_SHL, rd, rd, LONGSZ * 8 - bits); + i_shl_imm(O_SHR, rd, rd, LONGSZ * 8 - bits); + } else { + mov_r2r(rd, r1, bits >> 3); + } +} + +static void i_sx(int rd, int r1, int bits) +{ + mov_r2r(rd, r1, BT_SIGNED | (bits >> 3)); +} + +void i_op(int op, int rd, int r1, int r2) +{ + if ((op & 0xf0) == 0x00) + i_add(op, r1, r1, r2); + if ((op & 0xf0) == 0x10) + i_shl(op, r1, r1, r2); + if ((op & 0xf0) == 0x20) { + if ((op & 0xff) == O_MUL) + i_mul(R_RAX, r1, r2); + if ((op & 0xff) == O_DIV) + i_div(op, R_RAX, r1, r2); + if ((op & 0xff) == O_MOD) + i_div(op, R_RDX, r1, r2); + return; + } + if ((op & 0xf0) == 0x30) { + i_cmp(r1, r2); + i_set(op, rd); + return; + } + if ((op & 0xf0) == 0x40) { /* uop */ + if ((op & 0xff) == 
O_NEG) + i_neg(r1); + if ((op & 0xff) == O_NOT) + i_not(r1); + if ((op & 0xff) == O_LNOT) + i_lnot(r1); + return; + } +} + +static void i_add_anyimm(int rd, int rn, long n) +{ + op_rm(I_LEA, rd, rn, n, LONGSZ); +} + +void i_op_imm(int op, int rd, int r1, long n) +{ + if ((op & 0xf0) == 0x00) { /* add */ + if (rd == r1 && i_imm(O_ADD, n)) + i_add_imm(op, rd, r1, n); + else + i_add_anyimm(rd, r1, n); + } + if ((op & 0xf0) == 0x10) /* shl */ + i_shl_imm(op, rd, r1, n); + if ((op & 0xf0) == 0x20) /* mul */ + die("mul/imm not implemented"); + if ((op & 0xf0) == 0x30) { /* imm */ + i_cmp_imm(r1, n); + i_set(op, rd); + } + if ((op & 0xf0) == 0x50) { /* etc */ + if ((op & 0xff) == O_ZX) + i_zx(rd, r1, n); + if ((op & 0xff) == O_SX) + i_sx(rd, r1, n); + if ((op & 0xff) == O_MOV) + i_mov(rd, r1); + } +} + +void i_memcpy(int r0, int r1, int r2) +{ + os("\xfc\xf3\xa4", 3); /* cld; rep movs */ +} + +void i_memset(int r0, int r1, int r2) +{ + os("\xfc\xf3\xaa", 3); /* cld; rep stosb */ +} + +void i_call_reg(int rd) +{ + op_rr(I_CALL, 2, rd, LONGSZ); +} + +void i_call(char *sym, int off) +{ + os("\xe8", 1); /* call $x */ + if (!pass1) + out_rel(sym, OUT_CS | OUT_RLREL, cslen); + oi(-4 + off, 4); +} + +static int func_argc; +static int func_varg; +static int func_spsub; +static int func_sargs; +static int func_sregs; +static int func_initfp; +static int spsub_addr; + +int i_args(void) +{ + return LONGSZ << 1; +} + +int i_sp(void) +{ + int i; + int n = 0; + for (i = 0; i < N_TMPS; i++) + if ((1 << tmpregs[i]) & func_sregs) + n += LONGSZ; + return -n; +} + +void i_prolog(int argc, int varg, int sargs, int sregs, int initfp, int subsp) +{ + int i; + last_set = -1; + func_argc = argc; + func_varg = varg; + func_sargs = sargs; + func_sregs = sregs; + func_initfp = initfp; + func_spsub = subsp; + if (initfp) { + os("\x55", 1); /* push rbp */ + os("\x89\xe5", 2); /* mov rbp, rsp */ + } + if (func_sregs) { + for (i = N_TMPS - 1; i >= 0; i--) + if ((1 << tmpregs[i]) & func_sregs) + 
i_push(tmpregs[i]); + } + if (func_spsub) { + os("\x81\xec", 2); /* sub rsp, $xxx */ + spsub_addr = cslen; + oi(0, 4); + } +} + +void i_epilog(int sp_max) +{ + int diff; + int nsregs = 0; + int i; + for (i = 0; i < N_TMPS; i++) + if ((1 << tmpregs[i]) & func_sregs) + nsregs++; + diff = ALIGN(-sp_max - nsregs * LONGSZ, 16); + /* forcing 16-byte alignment */ + diff = nsregs & 1 ? diff + LONGSZ : diff; + if (func_spsub && diff) { + i_add_anyimm(R_RSP, R_RBP, -nsregs * LONGSZ); + putint(cs + spsub_addr, diff, 4); + } + if (func_sregs) { + for (i = 0; i < N_TMPS; i++) + if ((1 << tmpregs[i]) & func_sregs) + i_pop(tmpregs[i]); + } + if (func_initfp) + os("\xc9", 1); /* leave */ + os("\xc3", 1); /* ret */ +} + +void i_done(void) +{ +} diff --git a/x86.h b/x86.h new file mode 100644 index 0000000..fa059c9 --- /dev/null +++ b/x86.h @@ -0,0 +1,18 @@ +#define LONGSZ 4 /* word size */ +#define I_ARCH "__i386__" + +#define N_REGS 8 /* number of registers */ +#define N_ARGS 0 /* number of arg registers */ +#define N_TMPS 6 /* number of tmp registers */ +#define R_TMPS 0x00cf /* mask of tmp registers */ +#define R_ARGS 0x0000 /* mask of arg registers */ +#define R_SAVED 0x00c8 /* mask of callee-saved registers */ + +#define R_CALL 0x0001 /* mask of regs that can hold call dst */ +#define R_BYTE 0x0007 /* mask of regs that can perform byte-wide instructions */ + +/* special registers */ +#define REG_FP 5 /* frame pointer register */ +#define REG_SP 4 /* stack pointer register */ +#define REG_RET 0 /* returned value register */ +#define REG_FORK 0 /* result of conditional branches */ -- 2.11.4.GIT