From 830b7533c99f86203c4fbaf94897679678c3bad0 Mon Sep 17 00:00:00 2001 From: Shinichiro Hamaji Date: Mon, 13 Apr 2009 00:27:04 +0900 Subject: [PATCH] Generate PIC code so that we can create shared objects properly. - Add got_table to TCCState. This approach is naive: the distance between executable code and the GOT can exceed the range of a 32-bit offset. - Handle R_X86_64_GOTPCREL properly. For now, got_table is used only in the TCC_OUTPUT_MEMORY case. - Fix load() and store() so that they access global variables via the GOT. --- tcc.c | 7 +++- tccelf.c | 29 ++++++++++++-- x86_64-gen.c | 126 +++++++++++++++++++++++++++++++++++++++++++++-------------- 3 files changed, 128 insertions(+), 34 deletions(-) diff --git a/tcc.c b/tcc.c index 548440dc..02da926a 100644 --- a/tcc.c +++ b/tcc.c @@ -544,9 +544,12 @@ struct TCCState { FILE *outfile; #ifdef TCC_TARGET_X86_64 - /* buffer to store jump tables */ + /* buffer to store jump tables used when the output is memory */ char *jmp_table; int jmp_table_num; + /* buffer to store got tables used when the output is memory */ + void **got_table; + int got_table_num; #endif }; @@ -10505,6 +10508,7 @@ TCCState *tcc_new(void) #ifdef TCC_TARGET_X86_64 s->jmp_table = NULL; + s->got_table = NULL; #endif return s; } @@ -10544,6 +10548,7 @@ void tcc_delete(TCCState *s1) #ifdef TCC_TARGET_X86_64 tcc_free(s1->jmp_table); + tcc_free(s1->got_table); #endif tcc_free(s1); } diff --git a/tccelf.c b/tccelf.c index 093f18d0..56ee75a1 100644 --- a/tccelf.c +++ b/tccelf.c @@ -502,6 +502,25 @@ static unsigned long add_jmp_table(TCCState *s1, unsigned long val) *(unsigned long *)(p + 6) = val; return (unsigned long)p; } + +#define GOT_TABLE_ENTRY_MAX_NUM 4096 +static unsigned long add_got_table(TCCState *s1, unsigned long val) +{ + unsigned long *p; + if (!s1->got_table) { + int size = sizeof(void *) * GOT_TABLE_ENTRY_MAX_NUM; + s1->got_table_num = 0; + s1->got_table = (char *)tcc_malloc(size); + } + if (s1->got_table_num == GOT_TABLE_ENTRY_MAX_NUM) { + error("relocating >%d 
symbols are not supported", + GOT_TABLE_ENTRY_MAX_NUM); + } + p = s1->got_table + s1->got_table_num; + s1->got_table_num++; + *p = val; + return (unsigned long)p; +} #endif /* relocate a given section (CPU dependent) */ @@ -727,11 +746,13 @@ static void relocate_section(TCCState *s1, Section *s) *(int *)ptr = val; break; case R_X86_64_GOTPCREL: - *(int *)ptr += s1->got->sh_addr - addr; - /* XXX: is this OK? */ - if (s1->output_type == TCC_OUTPUT_DLL) { - *(int *)ptr += s1->got_offsets[sym_index] - 4; + if (s1->output_type == TCC_OUTPUT_MEMORY) { + val = add_got_table(s1, val - rel->r_addend) + rel->r_addend; + *(int *)ptr += val - addr; + break; } + *(int *)ptr += (s1->got->sh_addr - addr + + s1->got_offsets[sym_index] - 4); break; case R_X86_64_GOTTPOFF: *(int *)ptr += val - s1->got->sh_addr; diff --git a/x86_64-gen.c b/x86_64-gen.c index 1cbe414c..8e393728 100644 --- a/x86_64-gen.c +++ b/x86_64-gen.c @@ -53,9 +53,11 @@ enum { TREG_XMM0 = 3, TREG_ST0 = 4, + + TREG_MEM = 0x10, }; -#define REX_BASE(reg) ((reg) >> 3) +#define REX_BASE(reg) (((reg) >> 3) & 1) #define REG_VALUE(reg) ((reg) & 7) int reg_classes[NB_REGS] = { @@ -205,15 +207,37 @@ static void gen_addrpc32(int r, Sym *sym, int c) gen_le32(c-4); } -/* generate a modrm reference. 
'op_reg' contains the addtionnal 3 - opcode bits */ -static void gen_modrm(int op_reg, int r, Sym *sym, int c) +/* output got address with relocation */ +static void gen_gotpcrel(int r, Sym *sym, int c) +{ + Section *sr; + ElfW(Rela) *rel; + greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL); + sr = cur_text_section->reloc; + rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela))); + rel->r_addend = -4; + gen_le32(0); + + if (c) { + /* we use add c, %xxx for displacement */ + o(0x48 + REX_BASE(r)); + o(0x81); + o(0xc0 + REG_VALUE(r)); + gen_le32(c); + } +} + +static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got) { - op_reg = op_reg << 3; + op_reg = REG_VALUE(op_reg) << 3; if ((r & VT_VALMASK) == VT_CONST) { /* constant memory reference */ o(0x05 | op_reg); - gen_addrpc32(r, sym, c); + if (is_got) { + gen_gotpcrel(r, sym, c); + } else { + gen_addrpc32(r, sym, c); + } } else if ((r & VT_VALMASK) == VT_LOCAL) { /* currently, we use only ebp as base */ if (c == (char)c) { @@ -223,6 +247,13 @@ static void gen_modrm(int op_reg, int r, Sym *sym, int c) } else { oad(0x85 | op_reg, c); } + } else if ((r & VT_VALMASK) >= TREG_MEM) { + if (c) { + g(0x80 | op_reg | REG_VALUE(r)); + gen_le32(c); + } else { + g(0x00 | op_reg | REG_VALUE(r)); + } } else { g(0x00 | op_reg | (r & VT_VALMASK)); } @@ -230,8 +261,16 @@ static void gen_modrm(int op_reg, int r, Sym *sym, int c) /* generate a modrm reference. 'op_reg' contains the addtionnal 3 opcode bits */ +static void gen_modrm(int op_reg, int r, Sym *sym, int c) +{ + gen_modrm_impl(op_reg, r, sym, c, 0); +} + +/* generate a modrm reference. 
'op_reg' contains the addtionnal 3 + opcode bits */ static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c) { + int is_got; int rex = 0x48 | (REX_BASE(op_reg) << 2); if ((r & VT_VALMASK) != VT_CONST && (r & VT_VALMASK) != VT_LOCAL) { @@ -239,23 +278,8 @@ static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c) } o(rex); o(opcode); - op_reg = REG_VALUE(op_reg) << 3; - if ((r & VT_VALMASK) == VT_CONST) { - /* constant memory reference */ - o(0x05 | op_reg); - gen_addrpc32(r, sym, c); - } else if ((r & VT_VALMASK) == VT_LOCAL) { - /* currently, we use only ebp as base */ - if (c == (char)c) { - /* short reference */ - o(0x45 | op_reg); - g(c); - } else { - oad(0x85 | op_reg, c); - } - } else { - g(0x00 | op_reg | (r & VT_VALMASK)); - } + is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC); + gen_modrm_impl(op_reg, r, sym, c, is_got); } @@ -269,6 +293,21 @@ void load(int r, SValue *sv) ft = sv->type.t; fc = sv->c.ul; + /* we use indirect access via got */ + if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) && + (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) { + /* use the result register as a temporal register */ + int tr = r | TREG_MEM; + if (is_float(ft)) { + /* we cannot use float registers as a temporal register */ + tr = get_reg(RC_INT) | TREG_MEM; + } + gen_modrm64(0x8b, tr, fr, sv->sym, 0); + + /* load from the temporal register */ + fr = tr | VT_LVAL; + } + v = fr & VT_VALMASK; if (fr & VT_LVAL) { if (v == VT_LLOCAL) { @@ -305,14 +344,21 @@ void load(int r, SValue *sv) } else { if (v == VT_CONST) { if ((ft & VT_BTYPE) == VT_LLONG) { + assert(!(fr & VT_SYM)); o(0x48); o(0xb8 + REG_VALUE(r)); /* mov $xx, r */ gen_addr64(fr, sv->sym, sv->c.ull); } else { if (fr & VT_SYM) { - o(0x8d48); - o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */ - gen_addrpc32(fr, sv->sym, fc); + if (sv->sym->type.t & VT_STATIC) { + o(0x8d48); + o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */ + gen_addrpc32(fr, sv->sym, fc); + } else { + 
o(0x8b48); + o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */ + gen_gotpcrel(r, sv->sym, fc); + } } else { o(0xb8 + REG_VALUE(r)); /* mov $xx, r */ gen_le32(fc); @@ -362,25 +408,42 @@ void store(int r, SValue *v) { int fr, bt, ft, fc; int op64 = 0; + /* store the REX prefix in this variable when PIC is enabled */ + int pic = 0; ft = v->type.t; fc = v->c.ul; fr = v->r & VT_VALMASK; bt = ft & VT_BTYPE; + + /* we need to access the variable via got */ + if (fr == VT_CONST && (v->r & VT_SYM)) { + /* mov xx(%rip), %r11 */ + o(0x1d8b4c); + gen_gotpcrel(TREG_R11, v->sym, v->c.ul); + pic = is64_type(bt) ? 0x49 : 0x41; + } + /* XXX: incorrect if float reg to reg */ if (bt == VT_FLOAT) { - o(0x7e0f66); /* movd */ + o(0x66); + o(pic); + o(0x7e0f); /* movd */ r = 0; } else if (bt == VT_DOUBLE) { - o(0xd60f66); /* movq */ + o(0x66); + o(pic); + o(0xd60f); /* movq */ r = 0; } else if (bt == VT_LDOUBLE) { o(0xc0d9); /* fld %st(0) */ + o(pic); o(0xdb); /* fstpt */ r = 7; } else { if (bt == VT_SHORT) o(0x66); + o(pic); if (bt == VT_BYTE || bt == VT_BOOL) o(0x88); else if (is64_type(bt)) @@ -388,7 +451,12 @@ void store(int r, SValue *v) else o(0x89); } - if (op64) { + if (pic) { + /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */ + if (op64) + o(op64); + o(3 + (r << 3)); + } else if (op64) { if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) { -- 2.11.4.GIT