From 50b040ef83252730d75463bdeea589294e698532 Mon Sep 17 00:00:00 2001 From: grischka Date: Sat, 19 Dec 2009 22:40:28 +0100 Subject: [PATCH] win64: add tiny unwind data for setjmp/longjmp This enables native unwind semantics with longjmp on win64 by putting an entry into the .pdata section for each compiled function. Also, the functions now use a fixed stack and store arguments into X(%rsp) rather than using push. --- i386-asm.c | 4 + tcc.h | 7 ++ tccpe.c | 62 ++++++++- tccrun.c | 7 ++ win32/include/_mingw.h | 2 +- win32/lib/chkstk.S | 14 ++- x86_64-gen.c | 336 ++++++++++++++++++++++++++++++++++--------------- 7 files changed, 322 insertions(+), 110 deletions(-) diff --git a/i386-asm.c b/i386-asm.c index a378191b..075d2eab 100644 --- a/i386-asm.c +++ b/i386-asm.c @@ -426,6 +426,10 @@ static void gen_disp32(ExprValue *pe) elimination in the linker */ gen_le32(pe->v + sym->jnext - ind - 4); } else { + if (sym && sym->type.t == VT_VOID) { + sym->type.t = VT_FUNC; + sym->type.ref = NULL; + } gen_addrpc32(VT_SYM, sym, pe->v); } } diff --git a/tcc.h b/tcc.h index ea4ae21a..130c1b95 100644 --- a/tcc.h +++ b/tcc.h @@ -521,6 +521,10 @@ struct TCCState { /* PE info */ int pe_subsystem; unsigned long pe_file_align; + struct pe_uw { + Section *pdata; + int sym_1, sym_2, offs_1; + } pe_unwind; #endif #ifndef TCC_TARGET_PE @@ -1167,6 +1171,9 @@ ST_FUNC int pe_output_file(TCCState * s1, const char *filename); ST_FUNC int pe_dllimport(int r, SValue *sv, void (*fn)(int r, SValue *sv)); /* tiny_impdef.c */ ST_FUNC char *get_export_names(FILE *fp); +#ifdef TCC_TARGET_X86_64 +ST_FUNC void pe_add_unwind_data(unsigned start, unsigned end, unsigned stack); +#endif #endif /* ------------ tccrun.c ----------------- */ diff --git a/tccpe.c b/tccpe.c index 1b2724d6..be92129a 100644 --- a/tccpe.c +++ b/tccpe.c @@ -289,6 +289,7 @@ enum { sec_data , sec_bss , sec_idata , + sec_pdata , sec_other , sec_rsrc , sec_stab , @@ -301,6 +302,7 @@ static const DWORD pe_sec_flags[] = { 0xC0000040, /* 
".data" , */ 0xC0000080, /* ".bss" , */ 0x40000040, /* ".idata" , */ + 0x40000040, /* ".pdata" , */ 0xE0000060, /* < other > , */ 0x40000040, /* ".rsrc" , */ 0x42000802, /* ".stab" , */ @@ -634,6 +636,10 @@ static int pe_write(struct pe_info *pe) pe_set_datadir(&pe_header, IMAGE_DIRECTORY_ENTRY_RESOURCE, addr, size); break; + case sec_pdata: + pe_set_datadir(&pe_header, IMAGE_DIRECTORY_ENTRY_EXCEPTION, addr, size); + break; + case sec_stab: break; } @@ -1037,6 +1043,8 @@ static int pe_section_class(Section *s) return sec_rsrc; if (0 == strcmp(name, ".iedat")) return sec_idata; + if (0 == strcmp(name, ".pdata")) + return sec_pdata; return sec_other; } else if (type == SHT_NOBITS) { if (flags & SHF_WRITE) @@ -1086,7 +1094,7 @@ static int pe_assign_addresses (struct pe_info *pe) #ifdef PE_MERGE_DATA if (c == sec_bss && pe->sec_count && si[-1].cls == sec_data) { /* append .bss to .data */ - s->sh_addr = addr = ((addr-1) | 15) + 1; + s->sh_addr = addr = ((addr-1) | (s->sh_addralign-1)) + 1; addr += s->data_offset; si[-1].sh_size = addr - si[-1].sh_addr; continue; @@ -1162,7 +1170,7 @@ static void pe_relocate_rva (struct pe_info *pe, Section *s) ElfW(Sym) *sym = (ElfW(Sym) *)symtab_section->data + sym_index; addr = sym->st_value; } - // printf("reloc rva %08x %08x\n", (DWORD)rel->r_offset, addr); + // printf("reloc rva %08x %08x %s\n", (DWORD)rel->r_offset, addr, s->name); *(DWORD*)(s->data + rel->r_offset) += addr - pe->imagebase; } } @@ -1625,6 +1633,56 @@ ST_FUNC int pe_dllimport(int r, SValue *sv, void (*fn)(int r, SValue *sv)) /* ------------------------------------------------------------- */ #ifdef TCC_TARGET_X86_64 +ST_FUNC void pe_add_unwind_data(unsigned start, unsigned end, unsigned stack) +{ + static const char uw_info[] = { + 0x01, // UBYTE: 3 Version , UBYTE: 5 Flags + 0x04, // UBYTE Size of prolog + 0x02, // UBYTE Count of unwind codes + 0x05, // UBYTE: 4 Frame Register (rbp), UBYTE: 4 Frame Register offset (scaled) + // USHORT * n Unwind codes array + // 
0x0b, 0x01, 0xff, 0xff, // stack size + 0x04, 0x03, // set frame ptr (mov rsp -> rbp) + 0x01, 0x50 // push reg (rbp) + }; + + struct pe_uw *pe_uw = &tcc_state->pe_unwind; + + Section *uw, *pd; + WORD *p1; + DWORD *p2; + unsigned o2; + + uw = data_section; + pd = pe_uw->pdata; + if (NULL == pd) + { + pe_uw->pdata = pd = find_section(tcc_state, ".pdata"); + pe_uw->pdata->sh_addralign = 4; + section_ptr_add(uw, -uw->data_offset & 3); + pe_uw->offs_1 = uw->data_offset; + p1 = section_ptr_add(uw, sizeof uw_info); + /* use one common entry for all functions */ + memcpy(p1, uw_info, sizeof uw_info); + pe_uw->sym_1 = put_elf_sym(symtab_section, 0, 0, 0, 0, text_section->sh_num, NULL); + pe_uw->sym_2 = put_elf_sym(symtab_section, 0, 0, 0, 0, uw->sh_num, NULL); + } + + o2 = pd->data_offset; + p2 = section_ptr_add(pd, 3 * sizeof (DWORD)); + /* record this function */ + p2[0] = start; + p2[1] = end; + p2[2] = pe_uw->offs_1; + /* put relocations on it */ + put_elf_reloc(symtab_section, pd, o2, R_X86_64_RELATIVE, pe_uw->sym_1); + put_elf_reloc(symtab_section, pd, o2+4, R_X86_64_RELATIVE, pe_uw->sym_1); + put_elf_reloc(symtab_section, pd, o2+8, R_X86_64_RELATIVE, pe_uw->sym_2); +} +#endif + +/* ------------------------------------------------------------- */ +#ifdef TCC_TARGET_X86_64 #define PE_STDSYM(n,s) n #else #define PE_STDSYM(n,s) "_" n s diff --git a/tccrun.c b/tccrun.c index e88f87e4..52855a78 100644 --- a/tccrun.c +++ b/tccrun.c @@ -78,6 +78,13 @@ int tcc_run(TCCState *s1, int argc, char **argv) return ret; } #endif + +#ifdef TCC_TARGET_PE + { + unsigned char *p = tcc_get_symbol(s1, "tinyc_no_getbp"); + if (p) *p = 0; + } +#endif return (*prog_main)(argc, argv); } diff --git a/win32/include/_mingw.h b/win32/include/_mingw.h index bbac35e2..baa454b2 100644 --- a/win32/include/_mingw.h +++ b/win32/include/_mingw.h @@ -58,7 +58,7 @@ typedef unsigned __int64 uintptr_t; #define _AMD64_ 1 #define __x86_64 1 #define USE_MINGW_SETJMP_TWO_ARGS -#define mingw_getsp tinyc_getsp 
+#define mingw_getsp tinyc_getbp #else typedef __int32 intptr_t; typedef unsigned __int32 uintptr_t; diff --git a/win32/lib/chkstk.S b/win32/lib/chkstk.S index 90a6f0dc..a757b161 100644 --- a/win32/lib/chkstk.S +++ b/win32/lib/chkstk.S @@ -57,11 +57,19 @@ P0: jmp *8(%rax) /* ---------------------------------------------- */ -/* avoid msvcrt unwind stuff for setjmp/longjmp */ +/* setjmp/longjmp support */ -.globl tinyc_getsp -tinyc_getsp: +.globl tinyc_no_getbp +tinyc_no_getbp: + .byte 0x90 + +.globl tinyc_getbp +tinyc_getbp: xor %rax,%rax + cmp %al,tinyc_no_getbp(%rax) + je t1 + mov %rbp,%rax +t1: ret /* ---------------------------------------------- */ diff --git a/x86_64-gen.c b/x86_64-gen.c index d9f7528f..2e7c8beb 100644 --- a/x86_64-gen.c +++ b/x86_64-gen.c @@ -527,16 +527,6 @@ void store(int r, SValue *v) } } -static void gadd_sp(int val) -{ - if (val == (char)val) { - o(0xc48348); - g(val); - } else { - oad(0xc48148, val); /* add $xxx, %rsp */ - } -} - /* 'is_jmp' is '1' if it is a jump */ static void gcall_or_jmp(int is_jmp) { @@ -564,16 +554,238 @@ static void gcall_or_jmp(int is_jmp) } #ifdef TCC_TARGET_PE + #define REGN 4 static const uint8_t arg_regs[] = { TREG_RCX, TREG_RDX, TREG_R8, TREG_R9 }; + +static int func_scratch; + +/* Generate function call. The function address is pushed first, then + all the parameters in call order. This functions pops all the + parameters and the function address. */ + +void gen_offs_sp(int b, int r, int d) +{ + if (d == (char)d) { + o(b | 0x4000 | (r << 11)); + g(d); + } else { + o((b & ~0x4000) | 0x8000 | (r << 11)); /* drop a caller-supplied mod=01 bit so ModRM is mod=10 (disp32), not mod=11 */ + gen_le32(d); + } +} + +void gfunc_call(int nb_args) +{ + int size, align, r, args_size, i, d, j, bt; + int nb_reg_args, gen_reg; + + /* calculate the number of integer/float arguments */ + nb_reg_args = 0; + for(i = 0; i < nb_args; i++) { + bt = (vtop[-i].type.t & VT_BTYPE); + if (bt != VT_STRUCT && bt != VT_LDOUBLE) + nb_reg_args++; + } + + args_size = (nb_reg_args < REGN ? 
REGN : nb_reg_args) * PTR_SIZE; + save_regs(0); /* save used temporary registers */ + + /* for struct arguments, we need to call memcpy and the function + call breaks register passing arguments we are preparing. + So, we process arguments which will be passed by stack first. */ + for(i = 0; i < nb_args; i++) { + SValue *sv = &vtop[-i]; + bt = (sv->type.t & VT_BTYPE); + if (bt == VT_STRUCT) { + size = type_size(&sv->type, &align); + /* round size up to a multiple of 16 (stack alignment) */ + size = (size + 15) & ~15; + /* generate structure store */ + r = get_reg(RC_INT); + o(0x48); + gen_offs_sp(0x24048d, r, args_size); + args_size += size; + + /* generate memcpy call */ + vset(&sv->type, r | VT_LVAL, 0); + *++vtop = *sv; + vstore(); + --vtop; + + } else if (bt == VT_LDOUBLE) { + + gv(RC_ST0); + gen_offs_sp(0x243cdb, 0, args_size); + args_size += 16; + + } + } + + if (func_scratch < args_size) + func_scratch = args_size; + + gen_reg = nb_reg_args; + for(i = 0; i < nb_args; i++) { + bt = (vtop->type.t & VT_BTYPE); + if (bt == VT_STRUCT || bt == VT_LDOUBLE) { + ; /* done */ + + } else if (is_sse_float(vtop->type.t)) { + gv(RC_FLOAT); /* only one float register */ + j = --gen_reg; + if (j >= REGN) { + o(0x0f66); + /* movq %xmm0, j*8(%rsp) */ + gen_offs_sp(0x2444d6, 0, j*8); + } else { + d = arg_regs[j]; + /* movaps %xmm0, %xmmN */ + o(0x280f); + o(0xc0 + (j << 3)); + /* mov %xmm0, %rxx */ + o(0x66); + o(0x7e0f48 + (d >= 8)); + o(0xc0 + (d & 7)); + } + } else { + r = gv(RC_INT); + j = --gen_reg; + if (j >= REGN) { + o(0x48); + gen_offs_sp(0x244489, r, j*8); + } else { + d = arg_regs[j]; + if (d != r) { + o(0x8948 + (d >= 8)); + o(0xc0 + r*8 + (d & 7)); + } + } + } + vtop--; + } + gcall_or_jmp(0); + vtop--; +} + + +#define FUNC_PROLOG_SIZE 11 + +/* generate function prolog of type 't' */ +void gfunc_prolog(CType *func_type) +{ + int addr, align, size, reg_param_index, bt; + Sym *sym; + CType *type; + + func_ret_sub = 0; + func_scratch = 0; + loc = 0; + + addr = PTR_SIZE * 2; + ind += 
FUNC_PROLOG_SIZE; + func_sub_sp_offset = ind; + reg_param_index = 0; + + sym = func_type->ref; + + /* if the function returns a structure, then add an + implicit pointer parameter */ + func_vt = sym->type; + if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { + gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); + reg_param_index++; + addr += PTR_SIZE; + } + + /* define parameters */ + while ((sym = sym->next) != NULL) { + type = &sym->type; + bt = type->t & VT_BTYPE; + if (bt == VT_STRUCT || bt == VT_LDOUBLE) + continue; + if (reg_param_index < REGN) { + /* save arguments passed by register */ + gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); + } + sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr); + reg_param_index++; + addr += PTR_SIZE; + } + + while (reg_param_index < REGN) { + if (func_type->ref->c == FUNC_ELLIPSIS) + gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); + reg_param_index++; + addr += PTR_SIZE; + } + + sym = func_type->ref; + while ((sym = sym->next) != NULL) { + type = &sym->type; + bt = type->t & VT_BTYPE; + if (bt == VT_STRUCT || bt == VT_LDOUBLE) { + size = type_size(type, &align); + size = (size + 15) & -16; + sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr); + addr += size; + } + } +} + +/* generate function epilog */ +void gfunc_epilog(void) +{ + int v, saved_ind; + + o(0xc9); /* leave */ + if (func_ret_sub == 0) { + o(0xc3); /* ret */ + } else { + o(0xc2); /* ret n */ + g(func_ret_sub); + g(func_ret_sub >> 8); + } + + saved_ind = ind; + ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; + /* align local size to word & save local variables */ + v = (func_scratch + -loc + 15) & -16; + + pe_add_unwind_data(ind, saved_ind, v); + + if (v >= 4096) { + Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0); + oad(0xb8, v); /* mov stacksize, %eax */ + oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */ + greloc(cur_text_section, sym, ind-4, R_X86_64_PC32); + 
o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */ + } else { + o(0xe5894855); /* push %rbp, mov %rsp, %rbp */ + o(0xec8148); /* sub rsp, stacksize */ + gen_le32(v); + } + ind = saved_ind; +} + #else + +static void gadd_sp(int val) +{ + if (val == (char)val) { + o(0xc48348); + g(val); + } else { + oad(0xc48148, val); /* add $xxx, %rsp */ + } +} + #define REGN 6 static const uint8_t arg_regs[REGN] = { TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9 }; -#endif /* Generate function call. The function address is pushed first, then all the parameters in call order. This functions pops all the @@ -593,11 +805,9 @@ void gfunc_call(int nb_args) args_size += type_size(&vtop->type, &align); } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) { args_size += 16; -#ifndef TCC_TARGET_PE } else if (is_sse_float(vtop[-i].type.t)) { nb_sse_args++; if (nb_sse_args > 8) args_size += 8; -#endif } else { nb_reg_args++; if (nb_reg_args > REGN) args_size += 8; @@ -611,10 +821,6 @@ void gfunc_call(int nb_args) gen_reg = nb_reg_args; sse_reg = nb_sse_args; -#ifdef TCC_TARGET_PE - save_regs(0); /* save used temporary registers */ -#endif - /* adjust stack to align SSE boundary */ if (args_size &= 8) { o(0x50); /* push $rax */ @@ -650,13 +856,8 @@ void gfunc_call(int nb_args) g(0x00); args_size += size; } else if (is_sse_float(vtop->type.t)) { -#ifdef TCC_TARGET_PE - int j = --gen_reg; - if (j >= REGN) { -#else int j = --sse_reg; if (j >= 8) { -#endif gv(RC_FLOAT); o(0x50); /* push $rax */ /* movq %xmm0, (%rsp) */ @@ -688,42 +889,6 @@ void gfunc_call(int nb_args) if ((vtop->type.t & VT_BTYPE) == VT_STRUCT || (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { } else if (is_sse_float(vtop->type.t)) { -#ifdef TCC_TARGET_PE - int j = --gen_reg; - if (j < REGN) { - int d = arg_regs[j]; - gv(RC_FLOAT); /* only one float register */ - /* movaps %xmm0, %xmmN */ - o(0x280f); - o(0xc0 + (j << 3)); - o(0x50); - o(0xd60f66); /* movq %xmm0, (%rsp) */ - o(0x2404 + (j << 3)); - if (d < 8) { - o(0x58 + 
d); /* pop d */ - } else { - o(0x58); - o(0xc08949 + d - 8); - } - } - } else { - int j = --gen_reg; - /* simple type */ - /* XXX: implicit cast ? */ - if (j < REGN) { - int d = arg_regs[j]; - r = gv(RC_INT); - if (d != r) { - if (d < 8) { - o(0x8948); /* mov */ - o(0xc0 + r * 8 + d); - } else { - o(0x8949); /* mov */ - o(0xc0 + r * 8 + d - 8); - } - } - } -#else int j = --sse_reg; if (j < 8) { gv(RC_FLOAT); /* only one float register */ @@ -750,16 +915,10 @@ void gfunc_call(int nb_args) o(0xc0 + r * 8 + j - 4); } } -#endif } vtop--; } -#ifdef TCC_TARGET_PE - /* allocate scratch space */ - gadd_sp(-8*REGN); - args_size += 8*REGN; -#else save_regs(0); /* save used temporary registers */ /* Copy R10 and R11 into RDX and RCX, respectively */ @@ -771,13 +930,13 @@ void gfunc_call(int nb_args) } oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */ -#endif gcall_or_jmp(0); if (args_size) gadd_sp(args_size); vtop--; } + #define FUNC_PROLOG_SIZE 11 static void push_arg_reg(int i) { @@ -793,15 +952,13 @@ void gfunc_prolog(CType *func_type) Sym *sym; CType *type; - func_ret_sub = 0; - sym = func_type->ref; addr = PTR_SIZE * 2; loc = 0; ind += FUNC_PROLOG_SIZE; func_sub_sp_offset = ind; + func_ret_sub = 0; -#ifndef TCC_TARGET_PE if (func_type->ref->c == FUNC_ELLIPSIS) { int seen_reg_num, seen_sse_num, seen_stack_size; seen_reg_num = seen_sse_num = 0; @@ -857,7 +1014,6 @@ void gfunc_prolog(CType *func_type) push_arg_reg(REGN-1-i); } } -#endif sym = func_type->ref; param_index = 0; @@ -880,7 +1036,6 @@ void gfunc_prolog(CType *func_type) type = &sym->type; size = type_size(type, &align); size = (size + 3) & ~3; -#ifndef TCC_TARGET_PE if (is_sse_float(type->t)) { if (sse_param_index < 8) { /* save arguments passed by register */ @@ -893,21 +1048,12 @@ void gfunc_prolog(CType *func_type) addr += size; } sse_param_index++; - } else -#endif - if ((type->t & VT_BTYPE) == VT_STRUCT || + + } else if ((type->t & VT_BTYPE) == VT_STRUCT || (type->t & VT_BTYPE) == 
VT_LDOUBLE) { param_addr = addr; addr += size; } else { -#ifdef TCC_TARGET_PE - if (reg_param_index < REGN) { - /* save arguments passed by register */ - gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); - } - param_addr = addr; - addr += 8; -#else if (reg_param_index < REGN) { /* save arguments passed by register */ push_arg_reg(reg_param_index); @@ -916,21 +1062,12 @@ void gfunc_prolog(CType *func_type) param_addr = addr; addr += 8; } -#endif reg_param_index++; } sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, param_addr); param_index++; } -#ifdef TCC_TARGET_PE - if (func_type->ref->c == FUNC_ELLIPSIS) { - for (i = reg_param_index; i < REGN; ++i) { - gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, addr); - addr += 8; - } - } -#endif } /* generate function epilog */ @@ -950,23 +1087,14 @@ void gfunc_epilog(void) v = (-loc + 15) & -16; saved_ind = ind; ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; -#ifdef TCC_TARGET_PE - if (v >= 4096) { - Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0); - oad(0xb8, v); /* mov stacksize, %eax */ - oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */ - greloc(cur_text_section, sym, ind-4, R_X86_64_PC32); - o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */ - } else -#endif - { - o(0xe5894855); /* push %rbp, mov %rsp, %rbp */ - o(0xec8148); /* sub rsp, stacksize */ - gen_le32(v); - } + o(0xe5894855); /* push %rbp, mov %rsp, %rbp */ + o(0xec8148); /* sub rsp, stacksize */ + gen_le32(v); ind = saved_ind; } +#endif /* not PE */ + /* generate a jump to a label */ int gjmp(int t) { -- 2.11.4.GIT