From 0a9873aa22731077fad295a4aad2fc1f390c8ac7 Mon Sep 17 00:00:00 2001 From: Shinichiro Hamaji Date: Tue, 2 Dec 2008 03:19:25 +0900 Subject: [PATCH] Add support of x86-64. Most changes were done in #ifdef TCC_TARGET_X86_64. So, nothing should be broken by this change. Summary of current status of x86-64 support: - produces x86-64 object files and executables. - the x86-64 code generator is based on x86's. -- for long long integers, we use 64bit registers instead of tcc's generic implementation. -- for float or double, we use SSE. SSE registers are not utilized well (we only use xmm0 and xmm1). -- for long double, we use x87 FPU. - passes make test. - passes ./libtcc_test. - can compile tcc.c. The compiled tcc can compile tcc.c, too. (there should be some bugs since the binary sizes of tcc2 and tcc3 differ where tcc tcc.c -o tcc2 and tcc2 tcc.c -o tcc3) - can compile links browser. It seems to work. - not tested well. I tested this work only on my linux box with few programs. - calling convention of long-double-integer or struct is not exactly the same as GCC's x86-64 ABI. - implementation of tcc -run is naive (tcc -run tcctest.c works, but tcc -run tcc.c doesn't work). Relocating 64bit addresses seems to be not as simple as in 32bit environments. 
- shared object support isn't unimplemented - no bounds checker support - some builtin functions such as __divdi3 aren't supported --- Makefile | 23 +- configure | 6 + libtcc1.c | 5 + stdarg.h | 73 +++- tcc.c | 138 +++++- tccelf.c | 139 +++++- x86_64-gen.c | 1355 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 1724 insertions(+), 15 deletions(-) create mode 100644 x86_64-gen.c diff --git a/Makefile b/Makefile index 1e6adad8..6ff3eb56 100644 --- a/Makefile +++ b/Makefile @@ -9,20 +9,28 @@ LIBS=-lm ifndef CONFIG_NOLDL LIBS+=-ldl endif +ifneq ($(ARCH),x86-64) BCHECK_O=bcheck.o endif +endif CFLAGS_P=$(CFLAGS) -pg -static -DCONFIG_TCC_STATIC LIBS_P= +ifneq ($(GCC_MAJOR),2) +CFLAGS+=-fno-strict-aliasing +endif + +ifeq ($(ARCH),i386) CFLAGS+=-mpreferred-stack-boundary=2 ifeq ($(GCC_MAJOR),2) CFLAGS+=-m386 -malign-functions=0 else -CFLAGS+=-march=i386 -falign-functions=0 -fno-strict-aliasing +CFLAGS+=-march=i386 -falign-functions=0 ifneq ($(GCC_MAJOR),3) CFLAGS+=-Wno-pointer-sign -Wno-sign-compare endif endif +endif DISAS=objdump -d INSTALL=install @@ -50,6 +58,9 @@ ifdef CONFIG_CROSS PROGS+=c67-tcc$(EXESUF) i386-win32-tcc$(EXESUF) endif endif +ifeq ($(ARCH),x86-64) +PROGS=tcc$(EXESUF) +endif ifdef CONFIG_USE_LIBGCC LIBTCC1= @@ -163,6 +174,10 @@ ARMFLAGS += $(if $(shell grep -l "^Features.* \(vfp\|iwmmxt\) " /proc/cpuinfo),- tcc$(EXESUF): tcc.c arm-gen.c tccelf.c tccasm.c tcctok.h libtcc.h $(CC) $(CFLAGS) -DTCC_TARGET_ARM $(ARMFLAGS) -o $@ $< $(LIBS) endif +ifeq ($(ARCH),x86-64) +tcc$(EXESUF): tcc.c tccelf.c tccasm.c tcctok.h libtcc.h x86_64-gen.c + $(CC) $(CFLAGS) -DTCC_TARGET_X86_64 -o $@ $< $(LIBS) +endif endif # Cross Tiny C Compilers @@ -238,7 +253,9 @@ else ifndef CONFIG_USE_LIBGCC $(INSTALL) -m644 libtcc1.a "$(DESTDIR)$(tccdir)" endif +ifneq ($(ARCH),x86-64) $(INSTALL) -m644 $(BCHECK_O) "$(DESTDIR)$(tccdir)" +endif $(INSTALL) -m644 stdarg.h stddef.h stdbool.h float.h varargs.h \ tcclib.h "$(DESTDIR)$(tccdir)/include" endif @@ -272,8 
+289,12 @@ libtcc.o: tcc.c i386-gen.c Makefile ifdef CONFIG_WIN32 $(CC) $(CFLAGS) -DTCC_TARGET_PE -DLIBTCC -c -o $@ $< else +ifeq ($(ARCH),x86-64) + $(CC) $(CFLAGS) -DTCC_TARGET_X86_64 -DLIBTCC -c -o $@ $< +else $(CC) $(CFLAGS) -DLIBTCC -c -o $@ $< endif +endif libtcc.a: libtcc.o $(AR) rcs $@ $^ diff --git a/configure b/configure index 63328cc2..f4c718d7 100755 --- a/configure +++ b/configure @@ -39,6 +39,9 @@ case "$cpu" in i386|i486|i586|i686|i86pc|BePC) cpu="x86" ;; + x86_64) + cpu="x86-64" + ;; armv4l) cpu="armv4l" ;; @@ -313,6 +316,9 @@ echo "EXESUF=$EXESUF" >> config.mak if test "$cpu" = "x86" ; then echo "ARCH=i386" >> config.mak echo "#define HOST_I386 1" >> $TMPH +elif test "$cpu" = "x86-64" ; then + echo "ARCH=x86-64" >> config.mak + echo "#define HOST_X86_64 1" >> $TMPH elif test "$cpu" = "armv4l" ; then echo "ARCH=arm" >> config.mak echo "#define HOST_ARM 1" >> $TMPH diff --git a/libtcc1.c b/libtcc1.c index 96bf22cf..b079477e 100644 --- a/libtcc1.c +++ b/libtcc1.c @@ -106,6 +106,9 @@ union float_long { long l; }; +/* XXX: we don't support several builtin supports for now */ +#ifndef __x86_64__ + /* XXX: use gcc/tcc intrinsic ? */ #if defined(__i386__) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ @@ -482,6 +485,8 @@ unsigned short __tcc_fpu_control = 0x137f; unsigned short __tcc_int_fpu_control = 0x137f | 0x0c00; #endif +#endif /* !__x86_64__ */ + /* XXX: fix tcc's code generator to do this instead */ float __floatundisf(unsigned long long a) { diff --git a/stdarg.h b/stdarg.h index a9b22b7b..899358ce 100644 --- a/stdarg.h +++ b/stdarg.h @@ -1,6 +1,75 @@ #ifndef _STDARG_H #define _STDARG_H +#ifdef __x86_64__ + +#ifdef __TINYC__ + +#include + +/* GCC compatible definition of va_list. */ +struct __va_list_struct { + unsigned int gp_offset; + unsigned int fp_offset; + union { + unsigned int overflow_offset; + char *overflow_arg_area; + }; + char *reg_save_area; +}; + +typedef struct __va_list_struct *va_list; + +/* avoid #define malloc tcc_malloc. 
+ XXX: add __malloc or something into libtcc? */ +inline void *__va_list_malloc(size_t size) { return malloc(size); } +inline void __va_list_free(void *ptr) { free(ptr); } + +/* XXX: this lacks the support of aggregated types. */ +#define va_start(ap, last) \ + (ap = (va_list)__va_list_malloc(sizeof(struct __va_list_struct)), \ + *ap = *(struct __va_list_struct*)( \ + (char*)__builtin_frame_address(0) - 16), \ + ap->overflow_arg_area = ((char *)__builtin_frame_address(0) + \ + ap->overflow_offset), \ + ap->reg_save_area = (char *)__builtin_frame_address(0) - 176 - 16 \ + ) +#define va_arg(ap, type) \ + (*(type*)(__builtin_types_compatible_p(type, long double) \ + ? (ap->overflow_arg_area += 16, \ + ap->overflow_arg_area - 16) \ + : __builtin_types_compatible_p(type, double) \ + ? (ap->fp_offset < 128 + 48 \ + ? (ap->fp_offset += 16, \ + ap->reg_save_area + ap->fp_offset - 16) \ + : (ap->overflow_arg_area += 8, \ + ap->overflow_arg_area - 8)) \ + : (ap->gp_offset < 48 \ + ? (ap->gp_offset += 8, \ + ap->reg_save_area + ap->gp_offset - 8) \ + : (ap->overflow_arg_area += 8, \ + ap->overflow_arg_area - 8)) \ + )) +#define va_copy(dest, src) \ + ((dest) = (va_list)malloc(sizeof(struct __va_list_struct)), \ + *(dest) = *(src)) +#define va_end(ap) __va_list_free(ap) + +#else + +/* for GNU C */ + +typedef __builtin_va_list va_list; + +#define va_start(ap, last) __builtin_va_start(ap, last) +#define va_arg(ap, type) __builtin_va_arg(ap, type) +#define va_copy(dest, src) __builtin_va_copy(dest, src) +#define va_end(ap) __builtin_va_end(ap) + +#endif + +#else + typedef char *va_list; /* only correct for i386 */ @@ -9,8 +78,10 @@ typedef char *va_list; #define va_copy(dest, src) (dest) = (src) #define va_end(ap) +#endif + /* fix a buggy dependency on GCC in libio.h */ typedef va_list __gnuc_va_list; #define _VA_LIST_DEFINED -#endif +#endif /* _STDARG_H */ diff --git a/tcc.c b/tcc.c index afb1343d..0bfc5c01 100644 --- a/tcc.c +++ b/tcc.c @@ -79,15 +79,16 @@ //#define 
TCC_TARGET_I386 /* i386 code generator */ //#define TCC_TARGET_ARM /* ARMv4 code generator */ //#define TCC_TARGET_C67 /* TMS320C67xx code generator */ +//#define TCC_TARGET_X86_64 /* x86-64 code generator */ /* default target is I386 */ #if !defined(TCC_TARGET_I386) && !defined(TCC_TARGET_ARM) && \ - !defined(TCC_TARGET_C67) + !defined(TCC_TARGET_C67) && !defined(TCC_TARGET_X86_64) #define TCC_TARGET_I386 #endif #if !defined(_WIN32) && !defined(TCC_UCLIBC) && !defined(TCC_TARGET_ARM) && \ - !defined(TCC_TARGET_C67) + !defined(TCC_TARGET_C67) && !defined(TCC_TARGET_X86_64) #define CONFIG_TCC_BCHECK /* enable bound checking code */ #endif @@ -96,7 +97,8 @@ #endif /* define it to include assembler support */ -#if !defined(TCC_TARGET_ARM) && !defined(TCC_TARGET_C67) +#if !defined(TCC_TARGET_ARM) && !defined(TCC_TARGET_C67) && \ + !defined(TCC_TARGET_X86_64) #define CONFIG_TCC_ASM #endif @@ -531,6 +533,12 @@ struct TCCState { /* output file for preprocessing */ FILE *outfile; + +#ifdef TCC_TARGET_X86_64 + /* buffer to store jump tables */ + char *jmp_table; + int jmp_table_num; +#endif }; /* The current value can be: */ @@ -938,6 +946,10 @@ static inline int is_float(int t) #include "c67-gen.c" #endif +#ifdef TCC_TARGET_X86_64 +#include "x86_64-gen.c" +#endif + #ifdef CONFIG_TCC_STATIC #define RTLD_LAZY 0x001 @@ -4769,26 +4781,33 @@ void save_reg(int r) r = p->r & VT_VALMASK; /* store register in the stack */ type = &p->type; +#ifndef TCC_TARGET_X86_64 if ((p->r & VT_LVAL) || (!is_float(type->t) && (type->t & VT_BTYPE) != VT_LLONG)) type = &int_type; +#else + if (p->r & VT_LVAL) + type = &char_pointer_type; +#endif size = type_size(type, &align); loc = (loc - size) & -align; sv.type.t = type->t; sv.r = VT_LOCAL | VT_LVAL; sv.c.ul = loc; store(r, &sv); -#ifdef TCC_TARGET_I386 +#if defined(TCC_TARGET_I386) || defined(TCC_TARGET_X86_64) /* x86 specific: need to pop fp register ST0 if saved */ if (r == TREG_ST0) { o(0xd9dd); /* fstp %st(1) */ } #endif +#ifndef 
TCC_TARGET_X86_64 /* special long long case */ if ((type->t & VT_BTYPE) == VT_LLONG) { sv.c.ul += 4; store(p->r2, &sv); } +#endif l = loc; saved = 1; } @@ -4939,8 +4958,7 @@ void gbound(void) register value (such as structures). */ int gv(int rc) { - int r, r2, rc2, bit_pos, bit_size, size, align, i; - unsigned long long ll; + int r, rc2, bit_pos, bit_size, size, align, i; /* NOTE: get_reg can modify vstack[] */ if (vtop->type.t & VT_BITFIELD) { @@ -5019,7 +5037,10 @@ int gv(int rc) ((vtop->type.t & VT_BTYPE) == VT_LLONG && !(reg_classes[vtop->r2] & rc2))) { r = get_reg(rc); +#ifndef TCC_TARGET_X86_64 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { + int r2; + unsigned long long ll; /* two register type load : expand to two words temporarily */ if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { @@ -5059,7 +5080,9 @@ int gv(int rc) vpop(); /* write second register */ vtop->r2 = r2; - } else if ((vtop->r & VT_LVAL) && !is_float(vtop->type.t)) { + } else +#endif + if ((vtop->r & VT_LVAL) && !is_float(vtop->type.t)) { int t1, t; /* lvalue of scalar type : need to use lvalue type because of possible cast */ @@ -5224,7 +5247,7 @@ void vpop(void) { int v; v = vtop->r & VT_VALMASK; -#ifdef TCC_TARGET_I386 +#if defined(TCC_TARGET_I386) || defined(TCC_TARGET_X86_64) /* for x86, we need to pop the FP stack */ if (v == TREG_ST0 && !nocode_wanted) { o(0xd9dd); /* fstp %st(1) */ @@ -5265,6 +5288,11 @@ void gv_dup(void) sv.type.t = VT_INT; if (is_float(t)) { rc = RC_FLOAT; +#ifdef TCC_TARGET_X86_64 + if ((t & VT_BTYPE) == VT_LDOUBLE) { + rc = RC_ST0; + } +#endif sv.type.t = t; } r = gv(rc); @@ -5278,6 +5306,7 @@ void gv_dup(void) } } +#ifndef TCC_TARGET_X86_64 /* generate CPU independent (unsigned) long long operations */ void gen_opl(int op) { @@ -5512,6 +5541,7 @@ void gen_opl(int op) break; } } +#endif /* handle integer constant optimizations and various machine independent opt */ @@ -5790,7 +5820,11 @@ void gen_op(int op) if (op >= TOK_ULT && op <= TOK_LOR) { 
check_comparison_pointer_types(vtop - 1, vtop, op); /* pointers are handled are unsigned */ +#ifdef TCC_TARGET_X86_64 + t = VT_LLONG | VT_UNSIGNED; +#else t = VT_INT | VT_UNSIGNED; +#endif goto std_op; } /* if both pointers, then it must be the '-' op */ @@ -5802,7 +5836,11 @@ void gen_op(int op) u = pointed_size(&vtop[-1].type); gen_opic(op); /* set to integer type */ +#ifdef TCC_TARGET_X86_64 + vtop->type.t = VT_LLONG; +#else vtop->type.t = VT_INT; +#endif vpushi(u); gen_op(TOK_PDIV); } else { @@ -5815,8 +5853,18 @@ void gen_op(int op) swap(&t1, &t2); } type1 = vtop[-1].type; +#ifdef TCC_TARGET_X86_64 + { + CValue cval; + CType ctype; + ctype.t = VT_LLONG; + cval.ull = pointed_size(&vtop[-1].type); + vsetc(&ctype, VT_CONST, &cval); + } +#else /* XXX: cast to int ? (long long case) */ vpushi(pointed_size(&vtop[-1].type)); +#endif gen_op('*'); #ifdef CONFIG_TCC_BCHECK /* if evaluating constant expression, no code should be @@ -6099,6 +6147,7 @@ static void gen_cast(CType *type) } else if ((dbt & VT_BTYPE) == VT_LLONG) { if ((sbt & VT_BTYPE) != VT_LLONG) { /* scalar to long long */ +#ifndef TCC_TARGET_X86_64 /* machine independent conversion */ gv(RC_INT); /* generate high word */ @@ -6113,6 +6162,14 @@ static void gen_cast(CType *type) /* patch second register */ vtop[-1].r2 = vtop->r; vpop(); +#else + int r = gv(RC_INT); + if (sbt != (VT_INT | VT_UNSIGNED)) { + /* x86_64 specific: movslq */ + o(0x6348); + o(0xc0 + (REG_VALUE(r) << 3) + REG_VALUE(r)); + } +#endif } } else if (dbt == VT_BOOL) { /* scalar to bool */ @@ -6571,20 +6628,31 @@ void vstore(void) #endif if (!nocode_wanted) { rc = RC_INT; - if (is_float(ft)) + if (is_float(ft)) { rc = RC_FLOAT; +#ifdef TCC_TARGET_X86_64 + if ((ft & VT_BTYPE) == VT_LDOUBLE) { + rc = RC_ST0; + } +#endif + } r = gv(rc); /* generate value */ /* if lvalue was saved on stack, must read it */ if ((vtop[-1].r & VT_VALMASK) == VT_LLOCAL) { SValue sv; t = get_reg(RC_INT); +#ifdef TCC_TARGET_X86_64 + sv.type.t = VT_PTR; +#else 
sv.type.t = VT_INT; +#endif sv.r = VT_LOCAL | VT_LVAL; sv.c.ul = vtop[-1].c.ul; load(t, &sv); vtop[-1].r = t | VT_LVAL; } store(r, vtop - 1); +#ifndef TCC_TARGET_X86_64 /* two word case handling : store second register at word + 4 */ if ((ft & VT_BTYPE) == VT_LLONG) { vswap(); @@ -6598,6 +6666,7 @@ void vstore(void) /* XXX: it works because r2 is spilled last ! */ store(vtop->r2, vtop - 1); } +#endif } vswap(); vtop--; /* NOT vpop() because on x86 it would flush the fp stack */ @@ -7107,7 +7176,11 @@ the_end: /* long is never used as type */ if ((t & VT_BTYPE) == VT_LONG) +#ifndef TCC_TARGET_X86_64 t = (t & ~VT_BTYPE) | VT_INT; +#else + t = (t & ~VT_BTYPE) | VT_LLONG; +#endif type->t = t; return type_found; } @@ -8044,8 +8117,14 @@ static void expr_eq(void) if (vtop != vstack) { /* needed to avoid having different registers saved in each branch */ - if (is_float(vtop->type.t)) + if (is_float(vtop->type.t)) { rc = RC_FLOAT; +#ifdef TCC_TARGET_X86_64 + if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { + rc = RC_ST0; + } +#endif + } else rc = RC_INT; gv(rc); @@ -8115,6 +8194,11 @@ static void expr_eq(void) rc = RC_INT; if (is_float(type.t)) { rc = RC_FLOAT; +#ifdef TCC_TARGET_X86_64 + if ((type.t & VT_BTYPE) == VT_LDOUBLE) { + rc = RC_ST0; + } +#endif } else if ((type.t & VT_BTYPE) == VT_LLONG) { /* for long longs, we use fixed registers to avoid having to handle a complicated move */ @@ -9982,6 +10066,30 @@ static int rt_get_caller_pc(unsigned long *paddr, return 0; } } +#elif defined(__x86_64__) +/* return the PC at frame level 'level'. 
Return non zero if not found */ +static int rt_get_caller_pc(unsigned long *paddr, + ucontext_t *uc, int level) +{ + unsigned long fp; + int i; + + if (level == 0) { + /* XXX: only support linux */ + *paddr = uc->uc_mcontext.gregs[REG_RIP]; + return 0; + } else { + fp = uc->uc_mcontext.gregs[REG_RBP]; + for(i=1;i= 0xc0000000) + return -1; + fp = ((unsigned long *)fp)[0]; + } + *paddr = ((unsigned long *)fp)[1]; + return 0; + } +} #else #warning add arch specific rt_get_caller_pc() @@ -10235,6 +10343,9 @@ TCCState *tcc_new(void) #if defined(TCC_TARGET_I386) tcc_define_symbol(s, "__i386__", NULL); #endif +#if defined(TCC_TARGET_X86_64) + tcc_define_symbol(s, "__x86_64__", NULL); +#endif #if defined(TCC_TARGET_ARM) tcc_define_symbol(s, "__ARM_ARCH_4__", NULL); tcc_define_symbol(s, "__arm_elf__", NULL); @@ -10301,6 +10412,10 @@ TCCState *tcc_new(void) /* XXX: currently the PE linker is not ready to support that */ s->leading_underscore = 1; #endif + +#ifdef TCC_TARGET_X86_64 + s->jmp_table = NULL; +#endif return s; } @@ -10336,6 +10451,9 @@ void tcc_delete(TCCState *s1) dynarray_reset(&s1->include_paths, &s1->nb_include_paths); dynarray_reset(&s1->sysinclude_paths, &s1->nb_sysinclude_paths); +#ifdef TCC_TARGET_X86_64 + tcc_free(s1->jmp_table); +#endif tcc_free(s1); } diff --git a/tccelf.c b/tccelf.c index d41d06cc..a0c23808 100644 --- a/tccelf.c +++ b/tccelf.c @@ -282,6 +282,9 @@ static void put_elf_reloc(Section *symtab, Section *s, unsigned long offset, rel = section_ptr_add(sr, sizeof(ElfW_Rel)); rel->r_offset = offset; rel->r_info = ELFW(R_INFO)(symbol, type); +#ifdef TCC_TARGET_X86_64 + rel->r_addend = 0; +#endif } /* put stab debug information */ @@ -469,6 +472,33 @@ static void relocate_syms(TCCState *s1, int do_resolve) } } +#ifdef TCC_TARGET_X86_64 +#define JMP_TABLE_ENTRY_SIZE 14 +#define JMP_TABLE_ENTRY_MAX_NUM 4096 +static unsigned long add_jmp_table(TCCState *s1, unsigned long val) +{ + char *p; + if (!s1->jmp_table) { + int size = JMP_TABLE_ENTRY_SIZE * 
JMP_TABLE_ENTRY_MAX_NUM; + s1->jmp_table_num = 0; + s1->jmp_table = (char *)tcc_malloc(size); + set_pages_executable(s1->jmp_table, size); + } + if (s1->jmp_table_num == JMP_TABLE_ENTRY_MAX_NUM) { + error("relocating >%d symbols are not supported", + JMP_TABLE_ENTRY_MAX_NUM); + } + p = s1->jmp_table + s1->jmp_table_num * JMP_TABLE_ENTRY_SIZE; + s1->jmp_table_num++; + /* jmp *0x0(%rip) */ + p[0] = 0xff; + p[1] = 0x25; + *(int *)(p + 2) = 0; + *(unsigned long *)(p + 6) = val; + return (unsigned long)p; +} +#endif + /* relocate a given section (CPU dependent) */ static void relocate_section(TCCState *s1, Section *s) { @@ -493,6 +523,10 @@ static void relocate_section(TCCState *s1, Section *s) sym_index = ELFW(R_SYM)(rel->r_info); sym = &((ElfW(Sym) *)symtab_section->data)[sym_index]; val = sym->st_value; +#ifdef TCC_TARGET_X86_64 + /* XXX: not tested */ + val += rel->r_addend; +#endif type = ELFW(R_TYPE)(rel->r_info); addr = s->sh_addr + rel->r_offset; @@ -620,6 +654,62 @@ static void relocate_section(TCCState *s1, Section *s) fprintf(stderr,"FIXME: handle reloc type %x at %lx [%.8x] to %lx\n", type,addr,(unsigned int )ptr,val); break; +#elif defined(TCC_TARGET_X86_64) + case R_X86_64_64: + *(long long *)ptr += val; + break; + case R_X86_64_32: + case R_X86_64_32S: + *(int *)ptr += val; + break; + case R_X86_64_PC32: { + long diff = val - addr; + if (diff < -2147483648 || diff > 2147483647) { + /* XXX: naive support for over 32bit jump */ + if (s1->output_type == TCC_OUTPUT_MEMORY) { + val = add_jmp_table(s1, val); + diff = val - addr; + } + if (diff <= -2147483647 || diff > 2147483647) { +#if 0 + /* output memory map to debug easily */ + FILE* fp; + char buf[4096]; + int size; + Dl_info di; + printf("%ld - %ld = %ld\n", val, addr, diff); + dladdr((void *)addr, &di); + printf("addr = %lx = %lx+%lx(%s) ptr=%p\n", + addr, s->sh_addr, rel->r_offset, di.dli_sname, + ptr); + fp = fopen("/proc/self/maps", "r"); + size = fread(buf, 1, 4095, fp); + buf[size] = '\0'; + 
printf("%s", buf); +#endif + error("internal error: relocation failed"); + } + } + *(int *)ptr += val - addr; + } + break; + case R_X86_64_PLT32: + *(int *)ptr += val - addr; + break; + case R_X86_64_GLOB_DAT: + case R_X86_64_JUMP_SLOT: + *(int *)ptr = val; + break; + case R_X86_64_GOTPCREL: + *(int *)ptr += s1->got->sh_addr - addr; + break; + case R_X86_64_GOTTPOFF: + *(int *)ptr += val - s1->got->sh_addr; + break; + case R_X86_64_GOT32: + /* we load the got offset */ + *(int *)ptr += s1->got_offsets[sym_index]; + break; #else #error unsupported processor #endif @@ -708,7 +798,8 @@ static void put32(unsigned char *p, uint32_t val) p[3] = val >> 24; } -#if defined(TCC_TARGET_I386) || defined(TCC_TARGET_ARM) +#if defined(TCC_TARGET_I386) || defined(TCC_TARGET_ARM) || \ + defined(TCC_TARGET_X86_64) static uint32_t get32(unsigned char *p) { return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24); @@ -769,8 +860,14 @@ static void put_got_entry(TCCState *s1, sym = &((ElfW(Sym) *)symtab_section->data)[sym_index]; name = symtab_section->link->data + sym->st_name; offset = sym->st_value; -#ifdef TCC_TARGET_I386 - if (reloc_type == R_386_JMP_SLOT) { +#if defined(TCC_TARGET_I386) || defined(TCC_TARGET_X86_64) + if (reloc_type == +#ifdef TCC_TARGET_X86_64 + R_X86_64_JUMP_SLOT +#else + R_386_JMP_SLOT +#endif + ) { Section *plt; uint8_t *p; int modrm; @@ -934,6 +1031,25 @@ static void build_got_entries(TCCState *s1) sym_index); } break; +#elif defined(TCC_TARGET_X86_64) + case R_X86_64_GOT32: + case R_X86_64_GOTTPOFF: + case R_X86_64_GOTPCREL: + case R_X86_64_PLT32: + if (!s1->got) + build_got(s1); + if (type == R_X86_64_GOT32 || type == R_X86_64_PLT32) { + sym_index = ELFW(R_SYM)(rel->r_info); + sym = &((ElfW(Sym) *)symtab_section->data)[sym_index]; + /* look at the symbol got offset. 
If none, then add one */ + if (type == R_X86_64_GOT32) + reloc_type = R_X86_64_GLOB_DAT; + else + reloc_type = R_X86_64_JUMP_SLOT; + put_got_entry(s1, reloc_type, sym->st_size, sym->st_info, + sym_index); + } + break; #else #error unsupported CPU #endif @@ -1130,6 +1246,8 @@ static char elf_interp[] = "/usr/libexec/ld-elf.so.1"; #else #ifdef TCC_ARM_EABI static char elf_interp[] = "/lib/ld-linux.so.3"; +#elif defined(TCC_TARGET_X86_64) +static char elf_interp[] = "/lib/ld-linux-x86-64.so.2"; #else static char elf_interp[] = "/lib/ld-linux.so.2"; #endif @@ -1593,6 +1711,15 @@ int elf_output_file(TCCState *s1, const char *filename) put32(p + 2, get32(p + 2) + s1->got->sh_addr); p += 16; } +#elif defined(TCC_TARGET_X86_64) + int x = s1->got->sh_addr - s1->plt->sh_addr - 6; + put32(p + 2, get32(p + 2) + x); + put32(p + 8, get32(p + 8) + x - 6); + p += 16; + while (p < p_end) { + put32(p + 2, get32(p + 2) + x + s1->plt->data - p); + p += 16; + } #elif defined(TCC_TARGET_ARM) int x; x=s1->got->sh_addr - s1->plt->sh_addr - 12; @@ -1632,9 +1759,15 @@ int elf_output_file(TCCState *s1, const char *filename) put_dt(dynamic, DT_SYMTAB, s1->dynsym->sh_addr); put_dt(dynamic, DT_STRSZ, dynstr->data_offset); put_dt(dynamic, DT_SYMENT, sizeof(ElfW(Sym))); +#ifdef TCC_TARGET_X86_64 + put_dt(dynamic, DT_RELA, rel_addr); + put_dt(dynamic, DT_RELASZ, rel_size); + put_dt(dynamic, DT_RELAENT, sizeof(ElfW_Rel)); +#else put_dt(dynamic, DT_REL, rel_addr); put_dt(dynamic, DT_RELSZ, rel_size); put_dt(dynamic, DT_RELENT, sizeof(ElfW_Rel)); +#endif if (do_debug) put_dt(dynamic, DT_DEBUG, 0); put_dt(dynamic, DT_NULL, 0); diff --git a/x86_64-gen.c b/x86_64-gen.c new file mode 100644 index 00000000..2435d4d8 --- /dev/null +++ b/x86_64-gen.c @@ -0,0 +1,1355 @@ +/* + * x86-64 code generator for TCC + * + * Copyright (c) 2008 Shinichiro Hamaji + * + * Based on i386-gen.c by Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU 
Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +/* number of available registers */ +#define NB_REGS 5 + +/* a register can belong to several classes. The classes must be + sorted from more general to more precise (see gv2() code which does + assumptions on it). */ +#define RC_INT 0x0001 /* generic integer register */ +#define RC_FLOAT 0x0002 /* generic float register */ +#define RC_RAX 0x0004 +#define RC_RCX 0x0008 +#define RC_RDX 0x0010 +#define RC_XMM0 0x0020 +#define RC_ST0 0x0040 /* only for long double */ +#define RC_IRET RC_RAX /* function return: integer register */ +#define RC_LRET RC_RDX /* function return: second integer register */ +#define RC_FRET RC_XMM0 /* function return: float register */ + +/* pretty names for the registers */ +enum { + TREG_RAX = 0, + TREG_RCX = 1, + TREG_RDX = 2, + TREG_RSI = 6, + TREG_RDI = 7, + TREG_R8 = 8, + TREG_R9 = 9, + TREG_R10 = 10, + TREG_R11 = 11, + + TREG_XMM0 = 3, + TREG_ST0 = 4, +}; + +#define REX_BASE(reg) ((reg) >> 3) +#define REG_VALUE(reg) ((reg) & 7) + +int reg_classes[NB_REGS] = { + /* eax */ RC_INT | RC_RAX, + /* ecx */ RC_INT | RC_RCX, + /* edx */ RC_INT | RC_RDX, + /* xmm0 */ RC_FLOAT | RC_XMM0, + /* st0 */ RC_ST0, +}; + +/* return registers for function */ +#define REG_IRET TREG_RAX /* single word int return register */ +#define REG_LRET TREG_RDX /* second word return register (for long long) */ +#define 
REG_FRET TREG_XMM0 /* float return register */ + +/* defined if function parameters must be evaluated in reverse order */ +#define INVERT_FUNC_PARAMS + +/* pointer size, in bytes */ +#define PTR_SIZE 8 + +/* long double size and alignment, in bytes */ +#define LDOUBLE_SIZE 16 +#define LDOUBLE_ALIGN 8 +/* maximum alignment (for aligned attribute support) */ +#define MAX_ALIGN 8 + +/******************************************************/ +/* ELF defines */ + +#define EM_TCC_TARGET EM_X86_64 + +/* relocation type for 32 bit data relocation */ +#define R_DATA_32 R_X86_64_32 +#define R_JMP_SLOT R_X86_64_JUMP_SLOT +#define R_COPY R_X86_64_COPY + +#define ELF_START_ADDR 0x08048000 +#define ELF_PAGE_SIZE 0x1000 + +/******************************************************/ + +static unsigned long func_sub_sp_offset; +static int func_ret_sub; + +/* XXX: make it faster ? */ +void g(int c) +{ + int ind1; + ind1 = ind + 1; + if (ind1 > cur_text_section->data_allocated) + section_realloc(cur_text_section, ind1); + cur_text_section->data[ind] = c; + ind = ind1; +} + +void o(unsigned int c) +{ + while (c) { + g(c); + c = c >> 8; + } +} + +void gen_le32(int c) +{ + g(c); + g(c >> 8); + g(c >> 16); + g(c >> 24); +} + +void gen_le64(int64_t c) +{ + g(c); + g(c >> 8); + g(c >> 16); + g(c >> 24); + g(c >> 32); + g(c >> 40); + g(c >> 48); + g(c >> 56); +} + +/* output a symbol and patch all calls to it */ +void gsym_addr(int t, int a) +{ + int n, *ptr; + while (t) { + ptr = (int *)(cur_text_section->data + t); + n = *ptr; /* next value */ + *ptr = a - t - 4; + t = n; + } +} + +void gsym(int t) +{ + gsym_addr(t, ind); +} + +/* psym is used to put an instruction with a data field which is a + reference to a symbol. It is in fact the same as oad ! 
*/ +#define psym oad + +static int is64_type(int t) +{ + return ((t & VT_BTYPE) == VT_PTR || + (t & VT_BTYPE) == VT_FUNC || + (t & VT_BTYPE) == VT_LLONG); +} + +static int is_sse_float(int t) { + int bt; + bt = t & VT_BTYPE; + return bt == VT_DOUBLE || bt == VT_FLOAT; +} + +/* instruction + 4 bytes data. Return the address of the data */ +static int oad(int c, int s) +{ + int ind1; + + o(c); + ind1 = ind + 4; + if (ind1 > cur_text_section->data_allocated) + section_realloc(cur_text_section, ind1); + *(int *)(cur_text_section->data + ind) = s; + s = ind; + ind = ind1; + return s; +} + +/* output constant with relocation if 'r & VT_SYM' is true */ +static void gen_addr64(int r, Sym *sym, int64_t c) +{ + if (r & VT_SYM) + greloc(cur_text_section, sym, ind, R_X86_64_64); + gen_le64(c); +} + +/* output constant with relocation if 'r & VT_SYM' is true */ +static void gen_addr32(int r, Sym *sym, int c) +{ + if (r & VT_SYM) + greloc(cur_text_section, sym, ind, R_X86_64_32); + gen_le32(c); +} + +/* output constant with relocation if 'r & VT_SYM' is true */ +static void gen_addrpc32(int r, Sym *sym, int c) +{ + if (r & VT_SYM) + greloc(cur_text_section, sym, ind, R_X86_64_PC32); + gen_le32(c-4); +} + +/* generate a modrm reference. 'op_reg' contains the addtionnal 3 + opcode bits */ +static void gen_modrm(int op_reg, int r, Sym *sym, int c) +{ + op_reg = op_reg << 3; + if ((r & VT_VALMASK) == VT_CONST) { + /* constant memory reference */ + o(0x05 | op_reg); + gen_addrpc32(r, sym, c); + } else if ((r & VT_VALMASK) == VT_LOCAL) { + /* currently, we use only ebp as base */ + if (c == (char)c) { + /* short reference */ + o(0x45 | op_reg); + g(c); + } else { + oad(0x85 | op_reg, c); + } + } else { + g(0x00 | op_reg | (r & VT_VALMASK)); + } +} + +/* generate a modrm reference. 
'op_reg' contains the addtionnal 3 + opcode bits */ +static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c) +{ + int rex = 0x48 | (REX_BASE(op_reg) << 2); + if ((r & VT_VALMASK) != VT_CONST && + (r & VT_VALMASK) != VT_LOCAL) { + rex |= REX_BASE(VT_VALMASK & r); + } + o(rex); + o(opcode); + op_reg = REG_VALUE(op_reg) << 3; + if ((r & VT_VALMASK) == VT_CONST) { + /* constant memory reference */ + o(0x05 | op_reg); + gen_addrpc32(r, sym, c); + } else if ((r & VT_VALMASK) == VT_LOCAL) { + /* currently, we use only ebp as base */ + if (c == (char)c) { + /* short reference */ + o(0x45 | op_reg); + g(c); + } else { + oad(0x85 | op_reg, c); + } + } else { + g(0x00 | op_reg | (r & VT_VALMASK)); + } +} + + +/* load 'r' from value 'sv' */ +void load(int r, SValue *sv) +{ + int v, t, ft, fc, fr; + SValue v1; + + fr = sv->r; + ft = sv->type.t; + fc = sv->c.ul; + + v = fr & VT_VALMASK; + if (fr & VT_LVAL) { + if (v == VT_LLOCAL) { + v1.type.t = VT_PTR; + v1.r = VT_LOCAL | VT_LVAL; + v1.c.ul = fc; + load(r, &v1); + fr = r; + } + if ((ft & VT_BTYPE) == VT_FLOAT) { + o(0x6e0f66); /* movd */ + r = 0; + } else if ((ft & VT_BTYPE) == VT_DOUBLE) { + o(0x7e0ff3); /* movq */ + r = 0; + } else if ((ft & VT_BTYPE) == VT_LDOUBLE) { + o(0xdb); /* fldt */ + r = 5; + } else if ((ft & VT_TYPE) == VT_BYTE) { + o(0xbe0f); /* movsbl */ + } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) { + o(0xb60f); /* movzbl */ + } else if ((ft & VT_TYPE) == VT_SHORT) { + o(0xbf0f); /* movswl */ + } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) { + o(0xb70f); /* movzwl */ + } else if (is64_type(ft)) { + gen_modrm64(0x8b, r, fr, sv->sym, fc); + return; + } else { + o(0x8b); /* movl */ + } + gen_modrm(r, fr, sv->sym, fc); + } else { + if (v == VT_CONST) { + if ((ft & VT_TYPE) == VT_LLONG) { + o(0x48); + o(0xb8 + REG_VALUE(r)); /* mov $xx, r */ + gen_addr64(fr, sv->sym, sv->c.ull); + } else { + o(0xc748); + o(0xc0 + REG_VALUE(r)); /* mov $xx, r */ + gen_addr32(fr, sv->sym, fc); + } + } 
else if (v == VT_LOCAL) { + o(0x48 | REX_BASE(r)); + o(0x8d); /* lea xxx(%ebp), r */ + gen_modrm(r, VT_LOCAL, sv->sym, fc); + } else if (v == VT_CMP) { + oad(0xb8 + r, 0); /* mov $0, r */ + o(0x0f); /* setxx %br */ + o(fc); + o(0xc0 + r); + } else if (v == VT_JMP || v == VT_JMPI) { + t = v & 1; + oad(0xb8 + r, t); /* mov $1, r */ + o(0x05eb); /* jmp after */ + gsym(fc); + oad(0xb8 + r, t ^ 1); /* mov $0, r */ + } else if (v != r) { + if (r == TREG_XMM0) { + assert(v == TREG_ST0); + /* gen_cvt_ftof(VT_DOUBLE); */ + o(0xf0245cdd); /* fstpl -0x10(%rsp) */ + /* movsd -0x10(%rsp),%xmm0 */ + o(0x44100ff2); + o(0xf024); + } else if (r == TREG_ST0) { + assert(v == TREG_XMM0); + /* gen_cvt_ftof(VT_LDOUBLE); */ + /* movsd %xmm0,-0x10(%rsp) */ + o(0x44110ff2); + o(0xf024); + o(0xf02444dd); /* fldl -0x10(%rsp) */ + } else { + o(0x48 | REX_BASE(r) | (REX_BASE(v) << 2)); + o(0x89); + o(0xc0 + r + v * 8); /* mov v, r */ + } + } + } +} + +/* store register 'r' in lvalue 'v' */ +void store(int r, SValue *v) +{ + int fr, bt, ft, fc; + int op64 = 0; + + ft = v->type.t; + fc = v->c.ul; + fr = v->r & VT_VALMASK; + bt = ft & VT_BTYPE; + /* XXX: incorrect if float reg to reg */ + if (bt == VT_FLOAT) { + o(0x7e0f66); /* movd */ + r = 0; + } else if (bt == VT_DOUBLE) { + o(0xd60f66); /* movq */ + r = 0; + } else if (bt == VT_LDOUBLE) { + o(0xc0d9); /* fld %st(0) */ + o(0xdb); /* fstpt */ + r = 7; + } else { + if (bt == VT_SHORT) + o(0x66); + if (bt == VT_BYTE || bt == VT_BOOL) + o(0x88); + else if (is64_type(bt)) + op64 = 0x89; + else + o(0x89); + } + if (op64) { + if (fr == VT_CONST || + fr == VT_LOCAL || + (v->r & VT_LVAL)) { + gen_modrm64(op64, r, v->r, v->sym, fc); + } else if (fr != r) { + /* XXX: don't we really come here? */ + abort(); + o(0xc0 + fr + r * 8); /* mov r, fr */ + } + } else { + if (fr == VT_CONST || + fr == VT_LOCAL || + (v->r & VT_LVAL)) { + gen_modrm(r, v->r, v->sym, fc); + } else if (fr != r) { + /* XXX: don't we really come here? 
*/ + abort(); + o(0xc0 + fr + r * 8); /* mov r, fr */ + } + } +} + +static void gadd_sp(int val) +{ + if (val == (char)val) { + o(0xc48348); + g(val); + } else { + oad(0xc48148, val); /* add $xxx, %rsp */ + } +} + +/* 'is_jmp' is '1' if it is a jump */ +static void gcall_or_jmp(int is_jmp) +{ + int r; + if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { + /* constant case */ + if (vtop->r & VT_SYM) { + /* relocation case */ + greloc(cur_text_section, vtop->sym, + ind + 1, R_X86_64_PC32); + } else { + /* put an empty PC32 relocation */ + put_elf_reloc(symtab_section, cur_text_section, + ind + 1, R_X86_64_PC32, 0); + } + oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */ + } else { + /* otherwise, indirect call */ + r = TREG_R11; + load(r, vtop); + o(0x41); /* REX */ + o(0xff); /* call/jmp *r */ + o(0xd0 + REG_VALUE(r) + (is_jmp << 4)); + } +} + +static uint8_t arg_regs[6] = { + TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9 +}; +/* Generate function call. The function address is pushed first, then + all the parameters in call order. This functions pops all the + parameters and the function address. */ +void gfunc_call(int nb_args) +{ + int size, align, r, args_size, i, func_call; + Sym *func_sym; + SValue *orig_vtop; + int nb_reg_args = 0; + int nb_sse_args = 0; + int sse_reg, gen_reg; + + /* calculate the number of integer/float arguments */ + args_size = 0; + for(i = 0; i < nb_args; i++) { + if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) { + args_size += type_size(&vtop->type, &align); + } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) { + args_size += 16; + } else if (is_sse_float(vtop[-i].type.t)) { + nb_sse_args++; + if (nb_sse_args > 8) args_size += 8; + } else { + nb_reg_args++; + if (nb_reg_args > 6) args_size += 8; + } + } + + /* for struct arguments, we need to call memcpy and the function + call breaks register passing arguments we are preparing. + So, we process arguments which will be passed by stack first. 
*/ + orig_vtop = vtop; + gen_reg = nb_reg_args; + sse_reg = nb_sse_args; + /* adjust stack to align SSE boundary */ + if (args_size &= 8) { + o(0x50); /* push $rax */ + } + for(i = 0; i < nb_args; i++) { + if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) { + size = type_size(&vtop->type, &align); + /* align to stack align size */ + size = (size + 3) & ~3; + /* allocate the necessary size on stack */ + o(0x48); + oad(0xec81, size); /* sub $xxx, %rsp */ + /* generate structure store */ + r = get_reg(RC_INT); + o(0x48 + REX_BASE(r)); + o(0x89); /* mov %rsp, r */ + o(0xe0 + r); + { + /* following code breaks vtop[1] */ + SValue tmp = vtop[1]; + vset(&vtop->type, r | VT_LVAL, 0); + vswap(); + vstore(); + vtop[1] = tmp; + } + args_size += size; + } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { + gv(RC_ST0); + size = LDOUBLE_SIZE; + oad(0xec8148, size); /* sub $xxx, %rsp */ + o(0x7cdb); /* fstpt 0(%rsp) */ + g(0x24); + g(0x00); + args_size += size; + } else if (is_sse_float(vtop->type.t)) { + int j = --sse_reg; + if (j >= 8) { + gv(RC_FLOAT); + o(0x50); /* push $rax */ + /* movq %xmm0, (%rsp) */ + o(0x04d60f66); + o(0x24); + args_size += 8; + } + } else { + int j = --gen_reg; + /* simple type */ + /* XXX: implicit cast ? */ + if (j >= 6) { + r = gv(RC_INT); + o(0x50 + r); /* push r */ + args_size += 8; + } + } + vtop--; + } + vtop = orig_vtop; + + /* then, we prepare register passing arguments. + Note that we cannot set RDX and RCX in this loop because gv() + may break these temporary registers. 
Let's use R10 and R11 + instead of them */ + gen_reg = nb_reg_args; + sse_reg = nb_sse_args; + for(i = 0; i < nb_args; i++) { + if ((vtop->type.t & VT_BTYPE) == VT_STRUCT || + (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { + } else if (is_sse_float(vtop->type.t)) { + int j = --sse_reg; + if (j < 8) { + gv(RC_FLOAT); /* only one float register */ + /* movaps %xmm0, %xmmN */ + o(0x280f); + o(0xc0 + (sse_reg << 3)); + } + } else { + int j = --gen_reg; + /* simple type */ + /* XXX: implicit cast ? */ + if (j < 6) { + r = gv(RC_INT); + if (j < 2) { + o(0x8948); /* mov */ + o(0xc0 + r * 8 + arg_regs[j]); + } else if (j < 4) { + o(0x8949); /* mov */ + /* j=2: r10, j=3: r11 */ + o(0xc0 + r * 8 + j); + } else { + o(0x8949); /* mov */ + /* j=4: r8, j=5: r9 */ + o(0xc0 + r * 8 + j - 4); + } + } + } + vtop--; + } + + /* Copy R10 and R11 into RDX and RCX, respectively */ + if (nb_reg_args > 2) { + o(0xd2894c); /* mov %r10, %rdx */ + if (nb_reg_args > 3) { + o(0xd9894c); /* mov %r11, %rcx */ + } + } + + save_regs(0); /* save used temporary registers */ + + func_sym = vtop->type.ref; + func_call = FUNC_CALL(func_sym->r); + oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */ + gcall_or_jmp(0); + if (args_size) + gadd_sp(args_size); + vtop--; +} + +#ifdef TCC_TARGET_PE +/* XXX: support PE? 
*/ +#warning "PE isn't tested at all" +#define FUNC_PROLOG_SIZE 12 +#else +#define FUNC_PROLOG_SIZE 11 +#endif + +static void push_arg_reg(int i) { + loc -= 8; + gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc); +} + +/* generate function prolog of type 't' */ +void gfunc_prolog(CType *func_type) +{ + int i, addr, align, size, func_call; + int param_index, param_addr, reg_param_index, sse_param_index; + Sym *sym; + CType *type; + + func_ret_sub = 0; + + sym = func_type->ref; + func_call = FUNC_CALL(sym->r); + addr = PTR_SIZE * 2; + loc = 0; + ind += FUNC_PROLOG_SIZE; + func_sub_sp_offset = ind; + + if (func_type->ref->c == FUNC_ELLIPSIS) { + int seen_reg_num, seen_sse_num, seen_stack_size; + seen_reg_num = seen_sse_num = 0; + /* frame pointer and return address */ + seen_stack_size = PTR_SIZE * 2; + /* count the number of seen parameters */ + sym = func_type->ref; + while ((sym = sym->next) != NULL) { + type = &sym->type; + if (is_sse_float(type->t)) { + if (seen_sse_num < 8) { + seen_sse_num++; + } else { + seen_stack_size += 8; + } + } else if ((type->t & VT_BTYPE) == VT_STRUCT) { + size = type_size(type, &align); + size = (size + 3) & ~3; + seen_stack_size += size; + } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) { + seen_stack_size += LDOUBLE_SIZE; + } else { + if (seen_reg_num < 6) { + seen_reg_num++; + } else { + seen_stack_size += 8; + } + } + } + + loc -= 16; + /* movl $0x????????, -0x10(%rbp) */ + o(0xf045c7); + gen_le32(seen_reg_num * 8); + /* movl $0x????????, -0xc(%rbp) */ + o(0xf445c7); + gen_le32(seen_sse_num * 16 + 48); + /* movl $0x????????, -0x8(%rbp) */ + o(0xf845c7); + gen_le32(seen_stack_size); + + /* save all register passing arguments */ + for (i = 0; i < 8; i++) { + loc -= 16; + o(0xd60f66); /* movq */ + gen_modrm(7 - i, VT_LOCAL, NULL, loc); + /* movq $0, loc+8(%rbp) */ + o(0x85c748); + gen_le32(loc + 8); + gen_le32(0); + } + for (i = 0; i < 6; i++) { + push_arg_reg(5 - i); + } + } + + sym = func_type->ref; + param_index = 0; + 
reg_param_index = 0; + sse_param_index = 0; + + /* if the function returns a structure, then add an + implicit pointer parameter */ + func_vt = sym->type; + if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { + push_arg_reg(reg_param_index); + param_addr = loc; + + func_vc = loc; + param_index++; + reg_param_index++; + } + /* define parameters */ + while ((sym = sym->next) != NULL) { + type = &sym->type; + size = type_size(type, &align); + size = (size + 3) & ~3; + if (is_sse_float(type->t)) { + if (sse_param_index < 8) { + /* save arguments passed by register */ + loc -= 8; + o(0xd60f66); /* movq */ + gen_modrm(sse_param_index, VT_LOCAL, NULL, loc); + param_addr = loc; + } else { + param_addr = addr; + addr += size; + } + sse_param_index++; + } else if ((type->t & VT_BTYPE) == VT_STRUCT || + (type->t & VT_BTYPE) == VT_LDOUBLE) { + param_addr = addr; + addr += size; + } else { + if (reg_param_index < 6) { + /* save arguments passed by register */ + push_arg_reg(reg_param_index); + param_addr = loc; + } else { + param_addr = addr; + addr += 8; + } + reg_param_index++; + } + sym_push(sym->v & ~SYM_FIELD, type, + VT_LOCAL | VT_LVAL, param_addr); + param_index++; + } +} + +/* generate function epilog */ +void gfunc_epilog(void) +{ + int v, saved_ind; + + o(0xc9); /* leave */ + if (func_ret_sub == 0) { + o(0xc3); /* ret */ + } else { + o(0xc2); /* ret n */ + g(func_ret_sub); + g(func_ret_sub >> 8); + } + /* align local size to word & save local variables */ + v = (-loc + 15) & -16; + saved_ind = ind; + ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; +#ifdef TCC_TARGET_PE + if (v >= 4096) { + Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0); + oad(0xb8, v); /* mov stacksize, %eax */ + oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */ + greloc(cur_text_section, sym, ind-4, R_X86_64_PC32); + } else +#endif + { + o(0xe5894855); /* push %rbp, mov %rsp, %rbp */ + o(0xec8148); /* sub rsp, stacksize */ + gen_le32(v); +#if FUNC_PROLOG_SIZE == 12 + o(0x90); /* 
adjust to FUNC_PROLOG_SIZE */ +#endif + } + ind = saved_ind; +} + +/* generate a jump to a label */ +int gjmp(int t) +{ + return psym(0xe9, t); +} + +/* generate a jump to a fixed address */ +void gjmp_addr(int a) +{ + int r; + r = a - ind - 2; + if (r == (char)r) { + g(0xeb); + g(r); + } else { + oad(0xe9, a - ind - 5); + } +} + +/* generate a test. set 'inv' to invert test. Stack entry is popped */ +int gtst(int inv, int t) +{ + int v, *p; + + v = vtop->r & VT_VALMASK; + if (v == VT_CMP) { + /* fast case : can jump directly since flags are set */ + g(0x0f); + t = psym((vtop->c.i - 16) ^ inv, t); + } else if (v == VT_JMP || v == VT_JMPI) { + /* && or || optimization */ + if ((v & 1) == inv) { + /* insert vtop->c jump list in t */ + p = &vtop->c.i; + while (*p != 0) + p = (int *)(cur_text_section->data + *p); + *p = t; + t = vtop->c.i; + } else { + t = gjmp(t); + gsym(vtop->c.i); + } + } else { + /* XXX: not tested */ + if (is_float(vtop->type.t) || + (vtop->type.t & VT_BTYPE) == VT_LLONG) { + vpushi(0); + gen_op(TOK_NE); + } + if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { + /* constant jmp optimization */ + if ((vtop->c.i != 0) != inv) + t = gjmp(t); + } else { + v = gv(RC_INT); + o(0x85); + o(0xc0 + v * 9); + g(0x0f); + t = psym(0x85 ^ inv, t); + } + } + vtop--; + return t; +} + +/* generate an integer binary operation */ +void gen_opi(int op) +{ + int r, fr, opc, c; + + switch(op) { + case '+': + case TOK_ADDC1: /* add with carry generation */ + opc = 0; + gen_op8: + if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST && + !is64_type(vtop->type.t)) { + /* constant case */ + vswap(); + r = gv(RC_INT); + if (is64_type(vtop->type.t)) { + o(0x48 | REX_BASE(r)); + } + vswap(); + c = vtop->c.i; + if (c == (char)c) { + /* XXX: generate inc and dec for smaller code ? 
*/ + o(0x83); + o(0xc0 | (opc << 3) | REG_VALUE(r)); + g(c); + } else { + o(0x81); + oad(0xc0 | (opc << 3) | REG_VALUE(r), c); + } + } else { + gv2(RC_INT, RC_INT); + r = vtop[-1].r; + fr = vtop[0].r; + if (opc != 7 || + is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) || + is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) { + o(0x48 | REX_BASE(r) | (REX_BASE(fr) << 2)); + } + o((opc << 3) | 0x01); + o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8); + } + vtop--; + if (op >= TOK_ULT && op <= TOK_GT) { + vtop->r = VT_CMP; + vtop->c.i = op; + } + break; + case '-': + case TOK_SUBC1: /* sub with carry generation */ + opc = 5; + goto gen_op8; + case TOK_ADDC2: /* add with carry use */ + opc = 2; + goto gen_op8; + case TOK_SUBC2: /* sub with carry use */ + opc = 3; + goto gen_op8; + case '&': + opc = 4; + goto gen_op8; + case '^': + opc = 6; + goto gen_op8; + case '|': + opc = 1; + goto gen_op8; + case '*': + gv2(RC_INT, RC_INT); + r = vtop[-1].r; + fr = vtop[0].r; + if (is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) || + is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) { + o(0x48 | REX_BASE(fr) | (REX_BASE(r) << 2)); + } + vtop--; + o(0xaf0f); /* imul fr, r */ + o(0xc0 + fr + r * 8); + break; + case TOK_SHL: + opc = 4; + goto gen_shift; + case TOK_SHR: + opc = 5; + goto gen_shift; + case TOK_SAR: + opc = 7; + gen_shift: + opc = 0xc0 | (opc << 3); + if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { + /* constant case */ + vswap(); + r = gv(RC_INT); + if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { + o(0x48 | REX_BASE(r)); + c = 0x3f; + } else { + c = 0x1f; + } + vswap(); + c &= vtop->c.i; + o(0xc1); /* shl/shr/sar $xxx, r */ + o(opc | r); + g(c); + } else { + /* we generate the shift in ecx */ + gv2(RC_INT, RC_RCX); + r = vtop[-1].r; + if ((vtop[-1].type.t & VT_BTYPE) == VT_LLONG) { + o(0x48 | REX_BASE(r)); + } + o(0xd3); /* shl/shr/sar %cl, r */ + o(opc | r); + } + vtop--; + break; + case '/': + case TOK_UDIV: + 
case TOK_PDIV: + case '%': + case TOK_UMOD: + case TOK_UMULL: + /* first operand must be in eax */ + /* XXX: need better constraint for second operand */ + gv2(RC_RAX, RC_RCX); + r = vtop[-1].r; + fr = vtop[0].r; + vtop--; + save_reg(TREG_RDX); + if (op == TOK_UMULL) { + o(0xf7); /* mul fr */ + o(0xe0 + fr); + vtop->r2 = TREG_RDX; + r = TREG_RAX; + } else { + if (op == TOK_UDIV || op == TOK_UMOD) { + o(0xf7d231); /* xor %edx, %edx, div fr, %eax */ + o(0xf0 + fr); + } else { + if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { + o(0x9948); /* cqto */ + o(0x48 + REX_BASE(fr)); + } else { + o(0x99); /* cltd */ + } + o(0xf7); /* idiv fr, %eax */ + o(0xf8 + fr); + } + if (op == '%' || op == TOK_UMOD) + r = TREG_RDX; + else + r = TREG_RAX; + } + vtop->r = r; + break; + default: + opc = 7; + goto gen_op8; + } +} + +void gen_opl(int op) +{ + gen_opi(op); +} + +/* generate a floating point operation 'v = t1 op t2' instruction. The + two operands are guaranteed to have the same floating point type */ +/* XXX: need to use ST1 too */ +void gen_opf(int op) +{ + int a, ft, fc, swapped, r; + int float_type = + (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? 
RC_ST0 : RC_FLOAT; + + /* convert constants to memory references */ + if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { + vswap(); + gv(float_type); + vswap(); + } + if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) + gv(float_type); + + /* must put at least one value in the floating point register */ + if ((vtop[-1].r & VT_LVAL) && + (vtop[0].r & VT_LVAL)) { + vswap(); + gv(float_type); + vswap(); + } + swapped = 0; + /* swap the stack if needed so that t1 is the register and t2 is + the memory reference */ + if (vtop[-1].r & VT_LVAL) { + vswap(); + swapped = 1; + } + if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { + if (op >= TOK_ULT && op <= TOK_GT) { + /* load on stack second operand */ + load(TREG_ST0, vtop); + save_reg(TREG_RAX); /* eax is used by FP comparison code */ + if (op == TOK_GE || op == TOK_GT) + swapped = !swapped; + else if (op == TOK_EQ || op == TOK_NE) + swapped = 0; + if (swapped) + o(0xc9d9); /* fxch %st(1) */ + o(0xe9da); /* fucompp */ + o(0xe0df); /* fnstsw %ax */ + if (op == TOK_EQ) { + o(0x45e480); /* and $0x45, %ah */ + o(0x40fC80); /* cmp $0x40, %ah */ + } else if (op == TOK_NE) { + o(0x45e480); /* and $0x45, %ah */ + o(0x40f480); /* xor $0x40, %ah */ + op = TOK_NE; + } else if (op == TOK_GE || op == TOK_LE) { + o(0x05c4f6); /* test $0x05, %ah */ + op = TOK_EQ; + } else { + o(0x45c4f6); /* test $0x45, %ah */ + op = TOK_EQ; + } + vtop--; + vtop->r = VT_CMP; + vtop->c.i = op; + } else { + /* no memory reference possible for long double operations */ + load(TREG_ST0, vtop); + swapped = !swapped; + + switch(op) { + default: + case '+': + a = 0; + break; + case '-': + a = 4; + if (swapped) + a++; + break; + case '*': + a = 1; + break; + case '/': + a = 6; + if (swapped) + a++; + break; + } + ft = vtop->type.t; + fc = vtop->c.ul; + o(0xde); /* fxxxp %st, %st(1) */ + o(0xc1 + (a << 3)); + vtop--; + } + } else { + if (op >= TOK_ULT && op <= TOK_GT) { + /* if saved lvalue, then we must reload it */ + r = vtop->r; + fc = vtop->c.ul; + if 
((r & VT_VALMASK) == VT_LLOCAL) { + SValue v1; + r = get_reg(RC_INT); + v1.type.t = VT_INT; + v1.r = VT_LOCAL | VT_LVAL; + v1.c.ul = fc; + load(r, &v1); + fc = 0; + } + + if (op == TOK_EQ || op == TOK_NE) { + swapped = 0; + } else { + if (op == TOK_LE || op == TOK_LT) + swapped = !swapped; + if (op == TOK_LE || op == TOK_GE) { + op = 0x93; /* setae */ + } else { + op = 0x97; /* seta */ + } + } + + if (swapped) { + o(0x7e0ff3); /* movq */ + gen_modrm(1, r, vtop->sym, fc); + + if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) { + o(0x66); + } + o(0x2e0f); /* ucomisd %xmm0, %xmm1 */ + o(0xc8); + } else { + if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) { + o(0x66); + } + o(0x2e0f); /* ucomisd */ + gen_modrm(0, r, vtop->sym, fc); + } + + vtop--; + vtop->r = VT_CMP; + vtop->c.i = op; + } else { + /* no memory reference possible for long double operations */ + if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { + load(TREG_XMM0, vtop); + swapped = !swapped; + } + switch(op) { + default: + case '+': + a = 0; + break; + case '-': + a = 4; + break; + case '*': + a = 1; + break; + case '/': + a = 6; + break; + } + ft = vtop->type.t; + fc = vtop->c.ul; + if ((ft & VT_BTYPE) == VT_LDOUBLE) { + o(0xde); /* fxxxp %st, %st(1) */ + o(0xc1 + (a << 3)); + } else { + /* if saved lvalue, then we must reload it */ + r = vtop->r; + if ((r & VT_VALMASK) == VT_LLOCAL) { + SValue v1; + r = get_reg(RC_INT); + v1.type.t = VT_INT; + v1.r = VT_LOCAL | VT_LVAL; + v1.c.ul = fc; + load(r, &v1); + fc = 0; + } + if (swapped) { + /* movq %xmm0,%xmm1 */ + o(0x7e0ff3); + o(0xc8); + load(TREG_XMM0, vtop); + /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */ + if ((ft & VT_BTYPE) == VT_DOUBLE) { + o(0xf2); + } else { + o(0xf3); + } + o(0x0f); + o(0x58 + a); + o(0xc1); + } else { + if ((ft & VT_BTYPE) == VT_DOUBLE) { + o(0xf2); + } else { + o(0xf3); + } + o(0x0f); + o(0x58 + a); + gen_modrm(0, r, vtop->sym, fc); + } + } + vtop--; + } + } +} + +/* convert integers to fp 't' type. 
Must handle 'int', 'unsigned int' + and 'long long' cases. */ +void gen_cvt_itof(int t) +{ + if ((t & VT_BTYPE) == VT_LDOUBLE) { + save_reg(TREG_ST0); + gv(RC_INT); + if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { + /* signed long long to float/double/long double (unsigned case + is handled generically) */ + o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ + o(0x242cdf); /* fildll (%rsp) */ + o(0x08c48348); /* add $8, %rsp */ + } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == + (VT_INT | VT_UNSIGNED)) { + /* unsigned int to float/double/long double */ + o(0x6a); /* push $0 */ + g(0x00); + o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ + o(0x242cdf); /* fildll (%rsp) */ + o(0x10c48348); /* add $16, %rsp */ + } else { + /* int to float/double/long double */ + o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ + o(0x2404db); /* fildl (%rsp) */ + o(0x08c48348); /* add $8, %rsp */ + } + vtop->r = TREG_ST0; + } else { + save_reg(TREG_XMM0); + gv(RC_INT); + o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT)); + if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == + (VT_INT | VT_UNSIGNED) || + (vtop->type.t & VT_BTYPE) == VT_LLONG) { + o(0x48); /* REX */ + } + o(0x2a0f); + o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */ + vtop->r = TREG_XMM0; + } +} + +/* convert from one floating point type to another */ +void gen_cvt_ftof(int t) +{ + int ft, bt, tbt; + + ft = vtop->type.t; + bt = ft & VT_BTYPE; + tbt = t & VT_BTYPE; + + if (bt == VT_FLOAT) { + gv(RC_FLOAT); + if (tbt == VT_DOUBLE) { + o(0xc0140f); /* unpcklps */ + o(0xc05a0f); /* cvtps2pd */ + } else if (tbt == VT_LDOUBLE) { + /* movss %xmm0,-0x10(%rsp) */ + o(0x44110ff3); + o(0xf024); + o(0xf02444d9); /* flds -0x10(%rsp) */ + vtop->r = TREG_ST0; + } + } else if (bt == VT_DOUBLE) { + gv(RC_FLOAT); + if (tbt == VT_FLOAT) { + o(0xc0140f66); /* unpcklpd */ + o(0xc05a0f66); /* cvtpd2ps */ + } else if (tbt == VT_LDOUBLE) { + /* movsd %xmm0,-0x10(%rsp) */ + o(0x44110ff2); + o(0xf024); + o(0xf02444dd); /* fldl -0x10(%rsp) */ + vtop->r = 
TREG_ST0; + } + } else { + gv(RC_ST0); + if (tbt == VT_DOUBLE) { + o(0xf0245cdd); /* fstpl -0x10(%rsp) */ + /* movsd -0x10(%rsp),%xmm0 */ + o(0x44100ff2); + o(0xf024); + vtop->r = TREG_XMM0; + } else if (tbt == VT_FLOAT) { + o(0xf0245cd9); /* fstps -0x10(%rsp) */ + /* movss -0x10(%rsp),%xmm0 */ + o(0x44100ff3); + o(0xf024); + vtop->r = TREG_XMM0; + } + } +} + +/* convert fp to int 't' type */ +void gen_cvt_ftoi(int t) +{ + int ft, bt, size, r; + ft = vtop->type.t; + bt = ft & VT_BTYPE; + if (bt == VT_LDOUBLE) { + gen_cvt_ftof(VT_DOUBLE); + bt = VT_DOUBLE; + } + + gv(RC_FLOAT); + if (t != VT_INT) + size = 8; + else + size = 4; + + r = get_reg(RC_INT); + if (bt == VT_FLOAT) { + o(0xf3); + } else if (bt == VT_DOUBLE) { + o(0xf2); + } else { + assert(0); + } + if (size == 8) { + o(0x48 + REX_BASE(r)); + } + o(0x2c0f); /* cvttss2si or cvttsd2si */ + o(0xc0 + (REG_VALUE(r) << 3)); + vtop->r = r; +} + +/* computed goto support */ +void ggoto(void) +{ + gcall_or_jmp(1); + vtop--; +} + +/* end of x86-64 code generator */ +/*************************************************************/ -- 2.11.4.GIT