From 68666eee2ade45ab73b07361367d0a38d350d663 Mon Sep 17 00:00:00 2001 From: grischka Date: Wed, 8 Feb 2017 19:45:31 +0100 Subject: [PATCH] tccgen: factor out gfunc_return Also: - on windows i386 and x86-64, structures of size <= 8 are NOT returned in registers if size is not one of 1,2,4,8. - cleanup: put all tv-push/pop/swap/rot into one place --- arm64-gen.c | 13 ++- i386-gen.c | 21 ++-- tcc.h | 3 +- tccgen.c | 354 +++++++++++++++++++++++++++++------------------------------ x86_64-gen.c | 20 ++-- 5 files changed, 199 insertions(+), 212 deletions(-) diff --git a/arm64-gen.c b/arm64-gen.c index 71e977e8..0ff60990 100644 --- a/arm64-gen.c +++ b/arm64-gen.c @@ -1193,9 +1193,9 @@ ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, return 0; } -ST_FUNC void greturn(void) +ST_FUNC void gfunc_return(CType *func_type) { - CType *t = &func_vt; + CType *t = func_type; unsigned long a; arm64_pcs(0, &t, &a); @@ -1203,8 +1203,8 @@ ST_FUNC void greturn(void) case -1: break; case 0: - if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { - int align, size = type_size(&func_vt, &align); + if ((func_type->t & VT_BTYPE) == VT_STRUCT) { + int align, size = type_size(func_type, &align); gaddrof(); gv(RC_R(0)); arm64_ldrs(0, size); @@ -1213,7 +1213,7 @@ ST_FUNC void greturn(void) gv(RC_IRET); break; case 1: { - CType type = func_vt; + CType type = *func_type; mk_pointer(&type); vset(&type, VT_LOCAL | VT_LVAL, func_vc); indir(); @@ -1222,7 +1222,7 @@ ST_FUNC void greturn(void) break; } case 16: - if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { + if ((func_type->t & VT_BTYPE) == VT_STRUCT) { uint32_t j, sz, n = arm64_hfa(&vtop->type, &sz); gaddrof(); gv(RC_R(0)); @@ -1237,6 +1237,7 @@ ST_FUNC void greturn(void) default: assert(0); } + vtop--; } ST_FUNC void gfunc_epilog(void) diff --git a/i386-gen.c b/i386-gen.c index 7d9c3d00..759f33c9 100644 --- a/i386-gen.c +++ b/i386-gen.c @@ -396,21 +396,21 @@ ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int { #ifdef 
TCC_TARGET_PE int size, align; - *ret_align = 1; // Never have to re-align return values for x86 *regsize = 4; size = type_size(vt, &align); - if (size > 8) { + if (size > 8 || (size & (size - 1))) return 0; - } else if (size > 4) { - ret->ref = NULL; + if (size == 8) ret->t = VT_LLONG; - return 1; - } else { - ret->ref = NULL; + else if (size == 4) ret->t = VT_INT; - return 1; - } + else if (size == 2) + ret->t = VT_SHORT; + else + ret->t = VT_BYTE; + ret->ref = NULL; + return 1; #else *ret_align = 1; // Never have to re-align return values for x86 return 0; @@ -547,7 +547,8 @@ ST_FUNC void gfunc_prolog(CType *func_type) func_var = (sym->c == FUNC_ELLIPSIS); #ifdef TCC_TARGET_PE size = type_size(&func_vt,&align); - if (((func_vt.t & VT_BTYPE) == VT_STRUCT) && (size > 8)) { + if (((func_vt.t & VT_BTYPE) == VT_STRUCT) + && (size > 8 || (size & (size - 1)))) { #else if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { #endif diff --git a/tcc.h b/tcc.h index 7997e86f..e5001401 100644 --- a/tcc.h +++ b/tcc.h @@ -1273,7 +1273,6 @@ ST_FUNC void check_vstack(void); ST_INLN int is_float(int t); ST_FUNC int ieee_finite(double d); ST_FUNC void test_lvalue(void); -ST_FUNC void swap(int *p, int *q); ST_FUNC void vpushi(int v); ST_FUNC Sym *external_global_sym(int v, CType *type, int r); ST_FUNC void vset(CType *type, int r, long v); @@ -1519,7 +1518,7 @@ ST_FUNC void gen_cvt_itof1(int t); #ifdef TCC_TARGET_ARM64 ST_FUNC void gen_cvt_sxtw(void); ST_FUNC void gen_opl(int op); -ST_FUNC void greturn(void); +ST_FUNC void gfunc_return(CType *func_type); ST_FUNC void gen_va_start(void); ST_FUNC void gen_va_arg(CType *t); ST_FUNC void gen_clear_cache(void); diff --git a/tccgen.c b/tccgen.c index 2804539e..0f08c354 100644 --- a/tccgen.c +++ b/tccgen.c @@ -559,14 +559,6 @@ static void apply_visibility(Sym *sym, CType *type) /* ------------------------------------------------------------------------- */ -ST_FUNC void swap(int *p, int *q) -{ - int t; - t = *p; - *p = *q; - *q = t; -} - static void 
vsetc(CType *type, int r, CValue *vc) { int v; @@ -583,12 +575,15 @@ static void vsetc(CType *type, int r, CValue *vc) as their value might still be used for real. All values we push under nocode_wanted will eventually be popped again, so that the VT_CMP/VT_JMP value will be in vtop - when code is unsuppressed again. */ + when code is unsuppressed again. + + Same logic below in vswap(); */ if (vtop >= vstack && !nocode_wanted) { v = vtop->r & VT_VALMASK; if (v == VT_CMP || (v & ~1) == VT_JMP) gv(RC_INT); } + vtop++; vtop->type = *type; vtop->r = r; @@ -597,6 +592,38 @@ static void vsetc(CType *type, int r, CValue *vc) vtop->sym = NULL; } +ST_FUNC void vswap(void) +{ + SValue tmp; + /* cannot vswap cpu flags. See comment at vsetc() above */ + if (vtop >= vstack && !nocode_wanted) { + int v = vtop->r & VT_VALMASK; + if (v == VT_CMP || (v & ~1) == VT_JMP) + gv(RC_INT); + } + tmp = vtop[0]; + vtop[0] = vtop[-1]; + vtop[-1] = tmp; +} + +/* pop stack value */ +ST_FUNC void vpop(void) +{ + int v; + v = vtop->r & VT_VALMASK; +#if defined(TCC_TARGET_I386) || defined(TCC_TARGET_X86_64) + /* for x86, we need to pop the FP stack */ + if (v == TREG_ST0) { + o(0xd8dd); /* fstp %st(0) */ + } else +#endif + if (v == VT_JMP || v == VT_JMPI) { + /* need to put correct jump if && or || without test */ + gsym(vtop->c.i); + } + vtop--; +} + /* push constant of type "type" with useless value */ ST_FUNC void vpush(CType *type) { @@ -637,6 +664,71 @@ static inline void vpushll(long long v) vpush64(VT_LLONG, v); } +ST_FUNC void vset(CType *type, int r, long v) +{ + CValue cval; + + cval.i = v; + vsetc(type, r, &cval); +} + +static void vseti(int r, int v) +{ + CType type; + type.t = VT_INT; + type.ref = 0; + vset(&type, r, v); +} + +ST_FUNC void vpushv(SValue *v) +{ + if (vtop >= vstack + (VSTACK_SIZE - 1)) + tcc_error("memory full (vstack)"); + vtop++; + *vtop = *v; +} + +static void vdup(void) +{ + vpushv(vtop); +} + +/* rotate n first stack elements to the bottom + I1 ... In -> I2 ... 
In I1 [top is right] +*/ +ST_FUNC void vrotb(int n) +{ + int i; + SValue tmp; + + tmp = vtop[-n + 1]; + for(i=-n+1;i!=0;i++) + vtop[i] = vtop[i+1]; + vtop[0] = tmp; +} + +/* rotate the n elements before entry e towards the top + I1 ... In ... -> In I1 ... I(n-1) ... [top is right] + */ +ST_FUNC void vrote(SValue *e, int n) +{ + int i; + SValue tmp; + + tmp = *e; + for(i = 0;i < n - 1; i++) + e[-i] = e[-i - 1]; + e[-n + 1] = tmp; +} + +/* rotate n first stack elements to the top + I1 ... In -> In I1 ... I(n-1) [top is right] + */ +ST_FUNC void vrott(int n) +{ + vrote(vtop, n); +} + /* push a symbol value of TYPE */ static inline void vpushsym(CType *type, Sym *sym) { @@ -715,56 +807,6 @@ ST_FUNC void vpush_global_sym(CType *type, int v) vpushsym(type, external_global_sym(v, type, 0)); } -ST_FUNC void vset(CType *type, int r, long v) -{ - CValue cval; - - cval.i = v; - vsetc(type, r, &cval); -} - -static void vseti(int r, int v) -{ - CType type; - type.t = VT_INT; - type.ref = 0; - vset(&type, r, v); -} - -ST_FUNC void vswap(void) -{ - SValue tmp; - /* cannot let cpu flags if other instruction are generated. Also - avoid leaving VT_JMP anywhere except on the top of the stack - because it would complicate the code generator. */ - if (vtop >= vstack) { - int v = vtop->r & VT_VALMASK; - if (v == VT_CMP || (v & ~1) == VT_JMP) - gv(RC_INT); - } - tmp = vtop[0]; - vtop[0] = vtop[-1]; - vtop[-1] = tmp; - -/* XXX: +2% overall speed possible with optimized memswap - * - * memswap(&vtop[0], &vtop[1], sizeof *vtop); - */ -} - -ST_FUNC void vpushv(SValue *v) -{ - if (vtop >= vstack + (VSTACK_SIZE - 1)) - tcc_error("memory full (vstack)"); - vtop++; - *vtop = *v; -} - -static void vdup(void) -{ - vpushv(vtop); -} - /* save registers up to (vtop - n) stack entry */ ST_FUNC void save_regs(int n) { @@ -1297,60 +1339,6 @@ static void lbuild(int t) } #endif -/* rotate n first stack elements to the bottom - I1 ... In -> I2 ... 
In I1 [top is right] -*/ -ST_FUNC void vrotb(int n) -{ - int i; - SValue tmp; - - tmp = vtop[-n + 1]; - for(i=-n+1;i!=0;i++) - vtop[i] = vtop[i+1]; - vtop[0] = tmp; -} - -/* rotate the n elements before entry e towards the top - I1 ... In ... -> In I1 ... I(n-1) ... [top is right] - */ -ST_FUNC void vrote(SValue *e, int n) -{ - int i; - SValue tmp; - - tmp = *e; - for(i = 0;i < n - 1; i++) - e[-i] = e[-i - 1]; - e[-n + 1] = tmp; -} - -/* rotate n first stack elements to the top - I1 ... In -> In I1 ... I(n-1) [top is right] - */ -ST_FUNC void vrott(int n) -{ - vrote(vtop, n); -} - -/* pop stack value */ -ST_FUNC void vpop(void) -{ - int v; - v = vtop->r & VT_VALMASK; -#if defined(TCC_TARGET_I386) || defined(TCC_TARGET_X86_64) - /* for x86, we need to pop the FP stack */ - if (v == TREG_ST0) { - o(0xd8dd); /* fstp %st(0) */ - } else -#endif - if (v == VT_JMP || v == VT_JMPI) { - /* need to put correct jump if && or || without test */ - gsym(vtop->c.i); - } - vtop--; -} - /* convert stack entry to register and duplicate its value in another register */ static void gv_dup(void) @@ -2025,7 +2013,7 @@ redo: /* Put pointer as first operand */ if (bt2 == VT_PTR) { vswap(); - swap(&t1, &t2); + t = t1, t1 = t2, t2 = t; } #if PTR_SIZE == 4 if ((vtop[0].type.t & VT_BTYPE) == VT_LLONG) @@ -4668,11 +4656,11 @@ ST_FUNC void unary(void) subtract(-0, x). 
*/ vpush(&vtop->type); if (t == VT_FLOAT) - vtop->c.f = -0.0f; + vtop->c.f = -1.0 * 0.0; else if (t == VT_DOUBLE) - vtop->c.d = -0.0; + vtop->c.d = -1.0 * 0.0; else - vtop->c.ld = -0.0; + vtop->c.ld = -1.0 * 0.0; } else vpushi(0); vswap(); @@ -5460,6 +5448,72 @@ static void label_or_decl(int l) decl(l); } +#ifndef TCC_TARGET_ARM64 +static void gfunc_return(CType *func_type) +{ + if ((func_type->t & VT_BTYPE) == VT_STRUCT) { + CType type, ret_type; + int ret_align, ret_nregs, regsize; + ret_nregs = gfunc_sret(func_type, func_var, &ret_type, + &ret_align, &regsize); + if (0 == ret_nregs) { + /* if returning structure, must copy it to implicit + first pointer arg location */ + type = *func_type; + mk_pointer(&type); + vset(&type, VT_LOCAL | VT_LVAL, func_vc); + indir(); + vswap(); + /* copy structure value to pointer */ + vstore(); + } else { + /* returning structure packed into registers */ + int r, size, addr, align; + size = type_size(func_type,&align); + if ((vtop->r != (VT_LOCAL | VT_LVAL) || + (vtop->c.i & (ret_align-1))) + && (align & (ret_align-1))) { + loc = (loc - size) & -ret_align; + addr = loc; + type = *func_type; + vset(&type, VT_LOCAL | VT_LVAL, addr); + vswap(); + vstore(); + vpop(); + vset(&ret_type, VT_LOCAL | VT_LVAL, addr); + } + vtop->type = ret_type; + if (is_float(ret_type.t)) + r = rc_fret(ret_type.t); + else + r = RC_IRET; + + if (ret_nregs == 1) + gv(r); + else { + for (;;) { + vdup(); + gv(r); + vpop(); + if (--ret_nregs == 0) + break; + /* We assume that when a structure is returned in multiple + registers, their classes are consecutive values of the + suite s(n) = 2^n */ + r <<= 1; + vtop->c.i += regsize; + } + } + } + } else if (is_float(func_type->t)) { + gv(rc_fret(func_type->t)); + } else { + gv(RC_IRET); + } + vtop--; /* NOT vpop() because on x86 it would flush the fp stack */ +} +#endif + static int case_cmp(const void *pa, const void *pb) { int64_t a = (*(struct case_t**) pa)->v1; @@ -5655,71 +5709,7 @@ static void block(int *bsym, 
int *csym, int is_expr) if (tok != ';') { gexpr(); gen_assign_cast(&func_vt); -#ifdef TCC_TARGET_ARM64 - // Perhaps it would be better to use this for all backends: - greturn(); -#else - if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { - CType type, ret_type; - int ret_align, ret_nregs, regsize; - ret_nregs = gfunc_sret(&func_vt, func_var, &ret_type, - &ret_align, &regsize); - if (0 == ret_nregs) { - /* if returning structure, must copy it to implicit - first pointer arg location */ - type = func_vt; - mk_pointer(&type); - vset(&type, VT_LOCAL | VT_LVAL, func_vc); - indir(); - vswap(); - /* copy structure value to pointer */ - vstore(); - } else { - /* returning structure packed into registers */ - int r, size, addr, align; - size = type_size(&func_vt,&align); - if ((vtop->r != (VT_LOCAL | VT_LVAL) || - (vtop->c.i & (ret_align-1))) - && (align & (ret_align-1))) { - loc = (loc - size) & -ret_align; - addr = loc; - type = func_vt; - vset(&type, VT_LOCAL | VT_LVAL, addr); - vswap(); - vstore(); - vpop(); - vset(&ret_type, VT_LOCAL | VT_LVAL, addr); - } - vtop->type = ret_type; - if (is_float(ret_type.t)) - r = rc_fret(ret_type.t); - else - r = RC_IRET; - - if (ret_nregs == 1) - gv(r); - else { - for (;;) { - vdup(); - gv(r); - vpop(); - if (--ret_nregs == 0) - break; - /* We assume that when a structure is returned in multiple - registers, their classes are consecutive values of the - suite s(n) = 2^n */ - r <<= 1; - vtop->c.i += regsize; - } - } - } - } else if (is_float(func_vt.t)) { - gv(rc_fret(func_vt.t)); - } else { - gv(RC_IRET); - } -#endif - vtop--; /* NOT vpop() because on x86 it would flush the fp stack */ + gfunc_return(&func_vt); } skip(';'); /* jump unless last stmt in top-level block */ diff --git a/x86_64-gen.c b/x86_64-gen.c index c94eb051..7b61b48a 100644 --- a/x86_64-gen.c +++ b/x86_64-gen.c @@ -752,25 +752,21 @@ void gen_offs_sp(int b, int r, int d) ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) { int size, align; 
- *regsize = 8; *ret_align = 1; // Never have to re-align return values for x86-64 + *regsize = 8; size = type_size(vt, &align); - ret->ref = NULL; - if (size > 8) { + if (size > 8 || (size & (size - 1))) return 0; - } else if (size > 4) { + if (size == 8) ret->t = VT_LLONG; - return 1; - } else if (size > 2) { + else if (size == 4) ret->t = VT_INT; - return 1; - } else if (size > 1) { + else if (size == 2) ret->t = VT_SHORT; - return 1; - } else { + else ret->t = VT_BYTE; - return 1; - } + ret->ref = NULL; + return 1; } static int is_sse_float(int t) { -- 2.11.4.GIT