From 515169f21bf6c75a3179822e1dd9e006f5ef4412 Mon Sep 17 00:00:00 2001 From: jiang <30155751@qq.com> Date: Tue, 29 Apr 2014 23:57:22 +0800 Subject: [PATCH] Reduce the size of the machine code generated for x86_64 --- tcc.h | 8 +- tccgen.c | 3 +- x86_64-gen.c | 484 +++++++++++++++++++++++++++++------------------------------ 3 files changed, 244 insertions(+), 251 deletions(-) diff --git a/tcc.h b/tcc.h index c93cedfe..24b5b5aa 100644 --- a/tcc.h +++ b/tcc.h @@ -738,19 +738,21 @@ struct TCCState { #define VT_CMP 0x0033 /* the value is stored in processor flags (in vc) */ #define VT_JMP 0x0034 /* value is the consequence of jmp true (even) */ #define VT_JMPI 0x0035 /* value is the consequence of jmp false (odd) */ -#define VT_REF 0x0040 /* value is pointer to structure rather than address */ +#define TREG_MEM 0x0040 /* x86_64-gen.c add for tcc.h: The current value can be */ +#define VT_REF 0x0080 /* value is pointer to structure rather than address */ #define VT_LVAL 0x0100 /* var is an lvalue */ #define VT_SYM 0x0200 /* a symbol value is added */ #define VT_MUSTCAST 0x0400 /* value must be casted to be correct (used for char/short stored in integer registers) */ #define VT_MUSTBOUND 0x0800 /* bound checking must be done before dereferencing value */ -#define VT_BOUNDED 0x8000 /* value is bounded. The address of the - bounding function call point is in vc */ #define VT_LVAL_BYTE 0x1000 /* lvalue is a byte */ #define VT_LVAL_SHORT 0x2000 /* lvalue is a short */ #define VT_LVAL_UNSIGNED 0x4000 /* lvalue is unsigned */ #define VT_LVAL_TYPE (VT_LVAL_BYTE | VT_LVAL_SHORT | VT_LVAL_UNSIGNED) +#define VT_BOUNDED 0x8000 /* value is bounded. 
The address of the + bounding function call point is in vc */ +#define VT_TMP 0x10000 /* types */ #define VT_BTYPE 0x000f /* mask for basic type */ diff --git a/tccgen.c b/tccgen.c index 1a89d4a4..71a426af 100644 --- a/tccgen.c +++ b/tccgen.c @@ -909,8 +909,9 @@ ST_FUNC int gv(int rc) /* one register type load */ load(r, vtop); } + vtop->r = r; + vtop->c.ptr_offset = 0; } - vtop->r = r; #ifdef TCC_TARGET_C67 /* uses register pairs for doubles */ if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) diff --git a/x86_64-gen.c b/x86_64-gen.c index ae653283..a8a7e87a 100644 --- a/x86_64-gen.c +++ b/x86_64-gen.c @@ -29,28 +29,38 @@ /* a register can belong to several classes. The classes must be sorted from more general to more precise (see gv2() code which does assumptions on it). */ -#define RC_INT 0x0001 /* generic integer register */ -#define RC_FLOAT 0x0002 /* generic float register */ -#define RC_RAX 0x0004 -#define RC_RCX 0x0008 -#define RC_RDX 0x0010 -#define RC_ST0 0x0080 /* only for long double */ -#define RC_R8 0x0100 -#define RC_R9 0x0200 -#define RC_R10 0x0400 -#define RC_R11 0x0800 -#define RC_XMM0 0x1000 -#define RC_XMM1 0x2000 -#define RC_XMM2 0x4000 -#define RC_XMM3 0x8000 -#define RC_XMM4 0x10000 -#define RC_XMM5 0x20000 -#define RC_XMM6 0x40000 -#define RC_XMM7 0x80000 -#define RC_IRET RC_RAX /* function return: integer register */ -#define RC_LRET RC_RDX /* function return: second integer register */ -#define RC_FRET RC_XMM0 /* function return: float register */ -#define RC_QRET RC_XMM1 /* function return: second float register */ +#define RC_INT 0x0001 /* generic integer register */ +#define RC_FLOAT 0x0002 /* generic float register */ +#define RC_RAX 0x0004 +#define RC_RCX 0x0008 +#define RC_RDX 0x0010 +#define RC_ST0 0x0020 /* only for long double */ +#define RC_R8 0x0040 +#define RC_R9 0x0080 +#define RC_XMM0 0x0100 +#define RC_XMM1 0x0200 +#define RC_XMM2 0x0400 +#define RC_XMM3 0x0800 +#define RC_XMM4 0x1000 +#define RC_XMM5 0x2000 +#define RC_XMM6 
0x4000 +#define RC_XMM7 0x8000 +#define RC_RSI 0x10000 +#define RC_RDI 0x20000 +#define RC_INT1 0x40000 /* function_pointer */ +#define RC_INT2 0x80000 +#define RC_RBX 0x100000 +#define RC_R10 0x200000 +#define RC_R11 0x400000 +#define RC_R12 0x800000 +#define RC_R13 0x1000000 +#define RC_R14 0x2000000 +#define RC_R15 0x4000000 +#define RC_IRET RC_RAX /* function return: integer register */ +#define RC_LRET RC_RDX /* function return: second integer register */ +#define RC_FRET RC_XMM0 /* function return: float register */ +#define RC_QRET RC_XMM1 /* function return: second float register */ +#define RC_MASK (RC_INT|RC_INT1|RC_INT2|RC_FLOAT) /* pretty names for the registers */ enum { @@ -58,6 +68,7 @@ enum { TREG_RCX = 1, TREG_RDX = 2, TREG_RSP = 4, + TREG_ST0 = 5, TREG_RSI = 6, TREG_RDI = 7, @@ -75,13 +86,11 @@ enum { TREG_XMM6 = 22, TREG_XMM7 = 23, - TREG_ST0 = 24, - - TREG_MEM = 0x20, }; #define REX_BASE(reg) (((reg) >> 3) & 1) #define REG_VALUE(reg) ((reg) & 7) +#define FLAG_GOT 0X01 /* return registers for function */ #define REG_IRET TREG_RAX /* single word int return register */ @@ -122,34 +131,30 @@ enum { #include ST_DATA const int reg_classes[NB_REGS] = { - /* eax */ RC_INT | RC_RAX, - /* ecx */ RC_INT | RC_RCX, - /* edx */ RC_INT | RC_RDX, - 0, - 0, - 0, - 0, - 0, - RC_R8, - RC_R9, - RC_R10, - RC_R11, - 0, - 0, - 0, + /* eax */ RC_INT|RC_RAX|RC_INT2, + /* ecx */ RC_INT|RC_RCX|RC_INT2, + /* edx */ RC_INT|RC_RDX, + RC_INT|RC_INT1|RC_INT2|RC_RBX, 0, - /* xmm0 */ RC_FLOAT | RC_XMM0, - /* xmm1 */ RC_FLOAT | RC_XMM1, - /* xmm2 */ RC_FLOAT | RC_XMM2, - /* xmm3 */ RC_FLOAT | RC_XMM3, - /* xmm4 */ RC_FLOAT | RC_XMM4, - /* xmm5 */ RC_FLOAT | RC_XMM5, - /* xmm6 an xmm7 are included so gv() can be used on them, - but they are not tagged with RC_FLOAT because they are - callee saved on Windows */ - RC_XMM6, - RC_XMM7, - /* st0 */ RC_ST0 + /* st0 */ RC_ST0, + RC_RSI|RC_INT2, + RC_RDI|RC_INT2, + RC_INT|RC_R8|RC_INT2, + RC_INT|RC_R9|RC_INT2, + 
RC_INT|RC_INT1|RC_INT2|RC_R10, + RC_INT|RC_INT1|RC_INT2|RC_R11, + RC_INT|RC_INT1|RC_INT2|RC_R12, + RC_INT|RC_INT1|RC_INT2|RC_R13, + RC_INT|RC_INT1|RC_INT2|RC_R14, + RC_INT|RC_INT1|RC_INT2|RC_R15, + /* xmm0 */ RC_FLOAT | RC_XMM0, + RC_FLOAT|RC_XMM1, + RC_FLOAT|RC_XMM2, + RC_FLOAT|RC_XMM3, + RC_FLOAT|RC_XMM4, + RC_FLOAT|RC_XMM5, + RC_FLOAT|RC_XMM6, + RC_FLOAT|RC_XMM7, }; static unsigned long func_sub_sp_offset; @@ -324,7 +329,7 @@ static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got) } else { oad(0x85 | op_reg, c); } - } else if ((r & VT_VALMASK) >= TREG_MEM) { + } else if (r & TREG_MEM) { if (c) { g(0x80 | op_reg | REG_VALUE(r)); gen_le32(c); @@ -1609,39 +1614,42 @@ int gtst(int inv, int t) /* generate an integer binary operation */ void gen_opi(int op) { - int r, fr, opc, c; - int ll, uu, cc; + int r, fr, opc, fc, c, ll, uu, cc, tt2; + fr = vtop[0].r; + fc = vtop->c.ul; ll = is64_type(vtop[-1].type.t); - uu = (vtop[-1].type.t & VT_UNSIGNED) != 0; - cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST; + cc = (fr & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST; + tt2 = (fr & (VT_LVAL | VT_LVAL_TYPE)) == VT_LVAL; switch(op) { case '+': case TOK_ADDC1: /* add with carry generation */ opc = 0; gen_op8: + vswap(); + r = gv(RC_INT); + vswap(); if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) { /* constant case */ - vswap(); - r = gv(RC_INT); - vswap(); c = vtop->c.i; if (c == (char)c) { /* XXX: generate inc and dec for smaller code ? 
*/ - orex(ll, r, 0, 0x83); - o(0xc0 | (opc << 3) | REG_VALUE(r)); - g(c); + orex(ll, r, 0, 0x83); + o(0xc0 + REG_VALUE(r) + opc*8); + g(c); } else { orex(ll, r, 0, 0x81); - oad(0xc0 | (opc << 3) | REG_VALUE(r), c); + oad(0xc0 + REG_VALUE(r) + opc*8, c); } } else { - gv2(RC_INT, RC_INT); - r = vtop[-1].r; - fr = vtop[0].r; - orex(ll, r, fr, (opc << 3) | 0x01); - o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8); + if(!tt2) + fr = gv(RC_INT); + orex(ll, fr, r, 0x03 + opc*8); + if(fr >= VT_CONST) + gen_modrm(r, fr, vtop->sym, fc); + else + o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8); } vtop--; if (op >= TOK_ULT && op <= TOK_GT) { @@ -1669,11 +1677,27 @@ void gen_opi(int op) opc = 1; goto gen_op8; case '*': - gv2(RC_INT, RC_INT); - r = vtop[-1].r; - fr = vtop[0].r; - orex(ll, fr, r, 0xaf0f); /* imul fr, r */ - o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8); + opc = 5; + vswap(); + r = gv(RC_INT); + vswap(); + if(!tt2) + fr = gv(RC_INT); + if(r == TREG_RAX){ + if(fr != TREG_RDX) + save_reg(TREG_RDX); + orex(ll, fr, r, 0xf7); + if(fr >= VT_CONST) + gen_modrm(opc, fr, vtop->sym, fc); + else + o(0xc0 + REG_VALUE(fr) + opc*8); + }else{ + orex(ll, fr, r, 0xaf0f); /* imul fr, r */ + if(fr >= VT_CONST) + gen_modrm(r, fr, vtop->sym, fc); + else + o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8); + } vtop--; break; case TOK_SHL: @@ -1685,47 +1709,62 @@ void gen_opi(int op) case TOK_SAR: opc = 7; gen_shift: - opc = 0xc0 | (opc << 3); if (cc) { /* constant case */ vswap(); r = gv(RC_INT); vswap(); - orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */ - o(opc | REG_VALUE(r)); - g(vtop->c.i & (ll ? 63 : 31)); + c = vtop->c.i; + if(c == 1){ + orex(ll, r, 0, 0xd1); + o(0xc0 + REG_VALUE(r) + opc*8); + }else{ + orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */ + o(0xc0 + REG_VALUE(r) + opc*8); + g(c & (ll ? 
0x3f : 0x1f)); + } } else { /* we generate the shift in ecx */ gv2(RC_INT, RC_RCX); r = vtop[-1].r; orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */ - o(opc | REG_VALUE(r)); + o(0xc0 + REG_VALUE(r) + opc*8); } vtop--; break; case TOK_UDIV: case TOK_UMOD: + opc = 6; uu = 1; goto divmod; case '/': case '%': case TOK_PDIV: + opc = 7; uu = 0; divmod: /* first operand must be in eax */ /* XXX: need better constraint for second operand */ - gv2(RC_RAX, RC_RCX); - r = vtop[-1].r; - fr = vtop[0].r; - vtop--; - save_reg(TREG_RDX); - orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */ - orex(ll, fr, 0, 0xf7); /* div fr, %eax */ - o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr)); + if(!tt2){ + gv2(RC_RAX, RC_INT2); + fr = vtop[0].r; + }else{ + vswap(); + gv(RC_RAX); + vswap(); + } + save_reg(TREG_RDX); + orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cdq RDX:RAX <- sign-extend of RAX. */ + orex(ll, fr, 0, 0xf7); /* div fr, %eax */ + if(fr >= VT_CONST) + gen_modrm(opc, fr, vtop->sym, fc); + else + o(0xc0 + REG_VALUE(fr) + opc*8); if (op == '%' || op == TOK_UMOD) r = TREG_RDX; else r = TREG_RAX; + vtop--; vtop->r = r; break; default: @@ -1744,9 +1783,8 @@ void gen_opl(int op) /* XXX: need to use ST1 too */ void gen_opf(int op) { - int a, ft, fc, swapped, r; - int float_type = - (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT; + int a, ft, fc, swapped, fr, r; + int float_type = (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? 
RC_ST0 : RC_FLOAT; /* convert constants to memory references */ if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { @@ -1757,21 +1795,23 @@ void gen_opf(int op) if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) gv(float_type); - /* must put at least one value in the floating point register */ - if ((vtop[-1].r & VT_LVAL) && - (vtop[0].r & VT_LVAL)) { - vswap(); - gv(float_type); - vswap(); - } - swapped = 0; - /* swap the stack if needed so that t1 is the register and t2 is - the memory reference */ - if (vtop[-1].r & VT_LVAL) { - vswap(); - swapped = 1; - } - if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { + swapped = 0; + fc = vtop->c.ul; + ft = vtop->type.t; + + if ((ft & VT_BTYPE) == VT_LDOUBLE) { + /* swap the stack if needed so that t1 is the register and t2 is + the memory reference */ + /* must put at least one value in the floating point register */ + if ((vtop[-1].r & VT_LVAL) && (vtop[0].r & VT_LVAL)) { + vswap(); + gv(float_type); + vswap(); + } + if (vtop[-1].r & VT_LVAL) { + vswap(); + swapped = 1; + } if (op >= TOK_ULT && op <= TOK_GT) { /* load on stack second operand */ load(TREG_ST0, vtop); @@ -1782,10 +1822,10 @@ void gen_opf(int op) swapped = 0; if (swapped) o(0xc9d9); /* fxch %st(1) */ - if (op == TOK_EQ || op == TOK_NE) - o(0xe9da); /* fucompp */ - else - o(0xd9de); /* fcompp */ + if (op == TOK_EQ || op == TOK_NE) + o(0xe9da); /* fucompp */ + else + o(0xd9de); /* fcompp */ o(0xe0df); /* fnstsw %ax */ if (op == TOK_EQ) { o(0x45e480); /* and $0x45, %ah */ @@ -1808,7 +1848,6 @@ void gen_opf(int op) /* no memory reference possible for long double operations */ load(TREG_ST0, vtop); swapped = !swapped; - switch(op) { default: case '+': @@ -1828,63 +1867,45 @@ void gen_opf(int op) a++; break; } - ft = vtop->type.t; - fc = vtop->c.ul; o(0xde); /* fxxxp %st, %st(1) */ o(0xc1 + (a << 3)); vtop--; } } else { + vswap(); + gv(float_type); + vswap(); + fr = vtop->r; + r = vtop[-1].r; if (op >= TOK_ULT && op <= TOK_GT) { - /* if saved lvalue, then 
we must reload it */ - r = vtop->r; - fc = vtop->c.ul; - if ((r & VT_VALMASK) == VT_LLOCAL) { - SValue v1; - r = get_reg(RC_INT); - v1.type.t = VT_PTR; - v1.r = VT_LOCAL | VT_LVAL; - v1.c.ul = fc; - load(r, &v1); - fc = 0; - } - - if (op == TOK_EQ || op == TOK_NE) { - swapped = 0; - } else { - if (op == TOK_LE || op == TOK_LT) - swapped = !swapped; - if (op == TOK_LE || op == TOK_GE) { - op = 0x93; /* setae */ - } else { - op = 0x97; /* seta */ - } - } - - if (swapped) { - gv(RC_FLOAT); - vswap(); - } - assert(!(vtop[-1].r & VT_LVAL)); - - if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) - o(0x66); - if (op == TOK_EQ || op == TOK_NE) - o(0x2e0f); /* ucomisd */ - else - o(0x2f0f); /* comisd */ - - if (vtop->r & VT_LVAL) { - gen_modrm(vtop[-1].r, r, vtop->sym, fc); - } else { - o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8); - } - + switch(op){ + case TOK_LE: + op = TOK_ULE; /* setae */ + break; + case TOK_LT: + op = TOK_ULT; + break; + case TOK_GE: + op = TOK_UGE; + break; + case TOK_GT: + op = TOK_UGT; /* seta */ + break; + } + assert(!(vtop[-1].r & VT_LVAL)); + if ((ft & VT_BTYPE) == VT_DOUBLE) + o(0x66); + o(0x2e0f); /* ucomisd */ + if(fr >= VT_CONST) + gen_modrm(r, fr, vtop->sym, fc); + else + o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8); vtop--; vtop->r = VT_CMP; vtop->c.i = op | 0x100; } else { - assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE); + assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE); + /* no memory reference possible for long double operations */ switch(op) { default: case '+': @@ -1900,44 +1921,20 @@ void gen_opf(int op) a = 6; break; } - ft = vtop->type.t; - fc = vtop->c.ul; - assert((ft & VT_BTYPE) != VT_LDOUBLE); - - r = vtop->r; - /* if saved lvalue, then we must reload it */ - if ((vtop->r & VT_VALMASK) == VT_LLOCAL) { - SValue v1; - r = get_reg(RC_INT); - v1.type.t = VT_PTR; - v1.r = VT_LOCAL | VT_LVAL; - v1.c.ul = fc; - load(r, &v1); - fc = 0; - } - - assert(!(vtop[-1].r & VT_LVAL)); - if (swapped) { - assert(vtop->r & VT_LVAL); - 
gv(RC_FLOAT); - vswap(); - } - - if ((ft & VT_BTYPE) == VT_DOUBLE) { - o(0xf2); - } else { - o(0xf3); - } - o(0x0f); - o(0x58 + a); - - if (vtop->r & VT_LVAL) { - gen_modrm(vtop[-1].r, r, vtop->sym, fc); - } else { - o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8); - } - - vtop--; + assert((ft & VT_BTYPE) != VT_LDOUBLE); + assert(!(vtop[-1].r & VT_LVAL)); + if ((ft & VT_BTYPE) == VT_DOUBLE) { + o(0xf2); + } else { + o(0xf3); + } + o(0x0f); + o(0x58 + a); + if(fr >= VT_CONST) + gen_modrm(r, fr, vtop->sym, fc); + else + o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8); + vtop--; } } } @@ -1946,103 +1943,96 @@ void gen_opf(int op) and 'long long' cases. */ void gen_cvt_itof(int t) { - if ((t & VT_BTYPE) == VT_LDOUBLE) { + int ft, bt, tbt, r; + + ft = vtop->type.t; + bt = ft & VT_BTYPE; + tbt = t & VT_BTYPE; + r = gv(RC_INT); + + if (tbt == VT_LDOUBLE) { save_reg(TREG_ST0); - gv(RC_INT); - if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { + if ((ft & VT_BTYPE) == VT_LLONG) { /* signed long long to float/double/long double (unsigned case is handled generically) */ - o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ + o(0x50 + REG_VALUE(r)); /* push r */ o(0x242cdf); /* fildll (%rsp) */ o(0x08c48348); /* add $8, %rsp */ - } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == - (VT_INT | VT_UNSIGNED)) { + } else if ((ft & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED)) { /* unsigned int to float/double/long double */ o(0x6a); /* push $0 */ g(0x00); - o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ + o(0x50 + REG_VALUE(r)); /* push r */ o(0x242cdf); /* fildll (%rsp) */ o(0x10c48348); /* add $16, %rsp */ } else { /* int to float/double/long double */ - o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ + o(0x50 + REG_VALUE(r)); /* push r */ o(0x2404db); /* fildl (%rsp) */ o(0x08c48348); /* add $8, %rsp */ } vtop->r = TREG_ST0; } else { - int r = get_reg(RC_FLOAT); - gv(RC_INT); - o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0)); - if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == - 
(VT_INT | VT_UNSIGNED) || - (vtop->type.t & VT_BTYPE) == VT_LLONG) { + int r_xmm; + r_xmm = get_reg(RC_FLOAT); + o(0xf2 + (tbt == VT_FLOAT)); + if ((ft & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED) || bt == VT_LLONG) { o(0x48); /* REX */ } o(0x2a0f); - o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */ - vtop->r = r; + o(0xc0 + REG_VALUE(r) + REG_VALUE(r_xmm)*8); /* cvtsi2sd or cvtsi2ss */ + vtop->r = r_xmm; } } /* convert from one floating point type to another */ void gen_cvt_ftof(int t) { - int ft, bt, tbt; + int ft, bt, tbt, r; ft = vtop->type.t; bt = ft & VT_BTYPE; tbt = t & VT_BTYPE; - - if (bt == VT_FLOAT) { - gv(RC_FLOAT); + + if(bt == VT_LDOUBLE) + r = get_reg(RC_FLOAT); + else + r = gv(RC_FLOAT); + if (bt == VT_FLOAT) { if (tbt == VT_DOUBLE) { - o(0x140f); /* unpcklps */ - o(0xc0 + REG_VALUE(vtop->r)*9); o(0x5a0f); /* cvtps2pd */ - o(0xc0 + REG_VALUE(vtop->r)*9); + o(0xc0 + REG_VALUE(r) + REG_VALUE(r) * 8); } else if (tbt == VT_LDOUBLE) { - save_reg(RC_ST0); - /* movss %xmm0,-0x10(%rsp) */ + /* movss %xmm0-7,-0x10(%rsp) */ o(0x110ff3); - o(0x44 + REG_VALUE(vtop->r)*8); - o(0xf024); + o(0xf02444 + REG_VALUE(r)*8); o(0xf02444d9); /* flds -0x10(%rsp) */ vtop->r = TREG_ST0; } } else if (bt == VT_DOUBLE) { - gv(RC_FLOAT); if (tbt == VT_FLOAT) { - o(0x140f66); /* unpcklpd */ - o(0xc0 + REG_VALUE(vtop->r)*9); o(0x5a0f66); /* cvtpd2ps */ - o(0xc0 + REG_VALUE(vtop->r)*9); + o(0xc0 + REG_VALUE(r) + REG_VALUE(r) * 8); } else if (tbt == VT_LDOUBLE) { - save_reg(RC_ST0); - /* movsd %xmm0,-0x10(%rsp) */ + /* movsd %xmm0-7,-0x10(%rsp) */ o(0x110ff2); - o(0x44 + REG_VALUE(vtop->r)*8); - o(0xf024); + o(0xf02444 + REG_VALUE(r)*8); o(0xf02444dd); /* fldl -0x10(%rsp) */ vtop->r = TREG_ST0; } } else { - int r; gv(RC_ST0); - r = get_reg(RC_FLOAT); if (tbt == VT_DOUBLE) { o(0xf0245cdd); /* fstpl -0x10(%rsp) */ - /* movsd -0x10(%rsp),%xmm0 */ + /* movsd -0x10(%rsp),%xmm0-7 */ o(0x100ff2); - o(0x44 + REG_VALUE(r)*8); - o(0xf024); + o(0xf02444 + 
REG_VALUE(r)*8); vtop->r = r; } else if (tbt == VT_FLOAT) { o(0xf0245cd9); /* fstps -0x10(%rsp) */ - /* movss -0x10(%rsp),%xmm0 */ + /* movss -0x10(%rsp),%xmm0-7 */ o(0x100ff3); - o(0x44 + REG_VALUE(r)*8); - o(0xf024); + o(0xf02444 + REG_VALUE(r)*8); vtop->r = r; } } @@ -2051,20 +2041,20 @@ void gen_cvt_ftof(int t) /* convert fp to int 't' type */ void gen_cvt_ftoi(int t) { - int ft, bt, size, r; + int ft, bt, ll, r, r_xmm; + ft = vtop->type.t; bt = ft & VT_BTYPE; + if (bt == VT_LDOUBLE) { gen_cvt_ftof(VT_DOUBLE); bt = VT_DOUBLE; } - - gv(RC_FLOAT); - if (t != VT_INT) - size = 8; + r_xmm = gv(RC_FLOAT); + if ((t & VT_BTYPE) == VT_INT) + ll = 0; else - size = 4; - + ll = 1; r = get_reg(RC_INT); if (bt == VT_FLOAT) { o(0xf3); @@ -2073,8 +2063,8 @@ void gen_cvt_ftoi(int t) } else { assert(0); } - orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */ - o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8); + orex(ll, r, r_xmm, 0x2c0f); /* cvttss2si or cvttsd2si */ + o(0xc0 + REG_VALUE(r_xmm) + (REG_VALUE(r) << 3)); vtop->r = r; } -- 2.11.4.GIT