x86_64-gen.c

   1 /*
   2  *  x86-64 code generator for TCC
   3  *
   4  *  Copyright (c) 2008 Shinichiro Hamaji
   5  *
   6  *  Based on i386-gen.c by Fabrice Bellard
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21  */
  22
  23 #include <assert.h>
  24
  25 /* number of available registers: the number of entries in reg_classes[] */
  26 #define NB_REGS             5
  27
  28 /* a register can belong to several classes. The classes must be
  29    sorted from more general to more precise (see gv2() code, which
  30    makes assumptions about this ordering). */
  31 #define RC_INT     0x0001 /* generic integer register */
  32 #define RC_FLOAT   0x0002 /* generic float register */
  33 #define RC_RAX     0x0004 /* class held only by TREG_RAX in reg_classes[] */
  34 #define RC_RCX     0x0008 /* class held only by TREG_RCX in reg_classes[] */
  35 #define RC_RDX     0x0010 /* class held only by TREG_RDX in reg_classes[] */
  36 #define RC_XMM0    0x0020 /* class held only by TREG_XMM0 in reg_classes[] */
  37 #define RC_ST0     0x0040 /* only for long double */
  38 #define RC_IRET    RC_RAX /* function return: integer register */
  39 #define RC_LRET    RC_RDX /* function return: second integer register */
  40 #define RC_FRET    RC_XMM0 /* function return: float register */
  41
  42 /* pretty names for the registers */
  43 enum {
  44     TREG_RAX = 0, /* 0..2: index into reg_classes[], also used directly in instruction encodings */
  45     TREG_RCX = 1,
  46     TREG_RDX = 2,
  47     TREG_RSI = 6, /* 6..11: no reg_classes[] entry; encoded through REG_VALUE()/REX_BASE() */
  48     TREG_RDI = 7,
  49     TREG_R8  = 8,
  50     TREG_R9  = 9,
  51     TREG_R10 = 10,
  52     TREG_R11 = 11,
  53
  54     TREG_XMM0 = 3, /* index into reg_classes[] only; load()/store() put 0 in the ModRM reg field for it */
  55     TREG_ST0 = 4, /* index into reg_classes[] only */
  56
  57     TREG_MEM = 0x10, /* flag OR-ed into a register number by load(): the register holds an address (tested in gen_modrm_impl() and gen_modrm64()) */
  58 };
  59
  60 #define REX_BASE(reg) (((reg) >> 3) & 1) /* bit 3 of the register number, merged into REX prefixes */
  61 #define REG_VALUE(reg) ((reg) & 7) /* low 3 bits of the register number, merged into ModRM/opcode bytes */
  62
  63 int reg_classes[NB_REGS] = { /* class mask of each register, indexed by TREG_RAX..TREG_ST0 */
  64     /* rax */ RC_INT | RC_RAX,
  65     /* rcx */ RC_INT | RC_RCX,
  66     /* rdx */ RC_INT | RC_RDX,
  67     /* xmm0 */ RC_FLOAT | RC_XMM0,
  68     /* st0 */ RC_ST0, /* deliberately not RC_FLOAT: RC_ST0 is only for long double */
  69 };
  70
  71 /* return registers for function */
  72 #define REG_IRET TREG_RAX /* single word int return register */
  73 #define REG_LRET TREG_RDX /* second word return register (for long long) */
  74 #define REG_FRET TREG_XMM0 /* float return register */
  75
  76 /* defined if function parameters must be evaluated in reverse order */
  77 #define INVERT_FUNC_PARAMS
  78
  79 /* pointer size, in bytes */
  80 #define PTR_SIZE 8
  81
  82 /* long double size and alignment, in bytes */
  83 #define LDOUBLE_SIZE  16 /* also the stack space gfunc_call() reserves for a long double argument */
  84 #define LDOUBLE_ALIGN 8
  85 /* maximum alignment (for aligned attribute support) */
  86 #define MAX_ALIGN     8
  87
  88 /******************************************************/
  89 /* ELF defines */
  90
  91 #define EM_TCC_TARGET EM_X86_64
  92
  93 /* relocation type for data relocation; despite the "32" in the macro name it maps to the 64 bit R_X86_64_64 */
  94 #define R_DATA_32   R_X86_64_64
  95 #define R_JMP_SLOT  R_X86_64_JUMP_SLOT
  96 #define R_COPY      R_X86_64_COPY
  97
  98 #define ELF_START_ADDR 0x08048000
  99 #define ELF_PAGE_SIZE  0x1000
 100
 101 /******************************************************/
 102
 103 static unsigned long func_sub_sp_offset; /* value of 'ind' right after the FUNC_PROLOG_SIZE bytes reserved by gfunc_prolog(); gfunc_epilog() uses it to go back and emit the prolog */
 104 static int func_ret_sub; /* operand of 'ret n' emitted by gfunc_epilog(); 0 selects a plain 'ret' */
 105
 106 /* XXX: make it faster ? */
 107 void g(int c)
 108 {
 109     int ind1;
 110     ind1 = ind + 1;
 111     if (ind1 > cur_text_section->data_allocated)
 112         section_realloc(cur_text_section, ind1);
 113     cur_text_section->data[ind] = c;
 114     ind = ind1;
 115 }
 116
 117 void o(unsigned int c)
 118 {
 119     while (c) {
 120         g(c);
 121         c = c >> 8;
 122     }
 123 }
 124
 125 void gen_le32(int c)
 126 {
 127     g(c);
 128     g(c >> 8);
 129     g(c >> 16);
 130     g(c >> 24);
 131 }
 132
 133 void gen_le64(int64_t c)
 134 {
 135     g(c);
 136     g(c >> 8);
 137     g(c >> 16);
 138     g(c >> 24);
 139     g(c >> 32);
 140     g(c >> 40);
 141     g(c >> 48);
 142     g(c >> 56);
 143 }
 144
 145 /* output a symbol and patch all calls to it */
 146 void gsym_addr(int t, int a)
 147 {
 148     int n, *ptr;
 149     while (t) {
 150         ptr = (int *)(cur_text_section->data + t);
 151         n = *ptr; /* next value */
 152         *ptr = a - t - 4;
 153         t = n;
 154     }
 155 }
 156
 157 void gsym(int t)
 158 {
 159     gsym_addr(t, ind);
 160 }
 161
 162 /* psym is used to put an instruction with a data field which is a
 163    reference to a symbol. It is in fact the same as oad ! */
 164 #define psym oad /* used by gjmp() and gtst() to emit jumps; like oad() it yields the offset of the 4-byte field */
 165
 166 static int is64_type(int t)
 167 {
 168     return ((t & VT_BTYPE) == VT_PTR ||
 169             (t & VT_BTYPE) == VT_FUNC ||
 170             (t & VT_BTYPE) == VT_LLONG);
 171 }
 172
 173 static int is_sse_float(int t) {
 174     int bt;
 175     bt = t & VT_BTYPE;
 176     return bt == VT_DOUBLE || bt == VT_FLOAT;
 177 }
 178
 179 /* instruction + 4 bytes data. Return the address of the data */
 180 static int oad(int c, int s)
 181 {
 182     int ind1;
 183
 184     o(c);
 185     ind1 = ind + 4;
 186     if (ind1 > cur_text_section->data_allocated)
 187         section_realloc(cur_text_section, ind1);
 188     *(int *)(cur_text_section->data + ind) = s;
 189     s = ind;
 190     ind = ind1;
 191     return s;
 192 }
 193
 194 /* output constant with relocation if 'r & VT_SYM' is true */
 195 static void gen_addr64(int r, Sym *sym, int64_t c)
 196 {
 197     if (r & VT_SYM)
 198         greloc(cur_text_section, sym, ind, R_X86_64_64);
 199     gen_le64(c);
 200 }
 201
 202 /* output constant with relocation if 'r & VT_SYM' is true */
 203 static void gen_addrpc32(int r, Sym *sym, int c)
 204 {
 205     if (r & VT_SYM)
 206         greloc(cur_text_section, sym, ind, R_X86_64_PC32);
 207     gen_le32(c-4);
 208 }
 209
 210 /* output got address with relocation */
 211 static void gen_gotpcrel(int r, Sym *sym, int c)
 212 {
 213     Section *sr;
 214     ElfW(Rela) *rel;
 215     greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
 216     sr = cur_text_section->reloc;
 217     rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
 218     rel->r_addend = -4;
 219     gen_le32(0);
 220
 221     if (c) {
 222         /* we use add c, %xxx for displacement */
 223         o(0x48 + REX_BASE(r));
 224         o(0x81);
 225         o(0xc0 + REG_VALUE(r));
 226         gen_le32(c);
 227     }
 228 }
 229
 230 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
 231 {
 232     op_reg = REG_VALUE(op_reg) << 3;
 233     if ((r & VT_VALMASK) == VT_CONST) {
 234         /* constant memory reference */
 235         o(0x05 | op_reg);
 236         if (is_got) {
 237             gen_gotpcrel(r, sym, c);
 238         } else {
 239             gen_addrpc32(r, sym, c);
 240         }
 241     } else if ((r & VT_VALMASK) == VT_LOCAL) {
 242         /* currently, we use only ebp as base */
 243         if (c == (char)c) {
 244             /* short reference */
 245             o(0x45 | op_reg);
 246             g(c);
 247         } else {
 248             oad(0x85 | op_reg, c);
 249         }
 250     } else if ((r & VT_VALMASK) >= TREG_MEM) {
 251         if (c) {
 252             g(0x80 | op_reg | REG_VALUE(r));
 253             gen_le32(c);
 254         } else {
 255             g(0x00 | op_reg | REG_VALUE(r));
 256         }
 257     } else {
 258         g(0x00 | op_reg | (r & VT_VALMASK));
 259     }
 260 }
 261
 262 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
 263    opcode bits */
 264 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
 265 {
 266     gen_modrm_impl(op_reg, r, sym, c, 0);
 267 }
 268
 269 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
 270    opcode bits */
 271 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
 272 {
 273     int is_got;
 274     int rex = 0x48 | (REX_BASE(op_reg) << 2);
 275     if ((r & VT_VALMASK) != VT_CONST &&
 276         (r & VT_VALMASK) != VT_LOCAL) {
 277         rex |= REX_BASE(VT_VALMASK & r);
 278     }
 279     o(rex);
 280     o(opcode);
 281     is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
 282     gen_modrm_impl(op_reg, r, sym, c, is_got);
 283 }
 284
 285
 286 /* load 'r' from value 'sv' */
 287 void load(int r, SValue *sv)
 288 {
 289     int v, t, ft, fc, fr;
 290     SValue v1;
 291
 292     fr = sv->r;
 293     ft = sv->type.t;
 294     fc = sv->c.ul;
 295
 296     /* we use indirect access via got */
 297     if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
 298         (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
 299         /* use the result register as a temporal register */
 300         int tr = r | TREG_MEM;
 301         if (is_float(ft)) {
 302             /* we cannot use float registers as a temporal register */
 303             tr = get_reg(RC_INT) | TREG_MEM;
 304         }
 305         gen_modrm64(0x8b, tr, fr, sv->sym, 0);
 306
 307         /* load from the temporal register */
 308         fr = tr | VT_LVAL;
 309     }
 310
 311     v = fr & VT_VALMASK;
 312     if (fr & VT_LVAL) {
 313         if (v == VT_LLOCAL) {
 314             v1.type.t = VT_PTR;
 315             v1.r = VT_LOCAL | VT_LVAL;
 316             v1.c.ul = fc;
 317             load(r, &v1);
 318             fr = r;
 319         }
 320         if ((ft & VT_BTYPE) == VT_FLOAT) {
 321             o(0x6e0f66); /* movd */
 322             r = 0;
 323         } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
 324             o(0x7e0ff3); /* movq */
 325             r = 0;
 326         } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
 327             o(0xdb); /* fldt */
 328             r = 5;
 329         } else if ((ft & VT_TYPE) == VT_BYTE) {
 330             o(0xbe0f);   /* movsbl */
 331         } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
 332             o(0xb60f);   /* movzbl */
 333         } else if ((ft & VT_TYPE) == VT_SHORT) {
 334             o(0xbf0f);   /* movswl */
 335         } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
 336             o(0xb70f);   /* movzwl */
 337         } else if (is64_type(ft)) {
 338             gen_modrm64(0x8b, r, fr, sv->sym, fc);
 339             return;
 340         } else {
 341             o(0x8b);   /* movl */
 342         }
 343         gen_modrm(r, fr, sv->sym, fc);
 344     } else {
 345         if (v == VT_CONST) {
 346             if ((ft & VT_BTYPE) == VT_LLONG) {
 347                 assert(!(fr & VT_SYM));
 348                 o(0x48);
 349                 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
 350                 gen_addr64(fr, sv->sym, sv->c.ull);
 351             } else {
 352                 if (fr & VT_SYM) {
 353                     if (sv->sym->type.t & VT_STATIC) {
 354                         o(0x8d48);
 355                         o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
 356                         gen_addrpc32(fr, sv->sym, fc);
 357                     } else {
 358                         o(0x8b48);
 359                         o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
 360                         gen_gotpcrel(r, sv->sym, fc);
 361                     }
 362                 } else {
 363                     o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
 364                     gen_le32(fc);
 365                 }
 366             }
 367         } else if (v == VT_LOCAL) {
 368             o(0x48 | REX_BASE(r));
 369             o(0x8d); /* lea xxx(%ebp), r */
 370             gen_modrm(r, VT_LOCAL, sv->sym, fc);
 371         } else if (v == VT_CMP) {
 372             oad(0xb8 + r, 0); /* mov $0, r */
 373             o(0x0f); /* setxx %br */
 374             o(fc);
 375             o(0xc0 + r);
 376         } else if (v == VT_JMP || v == VT_JMPI) {
 377             t = v & 1;
 378             oad(0xb8 + r, t); /* mov $1, r */
 379             o(0x05eb); /* jmp after */
 380             gsym(fc);
 381             oad(0xb8 + r, t ^ 1); /* mov $0, r */
 382         } else if (v != r) {
 383             if (r == TREG_XMM0) {
 384                 assert(v == TREG_ST0);
 385                 /* gen_cvt_ftof(VT_DOUBLE); */
 386                 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
 387                 /* movsd -0x10(%rsp),%xmm0 */
 388                 o(0x44100ff2);
 389                 o(0xf024);
 390             } else if (r == TREG_ST0) {
 391                 assert(v == TREG_XMM0);
 392                 /* gen_cvt_ftof(VT_LDOUBLE); */
 393                 /* movsd %xmm0,-0x10(%rsp) */
 394                 o(0x44110ff2);
 395                 o(0xf024);
 396                 o(0xf02444dd); /* fldl -0x10(%rsp) */
 397             } else {
 398                 o(0x48 | REX_BASE(r) | (REX_BASE(v) << 2));
 399                 o(0x89);
 400                 o(0xc0 + r + v * 8); /* mov v, r */
 401             }
 402         }
 403     }
 404 }
 405
 406 /* store register 'r' in lvalue 'v' */
 407 void store(int r, SValue *v)
 408 {
 409     int fr, bt, ft, fc;
 410     int op64 = 0;
 411     /* store the REX prefix in this variable when PIC is enabled */
 412     int pic = 0;
 413
 414     ft = v->type.t;
 415     fc = v->c.ul;
 416     fr = v->r & VT_VALMASK;
 417     bt = ft & VT_BTYPE;
 418
 419     /* we need to access the variable via got */
 420     if (fr == VT_CONST && (v->r & VT_SYM)) {
 421         /* mov xx(%rip), %r11 */
 422         o(0x1d8b4c);
 423         gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
 424         pic = is64_type(bt) ? 0x49 : 0x41;
 425     }
 426
 427     /* XXX: incorrect if float reg to reg */
 428     if (bt == VT_FLOAT) {
 429         o(0x66);
 430         o(pic);
 431         o(0x7e0f); /* movd */
 432         r = 0;
 433     } else if (bt == VT_DOUBLE) {
 434         o(0x66);
 435         o(pic);
 436         o(0xd60f); /* movq */
 437         r = 0;
 438     } else if (bt == VT_LDOUBLE) {
 439         o(0xc0d9); /* fld %st(0) */
 440         o(pic);
 441         o(0xdb); /* fstpt */
 442         r = 7;
 443     } else {
 444         if (bt == VT_SHORT)
 445             o(0x66);
 446         o(pic);
 447         if (bt == VT_BYTE || bt == VT_BOOL)
 448             o(0x88);
 449         else if (is64_type(bt))
 450             op64 = 0x89;
 451         else
 452             o(0x89);
 453     }
 454     if (pic) {
 455         /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
 456         if (op64)
 457             o(op64);
 458         o(3 + (r << 3));
 459     } else if (op64) {
 460         if (fr == VT_CONST ||
 461             fr == VT_LOCAL ||
 462             (v->r & VT_LVAL)) {
 463             gen_modrm64(op64, r, v->r, v->sym, fc);
 464         } else if (fr != r) {
 465             /* XXX: don't we really come here? */
 466             abort();
 467             o(0xc0 + fr + r * 8); /* mov r, fr */
 468         }
 469     } else {
 470         if (fr == VT_CONST ||
 471             fr == VT_LOCAL ||
 472             (v->r & VT_LVAL)) {
 473             gen_modrm(r, v->r, v->sym, fc);
 474         } else if (fr != r) {
 475             /* XXX: don't we really come here? */
 476             abort();
 477             o(0xc0 + fr + r * 8); /* mov r, fr */
 478         }
 479     }
 480 }
 481
 482 static void gadd_sp(int val)
 483 {
 484     if (val == (char)val) {
 485         o(0xc48348);
 486         g(val);
 487     } else {
 488         oad(0xc48148, val); /* add $xxx, %rsp */
 489     }
 490 }
 491
 492 /* 'is_jmp' is '1' if it is a jump */
 493 static void gcall_or_jmp(int is_jmp)
 494 {
 495     int r;
 496     if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 497         /* constant case */
 498         if (vtop->r & VT_SYM) {
 499             /* relocation case */
 500             greloc(cur_text_section, vtop->sym,
 501                    ind + 1, R_X86_64_PC32);
 502         } else {
 503             /* put an empty PC32 relocation */
 504             put_elf_reloc(symtab_section, cur_text_section,
 505                           ind + 1, R_X86_64_PC32, 0);
 506         }
 507         oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
 508     } else {
 509         /* otherwise, indirect call */
 510         r = TREG_R11;
 511         load(r, vtop);
 512         o(0x41); /* REX */
 513         o(0xff); /* call/jmp *r */
 514         o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
 515     }
 516 }
 517
 518 static uint8_t arg_regs[6] = { /* registers of the first six integer arguments, in argument order (see gfunc_call() and push_arg_reg()) */
 519     TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
 520 };
 521 /* Generate function call. The function address is pushed first, then
 522    all the parameters in call order. This functions pops all the
 523    parameters and the function address. */
 524 void gfunc_call(int nb_args)
 525 {
 526     int size, align, r, args_size, i, func_call;
 527     Sym *func_sym;
 528     SValue *orig_vtop;
 529     int nb_reg_args = 0;
 530     int nb_sse_args = 0;
 531     int sse_reg, gen_reg;
 532
 533     /* calculate the number of integer/float arguments */
 534     args_size = 0;
 535     for(i = 0; i < nb_args; i++) {
 536         if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
 537             args_size += type_size(&vtop->type, &align);
 538         } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
 539             args_size += 16;
 540         } else if (is_sse_float(vtop[-i].type.t)) {
 541             nb_sse_args++;
 542             if (nb_sse_args > 8) args_size += 8;
 543         } else {
 544             nb_reg_args++;
 545             if (nb_reg_args > 6) args_size += 8;
 546         }
 547     }
 548
 549     /* for struct arguments, we need to call memcpy and the function
 550        call breaks register passing arguments we are preparing.
 551        So, we process arguments which will be passed by stack first. */
 552     orig_vtop = vtop;
 553     gen_reg = nb_reg_args;
 554     sse_reg = nb_sse_args;
 555     /* adjust stack to align SSE boundary */
 556     if (args_size &= 8) {
 557         o(0x50); /* push $rax */
 558     }
 559     for(i = 0; i < nb_args; i++) {
 560         if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
 561             size = type_size(&vtop->type, &align);
 562             /* align to stack align size */
 563             size = (size + 3) & ~3;
 564             /* allocate the necessary size on stack */
 565             o(0x48);
 566             oad(0xec81, size); /* sub $xxx, %rsp */
 567             /* generate structure store */
 568             r = get_reg(RC_INT);
 569             o(0x48 + REX_BASE(r));
 570             o(0x89); /* mov %rsp, r */
 571             o(0xe0 + r);
 572             {
 573                 /* following code breaks vtop[1] */
 574                 SValue tmp = vtop[1];
 575                 vset(&vtop->type, r | VT_LVAL, 0);
 576                 vswap();
 577                 vstore();
 578                 vtop[1] = tmp;
 579             }
 580             args_size += size;
 581         } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
 582             gv(RC_ST0);
 583             size = LDOUBLE_SIZE;
 584             oad(0xec8148, size); /* sub $xxx, %rsp */
 585             o(0x7cdb); /* fstpt 0(%rsp) */
 586             g(0x24);
 587             g(0x00);
 588             args_size += size;
 589         } else if (is_sse_float(vtop->type.t)) {
 590             int j = --sse_reg;
 591             if (j >= 8) {
 592                 gv(RC_FLOAT);
 593                 o(0x50); /* push $rax */
 594                 /* movq %xmm0, (%rsp) */
 595                 o(0x04d60f66);
 596                 o(0x24);
 597                 args_size += 8;
 598             }
 599         } else {
 600             int j = --gen_reg;
 601             /* simple type */
 602             /* XXX: implicit cast ? */
 603             if (j >= 6) {
 604                 r = gv(RC_INT);
 605                 o(0x50 + r); /* push r */
 606                 args_size += 8;
 607             }
 608         }
 609         vtop--;
 610     }
 611     vtop = orig_vtop;
 612
 613     /* then, we prepare register passing arguments.
 614        Note that we cannot set RDX and RCX in this loop because gv()
 615        may break these temporary registers. Let's use R10 and R11
 616        instead of them */
 617     gen_reg = nb_reg_args;
 618     sse_reg = nb_sse_args;
 619     for(i = 0; i < nb_args; i++) {
 620         if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
 621             (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
 622         } else if (is_sse_float(vtop->type.t)) {
 623             int j = --sse_reg;
 624             if (j < 8) {
 625                 gv(RC_FLOAT); /* only one float register */
 626                 /* movaps %xmm0, %xmmN */
 627                 o(0x280f);
 628                 o(0xc0 + (sse_reg << 3));
 629             }
 630         } else {
 631             int j = --gen_reg;
 632             /* simple type */
 633             /* XXX: implicit cast ? */
 634             if (j < 6) {
 635                 r = gv(RC_INT);
 636                 if (j < 2) {
 637                     o(0x8948); /* mov */
 638                     o(0xc0 + r * 8 + arg_regs[j]);
 639                 } else if (j < 4) {
 640                     o(0x8949); /* mov */
 641                     /* j=2: r10, j=3: r11 */
 642                     o(0xc0 + r * 8 + j);
 643                 } else {
 644                     o(0x8949); /* mov */
 645                     /* j=4: r8, j=5: r9 */
 646                     o(0xc0 + r * 8 + j - 4);
 647                 }
 648             }
 649         }
 650         vtop--;
 651     }
 652
 653     save_regs(0); /* save used temporary registers */
 654
 655     /* Copy R10 and R11 into RDX and RCX, respectively */
 656     if (nb_reg_args > 2) {
 657         o(0xd2894c); /* mov %r10, %rdx */
 658         if (nb_reg_args > 3) {
 659             o(0xd9894c); /* mov %r11, %rcx */
 660         }
 661     }
 662
 663     func_sym = vtop->type.ref;
 664     func_call = FUNC_CALL(func_sym->r);
 665     oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
 666     gcall_or_jmp(0);
 667     if (args_size)
 668         gadd_sp(args_size);
 669     vtop--;
 670 }
 671
 672 #ifdef TCC_TARGET_PE
 673 /* XXX: support PE? */
 674 #warning "PE isn't tested at all"
 675 #define FUNC_PROLOG_SIZE 12 /* bytes reserved by gfunc_prolog(); gfunc_epilog() pads its 11 byte prolog with one nop for this value */
 676 #else
 677 #define FUNC_PROLOG_SIZE 11 /* bytes reserved by gfunc_prolog(): push %rbp; mov %rsp,%rbp (4 bytes) + sub $imm32,%rsp (7 bytes), emitted by gfunc_epilog() */
 678 #endif
 679
 680 static void push_arg_reg(int i) {
 681     loc -= 8;
 682     gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
 683 }
 684
 685 /* generate function prolog of type 't' */
 686 void gfunc_prolog(CType *func_type)
 687 {
 688     int i, addr, align, size, func_call;
 689     int param_index, param_addr, reg_param_index, sse_param_index;
 690     Sym *sym;
 691     CType *type;
 692
 693     func_ret_sub = 0;
 694
 695     sym = func_type->ref;
 696     func_call = FUNC_CALL(sym->r);
 697     addr = PTR_SIZE * 2;
 698     loc = 0;
 699     ind += FUNC_PROLOG_SIZE;
 700     func_sub_sp_offset = ind;
 701
 702     if (func_type->ref->c == FUNC_ELLIPSIS) {
 703         int seen_reg_num, seen_sse_num, seen_stack_size;
 704         seen_reg_num = seen_sse_num = 0;
 705         /* frame pointer and return address */
 706         seen_stack_size = PTR_SIZE * 2;
 707         /* count the number of seen parameters */
 708         sym = func_type->ref;
 709         while ((sym = sym->next) != NULL) {
 710             type = &sym->type;
 711             if (is_sse_float(type->t)) {
 712                 if (seen_sse_num < 8) {
 713                     seen_sse_num++;
 714                 } else {
 715                     seen_stack_size += 8;
 716                 }
 717             } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
 718                 size = type_size(type, &align);
 719                 size = (size + 3) & ~3;
 720                 seen_stack_size += size;
 721             } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
 722                 seen_stack_size += LDOUBLE_SIZE;
 723             } else {
 724                 if (seen_reg_num < 6) {
 725                     seen_reg_num++;
 726                 } else {
 727                     seen_stack_size += 8;
 728                 }
 729             }
 730         }
 731
 732         loc -= 16;
 733         /* movl $0x????????, -0x10(%rbp) */
 734         o(0xf045c7);
 735         gen_le32(seen_reg_num * 8);
 736         /* movl $0x????????, -0xc(%rbp) */
 737         o(0xf445c7);
 738         gen_le32(seen_sse_num * 16 + 48);
 739         /* movl $0x????????, -0x8(%rbp) */
 740         o(0xf845c7);
 741         gen_le32(seen_stack_size);
 742
 743         /* save all register passing arguments */
 744         for (i = 0; i < 8; i++) {
 745             loc -= 16;
 746             o(0xd60f66); /* movq */
 747             gen_modrm(7 - i, VT_LOCAL, NULL, loc);
 748             /* movq $0, loc+8(%rbp) */
 749             o(0x85c748);
 750             gen_le32(loc + 8);
 751             gen_le32(0);
 752         }
 753         for (i = 0; i < 6; i++) {
 754             push_arg_reg(5 - i);
 755         }
 756     }
 757
 758     sym = func_type->ref;
 759     param_index = 0;
 760     reg_param_index = 0;
 761     sse_param_index = 0;
 762
 763     /* if the function returns a structure, then add an
 764        implicit pointer parameter */
 765     func_vt = sym->type;
 766     if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
 767         push_arg_reg(reg_param_index);
 768         param_addr = loc;
 769
 770         func_vc = loc;
 771         param_index++;
 772         reg_param_index++;
 773     }
 774     /* define parameters */
 775     while ((sym = sym->next) != NULL) {
 776         type = &sym->type;
 777         size = type_size(type, &align);
 778         size = (size + 3) & ~3;
 779         if (is_sse_float(type->t)) {
 780             if (sse_param_index < 8) {
 781                 /* save arguments passed by register */
 782                 loc -= 8;
 783                 o(0xd60f66); /* movq */
 784                 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
 785                 param_addr = loc;
 786             } else {
 787                 param_addr = addr;
 788                 addr += size;
 789             }
 790             sse_param_index++;
 791         } else if ((type->t & VT_BTYPE) == VT_STRUCT ||
 792                    (type->t & VT_BTYPE) == VT_LDOUBLE) {
 793             param_addr = addr;
 794             addr += size;
 795         } else {
 796             if (reg_param_index < 6) {
 797                 /* save arguments passed by register */
 798                 push_arg_reg(reg_param_index);
 799                 param_addr = loc;
 800             } else {
 801                 param_addr = addr;
 802                 addr += 8;
 803             }
 804             reg_param_index++;
 805         }
 806         sym_push(sym->v & ~SYM_FIELD, type,
 807                  VT_LOCAL | VT_LVAL, param_addr);
 808         param_index++;
 809     }
 810 }
 811
 812 /* generate function epilog */
 813 void gfunc_epilog(void)
 814 {
 815     int v, saved_ind;
 816
 817     o(0xc9); /* leave */
 818     if (func_ret_sub == 0) {
 819         o(0xc3); /* ret */
 820     } else {
 821         o(0xc2); /* ret n */
 822         g(func_ret_sub);
 823         g(func_ret_sub >> 8);
 824     }
 825     /* align local size to word & save local variables */
 826     v = (-loc + 15) & -16;
 827     saved_ind = ind;
 828     ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
 829 #ifdef TCC_TARGET_PE
 830     if (v >= 4096) {
 831         Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
 832         oad(0xb8, v); /* mov stacksize, %eax */
 833         oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
 834         greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
 835     } else
 836 #endif
 837     {
 838         o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
 839         o(0xec8148);  /* sub rsp, stacksize */
 840         gen_le32(v);
 841 #if FUNC_PROLOG_SIZE == 12
 842         o(0x90);  /* adjust to FUNC_PROLOG_SIZE */
 843 #endif
 844     }
 845     ind = saved_ind;
 846 }
 847
 848 /* generate a jump to a label */
 849 int gjmp(int t)
 850 {
 851     return psym(0xe9, t);
 852 }
 853
 854 /* generate a jump to a fixed address */
 855 void gjmp_addr(int a)
 856 {
 857     int r;
 858     r = a - ind - 2;
 859     if (r == (char)r) {
 860         g(0xeb);
 861         g(r);
 862     } else {
 863         oad(0xe9, a - ind - 5);
 864     }
 865 }
 866
 867 /* generate a test. set 'inv' to invert test. Stack entry is popped */
 868 int gtst(int inv, int t)
 869 {
 870     int v, *p;
 871
 872     v = vtop->r & VT_VALMASK;
 873     if (v == VT_CMP) {
 874         /* fast case : can jump directly since flags are set */
 875         g(0x0f);
 876         t = psym((vtop->c.i - 16) ^ inv, t);
 877     } else if (v == VT_JMP || v == VT_JMPI) {
 878         /* && or || optimization */
 879         if ((v & 1) == inv) {
 880             /* insert vtop->c jump list in t */
 881             p = &vtop->c.i;
 882             while (*p != 0)
 883                 p = (int *)(cur_text_section->data + *p);
 884             *p = t;
 885             t = vtop->c.i;
 886         } else {
 887             t = gjmp(t);
 888             gsym(vtop->c.i);
 889         }
 890     } else {
 891         if (is_float(vtop->type.t) ||
 892             (vtop->type.t & VT_BTYPE) == VT_LLONG) {
 893             vpushi(0);
 894             gen_op(TOK_NE);
 895         }
 896         if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
 897             /* constant jmp optimization */
 898             if ((vtop->c.i != 0) != inv)
 899                 t = gjmp(t);
 900         } else {
 901             v = gv(RC_INT);
 902             o(0x85);
 903             o(0xc0 + v * 9);
 904             g(0x0f);
 905             t = psym(0x85 ^ inv, t);
 906         }
 907     }
 908     vtop--;
 909     return t;
 910 }
 911
 912 /* generate an integer binary operation */
 913 void gen_opi(int op)
 914 {
 915     int r, fr, opc, c;
 916
 917     switch(op) {
 918     case '+':
 919     case TOK_ADDC1: /* add with carry generation */
 920         opc = 0;
 921     gen_op8:
 922         if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST &&
 923             !is64_type(vtop->type.t)) {
 924             /* constant case */
 925             vswap();
 926             r = gv(RC_INT);
 927             if (is64_type(vtop->type.t)) {
 928                 o(0x48 | REX_BASE(r));
 929             }
 930             vswap();
 931             c = vtop->c.i;
 932             if (c == (char)c) {
 933                 /* XXX: generate inc and dec for smaller code ? */
 934                 o(0x83);
 935                 o(0xc0 | (opc << 3) | REG_VALUE(r));
 936                 g(c);
 937             } else {
 938                 o(0x81);
 939                 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
 940             }
 941         } else {
 942             gv2(RC_INT, RC_INT);
 943             r = vtop[-1].r;
 944             fr = vtop[0].r;
 945             if (opc != 7 ||
 946                 is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
 947                 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
 948                 o(0x48 | REX_BASE(r) | (REX_BASE(fr) << 2));
 949             }
 950             o((opc << 3) | 0x01);
 951             o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
 952         }
 953         vtop--;
 954         if (op >= TOK_ULT && op <= TOK_GT) {
 955             vtop->r = VT_CMP;
 956             vtop->c.i = op;
 957         }
 958         break;
 959     case '-':
 960     case TOK_SUBC1: /* sub with carry generation */
 961         opc = 5;
 962         goto gen_op8;
 963     case TOK_ADDC2: /* add with carry use */
 964         opc = 2;
 965         goto gen_op8;
 966     case TOK_SUBC2: /* sub with carry use */
 967         opc = 3;
 968         goto gen_op8;
 969     case '&':
 970         opc = 4;
 971         goto gen_op8;
 972     case '^':
 973         opc = 6;
 974         goto gen_op8;
 975     case '|':
 976         opc = 1;
 977         goto gen_op8;
 978     case '*':
 979         gv2(RC_INT, RC_INT);
 980         r = vtop[-1].r;
 981         fr = vtop[0].r;
 982         if (is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
 983             is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
 984             o(0x48 | REX_BASE(fr) | (REX_BASE(r) << 2));
 985         }
 986         vtop--;
 987         o(0xaf0f); /* imul fr, r */
 988         o(0xc0 + fr + r * 8);
 989         break;
 990     case TOK_SHL:
 991         opc = 4;
 992         goto gen_shift;
 993     case TOK_SHR:
 994         opc = 5;
 995         goto gen_shift;
 996     case TOK_SAR:
 997         opc = 7;
 998     gen_shift:
 999         opc = 0xc0 | (opc << 3);
1000         if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1001             /* constant case */
1002             vswap();
1003             r = gv(RC_INT);
1004             if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1005                 o(0x48 | REX_BASE(r));
1006                 c = 0x3f;
1007             } else {
1008                 c = 0x1f;
1009             }
1010             vswap();
1011             c &= vtop->c.i;
1012             o(0xc1); /* shl/shr/sar $xxx, r */
1013             o(opc | r);
1014             g(c);
1015         } else {
1016             /* we generate the shift in ecx */
1017             gv2(RC_INT, RC_RCX);
1018             r = vtop[-1].r;
1019             if ((vtop[-1].type.t & VT_BTYPE) == VT_LLONG) {
1020                 o(0x48 | REX_BASE(r));
1021             }
1022             o(0xd3); /* shl/shr/sar %cl, r */
1023             o(opc | r);
1024         }
1025         vtop--;
1026         break;
1027     case '/':
1028     case TOK_UDIV:
1029     case TOK_PDIV:
1030     case '%':
1031     case TOK_UMOD:
1032     case TOK_UMULL:
1033         /* first operand must be in eax */
1034         /* XXX: need better constraint for second operand */
1035         gv2(RC_RAX, RC_RCX);
1036         r = vtop[-1].r;
1037         fr = vtop[0].r;
1038         vtop--;
1039         save_reg(TREG_RDX);
1040         if (op == TOK_UMULL) {
1041             o(0xf7); /* mul fr */
1042             o(0xe0 + fr);
1043             vtop->r2 = TREG_RDX;
1044             r = TREG_RAX;
1045         } else {
1046             if (op == TOK_UDIV || op == TOK_UMOD) {
1047                 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
1048                 o(0xf0 + fr);
1049             } else {
1050                 if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
1051                     o(0x9948); /* cqto */
1052                     o(0x48 + REX_BASE(fr));
1053                 } else {
1054                     o(0x99); /* cltd */
1055                 }
1056                 o(0xf7); /* idiv fr, %eax */
1057                 o(0xf8 + fr);
1058             }
1059             if (op == '%' || op == TOK_UMOD)
1060                 r = TREG_RDX;
1061             else
1062                 r = TREG_RAX;
1063         }
1064         vtop->r = r;
1065         break;
1066     default:
1067         opc = 7;
1068         goto gen_op8;
1069     }
1070 }
1071
/* Generate a 'long long' binary operation.  This simply forwards to
   gen_opi(), which handles every operator code for this target. */
void gen_opl(int op)
{
    gen_opi(op);
}
1076
1077 /* generate a floating point operation 'v = t1 op t2' instruction. The
1078    two operands are guaranted to have the same floating point type */
1079 /* XXX: need to use ST1 too */
1080 void gen_opf(int op)
1081 {
1082     int a, ft, fc, swapped, r;
1083     int float_type =
1084         (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1085
1086     /* convert constants to memory references */
1087     if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1088         vswap();
1089         gv(float_type);
1090         vswap();
1091     }
1092     if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1093         gv(float_type);
1094
1095     /* must put at least one value in the floating point register */
1096     if ((vtop[-1].r & VT_LVAL) &&
1097         (vtop[0].r & VT_LVAL)) {
1098         vswap();
1099         gv(float_type);
1100         vswap();
1101     }
1102     swapped = 0;
1103     /* swap the stack if needed so that t1 is the register and t2 is
1104        the memory reference */
1105     if (vtop[-1].r & VT_LVAL) {
1106         vswap();
1107         swapped = 1;
1108     }
1109     if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1110         if (op >= TOK_ULT && op <= TOK_GT) {
1111             /* load on stack second operand */
1112             load(TREG_ST0, vtop);
1113             save_reg(TREG_RAX); /* eax is used by FP comparison code */
1114             if (op == TOK_GE || op == TOK_GT)
1115                 swapped = !swapped;
1116             else if (op == TOK_EQ || op == TOK_NE)
1117                 swapped = 0;
1118             if (swapped)
1119                 o(0xc9d9); /* fxch %st(1) */
1120             o(0xe9da); /* fucompp */
1121             o(0xe0df); /* fnstsw %ax */
1122             if (op == TOK_EQ) {
1123                 o(0x45e480); /* and $0x45, %ah */
1124                 o(0x40fC80); /* cmp $0x40, %ah */
1125             } else if (op == TOK_NE) {
1126                 o(0x45e480); /* and $0x45, %ah */
1127                 o(0x40f480); /* xor $0x40, %ah */
1128                 op = TOK_NE;
1129             } else if (op == TOK_GE || op == TOK_LE) {
1130                 o(0x05c4f6); /* test $0x05, %ah */
1131                 op = TOK_EQ;
1132             } else {
1133                 o(0x45c4f6); /* test $0x45, %ah */
1134                 op = TOK_EQ;
1135             }
1136             vtop--;
1137             vtop->r = VT_CMP;
1138             vtop->c.i = op;
1139         } else {
1140             /* no memory reference possible for long double operations */
1141             load(TREG_ST0, vtop);
1142             swapped = !swapped;
1143
1144             switch(op) {
1145             default:
1146             case '+':
1147                 a = 0;
1148                 break;
1149             case '-':
1150                 a = 4;
1151                 if (swapped)
1152                     a++;
1153                 break;
1154             case '*':
1155                 a = 1;
1156                 break;
1157             case '/':
1158                 a = 6;
1159                 if (swapped)
1160                     a++;
1161                 break;
1162             }
1163             ft = vtop->type.t;
1164             fc = vtop->c.ul;
1165             o(0xde); /* fxxxp %st, %st(1) */
1166             o(0xc1 + (a << 3));
1167             vtop--;
1168         }
1169     } else {
1170         if (op >= TOK_ULT && op <= TOK_GT) {
1171             /* if saved lvalue, then we must reload it */
1172             r = vtop->r;
1173             fc = vtop->c.ul;
1174             if ((r & VT_VALMASK) == VT_LLOCAL) {
1175                 SValue v1;
1176                 r = get_reg(RC_INT);
1177                 v1.type.t = VT_INT;
1178                 v1.r = VT_LOCAL | VT_LVAL;
1179                 v1.c.ul = fc;
1180                 load(r, &v1);
1181                 fc = 0;
1182             }
1183
1184             if (op == TOK_EQ || op == TOK_NE) {
1185                 swapped = 0;
1186             } else {
1187                 if (op == TOK_LE || op == TOK_LT)
1188                     swapped = !swapped;
1189                 if (op == TOK_LE || op == TOK_GE) {
1190                     op = 0x93; /* setae */
1191                 } else {
1192                     op = 0x97; /* seta */
1193                 }
1194             }
1195
1196             if (swapped) {
1197                 o(0x7e0ff3); /* movq */
1198                 gen_modrm(1, r, vtop->sym, fc);
1199
1200                 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1201                     o(0x66);
1202                 }
1203                 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1204                 o(0xc8);
1205             } else {
1206                 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1207                     o(0x66);
1208                 }
1209                 o(0x2e0f); /* ucomisd */
1210                 gen_modrm(0, r, vtop->sym, fc);
1211             }
1212
1213             vtop--;
1214             vtop->r = VT_CMP;
1215             vtop->c.i = op;
1216         } else {
1217             /* no memory reference possible for long double operations */
1218             if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1219                 load(TREG_XMM0, vtop);
1220                 swapped = !swapped;
1221             }
1222             switch(op) {
1223             default:
1224             case '+':
1225                 a = 0;
1226                 break;
1227             case '-':
1228                 a = 4;
1229                 break;
1230             case '*':
1231                 a = 1;
1232                 break;
1233             case '/':
1234                 a = 6;
1235                 break;
1236             }
1237             ft = vtop->type.t;
1238             fc = vtop->c.ul;
1239             if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1240                 o(0xde); /* fxxxp %st, %st(1) */
1241                 o(0xc1 + (a << 3));
1242             } else {
1243                 /* if saved lvalue, then we must reload it */
1244                 r = vtop->r;
1245                 if ((r & VT_VALMASK) == VT_LLOCAL) {
1246                     SValue v1;
1247                     r = get_reg(RC_INT);
1248                     v1.type.t = VT_INT;
1249                     v1.r = VT_LOCAL | VT_LVAL;
1250                     v1.c.ul = fc;
1251                     load(r, &v1);
1252                     fc = 0;
1253                 }
1254                 if (swapped) {
1255                     /* movq %xmm0,%xmm1 */
1256                     o(0x7e0ff3);
1257                     o(0xc8);
1258                     load(TREG_XMM0, vtop);
1259                     /* subsd  %xmm1,%xmm0 (f2 0f 5c c1) */
1260                     if ((ft & VT_BTYPE) == VT_DOUBLE) {
1261                         o(0xf2);
1262                     } else {
1263                         o(0xf3);
1264                     }
1265                     o(0x0f);
1266                     o(0x58 + a);
1267                     o(0xc1);
1268                 } else {
1269                     if ((ft & VT_BTYPE) == VT_DOUBLE) {
1270                         o(0xf2);
1271                     } else {
1272                         o(0xf3);
1273                     }
1274                     o(0x0f);
1275                     o(0x58 + a);
1276                     gen_modrm(0, r, vtop->sym, fc);
1277                 }
1278             }
1279             vtop--;
1280         }
1281     }
1282 }
1283
1284 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1285    and 'long long' cases. */
1286 void gen_cvt_itof(int t)
1287 {
1288     if ((t & VT_BTYPE) == VT_LDOUBLE) {
1289         save_reg(TREG_ST0);
1290         gv(RC_INT);
1291         if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1292             /* signed long long to float/double/long double (unsigned case
1293                is handled generically) */
1294             o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1295             o(0x242cdf); /* fildll (%rsp) */
1296             o(0x08c48348); /* add $8, %rsp */
1297         } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1298                    (VT_INT | VT_UNSIGNED)) {
1299             /* unsigned int to float/double/long double */
1300             o(0x6a); /* push $0 */
1301             g(0x00);
1302             o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1303             o(0x242cdf); /* fildll (%rsp) */
1304             o(0x10c48348); /* add $16, %rsp */
1305         } else {
1306             /* int to float/double/long double */
1307             o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1308             o(0x2404db); /* fildl (%rsp) */
1309             o(0x08c48348); /* add $8, %rsp */
1310         }
1311         vtop->r = TREG_ST0;
1312     } else {
1313         save_reg(TREG_XMM0);
1314         gv(RC_INT);
1315         o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
1316         if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1317             (VT_INT | VT_UNSIGNED) ||
1318             (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1319             o(0x48); /* REX */
1320         }
1321         o(0x2a0f);
1322         o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1323         vtop->r = TREG_XMM0;
1324     }
1325 }
1326
1327 /* convert from one floating point type to another */
1328 void gen_cvt_ftof(int t)
1329 {
1330     int ft, bt, tbt;
1331
1332     ft = vtop->type.t;
1333     bt = ft & VT_BTYPE;
1334     tbt = t & VT_BTYPE;
1335
1336     if (bt == VT_FLOAT) {
1337         gv(RC_FLOAT);
1338         if (tbt == VT_DOUBLE) {
1339             o(0xc0140f); /* unpcklps */
1340             o(0xc05a0f); /* cvtps2pd */
1341         } else if (tbt == VT_LDOUBLE) {
1342             /* movss %xmm0,-0x10(%rsp) */
1343             o(0x44110ff3);
1344             o(0xf024);
1345             o(0xf02444d9); /* flds -0x10(%rsp) */
1346             vtop->r = TREG_ST0;
1347         }
1348     } else if (bt == VT_DOUBLE) {
1349         gv(RC_FLOAT);
1350         if (tbt == VT_FLOAT) {
1351             o(0xc0140f66); /* unpcklpd */
1352             o(0xc05a0f66); /* cvtpd2ps */
1353         } else if (tbt == VT_LDOUBLE) {
1354             /* movsd %xmm0,-0x10(%rsp) */
1355             o(0x44110ff2);
1356             o(0xf024);
1357             o(0xf02444dd); /* fldl -0x10(%rsp) */
1358             vtop->r = TREG_ST0;
1359         }
1360     } else {
1361         gv(RC_ST0);
1362         if (tbt == VT_DOUBLE) {
1363             o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1364             /* movsd -0x10(%rsp),%xmm0 */
1365             o(0x44100ff2);
1366             o(0xf024);
1367             vtop->r = TREG_XMM0;
1368         } else if (tbt == VT_FLOAT) {
1369             o(0xf0245cd9); /* fstps -0x10(%rsp) */
1370             /* movss -0x10(%rsp),%xmm0 */
1371             o(0x44100ff3);
1372             o(0xf024);
1373             vtop->r = TREG_XMM0;
1374         }
1375     }
1376 }
1377
1378 /* convert fp to int 't' type */
1379 void gen_cvt_ftoi(int t)
1380 {
1381     int ft, bt, size, r;
1382     ft = vtop->type.t;
1383     bt = ft & VT_BTYPE;
1384     if (bt == VT_LDOUBLE) {
1385         gen_cvt_ftof(VT_DOUBLE);
1386         bt = VT_DOUBLE;
1387     }
1388
1389     gv(RC_FLOAT);
1390     if (t != VT_INT)
1391         size = 8;
1392     else
1393         size = 4;
1394
1395     r = get_reg(RC_INT);
1396     if (bt == VT_FLOAT) {
1397         o(0xf3);
1398     } else if (bt == VT_DOUBLE) {
1399         o(0xf2);
1400     } else {
1401         assert(0);
1402     }
1403     if (size == 8) {
1404         o(0x48 + REX_BASE(r));
1405     }
1406     o(0x2c0f); /* cvttss2si or cvttsd2si */
1407     o(0xc0 + (REG_VALUE(r) << 3));
1408     vtop->r = r;
1409 }
1410
1411 /* computed goto support */
1412 void ggoto(void)
1413 {
1414     gcall_or_jmp(1);
1415     vtop--;
1416 }
1417
1418 /* end of x86-64 code generator */
1419 /*************************************************************/