/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/* number of available registers */
/* NOTE(review): the NB_REGS definition, the RC_RAX/RC_RCX/RC_RDX class
   macros referenced below, and the TREG_* register name enum were lost
   in this extraction — restore them from upstream before building. */

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT 0x0001 /* generic integer register */
#define RC_FLOAT 0x0002 /* generic float register */
#define RC_XMM0 0x0020
#define RC_ST0 0x0040 /* only for long double */
#define RC_IRET RC_RAX /* function return: integer register */
#define RC_LRET RC_RDX /* function return: second integer register */
#define RC_FRET RC_XMM0 /* function return: float register */

/* pretty names for the registers */

/* REX.B/REX.R bit for a register number (set for r8..r15) */
#define REX_BASE(reg) (((reg) >> 3) & 1)
/* low 3 bits of a register number, as used in ModRM encoding */
#define REG_VALUE(reg) ((reg) & 7)
63 const int reg_classes
[NB_REGS
] = {
64 /* eax */ RC_INT
| RC_RAX
,
65 /* ecx */ RC_INT
| RC_RCX
,
66 /* edx */ RC_INT
| RC_RDX
,
67 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8 /* NOTE(review): definition line lost in extraction,
                      reconstructed (used below as PTR_SIZE * 2) */

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE 16
#define LDOUBLE_ALIGN 8
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN 8 /* NOTE(review): reconstructed — definition line missing
                       from extraction */

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32 R_X86_64_32
#define R_DATA_PTR R_X86_64_64
#define R_JMP_SLOT R_X86_64_JUMP_SLOT
#define R_COPY R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE 0x1000
/******************************************************/

/* offset in the text section of the prolog's 'sub $xx,%rsp' site;
   patched by gfunc_epilog() once the frame size is known */
static unsigned long func_sub_sp_offset;
/* byte count for 'ret n' (callee-pops calling conventions); 0 means
   a plain 'ret' */
static int func_ret_sub;
107 /* XXX: make it faster ? */
112 if (ind1
> cur_text_section
->data_allocated
)
113 section_realloc(cur_text_section
, ind1
);
114 cur_text_section
->data
[ind
] = c
;
/* emit the non-zero low bytes of 'c', least significant first; multi-byte
   opcode constants in this file are therefore written byte-reversed
   (e.g. o(0x2c0f) emits 0f 2c).
   NOTE(review): body reconstructed — only the signature survived the
   extraction; verify against upstream. */
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
/* emit a 64 bit little endian constant.
   NOTE(review): body reconstructed — only the signature survived the
   extraction; verify against upstream. */
void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
146 /* output a symbol and patch all calls to it */
147 void gsym_addr(int t
, int a
)
151 ptr
= (int *)(cur_text_section
->data
+ t
);
152 n
= *ptr
; /* next value */
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad !
   NOTE(review): the define itself was lost in extraction; reconstructed
   from the comment — verify against upstream. */
#define psym oad
167 static int is64_type(int t
)
169 return ((t
& VT_BTYPE
) == VT_PTR
||
170 (t
& VT_BTYPE
) == VT_FUNC
||
171 (t
& VT_BTYPE
) == VT_LLONG
);
174 static int is_sse_float(int t
) {
177 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
180 /* instruction + 4 bytes data. Return the address of the data */
181 static int oad(int c
, int s
)
187 if (ind1
> cur_text_section
->data_allocated
)
188 section_realloc(cur_text_section
, ind1
);
189 *(int *)(cur_text_section
->data
+ ind
) = s
;
196 /* output constant with relocation if 'r & VT_SYM' is true */
197 static void gen_addr64(int r
, Sym
*sym
, int64_t c
)
200 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
205 /* output constant with relocation if 'r & VT_SYM' is true */
206 static void gen_addrpc32(int r
, Sym
*sym
, int c
)
209 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
213 /* output got address with relocation */
214 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
216 #ifndef TCC_TARGET_PE
219 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
220 sr
= cur_text_section
->reloc
;
221 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
224 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
225 cur_text_section
->data
[ind
-3],
226 cur_text_section
->data
[ind
-2],
227 cur_text_section
->data
[ind
-1]
229 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
234 /* we use add c, %xxx for displacement */
235 o(0x48 + REX_BASE(r
));
237 o(0xc0 + REG_VALUE(r
));
242 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
244 op_reg
= REG_VALUE(op_reg
) << 3;
245 if ((r
& VT_VALMASK
) == VT_CONST
) {
246 /* constant memory reference */
249 gen_gotpcrel(r
, sym
, c
);
251 gen_addrpc32(r
, sym
, c
);
253 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
254 /* currently, we use only ebp as base */
256 /* short reference */
260 oad(0x85 | op_reg
, c
);
262 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
264 g(0x80 | op_reg
| REG_VALUE(r
));
267 g(0x00 | op_reg
| REG_VALUE(r
));
270 g(0x00 | op_reg
| (r
& VT_VALMASK
));
274 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
276 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
278 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
281 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
283 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
286 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
287 if ((r
& VT_VALMASK
) != VT_CONST
&&
288 (r
& VT_VALMASK
) != VT_LOCAL
) {
289 rex
|= REX_BASE(VT_VALMASK
& r
);
293 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
294 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
298 /* load 'r' from value 'sv' */
299 void load(int r
, SValue
*sv
)
301 int v
, t
, ft
, fc
, fr
;
308 #ifndef TCC_TARGET_PE
309 /* we use indirect access via got */
310 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
311 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
312 /* use the result register as a temporal register */
313 int tr
= r
| TREG_MEM
;
315 /* we cannot use float registers as a temporal register */
316 tr
= get_reg(RC_INT
) | TREG_MEM
;
318 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
320 /* load from the temporal register */
327 if (v
== VT_LLOCAL
) {
329 v1
.r
= VT_LOCAL
| VT_LVAL
;
334 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
335 o(0x6e0f66); /* movd */
337 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
338 o(0x7e0ff3); /* movq */
340 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
343 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
344 o(0xbe0f); /* movsbl */
345 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
346 o(0xb60f); /* movzbl */
347 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
348 o(0xbf0f); /* movswl */
349 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
350 o(0xb70f); /* movzwl */
351 } else if (is64_type(ft
)) {
352 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
357 gen_modrm(r
, fr
, sv
->sym
, fc
);
363 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
364 gen_addrpc32(fr
, sv
->sym
, fc
);
366 if (sv
->sym
->type
.t
& VT_STATIC
) {
368 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
369 gen_addrpc32(fr
, sv
->sym
, fc
);
372 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
373 gen_gotpcrel(r
, sv
->sym
, fc
);
376 } else if (is64_type(ft
)) {
378 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
381 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
384 } else if (v
== VT_LOCAL
) {
385 o(0x48 | REX_BASE(r
));
386 o(0x8d); /* lea xxx(%ebp), r */
387 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
388 } else if (v
== VT_CMP
) {
389 oad(0xb8 + r
, 0); /* mov $0, r */
390 o(0x0f); /* setxx %br */
393 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
395 oad(0xb8 + r
, t
); /* mov $1, r */
396 o(0x05eb); /* jmp after */
398 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
400 if (r
== TREG_XMM0
) {
401 assert(v
== TREG_ST0
);
402 /* gen_cvt_ftof(VT_DOUBLE); */
403 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
404 /* movsd -0x10(%rsp),%xmm0 */
407 } else if (r
== TREG_ST0
) {
408 assert(v
== TREG_XMM0
);
409 /* gen_cvt_ftof(VT_LDOUBLE); */
410 /* movsd %xmm0,-0x10(%rsp) */
413 o(0xf02444dd); /* fldl -0x10(%rsp) */
415 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
417 o(0xc0 + r
+ v
* 8); /* mov v, r */
423 /* store register 'r' in lvalue 'v' */
424 void store(int r
, SValue
*v
)
428 /* store the REX prefix in this variable when PIC is enabled */
433 fr
= v
->r
& VT_VALMASK
;
436 #ifndef TCC_TARGET_PE
437 /* we need to access the variable via got */
438 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
439 /* mov xx(%rip), %r11 */
441 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
442 pic
= is64_type(bt
) ? 0x49 : 0x41;
446 /* XXX: incorrect if float reg to reg */
447 if (bt
== VT_FLOAT
) {
450 o(0x7e0f); /* movd */
452 } else if (bt
== VT_DOUBLE
) {
455 o(0xd60f); /* movq */
457 } else if (bt
== VT_LDOUBLE
) {
458 o(0xc0d9); /* fld %st(0) */
466 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
468 else if (is64_type(bt
))
474 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
479 if (fr
== VT_CONST
||
482 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
483 } else if (fr
!= r
) {
484 /* XXX: don't we really come here? */
486 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
489 if (fr
== VT_CONST
||
492 gen_modrm(r
, v
->r
, v
->sym
, fc
);
493 } else if (fr
!= r
) {
494 /* XXX: don't we really come here? */
496 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
/* add 'val' to %rsp, using the short imm8 encoding when it fits.
   NOTE(review): short-form branch reconstructed — lines lost in
   extraction. */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348); /* add $imm8, %rsp */
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
511 /* 'is_jmp' is '1' if it is a jump */
512 static void gcall_or_jmp(int is_jmp
)
515 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
517 if (vtop
->r
& VT_SYM
) {
518 /* relocation case */
519 greloc(cur_text_section
, vtop
->sym
,
520 ind
+ 1, R_X86_64_PC32
);
522 /* put an empty PC32 relocation */
523 put_elf_reloc(symtab_section
, cur_text_section
,
524 ind
+ 1, R_X86_64_PC32
, 0);
526 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
528 /* otherwise, indirect call */
532 o(0xff); /* call/jmp *r */
533 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
539 static const uint8_t arg_regs
[] = {
540 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
544 static const uint8_t arg_regs
[REGN
] = {
545 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
549 /* Generate function call. The function address is pushed first, then
550 all the parameters in call order. This functions pops all the
551 parameters and the function address. */
552 void gfunc_call(int nb_args
)
554 int size
, align
, r
, args_size
, i
;
558 int sse_reg
, gen_reg
;
560 /* calculate the number of integer/float arguments */
562 for(i
= 0; i
< nb_args
; i
++) {
563 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
564 args_size
+= type_size(&vtop
->type
, &align
);
565 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
567 #ifndef TCC_TARGET_PE
568 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
570 if (nb_sse_args
> 8) args_size
+= 8;
574 if (nb_reg_args
> REGN
) args_size
+= 8;
578 /* for struct arguments, we need to call memcpy and the function
579 call breaks register passing arguments we are preparing.
580 So, we process arguments which will be passed by stack first. */
582 gen_reg
= nb_reg_args
;
583 sse_reg
= nb_sse_args
;
586 save_regs(0); /* save used temporary registers */
589 /* adjust stack to align SSE boundary */
590 if (args_size
&= 8) {
591 o(0x50); /* push $rax */
593 for(i
= 0; i
< nb_args
; i
++) {
594 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
595 size
= type_size(&vtop
->type
, &align
);
596 /* align to stack align size */
597 size
= (size
+ 3) & ~3;
598 /* allocate the necessary size on stack */
600 oad(0xec81, size
); /* sub $xxx, %rsp */
601 /* generate structure store */
603 o(0x48 + REX_BASE(r
));
604 o(0x89); /* mov %rsp, r */
607 /* following code breaks vtop[1] */
608 SValue tmp
= vtop
[1];
609 vset(&vtop
->type
, r
| VT_LVAL
, 0);
615 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
618 oad(0xec8148, size
); /* sub $xxx, %rsp */
619 o(0x7cdb); /* fstpt 0(%rsp) */
623 } else if (is_sse_float(vtop
->type
.t
)) {
632 o(0x50); /* push $rax */
633 /* movq %xmm0, (%rsp) */
641 /* XXX: implicit cast ? */
644 o(0x50 + r
); /* push r */
652 /* then, we prepare register passing arguments.
653 Note that we cannot set RDX and RCX in this loop because gv()
654 may break these temporary registers. Let's use R10 and R11
656 gen_reg
= nb_reg_args
;
657 sse_reg
= nb_sse_args
;
658 for(i
= 0; i
< nb_args
; i
++) {
659 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
660 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
661 } else if (is_sse_float(vtop
->type
.t
)) {
666 gv(RC_FLOAT
); /* only one float register */
667 /* movaps %xmm0, %xmmN */
671 o(0xd60f66); /* movq %xmm0, (%rsp) */
672 o(0x2404 + (j
<< 3));
674 o(0x58 + d
); /* pop d */
683 /* XXX: implicit cast ? */
693 o(0xc0 + r
* 8 + d
- 8);
700 gv(RC_FLOAT
); /* only one float register */
701 /* movaps %xmm0, %xmmN */
703 o(0xc0 + (sse_reg
<< 3));
708 /* XXX: implicit cast ? */
713 o(0xc0 + r
* 8 + arg_regs
[j
]);
716 /* j=2: r10, j=3: r11 */
720 /* j=4: r8, j=5: r9 */
721 o(0xc0 + r
* 8 + j
- 4);
730 /* allocate scratch space */
734 save_regs(0); /* save used temporary registers */
736 /* Copy R10 and R11 into RDX and RCX, respectively */
737 if (nb_reg_args
> 2) {
738 o(0xd2894c); /* mov %r10, %rdx */
739 if (nb_reg_args
> 3) {
740 o(0xd9894c); /* mov %r11, %rcx */
744 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
752 #define FUNC_PROLOG_SIZE 11
754 static void push_arg_reg(int i
) {
756 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
759 /* generate function prolog of type 't' */
760 void gfunc_prolog(CType
*func_type
)
762 int i
, addr
, align
, size
;
763 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
769 sym
= func_type
->ref
;
772 ind
+= FUNC_PROLOG_SIZE
;
773 func_sub_sp_offset
= ind
;
775 #ifndef TCC_TARGET_PE
776 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
777 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
778 seen_reg_num
= seen_sse_num
= 0;
779 /* frame pointer and return address */
780 seen_stack_size
= PTR_SIZE
* 2;
781 /* count the number of seen parameters */
782 sym
= func_type
->ref
;
783 while ((sym
= sym
->next
) != NULL
) {
785 if (is_sse_float(type
->t
)) {
786 if (seen_sse_num
< 8) {
789 seen_stack_size
+= 8;
791 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
792 size
= type_size(type
, &align
);
793 size
= (size
+ 3) & ~3;
794 seen_stack_size
+= size
;
795 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
796 seen_stack_size
+= LDOUBLE_SIZE
;
798 if (seen_reg_num
< REGN
) {
801 seen_stack_size
+= 8;
807 /* movl $0x????????, -0x10(%rbp) */
809 gen_le32(seen_reg_num
* 8);
810 /* movl $0x????????, -0xc(%rbp) */
812 gen_le32(seen_sse_num
* 16 + 48);
813 /* movl $0x????????, -0x8(%rbp) */
815 gen_le32(seen_stack_size
);
817 /* save all register passing arguments */
818 for (i
= 0; i
< 8; i
++) {
820 o(0xd60f66); /* movq */
821 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
822 /* movq $0, loc+8(%rbp) */
827 for (i
= 0; i
< REGN
; i
++) {
828 push_arg_reg(REGN
-1-i
);
833 sym
= func_type
->ref
;
838 /* if the function returns a structure, then add an
839 implicit pointer parameter */
841 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
842 push_arg_reg(reg_param_index
);
849 /* define parameters */
850 while ((sym
= sym
->next
) != NULL
) {
852 size
= type_size(type
, &align
);
853 size
= (size
+ 3) & ~3;
854 #ifndef TCC_TARGET_PE
855 if (is_sse_float(type
->t
)) {
856 if (sse_param_index
< 8) {
857 /* save arguments passed by register */
859 o(0xd60f66); /* movq */
860 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
869 if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
870 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
875 if (reg_param_index
< REGN
) {
876 /* save arguments passed by register */
877 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
882 if (reg_param_index
< REGN
) {
883 /* save arguments passed by register */
884 push_arg_reg(reg_param_index
);
893 sym_push(sym
->v
& ~SYM_FIELD
, type
,
894 VT_LOCAL
| VT_LVAL
, param_addr
);
898 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
899 for (i
= reg_param_index
; i
< REGN
; ++i
) {
900 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, addr
);
907 /* generate function epilog */
908 void gfunc_epilog(void)
913 if (func_ret_sub
== 0) {
918 g(func_ret_sub
>> 8);
920 /* align local size to word & save local variables */
921 v
= (-loc
+ 15) & -16;
923 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
926 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
927 oad(0xb8, v
); /* mov stacksize, %eax */
928 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
929 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
930 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
934 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
935 o(0xec8148); /* sub rsp, stacksize */
/* generate a jump to a label */
/* NOTE(review): signature and braces reconstructed — lines lost in
   extraction. */
int gjmp(int t)
{
    return psym(0xe9, t); /* jmp rel32; displacement chained through 't' */
}
947 /* generate a jump to a fixed address */
948 void gjmp_addr(int a
)
956 oad(0xe9, a
- ind
- 5);
960 /* generate a test. set 'inv' to invert test. Stack entry is popped */
961 int gtst(int inv
, int t
)
965 v
= vtop
->r
& VT_VALMASK
;
967 /* fast case : can jump directly since flags are set */
969 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
970 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
971 /* && or || optimization */
972 if ((v
& 1) == inv
) {
973 /* insert vtop->c jump list in t */
976 p
= (int *)(cur_text_section
->data
+ *p
);
984 if (is_float(vtop
->type
.t
) ||
985 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
989 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
990 /* constant jmp optimization */
991 if ((vtop
->c
.i
!= 0) != inv
)
998 t
= psym(0x85 ^ inv
, t
);
1005 /* generate an integer binary operation */
1006 void gen_opi(int op
)
1012 case TOK_ADDC1
: /* add with carry generation */
1015 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
1016 !is64_type(vtop
->type
.t
)) {
1020 if (is64_type(vtop
->type
.t
)) {
1021 o(0x48 | REX_BASE(r
));
1026 /* XXX: generate inc and dec for smaller code ? */
1028 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1032 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1035 gv2(RC_INT
, RC_INT
);
1039 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1040 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1041 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
1043 o((opc
<< 3) | 0x01);
1044 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1047 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1053 case TOK_SUBC1
: /* sub with carry generation */
1056 case TOK_ADDC2
: /* add with carry use */
1059 case TOK_SUBC2
: /* sub with carry use */
1072 gv2(RC_INT
, RC_INT
);
1075 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1076 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1077 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
1080 o(0xaf0f); /* imul fr, r */
1081 o(0xc0 + fr
+ r
* 8);
1092 opc
= 0xc0 | (opc
<< 3);
1093 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1097 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1098 o(0x48 | REX_BASE(r
));
1105 o(0xc1); /* shl/shr/sar $xxx, r */
1109 /* we generate the shift in ecx */
1110 gv2(RC_INT
, RC_RCX
);
1112 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
1113 o(0x48 | REX_BASE(r
));
1115 o(0xd3); /* shl/shr/sar %cl, r */
1126 /* first operand must be in eax */
1127 /* XXX: need better constraint for second operand */
1128 gv2(RC_RAX
, RC_RCX
);
1133 if (op
== TOK_UMULL
) {
1134 o(0xf7); /* mul fr */
1136 vtop
->r2
= TREG_RDX
;
1139 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
1140 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
1143 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1144 o(0x9948); /* cqto */
1145 o(0x48 + REX_BASE(fr
));
1149 o(0xf7); /* idiv fr, %eax */
1152 if (op
== '%' || op
== TOK_UMOD
)
/* 64-bit integer operations share the gen_opi() path, which widens
   based on the operand types.
   NOTE(review): body reconstructed — only the signature survived the
   extraction; verify against upstream. */
void gen_opl(int op)
{
    gen_opi(op);
}
1170 /* generate a floating point operation 'v = t1 op t2' instruction. The
1171 two operands are guaranted to have the same floating point type */
1172 /* XXX: need to use ST1 too */
1173 void gen_opf(int op
)
1175 int a
, ft
, fc
, swapped
, r
;
1177 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1179 /* convert constants to memory references */
1180 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1185 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1188 /* must put at least one value in the floating point register */
1189 if ((vtop
[-1].r
& VT_LVAL
) &&
1190 (vtop
[0].r
& VT_LVAL
)) {
1196 /* swap the stack if needed so that t1 is the register and t2 is
1197 the memory reference */
1198 if (vtop
[-1].r
& VT_LVAL
) {
1202 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1203 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1204 /* load on stack second operand */
1205 load(TREG_ST0
, vtop
);
1206 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1207 if (op
== TOK_GE
|| op
== TOK_GT
)
1209 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1212 o(0xc9d9); /* fxch %st(1) */
1213 o(0xe9da); /* fucompp */
1214 o(0xe0df); /* fnstsw %ax */
1216 o(0x45e480); /* and $0x45, %ah */
1217 o(0x40fC80); /* cmp $0x40, %ah */
1218 } else if (op
== TOK_NE
) {
1219 o(0x45e480); /* and $0x45, %ah */
1220 o(0x40f480); /* xor $0x40, %ah */
1222 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1223 o(0x05c4f6); /* test $0x05, %ah */
1226 o(0x45c4f6); /* test $0x45, %ah */
1233 /* no memory reference possible for long double operations */
1234 load(TREG_ST0
, vtop
);
1258 o(0xde); /* fxxxp %st, %st(1) */
1263 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1264 /* if saved lvalue, then we must reload it */
1267 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1269 r
= get_reg(RC_INT
);
1271 v1
.r
= VT_LOCAL
| VT_LVAL
;
1277 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1280 if (op
== TOK_LE
|| op
== TOK_LT
)
1282 if (op
== TOK_LE
|| op
== TOK_GE
) {
1283 op
= 0x93; /* setae */
1285 op
= 0x97; /* seta */
1290 o(0x7e0ff3); /* movq */
1291 gen_modrm(1, r
, vtop
->sym
, fc
);
1293 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1296 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1299 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1302 o(0x2e0f); /* ucomisd */
1303 gen_modrm(0, r
, vtop
->sym
, fc
);
1310 /* no memory reference possible for long double operations */
1311 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1312 load(TREG_XMM0
, vtop
);
1332 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1333 o(0xde); /* fxxxp %st, %st(1) */
1336 /* if saved lvalue, then we must reload it */
1338 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1340 r
= get_reg(RC_INT
);
1342 v1
.r
= VT_LOCAL
| VT_LVAL
;
1348 /* movq %xmm0,%xmm1 */
1351 load(TREG_XMM0
, vtop
);
1352 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1353 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1362 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1369 gen_modrm(0, r
, vtop
->sym
, fc
);
1377 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1378 and 'long long' cases. */
1379 void gen_cvt_itof(int t
)
1381 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1384 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1385 /* signed long long to float/double/long double (unsigned case
1386 is handled generically) */
1387 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1388 o(0x242cdf); /* fildll (%rsp) */
1389 o(0x08c48348); /* add $8, %rsp */
1390 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1391 (VT_INT
| VT_UNSIGNED
)) {
1392 /* unsigned int to float/double/long double */
1393 o(0x6a); /* push $0 */
1395 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1396 o(0x242cdf); /* fildll (%rsp) */
1397 o(0x10c48348); /* add $16, %rsp */
1399 /* int to float/double/long double */
1400 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1401 o(0x2404db); /* fildl (%rsp) */
1402 o(0x08c48348); /* add $8, %rsp */
1406 save_reg(TREG_XMM0
);
1408 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1409 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1410 (VT_INT
| VT_UNSIGNED
) ||
1411 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1415 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1416 vtop
->r
= TREG_XMM0
;
1420 /* convert from one floating point type to another */
1421 void gen_cvt_ftof(int t
)
1429 if (bt
== VT_FLOAT
) {
1431 if (tbt
== VT_DOUBLE
) {
1432 o(0xc0140f); /* unpcklps */
1433 o(0xc05a0f); /* cvtps2pd */
1434 } else if (tbt
== VT_LDOUBLE
) {
1435 /* movss %xmm0,-0x10(%rsp) */
1438 o(0xf02444d9); /* flds -0x10(%rsp) */
1441 } else if (bt
== VT_DOUBLE
) {
1443 if (tbt
== VT_FLOAT
) {
1444 o(0xc0140f66); /* unpcklpd */
1445 o(0xc05a0f66); /* cvtpd2ps */
1446 } else if (tbt
== VT_LDOUBLE
) {
1447 /* movsd %xmm0,-0x10(%rsp) */
1450 o(0xf02444dd); /* fldl -0x10(%rsp) */
1455 if (tbt
== VT_DOUBLE
) {
1456 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1457 /* movsd -0x10(%rsp),%xmm0 */
1460 vtop
->r
= TREG_XMM0
;
1461 } else if (tbt
== VT_FLOAT
) {
1462 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1463 /* movss -0x10(%rsp),%xmm0 */
1466 vtop
->r
= TREG_XMM0
;
1471 /* convert fp to int 't' type */
1472 void gen_cvt_ftoi(int t
)
1474 int ft
, bt
, size
, r
;
1477 if (bt
== VT_LDOUBLE
) {
1478 gen_cvt_ftof(VT_DOUBLE
);
1488 r
= get_reg(RC_INT
);
1489 if (bt
== VT_FLOAT
) {
1491 } else if (bt
== VT_DOUBLE
) {
1497 o(0x48 + REX_BASE(r
));
1499 o(0x2c0f); /* cvttss2si or cvttsd2si */
1500 o(0xc0 + (REG_VALUE(r
) << 3));
/* computed goto support */
/* NOTE(review): the ggoto() body that belongs here is missing from this
   extraction — restore from upstream. */

/* end of x86-64 code generator */
/*************************************************************/