2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 /* number of available registers */
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see gv2() code which does
30 assumptions on it). */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
36 #define RC_XMM0 0x0020
37 #define RC_ST0 0x0040 /* only for long double */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
60 #define REX_BASE(reg) (((reg) >> 3) & 1)
61 #define REG_VALUE(reg) ((reg) & 7)
63 const int reg_classes
[NB_REGS
] = {
64 /* eax */ RC_INT
| RC_RAX
,
65 /* ecx */ RC_INT
| RC_RCX
,
66 /* edx */ RC_INT
| RC_RDX
,
67 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
71 /* return registers for function */
72 #define REG_IRET TREG_RAX /* single word int return register */
73 #define REG_LRET TREG_RDX /* second word return register (for long long) */
74 #define REG_FRET TREG_XMM0 /* float return register */
76 /* defined if function parameters must be evaluated in reverse order */
77 #define INVERT_FUNC_PARAMS
79 /* pointer size, in bytes */
82 /* long double size and alignment, in bytes */
83 #define LDOUBLE_SIZE 16
84 #define LDOUBLE_ALIGN 8
85 /* maximum alignment (for aligned attribute support) */
88 /******************************************************/
91 #define EM_TCC_TARGET EM_X86_64
93 /* relocation type for 32 bit data relocation */
94 #define R_DATA_32 R_X86_64_32
95 #define R_DATA_PTR R_X86_64_64
96 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
97 #define R_COPY R_X86_64_COPY
99 #define ELF_START_ADDR 0x08048000
100 #define ELF_PAGE_SIZE 0x1000
102 /******************************************************/
/* Byte offset (in the text section) of the "sub $xx, %rsp" emitted by the
   function prolog; gfunc_epilog() rewinds 'ind' here to patch in the final
   frame size once all locals are known. */
104 static unsigned long func_sub_sp_offset
;
/* Extra immediate for 'ret $n' at function return; 0 means a plain 'ret'.
   NOTE(review): presumably used for callee-pops conventions — confirm. */
105 static int func_ret_sub
;
107 /* XXX: make it faster ? */
112 if (ind1
> cur_text_section
->data_allocated
)
113 section_realloc(cur_text_section
, ind1
);
114 cur_text_section
->data
[ind
] = c
;
118 void o(unsigned int c
)
140 void gen_le64(int64_t c
)
152 /* output a symbol and patch all calls to it */
/* 't' is the head of a chain of 32-bit patch slots threaded through the
   code section (each slot stores the offset of the next); 'a' is the now
   resolved target.  NOTE(review): the loop that walks the chain and writes
   the final displacements is on lines missing from this extract. */
153 void gsym_addr(int t
, int a
)
/* locate the current patch slot inside the text section */
157 ptr
= (int *)(cur_text_section
->data
+ t
);
158 n
= *ptr
; /* next value */
169 /* psym is used to put an instruction with a data field which is a
170 reference to a symbol. It is in fact the same as oad ! */
/* Return non-zero if the basic type of 't' occupies a full 64-bit word on
   x86-64: pointers, function designators and long long all do.  Used to
   decide when a REX.W prefix / 64-bit move is required. */
173 static int is64_type(int t
)
175 return ((t
& VT_BTYPE
) == VT_PTR
||
176 (t
& VT_BTYPE
) == VT_FUNC
||
177 (t
& VT_BTYPE
) == VT_LLONG
);
/* Return non-zero if 't' is passed/returned in SSE registers (float or
   double); long double is instead handled on the x87 stack (RC_ST0). */
180 static int is_sse_float(int t
) {
/* NOTE(review): 'bt' is assigned on a line missing from this extract —
   presumably bt = t & VT_BTYPE; confirm against the full source. */
183 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
186 /* instruction + 4 bytes data. Return the address of the data */
/* Emits opcode 'c' followed by the 32-bit immediate 's'; the returned
   offset lets callers chain jump patches (see gsym_addr/psym). */
187 static int oad(int c
, int s
)
/* grow the code buffer if the 4 data bytes would overflow it */
193 if (ind1
> cur_text_section
->data_allocated
)
194 section_realloc(cur_text_section
, ind1
);
/* store the 32-bit immediate at the current output position */
195 *(int *)(cur_text_section
->data
+ ind
) = s
;
/* Output a 32-bit constant 'c'; when 'r & VT_SYM' is set, a R_X86_64_32
   relocation against 'sym' is emitted at the current position first.
   NOTE(review): the VT_SYM test and the gen_le32(c) call are on lines
   missing from this extract. */
201 static void gen_addr32(int r
, Sym
*sym
, int c
)
204 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
208 /* output constant with relocation if 'r & VT_SYM' is true */
/* 64-bit variant of gen_addr32: emits a R_X86_64_64 relocation, then (on
   a line not visible here) the 8 constant bytes of 'c' themselves. */
209 static void gen_addr64(int r
, Sym
*sym
, int64_t c
)
212 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
216 /* output constant with relocation if 'r & VT_SYM' is true */
/* PC-relative flavor: emits a R_X86_64_PC32 relocation, used for
   rip-relative addressing and call/jump displacements.  The constant
   bytes themselves are emitted on a line missing from this extract. */
217 static void gen_addrpc32(int r
, Sym
*sym
, int c
)
220 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
224 /* output got address with relocation */
/* Emits a rip-relative reference to sym's GOT slot (non-PE targets use a
   real R_X86_64_GOTPCREL; the #else path falls back to plain PC32), then
   an "add $c, %reg" to apply the displacement 'c'. */
225 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
227 #ifndef TCC_TARGET_PE
230 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
/* grab the relocation just appended so it can be inspected/adjusted */
231 sr
= cur_text_section
->reloc
;
232 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
/* debug dump of the sym, displacement and the last emitted code bytes;
   NOTE(review): presumably inside an #ifdef debug guard not visible here */
235 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
236 cur_text_section
->data
[ind
-3],
237 cur_text_section
->data
[ind
-2],
238 cur_text_section
->data
[ind
-1]
/* non-GOT fallback path: plain PC-relative relocation */
240 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
245 /* we use add c, %xxx for displacement */
246 o(0x48 + REX_BASE(r
));
/* ModRM for "add imm32, r" (register-direct form) */
248 o(0xc0 + REG_VALUE(r
));
/* Core ModRM emitter.  'op_reg' is the /reg field (opcode extension or
   source/dest register), 'r' describes the addressing mode via its
   VT_VALMASK bits, 'sym'/'c' give a symbolic or constant displacement,
   and 'is_got' selects GOT-indirect access for PIC. */
253 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
/* move the register number into ModRM bits 3..5 */
255 op_reg
= REG_VALUE(op_reg
) << 3;
256 if ((r
& VT_VALMASK
) == VT_CONST
) {
257 /* constant memory reference */
/* PIC: go through the GOT; otherwise rip-relative direct reference */
260 gen_gotpcrel(r
, sym
, c
);
262 gen_addrpc32(r
, sym
, c
);
264 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
265 /* currently, we use only ebp as base */
267 /* short reference */
/* disp32(%rbp) form; the disp8 short form is on lines missing here */
271 oad(0x85 | op_reg
, c
);
273 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
/* register-indirect with displacement (TREG_MEM temporaries) */
275 g(0x80 | op_reg
| REG_VALUE(r
));
/* plain (reg) indirect, no displacement */
278 g(0x00 | op_reg
| REG_VALUE(r
));
/* fallback: register-indirect using the raw register number */
281 g(0x00 | op_reg
| (r
& VT_VALMASK
));
285 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
/* 32-bit convenience wrapper: never uses GOT-indirect access. */
287 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
289 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
292 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
/* 64-bit variant: emits the REX.W prefix (extending the /reg field via
   REX.R and, for register bases, the base via REX.B) before delegating
   to gen_modrm_impl.  'opcode' is the instruction byte (e.g. 0x8b/0x89).
   NOTE(review): the lines emitting 'rex' and 'opcode' themselves are
   missing from this extract. */
294 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
297 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
/* extend the base register field only for true register bases */
298 if ((r
& VT_VALMASK
) != VT_CONST
&&
299 (r
& VT_VALMASK
) != VT_LOCAL
) {
300 rex
|= REX_BASE(VT_VALMASK
& r
);
/* TREG_MEM temporaries addressing a non-static symbol go via the GOT */
304 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
305 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
309 /* load 'r' from value 'sv' */
310 void load(int r
, SValue
*sv
)
312 int v
, t
, ft
, fc
, fr
;
319 #ifndef TCC_TARGET_PE
320 /* we use indirect access via got */
321 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
322 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
323 /* use the result register as a temporal register */
324 int tr
= r
| TREG_MEM
;
326 /* we cannot use float registers as a temporal register */
327 tr
= get_reg(RC_INT
) | TREG_MEM
;
329 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
331 /* load from the temporal register */
338 if (v
== VT_LLOCAL
) {
340 v1
.r
= VT_LOCAL
| VT_LVAL
;
345 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
346 o(0x6e0f66); /* movd */
348 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
349 o(0x7e0ff3); /* movq */
351 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
354 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
355 o(0xbe0f); /* movsbl */
356 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
357 o(0xb60f); /* movzbl */
358 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
359 o(0xbf0f); /* movswl */
360 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
361 o(0xb70f); /* movzwl */
362 } else if (is64_type(ft
)) {
363 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
368 gen_modrm(r
, fr
, sv
->sym
, fc
);
374 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
375 gen_addrpc32(fr
, sv
->sym
, fc
);
377 if (sv
->sym
->type
.t
& VT_STATIC
) {
379 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
380 gen_addrpc32(fr
, sv
->sym
, fc
);
383 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
384 gen_gotpcrel(r
, sv
->sym
, fc
);
387 } else if (is64_type(ft
)) {
389 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
392 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
395 } else if (v
== VT_LOCAL
) {
396 o(0x48 | REX_BASE(r
));
397 o(0x8d); /* lea xxx(%ebp), r */
398 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
399 } else if (v
== VT_CMP
) {
400 oad(0xb8 + r
, 0); /* mov $0, r */
401 o(0x0f); /* setxx %br */
404 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
406 oad(0xb8 + r
, t
); /* mov $1, r */
407 o(0x05eb); /* jmp after */
409 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
411 if (r
== TREG_XMM0
) {
412 assert(v
== TREG_ST0
);
413 /* gen_cvt_ftof(VT_DOUBLE); */
414 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
415 /* movsd -0x10(%rsp),%xmm0 */
418 } else if (r
== TREG_ST0
) {
419 assert(v
== TREG_XMM0
);
420 /* gen_cvt_ftof(VT_LDOUBLE); */
421 /* movsd %xmm0,-0x10(%rsp) */
424 o(0xf02444dd); /* fldl -0x10(%rsp) */
426 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
428 o(0xc0 + r
+ v
* 8); /* mov v, r */
434 /* store register 'r' in lvalue 'v' */
435 void store(int r
, SValue
*v
)
439 /* store the REX prefix in this variable when PIC is enabled */
444 fr
= v
->r
& VT_VALMASK
;
447 #ifndef TCC_TARGET_PE
448 /* we need to access the variable via got */
449 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
450 /* mov xx(%rip), %r11 */
452 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
453 pic
= is64_type(bt
) ? 0x49 : 0x41;
457 /* XXX: incorrect if float reg to reg */
458 if (bt
== VT_FLOAT
) {
461 o(0x7e0f); /* movd */
463 } else if (bt
== VT_DOUBLE
) {
466 o(0xd60f); /* movq */
468 } else if (bt
== VT_LDOUBLE
) {
469 o(0xc0d9); /* fld %st(0) */
477 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
479 else if (is64_type(bt
))
485 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
490 if (fr
== VT_CONST
||
493 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
494 } else if (fr
!= r
) {
495 /* XXX: don't we really come here? */
497 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
500 if (fr
== VT_CONST
||
503 gen_modrm(r
, v
->r
, v
->sym
, fc
);
504 } else if (fr
!= r
) {
505 /* XXX: don't we really come here? */
507 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
/* Adjust %rsp by 'val' (used to pop call arguments).  A short encoding
   is used when 'val' fits in a signed byte; NOTE(review): the bytes of
   that short form are on lines missing from this extract. */
512 static void gadd_sp(int val
)
514 if (val
== (char)val
) {
518 oad(0xc48148, val
); /* add $xxx, %rsp */
522 /* 'is_jmp' is '1' if it is a jump */
/* Emit a call (0xe8) or jump (0xe9) to the value on top of the value
   stack: direct with a PC32 relocation when it is a constant/symbol,
   otherwise indirect through a register. */
523 static void gcall_or_jmp(int is_jmp
)
526 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
528 if (vtop
->r
& VT_SYM
) {
529 /* relocation case */
/* +1 skips the opcode byte so the reloc covers the rel32 field */
530 greloc(cur_text_section
, vtop
->sym
,
531 ind
+ 1, R_X86_64_PC32
);
533 /* put an empty PC32 relocation */
534 put_elf_reloc(symtab_section
, cur_text_section
,
535 ind
+ 1, R_X86_64_PC32
, 0);
/* -4 accounts for rel32 being relative to the next instruction */
537 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
539 /* otherwise, indirect call */
543 o(0xff); /* call/jmp *r */
/* ModRM: /2 (0xd0) for call, /4 (0xe0 via is_jmp<<4) for jmp */
544 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
/* Integer argument register order.  First table: RCX,RDX,R8,R9 —
   presumably the Win64 (TCC_TARGET_PE) convention; second table of REGN
   entries: RDI,RSI,RDX,RCX,R8,R9 — the System V AMD64 convention.
   NOTE(review): the surrounding #ifdef/#else and closing braces are on
   lines missing from this extract. */
550 static const uint8_t arg_regs
[] = {
551 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
555 static const uint8_t arg_regs
[REGN
] = {
556 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
560 /* Generate function call. The function address is pushed first, then
561 all the parameters in call order. This functions pops all the
562 parameters and the function address. */
563 void gfunc_call(int nb_args
)
565 int size
, align
, r
, args_size
, i
;
569 int sse_reg
, gen_reg
;
571 /* calculate the number of integer/float arguments */
573 for(i
= 0; i
< nb_args
; i
++) {
574 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
575 args_size
+= type_size(&vtop
->type
, &align
);
576 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
578 #ifndef TCC_TARGET_PE
579 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
581 if (nb_sse_args
> 8) args_size
+= 8;
585 if (nb_reg_args
> REGN
) args_size
+= 8;
589 /* for struct arguments, we need to call memcpy and the function
590 call breaks register passing arguments we are preparing.
591 So, we process arguments which will be passed by stack first. */
593 gen_reg
= nb_reg_args
;
594 sse_reg
= nb_sse_args
;
597 save_regs(0); /* save used temporary registers */
600 /* adjust stack to align SSE boundary */
601 if (args_size
&= 8) {
602 o(0x50); /* push $rax */
604 for(i
= 0; i
< nb_args
; i
++) {
605 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
606 size
= type_size(&vtop
->type
, &align
);
607 /* align to stack align size */
608 size
= (size
+ 3) & ~3;
609 /* allocate the necessary size on stack */
611 oad(0xec81, size
); /* sub $xxx, %rsp */
612 /* generate structure store */
614 o(0x48 + REX_BASE(r
));
615 o(0x89); /* mov %rsp, r */
618 /* following code breaks vtop[1] */
619 SValue tmp
= vtop
[1];
620 vset(&vtop
->type
, r
| VT_LVAL
, 0);
626 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
629 oad(0xec8148, size
); /* sub $xxx, %rsp */
630 o(0x7cdb); /* fstpt 0(%rsp) */
634 } else if (is_sse_float(vtop
->type
.t
)) {
643 o(0x50); /* push $rax */
644 /* movq %xmm0, (%rsp) */
652 /* XXX: implicit cast ? */
655 o(0x50 + r
); /* push r */
663 /* then, we prepare register passing arguments.
664 Note that we cannot set RDX and RCX in this loop because gv()
665 may break these temporary registers. Let's use R10 and R11
667 gen_reg
= nb_reg_args
;
668 sse_reg
= nb_sse_args
;
669 for(i
= 0; i
< nb_args
; i
++) {
670 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
671 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
672 } else if (is_sse_float(vtop
->type
.t
)) {
677 gv(RC_FLOAT
); /* only one float register */
678 /* movaps %xmm0, %xmmN */
682 o(0xd60f66); /* movq %xmm0, (%rsp) */
683 o(0x2404 + (j
<< 3));
685 o(0x58 + d
); /* pop d */
694 /* XXX: implicit cast ? */
704 o(0xc0 + r
* 8 + d
- 8);
711 gv(RC_FLOAT
); /* only one float register */
712 /* movaps %xmm0, %xmmN */
714 o(0xc0 + (sse_reg
<< 3));
719 /* XXX: implicit cast ? */
724 o(0xc0 + r
* 8 + arg_regs
[j
]);
727 /* j=2: r10, j=3: r11 */
731 /* j=4: r8, j=5: r9 */
732 o(0xc0 + r
* 8 + j
- 4);
741 /* allocate scratch space */
745 save_regs(0); /* save used temporary registers */
747 /* Copy R10 and R11 into RDX and RCX, respectively */
748 if (nb_reg_args
> 2) {
749 o(0xd2894c); /* mov %r10, %rdx */
750 if (nb_reg_args
> 3) {
751 o(0xd9894c); /* mov %r11, %rcx */
755 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
763 #define FUNC_PROLOG_SIZE 11
/* Spill the i-th integer argument register to a fresh stack slot at
   'loc' during the prolog (mov %arg_regs[i], loc(%rbp)).
   NOTE(review): the line adjusting 'loc' is missing from this extract. */
765 static void push_arg_reg(int i
) {
767 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
770 /* generate function prolog of type 't' */
771 void gfunc_prolog(CType
*func_type
)
773 int i
, addr
, align
, size
;
774 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
780 sym
= func_type
->ref
;
783 ind
+= FUNC_PROLOG_SIZE
;
784 func_sub_sp_offset
= ind
;
786 #ifndef TCC_TARGET_PE
787 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
788 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
789 seen_reg_num
= seen_sse_num
= 0;
790 /* frame pointer and return address */
791 seen_stack_size
= PTR_SIZE
* 2;
792 /* count the number of seen parameters */
793 sym
= func_type
->ref
;
794 while ((sym
= sym
->next
) != NULL
) {
796 if (is_sse_float(type
->t
)) {
797 if (seen_sse_num
< 8) {
800 seen_stack_size
+= 8;
802 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
803 size
= type_size(type
, &align
);
804 size
= (size
+ 3) & ~3;
805 seen_stack_size
+= size
;
806 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
807 seen_stack_size
+= LDOUBLE_SIZE
;
809 if (seen_reg_num
< REGN
) {
812 seen_stack_size
+= 8;
818 /* movl $0x????????, -0x10(%rbp) */
820 gen_le32(seen_reg_num
* 8);
821 /* movl $0x????????, -0xc(%rbp) */
823 gen_le32(seen_sse_num
* 16 + 48);
824 /* movl $0x????????, -0x8(%rbp) */
826 gen_le32(seen_stack_size
);
828 /* save all register passing arguments */
829 for (i
= 0; i
< 8; i
++) {
831 o(0xd60f66); /* movq */
832 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
833 /* movq $0, loc+8(%rbp) */
838 for (i
= 0; i
< REGN
; i
++) {
839 push_arg_reg(REGN
-1-i
);
844 sym
= func_type
->ref
;
849 /* if the function returns a structure, then add an
850 implicit pointer parameter */
852 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
853 push_arg_reg(reg_param_index
);
860 /* define parameters */
861 while ((sym
= sym
->next
) != NULL
) {
863 size
= type_size(type
, &align
);
864 size
= (size
+ 3) & ~3;
865 #ifndef TCC_TARGET_PE
866 if (is_sse_float(type
->t
)) {
867 if (sse_param_index
< 8) {
868 /* save arguments passed by register */
870 o(0xd60f66); /* movq */
871 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
880 if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
881 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
886 if (reg_param_index
< REGN
) {
887 /* save arguments passed by register */
888 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
893 if (reg_param_index
< REGN
) {
894 /* save arguments passed by register */
895 push_arg_reg(reg_param_index
);
904 sym_push(sym
->v
& ~SYM_FIELD
, type
,
905 VT_LOCAL
| VT_LVAL
, param_addr
);
909 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
910 for (i
= reg_param_index
; i
< REGN
; ++i
) {
911 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, addr
);
918 /* generate function epilog */
/* Emits the return sequence ('ret' or 'ret $n' when func_ret_sub != 0),
   then rewinds 'ind' to the prolog placeholder reserved by gfunc_prolog
   and fills in the real frame setup now that the frame size is known. */
919 void gfunc_epilog(void)
924 if (func_ret_sub
== 0) {
/* second byte of the 16-bit 'ret $n' immediate */
929 g(func_ret_sub
>> 8);
931 /* align local size to word & save local variables */
/* round the (negative-growing) local area up to 16 bytes for the ABI */
932 v
= (-loc
+ 15) & -16;
/* go back and patch the prolog placeholder */
934 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
/* PE path: large frames must be probed page by page via __chkstk */
937 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
938 oad(0xb8, v
); /* mov stacksize, %eax */
939 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
940 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
941 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
/* non-PE path: classic frame setup */
945 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
946 o(0xec8148); /* sub rsp, stacksize */
952 /* generate a jump to a label */
/* NOTE(review): the 'int gjmp(int t)' header is on lines missing from
   this extract; emits 'jmp rel32' and returns the patch-chain offset. */
955 return psym(0xe9, t
);
958 /* generate a jump to a fixed address */
/* Long form: jmp rel32, displacement relative to the end of the 5-byte
   instruction.  NOTE(review): the short 'jmp rel8' fast path for nearby
   targets is on lines missing from this extract. */
959 void gjmp_addr(int a
)
967 oad(0xe9, a
- ind
- 5);
971 /* generate a test. set 'inv' to invert test. Stack entry is popped */
972 int gtst(int inv
, int t
)
976 v
= vtop
->r
& VT_VALMASK
;
978 /* fast case : can jump directly since flags are set */
980 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
981 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
982 /* && or || optimization */
983 if ((v
& 1) == inv
) {
984 /* insert vtop->c jump list in t */
987 p
= (int *)(cur_text_section
->data
+ *p
);
995 if (is_float(vtop
->type
.t
) ||
996 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1000 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1001 /* constant jmp optimization */
1002 if ((vtop
->c
.i
!= 0) != inv
)
1009 t
= psym(0x85 ^ inv
, t
);
1016 /* generate an integer binary operation */
1017 void gen_opi(int op
)
1023 case TOK_ADDC1
: /* add with carry generation */
1026 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
1027 !is64_type(vtop
->type
.t
)) {
1031 if (is64_type(vtop
->type
.t
)) {
1032 o(0x48 | REX_BASE(r
));
1037 /* XXX: generate inc and dec for smaller code ? */
1039 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1043 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1046 gv2(RC_INT
, RC_INT
);
1050 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1051 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1052 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
1054 o((opc
<< 3) | 0x01);
1055 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1058 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1064 case TOK_SUBC1
: /* sub with carry generation */
1067 case TOK_ADDC2
: /* add with carry use */
1070 case TOK_SUBC2
: /* sub with carry use */
1083 gv2(RC_INT
, RC_INT
);
1086 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1087 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1088 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
1091 o(0xaf0f); /* imul fr, r */
1092 o(0xc0 + fr
+ r
* 8);
1103 opc
= 0xc0 | (opc
<< 3);
1104 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1108 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1109 o(0x48 | REX_BASE(r
));
1116 o(0xc1); /* shl/shr/sar $xxx, r */
1120 /* we generate the shift in ecx */
1121 gv2(RC_INT
, RC_RCX
);
1123 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
1124 o(0x48 | REX_BASE(r
));
1126 o(0xd3); /* shl/shr/sar %cl, r */
1137 /* first operand must be in eax */
1138 /* XXX: need better constraint for second operand */
1139 gv2(RC_RAX
, RC_RCX
);
1144 if (op
== TOK_UMULL
) {
1145 o(0xf7); /* mul fr */
1147 vtop
->r2
= TREG_RDX
;
1150 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
1151 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1152 o(0xd23148); /* xor %rdx, %rdx */
1153 o(0x48 + REX_BASE(fr
));
1155 o(0xd231); /* xor %edx, %edx */
1157 o(0xf7); /* div fr, %eax */
1160 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1161 o(0x9948); /* cqto */
1162 o(0x48 + REX_BASE(fr
));
1166 o(0xf7); /* idiv fr, %eax */
1169 if (op
== '%' || op
== TOK_UMOD
)
1182 void gen_opl(int op
)
1187 /* generate a floating point operation 'v = t1 op t2' instruction. The
1188 two operands are guaranted to have the same floating point type */
1189 /* XXX: need to use ST1 too */
1190 void gen_opf(int op
)
1192 int a
, ft
, fc
, swapped
, r
;
1194 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1196 /* convert constants to memory references */
1197 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1202 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1205 /* must put at least one value in the floating point register */
1206 if ((vtop
[-1].r
& VT_LVAL
) &&
1207 (vtop
[0].r
& VT_LVAL
)) {
1213 /* swap the stack if needed so that t1 is the register and t2 is
1214 the memory reference */
1215 if (vtop
[-1].r
& VT_LVAL
) {
1219 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1220 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1221 /* load on stack second operand */
1222 load(TREG_ST0
, vtop
);
1223 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1224 if (op
== TOK_GE
|| op
== TOK_GT
)
1226 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1229 o(0xc9d9); /* fxch %st(1) */
1230 o(0xe9da); /* fucompp */
1231 o(0xe0df); /* fnstsw %ax */
1233 o(0x45e480); /* and $0x45, %ah */
1234 o(0x40fC80); /* cmp $0x40, %ah */
1235 } else if (op
== TOK_NE
) {
1236 o(0x45e480); /* and $0x45, %ah */
1237 o(0x40f480); /* xor $0x40, %ah */
1239 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1240 o(0x05c4f6); /* test $0x05, %ah */
1243 o(0x45c4f6); /* test $0x45, %ah */
1250 /* no memory reference possible for long double operations */
1251 load(TREG_ST0
, vtop
);
1275 o(0xde); /* fxxxp %st, %st(1) */
1280 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1281 /* if saved lvalue, then we must reload it */
1284 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1286 r
= get_reg(RC_INT
);
1288 v1
.r
= VT_LOCAL
| VT_LVAL
;
1294 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1297 if (op
== TOK_LE
|| op
== TOK_LT
)
1299 if (op
== TOK_LE
|| op
== TOK_GE
) {
1300 op
= 0x93; /* setae */
1302 op
= 0x97; /* seta */
1307 o(0x7e0ff3); /* movq */
1308 gen_modrm(1, r
, vtop
->sym
, fc
);
1310 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1313 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1316 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1319 o(0x2e0f); /* ucomisd */
1320 gen_modrm(0, r
, vtop
->sym
, fc
);
1327 /* no memory reference possible for long double operations */
1328 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1329 load(TREG_XMM0
, vtop
);
1349 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1350 o(0xde); /* fxxxp %st, %st(1) */
1353 /* if saved lvalue, then we must reload it */
1355 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1357 r
= get_reg(RC_INT
);
1359 v1
.r
= VT_LOCAL
| VT_LVAL
;
1365 /* movq %xmm0,%xmm1 */
1368 load(TREG_XMM0
, vtop
);
1369 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1370 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1379 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1386 gen_modrm(0, r
, vtop
->sym
, fc
);
1394 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1395 and 'long long' cases. */
1396 void gen_cvt_itof(int t
)
1398 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1401 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1402 /* signed long long to float/double/long double (unsigned case
1403 is handled generically) */
1404 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1405 o(0x242cdf); /* fildll (%rsp) */
1406 o(0x08c48348); /* add $8, %rsp */
1407 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1408 (VT_INT
| VT_UNSIGNED
)) {
1409 /* unsigned int to float/double/long double */
1410 o(0x6a); /* push $0 */
1412 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1413 o(0x242cdf); /* fildll (%rsp) */
1414 o(0x10c48348); /* add $16, %rsp */
1416 /* int to float/double/long double */
1417 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1418 o(0x2404db); /* fildl (%rsp) */
1419 o(0x08c48348); /* add $8, %rsp */
1423 save_reg(TREG_XMM0
);
1425 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1426 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1427 (VT_INT
| VT_UNSIGNED
) ||
1428 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1432 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1433 vtop
->r
= TREG_XMM0
;
1437 /* convert from one floating point type to another */
1438 void gen_cvt_ftof(int t
)
1446 if (bt
== VT_FLOAT
) {
1448 if (tbt
== VT_DOUBLE
) {
1449 o(0xc0140f); /* unpcklps */
1450 o(0xc05a0f); /* cvtps2pd */
1451 } else if (tbt
== VT_LDOUBLE
) {
1452 /* movss %xmm0,-0x10(%rsp) */
1455 o(0xf02444d9); /* flds -0x10(%rsp) */
1458 } else if (bt
== VT_DOUBLE
) {
1460 if (tbt
== VT_FLOAT
) {
1461 o(0xc0140f66); /* unpcklpd */
1462 o(0xc05a0f66); /* cvtpd2ps */
1463 } else if (tbt
== VT_LDOUBLE
) {
1464 /* movsd %xmm0,-0x10(%rsp) */
1467 o(0xf02444dd); /* fldl -0x10(%rsp) */
1472 if (tbt
== VT_DOUBLE
) {
1473 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1474 /* movsd -0x10(%rsp),%xmm0 */
1477 vtop
->r
= TREG_XMM0
;
1478 } else if (tbt
== VT_FLOAT
) {
1479 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1480 /* movss -0x10(%rsp),%xmm0 */
1483 vtop
->r
= TREG_XMM0
;
1488 /* convert fp to int 't' type */
1489 void gen_cvt_ftoi(int t
)
1491 int ft
, bt
, size
, r
;
1494 if (bt
== VT_LDOUBLE
) {
1495 gen_cvt_ftof(VT_DOUBLE
);
1505 r
= get_reg(RC_INT
);
1506 if (bt
== VT_FLOAT
) {
1508 } else if (bt
== VT_DOUBLE
) {
1514 o(0x48 + REX_BASE(r
));
1516 o(0x2c0f); /* cvttss2si or cvttsd2si */
1517 o(0xc0 + (REG_VALUE(r
) << 3));
1521 /* computed goto support */
1528 /* end of x86-64 code generator */
1529 /*************************************************************/