/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which
   makes assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_XMM0    0x0020
#define RC_ST0     0x0040 /* only for long double */
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_LRET    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */

/* pretty names for the registers */

#define REX_BASE(reg)  (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)
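/* Added note (not part of the original source): x86-64 widens the eight
   i386 registers to sixteen by adding one extra bit per register operand,
   carried in a REX prefix byte of the form 0100WRXB.  A register number
   therefore splits into REX_BASE(reg), the extension bit that goes into
   the REX prefix, and REG_VALUE(reg), the low three bits that go into the
   ModRM/opcode byte.  For example, register 9 (%r9) gives REX_BASE = 1 and
   REG_VALUE = 1, i.e. the same ModRM bits as %rcx but with the REX
   extension bit set. */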
/* return registers for function */
#define REG_IRET TREG_RAX  /* single word int return register */
#define REG_LRET TREG_RDX  /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 8
/* maximum alignment (for aligned attribute support) */
ST_FUNC void gen_opl(int op);
ST_FUNC void gen_le64(int64_t c);
/******************************************************/

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32  R_X86_64_32
#define R_DATA_PTR R_X86_64_64
#define R_JMP_SLOT R_X86_64_JUMP_SLOT
#define R_COPY     R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE  0x1000

/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
};

static unsigned long func_sub_sp_offset;
static int func_ret_sub;
/* XXX: make it faster ? */
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
void o(unsigned int c)

void gen_le64(int64_t c)
void orex(int ll, int r, int r2, int b)
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
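/* Added note (not part of the original source, rough sketch of the
   behaviour): with ll = 1 and ordinary low registers, orex() emits 0x48,
   i.e. a REX.W prefix (0x40 | ll << 3), in front of the opcode byte 'b'
   that is passed in; with ll = 0 and no extended registers no prefix is
   emitted at all.  Operands whose VT_VALMASK part is VT_CONST or above are
   not real registers, so they are cleared first and never contribute REX
   bits. */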
/* output a symbol and patch all calls to it */
void gsym_addr(int t, int a)
    ptr = (int *)(cur_text_section->data + t);
    n = *ptr; /* next value */
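/* Added note (not part of the original source): forward jumps whose target
   is not yet known store, in their 4-byte displacement field, the offset of
   the previous unresolved jump to the same label, so the pending jumps form
   a singly linked list threaded through the code buffer.  gsym_addr() walks
   that list starting at 't' and patches every displacement so that each
   recorded jump now lands on address 'a'. */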
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */

static int is64_type(int t)
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
static int is_sse_float(int t) {
    return bt == VT_DOUBLE || bt == VT_FLOAT;
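/* Added note (not part of the original source): on x86-64, float and double
   values are kept in SSE registers (RC_FLOAT / RC_XMM0), while long double
   is the 80-bit x87 format and lives on the x87 stack (RC_ST0); that is why
   long double is deliberately not treated as an SSE float here. */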
/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
    greloc(cur_text_section, sym, ind, R_X86_64_32);
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
    greloc(cur_text_section, sym, ind, R_X86_64_64);
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
#ifndef TCC_TARGET_PE
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));

    printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
           cur_text_section->data[ind-3],
           cur_text_section->data[ind-2],
           cur_text_section->data[ind-1]
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);

    /* we use add c, %xxx for displacement */
    o(0xc0 + REG_VALUE(r));
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
            gen_gotpcrel(r, sym, c);
            gen_addrpc32(r, sym, c);
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
            /* short reference */
            oad(0x85 | op_reg, c);
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
            g(0x80 | op_reg | REG_VALUE(r));
            g(0x00 | op_reg | REG_VALUE(r));
        g(0x00 | op_reg | REG_VALUE(r));
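/* Added note (not part of the original source): a ModRM byte is laid out as
   mod(2 bits) | reg(3 bits) | r/m(3 bits).  The cases above map onto that
   encoding: 0x05 | op_reg is mod = 00 with r/m = 101, i.e. a RIP-relative
   reference with a 32-bit displacement; 0x85 | op_reg via oad() is mod = 10,
   r/m = 101 (%rbp) with a 32-bit displacement; 0x80 | ... is mod = 10 with a
   32-bit displacement off a general base register, and 0x00 | ... is
   mod = 00 with no displacement.  As a worked example, "mov %eax, -8(%rbp)"
   is encoded 89 45 f8: opcode 0x89, ModRM 0x45 (mod = 01, reg = 000 = eax,
   r/m = 101 = rbp), disp8 = 0xf8. */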
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
    gen_modrm_impl(op_reg, r, sym, c, 0);
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
    int v, t, ft, fc, fr;

    sv = pe_getimport(sv, &v2);
#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
    if (v == VT_LLOCAL) {
        v1.r = VT_LOCAL | VT_LVAL;

        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66, r = 0; /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3, r = 0; /* movq */
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE) {
            b = 0xbe0f; /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f; /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f; /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f; /* movzwl */
            gen_modrm64(b, r, fr, sv->sym, fc);
            gen_modrm(r, fr, sv->sym, fc);
            o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
            gen_addrpc32(fr, sv->sym, fc);
            if (sv->sym->type.t & VT_STATIC) {
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
                o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                gen_gotpcrel(fr, sv->sym, fc);
        } else if (is64_type(ft)) {
            orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
            orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
    } else if (v == VT_LOCAL) {
        orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
        gen_modrm(r, VT_LOCAL, sv->sym, fc);
    } else if (v == VT_CMP) {
        oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
        orex(0,r,0, 0x0f); /* setxx %br */
        o(0xc0 + REG_VALUE(r));
    } else if (v == VT_JMP || v == VT_JMPI) {
        oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
        o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
        oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        if (r == TREG_XMM0) {
            assert(v == TREG_ST0);
            /* gen_cvt_ftof(VT_DOUBLE); */
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
        } else if (r == TREG_ST0) {
            assert(v == TREG_XMM0);
            /* gen_cvt_ftof(VT_LDOUBLE); */
            /* movsd %xmm0,-0x10(%rsp) */
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
    /* store the REX prefix in this variable when PIC is enabled */

    v = pe_getimport(v, &v2);

    fr = v->r & VT_VALMASK;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        pic = is64_type(bt) ? 0x49 : 0x41;

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x7e0f); /* movd */
    } else if (bt == VT_DOUBLE) {
        o(0xd60f); /* movq */
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        if (bt == VT_BYTE || bt == VT_BOOL)
        else if (is64_type(bt))

    /* xxx r, (%r11) where xxx is mov, movq, fld, etc. */
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            o(0xc0 + fr + r * 8); /* mov r, fr */
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            o(0xc0 + fr + r * 8); /* mov r, fr */
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        if (vtop->r & VT_SYM) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym,
                   ind + 1, R_X86_64_PC32);
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
        /* otherwise, indirect call */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
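/* Added note (not part of the original source): 0xe8 is "call rel32" and
   0xe9 is "jmp rel32", so adding is_jmp to the opcode selects between them.
   The CPU resolves the rel32 field relative to the end of the 5-byte
   instruction, while the PC32 relocation placed at ind + 1 is resolved
   relative to the start of the 4-byte field, hence the -4 bias on the
   stored addend.  The indirect form uses opcode 0xff with ModRM 0xd0+reg
   (call *r) or 0xe0+reg (jmp *r), which is what is_jmp << 4 selects. */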
static const uint8_t arg_regs[] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

static int func_scratch;
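/* Added note (not part of the original source): this block implements the
   Microsoft x64 calling convention used when TCC_TARGET_PE is defined.  The
   first four integer/pointer arguments are passed in RCX, RDX, R8 and R9
   (the arg_regs table above), and the caller always reserves at least 32
   bytes of register "home" space on the stack.  func_scratch roughly tracks
   the extra per-function scratch area needed for arguments such as structs
   and long doubles that are passed through a stack copy. */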
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

void gen_offs_sp(int b, int r, int d)
    orex(1,0,r & 0x100 ? 0 : r, b);
        o(0x2444 | (REG_VALUE(r) << 3));
        o(0x2484 | (REG_VALUE(r) << 3));
void gfunc_call(int nb_args)
    int size, align, r, args_size, i, d, j, bt, struct_size;
    int nb_reg_args, gen_reg;

    nb_reg_args = nb_args;
    args_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;

    /* for struct arguments, we need to call memcpy, and that call would
       clobber the register arguments we are preparing.  So we process the
       arguments that will be passed on the stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        if (bt == VT_STRUCT) {
            size = type_size(&sv->type, &align);
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            gen_offs_sp(0x8d, r, struct_size);
            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
        } else if (bt == VT_LDOUBLE) {
            gen_offs_sp(0xdb, 0x107, struct_size);

    if (func_scratch < struct_size)
        func_scratch = struct_size;
    for (i = 0; i < REGN; ++i)
        save_reg(arg_regs[i]);

    gen_reg = nb_reg_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        bt = (vtop->type.t & VT_BTYPE);

        if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
            if (bt == VT_LDOUBLE)
            size = type_size(&vtop->type, &align);
            /* align to stack align size */
            size = (size + 15) & ~15;
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, j*8);
                gen_offs_sp(0x8d, d, struct_size);
        } else if (is_sse_float(vtop->type.t)) {
            gv(RC_FLOAT); /* only one float register */
                /* movq %xmm0, j*8(%rsp) */
                gen_offs_sp(0xd60f66, 0x100, j*8);
                /* movaps %xmm0, %xmmN */
                /* mov %xmm0, %rxx */
                o(0xc0 + REG_VALUE(d));
                gen_offs_sp(0x89, r, j*8);
                gv(reg_classes[d] & ~RC_INT);
                o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8);
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
    int addr, reg_param_index, bt;

    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        bt = type->t & VT_BTYPE;
        if (reg_param_index < REGN) {
            /* save arguments passed by register */
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS)
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
/* generate function epilog */
void gfunc_epilog(void)
    if (func_ret_sub == 0) {
        g(func_ret_sub >> 8);

    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
static void gadd_sp(int val)
    if (val == (char)val) {
        oad(0xc48148, val); /* add $xxx, %rsp */

static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};
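/* Added note (not part of the original source): this is the System V AMD64
   calling convention used on non-Windows targets.  Up to six integer or
   pointer arguments go in RDI, RSI, RDX, RCX, R8 and R9 (so REGN is 6
   here), up to eight floating point arguments go in %xmm0-%xmm7, and the
   rest is passed on a stack kept 16-byte aligned at the point of the
   call. */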
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
    int size, align, r, args_size, i;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float arguments */
    for(i = 0; i < nb_args; i++) {
        if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
            args_size += type_size(&vtop[-i].type, &align);
            args_size = (args_size + 7) & ~7;
        } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
        } else if (is_sse_float(vtop[-i].type.t)) {
            if (nb_sse_args > 8) args_size += 8;
            if (nb_reg_args > REGN) args_size += 8;
    /* for struct arguments, we need to call memcpy, and that call would
       clobber the register arguments we are preparing.  So we process the
       arguments that will be passed on the stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;

    /* adjust stack to align SSE boundary */
    if (args_size &= 15) {
        /* fetch cpu flag before the following sub changes its value */
        if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)

        args_size = 16 - args_size;
        oad(0xec81, args_size); /* sub $xxx, %rsp */
    for(i = 0; i < nb_args; i++) {
        if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
            size = type_size(&vtop->type, &align);
            /* align to stack align size */
            size = (size + 7) & ~7;
            /* allocate the necessary size on stack */
            oad(0xec81, size); /* sub $xxx, %rsp */
            /* generate structure store */
            orex(1, r, 0, 0x89); /* mov %rsp, r */
            o(0xe0 + REG_VALUE(r));

            /* following code breaks vtop[1], vtop[2], and vtop[3] */
            SValue tmp1 = vtop[1];
            SValue tmp2 = vtop[2];
            SValue tmp3 = vtop[3];
            vset(&vtop->type, r | VT_LVAL, 0);
        } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
            oad(0xec8148, size); /* sub $xxx, %rsp */
            o(0x7cdb); /* fstpt 0(%rsp) */
        } else if (is_sse_float(vtop->type.t)) {
            o(0x50); /* push $rax */
            /* movq %xmm0, (%rsp) */
            /* XXX: implicit cast ? */
            orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    for(i = 0; i < nb_args; i++) {
        if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
            (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        } else if (is_sse_float(vtop->type.t)) {
            gv(RC_FLOAT); /* only one float register */
            /* movaps %xmm0, %xmmN */
            o(0xc0 + (sse_reg << 3));
            /* XXX: implicit cast ? */
            if (j == 2 || j == 3)
            /* j=2: r10, j=3: r11 */
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */

    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
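/* Added note (not part of the original source): for calls to variadic
   functions the System V ABI requires %al to hold an upper bound on the
   number of vector registers used for the arguments, so a variadic callee
   knows whether its prologue must spill %xmm0-%xmm7 into the register save
   area; the code above simply loads that count before every call. */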
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
    int i, addr, align, size;
    int param_index, param_addr, reg_param_index, sse_param_index;

    sym = func_type->ref;

    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            if (is_sse_float(type->t)) {
                if (seen_sse_num < 8) {
                seen_stack_size += 8;
            } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
                size = type_size(type, &align);
                size = (size + 7) & ~7;
                seen_stack_size += size;
            } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
                seen_stack_size += LDOUBLE_SIZE;
                if (seen_reg_num < REGN) {
                seen_stack_size += 8;
        /* movl $0x????????, -0x10(%rbp) */
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            o(0xd60f66); /* movq */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            /* movq $0, loc+8(%rbp) */
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
    sym = func_type->ref;

    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
        push_arg_reg(reg_param_index);

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        size = type_size(type, &align);
        size = (size + 7) & ~7;
        if (is_sse_float(type->t)) {
            if (sse_param_index < 8) {
                /* save arguments passed by register */
                o(0xd60f66); /* movq */
                gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
        } else if ((type->t & VT_BTYPE) == VT_STRUCT ||
                   (type->t & VT_BTYPE) == VT_LDOUBLE) {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                push_arg_reg(reg_param_index);
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
/* generate function epilog */
void gfunc_epilog(void)
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc2); /* ret n */
        g(func_ret_sub >> 8);

    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;

    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
/* generate a jump to a label */
    return psym(0xe9, t);

/* generate a jump to a fixed address */
void gjmp_addr(int a)
        oad(0xe9, a - ind - 5);
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
    v = vtop->r & VT_VALMASK;
        /* fast case : can jump directly since flags are set */
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
                p = (int *)(cur_text_section->data + *p);
        if (is_float(vtop->type.t) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
            /* constant jmp optimization */
            if ((vtop->c.i != 0) != inv)
            o(0xc0 + REG_VALUE(v) * 9);
            t = psym(0x85 ^ inv, t);
/* generate an integer binary operation */
void gen_opi(int op)
    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    case TOK_ADDC1: /* add with carry generation */
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            gv2(RC_INT, RC_INT);
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        if (op >= TOK_ULT && op <= TOK_GT) {
    case TOK_SUBC1: /* sub with carry generation */
    case TOK_ADDC2: /* add with carry use */
    case TOK_SUBC2: /* sub with carry use */
        gv2(RC_INT, RC_INT);
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        opc = 0xc0 | (opc << 3);
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
void gen_opl(int op)

/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
    int a, ft, fc, swapped, r;
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {

    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {

    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
            else if (op == TOK_EQ || op == TOK_NE)
            o(0xc9d9); /* fxch %st(1) */
            o(0xe9da); /* fucompp */
            o(0xe0df); /* fnstsw %ax */
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                o(0x45c4f6); /* test $0x45, %ah */
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
        o(0xde); /* fxxxp %st, %st(1) */

        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                r = get_reg(RC_INT);
                v1.r = VT_LOCAL | VT_LVAL;

            if (op == TOK_EQ || op == TOK_NE) {
            if (op == TOK_LE || op == TOK_LT)
            if (op == TOK_LE || op == TOK_GE) {
                op = 0x93; /* setae */
                op = 0x97; /* seta */

            o(0x7e0ff3); /* movq */
            gen_modrm(1, r, vtop->sym, fc);
            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
                o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
                o(0x2e0f); /* ucomisd */
                gen_modrm(0, r, vtop->sym, fc);
            /* no memory reference possible for long double operations */
            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                load(TREG_XMM0, vtop);

            if ((ft & VT_BTYPE) == VT_LDOUBLE) {
                o(0xde); /* fxxxp %st, %st(1) */
            /* if saved lvalue, then we must reload it */
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                r = get_reg(RC_INT);
                v1.r = VT_LOCAL | VT_LVAL;

            /* movq %xmm0,%xmm1 */
            load(TREG_XMM0, vtop);
            /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
            if ((ft & VT_BTYPE) == VT_DOUBLE) {
            if ((ft & VT_BTYPE) == VT_DOUBLE) {
            gen_modrm(0, r, vtop->sym, fc);
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        save_reg(TREG_XMM0);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
        o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
        vtop->r = TREG_XMM0;
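/* Added note (not part of the original source): conversions to long double
   go through the x87 unit (the value is pushed and loaded with fild), since
   there is no SSE path to the 80-bit format.  Conversions to float/double
   use cvtsi2ss/cvtsi2sd (prefix 0xf3/0xf2, opcode 0x0f 0x2a) into %xmm0,
   using the 64-bit form for 'long long' and 'unsigned int' sources so the
   value is neither truncated nor wrongly sign-extended. */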
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
    if (bt == VT_FLOAT) {
        if (tbt == VT_DOUBLE) {
            o(0xc0140f); /* unpcklps */
            o(0xc05a0f); /* cvtps2pd */
        } else if (tbt == VT_LDOUBLE) {
            /* movss %xmm0,-0x10(%rsp) */
            o(0xf02444d9); /* flds -0x10(%rsp) */
    } else if (bt == VT_DOUBLE) {
        if (tbt == VT_FLOAT) {
            o(0xc0140f66); /* unpcklpd */
            o(0xc05a0f66); /* cvtpd2ps */
        } else if (tbt == VT_LDOUBLE) {
            /* movsd %xmm0,-0x10(%rsp) */
            o(0xf02444dd); /* fldl -0x10(%rsp) */
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            vtop->r = TREG_XMM0;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            vtop->r = TREG_XMM0;
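/* Added note (not part of the original source): float<->double conversions
   stay in SSE (cvtps2pd/cvtpd2ps on %xmm0), but anything involving long
   double has to round-trip through memory: the value is stored to a scratch
   slot at -0x10(%rsp) with movss/movsd or fstps/fstpl and reloaded with the
   matching flds/fldl or movss/movsd, moving it between %xmm0 and the x87
   register stack. */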
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
    int ft, bt, size, r;

    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
    } else if (bt == VT_DOUBLE) {
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + (REG_VALUE(r) << 3));

/* computed goto support */
/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/