2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 /* number of available registers */
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see gv2() code which does
30 assumptions on it). */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
36 #define RC_XMM0 0x0020
37 #define RC_ST0 0x0040 /* only for long double */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
58 #define REX_BASE(reg) ((reg) >> 3)
59 #define REG_VALUE(reg) ((reg) & 7)
61 int reg_classes
[NB_REGS
] = {
62 /* eax */ RC_INT
| RC_RAX
,
63 /* ecx */ RC_INT
| RC_RCX
,
64 /* edx */ RC_INT
| RC_RDX
,
65 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
69 /* return registers for function */
70 #define REG_IRET TREG_RAX /* single word int return register */
71 #define REG_LRET TREG_RDX /* second word return register (for long long) */
72 #define REG_FRET TREG_XMM0 /* float return register */
74 /* defined if function parameters must be evaluated in reverse order */
75 #define INVERT_FUNC_PARAMS
77 /* pointer size, in bytes */
80 /* long double size and alignment, in bytes */
81 #define LDOUBLE_SIZE 16
82 #define LDOUBLE_ALIGN 8
83 /* maximum alignment (for aligned attribute support) */
86 /******************************************************/
89 #define EM_TCC_TARGET EM_X86_64
91 /* relocation type for 32 bit data relocation */
92 #define R_DATA_32 R_X86_64_32
93 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
94 #define R_COPY R_X86_64_COPY
96 #define ELF_START_ADDR 0x08048000
97 #define ELF_PAGE_SIZE 0x1000
99 /******************************************************/
101 static unsigned long func_sub_sp_offset
;
102 static int func_ret_sub
;
104 /* XXX: make it faster ? */
109 if (ind1
> cur_text_section
->data_allocated
)
110 section_realloc(cur_text_section
, ind1
);
111 cur_text_section
->data
[ind
] = c
;
115 void o(unsigned int c
)
131 void gen_le64(int64_t c
)
143 /* output a symbol and patch all calls to it */
144 void gsym_addr(int t
, int a
)
148 ptr
= (int *)(cur_text_section
->data
+ t
);
149 n
= *ptr
; /* next value */
160 /* psym is used to put an instruction with a data field which is a
161 reference to a symbol. It is in fact the same as oad ! */
164 static int is64_type(int t
)
166 return ((t
& VT_BTYPE
) == VT_PTR
||
167 (t
& VT_BTYPE
) == VT_FUNC
||
168 (t
& VT_BTYPE
) == VT_LLONG
);
171 static int is_sse_float(int t
) {
174 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
177 /* instruction + 4 bytes data. Return the address of the data */
178 static int oad(int c
, int s
)
184 if (ind1
> cur_text_section
->data_allocated
)
185 section_realloc(cur_text_section
, ind1
);
186 *(int *)(cur_text_section
->data
+ ind
) = s
;
192 /* output constant with relocation if 'r & VT_SYM' is true */
193 static void gen_addr64(int r
, Sym
*sym
, int64_t c
)
196 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
200 /* output constant with relocation if 'r & VT_SYM' is true */
201 static void gen_addr32(int r
, Sym
*sym
, int c
)
204 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
208 /* output constant with relocation if 'r & VT_SYM' is true */
209 static void gen_addrpc32(int r
, Sym
*sym
, int c
)
212 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
216 /* generate a modrm reference. 'op_reg' contains the additional 3
218 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
220 op_reg
= op_reg
<< 3;
221 if ((r
& VT_VALMASK
) == VT_CONST
) {
222 /* constant memory reference */
224 gen_addrpc32(r
, sym
, c
);
225 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
226 /* currently, we use only ebp as base */
228 /* short reference */
232 oad(0x85 | op_reg
, c
);
235 g(0x00 | op_reg
| (r
& VT_VALMASK
));
239 /* generate a modrm reference. 'op_reg' contains the additional 3
241 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
243 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
244 if ((r
& VT_VALMASK
) != VT_CONST
&&
245 (r
& VT_VALMASK
) != VT_LOCAL
) {
246 rex
|= REX_BASE(VT_VALMASK
& r
);
250 op_reg
= REG_VALUE(op_reg
) << 3;
251 if ((r
& VT_VALMASK
) == VT_CONST
) {
252 /* constant memory reference */
254 gen_addrpc32(r
, sym
, c
);
255 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
256 /* currently, we use only ebp as base */
258 /* short reference */
262 oad(0x85 | op_reg
, c
);
265 g(0x00 | op_reg
| (r
& VT_VALMASK
));
270 /* load 'r' from value 'sv' */
271 void load(int r
, SValue
*sv
)
273 int v
, t
, ft
, fc
, fr
;
282 if (v
== VT_LLOCAL
) {
284 v1
.r
= VT_LOCAL
| VT_LVAL
;
289 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
290 o(0x6e0f66); /* movd */
292 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
293 o(0x7e0ff3); /* movq */
295 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
298 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
299 o(0xbe0f); /* movsbl */
300 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
301 o(0xb60f); /* movzbl */
302 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
303 o(0xbf0f); /* movswl */
304 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
305 o(0xb70f); /* movzwl */
306 } else if (is64_type(ft
)) {
307 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
312 gen_modrm(r
, fr
, sv
->sym
, fc
);
315 if ((ft
& VT_BTYPE
) == VT_LLONG
) {
317 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
318 gen_addr64(fr
, sv
->sym
, sv
->c
.ull
);
321 o(0xc0 + REG_VALUE(r
)); /* mov $xx, r */
322 gen_addr32(fr
, sv
->sym
, fc
);
324 } else if (v
== VT_LOCAL
) {
325 o(0x48 | REX_BASE(r
));
326 o(0x8d); /* lea xxx(%ebp), r */
327 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
328 } else if (v
== VT_CMP
) {
329 oad(0xb8 + r
, 0); /* mov $0, r */
330 o(0x0f); /* setxx %br */
333 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
335 oad(0xb8 + r
, t
); /* mov $1, r */
336 o(0x05eb); /* jmp after */
338 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
340 if (r
== TREG_XMM0
) {
341 assert(v
== TREG_ST0
);
342 /* gen_cvt_ftof(VT_DOUBLE); */
343 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
344 /* movsd -0x10(%rsp),%xmm0 */
347 } else if (r
== TREG_ST0
) {
348 assert(v
== TREG_XMM0
);
349 /* gen_cvt_ftof(VT_LDOUBLE); */
350 /* movsd %xmm0,-0x10(%rsp) */
353 o(0xf02444dd); /* fldl -0x10(%rsp) */
355 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
357 o(0xc0 + r
+ v
* 8); /* mov v, r */
363 /* store register 'r' in lvalue 'v' */
364 void store(int r
, SValue
*v
)
371 fr
= v
->r
& VT_VALMASK
;
373 /* XXX: incorrect if float reg to reg */
374 if (bt
== VT_FLOAT
) {
375 o(0x7e0f66); /* movd */
377 } else if (bt
== VT_DOUBLE
) {
378 o(0xd60f66); /* movq */
380 } else if (bt
== VT_LDOUBLE
) {
381 o(0xc0d9); /* fld %st(0) */
387 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
389 else if (is64_type(bt
))
395 if (fr
== VT_CONST
||
398 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
399 } else if (fr
!= r
) {
400 /* XXX: don't we really come here? */
402 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
405 if (fr
== VT_CONST
||
408 gen_modrm(r
, v
->r
, v
->sym
, fc
);
409 } else if (fr
!= r
) {
410 /* XXX: don't we really come here? */
412 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
417 static void gadd_sp(int val
)
419 if (val
== (char)val
) {
423 oad(0xc48148, val
); /* add $xxx, %rsp */
427 /* 'is_jmp' is '1' if it is a jump */
428 static void gcall_or_jmp(int is_jmp
)
431 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
433 if (vtop
->r
& VT_SYM
) {
434 /* relocation case */
435 greloc(cur_text_section
, vtop
->sym
,
436 ind
+ 1, R_X86_64_PC32
);
438 /* put an empty PC32 relocation */
439 put_elf_reloc(symtab_section
, cur_text_section
,
440 ind
+ 1, R_X86_64_PC32
, 0);
442 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
444 /* otherwise, indirect call */
448 o(0xff); /* call/jmp *r */
449 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
453 static uint8_t arg_regs
[6] = {
454 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
456 /* Generate function call. The function address is pushed first, then
457 all the parameters in call order. This functions pops all the
458 parameters and the function address. */
459 void gfunc_call(int nb_args
)
461 int size
, align
, r
, args_size
, i
, func_call
;
466 int sse_reg
, gen_reg
;
468 /* calculate the number of integer/float arguments */
470 for(i
= 0; i
< nb_args
; i
++) {
471 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
472 args_size
+= type_size(&vtop
->type
, &align
);
473 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
475 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
477 if (nb_sse_args
> 8) args_size
+= 8;
480 if (nb_reg_args
> 6) args_size
+= 8;
484 /* for struct arguments, we need to call memcpy and the function
485 call breaks register passing arguments we are preparing.
486 So, we process arguments which will be passed by stack first. */
488 gen_reg
= nb_reg_args
;
489 sse_reg
= nb_sse_args
;
490 /* adjust stack to align SSE boundary */
491 if (args_size
&= 8) {
492 o(0x50); /* push $rax */
494 for(i
= 0; i
< nb_args
; i
++) {
495 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
496 size
= type_size(&vtop
->type
, &align
);
497 /* align to stack align size */
498 size
= (size
+ 3) & ~3;
499 /* allocate the necessary size on stack */
501 oad(0xec81, size
); /* sub $xxx, %rsp */
502 /* generate structure store */
504 o(0x48 + REX_BASE(r
));
505 o(0x89); /* mov %rsp, r */
508 /* following code breaks vtop[1] */
509 SValue tmp
= vtop
[1];
510 vset(&vtop
->type
, r
| VT_LVAL
, 0);
516 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
519 oad(0xec8148, size
); /* sub $xxx, %rsp */
520 o(0x7cdb); /* fstpt 0(%rsp) */
524 } else if (is_sse_float(vtop
->type
.t
)) {
528 o(0x50); /* push $rax */
529 /* movq %xmm0, (%rsp) */
537 /* XXX: implicit cast ? */
540 o(0x50 + r
); /* push r */
548 /* then, we prepare register passing arguments.
549 Note that we cannot set RDX and RCX in this loop because gv()
550 may break these temporary registers. Let's use R10 and R11
552 gen_reg
= nb_reg_args
;
553 sse_reg
= nb_sse_args
;
554 for(i
= 0; i
< nb_args
; i
++) {
555 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
556 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
557 } else if (is_sse_float(vtop
->type
.t
)) {
560 gv(RC_FLOAT
); /* only one float register */
561 /* movaps %xmm0, %xmmN */
563 o(0xc0 + (sse_reg
<< 3));
568 /* XXX: implicit cast ? */
573 o(0xc0 + r
* 8 + arg_regs
[j
]);
576 /* j=2: r10, j=3: r11 */
580 /* j=4: r8, j=5: r9 */
581 o(0xc0 + r
* 8 + j
- 4);
588 /* Copy R10 and R11 into RDX and RCX, respectively */
589 if (nb_reg_args
> 2) {
590 o(0xd2894c); /* mov %r10, %rdx */
591 if (nb_reg_args
> 3) {
592 o(0xd9894c); /* mov %r11, %rcx */
596 save_regs(0); /* save used temporary registers */
598 func_sym
= vtop
->type
.ref
;
599 func_call
= FUNC_CALL(func_sym
->r
);
600 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
608 /* XXX: support PE? */
609 #warning "PE isn't tested at all"
610 #define FUNC_PROLOG_SIZE 12
612 #define FUNC_PROLOG_SIZE 11
615 static void push_arg_reg(int i
) {
617 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
620 /* generate function prolog of type 't' */
621 void gfunc_prolog(CType
*func_type
)
623 int i
, addr
, align
, size
, func_call
;
624 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
630 sym
= func_type
->ref
;
631 func_call
= FUNC_CALL(sym
->r
);
634 ind
+= FUNC_PROLOG_SIZE
;
635 func_sub_sp_offset
= ind
;
637 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
638 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
639 seen_reg_num
= seen_sse_num
= 0;
640 /* frame pointer and return address */
641 seen_stack_size
= PTR_SIZE
* 2;
642 /* count the number of seen parameters */
643 sym
= func_type
->ref
;
644 while ((sym
= sym
->next
) != NULL
) {
646 if (is_sse_float(type
->t
)) {
647 if (seen_sse_num
< 8) {
650 seen_stack_size
+= 8;
652 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
653 size
= type_size(type
, &align
);
654 size
= (size
+ 3) & ~3;
655 seen_stack_size
+= size
;
656 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
657 seen_stack_size
+= LDOUBLE_SIZE
;
659 if (seen_reg_num
< 6) {
662 seen_stack_size
+= 8;
668 /* movl $0x????????, -0x10(%rbp) */
670 gen_le32(seen_reg_num
* 8);
671 /* movl $0x????????, -0xc(%rbp) */
673 gen_le32(seen_sse_num
* 16 + 48);
674 /* movl $0x????????, -0x8(%rbp) */
676 gen_le32(seen_stack_size
);
678 /* save all register passing arguments */
679 for (i
= 0; i
< 8; i
++) {
681 o(0xd60f66); /* movq */
682 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
683 /* movq $0, loc+8(%rbp) */
688 for (i
= 0; i
< 6; i
++) {
693 sym
= func_type
->ref
;
698 /* if the function returns a structure, then add an
699 implicit pointer parameter */
701 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
702 push_arg_reg(reg_param_index
);
709 /* define parameters */
710 while ((sym
= sym
->next
) != NULL
) {
712 size
= type_size(type
, &align
);
713 size
= (size
+ 3) & ~3;
714 if (is_sse_float(type
->t
)) {
715 if (sse_param_index
< 8) {
716 /* save arguments passed by register */
718 o(0xd60f66); /* movq */
719 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
726 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
727 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
731 if (reg_param_index
< 6) {
732 /* save arguments passed by register */
733 push_arg_reg(reg_param_index
);
741 sym_push(sym
->v
& ~SYM_FIELD
, type
,
742 VT_LOCAL
| VT_LVAL
, param_addr
);
747 /* generate function epilog */
748 void gfunc_epilog(void)
753 if (func_ret_sub
== 0) {
758 g(func_ret_sub
>> 8);
760 /* align local size to word & save local variables */
761 v
= (-loc
+ 15) & -16;
763 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
766 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
767 oad(0xb8, v
); /* mov stacksize, %eax */
768 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
769 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
773 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
774 o(0xec8148); /* sub rsp, stacksize */
776 #if FUNC_PROLOG_SIZE == 12
777 o(0x90); /* adjust to FUNC_PROLOG_SIZE */
783 /* generate a jump to a label */
786 return psym(0xe9, t
);
789 /* generate a jump to a fixed address */
790 void gjmp_addr(int a
)
798 oad(0xe9, a
- ind
- 5);
802 /* generate a test. set 'inv' to invert test. Stack entry is popped */
803 int gtst(int inv
, int t
)
807 v
= vtop
->r
& VT_VALMASK
;
809 /* fast case : can jump directly since flags are set */
811 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
812 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
813 /* && or || optimization */
814 if ((v
& 1) == inv
) {
815 /* insert vtop->c jump list in t */
818 p
= (int *)(cur_text_section
->data
+ *p
);
826 /* XXX: not tested */
827 if (is_float(vtop
->type
.t
) ||
828 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
832 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
833 /* constant jmp optimization */
834 if ((vtop
->c
.i
!= 0) != inv
)
841 t
= psym(0x85 ^ inv
, t
);
848 /* generate an integer binary operation */
855 case TOK_ADDC1
: /* add with carry generation */
858 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
859 !is64_type(vtop
->type
.t
)) {
863 if (is64_type(vtop
->type
.t
)) {
864 o(0x48 | REX_BASE(r
));
869 /* XXX: generate inc and dec for smaller code ? */
871 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
875 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
882 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
883 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
884 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
886 o((opc
<< 3) | 0x01);
887 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
890 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
896 case TOK_SUBC1
: /* sub with carry generation */
899 case TOK_ADDC2
: /* add with carry use */
902 case TOK_SUBC2
: /* sub with carry use */
918 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
919 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
920 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
923 o(0xaf0f); /* imul fr, r */
924 o(0xc0 + fr
+ r
* 8);
935 opc
= 0xc0 | (opc
<< 3);
936 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
940 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
941 o(0x48 | REX_BASE(r
));
948 o(0xc1); /* shl/shr/sar $xxx, r */
952 /* we generate the shift in ecx */
955 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
956 o(0x48 | REX_BASE(r
));
958 o(0xd3); /* shl/shr/sar %cl, r */
969 /* first operand must be in eax */
970 /* XXX: need better constraint for second operand */
976 if (op
== TOK_UMULL
) {
977 o(0xf7); /* mul fr */
982 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
983 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
986 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
987 o(0x9948); /* cqto */
988 o(0x48 + REX_BASE(fr
));
992 o(0xf7); /* idiv fr, %eax */
995 if (op
== '%' || op
== TOK_UMOD
)
1008 void gen_opl(int op
)
1013 /* generate a floating point operation 'v = t1 op t2' instruction. The
1014 two operands are guaranteed to have the same floating point type */
1015 /* XXX: need to use ST1 too */
1016 void gen_opf(int op
)
1018 int a
, ft
, fc
, swapped
, r
;
1020 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1022 /* convert constants to memory references */
1023 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1028 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1031 /* must put at least one value in the floating point register */
1032 if ((vtop
[-1].r
& VT_LVAL
) &&
1033 (vtop
[0].r
& VT_LVAL
)) {
1039 /* swap the stack if needed so that t1 is the register and t2 is
1040 the memory reference */
1041 if (vtop
[-1].r
& VT_LVAL
) {
1045 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1046 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1047 /* load on stack second operand */
1048 load(TREG_ST0
, vtop
);
1049 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1050 if (op
== TOK_GE
|| op
== TOK_GT
)
1052 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1055 o(0xc9d9); /* fxch %st(1) */
1056 o(0xe9da); /* fucompp */
1057 o(0xe0df); /* fnstsw %ax */
1059 o(0x45e480); /* and $0x45, %ah */
1060 o(0x40fC80); /* cmp $0x40, %ah */
1061 } else if (op
== TOK_NE
) {
1062 o(0x45e480); /* and $0x45, %ah */
1063 o(0x40f480); /* xor $0x40, %ah */
1065 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1066 o(0x05c4f6); /* test $0x05, %ah */
1069 o(0x45c4f6); /* test $0x45, %ah */
1076 /* no memory reference possible for long double operations */
1077 load(TREG_ST0
, vtop
);
1101 o(0xde); /* fxxxp %st, %st(1) */
1106 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1107 /* if saved lvalue, then we must reload it */
1110 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1112 r
= get_reg(RC_INT
);
1114 v1
.r
= VT_LOCAL
| VT_LVAL
;
1120 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1123 if (op
== TOK_LE
|| op
== TOK_LT
)
1125 if (op
== TOK_LE
|| op
== TOK_GE
) {
1126 op
= 0x93; /* setae */
1128 op
= 0x97; /* seta */
1133 o(0x7e0ff3); /* movq */
1134 gen_modrm(1, r
, vtop
->sym
, fc
);
1136 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1139 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1142 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1145 o(0x2e0f); /* ucomisd */
1146 gen_modrm(0, r
, vtop
->sym
, fc
);
1153 /* no memory reference possible for long double operations */
1154 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1155 load(TREG_XMM0
, vtop
);
1175 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1176 o(0xde); /* fxxxp %st, %st(1) */
1179 /* if saved lvalue, then we must reload it */
1181 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1183 r
= get_reg(RC_INT
);
1185 v1
.r
= VT_LOCAL
| VT_LVAL
;
1191 /* movq %xmm0,%xmm1 */
1194 load(TREG_XMM0
, vtop
);
1195 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1196 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1205 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1212 gen_modrm(0, r
, vtop
->sym
, fc
);
1220 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1221 and 'long long' cases. */
1222 void gen_cvt_itof(int t
)
1224 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1227 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1228 /* signed long long to float/double/long double (unsigned case
1229 is handled generically) */
1230 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1231 o(0x242cdf); /* fildll (%rsp) */
1232 o(0x08c48348); /* add $8, %rsp */
1233 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1234 (VT_INT
| VT_UNSIGNED
)) {
1235 /* unsigned int to float/double/long double */
1236 o(0x6a); /* push $0 */
1238 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1239 o(0x242cdf); /* fildll (%rsp) */
1240 o(0x10c48348); /* add $16, %rsp */
1242 /* int to float/double/long double */
1243 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1244 o(0x2404db); /* fildl (%rsp) */
1245 o(0x08c48348); /* add $8, %rsp */
1249 save_reg(TREG_XMM0
);
1251 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1252 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1253 (VT_INT
| VT_UNSIGNED
) ||
1254 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1258 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1259 vtop
->r
= TREG_XMM0
;
1263 /* convert from one floating point type to another */
1264 void gen_cvt_ftof(int t
)
1272 if (bt
== VT_FLOAT
) {
1274 if (tbt
== VT_DOUBLE
) {
1275 o(0xc0140f); /* unpcklps */
1276 o(0xc05a0f); /* cvtps2pd */
1277 } else if (tbt
== VT_LDOUBLE
) {
1278 /* movss %xmm0,-0x10(%rsp) */
1281 o(0xf02444d9); /* flds -0x10(%rsp) */
1284 } else if (bt
== VT_DOUBLE
) {
1286 if (tbt
== VT_FLOAT
) {
1287 o(0xc0140f66); /* unpcklpd */
1288 o(0xc05a0f66); /* cvtpd2ps */
1289 } else if (tbt
== VT_LDOUBLE
) {
1290 /* movsd %xmm0,-0x10(%rsp) */
1293 o(0xf02444dd); /* fldl -0x10(%rsp) */
1298 if (tbt
== VT_DOUBLE
) {
1299 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1300 /* movsd -0x10(%rsp),%xmm0 */
1303 vtop
->r
= TREG_XMM0
;
1304 } else if (tbt
== VT_FLOAT
) {
1305 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1306 /* movss -0x10(%rsp),%xmm0 */
1309 vtop
->r
= TREG_XMM0
;
1314 /* convert fp to int 't' type */
1315 void gen_cvt_ftoi(int t
)
1317 int ft
, bt
, size
, r
;
1320 if (bt
== VT_LDOUBLE
) {
1321 gen_cvt_ftof(VT_DOUBLE
);
1331 r
= get_reg(RC_INT
);
1332 if (bt
== VT_FLOAT
) {
1334 } else if (bt
== VT_DOUBLE
) {
1340 o(0x48 + REX_BASE(r
));
1342 o(0x2c0f); /* cvttss2si or cvttsd2si */
1343 o(0xc0 + (REG_VALUE(r
) << 3));
1347 /* computed goto support */
1354 /* end of x86-64 code generator */
1355 /*************************************************************/