2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 /* number of available registers */
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see gv2() code which does
30 assumptions on it). */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
36 #define RC_XMM0 0x0020
37 #define RC_ST0 0x0040 /* only for long double */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
58 #define REX_BASE(reg) ((reg) >> 3)
59 #define REG_VALUE(reg) ((reg) & 7)
/* Register-class table, indexed by TREG_* register number: maps each
   machine register to the bitmask of RC_* allocation classes it belongs
   to (see gv2() ordering requirement above).
   NOTE(review): this extraction is truncated — entries after xmm0 (e.g.
   st0) and the closing "};" are not visible here. */
61 int reg_classes
[NB_REGS
] = {
62 /* eax */ RC_INT
| RC_RAX
,
63 /* ecx */ RC_INT
| RC_RCX
,
64 /* edx */ RC_INT
| RC_RDX
,
65 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
69 /* return registers for function */
70 #define REG_IRET TREG_RAX /* single word int return register */
71 #define REG_LRET TREG_RDX /* second word return register (for long long) */
72 #define REG_FRET TREG_XMM0 /* float return register */
74 /* defined if function parameters must be evaluated in reverse order */
75 #define INVERT_FUNC_PARAMS
77 /* pointer size, in bytes */
80 /* long double size and alignment, in bytes */
81 #define LDOUBLE_SIZE 16
82 #define LDOUBLE_ALIGN 8
83 /* maximum alignment (for aligned attribute support) */
86 /******************************************************/
89 #define EM_TCC_TARGET EM_X86_64
91 /* relocation type for 32 bit data relocation */
92 #define R_DATA_32 R_X86_64_32
93 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
94 #define R_COPY R_X86_64_COPY
96 #define ELF_START_ADDR 0x08048000
97 #define ELF_PAGE_SIZE 0x1000
99 /******************************************************/
/* Byte offset (in cur_text_section) of the current function's
   "sub $..., %rsp" prolog; patched later by gfunc_epilog() once the
   final frame size is known. */
101 static unsigned long func_sub_sp_offset
;
/* Byte count popped by "ret $n" in the epilog (e.g. for stdcall-style
   callee-cleanup); 0 means a plain "ret". */
102 static int func_ret_sub
;
104 /* XXX: make it faster ? */
/* NOTE(review): fragment of the byte-emitter g() — the signature and
   the lines updating 'ind' (orig. 105-108, 112-114) are missing from
   this extraction. Grows the text section if needed, then stores one
   byte 'c' at the current output index 'ind'. */
109 if (ind1
> cur_text_section
->data_allocated
)
110 section_realloc(cur_text_section
, ind1
);
111 cur_text_section
->data
[ind
] = c
;
/* o(): emit an instruction encoded little-endian in 'c', one byte at a
   time until the remaining value is zero (body missing here). */
115 void o(unsigned int c
)
/* gen_le64(): emit a 64-bit little-endian constant (body missing). */
131 void gen_le64(int64_t c
)
143 /* output a symbol and patch all calls to it */
/* Walks the chained list of 32-bit forward-reference slots starting at
   text offset 't' and patches each to point at address 'a'.
   NOTE(review): fragment — the loop structure and the patch/store lines
   (orig. 145-147, 150-158) are missing from this extraction. */
144 void gsym_addr(int t
, int a
)
148 ptr
= (int *)(cur_text_section
->data
+ t
);
149 n
= *ptr
; /* next value */
160 /* psym is used to put an instruction with a data field which is a
161 reference to a symbol. It is in fact the same as oad ! */
164 static int is64_type(int t
)
166 return ((t
& VT_BTYPE
) == VT_PTR
||
167 (t
& VT_BTYPE
) == VT_FUNC
||
168 (t
& VT_BTYPE
) == VT_LLONG
);
171 static int is_sse_float(int t
) {
174 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
177 /* instruction + 4 bytes data. Return the address of the data */
/* NOTE(review): fragment — the declaration of 'ind1', the opcode
   emission and the return statement (orig. 179-183, 187-190) are
   missing from this extraction. Visible part: grow the text section if
   the 4 data bytes would overflow it, then store 's' at offset 'ind'. */
178 static int oad(int c
, int s
)
184 if (ind1
> cur_text_section
->data_allocated
)
185 section_realloc(cur_text_section
, ind1
);
186 *(int *)(cur_text_section
->data
+ ind
) = s
;
192 /* output constant with relocation if 'r & VT_SYM' is true */
/* Emits a 64-bit absolute constant; when the value is symbolic, first
   records an R_X86_64_64 relocation at the current output position.
   NOTE(review): fragment — the VT_SYM test and the gen_le64() call
   (orig. 194-195, 197-198) are missing from this extraction. */
193 static void gen_addr64(int r
, Sym
*sym
, int64_t c
)
196 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
200 /* output constant with relocation if 'r & VT_SYM' is true */
/* Emits a 32-bit PC-relative constant; when symbolic, records an
   R_X86_64_PC32 relocation at the current output position.
   NOTE(review): fragment — the VT_SYM test and the gen_le32() call
   (orig. 202-203, 205-206) are missing from this extraction. */
201 static void gen_addrpc32(int r
, Sym
*sym
, int c
)
204 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
208 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
/* ... opcode bits (reg field of the ModRM byte). Chooses between a
   RIP-relative constant reference, an %ebp/%rbp-based local, or a plain
   register-indirect reference.
   NOTE(review): fragment — several emission lines (orig. 215, 219-226,
   228-229) are missing from this extraction. */
210 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
/* shift op_reg into the ModRM 'reg' field (bits 3-5) */
212 op_reg
= op_reg
<< 3;
213 if ((r
& VT_VALMASK
) == VT_CONST
) {
214 /* constant memory reference */
216 gen_addrpc32(r
, sym
, c
);
217 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
218 /* currently, we use only ebp as base */
220 /* short reference */
/* long displacement form: mod=10, rm=101 (disp32(%ebp)) */
224 oad(0x85 | op_reg
, c
);
/* register-indirect: mod=00, rm = register number */
227 g(0x00 | op_reg
| (r
& VT_VALMASK
));
231 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
/* ... opcode bits. 64-bit variant: emits a REX.W prefix (0x48), with
   REX.R set from op_reg's high bit and REX.B from the base register,
   then the opcode and the same ModRM forms as gen_modrm().
   NOTE(review): fragment — lines emitting the rex byte/opcode and the
   short-reference path (orig. 239-241, 245, 249-256, 258-259) are
   missing from this extraction. */
233 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
235 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
236 if ((r
& VT_VALMASK
) != VT_CONST
&&
237 (r
& VT_VALMASK
) != VT_LOCAL
) {
/* base register is an extended reg (r8-r15): set REX.B */
238 rex
|= REX_BASE(VT_VALMASK
& r
);
242 op_reg
= REG_VALUE(op_reg
) << 3;
243 if ((r
& VT_VALMASK
) == VT_CONST
) {
244 /* constant memory reference */
246 gen_addrpc32(r
, sym
, c
);
247 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
248 /* currently, we use only ebp as base */
250 /* short reference */
254 oad(0x85 | op_reg
, c
);
257 g(0x00 | op_reg
| (r
& VT_VALMASK
));
262 /* load 'r' from value 'sv' */
/* Materializes stack value 'sv' into machine register 'r': dispatches
   on the value kind (lvalue in memory, constant, local address, CPU
   flags, jump chain, or another register) and on the value's type to
   pick the right mov/movss/movsd/fld/movsx/movzx encoding.
   NOTE(review): fragment — many interleaved lines (variable setup,
   several emission lines, brace closers) are missing from this
   extraction; comments below describe only what is visible. */
263 void load(int r
, SValue
*sv
)
265 int v
, t
, ft
, fc
, fr
;
/* lvalue whose address itself lives on the stack: reload the pointer
   through a temporary local first */
274 if (v
== VT_LLOCAL
) {
276 v1
.r
= VT_LOCAL
| VT_LVAL
;
/* memory load: choose encoding by base type */
281 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
282 o(0x6e0f66); /* movd */
284 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
285 o(0x7e0ff3); /* movq */
287 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
290 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
291 o(0xbe0f); /* movsbl */
292 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
293 o(0xb60f); /* movzbl */
294 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
295 o(0xbf0f); /* movswl */
296 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
297 o(0xb70f); /* movzwl */
298 } else if (is64_type(ft
)) {
/* full 64-bit load needs the REX.W-prefixed form */
299 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
304 gen_modrm(r
, fr
, sv
->sym
, fc
);
/* constant: 64-bit immediates need the movabs/relocated form */
307 if ((ft
& VT_BTYPE
) == VT_LLONG
) {
309 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
310 gen_addr64(fr
, sv
->sym
, sv
->c
.ull
);
314 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
315 gen_addrpc32(fr
, sv
->sym
, fc
);
317 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
/* address of a local: lea off(%rbp), r */
321 } else if (v
== VT_LOCAL
) {
322 o(0x48 | REX_BASE(r
));
323 o(0x8d); /* lea xxx(%ebp), r */
324 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
/* comparison result held in the flags: zero r then setcc */
325 } else if (v
== VT_CMP
) {
326 oad(0xb8 + r
, 0); /* mov $0, r */
327 o(0x0f); /* setxx %br */
/* pending jump chain: resolve it to 0/1 in r */
330 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
332 oad(0xb8 + r
, t
); /* mov $1, r */
333 o(0x05eb); /* jmp after */
335 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
/* register-to-register moves, including x87 <-> SSE transfers that
   bounce through -0x10(%rsp) */
337 if (r
== TREG_XMM0
) {
338 assert(v
== TREG_ST0
);
339 /* gen_cvt_ftof(VT_DOUBLE); */
340 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
341 /* movsd -0x10(%rsp),%xmm0 */
344 } else if (r
== TREG_ST0
) {
345 assert(v
== TREG_XMM0
);
346 /* gen_cvt_ftof(VT_LDOUBLE); */
347 /* movsd %xmm0,-0x10(%rsp) */
350 o(0xf02444dd); /* fldl -0x10(%rsp) */
352 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
354 o(0xc0 + r
+ v
* 8); /* mov v, r */
360 /* store register 'r' in lvalue 'v' */
/* Spills register 'r' into the location described by stack value 'v':
   picks the store opcode by base type (movd/movq for SSE, fld+fstp for
   long double, byte/word/qword forms for integers), then either a
   memory ModRM store or a register-to-register mov.
   NOTE(review): fragment — variable declarations, the op/op64 opcode
   assignments and several condition continuations (orig. 362-367, 369,
   373, 376, 379-391, 393-394, 398, 400-404, 408) are missing from this
   extraction. */
361 void store(int r
, SValue
*v
)
368 fr
= v
->r
& VT_VALMASK
;
370 /* XXX: incorrect if float reg to reg */
371 if (bt
== VT_FLOAT
) {
372 o(0x7e0f66); /* movd */
374 } else if (bt
== VT_DOUBLE
) {
375 o(0xd60f66); /* movq */
377 } else if (bt
== VT_LDOUBLE
) {
378 o(0xc0d9); /* fld %st(0) */
384 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
386 else if (is64_type(bt
))
/* 64-bit store path (REX.W) */
392 if (fr
== VT_CONST
||
395 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
396 } else if (fr
!= r
) {
397 /* XXX: don't we really come here? */
399 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
/* 32-bit-or-smaller store path */
402 if (fr
== VT_CONST
||
405 gen_modrm(r
, v
->r
, v
->sym
, fc
);
406 } else if (fr
!= r
) {
407 /* XXX: don't we really come here? */
409 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
/* Adjust %rsp by 'val', using the short sign-extended-imm8 encoding
   when the value fits in a signed byte.
   NOTE(review): fragment — the imm8 branch body and the else/braces
   (orig. 415, 417-419, 421-422) are missing from this extraction. */
414 static void gadd_sp(int val
)
416 if (val
== (char)val
) {
420 oad(0xc48148, val
); /* add $xxx, %rsp */
424 /* 'is_jmp' is '1' if it is a jump */
/* Emits either a direct rel32 call/jmp (with a PC32 relocation when the
   target is a symbol or unresolved) or an indirect call/jmp through a
   register holding the target.
   NOTE(review): fragment — local declarations, the gv() of the indirect
   target and brace closers (orig. 426-427, 429, 434, 438, 440, 442-444,
   447-448) are missing from this extraction. */
425 static void gcall_or_jmp(int is_jmp
)
428 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
430 if (vtop
->r
& VT_SYM
) {
431 /* relocation case */
432 greloc(cur_text_section
, vtop
->sym
,
433 ind
+ 1, R_X86_64_PC32
);
435 /* put an empty PC32 relocation */
436 put_elf_reloc(symtab_section
, cur_text_section
,
437 ind
+ 1, R_X86_64_PC32
, 0);
/* 0xe8 = call rel32, 0xe9 (= 0xe8+1) = jmp rel32; -4 accounts for the
   PC pointing past the displacement field */
439 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
441 /* otherwise, indirect call */
445 o(0xff); /* call/jmp *r */
/* ModRM: /2 (0xd0) for call, /4 (0xe0 = 0xd0 + (1<<4)) for jmp */
446 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
/* System V AMD64 integer argument registers, in parameter order.
   NOTE(review): the closing "};" is not visible in this extraction. */
450 static uint8_t arg_regs
[6] = {
451 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
453 /* Generate function call. The function address is pushed first, then
454 all the parameters in call order. This functions pops all the
455 parameters and the function address. */
/* Three phases: (1) count integer/SSE register args and the stack
   bytes needed; (2) evaluate stack-passed args first (struct copies can
   clobber registers being prepared); (3) load register args — using
   r10/r11 as stand-ins for rdx/rcx, which gv() may clobber — then set
   %eax to the SSE arg count (varargs convention) and emit the call.
   NOTE(review): large fragment — counters' initialization, the struct
   copy, int-arg gv() calls, stack cleanup and the call itself (many
   original lines) are missing from this extraction. */
456 void gfunc_call(int nb_args
)
458 int size
, align
, r
, args_size
, i
, func_call
;
463 int sse_reg
, gen_reg
;
465 /* calculate the number of integer/float arguments */
467 for(i
= 0; i
< nb_args
; i
++) {
468 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
/* NOTE(review): uses vtop->type here while the condition tested
   vtop[-i].type — looks inconsistent; verify against upstream. */
469 args_size
+= type_size(&vtop
->type
, &align
);
470 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
472 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
474 if (nb_sse_args
> 8) args_size
+= 8;
477 if (nb_reg_args
> 6) args_size
+= 8;
481 /* for struct arguments, we need to call memcpy and the function
482 call breaks register passing arguments we are preparing.
483 So, we process arguments which will be passed by stack first. */
485 gen_reg
= nb_reg_args
;
486 sse_reg
= nb_sse_args
;
487 /* adjust stack to align SSE boundary */
/* NOTE(review): '&=' (keep only bit 3) is unusual here; presumably
   intentional 16-byte alignment logic — confirm against upstream. */
488 if (args_size
&= 8) {
489 o(0x50); /* push $rax */
491 for(i
= 0; i
< nb_args
; i
++) {
492 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
493 size
= type_size(&vtop
->type
, &align
);
494 /* align to stack align size */
495 size
= (size
+ 3) & ~3;
496 /* allocate the necessary size on stack */
498 oad(0xec81, size
); /* sub $xxx, %rsp */
499 /* generate structure store */
501 o(0x48 + REX_BASE(r
));
502 o(0x89); /* mov %rsp, r */
505 /* following code breaks vtop[1] */
506 SValue tmp
= vtop
[1];
507 vset(&vtop
->type
, r
| VT_LVAL
, 0);
513 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
516 oad(0xec8148, size
); /* sub $xxx, %rsp */
517 o(0x7cdb); /* fstpt 0(%rsp) */
521 } else if (is_sse_float(vtop
->type
.t
)) {
525 o(0x50); /* push $rax */
526 /* movq %xmm0, (%rsp) */
534 /* XXX: implicit cast ? */
537 o(0x50 + r
); /* push r */
545 /* then, we prepare register passing arguments.
546 Note that we cannot set RDX and RCX in this loop because gv()
547 may break these temporary registers. Let's use R10 and R11
549 gen_reg
= nb_reg_args
;
550 sse_reg
= nb_sse_args
;
551 for(i
= 0; i
< nb_args
; i
++) {
552 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
553 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
554 } else if (is_sse_float(vtop
->type
.t
)) {
557 gv(RC_FLOAT
); /* only one float register */
558 /* movaps %xmm0, %xmmN */
560 o(0xc0 + (sse_reg
<< 3));
565 /* XXX: implicit cast ? */
570 o(0xc0 + r
* 8 + arg_regs
[j
]);
573 /* j=2: r10, j=3: r11 */
577 /* j=4: r8, j=5: r9 */
578 o(0xc0 + r
* 8 + j
- 4);
585 save_regs(0); /* save used temporary registers */
587 /* Copy R10 and R11 into RDX and RCX, respectively */
588 if (nb_reg_args
> 2) {
589 o(0xd2894c); /* mov %r10, %rdx */
590 if (nb_reg_args
> 3) {
591 o(0xd9894c); /* mov %r11, %rcx */
595 func_sym
= vtop
->type
.ref
;
596 func_call
= FUNC_CALL(func_sym
->r
);
/* varargs ABI: %al must hold the number of vector registers used */
597 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
605 /* XXX: support PE? */
606 #warning "PE isn't tested at all"
607 #define FUNC_PROLOG_SIZE 12
609 #define FUNC_PROLOG_SIZE 11
/* Spill integer argument register i (rdi/rsi/rdx/rcx/r8/r9) into a
   freshly reserved 8-byte slot at loc(%rbp).
   NOTE(review): fragment — the line adjusting 'loc' (orig. 613) and the
   closing brace are missing from this extraction. */
612 static void push_arg_reg(int i
) {
614 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
617 /* generate function prolog of type 't' */
/* Reserves FUNC_PROLOG_SIZE bytes (patched later by gfunc_epilog), and:
   for variadic functions, builds a register-save area plus the gp/fp
   offset words used by va_arg; spills register-passed parameters to the
   stack; then pushes each named parameter as a VT_LOCAL symbol.
   NOTE(review): large fragment — index initialization, the va-area
   layout stores, stack-parameter addressing and several loop bodies
   (many original lines) are missing from this extraction. */
618 void gfunc_prolog(CType
*func_type
)
620 int i
, addr
, align
, size
, func_call
;
621 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
627 sym
= func_type
->ref
;
628 func_call
= FUNC_CALL(sym
->r
);
/* leave room for the prolog, filled in by gfunc_epilog() */
631 ind
+= FUNC_PROLOG_SIZE
;
632 func_sub_sp_offset
= ind
;
634 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
635 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
636 seen_reg_num
= seen_sse_num
= 0;
637 /* frame pointer and return address */
638 seen_stack_size
= PTR_SIZE
* 2;
639 /* count the number of seen parameters */
640 sym
= func_type
->ref
;
641 while ((sym
= sym
->next
) != NULL
) {
643 if (is_sse_float(type
->t
)) {
644 if (seen_sse_num
< 8) {
647 seen_stack_size
+= 8;
649 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
650 size
= type_size(type
, &align
);
651 size
= (size
+ 3) & ~3;
652 seen_stack_size
+= size
;
653 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
654 seen_stack_size
+= LDOUBLE_SIZE
;
656 if (seen_reg_num
< 6) {
659 seen_stack_size
+= 8;
/* record gp_offset / fp_offset / overflow size for va_start */
665 /* movl $0x????????, -0x10(%rbp) */
667 gen_le32(seen_reg_num
* 8);
668 /* movl $0x????????, -0xc(%rbp) */
670 gen_le32(seen_sse_num
* 16 + 48);
671 /* movl $0x????????, -0x8(%rbp) */
673 gen_le32(seen_stack_size
);
675 /* save all register passing arguments */
676 for (i
= 0; i
< 8; i
++) {
678 o(0xd60f66); /* movq */
679 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
680 /* movq $0, loc+8(%rbp) */
685 for (i
= 0; i
< 6; i
++) {
690 sym
= func_type
->ref
;
695 /* if the function returns a structure, then add an
696 implicit pointer parameter */
698 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
699 push_arg_reg(reg_param_index
);
706 /* define parameters */
707 while ((sym
= sym
->next
) != NULL
) {
709 size
= type_size(type
, &align
);
710 size
= (size
+ 3) & ~3;
711 if (is_sse_float(type
->t
)) {
712 if (sse_param_index
< 8) {
713 /* save arguments passed by register */
715 o(0xd60f66); /* movq */
716 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
723 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
724 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
728 if (reg_param_index
< 6) {
729 /* save arguments passed by register */
730 push_arg_reg(reg_param_index
);
/* make the parameter visible to the compiler as a local lvalue */
738 sym_push(sym
->v
& ~SYM_FIELD
, type
,
739 VT_LOCAL
| VT_LVAL
, param_addr
);
744 /* generate function epilog */
/* Emits leave/ret (or ret $n for callee-cleanup), then rewinds 'ind' to
   the reserved prolog slot and writes the real prolog now that the
   frame size (-loc, 16-byte aligned) is known; on PE targets the frame
   is set up via __chkstk instead.
   NOTE(review): fragment — the leave/ret emission, the #ifdef structure
   and the restore of 'ind' (several original lines) are missing from
   this extraction. */
745 void gfunc_epilog(void)
750 if (func_ret_sub
== 0) {
755 g(func_ret_sub
>> 8);
757 /* align local size to word & save local variables */
758 v
= (-loc
+ 15) & -16;
760 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
763 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
764 oad(0xb8, v
); /* mov stacksize, %eax */
765 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
766 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
770 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
771 o(0xec8148); /* sub rsp, stacksize */
773 #if FUNC_PROLOG_SIZE == 12
774 o(0x90); /* adjust to FUNC_PROLOG_SIZE */
780 /* generate a jump to a label */
/* gjmp(): emit "jmp rel32" with a patchable displacement; returns the
   data offset for later gsym() patching (signature missing here). */
783 return psym(0xe9, t
);
786 /* generate a jump to a fixed address */
/* NOTE(review): fragment — the short "jmp rel8" fast path for nearby
   targets (orig. 788-794) is missing from this extraction. */
787 void gjmp_addr(int a
)
/* rel32 form: displacement is relative to the end of the 5-byte insn */
795 oad(0xe9, a
- ind
- 5);
799 /* generate a test. set 'inv' to invert test. Stack entry is popped */
/* Turns the value on top of the value stack into a conditional jump
   chained onto 't': uses the flags directly for VT_CMP, merges or
   resolves pending jump chains for VT_JMP/VT_JMPI, folds constant
   conditions, and otherwise emits a compare + jnz/jz.
   NOTE(review): fragment — declarations, the chain-walk loop body, the
   float/llong generic comparison path and the final vtop pop (several
   original lines) are missing from this extraction. */
800 int gtst(int inv
, int t
)
804 v
= vtop
->r
& VT_VALMASK
;
806 /* fast case : can jump directly since flags are set */
/* vtop->c.i holds the TOK_xx comparison; -16 maps it to the 0x8x
   jcc opcode family, ^inv flips the condition */
808 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
809 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
810 /* && or || optimization */
811 if ((v
& 1) == inv
) {
812 /* insert vtop->c jump list in t */
815 p
= (int *)(cur_text_section
->data
+ *p
);
823 /* XXX: not tested */
824 if (is_float(vtop
->type
.t
) ||
825 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
829 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
830 /* constant jmp optimization */
831 if ((vtop
->c
.i
!= 0) != inv
)
838 t
= psym(0x85 ^ inv
, t
);
845 /* generate an integer binary operation */
/* Dispatches on 'op': add/adc/sub/sbb/and/or/xor use the common ALU
   encoding (imm8/imm32 forms for constants, reg-reg otherwise, with
   REX.W when either operand is 64-bit or unsigned); '*' uses imul;
   shifts use the imm8 or %cl forms; div/mod/umull put the dividend in
   %eax and use mul/div/idiv with cqto sign extension.
   NOTE(review): large fragment — the switch skeleton, operand gv2()
   setup, several case bodies and the result bookkeeping (many original
   lines) are missing from this extraction. */
852 case TOK_ADDC1
: /* add with carry generation */
855 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
856 !is64_type(vtop
->type
.t
)) {
860 if (is64_type(vtop
->type
.t
)) {
861 o(0x48 | REX_BASE(r
));
866 /* XXX: generate inc and dec for smaller code ? */
/* imm8 form: 0x83 /opc, sign-extended */
868 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
/* imm32 form: 0x81 /opc */
872 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
879 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
880 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
881 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
883 o((opc
<< 3) | 0x01);
884 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
887 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
893 case TOK_SUBC1
: /* sub with carry generation */
896 case TOK_ADDC2
: /* add with carry use */
899 case TOK_SUBC2
: /* sub with carry use */
915 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
916 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
917 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
920 o(0xaf0f); /* imul fr, r */
921 o(0xc0 + fr
+ r
* 8);
932 opc
= 0xc0 | (opc
<< 3);
933 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
937 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
938 o(0x48 | REX_BASE(r
));
945 o(0xc1); /* shl/shr/sar $xxx, r */
949 /* we generate the shift in ecx */
952 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
953 o(0x48 | REX_BASE(r
));
955 o(0xd3); /* shl/shr/sar %cl, r */
966 /* first operand must be in eax */
967 /* XXX: need better constraint for second operand */
973 if (op
== TOK_UMULL
) {
974 o(0xf7); /* mul fr */
979 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
980 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
/* NOTE(review): '& VT_LLONG' (bit test) rather than '== VT_LLONG'
   looks suspicious — it also matches other btypes with that bit set;
   verify against upstream before changing. */
983 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
984 o(0x9948); /* cqto */
985 o(0x48 + REX_BASE(fr
));
989 o(0xf7); /* idiv fr, %eax */
992 if (op
== '%' || op
== TOK_UMOD
)
/* 64-bit ("long") integer operation — on x86-64 this presumably just
   defers to gen_opi (body missing from this extraction; TODO confirm). */
1005 void gen_opl(int op
)
1010 /* generate a floating point operation 'v = t1 op t2' instruction. The
1011 two operands are guaranted to have the same floating point type */
1012 /* XXX: need to use ST1 too */
/* Long doubles go through the x87 stack (fucompp + fnstsw %ax for
   comparisons, fxxxp for arithmetic); float/double go through SSE
   (ucomiss/ucomisd + setcc for comparisons, addss..divsd with a ModRM
   memory or register operand for arithmetic).
   NOTE(review): large fragment — constant spilling, the arithmetic
   opcode selection, setcc emission and vtop bookkeeping (many original
   lines) are missing from this extraction. */
1013 void gen_opf(int op
)
1015 int a
, ft
, fc
, swapped
, r
;
1017 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1019 /* convert constants to memory references */
1020 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1025 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1028 /* must put at least one value in the floating point register */
1029 if ((vtop
[-1].r
& VT_LVAL
) &&
1030 (vtop
[0].r
& VT_LVAL
)) {
1036 /* swap the stack if needed so that t1 is the register and t2 is
1037 the memory reference */
1038 if (vtop
[-1].r
& VT_LVAL
) {
1042 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1043 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1044 /* load on stack second operand */
1045 load(TREG_ST0
, vtop
);
1046 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1047 if (op
== TOK_GE
|| op
== TOK_GT
)
1049 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1052 o(0xc9d9); /* fxch %st(1) */
1053 o(0xe9da); /* fucompp */
1054 o(0xe0df); /* fnstsw %ax */
1056 o(0x45e480); /* and $0x45, %ah */
1057 o(0x40fC80); /* cmp $0x40, %ah */
1058 } else if (op
== TOK_NE
) {
1059 o(0x45e480); /* and $0x45, %ah */
1060 o(0x40f480); /* xor $0x40, %ah */
1062 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1063 o(0x05c4f6); /* test $0x05, %ah */
1066 o(0x45c4f6); /* test $0x45, %ah */
1073 /* no memory reference possible for long double operations */
1074 load(TREG_ST0
, vtop
);
1098 o(0xde); /* fxxxp %st, %st(1) */
/* SSE comparison path */
1103 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1104 /* if saved lvalue, then we must reload it */
1107 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1109 r
= get_reg(RC_INT
);
1111 v1
.r
= VT_LOCAL
| VT_LVAL
;
1117 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1120 if (op
== TOK_LE
|| op
== TOK_LT
)
1122 if (op
== TOK_LE
|| op
== TOK_GE
) {
1123 op
= 0x93; /* setae */
1125 op
= 0x97; /* seta */
1130 o(0x7e0ff3); /* movq */
1131 gen_modrm(1, r
, vtop
->sym
, fc
);
1133 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1136 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1139 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1142 o(0x2e0f); /* ucomisd */
1143 gen_modrm(0, r
, vtop
->sym
, fc
);
/* SSE arithmetic path */
1150 /* no memory reference possible for long double operations */
1151 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1152 load(TREG_XMM0
, vtop
);
1172 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1173 o(0xde); /* fxxxp %st, %st(1) */
1176 /* if saved lvalue, then we must reload it */
1178 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1180 r
= get_reg(RC_INT
);
1182 v1
.r
= VT_LOCAL
| VT_LVAL
;
1188 /* movq %xmm0,%xmm1 */
1191 load(TREG_XMM0
, vtop
);
1192 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1193 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1202 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1209 gen_modrm(0, r
, vtop
->sym
, fc
);
1217 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1218 and 'long long' cases. */
/* Target long double: push the integer and fild it on the x87 stack
   (an extra zero qword is pushed for unsigned int so fildll sees a
   non-negative 64-bit value). Target float/double: use cvtsi2ss /
   cvtsi2sd into %xmm0 (prefix 0xf3 for float, 0xf2 for double).
   NOTE(review): fragment — the gv() of the source value, REX handling
   for the SSE path and several closers (orig. 1220, 1222-1223, 1234,
   1238, 1243-1245, 1247, 1252-1254, 1257-1258) are missing from this
   extraction. */
1219 void gen_cvt_itof(int t
)
1221 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1224 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1225 /* signed long long to float/double/long double (unsigned case
1226 is handled generically) */
1227 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1228 o(0x242cdf); /* fildll (%rsp) */
1229 o(0x08c48348); /* add $8, %rsp */
1230 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1231 (VT_INT
| VT_UNSIGNED
)) {
1232 /* unsigned int to float/double/long double */
1233 o(0x6a); /* push $0 */
1235 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1236 o(0x242cdf); /* fildll (%rsp) */
1237 o(0x10c48348); /* add $16, %rsp */
1239 /* int to float/double/long double */
1240 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1241 o(0x2404db); /* fildl (%rsp) */
1242 o(0x08c48348); /* add $8, %rsp */
1246 save_reg(TREG_XMM0
);
1248 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1249 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1250 (VT_INT
| VT_UNSIGNED
) ||
1251 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1255 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1256 vtop
->r
= TREG_XMM0
;
1260 /* convert from one floating point type to another */
/* float<->double use SSE conversions (cvtps2pd / cvtpd2ps after an
   unpck to fill the low lanes); any conversion to or from long double
   bounces through a -0x10(%rsp) scratch slot between SSE and the x87
   stack.
   NOTE(review): fragment — bt/tbt setup, the gv() calls and several
   movss/movsd scratch stores (orig. 1262-1268, 1270, 1276-1280, 1282,
   1288-1294, 1298-1299, 1304-1305, 1307-1309) are missing from this
   extraction. */
1261 void gen_cvt_ftof(int t
)
1269 if (bt
== VT_FLOAT
) {
1271 if (tbt
== VT_DOUBLE
) {
1272 o(0xc0140f); /* unpcklps */
1273 o(0xc05a0f); /* cvtps2pd */
1274 } else if (tbt
== VT_LDOUBLE
) {
1275 /* movss %xmm0,-0x10(%rsp) */
1278 o(0xf02444d9); /* flds -0x10(%rsp) */
1281 } else if (bt
== VT_DOUBLE
) {
1283 if (tbt
== VT_FLOAT
) {
1284 o(0xc0140f66); /* unpcklpd */
1285 o(0xc05a0f66); /* cvtpd2ps */
1286 } else if (tbt
== VT_LDOUBLE
) {
1287 /* movsd %xmm0,-0x10(%rsp) */
1290 o(0xf02444dd); /* fldl -0x10(%rsp) */
1295 if (tbt
== VT_DOUBLE
) {
1296 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1297 /* movsd -0x10(%rsp),%xmm0 */
1300 vtop
->r
= TREG_XMM0
;
1301 } else if (tbt
== VT_FLOAT
) {
1302 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1303 /* movss -0x10(%rsp),%xmm0 */
1306 vtop
->r
= TREG_XMM0
;
1311 /* convert fp to int 't' type */
/* Long double is first narrowed to double (gen_cvt_ftof), then the SSE
   value is truncated into an integer register with cvttss2si /
   cvttsd2si; REX.W (0x48) selects the 64-bit destination form.
   NOTE(review): fragment — ft/bt/size setup, the gv() of the source,
   the 0xf3/0xf2 prefix emission and the result bookkeeping (orig.
   1313, 1315-1316, 1319-1327, 1330, 1332-1336, 1338, 1341-1342) are
   missing from this extraction. */
1312 void gen_cvt_ftoi(int t
)
1314 int ft
, bt
, size
, r
;
1317 if (bt
== VT_LDOUBLE
) {
1318 gen_cvt_ftof(VT_DOUBLE
);
1328 r
= get_reg(RC_INT
);
1329 if (bt
== VT_FLOAT
) {
1331 } else if (bt
== VT_DOUBLE
) {
1337 o(0x48 + REX_BASE(r
));
1339 o(0x2c0f); /* cvttss2si or cvttsd2si */
1340 o(0xc0 + (REG_VALUE(r
) << 3));
1344 /* computed goto support */
1351 /* end of x86-64 code generator */
1352 /*************************************************************/