x86_64-gen.c

   1 /*
   2  *  x86-64 code generator for TCC
   3  *
   4  *  Copyright (c) 2008 Shinichiro Hamaji
   5  *
   6  *  Based on i386-gen.c by Fabrice Bellard
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21  */
  22
  23 #ifdef TARGET_DEFS_ONLY
  24
  25 /* number of available registers */
  26 #define NB_REGS         24
  27 #define NB_ASM_REGS     8
  28
  29 /* a register can belong to several classes. The classes must be
  30    sorted from more general to more precise (see gv2() code which does
  31    assumptions on it). */
  32 #define RC_INT     0x0001 /* generic integer register */
  33 #define RC_FLOAT   0x0002 /* generic float register */
  34 #define RC_RAX     0x0004
  35 #define RC_RCX     0x0008
  36 #define RC_RDX     0x0010
  37 #define RC_ST0     0x0080 /* only for long double */
  38 #define RC_R8      0x0100
  39 #define RC_R9      0x0200
  40 #define RC_R10     0x0400
  41 #define RC_R11     0x0800
  42 #define RC_XMM0    0x1000
  43 #define RC_XMM1    0x2000
  44 #define RC_XMM2    0x4000
  45 #define RC_XMM3    0x8000
  46 #define RC_XMM4    0x10000
  47 #define RC_XMM5    0x20000
  48 #define RC_XMM6    0x40000
  49 #define RC_XMM7    0x80000
  50 #define RC_IRET    RC_RAX /* function return: integer register */
  51 #define RC_LRET    RC_RDX /* function return: second integer register */
  52 #define RC_FRET    RC_XMM0 /* function return: float register */
  53 #define RC_QRET    RC_XMM1 /* function return: second float register */
  54
  55 /* pretty names for the registers */
  56 enum {
  57     TREG_RAX = 0,
  58     TREG_RCX = 1,
  59     TREG_RDX = 2,
  60     TREG_RSI = 6,
  61     TREG_RDI = 7,
  62
  63     TREG_R8  = 8,
  64     TREG_R9  = 9,
  65     TREG_R10 = 10,
  66     TREG_R11 = 11,
  67
  68     TREG_XMM0 = 16,
  69     TREG_XMM1 = 17,
  70     TREG_XMM2 = 18,
  71     TREG_XMM3 = 19,
  72     TREG_XMM4 = 20,
  73     TREG_XMM5 = 21,
  74     TREG_XMM6 = 22,
  75     TREG_XMM7 = 23,
  76
  77     TREG_ST0 = 4, // SP slot won't be used
  78
  79     TREG_MEM = 0x20,
  80 };
  81
  82 #define REX_BASE(reg) (((reg) >> 3) & 1)
  83 #define REG_VALUE(reg) ((reg) & 7)
  84
  85 /* return registers for function */
  86 #define REG_IRET TREG_RAX /* single word int return register */
  87 #define REG_LRET TREG_RDX /* second word return register (for long long) */
  88 #define REG_FRET TREG_XMM0 /* float return register */
  89 #define REG_QRET TREG_XMM1 /* second float return register */
  90
  91 /* defined if function parameters must be evaluated in reverse order */
  92 #define INVERT_FUNC_PARAMS
  93
  94 /* pointer size, in bytes */
  95 #define PTR_SIZE 8
  96
  97 /* long double size and alignment, in bytes */
  98 #define LDOUBLE_SIZE  16
  99 #define LDOUBLE_ALIGN 8
 100 /* maximum alignment (for aligned attribute support) */
 101 #define MAX_ALIGN     8
 102
 103 /******************************************************/
 104 /* ELF defines */
 105
 106 #define EM_TCC_TARGET EM_X86_64
 107
 108 /* relocation type for 32 bit data relocation */
 109 #define R_DATA_32   R_X86_64_32
 110 #define R_DATA_PTR  R_X86_64_64
 111 #define R_JMP_SLOT  R_X86_64_JUMP_SLOT
 112 #define R_COPY      R_X86_64_COPY
 113
 114 #define ELF_START_ADDR 0x08048000
 115 #define ELF_PAGE_SIZE  0x1000
 116
 117 /******************************************************/
 118 #else /* ! TARGET_DEFS_ONLY */
 119 /******************************************************/
 120 #include "tcc.h"
 121 #include <assert.h>
 122
 123 ST_DATA const int reg_classes[NB_REGS] = {
 124     /* eax */ RC_INT | RC_RAX,
 125     /* ecx */ RC_INT | RC_RCX,
 126     /* edx */ RC_INT | RC_RDX,
 127     0,
 128     /* st0 */ RC_ST0,
 129     0,
 130     0,
 131     0,
 132     RC_R8,
 133     RC_R9,
 134     RC_R10,
 135     RC_R11,
 136     0,
 137     0,
 138     0,
 139     0,
 140     /* xmm0 */ RC_FLOAT | RC_XMM0,
 141     /* xmm1 */ RC_FLOAT | RC_XMM1,
 142     /* xmm2 */ RC_FLOAT | RC_XMM2,
 143     /* xmm3 */ RC_FLOAT | RC_XMM3,
 144     /* xmm4 */ RC_FLOAT | RC_XMM4,
 145     /* xmm5 */ RC_FLOAT | RC_XMM5,
 146     /* xmm6 */ RC_FLOAT | RC_XMM6,
 147     /* xmm7 */ RC_FLOAT | RC_XMM7,
 148 };
 149
 150 static unsigned long func_sub_sp_offset;
 151 static int func_ret_sub;
 152
 153 /* XXX: make it faster ? */
 154 void g(int c)
 155 {
 156     int ind1;
 157     ind1 = ind + 1;
 158     if (ind1 > cur_text_section->data_allocated)
 159         section_realloc(cur_text_section, ind1);
 160     cur_text_section->data[ind] = c;
 161     ind = ind1;
 162 }
 163
 164 void o(unsigned int c)
 165 {
 166     while (c) {
 167         g(c);
 168         c = c >> 8;
 169     }
 170 }
 171
 172 void gen_le16(int v)
 173 {
 174     g(v);
 175     g(v >> 8);
 176 }
 177
 178 void gen_le32(int c)
 179 {
 180     g(c);
 181     g(c >> 8);
 182     g(c >> 16);
 183     g(c >> 24);
 184 }
 185
 186 void gen_le64(int64_t c)
 187 {
 188     g(c);
 189     g(c >> 8);
 190     g(c >> 16);
 191     g(c >> 24);
 192     g(c >> 32);
 193     g(c >> 40);
 194     g(c >> 48);
 195     g(c >> 56);
 196 }
 197
 198 void orex(int ll, int r, int r2, int b)
 199 {
 200     if ((r & VT_VALMASK) >= VT_CONST)
 201         r = 0;
 202     if ((r2 & VT_VALMASK) >= VT_CONST)
 203         r2 = 0;
 204     if (ll || REX_BASE(r) || REX_BASE(r2))
 205         o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
 206     o(b);
 207 }
 208
 209 /* output a symbol and patch all calls to it */
 210 void gsym_addr(int t, int a)
 211 {
 212     int n, *ptr;
 213     while (t) {
 214         ptr = (int *)(cur_text_section->data + t);
 215         n = *ptr; /* next value */
 216         *ptr = a - t - 4;
 217         t = n;
 218     }
 219 }
 220
 221 void gsym(int t)
 222 {
 223     gsym_addr(t, ind);
 224 }
 225
 226 /* psym is used to put an instruction with a data field which is a
 227    reference to a symbol. It is in fact the same as oad ! */
 228 #define psym oad
 229
 230 static int is64_type(int t)
 231 {
 232     return ((t & VT_BTYPE) == VT_PTR ||
 233             (t & VT_BTYPE) == VT_FUNC ||
 234             (t & VT_BTYPE) == VT_LLONG);
 235 }
 236
 237 static int is_sse_float(int t) {
 238     int bt;
 239     bt = t & VT_BTYPE;
 240     return bt == VT_DOUBLE || bt == VT_FLOAT;
 241 }
 242
 243
 244 /* instruction + 4 bytes data. Return the address of the data */
 245 ST_FUNC int oad(int c, int s)
 246 {
 247     int ind1;
 248
 249     o(c);
 250     ind1 = ind + 4;
 251     if (ind1 > cur_text_section->data_allocated)
 252         section_realloc(cur_text_section, ind1);
 253     *(int *)(cur_text_section->data + ind) = s;
 254     s = ind;
 255     ind = ind1;
 256     return s;
 257 }
 258
 259 ST_FUNC void gen_addr32(int r, Sym *sym, int c)
 260 {
 261     if (r & VT_SYM)
 262         greloc(cur_text_section, sym, ind, R_X86_64_32);
 263     gen_le32(c);
 264 }
 265
 266 /* output constant with relocation if 'r & VT_SYM' is true */
 267 ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
 268 {
 269     if (r & VT_SYM)
 270         greloc(cur_text_section, sym, ind, R_X86_64_64);
 271     gen_le64(c);
 272 }
 273
 274 /* output constant with relocation if 'r & VT_SYM' is true */
 275 ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
 276 {
 277     if (r & VT_SYM)
 278         greloc(cur_text_section, sym, ind, R_X86_64_PC32);
 279     gen_le32(c-4);
 280 }
 281
 282 /* output got address with relocation */
 283 static void gen_gotpcrel(int r, Sym *sym, int c)
 284 {
 285 #ifndef TCC_TARGET_PE
 286     Section *sr;
 287     ElfW(Rela) *rel;
 288     greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
 289     sr = cur_text_section->reloc;
 290     rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
 291     rel->r_addend = -4;
 292 #else
 293     printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
 294         cur_text_section->data[ind-3],
 295         cur_text_section->data[ind-2],
 296         cur_text_section->data[ind-1]
 297         );
 298     greloc(cur_text_section, sym, ind, R_X86_64_PC32);
 299 #endif
 300     gen_le32(0);
 301     if (c) {
 302         /* we use add c, %xxx for displacement */
 303         orex(1, r, 0, 0x81);
 304         o(0xc0 + REG_VALUE(r));
 305         gen_le32(c);
 306     }
 307 }
 308
 309 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
 310 {
 311     op_reg = REG_VALUE(op_reg) << 3;
 312     if ((r & VT_VALMASK) == VT_CONST) {
 313         /* constant memory reference */
 314         o(0x05 | op_reg);
 315         if (is_got) {
 316             gen_gotpcrel(r, sym, c);
 317         } else {
 318             gen_addrpc32(r, sym, c);
 319         }
 320     } else if ((r & VT_VALMASK) == VT_LOCAL) {
 321         /* currently, we use only ebp as base */
 322         if (c == (char)c) {
 323             /* short reference */
 324             o(0x45 | op_reg);
 325             g(c);
 326         } else {
 327             oad(0x85 | op_reg, c);
 328         }
 329     } else if ((r & VT_VALMASK) >= TREG_MEM) {
 330         if (c) {
 331             g(0x80 | op_reg | REG_VALUE(r));
 332             gen_le32(c);
 333         } else {
 334             g(0x00 | op_reg | REG_VALUE(r));
 335         }
 336     } else {
 337         g(0x00 | op_reg | REG_VALUE(r));
 338     }
 339 }
 340
 341 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
 342    opcode bits */
 343 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
 344 {
 345     gen_modrm_impl(op_reg, r, sym, c, 0);
 346 }
 347
 348 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
 349    opcode bits */
 350 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
 351 {
 352     int is_got;
 353     is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
 354     orex(1, r, op_reg, opcode);
 355     gen_modrm_impl(op_reg, r, sym, c, is_got);
 356 }
 357
 358
 359 /* load 'r' from value 'sv' */
 360 void load(int r, SValue *sv)
 361 {
 362     int v, t, ft, fc, fr;
 363     SValue v1;
 364
 365 #ifdef TCC_TARGET_PE
 366     SValue v2;
 367     sv = pe_getimport(sv, &v2);
 368 #endif
 369
 370     fr = sv->r;
 371     ft = sv->type.t;
 372     fc = sv->c.ul;
 373
 374 #ifndef TCC_TARGET_PE
 375     /* we use indirect access via got */
 376     if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
 377         (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
 378         /* use the result register as a temporal register */
 379         int tr = r | TREG_MEM;
 380         if (is_float(ft)) {
 381             /* we cannot use float registers as a temporal register */
 382             tr = get_reg(RC_INT) | TREG_MEM;
 383         }
 384         gen_modrm64(0x8b, tr, fr, sv->sym, 0);
 385
 386         /* load from the temporal register */
 387         fr = tr | VT_LVAL;
 388     }
 389 #endif
 390
 391     v = fr & VT_VALMASK;
 392     if (fr & VT_LVAL) {
 393         int b, ll;
 394         if (v == VT_LLOCAL) {
 395             v1.type.t = VT_PTR;
 396             v1.r = VT_LOCAL | VT_LVAL;
 397             v1.c.ul = fc;
 398             fr = r;
 399             if (!(reg_classes[fr] & RC_INT))
 400                 fr = get_reg(RC_INT);
 401             load(fr, &v1);
 402         }
 403         ll = 0;
 404         if ((ft & VT_BTYPE) == VT_FLOAT) {
 405             b = 0x6e0f66;
 406             r = REG_VALUE(r); /* movd */
 407         } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
 408             b = 0x7e0ff3; /* movq */
 409             r = REG_VALUE(r);
 410         } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
 411             b = 0xdb, r = 5; /* fldt */
 412         } else if ((ft & VT_TYPE) == VT_BYTE) {
 413             b = 0xbe0f;   /* movsbl */
 414         } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
 415             b = 0xb60f;   /* movzbl */
 416         } else if ((ft & VT_TYPE) == VT_SHORT) {
 417             b = 0xbf0f;   /* movswl */
 418         } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
 419             b = 0xb70f;   /* movzwl */
 420         } else {
 421             assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
 422                    || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
 423                    || ((ft & VT_BTYPE) == VT_FUNC));
 424             ll = is64_type(ft);
 425             b = 0x8b;
 426         }
 427         if (ll) {
 428             gen_modrm64(b, r, fr, sv->sym, fc);
 429         } else {
 430             orex(ll, fr, r, b);
 431             gen_modrm(r, fr, sv->sym, fc);
 432         }
 433     } else {
 434         if (v == VT_CONST) {
 435             if (fr & VT_SYM) {
 436 #ifdef TCC_TARGET_PE
 437                 orex(1,0,r,0x8d);
 438                 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
 439                 gen_addrpc32(fr, sv->sym, fc);
 440 #else
 441                 if (sv->sym->type.t & VT_STATIC) {
 442                     orex(1,0,r,0x8d);
 443                     o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
 444                     gen_addrpc32(fr, sv->sym, fc);
 445                 } else {
 446                     orex(1,0,r,0x8b);
 447                     o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
 448                     gen_gotpcrel(r, sv->sym, fc);
 449                 }
 450 #endif
 451             } else if (is64_type(ft)) {
 452                 orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
 453                 gen_le64(sv->c.ull);
 454             } else {
 455                 orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
 456                 gen_le32(fc);
 457             }
 458         } else if (v == VT_LOCAL) {
 459             orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
 460             gen_modrm(r, VT_LOCAL, sv->sym, fc);
 461         } else if (v == VT_CMP) {
 462             orex(0,r,0,0);
 463             if ((fc & ~0x100) != TOK_NE)
 464               oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
 465             else
 466               oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
 467             if (fc & 0x100)
 468               {
 469                 /* This was a float compare.  If the parity bit is
 470                    set the result was unordered, meaning false for everything
 471                    except TOK_NE, and true for TOK_NE.  */
 472                 fc &= ~0x100;
 473                 o(0x037a + (REX_BASE(r) << 8));
 474               }
 475             orex(0,r,0, 0x0f); /* setxx %br */
 476             o(fc);
 477             o(0xc0 + REG_VALUE(r));
 478         } else if (v == VT_JMP || v == VT_JMPI) {
 479             t = v & 1;
 480             orex(0,r,0,0);
 481             oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
 482             o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
 483             gsym(fc);
 484             orex(0,r,0,0);
 485             oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
 486         } else if (v != r) {
 487             if ((r == TREG_XMM0) || (r == TREG_XMM1)) {
 488                 if (v == TREG_ST0) {
 489                     /* gen_cvt_ftof(VT_DOUBLE); */
 490                     o(0xf0245cdd); /* fstpl -0x10(%rsp) */
 491                     /* movsd -0x10(%rsp),%xmmN */
 492                     o(0x100ff2);
 493                     o(0x44 + REG_VALUE(r)*8); /* %xmmN */
 494                     o(0xf024);
 495                 } else {
 496                     assert((v == TREG_XMM0) || (v == TREG_XMM1));
 497                     if ((ft & VT_BTYPE) == VT_FLOAT) {
 498                         o(0x100ff3);
 499                     } else {
 500                         assert((ft & VT_BTYPE) == VT_DOUBLE);
 501                         o(0x100ff2);
 502                     }
 503                     o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
 504                 }
 505             } else if (r == TREG_ST0) {
 506                 assert((v == TREG_XMM0) || (v == TREG_XMM1));
 507                 /* gen_cvt_ftof(VT_LDOUBLE); */
 508                 /* movsd %xmmN,-0x10(%rsp) */
 509                 o(0x110ff2);
 510                 o(0x44 + REG_VALUE(r)*8); /* %xmmN */
 511                 o(0xf024);
 512                 o(0xf02444dd); /* fldl -0x10(%rsp) */
 513             } else {
 514                 orex(1,r,v, 0x89);
 515                 o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
 516             }
 517         }
 518     }
 519 }
 520
 521 /* store register 'r' in lvalue 'v' */
 522 void store(int r, SValue *v)
 523 {
 524     int fr, bt, ft, fc;
 525     int op64 = 0;
 526     /* store the REX prefix in this variable when PIC is enabled */
 527     int pic = 0;
 528
 529 #ifdef TCC_TARGET_PE
 530     SValue v2;
 531     v = pe_getimport(v, &v2);
 532 #endif
 533
 534     ft = v->type.t;
 535     fc = v->c.ul;
 536     fr = v->r & VT_VALMASK;
 537     bt = ft & VT_BTYPE;
 538
 539 #ifndef TCC_TARGET_PE
 540     /* we need to access the variable via got */
 541     if (fr == VT_CONST && (v->r & VT_SYM)) {
 542         /* mov xx(%rip), %r11 */
 543         o(0x1d8b4c);
 544         gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
 545         pic = is64_type(bt) ? 0x49 : 0x41;
 546     }
 547 #endif
 548
 549     /* XXX: incorrect if float reg to reg */
 550     if (bt == VT_FLOAT) {
 551         o(0x66);
 552         o(pic);
 553         o(0x7e0f); /* movd */
 554         r = REG_VALUE(r);
 555     } else if (bt == VT_DOUBLE) {
 556         o(0x66);
 557         o(pic);
 558         o(0xd60f); /* movq */
 559         r = REG_VALUE(r);
 560     } else if (bt == VT_LDOUBLE) {
 561         o(0xc0d9); /* fld %st(0) */
 562         o(pic);
 563         o(0xdb); /* fstpt */
 564         r = 7;
 565     } else {
 566         if (bt == VT_SHORT)
 567             o(0x66);
 568         o(pic);
 569         if (bt == VT_BYTE || bt == VT_BOOL)
 570             orex(0, 0, r, 0x88);
 571         else if (is64_type(bt))
 572             op64 = 0x89;
 573         else
 574             orex(0, 0, r, 0x89);
 575     }
 576     if (pic) {
 577         /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
 578         if (op64)
 579             o(op64);
 580         o(3 + (r << 3));
 581     } else if (op64) {
 582         if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
 583             gen_modrm64(op64, r, v->r, v->sym, fc);
 584         } else if (fr != r) {
 585             /* XXX: don't we really come here? */
 586             abort();
 587             o(0xc0 + fr + r * 8); /* mov r, fr */
 588         }
 589     } else {
 590         if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
 591             gen_modrm(r, v->r, v->sym, fc);
 592         } else if (fr != r) {
 593             /* XXX: don't we really come here? */
 594             abort();
 595             o(0xc0 + fr + r * 8); /* mov r, fr */
 596         }
 597     }
 598 }
 599
 600 /* 'is_jmp' is '1' if it is a jump */
 601 static void gcall_or_jmp(int is_jmp)
 602 {
 603     int r;
 604     if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 605         /* constant case */
 606         if (vtop->r & VT_SYM) {
 607             /* relocation case */
 608             greloc(cur_text_section, vtop->sym,
 609                    ind + 1, R_X86_64_PC32);
 610         } else {
 611             /* put an empty PC32 relocation */
 612             put_elf_reloc(symtab_section, cur_text_section,
 613                           ind + 1, R_X86_64_PC32, 0);
 614         }
 615         oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
 616     } else {
 617         /* otherwise, indirect call */
 618         r = TREG_R11;
 619         load(r, vtop);
 620         o(0x41); /* REX */
 621         o(0xff); /* call/jmp *r */
 622         o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
 623     }
 624 }
 625
 626 #ifdef TCC_TARGET_PE
 627
 628 #define REGN 4
 629 static const uint8_t arg_regs[] = {
 630     TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
 631 };
 632
 633 static int func_scratch;
 634
 635 /* Generate function call. The function address is pushed first, then
 636    all the parameters in call order. This functions pops all the
 637    parameters and the function address. */
 638
 639 void gen_offs_sp(int b, int r, int d)
 640 {
 641     orex(1,0,r & 0x100 ? 0 : r, b);
 642     if (d == (char)d) {
 643         o(0x2444 | (REG_VALUE(r) << 3));
 644         g(d);
 645     } else {
 646         o(0x2484 | (REG_VALUE(r) << 3));
 647         gen_le32(d);
 648     }
 649 }
 650
 651 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
 652 ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
 653     *ret_align = 1; // Never have to re-align return values for x86-64
 654     return 1;
 655 }
 656
 657 void gfunc_call(int nb_args)
 658 {
 659     int size, align, r, args_size, i, d, j, bt, struct_size;
 660     int nb_reg_args, gen_reg;
 661
 662     nb_reg_args = nb_args;
 663     args_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;
 664
 665     /* for struct arguments, we need to call memcpy and the function
 666        call breaks register passing arguments we are preparing.
 667        So, we process arguments which will be passed by stack first. */
 668     struct_size = args_size;
 669     for(i = 0; i < nb_args; i++) {
 670         SValue *sv = &vtop[-i];
 671         bt = (sv->type.t & VT_BTYPE);
 672         if (bt == VT_STRUCT) {
 673             size = type_size(&sv->type, &align);
 674             /* align to stack align size */
 675             size = (size + 15) & ~15;
 676             /* generate structure store */
 677             r = get_reg(RC_INT);
 678             gen_offs_sp(0x8d, r, struct_size);
 679             struct_size += size;
 680
 681             /* generate memcpy call */
 682             vset(&sv->type, r | VT_LVAL, 0);
 683             vpushv(sv);
 684             vstore();
 685             --vtop;
 686
 687         } else if (bt == VT_LDOUBLE) {
 688
 689             gv(RC_ST0);
 690             gen_offs_sp(0xdb, 0x107, struct_size);
 691             struct_size += 16;
 692
 693         }
 694     }
 695
 696     if (func_scratch < struct_size)
 697         func_scratch = struct_size;
 698 #if 1
 699     for (i = 0; i < REGN; ++i)
 700         save_reg(arg_regs[i]);
 701     save_reg(TREG_RAX);
 702 #endif
 703     gen_reg = nb_reg_args;
 704     struct_size = args_size;
 705
 706     for(i = 0; i < nb_args; i++) {
 707         bt = (vtop->type.t & VT_BTYPE);
 708
 709         if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
 710             if (bt == VT_LDOUBLE)
 711                 size = 16;
 712             else
 713                 size = type_size(&vtop->type, &align);
 714             /* align to stack align size */
 715             size = (size + 15) & ~15;
 716             j = --gen_reg;
 717             if (j >= REGN) {
 718                 d = TREG_RAX;
 719                 gen_offs_sp(0x8d, d, struct_size);
 720                 gen_offs_sp(0x89, d, j*8);
 721             } else {
 722                 d = arg_regs[j];
 723                 gen_offs_sp(0x8d, d, struct_size);
 724             }
 725             struct_size += size;
 726
 727         } else if (is_sse_float(vtop->type.t)) {
 728             gv(RC_XMM0); /* only one float register */
 729             j = --gen_reg;
 730             if (j >= REGN) {
 731                 /* movq %xmm0, j*8(%rsp) */
 732                 gen_offs_sp(0xd60f66, 0x100, j*8);
 733             } else {
 734                 /* movaps %xmm0, %xmmN */
 735                 o(0x280f);
 736                 o(0xc0 + (j << 3));
 737                 d = arg_regs[j];
 738                 /* mov %xmm0, %rxx */
 739                 o(0x66);
 740                 orex(1,d,0, 0x7e0f);
 741                 o(0xc0 + REG_VALUE(d));
 742             }
 743         } else {
 744             j = --gen_reg;
 745             if (j >= REGN) {
 746                 r = gv(RC_INT);
 747                 gen_offs_sp(0x89, r, j*8);
 748             } else {
 749                 d = arg_regs[j];
 750                 if (d < NB_REGS) {
 751                     gv(reg_classes[d] & ~RC_INT);
 752                 } else {
 753                     r = gv(RC_INT);
 754                     if (d != r) {
 755                         orex(1,d,r, 0x89);
 756                         o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8);
 757                     }
 758                 }
 759
 760             }
 761         }
 762         vtop--;
 763     }
 764     save_regs(0);
 765     gcall_or_jmp(0);
 766     vtop--;
 767 }
 768
 769
 770 #define FUNC_PROLOG_SIZE 11
 771
 772 /* generate function prolog of type 't' */
 773 void gfunc_prolog(CType *func_type)
 774 {
 775     int addr, reg_param_index, bt;
 776     Sym *sym;
 777     CType *type;
 778
 779     func_ret_sub = 0;
 780     func_scratch = 0;
 781     loc = 0;
 782
 783     addr = PTR_SIZE * 2;
 784     ind += FUNC_PROLOG_SIZE;
 785     func_sub_sp_offset = ind;
 786     reg_param_index = 0;
 787
 788     sym = func_type->ref;
 789
 790     /* if the function returns a structure, then add an
 791        implicit pointer parameter */
 792     func_vt = sym->type;
 793     if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
 794         gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
 795         reg_param_index++;
 796         addr += PTR_SIZE;
 797     }
 798
 799     /* define parameters */
 800     while ((sym = sym->next) != NULL) {
 801         type = &sym->type;
 802         bt = type->t & VT_BTYPE;
 803         if (reg_param_index < REGN) {
 804             /* save arguments passed by register */
 805             gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
 806         }
 807         if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
 808             sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
 809         } else {
 810             sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
 811         }
 812         reg_param_index++;
 813         addr += PTR_SIZE;
 814     }
 815
 816     while (reg_param_index < REGN) {
 817         if (func_type->ref->c == FUNC_ELLIPSIS)
 818             gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
 819         reg_param_index++;
 820         addr += PTR_SIZE;
 821     }
 822 }
 823
 824 /* generate function epilog */
 825 void gfunc_epilog(void)
 826 {
 827     int v, saved_ind;
 828
 829     o(0xc9); /* leave */
 830     if (func_ret_sub == 0) {
 831         o(0xc3); /* ret */
 832     } else {
 833         o(0xc2); /* ret n */
 834         g(func_ret_sub);
 835         g(func_ret_sub >> 8);
 836     }
 837
 838     saved_ind = ind;
 839     ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
 840     /* align local size to word & save local variables */
 841     v = (func_scratch + -loc + 15) & -16;
 842
 843     if (v >= 4096) {
 844         Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
 845         oad(0xb8, v); /* mov stacksize, %eax */
 846         oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
 847         greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
 848         o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
 849     } else {
 850         o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
 851         o(0xec8148);  /* sub rsp, stacksize */
 852         gen_le32(v);
 853     }
 854
 855     cur_text_section->data_offset = saved_ind;
 856     pe_add_unwind_data(ind, saved_ind, v);
 857     ind = cur_text_section->data_offset;
 858 }
 859
 860 #else
 861
 862 static void gadd_sp(int val)
 863 {
 864     if (val == (char)val) {
 865         o(0xc48348);
 866         g(val);
 867     } else {
 868         oad(0xc48148, val); /* add $xxx, %rsp */
 869     }
 870 }
 871
 872 typedef enum X86_64_Mode {
 873   x86_64_mode_none,
 874   x86_64_mode_memory,
 875   x86_64_mode_integer,
 876   x86_64_mode_sse,
 877   x86_64_mode_x87
 878 } X86_64_Mode;
 879
 880 static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b) {
 881     if (a == b)
 882         return a;
 883     else if (a == x86_64_mode_none)
 884         return b;
 885     else if (b == x86_64_mode_none)
 886         return a;
 887     else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
 888         return x86_64_mode_memory;
 889     else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
 890         return x86_64_mode_integer;
 891     else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
 892         return x86_64_mode_memory;
 893     else
 894         return x86_64_mode_sse;
 895 }
 896
 897 static X86_64_Mode classify_x86_64_inner(CType *ty) {
 898     X86_64_Mode mode;
 899     Sym *f;
 900
 901     switch (ty->t & VT_BTYPE) {
 902     case VT_VOID: return x86_64_mode_none;
 903
 904     case VT_INT:
 905     case VT_BYTE:
 906     case VT_SHORT:
 907     case VT_LLONG:
 908     case VT_BOOL:
 909     case VT_PTR:
 910     case VT_FUNC:
 911     case VT_ENUM: return x86_64_mode_integer;
 912
 913     case VT_FLOAT:
 914     case VT_DOUBLE: return x86_64_mode_sse;
 915
 916     case VT_LDOUBLE: return x86_64_mode_x87;
 917
 918     case VT_STRUCT:
 919         f = ty->ref;
 920
 921         // Detect union
 922         if (f->next && (f->c == f->next->c))
 923           return x86_64_mode_memory;
 924
 925         mode = x86_64_mode_none;
 926         for (; f; f = f->next)
 927             mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));
 928
 929         return mode;
 930     }
 931
 932     assert(0);
 933 }
 934
 935 static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *reg_count) {
 936     X86_64_Mode mode;
 937     int size, align, ret_t;
 938
 939     if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
 940         *psize = 8;
 941         *reg_count = 1;
 942         ret_t = ty->t;
 943         mode = x86_64_mode_integer;
 944     } else {
 945         size = type_size(ty, &align);
 946         *psize = (size + 7) & ~7;
 947
 948         if (size > 16) {
 949             mode = x86_64_mode_memory;
 950         } else {
 951             mode = classify_x86_64_inner(ty);
 952             switch (mode) {
 953             case x86_64_mode_integer:
 954                 if (size > 8) {
 955                     *reg_count = 2;
 956                     ret_t = VT_QLONG;
 957                 } else {
 958                     *reg_count = 1;
 959                     ret_t = (size > 4) ? VT_LLONG : VT_INT;
 960                 }
 961                 break;
 962
 963             case x86_64_mode_x87:
 964                 *reg_count = 1;
 965                 ret_t = VT_LDOUBLE;
 966                 break;
 967
 968             case x86_64_mode_sse:
 969                 if (size > 8) {
 970                     *reg_count = 2;
 971                     ret_t = VT_QFLOAT;
 972                 } else {
 973                     *reg_count = 1;
 974                     ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
 975                 }
 976                 break;
 977             }
 978         }
 979     }
 980
 981     if (ret) {
 982         ret->ref = NULL;
 983         ret->t = ret_t;
 984     }
 985
 986     return mode;
 987 }
 988
 989 ST_FUNC int classify_x86_64_va_arg(CType *ty) {
 990     /* This definition must be synced with stdarg.h */
 991     enum __va_arg_type {
 992         __va_gen_reg, __va_float_reg, __va_stack
 993     };
 994     int size, reg_count;
 995     X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &reg_count);
 996     switch (mode) {
 997     default: return __va_stack;
 998     case x86_64_mode_integer: return __va_gen_reg;
 999     case x86_64_mode_sse: return __va_float_reg;
1000     }
1001 }
1002
1003 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
1004 int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
1005     int size, reg_count;
1006     *ret_align = 1; // Never have to re-align return values for x86-64
1007     return (classify_x86_64_arg(vt, ret, &size, &reg_count) == x86_64_mode_memory);
1008 }
1009
1010 #define REGN 6
1011 static const uint8_t arg_regs[REGN] = {
1012     TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
1013 };
1014
1015 static int arg_prepare_reg(int idx) {
1016   if (idx == 2 || idx == 3)
1017       /* idx=2: r10, idx=3: r11 */
1018       return idx + 8;
1019   else
1020       return arg_regs[idx];
1021 }
1022
1023 /* Generate function call. The function address is pushed first, then
1024    all the parameters in call order. This functions pops all the
1025    parameters and the function address. */
1026 void gfunc_call(int nb_args)
1027 {
1028     X86_64_Mode mode;
1029     CType type;
1030     int size, align, r, args_size, i, j, reg_count;
1031     int nb_reg_args = 0;
1032     int nb_sse_args = 0;
1033     int sse_reg, gen_reg;
1034
1035     /* calculate the number of integer/float arguments */
1036     args_size = 0;
1037     for(i = 0; i < nb_args; i++) {
1038         mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &reg_count);
1039         switch (mode) {
1040         case x86_64_mode_memory:
1041         case x86_64_mode_x87:
1042             args_size += size;
1043             break;
1044
1045         case x86_64_mode_sse:
1046             nb_sse_args += reg_count;
1047             if (nb_sse_args > 8) args_size += size;
1048             break;
1049
1050         case x86_64_mode_integer:
1051             nb_reg_args += reg_count;
1052             if (nb_reg_args > REGN) args_size += size;
1053             break;
1054         }
1055     }
1056
1057     /* for struct arguments, we need to call memcpy and the function
1058        call breaks register passing arguments we are preparing.
1059        So, we process arguments which will be passed by stack first. */
1060     gen_reg = nb_reg_args;
1061     sse_reg = nb_sse_args;
1062
1063     /* adjust stack to align SSE boundary */
1064     if (args_size &= 15) {
1065         /* fetch cpu flag before the following sub will change the value */
1066         if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
1067             gv(RC_INT);
1068
1069         args_size = 16 - args_size;
1070         o(0x48);
1071         oad(0xec81, args_size); /* sub $xxx, %rsp */
1072     }
1073
1074     for(i = 0; i < nb_args;) {
1075         /* Swap argument to top, it will possibly be changed here,
1076            and might use more temps. At the end of the loop we keep
1077            in on the stack and swap it back to its original position
1078            if it is a register. */
1079         SValue tmp = vtop[0];
1080         vtop[0] = vtop[-i];
1081         vtop[-i] = tmp;
1082
1083         mode = classify_x86_64_arg(&vtop->type, NULL, &size, &reg_count);
1084
1085         int arg_stored = 1;
1086         switch (vtop->type.t & VT_BTYPE) {
1087         case VT_STRUCT:
1088             if (mode == x86_64_mode_sse) {
1089                 if (sse_reg > 8)
1090                     sse_reg -= reg_count;
1091                 else
1092                     arg_stored = 0;
1093             } else if (mode == x86_64_mode_integer) {
1094                 if (gen_reg > REGN)
1095                     gen_reg -= reg_count;
1096                 else
1097                     arg_stored = 0;
1098             }
1099
1100             if (arg_stored) {
1101                 /* allocate the necessary size on stack */
1102                 o(0x48);
1103                 oad(0xec81, size); /* sub $xxx, %rsp */
1104                 /* generate structure store */
1105                 r = get_reg(RC_INT);
1106                 orex(1, r, 0, 0x89); /* mov %rsp, r */
1107                 o(0xe0 + REG_VALUE(r));
1108                 vset(&vtop->type, r | VT_LVAL, 0);
1109                 vswap();
1110                 vstore();
1111                 args_size += size;
1112             }
1113             break;
1114
1115         case VT_LDOUBLE:
1116             gv(RC_ST0);
1117             size = LDOUBLE_SIZE;
1118             oad(0xec8148, size); /* sub $xxx, %rsp */
1119             o(0x7cdb); /* fstpt 0(%rsp) */
1120             g(0x24);
1121             g(0x00);
1122             args_size += size;
1123             break;
1124
1125         case VT_FLOAT:
1126         case VT_DOUBLE:
1127             assert(mode == x86_64_mode_sse);
1128             if (sse_reg > 8) {
1129                 --sse_reg;
1130                 r = gv(RC_FLOAT);
1131                 o(0x50); /* push $rax */
1132                 /* movq %xmm0, (%rsp) */
1133                 o(0xd60f66);
1134                 o(0x04 + REG_VALUE(r)*8);
1135                 o(0x24);
1136                 args_size += size;
1137             } else {
1138                 arg_stored = 0;
1139             }
1140             break;
1141
1142         default:
1143             assert(mode == x86_64_mode_integer);
1144             /* simple type */
1145             /* XXX: implicit cast ? */
1146             if (gen_reg > REGN) {
1147                 --gen_reg;
1148                 r = gv(RC_INT);
1149                 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
1150                 args_size += size;
1151             } else {
1152                 arg_stored = 0;
1153             }
1154             break;
1155         }
1156
1157         /* And swap the argument back to it's original position.  */
1158         tmp = vtop[0];
1159         vtop[0] = vtop[-i];
1160         vtop[-i] = tmp;
1161
1162         if (arg_stored) {
1163           vrotb(i+1);
1164           assert(vtop->type.t == tmp.type.t);
1165           vpop();
1166           --nb_args;
1167         } else {
1168           ++i;
1169         }
1170     }
1171
1172     /* XXX This should be superfluous.  */
1173     save_regs(0); /* save used temporary registers */
1174
1175     /* then, we prepare register passing arguments.
1176        Note that we cannot set RDX and RCX in this loop because gv()
1177        may break these temporary registers. Let's use R10 and R11
1178        instead of them */
1179     assert(gen_reg <= REGN);
1180     assert(sse_reg <= 8);
1181     for(i = 0; i < nb_args; i++) {
1182         mode = classify_x86_64_arg(&vtop->type, &type, &size, &reg_count);
1183         /* Alter stack entry type so that gv() knows how to treat it */
1184         vtop->type = type;
1185         if (mode == x86_64_mode_sse) {
1186           if (reg_count == 2) {
1187               sse_reg -= 2;
1188               gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
1189               if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
1190                   /* movaps %xmm0, %xmmN */
1191                   o(0x280f);
1192                   o(0xc0 + (sse_reg << 3));
1193                   /* movaps %xmm1, %xmmN */
1194                   o(0x280f);
1195                   o(0xc1 + ((sse_reg+1) << 3));
1196               }
1197           } else {
1198               assert(reg_count == 1);
1199               --sse_reg;
1200               /* Load directly to register */
1201               gv(RC_XMM0 << sse_reg);
1202           }
1203         } else if (mode == x86_64_mode_integer) {
1204             /* simple type */
1205             /* XXX: implicit cast ? */
1206             gen_reg -= reg_count;
1207             r = gv(RC_INT);
1208             int d = arg_prepare_reg(gen_reg);
1209             orex(1,d,r,0x89); /* mov */
1210             o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
1211             if (reg_count == 2) {
1212                 /* Second word of two-word value should always be in rdx
1213                     this case is handled via RC_IRET */
1214                 d = arg_prepare_reg(gen_reg+1);
1215                 orex(1,d,vtop->r2,0x89); /* mov */
1216                 o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
1217             }
1218         }
1219         vtop--;
1220     }
1221     assert(gen_reg == 0);
1222     assert(sse_reg == 0);
1223
1224     /* We shouldn't have many operands on the stack anymore, but the
1225        call address itself is still there, and it might be in %eax
1226        (or edx/ecx) currently, which the below writes would clobber.
1227        So evict all remaining operands here.  */
1228     save_regs(0);
1229
1230     /* Copy R10 and R11 into RDX and RCX, respectively */
1231     if (nb_reg_args > 2) {
1232         o(0xd2894c); /* mov %r10, %rdx */
1233         if (nb_reg_args > 3) {
1234             o(0xd9894c); /* mov %r11, %rcx */
1235         }
1236     }
1237
1238     oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
1239     gcall_or_jmp(0);
1240     if (args_size)
1241         gadd_sp(args_size);
1242     vtop--;
1243 }
1244
1245
1246 #define FUNC_PROLOG_SIZE 11
1247
1248 static void push_arg_reg(int i) {
1249     loc -= 8;
1250     gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
1251 }
1252
1253 /* generate function prolog of type 't' */
1254 void gfunc_prolog(CType *func_type)
1255 {
1256     X86_64_Mode mode;
1257     int i, addr, align, size, reg_count;
1258     int param_index, param_addr, reg_param_index, sse_param_index;
1259     Sym *sym;
1260     CType *type;
1261
1262     sym = func_type->ref;
1263     addr = PTR_SIZE * 2;
1264     loc = 0;
1265     ind += FUNC_PROLOG_SIZE;
1266     func_sub_sp_offset = ind;
1267     func_ret_sub = 0;
1268
1269     if (func_type->ref->c == FUNC_ELLIPSIS) {
1270         int seen_reg_num, seen_sse_num, seen_stack_size;
1271         seen_reg_num = seen_sse_num = 0;
1272         /* frame pointer and return address */
1273         seen_stack_size = PTR_SIZE * 2;
1274         /* count the number of seen parameters */
1275         sym = func_type->ref;
1276         while ((sym = sym->next) != NULL) {
1277             type = &sym->type;
1278             mode = classify_x86_64_arg(type, NULL, &size, &reg_count);
1279             switch (mode) {
1280             default:
1281                 seen_stack_size += size;
1282                 break;
1283
1284             case x86_64_mode_integer:
1285                 if (seen_reg_num + reg_count <= 8) {
1286                     seen_reg_num += reg_count;
1287                 } else {
1288                     seen_reg_num = 8;
1289                     seen_stack_size += size;
1290                 }
1291                 break;
1292
1293             case x86_64_mode_sse:
1294                 if (seen_sse_num + reg_count <= 8) {
1295                     seen_sse_num += reg_count;
1296                 } else {
1297                     seen_sse_num = 8;
1298                     seen_stack_size += size;
1299                 }
1300                 break;
1301             }
1302         }
1303
1304         loc -= 16;
1305         /* movl $0x????????, -0x10(%rbp) */
1306         o(0xf045c7);
1307         gen_le32(seen_reg_num * 8);
1308         /* movl $0x????????, -0xc(%rbp) */
1309         o(0xf445c7);
1310         gen_le32(seen_sse_num * 16 + 48);
1311         /* movl $0x????????, -0x8(%rbp) */
1312         o(0xf845c7);
1313         gen_le32(seen_stack_size);
1314
1315         /* save all register passing arguments */
1316         for (i = 0; i < 8; i++) {
1317             loc -= 16;
1318             o(0xd60f66); /* movq */
1319             gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1320             /* movq $0, loc+8(%rbp) */
1321             o(0x85c748);
1322             gen_le32(loc + 8);
1323             gen_le32(0);
1324         }
1325         for (i = 0; i < REGN; i++) {
1326             push_arg_reg(REGN-1-i);
1327         }
1328     }
1329
1330     sym = func_type->ref;
1331     param_index = 0;
1332     reg_param_index = 0;
1333     sse_param_index = 0;
1334
1335     /* if the function returns a structure, then add an
1336        implicit pointer parameter */
1337     func_vt = sym->type;
1338     mode = classify_x86_64_arg(&func_vt, NULL, &size, &reg_count);
1339     if (mode == x86_64_mode_memory) {
1340         push_arg_reg(reg_param_index);
1341         param_addr = loc;
1342
1343         func_vc = loc;
1344         param_index++;
1345         reg_param_index++;
1346     }
1347     /* define parameters */
1348     while ((sym = sym->next) != NULL) {
1349         type = &sym->type;
1350         mode = classify_x86_64_arg(type, NULL, &size, &reg_count);
1351         switch (mode) {
1352         case x86_64_mode_sse:
1353             if (sse_param_index + reg_count <= 8) {
1354                 /* save arguments passed by register */
1355                 loc -= reg_count * 8;
1356                 param_addr = loc;
1357                 for (i = 0; i < reg_count; ++i) {
1358                     o(0xd60f66); /* movq */
1359                     gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
1360                     ++sse_param_index;
1361                 }
1362             } else {
1363                 param_addr = addr;
1364                 addr += size;
1365                 sse_param_index += reg_count;
1366             }
1367             break;
1368
1369         case x86_64_mode_memory:
1370         case x86_64_mode_x87:
1371             param_addr = addr;
1372             addr += size;
1373             break;
1374
1375         case x86_64_mode_integer: {
1376             if (reg_param_index + reg_count <= REGN) {
1377                 /* save arguments passed by register */
1378                 loc -= reg_count * 8;
1379                 param_addr = loc;
1380                 for (i = 0; i < reg_count; ++i) {
1381                     gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
1382                     ++reg_param_index;
1383                 }
1384             } else {
1385                 param_addr = addr;
1386                 addr += size;
1387                 reg_param_index += reg_count;
1388             }
1389             break;
1390         }
1391         }
1392         sym_push(sym->v & ~SYM_FIELD, type,
1393                  VT_LOCAL | VT_LVAL, param_addr);
1394         param_index++;
1395     }
1396 }
1397
1398 /* generate function epilog */
1399 void gfunc_epilog(void)
1400 {
1401     int v, saved_ind;
1402
1403     o(0xc9); /* leave */
1404     if (func_ret_sub == 0) {
1405         o(0xc3); /* ret */
1406     } else {
1407         o(0xc2); /* ret n */
1408         g(func_ret_sub);
1409         g(func_ret_sub >> 8);
1410     }
1411     /* align local size to word & save local variables */
1412     v = (-loc + 15) & -16;
1413     saved_ind = ind;
1414     ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1415     o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
1416     o(0xec8148);  /* sub rsp, stacksize */
1417     gen_le32(v);
1418     ind = saved_ind;
1419 }
1420
1421 #endif /* not PE */
1422
1423 /* generate a jump to a label */
1424 int gjmp(int t)
1425 {
1426     return psym(0xe9, t);
1427 }
1428
1429 /* generate a jump to a fixed address */
1430 void gjmp_addr(int a)
1431 {
1432     int r;
1433     r = a - ind - 2;
1434     if (r == (char)r) {
1435         g(0xeb);
1436         g(r);
1437     } else {
1438         oad(0xe9, a - ind - 5);
1439     }
1440 }
1441
1442 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1443 int gtst(int inv, int t)
1444 {
1445     int v, *p;
1446
1447     v = vtop->r & VT_VALMASK;
1448     if (v == VT_CMP) {
1449         /* fast case : can jump directly since flags are set */
1450         if (vtop->c.i & 0x100)
1451           {
1452             /* This was a float compare.  If the parity flag is set
1453                the result was unordered.  For anything except != this
1454                means false and we don't jump (anding both conditions).
1455                For != this means true (oring both).
1456                Take care about inverting the test.  We need to jump
1457                to our target if the result was unordered and test wasn't NE,
1458                otherwise if unordered we don't want to jump.  */
1459             vtop->c.i &= ~0x100;
1460             if (!inv == (vtop->c.i != TOK_NE))
1461               o(0x067a);  /* jp +6 */
1462             else
1463               {
1464                 g(0x0f);
1465                 t = psym(0x8a, t); /* jp t */
1466               }
1467           }
1468         g(0x0f);
1469         t = psym((vtop->c.i - 16) ^ inv, t);
1470     } else if (v == VT_JMP || v == VT_JMPI) {
1471         /* && or || optimization */
1472         if ((v & 1) == inv) {
1473             /* insert vtop->c jump list in t */
1474             p = &vtop->c.i;
1475             while (*p != 0)
1476                 p = (int *)(cur_text_section->data + *p);
1477             *p = t;
1478             t = vtop->c.i;
1479         } else {
1480             t = gjmp(t);
1481             gsym(vtop->c.i);
1482         }
1483     } else {
1484         if (is_float(vtop->type.t) ||
1485             (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1486             vpushi(0);
1487             gen_op(TOK_NE);
1488         }
1489         if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1490             /* constant jmp optimization */
1491             if ((vtop->c.i != 0) != inv)
1492                 t = gjmp(t);
1493         } else {
1494             v = gv(RC_INT);
1495             orex(0,v,v,0x85);
1496             o(0xc0 + REG_VALUE(v) * 9);
1497             g(0x0f);
1498             t = psym(0x85 ^ inv, t);
1499         }
1500     }
1501     vtop--;
1502     return t;
1503 }
1504
1505 /* generate an integer binary operation */
1506 void gen_opi(int op)
1507 {
1508     int r, fr, opc, c;
1509     int ll, uu, cc;
1510
1511     ll = is64_type(vtop[-1].type.t);
1512     uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
1513     cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
1514
1515     switch(op) {
1516     case '+':
1517     case TOK_ADDC1: /* add with carry generation */
1518         opc = 0;
1519     gen_op8:
1520         if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
1521             /* constant case */
1522             vswap();
1523             r = gv(RC_INT);
1524             vswap();
1525             c = vtop->c.i;
1526             if (c == (char)c) {
1527                 /* XXX: generate inc and dec for smaller code ? */
1528                 orex(ll, r, 0, 0x83);
1529                 o(0xc0 | (opc << 3) | REG_VALUE(r));
1530                 g(c);
1531             } else {
1532                 orex(ll, r, 0, 0x81);
1533                 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1534             }
1535         } else {
1536             gv2(RC_INT, RC_INT);
1537             r = vtop[-1].r;
1538             fr = vtop[0].r;
1539             orex(ll, r, fr, (opc << 3) | 0x01);
1540             o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1541         }
1542         vtop--;
1543         if (op >= TOK_ULT && op <= TOK_GT) {
1544             vtop->r = VT_CMP;
1545             vtop->c.i = op;
1546         }
1547         break;
1548     case '-':
1549     case TOK_SUBC1: /* sub with carry generation */
1550         opc = 5;
1551         goto gen_op8;
1552     case TOK_ADDC2: /* add with carry use */
1553         opc = 2;
1554         goto gen_op8;
1555     case TOK_SUBC2: /* sub with carry use */
1556         opc = 3;
1557         goto gen_op8;
1558     case '&':
1559         opc = 4;
1560         goto gen_op8;
1561     case '^':
1562         opc = 6;
1563         goto gen_op8;
1564     case '|':
1565         opc = 1;
1566         goto gen_op8;
1567     case '*':
1568         gv2(RC_INT, RC_INT);
1569         r = vtop[-1].r;
1570         fr = vtop[0].r;
1571         orex(ll, fr, r, 0xaf0f); /* imul fr, r */
1572         o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
1573         vtop--;
1574         break;
1575     case TOK_SHL:
1576         opc = 4;
1577         goto gen_shift;
1578     case TOK_SHR:
1579         opc = 5;
1580         goto gen_shift;
1581     case TOK_SAR:
1582         opc = 7;
1583     gen_shift:
1584         opc = 0xc0 | (opc << 3);
1585         if (cc) {
1586             /* constant case */
1587             vswap();
1588             r = gv(RC_INT);
1589             vswap();
1590             orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
1591             o(opc | REG_VALUE(r));
1592             g(vtop->c.i & (ll ? 63 : 31));
1593         } else {
1594             /* we generate the shift in ecx */
1595             gv2(RC_INT, RC_RCX);
1596             r = vtop[-1].r;
1597             orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
1598             o(opc | REG_VALUE(r));
1599         }
1600         vtop--;
1601         break;
1602     case TOK_UDIV:
1603     case TOK_UMOD:
1604         uu = 1;
1605         goto divmod;
1606     case '/':
1607     case '%':
1608     case TOK_PDIV:
1609         uu = 0;
1610     divmod:
1611         /* first operand must be in eax */
1612         /* XXX: need better constraint for second operand */
1613         gv2(RC_RAX, RC_RCX);
1614         r = vtop[-1].r;
1615         fr = vtop[0].r;
1616         vtop--;
1617         save_reg(TREG_RDX);
1618         orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1619         orex(ll, fr, 0, 0xf7); /* div fr, %eax */
1620         o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
1621         if (op == '%' || op == TOK_UMOD)
1622             r = TREG_RDX;
1623         else
1624             r = TREG_RAX;
1625         vtop->r = r;
1626         break;
1627     default:
1628         opc = 7;
1629         goto gen_op8;
1630     }
1631 }
1632
1633 void gen_opl(int op)
1634 {
1635     gen_opi(op);
1636 }
1637
1638 /* generate a floating point operation 'v = t1 op t2' instruction. The
1639    two operands are guaranted to have the same floating point type */
1640 /* XXX: need to use ST1 too */
1641 void gen_opf(int op)
1642 {
1643     int a, ft, fc, swapped, r;
1644     int float_type =
1645         (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1646
1647     /* convert constants to memory references */
1648     if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1649         vswap();
1650         gv(float_type);
1651         vswap();
1652     }
1653     if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1654         gv(float_type);
1655
1656     /* must put at least one value in the floating point register */
1657     if ((vtop[-1].r & VT_LVAL) &&
1658         (vtop[0].r & VT_LVAL)) {
1659         vswap();
1660         gv(float_type);
1661         vswap();
1662     }
1663     swapped = 0;
1664     /* swap the stack if needed so that t1 is the register and t2 is
1665        the memory reference */
1666     if (vtop[-1].r & VT_LVAL) {
1667         vswap();
1668         swapped = 1;
1669     }
1670     if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1671         if (op >= TOK_ULT && op <= TOK_GT) {
1672             /* load on stack second operand */
1673             load(TREG_ST0, vtop);
1674             save_reg(TREG_RAX); /* eax is used by FP comparison code */
1675             if (op == TOK_GE || op == TOK_GT)
1676                 swapped = !swapped;
1677             else if (op == TOK_EQ || op == TOK_NE)
1678                 swapped = 0;
1679             if (swapped)
1680                 o(0xc9d9); /* fxch %st(1) */
1681             o(0xe9da); /* fucompp */
1682             o(0xe0df); /* fnstsw %ax */
1683             if (op == TOK_EQ) {
1684                 o(0x45e480); /* and $0x45, %ah */
1685                 o(0x40fC80); /* cmp $0x40, %ah */
1686             } else if (op == TOK_NE) {
1687                 o(0x45e480); /* and $0x45, %ah */
1688                 o(0x40f480); /* xor $0x40, %ah */
1689                 op = TOK_NE;
1690             } else if (op == TOK_GE || op == TOK_LE) {
1691                 o(0x05c4f6); /* test $0x05, %ah */
1692                 op = TOK_EQ;
1693             } else {
1694                 o(0x45c4f6); /* test $0x45, %ah */
1695                 op = TOK_EQ;
1696             }
1697             vtop--;
1698             vtop->r = VT_CMP;
1699             vtop->c.i = op;
1700         } else {
1701             /* no memory reference possible for long double operations */
1702             load(TREG_ST0, vtop);
1703             swapped = !swapped;
1704
1705             switch(op) {
1706             default:
1707             case '+':
1708                 a = 0;
1709                 break;
1710             case '-':
1711                 a = 4;
1712                 if (swapped)
1713                     a++;
1714                 break;
1715             case '*':
1716                 a = 1;
1717                 break;
1718             case '/':
1719                 a = 6;
1720                 if (swapped)
1721                     a++;
1722                 break;
1723             }
1724             ft = vtop->type.t;
1725             fc = vtop->c.ul;
1726             o(0xde); /* fxxxp %st, %st(1) */
1727             o(0xc1 + (a << 3));
1728             vtop--;
1729         }
1730     } else {
1731         if (op >= TOK_ULT && op <= TOK_GT) {
1732             /* if saved lvalue, then we must reload it */
1733             r = vtop->r;
1734             fc = vtop->c.ul;
1735             if ((r & VT_VALMASK) == VT_LLOCAL) {
1736                 SValue v1;
1737                 r = get_reg(RC_INT);
1738                 v1.type.t = VT_PTR;
1739                 v1.r = VT_LOCAL | VT_LVAL;
1740                 v1.c.ul = fc;
1741                 load(r, &v1);
1742                 fc = 0;
1743             }
1744
1745             if (op == TOK_EQ || op == TOK_NE) {
1746                 swapped = 0;
1747             } else {
1748                 if (op == TOK_LE || op == TOK_LT)
1749                     swapped = !swapped;
1750                 if (op == TOK_LE || op == TOK_GE) {
1751                     op = 0x93; /* setae */
1752                 } else {
1753                     op = 0x97; /* seta */
1754                 }
1755             }
1756
1757             if (swapped) {
1758                 gv(RC_FLOAT);
1759                 vswap();
1760             }
1761             assert(!(vtop[-1].r & VT_LVAL));
1762
1763             if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1764                 o(0x66);
1765             o(0x2e0f); /* ucomisd */
1766
1767             if (vtop->r & VT_LVAL) {
1768                 gen_modrm(vtop[-1].r, r, vtop->sym, fc);
1769             } else {
1770                 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
1771             }
1772
1773             vtop--;
1774             vtop->r = VT_CMP;
1775             vtop->c.i = op | 0x100;
1776         } else {
1777             assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
1778             switch(op) {
1779             default:
1780             case '+':
1781                 a = 0;
1782                 break;
1783             case '-':
1784                 a = 4;
1785                 break;
1786             case '*':
1787                 a = 1;
1788                 break;
1789             case '/':
1790                 a = 6;
1791                 break;
1792             }
1793             ft = vtop->type.t;
1794             fc = vtop->c.ul;
1795             assert((ft & VT_BTYPE) != VT_LDOUBLE);
1796
1797             r = vtop->r;
1798             /* if saved lvalue, then we must reload it */
1799             if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
1800                 SValue v1;
1801                 r = get_reg(RC_INT);
1802                 v1.type.t = VT_PTR;
1803                 v1.r = VT_LOCAL | VT_LVAL;
1804                 v1.c.ul = fc;
1805                 load(r, &v1);
1806                 fc = 0;
1807             }
1808
1809             assert(!(vtop[-1].r & VT_LVAL));
1810             if (swapped) {
1811                 assert(vtop->r & VT_LVAL);
1812                 gv(RC_FLOAT);
1813                 vswap();
1814             }
1815
1816             if ((ft & VT_BTYPE) == VT_DOUBLE) {
1817                 o(0xf2);
1818             } else {
1819                 o(0xf3);
1820             }
1821             o(0x0f);
1822             o(0x58 + a);
1823
1824             if (vtop->r & VT_LVAL) {
1825                 gen_modrm(vtop[-1].r, r, vtop->sym, fc);
1826             } else {
1827                 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
1828             }
1829
1830             vtop--;
1831         }
1832     }
1833 }
1834
1835 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1836    and 'long long' cases. */
1837 void gen_cvt_itof(int t)
1838 {
1839     if ((t & VT_BTYPE) == VT_LDOUBLE) {
1840         save_reg(TREG_ST0);
1841         gv(RC_INT);
1842         if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1843             /* signed long long to float/double/long double (unsigned case
1844                is handled generically) */
1845             o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1846             o(0x242cdf); /* fildll (%rsp) */
1847             o(0x08c48348); /* add $8, %rsp */
1848         } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1849                    (VT_INT | VT_UNSIGNED)) {
1850             /* unsigned int to float/double/long double */
1851             o(0x6a); /* push $0 */
1852             g(0x00);
1853             o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1854             o(0x242cdf); /* fildll (%rsp) */
1855             o(0x10c48348); /* add $16, %rsp */
1856         } else {
1857             /* int to float/double/long double */
1858             o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1859             o(0x2404db); /* fildl (%rsp) */
1860             o(0x08c48348); /* add $8, %rsp */
1861         }
1862         vtop->r = TREG_ST0;
1863     } else {
1864         int r = get_reg(RC_FLOAT);
1865         gv(RC_INT);
1866         o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
1867         if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1868             (VT_INT | VT_UNSIGNED) ||
1869             (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1870             o(0x48); /* REX */
1871         }
1872         o(0x2a0f);
1873         o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
1874         vtop->r = r;
1875     }
1876 }
1877
1878 /* convert from one floating point type to another */
1879 void gen_cvt_ftof(int t)
1880 {
1881     int ft, bt, tbt;
1882
1883     ft = vtop->type.t;
1884     bt = ft & VT_BTYPE;
1885     tbt = t & VT_BTYPE;
1886
1887     if (bt == VT_FLOAT) {
1888         gv(RC_FLOAT);
1889         if (tbt == VT_DOUBLE) {
1890             o(0x140f); /* unpcklps */
1891             o(0xc0 + REG_VALUE(vtop->r)*9);
1892             o(0x5a0f); /* cvtps2pd */
1893             o(0xc0 + REG_VALUE(vtop->r)*9);
1894         } else if (tbt == VT_LDOUBLE) {
1895             save_reg(RC_ST0);
1896             /* movss %xmm0,-0x10(%rsp) */
1897             o(0x110ff3);
1898             o(0x44 + REG_VALUE(vtop->r)*8);
1899             o(0xf024);
1900             o(0xf02444d9); /* flds -0x10(%rsp) */
1901             vtop->r = TREG_ST0;
1902         }
1903     } else if (bt == VT_DOUBLE) {
1904         gv(RC_FLOAT);
1905         if (tbt == VT_FLOAT) {
1906             o(0x140f66); /* unpcklpd */
1907             o(0xc0 + REG_VALUE(vtop->r)*9);
1908             o(0x5a0f66); /* cvtpd2ps */
1909             o(0xc0 + REG_VALUE(vtop->r)*9);
1910         } else if (tbt == VT_LDOUBLE) {
1911             save_reg(RC_ST0);
1912             /* movsd %xmm0,-0x10(%rsp) */
1913             o(0x110ff2);
1914             o(0x44 + REG_VALUE(vtop->r)*8);
1915             o(0xf024);
1916             o(0xf02444dd); /* fldl -0x10(%rsp) */
1917             vtop->r = TREG_ST0;
1918         }
1919     } else {
1920         gv(RC_ST0);
1921         int r = get_reg(RC_FLOAT);
1922         if (tbt == VT_DOUBLE) {
1923             o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1924             /* movsd -0x10(%rsp),%xmm0 */
1925             o(0x100ff2);
1926             o(0x44 + REG_VALUE(r)*8);
1927             o(0xf024);
1928             vtop->r = r;
1929         } else if (tbt == VT_FLOAT) {
1930             o(0xf0245cd9); /* fstps -0x10(%rsp) */
1931             /* movss -0x10(%rsp),%xmm0 */
1932             o(0x100ff3);
1933             o(0x44 + REG_VALUE(r)*8);
1934             o(0xf024);
1935             vtop->r = r;
1936         }
1937     }
1938 }
1939
1940 /* convert fp to int 't' type */
1941 void gen_cvt_ftoi(int t)
1942 {
1943     int ft, bt, size, r;
1944     ft = vtop->type.t;
1945     bt = ft & VT_BTYPE;
1946     if (bt == VT_LDOUBLE) {
1947         gen_cvt_ftof(VT_DOUBLE);
1948         bt = VT_DOUBLE;
1949     }
1950
1951     gv(RC_FLOAT);
1952     if (t != VT_INT)
1953         size = 8;
1954     else
1955         size = 4;
1956
1957     r = get_reg(RC_INT);
1958     if (bt == VT_FLOAT) {
1959         o(0xf3);
1960     } else if (bt == VT_DOUBLE) {
1961         o(0xf2);
1962     } else {
1963         assert(0);
1964     }
1965     orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
1966     o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
1967     vtop->r = r;
1968 }
1969
1970 /* computed goto support */
1971 void ggoto(void)
1972 {
1973     gcall_or_jmp(1);
1974     vtop--;
1975 }
1976
1977 /* end of x86-64 code generator */
1978 /*************************************************************/
1979 #endif /* ! TARGET_DEFS_ONLY */
1980 /******************************************************/