/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS     25
#define NB_ASM_REGS 8

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which
   makes assumptions about it). */
#define RC_INT   0x0001 /* generic integer register */
#define RC_FLOAT 0x0002 /* generic float register */
#define RC_RAX   0x0004
#define RC_RCX   0x0008
#define RC_RDX   0x0010
#define RC_ST0   0x0020 /* only for long double */
#define RC_R8    0x0040
#define RC_R9    0x0080
#define RC_XMM0  0x0100
#define RC_XMM1  0x0200
#define RC_XMM2  0x0400
#define RC_XMM3  0x0800
#define RC_XMM4  0x1000
#define RC_XMM5  0x2000
#define RC_XMM6  0x4000
#define RC_XMM7  0x8000
#define RC_RSI   0x10000
#define RC_RDI   0x20000
#define RC_INT1  0x40000 /* function pointer */
#define RC_INT2  0x80000
#define RC_RBX   0x100000
#define RC_R10   0x200000
#define RC_R11   0x400000
#define RC_R12   0x800000
#define RC_R13   0x1000000
#define RC_R14   0x2000000
#define RC_R15   0x4000000
#define RC_IRET  RC_RAX  /* function return: integer register */
#define RC_LRET  RC_RDX  /* function return: second integer register */
#define RC_FRET  RC_XMM0 /* function return: float register */
#define RC_QRET  RC_XMM1 /* function return: second float register */
#define RC_MASK  (RC_INT|RC_INT1|RC_INT2|RC_FLOAT)
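
/* Note: reg_classes[] below ORs together every class a register belongs
   to, so get_reg(RC_INT) may pick any free general-purpose register
   while get_reg(RC_RCX) pins the choice to %rcx.  That is how specific
   registers are forced when an instruction demands them, e.g. the
   variable shift count in gen_opi():

       gv2(RC_INT, RC_RCX);   value in some GP register, count in %rcx

   RC_INT1 and RC_INT2 are narrower GP subsets used where certain
   registers must stay untouched (e.g. indirect call targets). */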
/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_ST0 = 5,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_MEM = 0x20, /* flag ORed into a register number: the operand is
                        accessed through that register (GOT indirection),
                        see gen_modrm64() and load() */
};
#define REX_BASE(reg)  (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)
#define FLAG_GOT       0x01

/* return registers for function */
#define REG_IRET TREG_RAX  /* single word int return register */
#define REG_LRET TREG_RDX  /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16
/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32  R_X86_64_32
#define R_DATA_PTR R_X86_64_64
#define R_JMP_SLOT R_X86_64_JUMP_SLOT
#define R_COPY     R_X86_64_COPY

#define ELF_START_ADDR 0x400000
#define ELF_PAGE_SIZE  0x200000

/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#include "tcc.h"
#include <assert.h>
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT|RC_RAX|RC_INT2,
    /* ecx */ RC_INT|RC_RCX|RC_INT2,
    /* edx */ RC_INT|RC_RDX,
    /* ebx */ RC_INT|RC_INT1|RC_INT2|RC_RBX,
    /* esp */ 0,
    /* st0 */ RC_ST0,
    /* esi */ RC_RSI|RC_INT2,
    /* edi */ RC_RDI|RC_INT2,
    /* r8  */ RC_INT|RC_R8|RC_INT2,
    /* r9  */ RC_INT|RC_R9|RC_INT2,
    /* r10 */ RC_INT|RC_INT1|RC_INT2|RC_R10,
    /* r11 */ RC_INT|RC_INT1|RC_INT2|RC_R11,
    /* r12 */ RC_INT|RC_INT1|RC_INT2|RC_R12,
    /* r13 */ RC_INT|RC_INT1|RC_INT2|RC_R13,
    /* r14 */ RC_INT|RC_INT1|RC_INT2|RC_R14,
    /* r15 */ RC_INT|RC_INT1|RC_INT2|RC_R15,
    /* xmm0 */ RC_FLOAT|RC_XMM0,
    /* xmm1 */ RC_FLOAT|RC_XMM1,
    /* xmm2 */ RC_FLOAT|RC_XMM2,
    /* xmm3 */ RC_FLOAT|RC_XMM3,
    /* xmm4 */ RC_FLOAT|RC_XMM4,
    /* xmm5 */ RC_FLOAT|RC_XMM5,
    /* xmm6 */ RC_FLOAT|RC_XMM6,
    /* xmm7 */ RC_FLOAT|RC_XMM7,
};
static unsigned long func_sub_sp_offset;
static int func_ret_sub;

/* XXX: make it faster ? */
void g(int c)
{
    int ind1;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}

void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
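
/* orex() emits a REX prefix (0b0100WRXB) only when one is required:
   W (ll) selects 64-bit operand size, B extends the r/m field taken
   from 'r', and R extends the reg field taken from 'r2'.  A worked
   example: orex(1, TREG_R8, TREG_RAX, 0x8b) emits 0x49 0x8b, i.e.
   REX.W+REX.B followed by the mov opcode, the start of
   "mov (%r8),%rax".  Values >= VT_CONST are masked to 0 first, so
   SValue.r fields can be passed in directly. */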
/* output a symbol and patch all calls to it */
void gsym_addr(int t, int a)
{
    int n, *ptr;
    while (t) {
        ptr = (int *)(cur_text_section->data + t);
        n = *ptr; /* next value */
        *ptr = a - t - 4;
        t = n;
    }
}

void gsym(int t)
{
    gsym_addr(t, ind);
}
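
/* Forward references are linked through their own displacement slots:
   while a label is unresolved, the 4 bytes following each jmp/jcc
   opcode hold the offset of the previous reference to the same label
   (0 ends the chain).  gsym_addr() walks that chain and overwrites
   each slot with the final PC-relative displacement 'a - t - 4'.  The
   same trick is reused by reloc_add()/reloc_use() below for the VLA
   scratch-offset patches. */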
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad

static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}

/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
    s = ind;
    ind = ind1;
    return s;
}
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_32);
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    gen_le32(c-4);
}
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    Section *sr;
    ElfW(Rela) *rel;
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
    rel->r_addend = -4;
#else
    printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
           cur_text_section->data[ind-3],
           cur_text_section->data[ind-2],
           cur_text_section->data[ind-1]
           );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
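
/* Together with its callers (see load()), the ELF branch above
   produces for an access to a non-static global 'x' with
   displacement c roughly:

       mov x@GOTPCREL(%rip), %reg    R_X86_64_GOTPCREL, addend -4
       add $c, %reg                  only when c != 0

   The GOT slot supplies the symbol address, so a constant offset
   cannot be folded in and has to be added afterwards. */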
static void gen_modrm_impl(int op_reg, int fr, Sym *sym, int c, int flag)
{
    int r = fr & VT_VALMASK;
    op_reg = REG_VALUE(op_reg) << 3;
    if (r == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (flag & FLAG_GOT) {
            gen_gotpcrel(fr, sym, c);
        } else {
            gen_addrpc32(fr, sym, c);
        }
    } else if (r == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if (c) {
        if (c == (char)c) {
            /* short reference */
            g(0x40 | op_reg | REG_VALUE(fr));
            if (r == TREG_RSP)
                g(0x24);
            g(c);
        } else {
            g(0x80 | op_reg | REG_VALUE(fr));
            if (r == TREG_RSP)
                g(0x24);
            gen_le32(c);
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(fr));
        if (r == TREG_RSP)
            g(0x24);
    }
}
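
/* The encodings above follow the standard ModRM scheme: 0x05 | op_reg
   is mod=00 with r/m=101, which on x86-64 means RIP-relative (used for
   both the GOT and plain PC32 cases); 0x45/0x85 select mod=01/mod=10,
   i.e. an 8- or 32-bit displacement off %rbp; and the extra 0x24 byte
   is the SIB byte that is mandatory whenever the base register encodes
   as 100 (%rsp). */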
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}

/* generate a 64-bit opcode with a modrm reference. 'op_reg' contains
   the additional 3 opcode bits; if it has TREG_MEM set and the symbol
   is not static, the reference goes through the GOT */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int flag = 0;
    if ((op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC))
        flag = FLAG_GOT;
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, flag);
}
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr, ll;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.ul;
    ll = is64_type(ft);

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        } else {
            tr = r | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);
        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        if (fr & VT_TMP) {
            int size, align;
            if ((ft & VT_BTYPE) == VT_FUNC)
                size = 8;
            else
                size = type_size(&sv->type, &align);
            loc_stack(size, 0);
        }
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.ul = fc;
            fr = r;
            if (!(reg_classes[fr] & RC_INT))
                fr = get_reg(RC_INT);
            load(fr, &v1);
            fc = 0;
        }
        int b;
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x100ff3; /* movss */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x100ff2; /* movsd */
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f; /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f; /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f; /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f; /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            b = 0x8b;
        }
        orex(ll, fr, r, b);
        gen_modrm(r, fr, sv->sym, fc);
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else {
                orex(ll,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                if (ll)
                    gen_le64(sv->c.ull);
                else
                    gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            orex(0, r, 0, 0xb8 + REG_VALUE(r));
            if ((fc & ~0x100) == TOK_NE) {
                gen_le32(1); /* mov $1, r */
            } else {
                gen_le32(0); /* mov $0, r */
            }
            if (fc & 0x100) {
                fc &= ~0x100;
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE. */
                o(0x037a + (REX_BASE(r) << 8)); /* jp 3 */
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if (reg_classes[r] & RC_FLOAT) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmm0 */
                    o(0x100ff2);
                    o(0xf02444 + REG_VALUE(r)*8);
                } else if (reg_classes[v] & RC_FLOAT) {
                    o(0x7e0ff3);
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                } else
                    assert(0);
            } else if (r == TREG_ST0) {
                assert(reg_classes[v] & RC_FLOAT);
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmm0,-0x10(%rsp) */
                o(0x110ff2);
                o(0xf02444 + REG_VALUE(v)*8);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                if (fc) {
                    orex(1,fr,r,0x8d); /* lea xxx(%ebp), r */
                    gen_modrm(r, fr, sv->sym, fc);
                } else {
                    orex(ll,v,r, 0x8b);
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r) * 8); /* mov v, r */
                }
            }
        }
    }
}
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *sv)
{
    int fr, bt, ft, fc, ll, v;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.ul;
    fr = sv->r;
    bt = ft & VT_BTYPE;
    ll = is64_type(ft);
    v = fr & VT_VALMASK;

//#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
//    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
//        o(0x1d8b4c);
//        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
//        pic = is64_type(bt) ? 0x49 : 0x41;
//    }
//#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        orex(0, fr, r, 0x110ff3); /* movss */
    } else if (bt == VT_DOUBLE) {
        orex(0, fr, r, 0x110ff2); /* movsd */
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        orex(0, fr, r, 0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(ll, fr, r, 0x88);
        else
            orex(ll, fr, r, 0x89);
    }
    if (v == VT_CONST || v == VT_LOCAL || (fr & VT_LVAL)) {
        gen_modrm(r, fr, sv->sym, fc);
    } else if (v != r) {
        /* XXX: don't we really come here? */
        abort();
        o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8); /* mov r, fr */
    }
}
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym,
                   ind + 1, R_X86_64_PLT32);
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        }
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = get_reg(RC_INT1);
        load(r, vtop);
        orex(0, r, 0, 0xff); /* REX call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
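
/* The indirect-call register is requested from RC_INT1 (%rbx and
   %r10-%r15 in reg_classes[]).  None of these is an argument register
   in the SysV convention, and in the PE path the %r10/%r11 staging
   copies are moved to %rcx/%rdx before gcall_or_jmp() runs, so loading
   the function pointer here cannot clobber arguments that are already
   in place. */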
static int func_scratch;
static int r_loc;

int reloc_add(int inds)
{
    return psym(0, inds);
}

void reloc_use(int t, int data)
{
    int *ptr;
    while (t) {
        ptr = (int *)(cur_text_section->data + t);
        t = *ptr; /* next value */
        *ptr = data;
    }
}
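
/* reloc_add()/reloc_use() apply the gsym_addr() chaining idea to data
   that is only known once the whole function has been compiled:
   reloc_add() emits a 4-byte placeholder linked to the previous one and
   returns the new chain head, and reloc_use() later stores the final
   value into every slot.  gen_vla_alloc() chains the disp32 of its
   "lea ...(%rsp)" this way, and gfunc_epilog() resolves the chain with
   the final func_scratch size. */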
void struct_copy(SValue *d, SValue *s, SValue *c)
{
    if (!c->c.i)
        return;
    save_reg(TREG_RCX);
    load(TREG_RCX, c); /* count */
    load(TREG_RDI, d); /* destination */
    load(TREG_RSI, s); /* source */
    o(0xa4f3); /* rep movsb */
}

void gen_putz(SValue *d, int size)
{
    if (!size)
        return;
    save_reg(TREG_RAX);
    o(0xb0); /* mov $0, %al */
    g(0x00);
    save_reg(TREG_RCX);
    o(0xb8 + REG_VALUE(TREG_RCX)); /* mov $size, %ecx */
    gen_le32(size);
    load(TREG_RDI, d);
    o(0xaaf3); /* rep stosb */
}
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gen_offs_sp(int b, int r, int off)
{
    if (r & 0x100)
        o(b);
    else
        orex(1, 0, r, b);
    if (!off) {
        o(0x2404 | (REG_VALUE(r) << 3));
    } else if (off == (char)off) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(off);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(off);
    }
}
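
/* gen_offs_sp(op, r, off) addresses off(%rsp).  'r' doubles as an
   escape code: with bit 8 set (0x100) no REX prefix is emitted and the
   low bits supply the ModRM /digit instead of a register, so e.g.
   gen_offs_sp(0xdb, 0x107, off) encodes "fstpt off(%rsp)" (0xdb /7),
   used for spilling long double arguments. */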
#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align)
{
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86-64
    size = type_size(vt, &align);
    ret->ref = NULL;
    if (size > 8) {
        return 0;
    } else if (size > 4) {
        ret->t = VT_LLONG;
        return 1;
    } else if (size > 2) {
        ret->t = VT_INT;
        return 1;
    } else if (size > 1) {
        ret->t = VT_SHORT;
        return 1;
    } else {
        ret->t = VT_BYTE;
        return 1;
    }
}
static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}

int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (size > 8) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = func_scratch = r_loc = 0;
    pop_stack = loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    func_var = (sym->c == FUNC_ELLIPSIS);
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch - loc + 15) & -16;
    reloc_use(r_loc, func_scratch);
    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148);   /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
#else

typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_sse,
    x86_64_mode_x87
} X86_64_Mode;
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
static X86_64_Mode classify_x86_64_inner(CType *ty)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_QLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_QFLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        // Detect union
        if (f->next && (f->c == f->next->c))
            return x86_64_mode_memory;

        mode = x86_64_mode_none;
        for (f = f->next; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }

    assert(0);
}
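
/* This classifier is deliberately coarser than the full SysV ABI
   algorithm: instead of classifying each eightbyte of an aggregate
   separately, the classes of all members are merged into one mode for
   the whole object.  For example, struct { double d; long l; } merges
   sse with integer into x86_64_mode_integer, so the pair travels in
   two GP registers (VT_QLONG below), where the full ABI would use one
   SSE and one GP register.  Unions (detected by two fields at the same
   offset) are conservatively forced to memory. */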
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
{
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
            ret_t = ty->t;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    if (size > 4)
                        ret_t = VT_LLONG;
                    else if (size > 2)
                        ret_t = VT_INT;
                    else if (size > 1)
                        ret_t = VT_SHORT;
                    else
                        ret_t = VT_BYTE;
                    ret_t |= (ty->t & VT_UNSIGNED);
                }
                break;
            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;
            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default:
                ret_t = ty->t;
                break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none */
            }
        }
    }

    if (ret) {
        ret->ref = ty->ref;
        ret->t = ret_t;
    }

    return mode;
}
ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_ld_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_x87: return __va_ld_reg;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align)
{
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
}
#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    int size, align, args_size, s, e, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int gen_reg, sse_reg;
    CType type;

    /* fetch cpu flag before the following sub will change the value */
    if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
        gv(RC_INT);
    /* calculate the number of integer/float register arguments */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse)
            nb_sse_args += reg_count;
        else if (mode == x86_64_mode_integer)
            nb_reg_args += reg_count;
    }

    args_size = 0;
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_x87:
            if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT)
                goto stack_arg1;
            else
                args_size = (args_size + 15) & ~15;
            /* fall through */
        case x86_64_mode_memory:
        stack_arg1:
            args_size += size;
            break;
        case x86_64_mode_sse:
            sse_reg -= reg_count;
            if (sse_reg + reg_count > 8)
                goto stack_arg1;
            break;
        case x86_64_mode_integer:
            gen_reg -= reg_count;
            if (gen_reg + reg_count > REGN)
                goto stack_arg1;
            break;
        default: break; /* nothing to be done for x86_64_mode_none */
        }
    }

    args_size = (args_size + 15) & ~15;
    if (func_scratch < args_size)
        func_scratch = args_size;

    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    for(s = e = 0; s < nb_args; s = e) {
        int run_gen, run_sse, st_size;
        run_gen = gen_reg;
        run_sse = sse_reg;
        st_size = 0;
        for(i = s; i < nb_args; i++) {
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            switch (mode) {
            case x86_64_mode_x87:
                if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
                    goto stack_arg2;
                } else {
                    ++i;
                    goto doing;
                }
            case x86_64_mode_memory:
            stack_arg2:
                st_size += size;
                break;
            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8)
                    goto stack_arg2;
                break;
            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN)
                    goto stack_arg2;
                break;
            default: break; /* nothing to be done for x86_64_mode_none */
            }
        }
    doing:
        e = i;
        st_size = -st_size & 15; /* pad the stacked arguments to 16 bytes */
        if (st_size)
            args_size -= st_size;

        gen_reg = run_gen;
        sse_reg = run_sse;
        for(i = s; i < e; i++) {
            SValue tmp;
            /* Swap argument to top, it will possibly be changed here,
               and might use more temps. All arguments must remain on the
               stack, so that get_reg can correctly evict some of them onto
               stack. We could also use a vrott(nb_args) at the end
               of this loop, but this seems faster. */
            if (i != 0) {
                tmp = vtop[0];
                vtop[0] = vtop[-i];
                vtop[-i] = tmp;
            }

            mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
            switch (mode) {
            case x86_64_mode_x87:
                /* Must ensure TREG_ST0 only */
                if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
                    vdup();
                    vtop[-1].r = VT_CONST;
                    vtop->type = type;
                    gv(RC_ST0);
                    args_size -= size;
                    gen_offs_sp(0xdb, 0x107, args_size);
                    vtop--; /* Release TREG_ST0 */
                } else {
                    gv(RC_ST0);
                    args_size -= size;
                    gen_offs_sp(0xdb, 0x107, args_size);
                    vtop->r = VT_CONST; /* Release TREG_ST0 */
                }
                break;
            case x86_64_mode_memory:
                args_size -= size;
                vset(&char_pointer_type, TREG_RSP, args_size); /* generate memcpy to RSP */
                vpushv(&vtop[-1]);
                vtop->type = char_pointer_type;
                gaddrof();
                vpushs(size);
                struct_copy(&vtop[-2], &vtop[-1], &vtop[0]);
                vtop -= 3;
                break;
            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8) {
                    args_size -= size;
                    goto gen_code;
                }
                break;
            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN) {
                    args_size -= size;
                gen_code:
                    vset(&type, TREG_RSP | VT_LVAL, args_size);
                    vpushv(&vtop[-1]);
                    vtop->type = type;
                    vstore();
                    vtop--;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_none */
            }
            if (i != 0) {
                tmp = vtop[0];
                vtop[0] = vtop[-i];
                vtop[-i] = tmp;
            }
        }
        run_gen = gen_reg;
        run_sse = sse_reg;
    }

    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    for(i = 0; i < nb_args; i++) {
        int d;
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            sse_reg -= reg_count;
            if (sse_reg + reg_count <= 8) {
                if (reg_count == 2) {
                    ex_rc = RC_XMM0 << (sse_reg + 1);
                    gv(RC_XMM0 << sse_reg);
                } else {
                    assert(reg_count == 1);
                    /* Load directly to register */
                    gv(RC_XMM0 << sse_reg);
                }
            }
        } else if (mode == x86_64_mode_integer) {
            gen_reg -= reg_count;
            if (gen_reg + reg_count <= REGN) {
                if (reg_count == 2) {
                    d = arg_regs[gen_reg+1];
                    ex_rc = reg_classes[d] & ~RC_MASK;
                    d = arg_regs[gen_reg];
                    gv(reg_classes[d] & ~RC_MASK);
                } else {
                    assert(reg_count == 1);
                    d = arg_regs[gen_reg];
                    gv(reg_classes[d] & ~RC_MASK);
                }
            }
        }
        vpop();
    }
    save_regs(0);
    /* SysV: %al must bound the number of SSE registers used by a
       varargs callee */
    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    pop_stack = loc = 0;
    func_scratch = r_loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count <= REGN) {
                    seen_reg_num += reg_count;
                } else {
                    seen_reg_num = 8;
                    goto stack_arg;
                }
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count <= 8) {
                    seen_sse_num += reg_count;
                } else {
                    seen_sse_num = 8;
                    goto stack_arg;
                }
                break;
            }
        }

        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);

        o(0xc084); /* test %al,%al */
        o(0x74);   /* je */
        g(4*(8 - seen_sse_num) + 3);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            o(0x290f); /* movaps %xmm7..%xmm0, -XXX(%rbp) */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
        }
        for (i = 0; i < (REGN - seen_reg_num); i++) {
            push_arg_reg(REGN-1 - i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
                sse_param_index += reg_count;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
                reg_param_index += reg_count;
            }
            break;
        }
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }
}
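
/* Sketch of the varargs register-save protocol set up above (it must
   stay in sync with the companion stdarg.h implementation): the three
   movl stores leave at -0x10..-0x8(%rbp) the number of GP-register
   bytes already consumed by named parameters (seen_reg_num * 8), the
   offset of the first free SSE slot in the save area (seen_sse_num * 16
   + 48, the GP part being REGN * 8 = 48 bytes), and the size of the
   named stack arguments.  The "test %al,%al; je" sequence skips the
   xmm spills when the caller reported zero SSE arguments in %al. */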
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (func_scratch - loc + 15) & -16;
    reloc_use(r_loc, func_scratch);
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
    o(0xec8148);   /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}
#endif /* not PE */

/* generate a jump to a label */
int gjmp(int t)
{
    return psym(0xe9, t);
}

/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
{
    int v, *p;

    v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100) {
            /* This was a float compare.  If the parity flag is set
               the result was unordered.  For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test.  We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump.  */
            vtop->c.i &= ~0x100;
            if (!inv == (vtop->c.i != TOK_NE))
                o(0x067a); /* jp +6 */
            else {
                g(0x0f);
                t = psym(0x8a, t); /* jp t */
            }
        }
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            p = &vtop->c.i;
            while (*p != 0)
                p = (int *)(cur_text_section->data + *p);
            *p = t;
            t = vtop->c.i;
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    } else {
        if (is_float(vtop->type.t) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            vpushi(0);
            gen_op(TOK_NE);
        }
        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
            /* constant jmp optimization */
            if ((vtop->c.i != 0) != inv)
                t = gjmp(t);
        } else {
            v = gv(RC_INT);
            orex(0,v,v,0x85);
            o(0xc0 + REG_VALUE(v) * 9);
            g(0x0f);
            t = psym(0x85 ^ inv, t);
        }
    }
    vtop--;
    return t;
}
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, fc, c, ll, uu, cc, tt2;

    fr = vtop[0].r;
    fc = vtop->c.ul;
    ll = is64_type(vtop[-1].type.t);
    cc = (fr & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
    tt2 = (fr & (VT_LVAL | VT_LVAL_TYPE)) == VT_LVAL;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        vswap();
        r = gv(RC_INT);
        vswap();
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
            /* constant case */
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 + REG_VALUE(r) + opc*8);
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 + REG_VALUE(r) + opc*8, c);
            }
        } else {
            if (!tt2)
                fr = gv(RC_INT);
            orex(ll, fr, r, 0x03 + opc*8);
            if (fr >= VT_CONST)
                gen_modrm(r, fr, vtop->sym, fc);
            else
                o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        opc = 5;
        vswap();
        r = gv(RC_INT);
        vswap();
        if (!tt2)
            fr = gv(RC_INT);
        if (r == TREG_RAX) {
            if (fr != TREG_RDX)
                save_reg(TREG_RDX);
            orex(ll, fr, r, 0xf7);
            if (fr >= VT_CONST)
                gen_modrm(opc, fr, vtop->sym, fc);
            else
                o(0xc0 + REG_VALUE(fr) + opc*8);
        } else {
            orex(ll, fr, r, 0xaf0f); /* imul fr, r */
            if (fr >= VT_CONST)
                gen_modrm(r, fr, vtop->sym, fc);
            else
                o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
        }
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == 1) {
                orex(ll, r, 0, 0xd1);
                o(0xc0 + REG_VALUE(r) + opc*8);
            } else {
                orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
                o(0xc0 + REG_VALUE(r) + opc*8);
                g(c & (ll ? 0x3f : 0x1f));
            }
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(0xc0 + REG_VALUE(r) + opc*8);
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        opc = 6;
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        opc = 7;
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        if (!tt2) {
            gv2(RC_RAX, RC_INT2);
            fr = vtop[0].r;
        } else {
            vswap();
            gv(RC_RAX);
            vswap();
        }
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cdq/cqo (sign-extend %rax into %rdx:%rax) */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        if (fr >= VT_CONST)
            gen_modrm(opc, fr, vtop->sym, fc);
        else
            o(0xc0 + REG_VALUE(fr) + opc*8);
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop--;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}
void gen_opl(int op)
{
    gen_opi(op);
}

/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, fr, r;
    int float_type = (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    swapped = 0;
    fc = vtop->c.ul;
    ft = vtop->type.t;

    if ((ft & VT_BTYPE) == VT_LDOUBLE) {
        /* swap the stack if needed so that t1 is the register and t2 is
           the memory reference */
        /* must put at least one value in the floating point register */
        if ((vtop[-1].r & VT_LVAL) && (vtop[0].r & VT_LVAL)) {
            vswap();
            gv(float_type);
            vswap();
        }
        if (vtop[-1].r & VT_LVAL) {
            vswap();
            swapped = 1;
        }
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fc80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        vswap();
        gv(float_type);
        vswap();
        fr = vtop->r;
        r = vtop[-1].r;
        if (op >= TOK_ULT && op <= TOK_GT) {
            switch(op) {
            case TOK_LE:
                op = TOK_ULE; /* setbe */
                break;
            case TOK_LT:
                op = TOK_ULT; /* setb */
                break;
            case TOK_GE:
                op = TOK_UGE; /* setae */
                break;
            case TOK_GT:
                op = TOK_UGT; /* seta */
                break;
            }
            assert(!(vtop[-1].r & VT_LVAL));
            if ((ft & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            o(0x2e0f); /* ucomisd */
            if (fr >= VT_CONST)
                gen_modrm(r, fr, vtop->sym, fc);
            else
                o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            /* no memory reference possible for long double operations */
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            assert((ft & VT_BTYPE) != VT_LDOUBLE);
            assert(!(vtop[-1].r & VT_LVAL));
            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);
            if (fr >= VT_CONST)
                gen_modrm(r, fr, vtop->sym, fc);
            else
                o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
            vtop--;
        }
    }
}
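
/* In the x87 comparison sequence above, fnstsw %ax exposes the FPU
   condition bits in %ah: C0 is 0x01, C2 is 0x04 and C3 is 0x40, so the
   mask 0x45 covers C0|C2|C3.  "and $0x45; cmp $0x40" therefore tests
   for equal-and-ordered, "and $0x45; xor $0x40" for not-equal, and the
   "test $0x05" / "test $0x45" variants reduce the remaining orderings
   to the ZF checks consumed as TOK_EQ/TOK_NE pseudo-flags. */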
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    int ft, bt, tbt, r;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;
    r = gv(RC_INT);

    if (tbt == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        if ((ft & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + REG_VALUE(r)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((ft & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + REG_VALUE(r)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + REG_VALUE(r)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r_xmm;
        r_xmm = get_reg(RC_FLOAT);
        o(0xf2 + (tbt == VT_FLOAT));
        if ((ft & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED) || bt == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + REG_VALUE(r) + REG_VALUE(r_xmm)*8); /* cvtsi2sd or cvtsi2ss */
        vtop->r = r_xmm;
    }
}
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt, r;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_LDOUBLE)
        r = get_reg(RC_FLOAT);
    else
        r = gv(RC_FLOAT);
    if (bt == VT_FLOAT) {
        if (tbt == VT_DOUBLE) {
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(r) + REG_VALUE(r) * 8);
        } else if (tbt == VT_LDOUBLE) {
            /* movss %xmm0-7,-0x10(%rsp) */
            o(0x110ff3);
            o(0xf02444 + REG_VALUE(r)*8);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        if (tbt == VT_FLOAT) {
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(r) + REG_VALUE(r) * 8);
        } else if (tbt == VT_LDOUBLE) {
            /* movsd %xmm0-7,-0x10(%rsp) */
            o(0x110ff2);
            o(0xf02444 + REG_VALUE(r)*8);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        gv(RC_ST0);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0-7 */
            o(0x100ff2);
            o(0xf02444 + REG_VALUE(r)*8);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0-7 */
            o(0x100ff3);
            o(0xf02444 + REG_VALUE(r)*8);
            vtop->r = r;
        }
    }
}
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, ll, r, r_xmm;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;

    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }
    r_xmm = gv(RC_FLOAT);
    if ((t & VT_BTYPE) == VT_INT)
        ll = 0;
    else
        ll = 1;
    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(ll, r, r_xmm, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(r_xmm) + (REG_VALUE(r) << 3));
    vtop->r = r;
}
/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}
/* Save the stack pointer to the %rbp-relative stack location 'addr' */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp) */
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp (REX.W, plus REX.B when r is %r8-%r15) */
    orex(1, r, TREG_RSP, 0x2b);
    o(0xe0 | REG_VALUE(r));
    /* and ~15, %rsp */
    o(0xf0e48348);
    /* lea disp32(%rsp), r; the disp32 placeholder emitted by
       reloc_add() is patched to func_scratch in gfunc_epilog() */
    orex(1, 0, r, 0x8d);
    o(0x2484 | (REG_VALUE(r)*8));
    r_loc = reloc_add(r_loc);
    vpop();
    vset(type, r, 0);
}
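
/* VLA support in a sketch: for 'T a[n]' the compiler first saves %rsp
   with gen_vla_sp_save() so enclosing scopes can unwind it later, then
   gen_vla_alloc() subtracts the size, realigns %rsp to 16 bytes, and
   yields the array pointer as "lea <func_scratch>(%rsp), r".  Because
   func_scratch (the outgoing-call scratch area that must stay below
   the VLA data) keeps growing until the whole function has been
   compiled, that lea displacement cannot be emitted directly; it is
   chained via reloc_add()/r_loc and resolved in gfunc_epilog() by
   reloc_use(r_loc, func_scratch). */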
/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/