Fixed x86-64 long double passing.
[tinycc.git] / x86_64-gen.c
blobdb24cddc1a00564a98aac8df97244377922a631d
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
26 #define NB_REGS 24
27 #define NB_ASM_REGS 8
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which does
31 assumptions on it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
34 #define RC_RAX 0x0004
35 #define RC_RCX 0x0008
36 #define RC_RDX 0x0010
37 #define RC_ST0 0x0080 /* only for long double */
38 #define RC_R8 0x0100
39 #define RC_R9 0x0200
40 #define RC_R10 0x0400
41 #define RC_R11 0x0800
42 #define RC_XMM0 0x1000
43 #define RC_XMM1 0x2000
44 #define RC_XMM2 0x4000
45 #define RC_XMM3 0x8000
46 #define RC_XMM4 0x10000
47 #define RC_XMM5 0x20000
48 #define RC_XMM6 0x40000
49 #define RC_XMM7 0x80000
50 #define RC_IRET RC_RAX /* function return: integer register */
51 #define RC_LRET RC_RDX /* function return: second integer register */
52 #define RC_FRET RC_XMM0 /* function return: float register */
53 #define RC_QRET RC_XMM1 /* function return: second float register */
55 /* pretty names for the registers */
56 enum {
57 TREG_RAX = 0,
58 TREG_RCX = 1,
59 TREG_RDX = 2,
60 TREG_RSI = 6,
61 TREG_RDI = 7,
63 TREG_R8 = 8,
64 TREG_R9 = 9,
65 TREG_R10 = 10,
66 TREG_R11 = 11,
68 TREG_XMM0 = 16,
69 TREG_XMM1 = 17,
70 TREG_XMM2 = 18,
71 TREG_XMM3 = 19,
72 TREG_XMM4 = 20,
73 TREG_XMM5 = 21,
74 TREG_XMM6 = 22,
75 TREG_XMM7 = 23,
77 TREG_ST0 = 4, // SP slot won't be used
79 TREG_MEM = 0x20,
82 #define REX_BASE(reg) (((reg) >> 3) & 1)
83 #define REG_VALUE(reg) ((reg) & 7)
85 /* return registers for function */
86 #define REG_IRET TREG_RAX /* single word int return register */
87 #define REG_LRET TREG_RDX /* second word return register (for long long) */
88 #define REG_FRET TREG_XMM0 /* float return register */
89 #define REG_QRET TREG_XMM1 /* second float return register */
91 /* defined if function parameters must be evaluated in reverse order */
92 #define INVERT_FUNC_PARAMS
94 /* pointer size, in bytes */
95 #define PTR_SIZE 8
97 /* long double size and alignment, in bytes */
98 #define LDOUBLE_SIZE 16
99 #define LDOUBLE_ALIGN 16
100 /* maximum alignment (for aligned attribute support) */
101 #define MAX_ALIGN 16
103 /******************************************************/
104 /* ELF defines */
106 #define EM_TCC_TARGET EM_X86_64
108 /* relocation type for 32 bit data relocation */
109 #define R_DATA_32 R_X86_64_32
110 #define R_DATA_PTR R_X86_64_64
111 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
112 #define R_COPY R_X86_64_COPY
114 #define ELF_START_ADDR 0x08048000
115 #define ELF_PAGE_SIZE 0x1000
117 /******************************************************/
118 #else /* ! TARGET_DEFS_ONLY */
119 /******************************************************/
120 #include "tcc.h"
121 #include <assert.h>
123 ST_DATA const int reg_classes[NB_REGS] = {
124 /* eax */ RC_INT | RC_RAX,
125 /* ecx */ RC_INT | RC_RCX,
126 /* edx */ RC_INT | RC_RDX,
128 /* st0 */ RC_ST0,
132 RC_R8,
133 RC_R9,
134 RC_R10,
135 RC_R11,
140 /* xmm0 */ RC_FLOAT | RC_XMM0,
141 /* xmm1 */ RC_FLOAT | RC_XMM1,
142 /* xmm2 */ RC_FLOAT | RC_XMM2,
143 /* xmm3 */ RC_FLOAT | RC_XMM3,
144 /* xmm4 */ RC_FLOAT | RC_XMM4,
145 /* xmm5 */ RC_FLOAT | RC_XMM5,
146 /* xmm6 an xmm7 are included so gv() can be used on them,
147 but they are not tagged with RC_FLOAT because they are
148 callee saved on Windows */
149 RC_XMM6,
150 RC_XMM7
153 static unsigned long func_sub_sp_offset;
154 static int func_ret_sub;
156 /* XXX: make it faster ? */
157 void g(int c)
159 int ind1;
160 ind1 = ind + 1;
161 if (ind1 > cur_text_section->data_allocated)
162 section_realloc(cur_text_section, ind1);
163 cur_text_section->data[ind] = c;
164 ind = ind1;
/* Emit the non-zero bytes of 'c', least-significant byte first.
   Emits nothing at all for c == 0 (callers rely on that, e.g. the
   optional PIC REX prefix in store()). */
void o(unsigned int c)
{
    for (; c; c >>= 8)
        g(c);
}
/* Emit a 16-bit little-endian value. */
void gen_le16(int v)
{
    int i;
    for (i = 0; i < 2; i++)
        g(v >> (8 * i));
}
/* Emit a 32-bit little-endian value. */
void gen_le32(int c)
{
    int i;
    for (i = 0; i < 4; i++)
        g(c >> (8 * i));
}
189 void gen_le64(int64_t c)
191 g(c);
192 g(c >> 8);
193 g(c >> 16);
194 g(c >> 24);
195 g(c >> 32);
196 g(c >> 40);
197 g(c >> 48);
198 g(c >> 56);
/* Emit an optional REX prefix followed by opcode byte 'b'.
   'll' non-zero selects 64-bit operand size (REX.W).
   'r' is the ModRM r/m register, 'r2' the ModRM reg register; values
   that are not real registers (>= VT_CONST in the value mask) are
   treated as 0 so they contribute no extension bit.
   The prefix is emitted only when actually needed (REX.W set, or
   either register is r8..r15 / xmm8+). */
void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
212 /* output a symbol and patch all calls to it */
/* Patch a chain of forward branches to target address 'a'.
   't' is the head of a linked list threaded through the 32-bit
   displacement fields themselves: each field holds the offset of the
   next patch site (0 terminates).  Each is rewritten with the
   PC-relative displacement (a - t - 4).
   NOTE(review): the unaligned int* access assumes an architecture that
   tolerates unaligned 32-bit stores, as the x86 hosts this targets do. */
void gsym_addr(int t, int a)
{
    int n, *ptr;
    while (t) {
        ptr = (int *)(cur_text_section->data + t);
        n = *ptr; /* next value */
        *ptr = a - t - 4;
        t = n;
    }
}
224 void gsym(int t)
226 gsym_addr(t, ind);
229 /* psym is used to put an instruction with a data field which is a
230 reference to a symbol. It is in fact the same as oad ! */
231 #define psym oad
233 static int is64_type(int t)
235 return ((t & VT_BTYPE) == VT_PTR ||
236 (t & VT_BTYPE) == VT_FUNC ||
237 (t & VT_BTYPE) == VT_LLONG);
240 static int is_sse_float(int t) {
241 int bt;
242 bt = t & VT_BTYPE;
243 return bt == VT_DOUBLE || bt == VT_FLOAT;
247 /* instruction + 4 bytes data. Return the address of the data */
/* instruction + 4 bytes data. Return the address of the data */
/* Emits opcode bytes 'c' (via o()), then a 32-bit immediate 's', and
   returns the section offset of the immediate so it can be patched
   later (used for branch chains, see gsym_addr()).
   NOTE(review): the int* store is unaligned/little-endian — fine for
   the x86 hosts tcc supports, but not strictly portable. */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
    s = ind;
    ind = ind1;
    return s;
}
/* Output a 32-bit absolute constant, adding an R_X86_64_32 relocation
   against 'sym' when 'r' carries VT_SYM. */
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_32);
    gen_le32(c);
}
269 /* output constant with relocation if 'r & VT_SYM' is true */
/* output constant with relocation if 'r & VT_SYM' is true */
/* 64-bit variant: emits an R_X86_64_64 absolute relocation. */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    gen_le64(c);
}
277 /* output constant with relocation if 'r & VT_SYM' is true */
/* output constant with relocation if 'r & VT_SYM' is true */
/* PC-relative variant: the -4 compensates for the displacement being
   measured from the end of the 4-byte field. */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    gen_le32(c-4);
}
285 /* output got address with relocation */
/* output got address with relocation */
/* Emits the 32-bit displacement of a GOT entry for 'sym' (ELF only) by
   adding an R_X86_64_GOTPCREL relocation with addend -4, then, if a
   constant offset 'c' was requested, adds it to register 'r' with an
   add-imm32 so the final address is GOT-entry + c. */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    Section *sr;
    ElfW(Rela) *rel;
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    /* patch the addend of the relocation that greloc just appended */
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
    rel->r_addend = -4;
#else
    /* PE has no GOT; this path only traces and falls back to PC32.
       NOTE(review): looks like debug scaffolding left in — verify. */
    printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
        cur_text_section->data[ind-3],
        cur_text_section->data[ind-2],
        cur_text_section->data[ind-1]
        );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
/* Emit the ModRM byte (and any displacement) for a memory operand.
   'op_reg' supplies the reg field (opcode extension or source/dest
   register); 'r' selects the addressing form:
     VT_CONST  -> RIP-relative (via GOT when is_got)
     VT_LOCAL  -> RBP-relative with 8- or 32-bit displacement
     >= TREG_MEM -> indirect through a general register
     otherwise -> plain register-indirect, no displacement. */
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg); /* mod=00, rm=101: RIP-relative */
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg); /* disp8(%rbp) */
            g(c);
        } else {
            oad(0x85 | op_reg, c); /* disp32(%rbp) */
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r)); /* disp32(reg) */
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r)); /* (reg) */
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r)); /* (reg) */
    }
}
344 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
345 opcode bits */
/* generate a modrm reference. 'op_reg' contains the addtionnal 3
   opcode bits; non-GOT (direct) addressing variant. */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}
351 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
352 opcode bits */
/* Emit REX.W + 'opcode' + ModRM for a 64-bit memory access.
   GOT indirection is used when op_reg carries the TREG_MEM bit and the
   symbol is not static.
   NOTE(review): when the TREG_MEM bit is set, 'sym' is dereferenced —
   callers passing sym == NULL must not set that bit; confirm at call
   sites. */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
362 /* load 'r' from value 'sv' */
/* load 'r' from value 'sv' */
/* Materializes stack value 'sv' into register 'r'.  Handles:
   - GOT-indirect loads of non-static globals (ELF),
   - lvalue loads with the correct width/extension opcode per type,
   - constants (with relocations), local addresses, comparison flags,
     jump chains, and register-to-register moves including the
     x87 <-> SSE transfers needed for long double. */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t;
    fc = sv->c.ul;

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporal register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporal register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporal register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            /* double indirection: first load the pointer itself */
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.ul = fc;
            fr = r;
            if (!(reg_classes[fr] & RC_INT))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        /* pick the load opcode matching the operand type */
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE) {
            b = 0xbe0f; /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f; /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f; /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f; /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b; /* plain mov */
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.ull);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            /* materialize comparison flags as 0/1 via setcc */
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100)
              {
                /* This was a float compare.  If the parity flag is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE.  */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8)); /* jp +3: skip the setcc */
              }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            /* turn a pending jump chain into a 0/1 value */
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    /* x87 -> SSE via a scratch stack slot */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3); /* movss */
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2); /* movsd */
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) || (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* SSE -> x87 via a scratch stack slot */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
524 /* store register 'r' in lvalue 'v' */
/* store register 'r' in lvalue 'v' */
/* Selects the store opcode by basic type (movd/movq for SSE, fstpt for
   long double, mov with width prefixes otherwise) and the addressing
   form; when the target is a non-static ELF global, stores go through
   its GOT entry loaded into %r11 ('pic' then holds the REX prefix to
   emit before the store opcode). */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.ul;
    fr = v->r & VT_VALMASK;
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic); /* no-op when pic == 0, since o() skips zero */
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7; /* fstpt's /7 opcode extension, not a register */
    } else {
        if (bt == VT_SHORT)
            o(0x66); /* operand-size prefix for 16-bit stores */
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89; /* deferred: REX.W emitted by gen_modrm64 */
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */ /* unreachable after abort() */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */ /* unreachable after abort() */
        }
    }
}
603 /* 'is_jmp' is '1' if it is a jump */
/* 'is_jmp' is '1' if it is a jump */
/* Emit a call (is_jmp == 0) or tail-jump (is_jmp == 1) to the function
   value on top of the value stack: direct rel32 with a PC32 relocation
   for constant addresses, otherwise an indirect call through %r11
   (chosen because it is never used for argument passing). */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym,
                   ind + 1, R_X86_64_PC32);
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        }
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
629 #ifdef TCC_TARGET_PE
631 #define REGN 4
632 static const uint8_t arg_regs[REGN] = {
633 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
636 /* Prepare arguments in R10 and R11 rather than RCX and RDX
637 because gv() will not ever use these */
638 static int arg_prepare_reg(int idx) {
639 if (idx == 0 || idx == 1)
640 /* idx=0: r10, idx=1: r11 */
641 return idx + 10;
642 else
643 return arg_regs[idx];
646 static int func_scratch;
648 /* Generate function call. The function address is pushed first, then
649 all the parameters in call order. This functions pops all the
650 parameters and the function address. */
/* Emit instruction 'b' with an RSP-relative operand at offset 'd',
   choosing a disp8 or disp32 encoding.  Bit 0x100 in 'r' suppresses
   the REX register extension (the register field is then an opcode
   extension, e.g. fstpt). */
void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3)); /* disp8(%rsp) */
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3)); /* disp32(%rsp) */
        gen_le32(d);
    }
}
664 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
665 ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
666 *ret_align = 1; // Never have to re-align return values for x86-64
667 int size, align;
668 size = type_size(vt, &align);
669 ret->ref = NULL;
670 if (size > 8) {
671 return 1;
672 } else if (size > 4) {
673 ret->t = VT_LLONG;
674 return 0;
675 } else if (size > 2) {
676 ret->t = VT_INT;
677 return 0;
678 } else if (size > 1) {
679 ret->t = VT_SHORT;
680 return 0;
681 } else {
682 ret->t = VT_BYTE;
683 return 0;
687 int gfunc_arg_size(CType *type) {
688 if (type->t & (VT_ARRAY|VT_BITFIELD))
689 return 8;
690 int align;
691 return type_size(type, &align);
/* Generate a function call for the Windows x64 ABI.  The function
   address was pushed first, then all the parameters in call order.
   Pops all the parameters and the function address.  Large arguments
   (structs > 8 bytes, long double) are copied to a scratch area above
   the argument slots first, then a pointer (or nothing, for long
   double which is passed in its slot) is passed in the normal way. */
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    /* always reserve the 4 register home slots required by the ABI */
    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        --arg;

        SValue *sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size); /* lea scratch(%rsp), r */
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size); /* fstpt scratch(%rsp) */
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (size > 8) {
            /* pass a pointer to the scratch copy made above */
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8); /* store pointer in its slot */
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    /* win64 varargs also expect the value in the GP reg */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    /* small struct: reinterpret as integer of same size */
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);
    vtop--;
}
809 #define FUNC_PROLOG_SIZE 11
811 /* generate function prolog of type 't' */
/* generate function prolog of type 't' (Windows x64 ABI) */
/* Spills register arguments into their stack home slots, pushes the
   parameter symbols, and — for varargs functions — also spills the
   unused register slots so va_arg can walk the stack uniformly.  The
   actual frame setup is patched in later by gfunc_epilog() into the
   FUNC_PROLOG_SIZE bytes skipped here. */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2; /* skip saved rbp and return address */
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            /* passed by reference: the slot holds a pointer */
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    /* spill remaining register slots for varargs functions */
    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
875 /* generate function epilog */
/* generate function epilog (Windows x64 ABI) */
/* Emits leave/ret, then jumps back to the FUNC_PROLOG_SIZE bytes
   reserved by gfunc_prolog() and writes the real frame setup there,
   now that the total frame size (locals + scratch) is known.  Frames
   of 4096 bytes or more go through __chkstk to touch each page. */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
911 #else
/* Emit 'add $val, %rsp', preferring the short imm8 encoding when the
   value fits in a signed byte. */
static void gadd_sp(int val)
{
    if (val != (char)val) {
        oad(0xc48148, val); /* add $xxx, %rsp */
    } else {
        o(0xc48348); /* add $imm8, %rsp */
        g(val);
    }
}
/* Argument classes of the System V x86-64 ABI (simplified). */
typedef enum X86_64_Mode {
  x86_64_mode_none,
  x86_64_mode_memory,
  x86_64_mode_integer,
  x86_64_mode_sse,
  x86_64_mode_x87
} X86_64_Mode;

/* Combine the classes of two struct fields per the SysV merge rules:
   equal classes stay, NONE yields to the other, MEMORY dominates,
   INTEGER beats SSE, and X87 mixed with anything else goes to MEMORY. */
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b) {
    if (a == b)
        return a;
    if (a == x86_64_mode_none)
        return b;
    if (b == x86_64_mode_none)
        return a;
    if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    return x86_64_mode_sse;
}
/* Classify a single type for SysV argument passing, recursing into
   struct fields and merging their classes.  Unions are conservatively
   classified as MEMORY. */
static X86_64_Mode classify_x86_64_inner(CType *ty) {
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        // Detect union: two fields at the same offset
        if (f->next && (f->c == f->next->c))
            return x86_64_mode_memory;

        mode = x86_64_mode_none;
        for (; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }

    assert(0); /* unhandled basic type */
}
986 static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count) {
987 X86_64_Mode mode;
988 int size, align, ret_t;
990 if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
991 *psize = 8;
992 *reg_count = 1;
993 ret_t = ty->t;
994 mode = x86_64_mode_integer;
995 } else {
996 size = type_size(ty, &align);
997 *psize = (size + 7) & ~7;
998 *palign = (align + 7) & ~7;
1000 if (size > 16) {
1001 mode = x86_64_mode_memory;
1002 } else {
1003 mode = classify_x86_64_inner(ty);
1004 switch (mode) {
1005 case x86_64_mode_integer:
1006 if (size > 8) {
1007 *reg_count = 2;
1008 ret_t = VT_QLONG;
1009 } else {
1010 *reg_count = 1;
1011 ret_t = (size > 4) ? VT_LLONG : VT_INT;
1013 break;
1015 case x86_64_mode_x87:
1016 *reg_count = 1;
1017 ret_t = VT_LDOUBLE;
1018 break;
1020 case x86_64_mode_sse:
1021 if (size > 8) {
1022 *reg_count = 2;
1023 ret_t = VT_QFLOAT;
1024 } else {
1025 *reg_count = 1;
1026 ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
1028 break;
1033 if (ret) {
1034 ret->ref = NULL;
1035 ret->t = ret_t;
1038 return mode;
1041 ST_FUNC int classify_x86_64_va_arg(CType *ty) {
1042 /* This definition must be synced with stdarg.h */
1043 enum __va_arg_type {
1044 __va_gen_reg, __va_float_reg, __va_stack
1046 int size, align, reg_count;
1047 X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
1048 switch (mode) {
1049 default: return __va_stack;
1050 case x86_64_mode_integer: return __va_gen_reg;
1051 case x86_64_mode_sse: return __va_float_reg;
1055 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
1056 int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
1057 int size, align, reg_count;
1058 *ret_align = 1; // Never have to re-align return values for x86-64
1059 return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) == x86_64_mode_memory);
1062 #define REGN 6
1063 static const uint8_t arg_regs[REGN] = {
1064 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
1067 static int arg_prepare_reg(int idx) {
1068 if (idx == 2 || idx == 3)
1069 /* idx=2: r10, idx=3: r11 */
1070 return idx + 8;
1071 else
1072 return arg_regs[idx];
1075 /* Generate function call. The function address is pushed first, then
1076 all the parameters in call order. This functions pops all the
1077 parameters and the function address. */
1078 void gfunc_call(int nb_args)
1080 X86_64_Mode mode;
1081 CType type;
1082 int size, align, r, args_size, stack_adjust, run_start, run_end, i, j, reg_count;
1083 int nb_reg_args = 0;
1084 int nb_sse_args = 0;
1085 int sse_reg, gen_reg;
1087 /* calculate the number of integer/float register arguments */
1088 for(i = 0; i < nb_args; i++) {
1089 mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
1090 if (mode == x86_64_mode_sse)
1091 nb_sse_args += reg_count;
1092 else if (mode == x86_64_mode_integer)
1093 nb_reg_args += reg_count;
1096 /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
1097 and ended by a 16-byte aligned argument. This is because, from the point of view of
1098 the callee, argument alignment is computed from the bottom up. */
1099 /* for struct arguments, we need to call memcpy and the function
1100 call breaks register passing arguments we are preparing.
1101 So, we process arguments which will be passed by stack first. */
1102 gen_reg = nb_reg_args;
1103 sse_reg = nb_sse_args;
1104 run_start = 0;
1105 args_size = 0;
1106 while (run_start != nb_args) {
1107 int run_gen_reg = gen_reg, run_sse_reg = sse_reg;
1109 run_end = nb_args;
1110 stack_adjust = 0;
1111 for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
1112 mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
1113 switch (mode) {
1114 case x86_64_mode_memory:
1115 case x86_64_mode_x87:
1116 stack_arg:
1117 if (align == 16)
1118 run_end = i;
1119 else
1120 stack_adjust += size;
1121 break;
1123 case x86_64_mode_sse:
1124 sse_reg -= reg_count;
1125 if (sse_reg + reg_count > 8) goto stack_arg;
1126 break;
1128 case x86_64_mode_integer:
1129 gen_reg -= reg_count;
1130 if (gen_reg + reg_count > REGN) goto stack_arg;
1131 break;
1135 gen_reg = run_gen_reg;
1136 sse_reg = run_sse_reg;
1138 /* adjust stack to align SSE boundary */
1139 if (stack_adjust &= 15) {
1140 /* fetch cpu flag before the following sub will change the value */
1141 if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
1142 gv(RC_INT);
1144 stack_adjust = 16 - stack_adjust;
1145 o(0x48);
1146 oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
1147 args_size += stack_adjust;
1150 for(i = run_start; i < run_end;) {
1151 /* Swap argument to top, it will possibly be changed here,
1152 and might use more temps. At the end of the loop we keep
1153 in on the stack and swap it back to its original position
1154 if it is a register. */
1155 SValue tmp = vtop[0];
1156 vtop[0] = vtop[-i];
1157 vtop[-i] = tmp;
1159 mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, &reg_count);
1161 int arg_stored = 1;
1162 switch (vtop->type.t & VT_BTYPE) {
1163 case VT_STRUCT:
1164 if (mode == x86_64_mode_sse) {
1165 if (sse_reg > 8)
1166 sse_reg -= reg_count;
1167 else
1168 arg_stored = 0;
1169 } else if (mode == x86_64_mode_integer) {
1170 if (gen_reg > REGN)
1171 gen_reg -= reg_count;
1172 else
1173 arg_stored = 0;
1176 if (arg_stored) {
1177 /* allocate the necessary size on stack */
1178 o(0x48);
1179 oad(0xec81, size); /* sub $xxx, %rsp */
1180 /* generate structure store */
1181 r = get_reg(RC_INT);
1182 orex(1, r, 0, 0x89); /* mov %rsp, r */
1183 o(0xe0 + REG_VALUE(r));
1184 vset(&vtop->type, r | VT_LVAL, 0);
1185 vswap();
1186 vstore();
1187 args_size += size;
1189 break;
1191 case VT_LDOUBLE:
1192 assert(0);
1193 break;
1195 case VT_FLOAT:
1196 case VT_DOUBLE:
1197 assert(mode == x86_64_mode_sse);
1198 if (sse_reg > 8) {
1199 --sse_reg;
1200 r = gv(RC_FLOAT);
1201 o(0x50); /* push $rax */
1202 /* movq %xmmN, (%rsp) */
1203 o(0xd60f66);
1204 o(0x04 + REG_VALUE(r)*8);
1205 o(0x24);
1206 args_size += size;
1207 } else {
1208 arg_stored = 0;
1210 break;
1212 default:
1213 assert(mode == x86_64_mode_integer);
1214 /* simple type */
1215 /* XXX: implicit cast ? */
1216 if (gen_reg > REGN) {
1217 --gen_reg;
1218 r = gv(RC_INT);
1219 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
1220 args_size += size;
1221 } else {
1222 arg_stored = 0;
1224 break;
1227 /* And swap the argument back to it's original position. */
1228 tmp = vtop[0];
1229 vtop[0] = vtop[-i];
1230 vtop[-i] = tmp;
1232 if (arg_stored) {
1233 vrotb(i+1);
1234 assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
1235 vpop();
1236 --nb_args;
1237 --run_end;
1238 } else {
1239 ++i;
1243 /* handle 16 byte aligned arguments at end of run */
1244 run_start = i = run_end;
1245 while (i < nb_args) {
1246 /* Rotate argument to top since it will always be popped */
1247 mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
1248 if (align != 16)
1249 break;
1251 vrotb(i+1);
1253 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1254 gv(RC_ST0);
1255 oad(0xec8148, size); /* sub $xxx, %rsp */
1256 o(0x7cdb); /* fstpt 0(%rsp) */
1257 g(0x24);
1258 g(0x00);
1259 args_size += size;
1260 } else {
1261 assert(mode == x86_64_mode_memory);
1263 /* allocate the necessary size on stack */
1264 o(0x48);
1265 oad(0xec81, size); /* sub $xxx, %rsp */
1266 /* generate structure store */
1267 r = get_reg(RC_INT);
1268 orex(1, r, 0, 0x89); /* mov %rsp, r */
1269 o(0xe0 + REG_VALUE(r));
1270 vset(&vtop->type, r | VT_LVAL, 0);
1271 vswap();
1272 vstore();
1273 args_size += size;
1276 vpop();
1277 --nb_args;
1281 /* XXX This should be superfluous. */
1282 save_regs(0); /* save used temporary registers */
1284 /* then, we prepare register passing arguments.
1285 Note that we cannot set RDX and RCX in this loop because gv()
1286 may break these temporary registers. Let's use R10 and R11
1287 instead of them */
1288 assert(gen_reg <= REGN);
1289 assert(sse_reg <= 8);
1290 for(i = 0; i < nb_args; i++) {
1291 mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
1292 /* Alter stack entry type so that gv() knows how to treat it */
1293 vtop->type = type;
1294 if (mode == x86_64_mode_sse) {
1295 if (reg_count == 2) {
1296 sse_reg -= 2;
1297 gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
1298 if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
1299 /* movaps %xmm0, %xmmN */
1300 o(0x280f);
1301 o(0xc0 + (sse_reg << 3));
1302 /* movaps %xmm1, %xmmN */
1303 o(0x280f);
1304 o(0xc1 + ((sse_reg+1) << 3));
1306 } else {
1307 assert(reg_count == 1);
1308 --sse_reg;
1309 /* Load directly to register */
1310 gv(RC_XMM0 << sse_reg);
1312 } else if (mode == x86_64_mode_integer) {
1313 /* simple type */
1314 /* XXX: implicit cast ? */
1315 gen_reg -= reg_count;
1316 r = gv(RC_INT);
1317 int d = arg_prepare_reg(gen_reg);
1318 orex(1,d,r,0x89); /* mov */
1319 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
1320 if (reg_count == 2) {
1321 d = arg_prepare_reg(gen_reg+1);
1322 orex(1,d,vtop->r2,0x89); /* mov */
1323 o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
1326 vtop--;
1328 assert(gen_reg == 0);
1329 assert(sse_reg == 0);
1331 /* We shouldn't have many operands on the stack anymore, but the
1332 call address itself is still there, and it might be in %eax
1333 (or edx/ecx) currently, which the below writes would clobber.
1334 So evict all remaining operands here. */
1335 save_regs(0);
1337 /* Copy R10 and R11 into RDX and RCX, respectively */
1338 if (nb_reg_args > 2) {
1339 o(0xd2894c); /* mov %r10, %rdx */
1340 if (nb_reg_args > 3) {
1341 o(0xd9894c); /* mov %r11, %rcx */
1345 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
1346 gcall_or_jmp(0);
1347 if (args_size)
1348 gadd_sp(args_size);
1349 vtop--;
1353 #define FUNC_PROLOG_SIZE 11
1355 static void push_arg_reg(int i) {
1356 loc -= 8;
1357 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
1360 /* generate function prolog of type 't' */
1361 void gfunc_prolog(CType *func_type)
1363 X86_64_Mode mode;
1364 int i, addr, align, size, reg_count;
1365 int param_addr, reg_param_index, sse_param_index;
1366 Sym *sym;
1367 CType *type;
1369 sym = func_type->ref;
1370 addr = PTR_SIZE * 2;
1371 loc = 0;
1372 ind += FUNC_PROLOG_SIZE;
1373 func_sub_sp_offset = ind;
1374 func_ret_sub = 0;
1376 if (func_type->ref->c == FUNC_ELLIPSIS) {
1377 int seen_reg_num, seen_sse_num, seen_stack_size;
1378 seen_reg_num = seen_sse_num = 0;
1379 /* frame pointer and return address */
1380 seen_stack_size = PTR_SIZE * 2;
1381 /* count the number of seen parameters */
1382 sym = func_type->ref;
1383 while ((sym = sym->next) != NULL) {
1384 type = &sym->type;
1385 mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
1386 switch (mode) {
1387 default:
1388 stack_arg:
1389 seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
1390 break;
1392 case x86_64_mode_integer:
1393 if (seen_reg_num + reg_count <= 8) {
1394 seen_reg_num += reg_count;
1395 } else {
1396 seen_reg_num = 8;
1397 goto stack_arg;
1399 break;
1401 case x86_64_mode_sse:
1402 if (seen_sse_num + reg_count <= 8) {
1403 seen_sse_num += reg_count;
1404 } else {
1405 seen_sse_num = 8;
1406 goto stack_arg;
1408 break;
1412 loc -= 16;
1413 /* movl $0x????????, -0x10(%rbp) */
1414 o(0xf045c7);
1415 gen_le32(seen_reg_num * 8);
1416 /* movl $0x????????, -0xc(%rbp) */
1417 o(0xf445c7);
1418 gen_le32(seen_sse_num * 16 + 48);
1419 /* movl $0x????????, -0x8(%rbp) */
1420 o(0xf845c7);
1421 gen_le32(seen_stack_size);
1423 /* save all register passing arguments */
1424 for (i = 0; i < 8; i++) {
1425 loc -= 16;
1426 o(0xd60f66); /* movq */
1427 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1428 /* movq $0, loc+8(%rbp) */
1429 o(0x85c748);
1430 gen_le32(loc + 8);
1431 gen_le32(0);
1433 for (i = 0; i < REGN; i++) {
1434 push_arg_reg(REGN-1-i);
1438 sym = func_type->ref;
1439 reg_param_index = 0;
1440 sse_param_index = 0;
1442 /* if the function returns a structure, then add an
1443 implicit pointer parameter */
1444 func_vt = sym->type;
1445 mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
1446 if (mode == x86_64_mode_memory) {
1447 push_arg_reg(reg_param_index);
1448 func_vc = loc;
1449 reg_param_index++;
1451 /* define parameters */
1452 while ((sym = sym->next) != NULL) {
1453 type = &sym->type;
1454 mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
1455 switch (mode) {
1456 case x86_64_mode_sse:
1457 if (sse_param_index + reg_count <= 8) {
1458 /* save arguments passed by register */
1459 loc -= reg_count * 8;
1460 param_addr = loc;
1461 for (i = 0; i < reg_count; ++i) {
1462 o(0xd60f66); /* movq */
1463 gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
1464 ++sse_param_index;
1466 } else {
1467 addr = (addr + align - 1) & -align;
1468 param_addr = addr;
1469 addr += size;
1470 sse_param_index += reg_count;
1472 break;
1474 case x86_64_mode_memory:
1475 case x86_64_mode_x87:
1476 addr = (addr + align - 1) & -align;
1477 param_addr = addr;
1478 addr += size;
1479 break;
1481 case x86_64_mode_integer: {
1482 if (reg_param_index + reg_count <= REGN) {
1483 /* save arguments passed by register */
1484 loc -= reg_count * 8;
1485 param_addr = loc;
1486 for (i = 0; i < reg_count; ++i) {
1487 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
1488 ++reg_param_index;
1490 } else {
1491 addr = (addr + align - 1) & -align;
1492 param_addr = addr;
1493 addr += size;
1494 reg_param_index += reg_count;
1496 break;
1499 sym_push(sym->v & ~SYM_FIELD, type,
1500 VT_LOCAL | VT_LVAL, param_addr);
1504 /* generate function epilog */
1505 void gfunc_epilog(void)
1507 int v, saved_ind;
1509 o(0xc9); /* leave */
1510 if (func_ret_sub == 0) {
1511 o(0xc3); /* ret */
1512 } else {
1513 o(0xc2); /* ret n */
1514 g(func_ret_sub);
1515 g(func_ret_sub >> 8);
1517 /* align local size to word & save local variables */
1518 v = (-loc + 15) & -16;
1519 saved_ind = ind;
1520 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1521 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1522 o(0xec8148); /* sub rsp, stacksize */
1523 gen_le32(v);
1524 ind = saved_ind;
1527 #endif /* not PE */
/* Emit an unconditional near jump (0xe9) chained onto label 't';
   returns the new head of the relocation chain. */
int gjmp(int t)
{
    return psym(0xe9, t);
}
1535 /* generate a jump to a fixed address */
1536 void gjmp_addr(int a)
1538 int r;
1539 r = a - ind - 2;
1540 if (r == (char)r) {
1541 g(0xeb);
1542 g(r);
1543 } else {
1544 oad(0xe9, a - ind - 5);
/* generate a test. set 'inv' to invert test. Stack entry is popped.
   Returns the head of the forward-jump relocation chain to be
   resolved later with gsym(). */
int gtst(int inv, int t)
{
    int v, *p;

    v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100)
          {
            /* This was a float compare.  If the parity flag is set
               the result was unordered.  For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test.  We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump.  */
            vtop->c.i &= ~0x100;
            if (!inv == (vtop->c.i != TOK_NE))
                o(0x067a); /* jp +6 : skip the following 6-byte jcc */
            else
              {
                g(0x0f);
                t = psym(0x8a, t); /* jp t : jump on unordered */
              }
          }
        /* conditional jump on the comparison condition stored in c.i
           (token code mapped to the x86 condition-code encoding) */
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t: walk to the end of the
               chain threaded through the code buffer and append */
            p = &vtop->c.i;
            while (*p != 0)
                p = (int *)(cur_text_section->data + *p);
            *p = t;
            t = vtop->c.i;
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    } else {
        /* value in register or memory: materialize a comparison first */
        if (is_float(vtop->type.t) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            vpushi(0);
            gen_op(TOK_NE);
        }
        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
            /* constant jmp optimization */
            if ((vtop->c.i != 0) != inv)
                t = gjmp(t);
        } else {
            v = gv(RC_INT);
            orex(0,v,v,0x85);  /* test v, v */
            o(0xc0 + REG_VALUE(v) * 9);
            g(0x0f);
            t = psym(0x85 ^ inv, t); /* jne / je t */
        }
    }
    vtop--;
    return t;
}
/* generate an integer binary operation between the two top stack
   entries; result replaces them on the value stack */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);  /* need 64-bit (REX.W) operation? */
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        /* 'opc' selects the ALU operation in the 0x81/0x83 group */
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
            /* constant case (constant must fit in 32 bits for 64-bit ops) */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);  /* op $imm8, r */
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);  /* op $imm32, r */
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);  /* op fr, r */
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* comparison: result lives in the flags */
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case: shift count is masked to the operand width */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);  /* rdx is clobbered (remainder / sign extension) */
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;  /* remainder */
        else
            r = TREG_RAX;  /* quotient */
        vtop->r = r;
        break;
    default:
        /* comparison operators share the cmp (opc=7) ALU path */
        opc = 7;
        goto gen_op8;
    }
}
/* 64-bit integer operations: on x86-64 they share the 32-bit
   code path, which emits REX.W as needed. */
void gen_opl(int op)
{
    gen_opi(op);
}
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranted to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        /* long double: use the x87 FPU */
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            o(0xe9da); /* fucompp */
            o(0xe0df); /* fnstsw %ax */
            /* translate the x87 status word bits (C0/C2/C3) into a
               normal EFLAGS test via %ah */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;  /* fsubrp instead of fsubp */
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;  /* fdivrp instead of fdivp */
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        /* float / double: use SSE */
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.ul;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                /* ucomisd only gives "above"-style flags; swap
                   operands to express < / <= via > / >= */
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);   /* 0x66 prefix: ucomisd instead of ucomiss */
            o(0x2e0f); /* ucomisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;  /* 0x100 marks a float compare for gtst() */
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            /* addss/subss/mulss/divss or the sd variants; prefix picks
               single vs double precision */
            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }
            vtop--;
        }
    }
}
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        /* target is long double: go through the x87 FPU via the stack */
        save_reg(TREG_ST0);
        gv(RC_INT);
        /* NOTE(review): the push encodings below have no REX.B prefix,
           so they assume gv(RC_INT) returned one of rax..rdi here —
           verify against reg_classes for r8-r11. */
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double: push a zero
               high qword so fildll sees a non-negative 64-bit value */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        /* target is float/double: use SSE cvtsi2ss/cvtsi2sd */
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));  /* 0xf3 = ss, 0xf2 = sd */
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* 64-bit source operand: unsigned int is zero-extended in
               the register, so the 64-bit signed convert is correct */
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
1984 /* convert from one floating point type to another */
1985 void gen_cvt_ftof(int t)
1987 int ft, bt, tbt;
1989 ft = vtop->type.t;
1990 bt = ft & VT_BTYPE;
1991 tbt = t & VT_BTYPE;
1993 if (bt == VT_FLOAT) {
1994 gv(RC_FLOAT);
1995 if (tbt == VT_DOUBLE) {
1996 o(0x140f); /* unpcklps */
1997 o(0xc0 + REG_VALUE(vtop->r)*9);
1998 o(0x5a0f); /* cvtps2pd */
1999 o(0xc0 + REG_VALUE(vtop->r)*9);
2000 } else if (tbt == VT_LDOUBLE) {
2001 save_reg(RC_ST0);
2002 /* movss %xmm0,-0x10(%rsp) */
2003 o(0x110ff3);
2004 o(0x44 + REG_VALUE(vtop->r)*8);
2005 o(0xf024);
2006 o(0xf02444d9); /* flds -0x10(%rsp) */
2007 vtop->r = TREG_ST0;
2009 } else if (bt == VT_DOUBLE) {
2010 gv(RC_FLOAT);
2011 if (tbt == VT_FLOAT) {
2012 o(0x140f66); /* unpcklpd */
2013 o(0xc0 + REG_VALUE(vtop->r)*9);
2014 o(0x5a0f66); /* cvtpd2ps */
2015 o(0xc0 + REG_VALUE(vtop->r)*9);
2016 } else if (tbt == VT_LDOUBLE) {
2017 save_reg(RC_ST0);
2018 /* movsd %xmm0,-0x10(%rsp) */
2019 o(0x110ff2);
2020 o(0x44 + REG_VALUE(vtop->r)*8);
2021 o(0xf024);
2022 o(0xf02444dd); /* fldl -0x10(%rsp) */
2023 vtop->r = TREG_ST0;
2025 } else {
2026 gv(RC_ST0);
2027 int r = get_reg(RC_FLOAT);
2028 if (tbt == VT_DOUBLE) {
2029 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
2030 /* movsd -0x10(%rsp),%xmm0 */
2031 o(0x100ff2);
2032 o(0x44 + REG_VALUE(r)*8);
2033 o(0xf024);
2034 vtop->r = r;
2035 } else if (tbt == VT_FLOAT) {
2036 o(0xf0245cd9); /* fstps -0x10(%rsp) */
2037 /* movss -0x10(%rsp),%xmm0 */
2038 o(0x100ff3);
2039 o(0x44 + REG_VALUE(r)*8);
2040 o(0xf024);
2041 vtop->r = r;
2046 /* convert fp to int 't' type */
2047 void gen_cvt_ftoi(int t)
2049 int ft, bt, size, r;
2050 ft = vtop->type.t;
2051 bt = ft & VT_BTYPE;
2052 if (bt == VT_LDOUBLE) {
2053 gen_cvt_ftof(VT_DOUBLE);
2054 bt = VT_DOUBLE;
2057 gv(RC_FLOAT);
2058 if (t != VT_INT)
2059 size = 8;
2060 else
2061 size = 4;
2063 r = get_reg(RC_INT);
2064 if (bt == VT_FLOAT) {
2065 o(0xf3);
2066 } else if (bt == VT_DOUBLE) {
2067 o(0xf2);
2068 } else {
2069 assert(0);
2071 orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
2072 o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
2073 vtop->r = r;
2076 /* computed goto support */
2077 void ggoto(void)
2079 gcall_or_jmp(1);
2080 vtop--;
2083 /* end of x86-64 code generator */
2084 /*************************************************************/
2085 #endif /* ! TARGET_DEFS_ONLY */
2086 /******************************************************/