/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         25
#define NB_ASM_REGS     8

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which
   makes assumptions about it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_ST0     0x0080 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_LRET    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */
/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_ST0 = 24,

    TREG_MEM = 0x20,
};
#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)
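/* Illustrative note (not from the original source): these macros split a
   register number into the REX prefix extension bit and the 3-bit field
   encoded in the ModRM byte.  For TREG_R10 (10 = 0b1010), REX_BASE gives 1
   (placed in REX.B or REX.R) and REG_VALUE gives 2; for TREG_RDX (2),
   REX_BASE is 0 and no REX prefix is needed for that operand.  A minimal
   standalone check, disabled so it does not affect the build: */
#if 0
#include <assert.h>
static void rex_macro_demo(void)
{
    assert(REX_BASE(10) == 1 && REG_VALUE(10) == 2); /* %r10 */
    assert(REX_BASE(2)  == 0 && REG_VALUE(2)  == 2); /* %rdx */
}
#endif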
/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE  0x1000
/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#include "tcc.h"
#include <assert.h>
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};
static unsigned long func_sub_sp_offset;
static int func_ret_sub;

/* XXX: make it faster ? */
void g(int c)
{
    int ind1;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}

void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}

void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
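/* Illustrative note (not from the original source): o() emits its argument
   low byte first and stops once the remaining value is zero, so opcode
   constants throughout this file are written "reversed": o(0x7e0ff3) emits
   the byte sequence f3 0f 7e (movq).  Likewise gen_le32(0x12345678) stores
   78 56 34 12, x86's little-endian order. */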
void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
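/* Illustrative note (not from the original source): orex() emits an
   optional REX prefix before opcode 'b'.  For a 64-bit operation on %r8
   (ll=1, r=TREG_R8) it emits 0x40 | 1 | (0 << 2) | (1 << 3) = 0x49, i.e.
   REX.W+REX.B; a plain 32-bit operation on the low eight registers emits
   no prefix at all. */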
/* output a symbol and patch all calls to it */
void gsym_addr(int t, int a)
{
    int n, *ptr;
    while (t) {
        ptr = (int *)(cur_text_section->data + t);
        n = *ptr; /* next value */
        *ptr = a - t - 4;
        t = n;
    }
}

void gsym(int t)
{
    gsym_addr(t, ind);
}
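/* Illustrative note (not from the original source): forward jumps whose
   target is not yet known store the offset of the previous unresolved jump
   in their own 32-bit displacement field, forming a linked list threaded
   through the code buffer.  gsym_addr() walks that list and overwrites each
   field with the real PC-relative displacement, a - t - 4, measured from
   the end of the 4-byte field. */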
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad

static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}

static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}
/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
    s = ind;
    ind = ind1;
    return s;
}
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_32);
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    gen_le32(c-4);
}
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    Section *sr;
    ElfW(Rela) *rel;
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
    rel->r_addend = -4;
#else
    printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
           cur_text_section->data[ind-3],
           cur_text_section->data[ind-2],
           cur_text_section->data[ind-1]
           );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
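/* Illustrative note (not from the original source): the ModRM byte packs
   mod (2 bits), reg (3 bits) and r/m (3 bits).  A VT_LOCAL reference with
   c = -8 and op_reg = 0 emits 0x45 0xf8: mod=01 (disp8) and r/m=101 (%rbp),
   i.e. the operand -8(%rbp).  A displacement that does not fit in a signed
   byte uses mod=10 (0x85) followed by a 32-bit displacement instead. */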
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t;
    fc = sv->c.ul;

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif
    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.ul = fc;
            fr = r;
            if (!(reg_classes[fr] & RC_INT))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.ull);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100) {
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE.  */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8));
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.ul;
    fr = v->r & VT_VALMASK;
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif
    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym,
                   ind + 1, R_X86_64_PC32);
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        }
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
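/* Illustrative note (not from the original source): 0xe8 is call rel32 and
   0xe9 (0xe8 + 1) is jmp rel32, so 'is_jmp' simply selects the opcode.
   The rel32 field is relative to the end of the instruction, hence the -4
   bias on the addend, and the relocation is placed at ind + 1, just past
   the one-byte opcode.  In the indirect case, ModRM 0xd0 encodes call *r
   and adding is_jmp << 4 turns it into 0xe0, jmp *r. */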
#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}

static int func_scratch;
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
/* Return 1 if this function returns via an sret pointer, 0 otherwise */
ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86-64
    size = type_size(vt, &align);
    ret->ref = NULL;
    if (size > 8) {
        return 1;
    } else if (size > 4) {
        ret->t = VT_LLONG;
        return 0;
    } else if (size > 2) {
        ret->t = VT_INT;
        return 0;
    } else if (size > 1) {
        ret->t = VT_SHORT;
        return 0;
    } else {
        ret->t = VT_BYTE;
        return 0;
    }
}
int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (size > 8) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, arg*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
#else

static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_sse,
    x86_64_mode_x87
} X86_64_Mode;
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b) {
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
static X86_64_Mode classify_x86_64_inner(CType *ty) {
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        // Detect union
        if (f->next && (f->c == f->next->c))
            return x86_64_mode_memory;

        mode = x86_64_mode_none;
        for (; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }

    assert(0);
}
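/* Illustrative note (not from the original source): this is a simplified
   form of the System V AMD64 classification.  For example,
   struct { int a; int b; } merges integer+integer to x86_64_mode_integer
   (passed in one general register), struct { float x; float y; } merges to
   x86_64_mode_sse (one SSE register), and a long double member mixed with
   any other class merges to x86_64_mode_memory via the x87 rule above. */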
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count) {
    X86_64_Mode mode;
    int size, align, ret_t;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8; /* keep *palign defined on this path as well */
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}
ST_FUNC int classify_x86_64_va_arg(CType *ty) {
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}
/* Return 1 if this function returns via an sret pointer, 0 otherwise */
int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) == x86_64_mode_memory);
}
#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, run_start, run_end, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float register arguments */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse)
            nb_sse_args += reg_count;
        else if (mode == x86_64_mode_integer)
            nb_reg_args += reg_count;
    }

    /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
       and ended by a 16-byte aligned argument. This is because, from the point of view of
       the callee, argument alignment is computed from the bottom up. */
    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    run_start = 0;
    args_size = 0;
    while (run_start != nb_args) {
        int run_gen_reg = gen_reg, run_sse_reg = sse_reg;

        run_end = nb_args;
        stack_adjust = 0;
        for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            switch (mode) {
            case x86_64_mode_memory:
            case x86_64_mode_x87:
            stack_arg:
                if (align == 16)
                    run_end = i;
                else
                    stack_adjust += size;
                break;

            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8) goto stack_arg;
                break;

            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN) goto stack_arg;
                break;
            }
        }

        gen_reg = run_gen_reg;
        sse_reg = run_sse_reg;

        /* adjust stack to align SSE boundary */
        if (stack_adjust &= 15) {
            /* fetch cpu flag before the following sub will change the value */
            if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
                gv(RC_INT);

            stack_adjust = 16 - stack_adjust;
            o(0x48);
            oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
            args_size += stack_adjust;
        }
        for(i = run_start; i < run_end;) {
            /* Swap argument to top, it will possibly be changed here,
               and might use more temps. At the end of the loop we keep
               it on the stack and swap it back to its original position
               if it is a register. */
            SValue tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, &reg_count);

            int arg_stored = 1;
            switch (vtop->type.t & VT_BTYPE) {
            case VT_STRUCT:
                if (mode == x86_64_mode_sse) {
                    if (sse_reg > 8)
                        sse_reg -= reg_count;
                    else
                        arg_stored = 0;
                } else if (mode == x86_64_mode_integer) {
                    if (gen_reg > REGN)
                        gen_reg -= reg_count;
                    else
                        arg_stored = 0;
                }

                if (arg_stored) {
                    /* allocate the necessary size on stack */
                    o(0x48);
                    oad(0xec81, size); /* sub $xxx, %rsp */
                    /* generate structure store */
                    r = get_reg(RC_INT);
                    orex(1, r, 0, 0x89); /* mov %rsp, r */
                    o(0xe0 + REG_VALUE(r));
                    vset(&vtop->type, r | VT_LVAL, 0);
                    vswap();
                    vstore();
                    args_size += size;
                }
                break;

            case VT_LDOUBLE:
                assert(0);
                break;

            case VT_FLOAT:
            case VT_DOUBLE:
                assert(mode == x86_64_mode_sse);
                if (sse_reg > 8) {
                    --sse_reg;
                    r = gv(RC_FLOAT);
                    o(0x50); /* push %rax */
                    /* movq %xmmN, (%rsp) */
                    o(0xd60f66);
                    o(0x04 + REG_VALUE(r)*8);
                    o(0x24);
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;

            default:
                assert(mode == x86_64_mode_integer);
                /* simple type */
                /* XXX: implicit cast ? */
                if (gen_reg > REGN) {
                    --gen_reg;
                    r = gv(RC_INT);
                    orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;
            }

            /* And swap the argument back to its original position. */
            tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            if (arg_stored) {
                vrotb(i+1);
                assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
                vpop();
                --nb_args;
                --run_end;
            } else {
                ++i;
            }
        }
        /* handle 16 byte aligned arguments at end of run */
        run_start = i = run_end;
        while (i < nb_args) {
            /* Rotate argument to top since it will always be popped */
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            if (align != 16)
                break;

            vrotb(i+1);

            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                gv(RC_ST0);
                oad(0xec8148, size); /* sub $xxx, %rsp */
                o(0x7cdb); /* fstpt 0(%rsp) */
                g(0x24);
                g(0x00);
                args_size += size;
            } else {
                assert(mode == x86_64_mode_memory);

                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore();
                args_size += size;
            }

            vpop();
            --nb_args;
        }
    }
    /* XXX This should be superfluous. */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            gen_reg -= reg_count;
            r = gv(RC_INT);
            int d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here. */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}
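/* Illustrative note (not from the original source): the System V AMD64 ABI
   uses %al as an upper bound on the number of SSE registers a variadic
   call passes arguments in; the mov into %eax above provides it so a
   varargs callee knows whether it must spill the XMM save area. */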
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count <= 8) {
                    seen_reg_num += reg_count;
                } else {
                    seen_reg_num = 8;
                    goto stack_arg;
                }
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count <= 8) {
                    seen_sse_num += reg_count;
                } else {
                    seen_sse_num = 8;
                    goto stack_arg;
                }
                break;
            }
        }

        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            o(0xd60f66); /* movq */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }

        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }
    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
                sse_param_index += reg_count;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
                reg_param_index += reg_count;
            }
            break;
        }
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}
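/* Illustrative note (not from the original source): FUNC_PROLOG_SIZE = 11
   is exactly the code emitted above: push %rbp (1 byte: 55), mov %rsp,%rbp
   (3 bytes: 48 89 e5) and sub $imm32,%rsp (7 bytes: 48 81 ec xx xx xx xx).
   The prolog is backpatched here because the frame size is only known once
   the whole function body has been generated. */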
#endif /* not PE */

/* generate a jump to a label */
int gjmp(int t)
{
    return psym(0xe9, t);
}
/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}
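/* Illustrative note (not from the original source): a jump whose target
   fits in a signed byte uses the 2-byte short form (eb xx), otherwise the
   5-byte near form (e9 + rel32).  The displacements are a - ind - 2 and
   a - ind - 5 because both are relative to the address of the *next*
   instruction. */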
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
{
    int v, *p;

    v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100) {
            /* This was a float compare.  If the parity flag is set
               the result was unordered.  For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test.  We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump.  */
            vtop->c.i &= ~0x100;
            if (!inv == (vtop->c.i != TOK_NE))
                o(0x067a);  /* jp +6 */
            else {
                g(0x0f);
                t = psym(0x8a, t); /* jp t */
            }
        }
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            p = &vtop->c.i;
            while (*p != 0)
                p = (int *)(cur_text_section->data + *p);
            *p = t;
            t = vtop->c.i;
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    } else {
        if (is_float(vtop->type.t) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            vpushi(0);
            gen_op(TOK_NE);
        }
        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
            /* constant jmp optimization */
            if ((vtop->c.i != 0) != inv)
                t = gjmp(t);
        } else {
            v = gv(RC_INT);
            orex(0,v,v,0x85);
            o(0xc0 + REG_VALUE(v) * 9);
            g(0x0f);
            t = psym(0x85 ^ inv, t);
        }
    }
    vtop--;
    return t;
}
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}

void gen_opl(int op)
{
    gen_opi(op);
}
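/* Illustrative note (not from the original source): on x86-64 a long long
   fits in a single general register, so the "long" variant simply defers
   to gen_opi(); the ll flag computed there selects the REX.W form of each
   instruction. */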
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            o(0xe9da); /* fucompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fc80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.ul;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            o(0x2e0f); /* ucomisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}
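/* Illustrative note (not from the original source): the SSE arithmetic
   opcodes are contiguous, so 0x58 + a selects the operation directly:
   a=0 -> 0x58 add, a=1 -> 0x59 mul, a=4 -> 0x5c sub, a=6 -> 0x5e div,
   with the 0xf2/0xf3 prefix choosing the double or float variant. */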
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(TREG_ST0);
            /* movss %xmmN,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(TREG_ST0);
            /* movsd %xmmN,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmmN */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmmN */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}
/* Save the stack pointer at 'addr' in the local frame */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}
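/* Illustrative note (not from the original source): the front end is
   expected to pair these helpers around a VLA's lifetime: save %rsp before
   gen_vla_alloc() below adjusts it, and restore it when the array's scope
   ends, so the stack space is reclaimed without waiting for the function
   epilog. */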
/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
    vset(type, REG_IRET, 0);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b48);
    o(0xe0 | REG_VALUE(r));
    /* we align to 16 bytes rather than 'align' */
    /* and ~15, %rsp */
    o(0xf0e48348);
    /* mov %rsp, r */
    o(0x8948);
    o(0xe0 | REG_VALUE(r));
    vpop();
    vset(type, r, 0);
#endif
}
/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/