Add support for struct > 4B returned via registers
[tinycc.git] / x86_64-gen.c
blob 09620568bf306070e024fe4ee2beeb5c7ca58186
/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         25
#define NB_ASM_REGS     8
/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_ST0     0x0080 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX /* function return: integer register */
#define RC_LRET    RC_RDX /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */
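
/* RC_IRET/RC_LRET and RC_FRET/RC_QRET form the register pairs used when a
   small struct is returned in registers rather than through a hidden
   pointer: e.g. (under the SysV classification implemented below) a
   16-byte struct of two longs comes back in rax:rdx, one of two doubles
   in xmm0:xmm1. */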
/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_ST0 = 24,

    TREG_MEM = 0x20
};
#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE  0x1000
/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#include "tcc.h"
#include <assert.h>

ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};
static unsigned long func_sub_sp_offset;
static int func_ret_sub;
/* XXX: make it faster ? */
void g(int c)
{
    int ind1;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
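
/* The REX prefix built above is 0100WRXB: W (bit 3) selects 64-bit operand
   size, R (bit 2) extends the ModRM reg field, B (bit 0) extends the ModRM
   rm/base field; X is never needed here since no SIB index registers are
   used.  For example, orex(1, TREG_R8, TREG_RAX, 0x89) emits 0x49 0x89
   (REX.W+REX.B, mov) and the caller then emits the ModRM byte; with ll=0
   and both registers below r8 no REX byte is emitted at all. */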
/* output a symbol and patch all calls to it */
void gsym_addr(int t, int a)
{
    int n, *ptr;
    while (t) {
        ptr = (int *)(cur_text_section->data + t);
        n = *ptr; /* next value */
        *ptr = a - t - 4;
        t = n;
    }
}

void gsym(int t)
{
    gsym_addr(t, ind);
}
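
/* Forward jumps are emitted with a placeholder rel32.  The placeholder
   slots of every jump to the same not-yet-known target form a linked list
   threaded through the code buffer: each slot holds the offset of the
   previous slot (0 terminates the chain).  gsym_addr() walks the chain and
   patches each slot with its final displacement, target - slot - 4.
   gtst() below shows the producing side: t = psym(opcode, t) emits the
   jump and links its data slot onto the chain held in t. */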
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad

static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}
/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
    s = ind;
    ind = ind1;
    return s;
}
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_32);
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    gen_le32(c-4);
}
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    Section *sr;
    ElfW(Rela) *rel;
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
    rel->r_addend = -4;
#else
    printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
        cur_text_section->data[ind-3],
        cur_text_section->data[ind-2],
        cur_text_section->data[ind-1]
        );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
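
/* Combined with the opcode bytes its callers emit just before, the non-PE
   path above yields a sequence of roughly this shape:
       mov  sym@GOTPCREL(%rip), %reg   ; GOTPCREL relocation, addend -4
       add  $c, %reg                   ; only if a displacement was given
   The 0x81 /0 form used for the displacement is add r/m64, imm32. */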
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
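
/* ModRM quick reference for the byte values used above:
     0x05 | reg<<3   mod=00 rm=101: RIP-relative disp32 (64-bit mode)
     0x45 | reg<<3   mod=01 rm=101: disp8(%rbp), short local reference
     0x85 | reg<<3   mod=10 rm=101: disp32(%rbp), long local reference
   The TREG_MEM cases use mod=10 (with disp32) or mod=00 with the base
   register number in the rm field. */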
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t;
    fc = sv->c.ul;

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.ul = fc;
            fr = r;
            if (!(reg_classes[fr] & RC_INT))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.ull);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100) {
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE.  */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8));
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.ul;
    fr = v->r & VT_VALMASK;
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: do we ever reach this case? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: do we ever reach this case? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym,
                   ind + 1, R_X86_64_PC32);
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        }
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}
static int func_scratch;

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align)
{
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86-64
    size = type_size(vt, &align);
    ret->ref = NULL;
    if (size > 8) {
        return 0;
    } else if (size > 4) {
        ret->t = VT_LLONG;
        return 1;
    } else if (size > 2) {
        ret->t = VT_INT;
        return 1;
    } else if (size > 1) {
        ret->t = VT_SHORT;
        return 1;
    } else {
        ret->t = VT_BYTE;
        return 1;
    }
}
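
/* So on Windows any aggregate of at most 8 bytes is returned in RAX,
   widened to the next integer type that covers it: e.g. a 6-byte struct
   comes back as a VT_LLONG.  (Note: the official Win64 ABI only returns
   aggregates of size 1, 2, 4 or 8 in a register; odd sizes are rounded
   up here.) */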
static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}

int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (size > 8) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
#else

static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_sse,
    x86_64_mode_x87
} X86_64_Mode;

static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b) {
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
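
/* This mirrors the class-merging rules of the SysV psABI (section 3.2.3),
   applied in order: equal classes keep their class, NONE yields to the
   other operand, MEMORY wins over everything, then INTEGER, and an x87
   class merged with SSE forces MEMORY; only pure SSE stays SSE. */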
static X86_64_Mode classify_x86_64_inner(CType *ty) {
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        // Detect union
        if (f->next && (f->c == f->next->c))
            return x86_64_mode_memory;

        mode = x86_64_mode_none;
        for (; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }

    assert(0);
}
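
/* A sketch of how a few types classify here:
     struct { int a, b; }          -> integer (one GP register)
     struct { float x, y; }        -> sse     (one XMM register)
     struct { long a; double b; }  -> integer for the whole struct: unlike
       the full psABI algorithm, which classifies each eightbyte separately
       (a GP register plus an XMM register for this example), this
       classifier merges all fields into a single class. */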
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count) {
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none */
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}
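
/* Examples of the outcome:
     struct { int a, b, c; }  size 12 -> mode integer, reg_count 2,
       type VT_QLONG (two GP registers; rax:rdx on return);
     struct { double x, y; }  size 16 -> mode sse, reg_count 2,
       type VT_QFLOAT (two XMM registers; xmm0:xmm1 on return);
     anything larger than 16 bytes -> mode memory (stack / sret pointer). */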
ST_FUNC int classify_x86_64_va_arg(CType *ty) {
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
}
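
/* This is the heart of the "struct > 4B returned via registers" support:
   any struct the classifier does not send to memory is rewritten to a
   scalar type (VT_LLONG, VT_QLONG, VT_QFLOAT, ...) and comes back in
   rax/rdx or xmm0/xmm1; only x86_64_mode_memory makes this return 0 and
   fall back to the hidden sret pointer set up in gfunc_prolog(). */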
#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}
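
/* idx 2 and 3 map to r10 and r11 instead of rdx and rcx: those two
   argument registers are also general gv() allocation targets, so they
   could be clobbered while later arguments are evaluated.  They are
   staged in r10/r11 and copied into rdx/rcx at the end of gfunc_call(),
   right before the call instruction. */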
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, run_start, run_end, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float register arguments */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse)
            nb_sse_args += reg_count;
        else if (mode == x86_64_mode_integer)
            nb_reg_args += reg_count;
    }

    /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
       and ended by a 16-byte aligned argument. This is because, from the point of view of
       the callee, argument alignment is computed from the bottom up. */
    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    run_start = 0;
    args_size = 0;
    while (run_start != nb_args) {
        int run_gen_reg = gen_reg, run_sse_reg = sse_reg;

        run_end = nb_args;
        stack_adjust = 0;
        for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            switch (mode) {
            case x86_64_mode_memory:
            case x86_64_mode_x87:
            stack_arg:
                if (align == 16)
                    run_end = i;
                else
                    stack_adjust += size;
                break;

            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8) goto stack_arg;
                break;

            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN) goto stack_arg;
                break;
            default: break; /* nothing to be done for x86_64_mode_none */
            }
        }

        gen_reg = run_gen_reg;
        sse_reg = run_sse_reg;

        /* adjust stack to align SSE boundary */
        if (stack_adjust &= 15) {
            /* fetch cpu flag before the following sub will change the value */
            if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
                gv(RC_INT);

            stack_adjust = 16 - stack_adjust;
            o(0x48);
            oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
            args_size += stack_adjust;
        }

        for(i = run_start; i < run_end;) {
            /* Swap argument to top, it will possibly be changed here,
               and might use more temps. At the end of the loop we keep
               it on the stack and swap it back to its original position
               if it is a register. */
            SValue tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, &reg_count);

            int arg_stored = 1;
            switch (vtop->type.t & VT_BTYPE) {
            case VT_STRUCT:
                if (mode == x86_64_mode_sse) {
                    if (sse_reg > 8)
                        sse_reg -= reg_count;
                    else
                        arg_stored = 0;
                } else if (mode == x86_64_mode_integer) {
                    if (gen_reg > REGN)
                        gen_reg -= reg_count;
                    else
                        arg_stored = 0;
                }

                if (arg_stored) {
                    /* allocate the necessary size on stack */
                    o(0x48);
                    oad(0xec81, size); /* sub $xxx, %rsp */
                    /* generate structure store */
                    r = get_reg(RC_INT);
                    orex(1, r, 0, 0x89); /* mov %rsp, r */
                    o(0xe0 + REG_VALUE(r));
                    vset(&vtop->type, r | VT_LVAL, 0);
                    vswap();
                    vstore();
                    args_size += size;
                }
                break;

            case VT_LDOUBLE:
                assert(0);
                break;

            case VT_FLOAT:
            case VT_DOUBLE:
                assert(mode == x86_64_mode_sse);
                if (sse_reg > 8) {
                    --sse_reg;
                    r = gv(RC_FLOAT);
                    o(0x50); /* push %rax */
                    /* movq %xmmN, (%rsp) */
                    o(0xd60f66);
                    o(0x04 + REG_VALUE(r)*8);
                    o(0x24);
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;

            default:
                assert(mode == x86_64_mode_integer);
                /* simple type */
                /* XXX: implicit cast ? */
                if (gen_reg > REGN) {
                    --gen_reg;
                    r = gv(RC_INT);
                    orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;
            }

            /* And swap the argument back to its original position.  */
            tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            if (arg_stored) {
                vrotb(i+1);
                assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
                vpop();
                --nb_args;
                --run_end;
            } else {
                ++i;
            }
        }

        /* handle 16 byte aligned arguments at end of run */
        run_start = i = run_end;
        while (i < nb_args) {
            /* Rotate argument to top since it will always be popped */
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            if (align != 16)
                break;

            vrotb(i+1);

            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                gv(RC_ST0);
                oad(0xec8148, size); /* sub $xxx, %rsp */
                o(0x7cdb); /* fstpt 0(%rsp) */
                g(0x24);
                g(0x00);
                args_size += size;
            } else {
                assert(mode == x86_64_mode_memory);

                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore();
                args_size += size;
            }

            vpop();
            --nb_args;
        }
    }

    /* XXX This should be superfluous.  */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            gen_reg -= reg_count;
            r = gv(RC_INT);
            int d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here.  */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count <= 8) {
                    seen_reg_num += reg_count;
                } else {
                    seen_reg_num = 8;
                    goto stack_arg;
                }
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count <= 8) {
                    seen_sse_num += reg_count;
                } else {
                    seen_sse_num = 8;
                    goto stack_arg;
                }
                break;
            }
        }

        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);
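
        /* The three stores above materialize the varargs bookkeeping that
           tcc's own stdarg.h consumes (the enum in classify_x86_64_va_arg()
           must stay in sync with it): the GP register bytes already used
           (8 per register), the SSE offset (16 bytes per register, biased
           by 48, which appears to account for the six 8-byte GP slots
           preceding the SSE slots in the register save area), and the size
           of the stack-passed portion of the argument list. */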
        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            o(0xd60f66); /* movq */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
                sse_param_index += reg_count;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
                reg_param_index += reg_count;
            }
            break;
        }
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}
#endif /* not PE */

/* generate a jump to a label */
int gjmp(int t)
{
    return psym(0xe9, t);
}

/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
{
    int v, *p;

    v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100) {
            /* This was a float compare.  If the parity flag is set
               the result was unordered.  For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test.  We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump.  */
            vtop->c.i &= ~0x100;
            if (!inv == (vtop->c.i != TOK_NE))
                o(0x067a);  /* jp +6 */
            else {
                g(0x0f);
                t = psym(0x8a, t); /* jp t */
            }
        }
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            p = &vtop->c.i;
            while (*p != 0)
                p = (int *)(cur_text_section->data + *p);
            *p = t;
            t = vtop->c.i;
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    } else {
        if (is_float(vtop->type.t) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            vpushi(0);
            gen_op(TOK_NE);
        }
        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
            /* constant jmp optimization */
            if ((vtop->c.i != 0) != inv)
                t = gjmp(t);
        } else {
            v = gv(RC_INT);
            orex(0,v,v,0x85);
            o(0xc0 + REG_VALUE(v) * 9);
            g(0x0f);
            t = psym(0x85 ^ inv, t);
        }
    }
    vtop--;
    return t;
}
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}
void gen_opl(int op)
{
    gen_opi(op);
}
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            o(0xe9da); /* fucompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fc80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.ul;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            o(0x2e0f); /* ucomisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}
/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}
/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
    vset(type, REG_IRET, 0);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b48);
    o(0xe0 | REG_VALUE(r));
    /* We align to 16 bytes rather than align */
    /* and ~15, %rsp */
    o(0xf0e48348);
    /* mov %rsp, r */
    o(0x8948);
    o(0xe0 | REG_VALUE(r));
    vpop();
    vset(type, r, 0);
#endif
}

/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/