/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifdef TARGET_DEFS_ONLY
/* number of available registers */
#define NB_REGS 25
#define NB_ASM_REGS 8

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which
   makes assumptions about it). */
#define RC_INT 0x0001 /* generic integer register */
#define RC_FLOAT 0x0002 /* generic float register */
#define RC_RAX 0x0004
#define RC_RCX 0x0008
#define RC_RDX 0x0010
#define RC_ST0 0x0080 /* only for long double */
#define RC_R8 0x0100
#define RC_R9 0x0200
#define RC_R10 0x0400
#define RC_R11 0x0800
#define RC_XMM0 0x1000
#define RC_XMM1 0x2000
#define RC_XMM2 0x4000
#define RC_XMM3 0x8000
#define RC_XMM4 0x10000
#define RC_XMM5 0x20000
#define RC_XMM6 0x40000
#define RC_XMM7 0x80000
#define RC_IRET RC_RAX /* function return: integer register */
#define RC_LRET RC_RDX /* function return: second integer register */
#define RC_FRET RC_XMM0 /* function return: float register */
#define RC_QRET RC_XMM1 /* function return: second float register */
/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8 = 8,
    TREG_R9 = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_ST0 = 24,

    TREG_MEM = 0x20,
};
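/* A hardware register numbered 8 or above (r8-r15) needs a REX extension
   bit in the instruction prefix; REX_BASE extracts that bit and REG_VALUE
   yields the low 3 bits that go into the ModRM/SIB fields. */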
#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE 16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN 16

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32 R_X86_64_32
#define R_DATA_PTR R_X86_64_64
#define R_JMP_SLOT R_X86_64_JUMP_SLOT
#define R_COPY R_X86_64_COPY

#define ELF_START_ADDR 0x400000
#define ELF_PAGE_SIZE 0x200000
/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#include "tcc.h"
#include <assert.h>
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};
static unsigned long func_sub_sp_offset;
static int func_ret_sub;

/* XXX: make it faster ? */
void g(int c)
{
    int ind1;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}
void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}
void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
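/* Emit the optional REX prefix (0100WRXB) followed by opcode byte 'b'.
   'll' sets REX.W for 64-bit operand size; 'r' and 'r2' supply the REX.B
   and REX.R extension bits for registers r8-r15. Values that are not
   plain registers (>= VT_CONST) contribute no extension bit. */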
void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
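/* Unresolved forward jumps are chained through their own 32-bit
   displacement fields: each field holds the offset of the previous
   pending jump, and gsym_addr() walks that list, patching every
   displacement so it reaches address 'a'. */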
/* output a symbol and patch all calls to it */
void gsym_addr(int t, int a)
{
    int n, *ptr;
    while (t) {
        ptr = (int *)(cur_text_section->data + t);
        n = *ptr; /* next value */
        *ptr = a - t - 4;
        t = n;
    }
}
void gsym(int t)
{
    gsym_addr(t, ind);
}

/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad
static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}
/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
    s = ind;
    ind = ind1;
    return s;
}
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_32);
    gen_le32(c);
}
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    gen_le64(c);
}
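/* PC-relative displacements are computed by the CPU from the end of the
   4-byte field, so the constant is biased by -4 before emission. */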
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    gen_le32(c-4);
}
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    Section *sr;
    ElfW(Rela) *rel;
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
    rel->r_addend = -4;
#else
    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
              get_tok_str(sym->v, NULL), c, r,
              cur_text_section->data[ind-3],
              cur_text_section->data[ind-2],
              cur_text_section->data[ind-1]
              );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
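/* Emit the ModRM (and displacement) bytes for one operand: mod=00 with
   rm=101 (0x05) selects RIP-relative addressing on x86-64, 0x45/0x85 are
   disp8/disp32 off %rbp for locals, and a plain register-indirect form is
   used for TREG_MEM-based references. */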
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.ul;

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.ul = fc;
            fr = r;
            if (!(reg_classes[fr] & RC_INT))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f; /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f; /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f; /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f; /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.ull);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100) {
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE.  */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8));
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.ul;
    fr = v->r & VT_VALMASK;
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: should we ever get here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: should we ever get here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
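/* Direct calls use the e8/e9 rel32 encoding with a PC32 (or PLT32) fixup;
   otherwise the target is loaded into %r11, a caller-saved register that
   is not used to pass arguments, and dispatched via ff /2 (call) or
   ff /4 (jmp). */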
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
#ifdef TCC_TARGET_PE
            greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32);
#else
            greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32);
#endif
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        }
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}
static int func_scratch;

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align)
{
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86-64
    size = type_size(vt, &align);
    ret->ref = NULL;
    if (size > 8) {
        return 0;
    } else if (size > 4) {
        ret->t = VT_LLONG;
        return 1;
    } else if (size > 2) {
        ret->t = VT_INT;
        return 1;
    } else if (size > 1) {
        ret->t = VT_SHORT;
        return 1;
    } else {
        ret->t = VT_BYTE;
        return 1;
    }
}
static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}
int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy, and that function
       call would clobber the register-passed arguments we are preparing.
       So, we process the arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments smaller than 8 bytes are passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (size > 8) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    func_var = (sym->c == FUNC_ELLIPSIS);
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
#else

static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
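/* Byte sequences above: 48 83 c4 ib is "add $imm8,%rsp" and 48 81 c4 id is
   "add $imm32,%rsp"; o() emits its argument low byte first, so the hex
   constants read in reverse. */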
typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_sse,
    x86_64_mode_x87
} X86_64_Mode;
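/* These modes follow the SysV AMD64 ABI parameter classification: each
   eightbyte of an argument is classified as INTEGER, SSE, X87 or MEMORY,
   and the per-field classes of a struct are merged pairwise below. */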
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
static X86_64_Mode classify_x86_64_inner(CType *ty)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        // Detect union
        if (f->next && (f->c == f->next->c))
            return x86_64_mode_memory;

        mode = x86_64_mode_none;
        for (; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }

    assert(0);
}
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
{
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none */
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}
ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align)
{
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
}
#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, run_start, run_end, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float register arguments */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse)
            nb_sse_args += reg_count;
        else if (mode == x86_64_mode_integer)
            nb_reg_args += reg_count;
    }

    /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
       and ended by a 16-byte aligned argument. This is because, from the point of view of
       the callee, argument alignment is computed from the bottom up. */
    /* for struct arguments, we need to call memcpy, and that function
       call would clobber the register-passed arguments we are preparing.
       So, we process the arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    run_start = 0;
    args_size = 0;
    while (run_start != nb_args) {
        int run_gen_reg = gen_reg, run_sse_reg = sse_reg;

        run_end = nb_args;
        stack_adjust = 0;
        for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            switch (mode) {
            case x86_64_mode_memory:
            case x86_64_mode_x87:
            stack_arg:
                if (align == 16)
                    run_end = i;
                else
                    stack_adjust += size;
                break;

            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8) goto stack_arg;
                break;

            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN) goto stack_arg;
                break;
            default: break; /* nothing to be done for x86_64_mode_none */
            }
        }

        gen_reg = run_gen_reg;
        sse_reg = run_sse_reg;

        /* adjust stack to align SSE boundary */
        if (stack_adjust &= 15) {
            /* fetch cpu flag before the following sub changes the value */
            if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
                gv(RC_INT);

            stack_adjust = 16 - stack_adjust;
            o(0x48);
            oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
            args_size += stack_adjust;
        }

        for(i = run_start; i < run_end;) {
            /* Swap argument to top, it will possibly be changed here,
               and might use more temps. At the end of the loop we keep
               it on the stack and swap it back to its original position
               if it is a register. */
            SValue tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, &reg_count);

            int arg_stored = 1;
            switch (vtop->type.t & VT_BTYPE) {
            case VT_STRUCT:
                if (mode == x86_64_mode_sse) {
                    if (sse_reg > 8)
                        sse_reg -= reg_count;
                    else
                        arg_stored = 0;
                } else if (mode == x86_64_mode_integer) {
                    if (gen_reg > REGN)
                        gen_reg -= reg_count;
                    else
                        arg_stored = 0;
                }

                if (arg_stored) {
                    /* allocate the necessary size on stack */
                    o(0x48);
                    oad(0xec81, size); /* sub $xxx, %rsp */
                    /* generate structure store */
                    r = get_reg(RC_INT);
                    orex(1, r, 0, 0x89); /* mov %rsp, r */
                    o(0xe0 + REG_VALUE(r));
                    vset(&vtop->type, r | VT_LVAL, 0);
                    vswap();
                    vstore();
                    args_size += size;
                }
                break;

            case VT_LDOUBLE:
                assert(0);
                break;

            case VT_FLOAT:
            case VT_DOUBLE:
                assert(mode == x86_64_mode_sse);
                if (sse_reg > 8) {
                    --sse_reg;
                    r = gv(RC_FLOAT);
                    o(0x50); /* push %rax */
                    /* movq %xmmN, (%rsp) */
                    o(0xd60f66);
                    o(0x04 + REG_VALUE(r)*8);
                    o(0x24);
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;

            default:
                assert(mode == x86_64_mode_integer);
                /* simple type */
                /* XXX: implicit cast ? */
                if (gen_reg > REGN) {
                    --gen_reg;
                    r = gv(RC_INT);
                    orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;
            }

            /* And swap the argument back to its original position. */
            tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            if (arg_stored) {
                vrotb(i+1);
                assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
                vpop();
                --nb_args;
                --run_end;
            } else {
                ++i;
            }
        }

        /* handle 16 byte aligned arguments at end of run */
        run_start = i = run_end;
        while (i < nb_args) {
            /* Rotate argument to top since it will always be popped */
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            if (align != 16)
                break;

            vrotb(i+1);

            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                gv(RC_ST0);
                oad(0xec8148, size); /* sub $xxx, %rsp */
                o(0x7cdb); /* fstpt 0(%rsp) */
                g(0x24);
                g(0x00);
                args_size += size;
            } else {
                assert(mode == x86_64_mode_memory);

                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore();
                args_size += size;
            }

            vpop();
            --nb_args;
        }
    }

    /* XXX This should be superfluous. */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            gen_reg -= reg_count;
            r = gv(RC_INT);
            int d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here. */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count <= 8) {
                    seen_reg_num += reg_count;
                } else {
                    seen_reg_num = 8;
                    goto stack_arg;
                }
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count <= 8) {
                    seen_sse_num += reg_count;
                } else {
                    seen_sse_num = 8;
                    goto stack_arg;
                }
                break;
            }
        }

        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            o(0xd60f66); /* movq */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
                sse_param_index += reg_count;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
                reg_param_index += reg_count;
            }
            break;
        }
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}
#endif /* not PE */

/* generate a jump to a label */
int gjmp(int t)
{
    return psym(0xe9, t);
}
/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
{
    int v, *p;

    v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100) {
            /* This was a float compare.  If the parity flag is set
               the result was unordered.  For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test.  We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump. */
            vtop->c.i &= ~0x100;
            if (!inv == (vtop->c.i != TOK_NE))
                o(0x067a); /* jp +6 */
            else {
                g(0x0f);
                t = psym(0x8a, t); /* jp t */
            }
        }
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            p = &vtop->c.i;
            while (*p != 0)
                p = (int *)(cur_text_section->data + *p);
            *p = t;
            t = vtop->c.i;
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    }
    vtop--;
    return t;
}
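/* For the 0x81/0x83 ALU group used below, 'opc' is the /r opcode
   extension: 0=add, 1=or, 2=adc, 3=sbb, 4=and, 5=sub, 6=xor, 7=cmp. */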
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}
void gen_opl(int op)
{
    gen_opi(op);
}
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.ul;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}
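/* Integer-to-float conversion: long double targets go through the x87
   fild instructions with the operand bounced through the stack; other
   targets use cvtsi2ss/cvtsi2sd.  An unsigned int is handled as a
   zero-extended 64-bit value so that the signed 64-bit conversion
   instructions produce the correct result. */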
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(TREG_ST0);
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(TREG_ST0);
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}
/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}
/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}
/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
    vset(type, REG_IRET, 0);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b48);
    o(0xe0 | REG_VALUE(r));
    /* We align to 16 bytes rather than align */
    /* and ~15, %rsp */
    o(0xf0e48348);
    /* mov %rsp, r */
    o(0x8948);
    o(0xe0 | REG_VALUE(r));
    vpop();
    vset(type, r, 0);
#endif
}

/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/