/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         25
#define NB_ASM_REGS     8

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which
   makes assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_ST0     0x0080 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_LRET    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */
/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_ST0 = 24,

    TREG_MEM = 0x20,
};
#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE  0x1000

/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#include "tcc.h"
#include <assert.h>

ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};

static unsigned long func_sub_sp_offset;
static int func_ret_sub;
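
/* The helpers below append raw bytes to the current text section at
   offset 'ind', growing the section as needed: g() emits one byte,
   o() emits the bytes of its argument low-order first until the
   remaining value is zero, and gen_le16/32/64 emit fixed-width
   little-endian values. */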
/* XXX: make it faster ? */
void g(int c)
{
    int ind1;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}

void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
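
/* Emit an optional REX prefix followed by opcode byte(s) 'b'.  'll'
   selects REX.W (64-bit operand size); 'r' and 'r2' contribute the
   REX.B and REX.R bits for registers 8-15.  Values that are not plain
   registers (>= VT_CONST) contribute no prefix bits. */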
void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
/* output a symbol and patch all calls to it */
void gsym_addr(int t, int a)
{
    int n, *ptr;
    while (t) {
        ptr = (int *)(cur_text_section->data + t);
        n = *ptr; /* next value */
        *ptr = a - t - 4;
        t = n;
    }
}

void gsym(int t)
{
    gsym_addr(t, ind);
}

/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad
static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}

static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}
/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
    s = ind;
    ind = ind1;
    return s;
}
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_32);
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    gen_le32(c-4);
}
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    Section *sr;
    ElfW(Rela) *rel;
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
    rel->r_addend = -4;
#else
    printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
           cur_text_section->data[ind-3],
           cur_text_section->data[ind-2],
           cur_text_section->data[ind-1]
           );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
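
/* Emit the ModR/M (and displacement) bytes for a memory operand.
   'op_reg' supplies the reg field, and 'r' selects the addressing
   mode: VT_CONST -> RIP-relative (optionally via the GOT), VT_LOCAL ->
   %rbp-relative with an 8- or 32-bit displacement, TREG_MEM and above
   -> register-indirect. */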
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
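
/* Note: _Bool lvalues are handled alongside plain VT_BYTE below and
   are loaded with movsbl; this is the load half of the "load/store of
   _Bool value" change. */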
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t;
    fc = sv->c.ul;

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.ul = fc;
            fr = r;
            if (!(reg_classes[fr] & RC_INT))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.ull);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100) {
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE.  */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8));
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
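
/* Note: _Bool values are stored as a single byte (opcode 0x88), the
   same as VT_BYTE; this is the store half of the "load/store of
   _Bool value" change. */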
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.ul;
    fr = v->r & VT_VALMASK;
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, etc. */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: do we ever come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: do we ever come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
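
/* Indirect calls go through %r11: it is call-clobbered in both the
   System V and Windows conventions, and by this point the argument
   values staged in %r10/%r11 have already been copied to their real
   registers, so it is free to clobber. */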
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym,
                   ind + 1, R_X86_64_PC32);
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        }
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}

static int func_scratch;

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
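
/* Emit instruction 'b' with an %rsp-relative memory operand at offset
   'd', choosing the disp8 or disp32 form as needed.  Bit 8 of 'r'
   (0x100) marks an operand whose register number must not contribute
   REX bits. */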
void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
/* Return 1 if this function returns via an sret pointer, 0 otherwise */
ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align)
{
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86-64
    size = type_size(vt, &align);
    ret->ref = NULL;
    if (size > 8) {
        return 1;
    } else if (size > 4) {
        ret->t = VT_LLONG;
        return 0;
    } else if (size > 2) {
        ret->t = VT_INT;
        return 0;
    } else if (size > 1) {
        ret->t = VT_SHORT;
        return 0;
    } else {
        ret->t = VT_BYTE;
        return 0;
    }
}
int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments of 8 bytes or less are passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (size > 8) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, arg*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);
    vtop--;
}
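
/* 11 bytes: push %rbp (1) + mov %rsp,%rbp (3) + sub $imm32,%rsp (7).
   gfunc_prolog skips this space and gfunc_epilog writes the prolog
   back once the final frame size is known (using a __chkstk call for
   large frames). */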
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
#else

static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_sse,
    x86_64_mode_x87
} X86_64_Mode;
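
/* Argument classification following the System V x86-64 ABI: each
   argument is classified as INTEGER, SSE, X87 or MEMORY, and the
   classes of merged struct fields combine toward the more restrictive
   class. */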
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b) {
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
static X86_64_Mode classify_x86_64_inner(CType *ty) {
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        // Detect union
        if (f->next && (f->c == f->next->c))
            return x86_64_mode_memory;

        mode = x86_64_mode_none;
        for (; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }

    assert(0);
}
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count) {
    X86_64_Mode mode;
    int size, align, ret_t = 0; /* 0 default for modes that never set ret_t */

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}
ST_FUNC int classify_x86_64_va_arg(CType *ty) {
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}
/* Return 1 if this function returns via an sret pointer, 0 otherwise */
int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) == x86_64_mode_memory);
}
#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, run_start, run_end, i, j, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float register arguments */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse)
            nb_sse_args += reg_count;
        else if (mode == x86_64_mode_integer)
            nb_reg_args += reg_count;
    }

    /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
       and ended by a 16-byte aligned argument. This is because, from the point of view of
       the callee, argument alignment is computed from the bottom up. */
    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    run_start = 0;
    args_size = 0;
    while (run_start != nb_args) {
        int run_gen_reg = gen_reg, run_sse_reg = sse_reg;

        run_end = nb_args;
        stack_adjust = 0;
        for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            switch (mode) {
            case x86_64_mode_memory:
            case x86_64_mode_x87:
            stack_arg:
                if (align == 16)
                    run_end = i;
                else
                    stack_adjust += size;
                break;

            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8) goto stack_arg;
                break;

            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN) goto stack_arg;
                break;
            }
        }

        gen_reg = run_gen_reg;
        sse_reg = run_sse_reg;

        /* adjust stack to align SSE boundary */
        if (stack_adjust &= 15) {
            /* fetch cpu flag before the following sub will change the value */
            if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
                gv(RC_INT);

            stack_adjust = 16 - stack_adjust;
            o(0x48);
            oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
            args_size += stack_adjust;
        }

        for(i = run_start; i < run_end;) {
            /* Swap argument to top, it will possibly be changed here,
               and might use more temps. At the end of the loop we keep
               it on the stack and swap it back to its original position
               if it is a register. */
            SValue tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, &reg_count);

            int arg_stored = 1;
            switch (vtop->type.t & VT_BTYPE) {
            case VT_STRUCT:
                if (mode == x86_64_mode_sse) {
                    if (sse_reg > 8)
                        sse_reg -= reg_count;
                    else
                        arg_stored = 0;
                } else if (mode == x86_64_mode_integer) {
                    if (gen_reg > REGN)
                        gen_reg -= reg_count;
                    else
                        arg_stored = 0;
                }

                if (arg_stored) {
                    /* allocate the necessary size on stack */
                    o(0x48);
                    oad(0xec81, size); /* sub $xxx, %rsp */
                    /* generate structure store */
                    r = get_reg(RC_INT);
                    orex(1, r, 0, 0x89); /* mov %rsp, r */
                    o(0xe0 + REG_VALUE(r));
                    vset(&vtop->type, r | VT_LVAL, 0);
                    vswap();
                    vstore();
                    args_size += size;
                }
                break;

            case VT_LDOUBLE:
                assert(0);
                break;

            case VT_FLOAT:
            case VT_DOUBLE:
                assert(mode == x86_64_mode_sse);
                if (sse_reg > 8) {
                    --sse_reg;
                    r = gv(RC_FLOAT);
                    o(0x50); /* push %rax */
                    /* movq %xmmN, (%rsp) */
                    o(0xd60f66);
                    o(0x04 + REG_VALUE(r)*8);
                    o(0x24);
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;

            default:
                assert(mode == x86_64_mode_integer);
                /* simple type */
                /* XXX: implicit cast ? */
                if (gen_reg > REGN) {
                    --gen_reg;
                    r = gv(RC_INT);
                    orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;
            }

            /* And swap the argument back to its original position.  */
            tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            if (arg_stored) {
                vrotb(i+1);
                assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
                vpop();
                --nb_args;
                --run_end;
            } else {
                ++i;
            }
        }

        /* handle 16 byte aligned arguments at end of run */
        run_start = i = run_end;
        while (i < nb_args) {
            /* Rotate argument to top since it will always be popped */
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            if (align != 16)
                break;

            vrotb(i+1);

            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                gv(RC_ST0);
                oad(0xec8148, size); /* sub $xxx, %rsp */
                o(0x7cdb); /* fstpt 0(%rsp) */
                g(0x24);
                g(0x00);
                args_size += size;
            } else {
                assert(mode == x86_64_mode_memory);

                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore();
                args_size += size;
            }

            vpop();
            --nb_args;
        }
    }

    /* XXX This should be superfluous.  */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            gen_reg -= reg_count;
            r = gv(RC_INT);
            int d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here.  */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}
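
/* Space reserved for the prolog (push %rbp; mov %rsp,%rbp;
   sub $imm32,%rsp): gfunc_prolog skips these bytes and gfunc_epilog
   patches them in at func_sub_sp_offset - FUNC_PROLOG_SIZE once the
   frame size is known. */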
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count <= 8) {
                    seen_reg_num += reg_count;
                } else {
                    seen_reg_num = 8;
                    goto stack_arg;
                }
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count <= 8) {
                    seen_sse_num += reg_count;
                } else {
                    seen_sse_num = 8;
                    goto stack_arg;
                }
                break;
            }
        }

        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            o(0xd60f66); /* movq */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
                sse_param_index += reg_count;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
                reg_param_index += reg_count;
            }
            break;
        }
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}

#endif /* not PE */
/* generate a jump to a label */
int gjmp(int t)
{
    return psym(0xe9, t);
}

/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
{
    int v, *p;

    v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100) {
            /* This was a float compare.  If the parity flag is set
               the result was unordered.  For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test.  We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump.  */
            vtop->c.i &= ~0x100;
            if (!inv == (vtop->c.i != TOK_NE))
                o(0x067a); /* jp +6 */
            else {
                g(0x0f);
                t = psym(0x8a, t); /* jp t */
            }
        }
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            p = &vtop->c.i;
            while (*p != 0)
                p = (int *)(cur_text_section->data + *p);
            *p = t;
            t = vtop->c.i;
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    } else {
        if (is_float(vtop->type.t) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            vpushi(0);
            gen_op(TOK_NE);
        }
        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
            /* constant jmp optimization */
            if ((vtop->c.i != 0) != inv)
                t = gjmp(t);
        } else {
            v = gv(RC_INT);
            orex(0,v,v,0x85);
            o(0xc0 + REG_VALUE(v) * 9);
            g(0x0f);
            t = psym(0x85 ^ inv, t);
        }
    }
    vtop--;
    return t;
}
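
/* In gen_opi below, 'opc' is the /r extension of the 0x81/0x83
   immediate-ALU opcodes (0=add, 1=or, 2=adc, 3=sbb, 4=and, 5=sub,
   6=xor, 7=cmp); the register-register form uses opcode
   (opc << 3) | 0x01. */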
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}
void gen_opl(int op)
{
    gen_opi(op);
}
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            o(0xe9da); /* fucompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fc80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.ul;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            o(0x2e0f); /* ucomisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
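
/* Conversions to or from long double below go through a scratch slot
   at -0x10(%rsp): the value is stored from one register file and
   reloaded by the other, since there is no direct SSE<->x87 move
   instruction. */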
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}
/* Save the stack pointer at the given location on the stack */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
    vset(type, REG_IRET, 0);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b48);
    o(0xe0 | REG_VALUE(r));
    /* We align to 16 bytes rather than 'align' */
    /* and ~15, %rsp */
    o(0xf0e48348);
    /* mov %rsp, r */
    o(0x8948);
    o(0xe0 | REG_VALUE(r));
    vpop();
    vset(type, r, 0);
#endif
}

/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/