1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
26 #define NB_REGS 25
27 #define NB_ASM_REGS 8
28 #define REG_ARGS_MAX 2 /* at most 2 registers used for each argument */
30 #ifdef TCC_TARGET_PE
31 typedef int RegArgs;
32 #else
33 /* This struct stores the offsets within the struct at which %rax, %rdx,
34 * %xmm0 and %xmm1 are to be stored; -1 marks a register as unused.
36 * struct { long long l; double x; }: ireg = { 0, -1 } freg = { 8, -1 }
37 * struct { double x; long long l; }: ireg = { 8, -1 } freg = { 0, -1 }
38 * struct { long long l; long long l2; }: ireg = { 0, 8 } freg = { -1, -1 }
39 * struct { double x; double x2; }: ireg = { -1, -1 } freg = { 0, 8 }
41 typedef struct {
42 int ireg[REG_ARGS_MAX];
43 int freg[REG_ARGS_MAX];
44 } RegArgs;
45 #endif
47 /* a register can belong to several classes. The classes must be
48 sorted from more general to more precise (see gv2() code which makes
49 assumptions about it). */
50 #define RC_INT 0x0001 /* generic integer register */
51 #define RC_FLOAT 0x0002 /* generic float register */
52 #define RC_RAX 0x0004
53 #define RC_RCX 0x0008
54 #define RC_RDX 0x0010
55 #define RC_ST0 0x0080 /* only for long double */
56 #define RC_R8 0x0100
57 #define RC_R9 0x0200
58 #define RC_R10 0x0400
59 #define RC_R11 0x0800
60 #define RC_XMM0 0x1000
61 #define RC_XMM1 0x2000
62 #define RC_XMM2 0x4000
63 #define RC_XMM3 0x8000
64 #define RC_XMM4 0x10000
65 #define RC_XMM5 0x20000
66 #define RC_XMM6 0x40000
67 #define RC_XMM7 0x80000
68 #define RC_IRET RC_RAX /* function return: integer register */
69 #define RC_LRET RC_RDX /* function return: second integer register */
70 #define RC_FRET RC_XMM0 /* function return: float register */
71 #define RC_QRET RC_XMM1 /* function return: second float register */
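/* Editorial illustration: each entry of reg_classes[] (defined below)
   ORs a generic class with the register's own precise class, e.g.
     reg_classes[TREG_RAX] = RC_INT | RC_RAX;
   so a request for RC_INT matches any integer register, while a
   request for RC_RAX can only be satisfied by %rax. */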
73 /* pretty names for the registers */
74 enum {
75 TREG_RAX = 0,
76 TREG_RCX = 1,
77 TREG_RDX = 2,
78 TREG_RSP = 4,
79 TREG_RSI = 6,
80 TREG_RDI = 7,
82 TREG_R8 = 8,
83 TREG_R9 = 9,
84 TREG_R10 = 10,
85 TREG_R11 = 11,
87 TREG_XMM0 = 16,
88 TREG_XMM1 = 17,
89 TREG_XMM2 = 18,
90 TREG_XMM3 = 19,
91 TREG_XMM4 = 20,
92 TREG_XMM5 = 21,
93 TREG_XMM6 = 22,
94 TREG_XMM7 = 23,
96 TREG_ST0 = 24,
98 TREG_MEM = 0x20,
101 #define REX_BASE(reg) (((reg) >> 3) & 1)
102 #define REG_VALUE(reg) ((reg) & 7)
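/* Worked example (editorial): registers are numbered 0..15 on x86-64;
   REG_VALUE() keeps the low 3 bits that fit in a ModRM byte, and
   REX_BASE() extracts bit 3, which must travel in the REX prefix.
   For %r10 (reg = 10): REX_BASE(10) == 1 and REG_VALUE(10) == 2, so
   %r10 encodes like %rdx plus a REX bit. */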
104 /* return registers for function */
105 #define REG_IRET TREG_RAX /* single word int return register */
106 #define REG_LRET TREG_RDX /* second word return register (for long long) */
107 #define REG_FRET TREG_XMM0 /* float return register */
108 #define REG_QRET TREG_XMM1 /* second float return register */
110 /* defined if function parameters must be evaluated in reverse order */
111 #define INVERT_FUNC_PARAMS
113 /* pointer size, in bytes */
114 #define PTR_SIZE 8
116 /* long double size and alignment, in bytes */
117 #define LDOUBLE_SIZE 16
118 #define LDOUBLE_ALIGN 16
119 /* maximum alignment (for aligned attribute support) */
120 #define MAX_ALIGN 16
122 /******************************************************/
123 /* ELF defines */
125 #define EM_TCC_TARGET EM_X86_64
127 /* relocation type for 32 bit data relocation */
128 #define R_DATA_32 R_X86_64_32
129 #define R_DATA_PTR R_X86_64_64
130 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
131 #define R_COPY R_X86_64_COPY
133 #define ELF_START_ADDR 0x400000
134 #define ELF_PAGE_SIZE 0x200000
136 /******************************************************/
137 #else /* ! TARGET_DEFS_ONLY */
138 /******************************************************/
139 #include "../tcc.h"
140 #include <assert.h>
142 ST_DATA const int reg_classes[NB_REGS] = {
143 /* eax */ RC_INT | RC_RAX,
144 /* ecx */ RC_INT | RC_RCX,
145 /* edx */ RC_INT | RC_RDX,
151 RC_R8,
152 RC_R9,
153 RC_R10,
154 RC_R11,
159 /* xmm0 */ RC_FLOAT | RC_XMM0,
160 /* xmm1 */ RC_FLOAT | RC_XMM1,
161 /* xmm2 */ RC_FLOAT | RC_XMM2,
162 /* xmm3 */ RC_FLOAT | RC_XMM3,
163 /* xmm4 */ RC_FLOAT | RC_XMM4,
164 /* xmm5 */ RC_FLOAT | RC_XMM5,
165 /* xmm6 and xmm7 are included so gv() can be used on them,
166 but they are not tagged with RC_FLOAT because they are
167 callee saved on Windows */
168 RC_XMM6,
169 RC_XMM7,
170 /* st0 */ RC_ST0
173 static unsigned long func_sub_sp_offset;
174 static int func_ret_sub;
176 /* XXX: make it faster ? */
177 void g(int c)
179 int ind1;
180 ind1 = ind + 1;
181 if (ind1 > cur_text_section->data_allocated)
182 section_realloc(cur_text_section, ind1);
183 cur_text_section->data[ind] = c;
184 ind = ind1;
187 void o(unsigned int c)
189 while (c) {
190 g(c);
191 c = c >> 8;
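/* Editorial note: o() emits the value low byte first and stops when the
   remaining value becomes zero, so multi-byte opcodes are written with
   the first instruction byte in the least significant position:
     o(0x7e0ff3);   emits f3 0f 7e (movq prefix + opcode)
   o(0) emits nothing and high-order zero bytes are dropped, which is
   why trailing zero bytes are produced with g() or gen_le32() instead. */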
195 void gen_le16(int v)
197 g(v);
198 g(v >> 8);
201 void gen_le32(int c)
203 g(c);
204 g(c >> 8);
205 g(c >> 16);
206 g(c >> 24);
209 void gen_le64(int64_t c)
211 g(c);
212 g(c >> 8);
213 g(c >> 16);
214 g(c >> 24);
215 g(c >> 32);
216 g(c >> 40);
217 g(c >> 48);
218 g(c >> 56);
221 void orex(int ll, int r, int r2, int b)
223 if ((r & VT_VALMASK) >= VT_CONST)
224 r = 0;
225 if ((r2 & VT_VALMASK) >= VT_CONST)
226 r2 = 0;
227 if (ll || REX_BASE(r) || REX_BASE(r2))
228 o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
229 o(b);
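/* Worked example (editorial): storing %rax into %r8 as a 64-bit move
   calls orex(1, TREG_R8, TREG_RAX, 0x89); ll = 1 contributes REX.W,
   REX_BASE(TREG_R8) = 1 contributes REX.B, so the prefix byte is 0x49
   and the bytes 49 89 are emitted; the caller then appends the ModRM
   byte. */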
232 /* output a symbol and patch all calls to it */
233 void gsym_addr(int t, int a)
235 int n, *ptr;
236 while (t) {
237 ptr = (int *)(cur_text_section->data + t);
238 n = *ptr; /* next value */
239 *ptr = a - t - 4;
240 t = n;
244 void gsym(int t)
246 gsym_addr(t, ind);
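/* Worked example (editorial): two forward jumps to the same label at
   fixup offsets t1 = 0x10 and t2 = 0x20 form the chain t2 -> t1 -> 0.
   With the label at a = 0x30, gsym_addr(0x20, 0x30) writes
   0x30 - 0x20 - 4 = 0x0c at offset 0x20, follows the old link to 0x10,
   writes 0x30 - 0x10 - 4 = 0x1c there, and stops at 0. */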
249 /* psym is used to put an instruction with a data field which is a
250 reference to a symbol. It is in fact the same as oad ! */
251 #define psym oad
253 static int is64_type(int t)
255 return ((t & VT_BTYPE) == VT_PTR ||
256 (t & VT_BTYPE) == VT_FUNC ||
257 (t & VT_BTYPE) == VT_LLONG);
260 /* instruction + 4 bytes data. Return the address of the data */
261 ST_FUNC int oad(int c, int s)
263 int ind1;
265 o(c);
266 ind1 = ind + 4;
267 if (ind1 > cur_text_section->data_allocated)
268 section_realloc(cur_text_section, ind1);
269 *(int *)(cur_text_section->data + ind) = s;
270 s = ind;
271 ind = ind1;
272 return s;
275 ST_FUNC void gen_addr32(int r, Sym *sym, int c)
277 if (r & VT_SYM)
278 greloc(cur_text_section, sym, ind, R_X86_64_32);
279 gen_le32(c);
282 /* output constant with relocation if 'r & VT_SYM' is true */
283 ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
285 if (r & VT_SYM)
286 greloc(cur_text_section, sym, ind, R_X86_64_64);
287 gen_le64(c);
290 /* output constant with relocation if 'r & VT_SYM' is true */
291 ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
293 if (r & VT_SYM)
294 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
295 gen_le32(c-4);
298 /* output got address with relocation */
299 static void gen_gotpcrel(int r, Sym *sym, int c)
301 #ifndef TCC_TARGET_PE
302 Section *sr;
303 ElfW(Rela) *rel;
304 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
305 sr = cur_text_section->reloc;
306 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
307 rel->r_addend = -4;
308 #else
309 tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
310 get_tok_str(sym->v, NULL), c, r,
311 cur_text_section->data[ind-3],
312 cur_text_section->data[ind-2],
313 cur_text_section->data[ind-1]
314 );
315 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
316 #endif
317 gen_le32(0);
318 if (c) {
319 /* we use add c, %xxx for displacement */
320 orex(1, r, 0, 0x81);
321 o(0xc0 + REG_VALUE(r));
322 gen_le32(c);
326 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
328 op_reg = REG_VALUE(op_reg) << 3;
329 if ((r & VT_VALMASK) == VT_CONST) {
330 /* constant memory reference */
331 o(0x05 | op_reg);
332 if (is_got) {
333 gen_gotpcrel(r, sym, c);
334 } else {
335 gen_addrpc32(r, sym, c);
337 } else if ((r & VT_VALMASK) == VT_LOCAL) {
338 /* currently, we use only %rbp as base */
339 if (c == (char)c) {
340 /* short reference */
341 o(0x45 | op_reg);
342 g(c);
343 } else {
344 oad(0x85 | op_reg, c);
346 } else if ((r & VT_VALMASK) >= TREG_MEM) {
347 if (c) {
348 g(0x80 | op_reg | REG_VALUE(r));
349 gen_le32(c);
350 } else {
351 g(0x00 | op_reg | REG_VALUE(r));
353 } else {
354 g(0x00 | op_reg | REG_VALUE(r));
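/* Example encodings (editorial sketch), with op_reg = 0 and the 0x8b
   opcode byte emitted by the caller:
     8b 05 <rel32>   mov sym(%rip),%rax    (VT_CONST, RIP-relative)
     8b 45 f8        mov -0x8(%rbp),%rax   (VT_LOCAL, short disp8 form)
     8b 01           mov (%rcx),%rax       (register-indirect) */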
358 /* generate a modrm reference. 'op_reg' contains the additional 3
359 opcode bits */
360 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
362 gen_modrm_impl(op_reg, r, sym, c, 0);
365 /* generate a 64-bit modrm reference. 'op_reg' contains the additional 3
366 opcode bits */
367 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
369 int is_got;
370 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
371 orex(1, r, op_reg, opcode);
372 gen_modrm_impl(op_reg, r, sym, c, is_got);
376 /* load 'r' from value 'sv' */
377 void load(int r, SValue *sv)
379 int v, t, ft, fc, fr;
380 SValue v1;
382 #ifdef TCC_TARGET_PE
383 SValue v2;
384 sv = pe_getimport(sv, &v2);
385 #endif
387 fr = sv->r;
388 ft = sv->type.t & ~VT_DEFSIGN;
389 fc = sv->c.ul;
391 #ifndef TCC_TARGET_PE
392 /* we use indirect access via got */
393 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
394 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
395 /* use the result register as a temporary register */
396 int tr = r | TREG_MEM;
397 if (is_float(ft)) {
398 /* we cannot use a float register as the temporary */
399 tr = get_reg(RC_INT) | TREG_MEM;
401 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
403 /* load from the temporary register */
404 fr = tr | VT_LVAL;
406 #endif
408 v = fr & VT_VALMASK;
409 if (fr & VT_LVAL) {
410 int b, ll;
411 if (v == VT_LLOCAL) {
412 v1.type.t = VT_PTR;
413 v1.r = VT_LOCAL | VT_LVAL;
414 v1.c.ul = fc;
415 fr = r;
416 if (!(reg_classes[fr] & (RC_INT|RC_R11)))
417 fr = get_reg(RC_INT);
418 load(fr, &v1);
420 ll = 0;
421 if ((ft & VT_BTYPE) == VT_FLOAT) {
422 b = 0x6e0f66;
423 r = REG_VALUE(r); /* movd */
424 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
425 b = 0x7e0ff3; /* movq */
426 r = REG_VALUE(r);
427 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
428 b = 0xdb, r = 5; /* fldt */
429 } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
430 b = 0xbe0f; /* movsbl */
431 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
432 b = 0xb60f; /* movzbl */
433 } else if ((ft & VT_TYPE) == VT_SHORT) {
434 b = 0xbf0f; /* movswl */
435 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
436 b = 0xb70f; /* movzwl */
437 } else {
438 assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
439 || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
440 || ((ft & VT_BTYPE) == VT_FUNC));
441 ll = is64_type(ft);
442 b = 0x8b;
444 if (ll) {
445 gen_modrm64(b, r, fr, sv->sym, fc);
446 } else {
447 orex(ll, fr, r, b);
448 gen_modrm(r, fr, sv->sym, fc);
450 } else {
451 if (v == VT_CONST) {
452 if (fr & VT_SYM) {
453 #ifdef TCC_TARGET_PE
454 orex(1,0,r,0x8d);
455 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
456 gen_addrpc32(fr, sv->sym, fc);
457 #else
458 if (sv->sym->type.t & VT_STATIC) {
459 orex(1,0,r,0x8d);
460 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
461 gen_addrpc32(fr, sv->sym, fc);
462 } else {
463 orex(1,0,r,0x8b);
464 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
465 gen_gotpcrel(r, sv->sym, fc);
467 #endif
468 } else if (is64_type(ft)) {
469 orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
470 gen_le64(sv->c.ull);
471 } else {
472 orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
473 gen_le32(fc);
475 } else if (v == VT_LOCAL) {
476 orex(1,0,r,0x8d); /* lea xxx(%rbp), r */
477 gen_modrm(r, VT_LOCAL, sv->sym, fc);
478 } else if (v == VT_CMP) {
479 orex(0,r,0,0);
480 if ((fc & ~0x100) != TOK_NE)
481 oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
482 else
483 oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
484 if (fc & 0x100) {
485 /* This was a float compare. If the parity bit is
486 * set the result was unordered, meaning false for everything
487 * except TOK_NE, and true for TOK_NE. */
488 fc &= ~0x100;
489 o(0x037a + (REX_BASE(r) << 8));
491 orex(0,r,0, 0x0f); /* setxx %br */
492 o(fc);
493 o(0xc0 + REG_VALUE(r));
494 } else if (v == VT_JMP || v == VT_JMPI) {
495 t = v & 1;
496 orex(0,r,0,0);
497 oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
498 o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
499 gsym(fc);
500 orex(0,r,0,0);
501 oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
502 } else if (v != r) {
503 if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
504 if (v == TREG_ST0) {
505 /* gen_cvt_ftof(VT_DOUBLE); */
506 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
507 /* movsd -0x10(%rsp),%xmmN */
508 o(0x100ff2);
509 o(0x44 + REG_VALUE(r)*8); /* %xmmN */
510 o(0xf024);
511 } else {
512 assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
513 if ((ft & VT_BTYPE) == VT_FLOAT) {
514 o(0x100ff3);
515 } else {
516 assert((ft & VT_BTYPE) == VT_DOUBLE);
517 o(0x100ff2);
519 o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
521 } else if (r == TREG_ST0) {
522 assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
523 /* gen_cvt_ftof(VT_LDOUBLE); */
524 /* movsd %xmmN,-0x10(%rsp) */
525 o(0x110ff2);
526 o(0x44 + REG_VALUE(r)*8); /* %xmmN */
527 o(0xf024);
528 o(0xf02444dd); /* fldl -0x10(%rsp) */
529 } else {
530 orex(1,r,v, 0x89);
531 o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
537 /* store register 'r' in lvalue 'v' */
538 void store(int r, SValue *v)
540 int fr, bt, ft, fc;
541 int op64 = 0;
542 /* store the REX prefix in this variable when PIC is enabled */
543 int pic = 0;
545 #ifdef TCC_TARGET_PE
546 SValue v2;
547 v = pe_getimport(v, &v2);
548 #endif
550 ft = v->type.t;
551 fc = v->c.ul;
552 fr = v->r & VT_VALMASK;
553 bt = ft & VT_BTYPE;
555 #ifndef TCC_TARGET_PE
556 /* we need to access the variable via got */
557 if (fr == VT_CONST && (v->r & VT_SYM)) {
558 /* mov xx(%rip), %r11 */
559 o(0x1d8b4c);
560 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
561 pic = is64_type(bt) ? 0x49 : 0x41;
563 #endif
565 /* XXX: incorrect if float reg to reg */
566 if (bt == VT_FLOAT) {
567 o(0x66);
568 o(pic);
569 o(0x7e0f); /* movd */
570 r = REG_VALUE(r);
571 } else if (bt == VT_DOUBLE) {
572 o(0x66);
573 o(pic);
574 o(0xd60f); /* movq */
575 r = REG_VALUE(r);
576 } else if (bt == VT_LDOUBLE) {
577 o(0xc0d9); /* fld %st(0) */
578 o(pic);
579 o(0xdb); /* fstpt */
580 r = 7;
581 } else {
582 if (bt == VT_SHORT)
583 o(0x66);
584 o(pic);
585 if (bt == VT_BYTE || bt == VT_BOOL)
586 orex(0, 0, r, 0x88);
587 else if (is64_type(bt))
588 op64 = 0x89;
589 else
590 orex(0, 0, r, 0x89);
592 if (pic) {
593 /* xxx r, (%r11) where xxx is mov, movq, fld, etc. */
594 if (op64)
595 o(op64);
596 o(3 + (r << 3));
597 } else if (op64) {
598 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
599 gen_modrm64(op64, r, v->r, v->sym, fc);
600 } else if (fr != r) {
601 /* XXX: do we ever actually get here? */
602 abort();
603 o(0xc0 + fr + r * 8); /* mov r, fr */
605 } else {
606 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
607 gen_modrm(r, v->r, v->sym, fc);
608 } else if (fr != r) {
609 /* XXX: do we ever actually get here? */
610 abort();
611 o(0xc0 + fr + r * 8); /* mov r, fr */
616 /* 'is_jmp' is '1' if it is a jump */
617 static void gcall_or_jmp(int is_jmp)
619 int r;
620 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
621 ((vtop->r & VT_SYM) || (vtop->c.ll-4) == (int)(vtop->c.ll-4))) {
622 /* constant case */
623 if (vtop->r & VT_SYM) {
624 /* relocation case */
625 #ifdef TCC_TARGET_PE
626 greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32);
627 #else
628 greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32);
629 #endif
630 } else {
631 /* put an empty PC32 relocation */
632 put_elf_reloc(symtab_section, cur_text_section,
633 ind + 1, R_X86_64_PC32, 0);
635 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
636 } else {
637 /* otherwise, indirect call */
638 r = TREG_R11;
639 load(r, vtop);
640 o(0x41); /* REX */
641 o(0xff); /* call/jmp *r */
642 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
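/* Encoding sketch (editorial): the direct path emits e8 <rel32> (call)
   or e9 <rel32> (jmp, 0xe8 + is_jmp) against a PC32/PLT32 relocation;
   the indirect path loads the target into %r11 and emits
     41 ff d3    call *%r11   (is_jmp = 0)
     41 ff e3    jmp  *%r11   (is_jmp = 1) */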
646 #if defined(CONFIG_TCC_BCHECK)
647 #ifndef TCC_TARGET_PE
648 static addr_t func_bound_offset;
649 static unsigned long func_bound_ind;
650 #endif
652 static void gen_static_call(int v)
654 Sym *sym = external_global_sym(v, &func_old_type, 0);
655 oad(0xe8, -4);
656 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
659 /* generate a bounded pointer addition */
660 ST_FUNC void gen_bounded_ptr_add(void)
662 /* save all temporary registers */
663 save_regs(0);
665 /* prepare fast x86_64 function call */
666 gv(RC_RAX);
667 o(0xc68948); // mov %rax,%rsi ## second arg in %rsi, this must be size
668 vtop--;
670 gv(RC_RAX);
671 o(0xc78948); // mov %rax,%rdi ## first arg in %rdi, this must be ptr
672 vtop--;
674 /* do a fast function call */
675 gen_static_call(TOK___bound_ptr_add);
677 /* returned pointer is in rax */
678 vtop++;
679 vtop->r = TREG_RAX | VT_BOUNDED;
682 /* relocation offset of the bounding function call point */
683 vtop->c.ull = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
686 /* patch pointer addition in vtop so that pointer dereferencing is
687 also tested */
688 ST_FUNC void gen_bounded_ptr_deref(void)
690 addr_t func;
691 int size, align;
692 ElfW(Rela) *rel;
693 Sym *sym;
695 size = 0;
696 /* XXX: put that code in generic part of tcc */
697 if (!is_float(vtop->type.t)) {
698 if (vtop->r & VT_LVAL_BYTE)
699 size = 1;
700 else if (vtop->r & VT_LVAL_SHORT)
701 size = 2;
703 if (!size)
704 size = type_size(&vtop->type, &align);
705 switch(size) {
706 case 1: func = TOK___bound_ptr_indir1; break;
707 case 2: func = TOK___bound_ptr_indir2; break;
708 case 4: func = TOK___bound_ptr_indir4; break;
709 case 8: func = TOK___bound_ptr_indir8; break;
710 case 12: func = TOK___bound_ptr_indir12; break;
711 case 16: func = TOK___bound_ptr_indir16; break;
712 default:
713 tcc_error("unhandled size when dereferencing bounded pointer");
714 func = 0;
715 break;
718 sym = external_global_sym(func, &func_old_type, 0);
719 if (!sym->c)
720 put_extern_sym(sym, NULL, 0, 0);
722 /* patch relocation */
723 /* XXX: find a better solution ? */
725 rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.ull);
726 rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
728 #endif
730 #ifdef TCC_TARGET_PE
732 #define REGN 4
733 static const uint8_t arg_regs[REGN] = {
734 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
737 /* Prepare arguments in R10 and R11 rather than RCX and RDX
738 because gv() will never use them */
739 static int arg_prepare_reg(int idx) {
740 if (idx == 0 || idx == 1)
741 /* idx=0: r10, idx=1: r11 */
742 return idx + 10;
743 else
744 return arg_regs[idx];
747 static int func_scratch;
749 /* Generate function call. The function address is pushed first, then
750 all the parameters in call order. This function pops all the
751 parameters and the function address. */
753 void gen_offs_sp(int b, int r, int d)
755 orex(1,0,r & 0x100 ? 0 : r, b);
756 if (d == (char)d) {
757 o(0x2444 | (REG_VALUE(r) << 3));
758 g(d);
759 } else {
760 o(0x2484 | (REG_VALUE(r) << 3));
761 gen_le32(d);
765 ST_FUNC int regargs_nregs(RegArgs *args)
767 return *args;
770 /* Return 1 if the value is returned in registers (their count is stored
771 in *args), or 0 if it is returned via a hidden struct pointer. */
772 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize, RegArgs *args)
774 int size, align;
775 *regsize = 8;
776 *ret_align = 1; // Never have to re-align return values for x86-64
777 size = type_size(vt, &align);
778 ret->ref = NULL;
779 if (size > 8) {
780 *args = 0;
781 } else if (size > 4) {
782 ret->t = VT_LLONG;
783 *args = 1;
784 } else if (size > 2) {
785 ret->t = VT_INT;
786 *args = 1;
787 } else if (size > 1) {
788 ret->t = VT_SHORT;
789 *args = 1;
790 } else {
791 ret->t = VT_BYTE;
792 *args = 1;
795 return *args != 0;
798 static int is_sse_float(int t) {
799 int bt;
800 bt = t & VT_BTYPE;
801 return bt == VT_DOUBLE || bt == VT_FLOAT;
804 int gfunc_arg_size(CType *type) {
805 int align;
806 if (type->t & (VT_ARRAY|VT_BITFIELD))
807 return 8;
808 return type_size(type, &align);
811 void gfunc_call(int nb_args)
813 int size, r, args_size, i, d, bt, struct_size;
814 int arg;
816 args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
817 arg = nb_args;
819 /* for struct arguments, we need to call memcpy, and that call would
820 clobber the register-passed arguments we are preparing.
821 So we process the arguments which will be passed on the stack first. */
822 struct_size = args_size;
823 for(i = 0; i < nb_args; i++) {
824 SValue *sv;
826 --arg;
827 sv = &vtop[-i];
828 bt = (sv->type.t & VT_BTYPE);
829 size = gfunc_arg_size(&sv->type);
831 if (size <= 8)
832 continue; /* arguments of 8 bytes or less are passed in registers or on the stack */
834 if (bt == VT_STRUCT) {
835 /* align to stack align size */
836 size = (size + 15) & ~15;
837 /* generate structure store */
838 r = get_reg(RC_INT);
839 gen_offs_sp(0x8d, r, struct_size);
840 struct_size += size;
842 /* generate memcpy call */
843 vset(&sv->type, r | VT_LVAL, 0);
844 vpushv(sv);
845 vstore();
846 --vtop;
847 } else if (bt == VT_LDOUBLE) {
848 gv(RC_ST0);
849 gen_offs_sp(0xdb, 0x107, struct_size);
850 struct_size += 16;
854 if (func_scratch < struct_size)
855 func_scratch = struct_size;
857 arg = nb_args;
858 struct_size = args_size;
860 for(i = 0; i < nb_args; i++) {
861 --arg;
862 bt = (vtop->type.t & VT_BTYPE);
864 size = gfunc_arg_size(&vtop->type);
865 if (size > 8) {
866 /* align to stack align size */
867 size = (size + 15) & ~15;
868 if (arg >= REGN) {
869 d = get_reg(RC_INT);
870 gen_offs_sp(0x8d, d, struct_size);
871 gen_offs_sp(0x89, d, arg*8);
872 } else {
873 d = arg_prepare_reg(arg);
874 gen_offs_sp(0x8d, d, struct_size);
876 struct_size += size;
877 } else {
878 if (is_sse_float(vtop->type.t)) {
879 gv(RC_XMM0); /* only use one float register */
880 if (arg >= REGN) {
881 /* movq %xmm0, j*8(%rsp) */
882 gen_offs_sp(0xd60f66, 0x100, arg*8);
883 } else {
884 /* movaps %xmm0, %xmmN */
885 o(0x280f);
886 o(0xc0 + (arg << 3));
887 d = arg_prepare_reg(arg);
888 /* mov %xmm0, %rxx */
889 o(0x66);
890 orex(1,d,0, 0x7e0f);
891 o(0xc0 + REG_VALUE(d));
893 } else {
894 if (bt == VT_STRUCT) {
895 vtop->type.ref = NULL;
896 vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
897 : size > 1 ? VT_SHORT : VT_BYTE;
900 r = gv(RC_INT);
901 if (arg >= REGN) {
902 gen_offs_sp(0x89, r, arg*8);
903 } else {
904 d = arg_prepare_reg(arg);
905 orex(1,d,r,0x89); /* mov */
906 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
910 vtop--;
912 save_regs(0);
914 /* Copy R10 and R11 into RCX and RDX, respectively */
915 if (nb_args > 0) {
916 o(0xd1894c); /* mov %r10, %rcx */
917 if (nb_args > 1) {
918 o(0xda894c); /* mov %r11, %rdx */
922 gcall_or_jmp(0);
923 vtop--;
927 #define FUNC_PROLOG_SIZE 11
929 /* generate function prolog for function type 'func_type' */
930 void gfunc_prolog(CType *func_type)
932 int addr, reg_param_index, bt, size;
933 Sym *sym;
934 CType *type;
936 func_ret_sub = 0;
937 func_scratch = 0;
938 loc = 0;
940 addr = PTR_SIZE * 2;
941 ind += FUNC_PROLOG_SIZE;
942 func_sub_sp_offset = ind;
943 reg_param_index = 0;
945 sym = func_type->ref;
947 /* if the function returns a structure, then add an
948 implicit pointer parameter */
949 func_vt = sym->type;
950 func_var = (sym->c == FUNC_ELLIPSIS);
951 size = gfunc_arg_size(&func_vt);
952 if (size > 8) {
953 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
954 func_vc = addr;
955 reg_param_index++;
956 addr += 8;
959 /* define parameters */
960 while ((sym = sym->next) != NULL) {
961 type = &sym->type;
962 bt = type->t & VT_BTYPE;
963 size = gfunc_arg_size(type);
964 if (size > 8) {
965 if (reg_param_index < REGN) {
966 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
968 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
969 } else {
970 if (reg_param_index < REGN) {
971 /* save arguments passed by register */
972 if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
973 o(0xd60f66); /* movq */
974 gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
975 } else {
976 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
979 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
981 addr += 8;
982 reg_param_index++;
985 while (reg_param_index < REGN) {
986 if (func_type->ref->c == FUNC_ELLIPSIS) {
987 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
988 addr += 8;
990 reg_param_index++;
994 /* generate function epilog */
995 void gfunc_epilog(void)
997 int v, saved_ind;
999 o(0xc9); /* leave */
1000 if (func_ret_sub == 0) {
1001 o(0xc3); /* ret */
1002 } else {
1003 o(0xc2); /* ret n */
1004 g(func_ret_sub);
1005 g(func_ret_sub >> 8);
1008 saved_ind = ind;
1009 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1010 /* align local size to word & save local variables */
1011 v = (func_scratch + -loc + 15) & -16;
1013 if (v >= 4096) {
1014 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
1015 oad(0xb8, v); /* mov stacksize, %eax */
1016 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
1017 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
1018 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
1019 } else {
1020 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1021 o(0xec8148); /* sub rsp, stacksize */
1022 gen_le32(v);
1025 cur_text_section->data_offset = saved_ind;
1026 pe_add_unwind_data(ind, saved_ind, v);
1027 ind = cur_text_section->data_offset;
1030 #else
1032 static void gadd_sp(int val)
1034 if (val == (char)val) {
1035 o(0xc48348);
1036 g(val);
1037 } else {
1038 oad(0xc48148, val); /* add $xxx, %rsp */
1042 typedef enum X86_64_Mode {
1043 x86_64_mode_none,
1044 x86_64_mode_memory,
1045 x86_64_mode_integer,
1046 x86_64_mode_sse,
1047 x86_64_mode_x87
1048 } X86_64_Mode;
1050 static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
1052 if (a == b)
1053 return a;
1054 else if (a == x86_64_mode_none)
1055 return b;
1056 else if (b == x86_64_mode_none)
1057 return a;
1058 else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
1059 return x86_64_mode_memory;
1060 else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
1061 return x86_64_mode_integer;
1062 else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
1063 return x86_64_mode_memory;
1064 else
1065 return x86_64_mode_sse;
1068 /* classify the x86 eightbytes from byte index start to byte index
1069 * end, at offset offset from the root struct */
1070 static X86_64_Mode classify_x86_64_inner(CType *ty, int offset, int start, int end)
1072 X86_64_Mode mode;
1073 Sym *f;
1075 switch (ty->t & VT_BTYPE) {
1076 case VT_VOID: return x86_64_mode_none;
1078 case VT_INT:
1079 case VT_BYTE:
1080 case VT_SHORT:
1081 case VT_LLONG:
1082 case VT_BOOL:
1083 case VT_PTR:
1084 case VT_FUNC:
1085 case VT_ENUM: return x86_64_mode_integer;
1087 case VT_FLOAT:
1088 case VT_DOUBLE: return x86_64_mode_sse;
1090 case VT_LDOUBLE: return x86_64_mode_x87;
1092 case VT_STRUCT:
1093 f = ty->ref;
1095 mode = x86_64_mode_none;
1096 while ((f = f->next) != NULL) {
1097 if (f->c + offset >= start && f->c + offset < end)
1098 mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type, f->c + offset, start, end));
1101 return mode;
1104 assert(0);
1107 static X86_64_Mode classify_x86_64_arg_eightbyte(CType *ty, int offset)
1109 X86_64_Mode mode;
1111 assert((ty->t & VT_BTYPE) == VT_STRUCT);
1113 mode = classify_x86_64_inner(ty, 0, offset, offset + 8);
1115 return mode;
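/* Worked example (editorial): for struct { long long l; double d; }
   the first eightbyte (bytes 0..7) classifies as x86_64_mode_integer
   and the second (bytes 8..15) as x86_64_mode_sse, so the value
   travels in one GP and one SSE register -- matching the RegArgs
   example at the top of this file. Structs larger than 16 bytes never
   reach this helper; classify_x86_64_arg() marks them
   x86_64_mode_memory directly. */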
1118 static void regargs_init(RegArgs *args)
1120 int i;
1121 for(i=0; i<REG_ARGS_MAX; i++) {
1122 args->ireg[i] = -1;
1123 args->freg[i] = -1;
1127 static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, RegArgs *args)
1129 X86_64_Mode mode = x86_64_mode_none;
1130 int size, align, ret_t = 0;
1131 int ireg = 0, freg = 0;
1133 if (args)
1134 regargs_init(args);
1136 if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
1137 *psize = 8;
1138 *palign = 8;
1139 if (args)
1140 args->ireg[ireg++] = 0;
1141 ret_t = ty->t;
1142 mode = x86_64_mode_integer;
1143 } else {
1144 size = type_size(ty, &align);
1145 *psize = (size + 7) & ~7;
1146 *palign = (align + 7) & ~7;
1148 if (size > 16) {
1149 mode = x86_64_mode_memory;
1150 } else {
1151 int start;
1153 for(start=0; start < size; start += 8) {
1154 if ((ty->t & VT_BTYPE) == VT_STRUCT) {
1155 mode = classify_x86_64_arg_eightbyte(ty, start);
1156 } else {
1157 mode = classify_x86_64_inner(ty, 0, 0, size);
1160 if (mode == x86_64_mode_integer) {
1161 if (args)
1162 args->ireg[ireg++] = start;
1163 ret_t = (size > 4) ? VT_LLONG : VT_INT;
1164 } else if (mode == x86_64_mode_sse) {
1165 if (args)
1166 args->freg[freg++] = start;
1167 ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
1168 } else {
1169 ret_t = VT_LDOUBLE;
1175 if (ret) {
1176 ret->ref = NULL;
1177 ret->t = ret_t;
1180 return mode;
1183 ST_FUNC int classify_x86_64_va_arg(CType *ty)
1185 /* This definition must be synced with stdarg.h */
1186 enum __va_arg_type {
1187 __va_gen_reg, __va_float_reg, __va_stack
1189 int size, align;
1190 X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, NULL);
1191 switch (mode) {
1192 default: return __va_stack;
1193 case x86_64_mode_integer: return __va_gen_reg;
1194 case x86_64_mode_sse: return __va_float_reg;
1198 static int regargs_iregs(RegArgs *args)
1200 int i;
1201 int ret = 0;
1202 for(i=0; i<REG_ARGS_MAX; i++) {
1203 if(args->ireg[i] != -1)
1204 ret++;
1207 return ret;
1210 static int regargs_fregs(RegArgs *args)
1212 int i;
1213 int ret = 0;
1214 for(i=0; i<REG_ARGS_MAX; i++) {
1215 if(args->freg[i] != -1)
1216 ret++;
1219 return ret;
1222 /* Count the total number of registers used by args */
1223 ST_FUNC int regargs_nregs(RegArgs *args)
1225 int i;
1226 int ret = 0;
1227 for(i=0; i<REG_ARGS_MAX; i++) {
1228 if(args->ireg[i] != -1)
1229 ret++;
1231 if(args->freg[i] != -1)
1232 ret++;
1235 return ret;
1238 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize, RegArgs *args)
1240 int size, align;
1241 X86_64_Mode mode;
1242 *ret_align = 1; // Never have to re-align return values for x86-64
1243 *regsize = 8;
1245 mode = classify_x86_64_arg(vt, ret, &size, &align, args);
1247 return mode != x86_64_mode_memory &&
1248 mode != x86_64_mode_none;
1251 #define REGN 6
1252 static const uint8_t arg_regs[REGN] = {
1253 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
1256 static int arg_prepare_reg(int idx) {
1257 if (idx == 2 || idx == 3)
1258 /* idx=2: r10, idx=3: r11 */
1259 return idx + 8;
1260 else
1261 return arg_regs[idx];
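/* Editorial note: argument slots 2 and 3 are staged in %r10 and %r11
   (idx + 8) because evaluating later arguments through gv() could
   clobber %rdx and %rcx; gfunc_call() moves the staged values into
   place with mov %r10,%rdx / mov %r11,%rcx right before the call. */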
1264 /* Generate function call. The function address is pushed first, then
1265 all the parameters in call order. This function pops all the
1266 parameters and the function address. */
1267 void gfunc_call(int nb_args)
1269 X86_64_Mode mode;
1270 CType type;
1271 int size, align, r, args_size, stack_adjust, run_start, run_end, i;
1272 int nb_reg_args = 0;
1273 int nb_sse_args = 0;
1274 int sse_reg = 0, gen_reg = 0;
1275 RegArgs *reg_args = alloca(nb_args * sizeof *reg_args);
1277 /* calculate the number of integer/float register arguments */
1278 for(i = nb_args - 1; i >= 0; i--) {
1279 int fregs, iregs;
1280 mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_args[i]);
1281 fregs = regargs_fregs(&reg_args[i]);
1282 iregs = regargs_iregs(&reg_args[i]);
1284 nb_sse_args += fregs;
1285 nb_reg_args += iregs;
1287 if (sse_reg + fregs > 8 || gen_reg + iregs > REGN) {
1288 regargs_init(&reg_args[i]);
1289 } else {
1290 sse_reg += fregs;
1291 gen_reg += iregs;
1295 /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
1296 and is ended by a 16-byte aligned argument. This is because, from the point of view of
1297 the callee, argument alignment is computed from the bottom up. */
1298 /* for struct arguments, we need to call memcpy, and that call would
1299 clobber the register-passed arguments we are preparing.
1300 So we process the arguments which will be passed on the stack first. */
1301 gen_reg = nb_reg_args;
1302 sse_reg = nb_sse_args;
1303 run_start = 0;
1304 args_size = 0;
1305 while (run_start != nb_args) {
1306 int run_gen_reg = gen_reg, run_sse_reg = sse_reg;
1308 run_end = nb_args;
1309 stack_adjust = 0;
1310 for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
1311 int stack = regargs_nregs(&reg_args[i]) == 0;
1312 classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, NULL);
1314 if (stack) {
1315 if (align == 16)
1316 run_end = i;
1317 else
1318 stack_adjust += size;
1322 gen_reg = run_gen_reg;
1323 sse_reg = run_sse_reg;
1325 /* adjust stack to align SSE boundary */
1326 if (stack_adjust &= 15) {
1327 /* fetch cpu flag before the following sub will change the value */
1328 if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
1329 gv(RC_INT);
1331 stack_adjust = 16 - stack_adjust;
1332 o(0x48);
1333 oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
1334 args_size += stack_adjust;
1337 for(i = run_start; i < run_end;) {
1338 int arg_stored = regargs_nregs(&reg_args[i]) == 0;
1339 SValue tmp;
1340 RegArgs args;
1342 if (!arg_stored) {
1343 ++i;
1344 continue;
1347 /* Swap argument to top, it will possibly be changed here,
1348 and might use more temps. At the end of the loop we keep
1349 it on the stack and swap it back to its original position
1350 if it is a register. */
1351 tmp = vtop[0];
1352 vtop[0] = vtop[-i];
1353 vtop[-i] = tmp;
1355 classify_x86_64_arg(&vtop->type, NULL, &size, &align, &args);
1357 switch (vtop->type.t & VT_BTYPE) {
1358 case VT_STRUCT:
1359 /* allocate the necessary size on stack */
1360 o(0x48);
1361 oad(0xec81, size); /* sub $xxx, %rsp */
1362 /* generate structure store */
1363 r = get_reg(RC_INT);
1364 orex(1, r, 0, 0x89); /* mov %rsp, r */
1365 o(0xe0 + REG_VALUE(r));
1366 vset(&vtop->type, r | VT_LVAL, 0);
1367 vswap();
1368 vstore();
1369 args_size += size;
1370 break;
1372 case VT_LDOUBLE:
1373 assert(0);
1374 break;
1376 case VT_FLOAT:
1377 case VT_DOUBLE:
1378 r = gv(RC_FLOAT);
1379 o(0x50); /* push %rax */
1380 /* movq %xmmN, (%rsp) */
1381 o(0xd60f66);
1382 o(0x04 + REG_VALUE(r)*8);
1383 o(0x24);
1384 args_size += size;
1385 break;
1387 default:
1388 /* simple type */
1389 /* XXX: implicit cast ? */
1390 --gen_reg;
1391 r = gv(RC_INT);
1392 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
1393 args_size += size;
1394 break;
1397 /* And swap the argument back to its original position. */
1398 tmp = vtop[0];
1399 vtop[0] = vtop[-i];
1400 vtop[-i] = tmp;
1402 vrotb(i+1);
1403 assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
1404 vpop();
1405 memmove(reg_args + i, reg_args + i + 1, (nb_args - i - 1) * sizeof *reg_args);
1406 --nb_args;
1407 --run_end;
1410 /* handle 16 byte aligned arguments at end of run */
1411 run_start = i = run_end;
1412 while (i < nb_args) {
1413 /* Rotate argument to top since it will always be popped */
1414 mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, NULL);
1415 if (align != 16)
1416 break;
1418 vrotb(i+1);
1420 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1421 gv(RC_ST0);
1422 oad(0xec8148, size); /* sub $xxx, %rsp */
1423 o(0x7cdb); /* fstpt 0(%rsp) */
1424 g(0x24);
1425 g(0x00);
1426 args_size += size;
1427 } else {
1428 assert(mode == x86_64_mode_memory);
1430 /* allocate the necessary size on stack */
1431 o(0x48);
1432 oad(0xec81, size); /* sub $xxx, %rsp */
1433 /* generate structure store */
1434 r = get_reg(RC_INT);
1435 orex(1, r, 0, 0x89); /* mov %rsp, r */
1436 o(0xe0 + REG_VALUE(r));
1437 vset(&vtop->type, r | VT_LVAL, 0);
1438 vswap();
1439 vstore();
1440 args_size += size;
1443 vpop();
1444 memmove(reg_args + i, reg_args + i + 1, (nb_args - i - 1) * sizeof *reg_args);
1445 --nb_args;
1449 /* XXX This should be superfluous. */
1450 save_regs(0); /* save used temporary registers */
1452 /* recalculate the number of register arguments there actually
1453 * are. This is slow but more obviously correct than using the
1454 * old counts. */
1455 gen_reg = 0;
1456 sse_reg = 0;
1457 for(i = 0; i < nb_args; i++) {
1458 gen_reg += regargs_iregs(&reg_args[i]);
1459 sse_reg += regargs_fregs(&reg_args[i]);
1462 /* then, we prepare register passing arguments.
1463 Note that we cannot set RDX and RCX in this loop because gv()
1464 may clobber these registers. We stage the values in R10 and R11
1465 instead */
1466 assert(gen_reg <= REGN);
1467 assert(sse_reg <= 8);
1468 for(i = 0; i < nb_args; i++) {
1469 RegArgs args;
1471 args = reg_args[i];
1473 /* Alter stack entry type so that gv() knows how to treat it */
1474 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
1475 int k;
1477 for(k=REG_ARGS_MAX-1; k>=0; k--) {
1478 if (args.freg[k] == -1)
1479 continue;
1481 sse_reg--;
1482 assert(sse_reg >= 0);
1484 vdup();
1485 vtop->type.t = VT_DOUBLE;
1486 vtop->c.ull += args.freg[k];
1487 gv(RC_XMM0 << sse_reg);
1488 vpop();
1490 for(k=REG_ARGS_MAX-1; k>=0; k--) {
1491 int d;
1492 if (args.ireg[k] == -1)
1493 continue;
1495 gen_reg--;
1497 vdup();
1498 vtop->type.t = VT_LLONG;
1499 vtop->c.ull += args.ireg[k];
1500 r = gv(RC_INT);
1501 d = arg_prepare_reg(gen_reg);
1502 orex(1,d,r,0x89); /* mov */
1503 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
1504 vpop();
1506 } else {
1507 /* XXX is it really necessary to set vtop->type? */
1508 classify_x86_64_arg(&vtop->type, &type, &size, &align, NULL);
1509 vtop->type = type;
1510 if (args.freg[0] != -1) {
1511 --sse_reg;
1512 /* Load directly to register */
1513 gv(RC_XMM0 << sse_reg);
1514 } else if (args.ireg[0] != -1) {
1515 int d;
1516 /* simple type */
1517 /* XXX: implicit cast ? */
1518 gen_reg--;
1519 r = gv(RC_INT);
1520 d = arg_prepare_reg(gen_reg);
1521 orex(1,d,r,0x89); /* mov */
1522 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
1523 } else {
1524 assert(0);
1527 vtop--;
1529 assert(gen_reg == 0);
1530 assert(sse_reg == 0);
1532 /* We shouldn't have many operands on the stack anymore, but the
1533 call address itself is still there, and it might be in %eax
1534 (or edx/ecx) currently, which the below writes would clobber.
1535 So evict all remaining operands here. */
1536 save_regs(0);
1538 /* Copy R10 and R11 into RDX and RCX, respectively */
1539 if (nb_reg_args > 2) {
1540 o(0xd2894c); /* mov %r10, %rdx */
1541 if (nb_reg_args > 3) {
1542 o(0xd9894c); /* mov %r11, %rcx */
1546 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
1547 gcall_or_jmp(0);
1548 if (args_size)
1549 gadd_sp(args_size);
1550 vtop--;
1554 #define FUNC_PROLOG_SIZE 11
1556 static void push_arg_reg(int i) {
1557 loc -= 8;
1558 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
1561 /* generate function prolog for function type 'func_type' */
1562 void gfunc_prolog(CType *func_type)
1564 X86_64_Mode mode;
1565 int i, addr, align, size;
1566 int param_addr = 0, reg_param_index, sse_param_index;
1567 Sym *sym;
1568 CType *type;
1570 sym = func_type->ref;
1571 addr = PTR_SIZE * 2;
1572 loc = 0;
1573 ind += FUNC_PROLOG_SIZE;
1574 func_sub_sp_offset = ind;
1575 func_ret_sub = 0;
1577 if (func_type->ref->c == FUNC_ELLIPSIS) {
1578 int seen_reg_num, seen_sse_num, seen_stack_size;
1579 seen_reg_num = seen_sse_num = 0;
1580 /* frame pointer and return address */
1581 seen_stack_size = PTR_SIZE * 2;
1582 /* count the number of seen parameters */
1583 sym = func_type->ref;
1584 while ((sym = sym->next) != NULL) {
1585 RegArgs args;
1587 type = &sym->type;
1588 mode = classify_x86_64_arg(type, NULL, &size, &align, &args);
1590 switch (mode) {
1591 default:
1592 stack_arg:
1593 seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
1594 break;
1596 case x86_64_mode_integer:
1597 case x86_64_mode_sse: {
1598 int stack = 0;
1600 seen_sse_num += regargs_fregs(&args);
1601 seen_reg_num += regargs_iregs(&args);
1603 if (seen_reg_num > 8) {
1604 seen_reg_num = 8;
1605 stack = 1;
1607 if (seen_sse_num > 8) {
1608 seen_sse_num = 8;
1609 stack = 1;
1612 if (stack)
1613 goto stack_arg;
1614 break;
1619 loc -= 16;
1620 /* movl $0x????????, -0x10(%rbp) */
1621 o(0xf045c7);
1622 gen_le32(seen_reg_num * 8);
1623 /* movl $0x????????, -0xc(%rbp) */
1624 o(0xf445c7);
1625 gen_le32(seen_sse_num * 16 + 48);
1626 /* movl $0x????????, -0x8(%rbp) */
1627 o(0xf845c7);
1628 gen_le32(seen_stack_size);
1630 /* save all register passing arguments */
1631 for (i = 0; i < 8; i++) {
1632 loc -= 16;
1633 o(0xd60f66); /* movq */
1634 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1635 /* movq $0, loc+8(%rbp) */
1636 o(0x85c748);
1637 gen_le32(loc + 8);
1638 gen_le32(0);
1640 for (i = 0; i < REGN; i++) {
1641 push_arg_reg(REGN-1-i);
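/* Layout sketch (editorial, assuming the stdarg.h shipped with this
   tree): the three 32-bit values stored at -0x10/-0xc/-0x8(%rbp)
   above are the va_list bookkeeping -- gp_offset (seen_reg_num * 8),
   fp_offset (48 + seen_sse_num * 16) and the overflow-area offset
   (seen_stack_size); below them sit eight 16-byte slots for
   %xmm0-%xmm7 and the six GP argument registers saved by
   push_arg_reg(). */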
1645 sym = func_type->ref;
1646 reg_param_index = 0;
1647 sse_param_index = 0;
1649 /* if the function returns a structure, then add an
1650 implicit pointer parameter */
1651 func_vt = sym->type;
1652 mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, NULL);
1653 if (mode == x86_64_mode_memory) {
1654 push_arg_reg(reg_param_index);
1655 func_vc = loc;
1656 reg_param_index++;
1658 /* define parameters */
1659 while ((sym = sym->next) != NULL) {
1660 RegArgs args;
1661 int reg_count_integer = 0;
1662 int reg_count_sse = 0;
1663 int arg_stored = 1;
1665 type = &sym->type;
1666 mode = classify_x86_64_arg(type, NULL, &size, &align, &args);
1667 reg_count_integer = regargs_iregs(&args);
1668 reg_count_sse = regargs_fregs(&args);
1670 switch (mode) {
1671 case x86_64_mode_integer:
1672 case x86_64_mode_sse:
1673 if (reg_count_integer || reg_count_sse) {
1674 if ((reg_count_sse == 0 || sse_param_index + reg_count_sse <= 8) &&
1675 (reg_count_integer == 0 || reg_param_index + reg_count_integer <= REGN)) {
1676 /* argument fits into registers */
1677 arg_stored = 0;
1681 if (!arg_stored) {
1682 /* save arguments passed by register */
1683 loc -= (reg_count_sse + reg_count_integer) * 8;
1684 param_addr = loc;
1685 for (i = 0; i < reg_count_sse; ++i) {
1686 o(0xd60f66); /* movq */
1687 gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + args.freg[i]);
1688 ++sse_param_index;
1690 for (i = 0; i < reg_count_integer; ++i) {
1691 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + args.ireg[i]);
1692 ++reg_param_index;
1694 } else {
1695 addr = (addr + align - 1) & -align;
1696 param_addr = addr;
1697 addr += size;
1699 break;
1701 case x86_64_mode_memory:
1702 case x86_64_mode_x87:
1703 addr = (addr + align - 1) & -align;
1704 param_addr = addr;
1705 addr += size;
1706 break;
1707 default: break; /* nothing to be done for x86_64_mode_none */
1709 sym_push(sym->v & ~SYM_FIELD, type,
1710 VT_LOCAL | VT_LVAL, param_addr);
1713 #ifdef CONFIG_TCC_BCHECK
1714 /* leave some room for bound checking code */
1715 if (tcc_state->do_bounds_check) {
1716 func_bound_offset = lbounds_section->data_offset;
1717 func_bound_ind = ind;
1718 oad(0xb8, 0); /* lbound section pointer */
1719 o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
1720 oad(0xb8, 0); /* call to function */
1722 #endif
1725 /* generate function epilog */
1726 void gfunc_epilog(void)
1728 int v, saved_ind;
1730 #ifdef CONFIG_TCC_BCHECK
1731 if (tcc_state->do_bounds_check
1732 && func_bound_offset != lbounds_section->data_offset)
1734 addr_t saved_ind;
1735 addr_t *bounds_ptr;
1736 Sym *sym_data;
1738 /* add end of table info */
1739 bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
1740 *bounds_ptr = 0;
1742 /* generate bound local allocation */
1743 sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
1744 func_bound_offset, lbounds_section->data_offset);
1745 saved_ind = ind;
1746 ind = func_bound_ind;
1747 greloc(cur_text_section, sym_data, ind + 1, R_386_32);
1748 ind = ind + 5 + 3;
1749 gen_static_call(TOK___bound_local_new);
1750 ind = saved_ind;
1752 /* generate bound check local freeing */
1753 o(0x5250); /* save returned value, if any */
1754 greloc(cur_text_section, sym_data, ind + 1, R_386_32);
1755 oad(0xb8, 0); /* mov xxx, %rax */
1756 o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
1757 gen_static_call(TOK___bound_local_delete);
1758 o(0x585a); /* restore returned value, if any */
1760 #endif
1761 o(0xc9); /* leave */
1762 if (func_ret_sub == 0) {
1763 o(0xc3); /* ret */
1764 } else {
1765 o(0xc2); /* ret n */
1766 g(func_ret_sub);
1767 g(func_ret_sub >> 8);
1769 /* align local size to word & save local variables */
1770 v = (-loc + 15) & -16;
1771 saved_ind = ind;
1772 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1773 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1774 o(0xec8148); /* sub rsp, stacksize */
1775 gen_le32(v);
1776 ind = saved_ind;
1779 #endif /* not PE */
1781 /* generate a jump to a label */
1782 int gjmp(int t)
1784 return psym(0xe9, t);
1787 /* generate a jump to a fixed address */
1788 void gjmp_addr(int a)
1790 int r;
1791 r = a - ind - 2;
1792 if (r == (char)r) {
1793 g(0xeb);
1794 g(r);
1795 } else {
1796 oad(0xe9, a - ind - 5);
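/* Example (editorial): a branch whose displacement fits in a signed
   byte uses the 2-byte short form, e.g. with a == ind - 10 this emits
   eb f4 (rel8 = a - ind - 2); anything outside -128..127 falls back
   to the 5-byte near form e9 <rel32> with rel32 = a - ind - 5. */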
1800 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1801 int gtst(int inv, int t)
1803 int v, *p;
1805 v = vtop->r & VT_VALMASK;
1806 if (v == VT_CMP) {
1807 /* fast case : can jump directly since flags are set */
1808 if (vtop->c.i & 0x100)
1810 /* This was a float compare. If the parity flag is set
1811 the result was unordered. For anything except != this
1812 means false and we don't jump (anding both conditions).
1813 For != this means true (oring both).
1814 Take care about inverting the test. We need to jump
1815 to our target if the result was unordered and test wasn't NE,
1816 otherwise if unordered we don't want to jump. */
1817 vtop->c.i &= ~0x100;
1818 if (!inv == (vtop->c.i != TOK_NE))
1819 o(0x067a); /* jp +6 */
1820 else {
1821 g(0x0f);
1822 t = psym(0x8a, t); /* jp t */
1825 g(0x0f);
1826 t = psym((vtop->c.i - 16) ^ inv, t);
1827 } else if (v == VT_JMP || v == VT_JMPI) {
1828 /* && or || optimization */
1829 if ((v & 1) == inv) {
1830 /* insert vtop->c jump list in t */
1831 p = &vtop->c.i;
1832 while (*p != 0)
1833 p = (int *)(cur_text_section->data + *p);
1834 *p = t;
1835 t = vtop->c.i;
1836 } else {
1837 t = gjmp(t);
1838 gsym(vtop->c.i);
1841 vtop--;
1842 return t;
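/* Worked case (editorial): for a float '<' with inv == 0 the unordered
   result must not branch, so 7a 06 (jp +6) skips the conditional jump
   that follows; for TOK_NE the unordered result must branch, so the
   parity jump is instead chained into the same target list via
   psym(0x8a, t). */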
1845 /* generate an integer binary operation */
1846 void gen_opi(int op)
1848 int r, fr, opc, c;
1849 int ll, uu, cc;
1851 ll = is64_type(vtop[-1].type.t);
1852 uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
1853 cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
1855 switch(op) {
1856 case '+':
1857 case TOK_ADDC1: /* add with carry generation */
1858 opc = 0;
1859 gen_op8:
1860 if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
1861 /* constant case */
1862 vswap();
1863 r = gv(RC_INT);
1864 vswap();
1865 c = vtop->c.i;
1866 if (c == (char)c) {
1867 /* XXX: generate inc and dec for smaller code ? */
1868 orex(ll, r, 0, 0x83);
1869 o(0xc0 | (opc << 3) | REG_VALUE(r));
1870 g(c);
1871 } else {
1872 orex(ll, r, 0, 0x81);
1873 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1875 } else {
1876 gv2(RC_INT, RC_INT);
1877 r = vtop[-1].r;
1878 fr = vtop[0].r;
1879 orex(ll, r, fr, (opc << 3) | 0x01);
1880 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1882 vtop--;
1883 if (op >= TOK_ULT && op <= TOK_GT) {
1884 vtop->r = VT_CMP;
1885 vtop->c.i = op;
1887 break;
1888 case '-':
1889 case TOK_SUBC1: /* sub with carry generation */
1890 opc = 5;
1891 goto gen_op8;
1892 case TOK_ADDC2: /* add with carry use */
1893 opc = 2;
1894 goto gen_op8;
1895 case TOK_SUBC2: /* sub with carry use */
1896 opc = 3;
1897 goto gen_op8;
1898 case '&':
1899 opc = 4;
1900 goto gen_op8;
1901 case '^':
1902 opc = 6;
1903 goto gen_op8;
1904 case '|':
1905 opc = 1;
1906 goto gen_op8;
1907 case '*':
1908 gv2(RC_INT, RC_INT);
1909 r = vtop[-1].r;
1910 fr = vtop[0].r;
1911 orex(ll, fr, r, 0xaf0f); /* imul fr, r */
1912 o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
1913 vtop--;
1914 break;
1915 case TOK_SHL:
1916 opc = 4;
1917 goto gen_shift;
1918 case TOK_SHR:
1919 opc = 5;
1920 goto gen_shift;
1921 case TOK_SAR:
1922 opc = 7;
1923 gen_shift:
1924 opc = 0xc0 | (opc << 3);
1925 if (cc) {
1926 /* constant case */
1927 vswap();
1928 r = gv(RC_INT);
1929 vswap();
1930 orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
1931 o(opc | REG_VALUE(r));
1932 g(vtop->c.i & (ll ? 63 : 31));
1933 } else {
1934 /* we generate the shift in ecx */
1935 gv2(RC_INT, RC_RCX);
1936 r = vtop[-1].r;
1937 orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
1938 o(opc | REG_VALUE(r));
1940 vtop--;
1941 break;
1942 case TOK_UDIV:
1943 case TOK_UMOD:
1944 uu = 1;
1945 goto divmod;
1946 case '/':
1947 case '%':
1948 case TOK_PDIV:
1949 uu = 0;
1950 divmod:
1951 /* first operand must be in eax */
1952 /* XXX: need better constraint for second operand */
1953 gv2(RC_RAX, RC_RCX);
1954 r = vtop[-1].r;
1955 fr = vtop[0].r;
1956 vtop--;
1957 save_reg(TREG_RDX);
1958 orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1959 orex(ll, fr, 0, 0xf7); /* div fr, %eax */
1960 o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
1961 if (op == '%' || op == TOK_UMOD)
1962 r = TREG_RDX;
1963 else
1964 r = TREG_RAX;
1965 vtop->r = r;
1966 break;
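/* Example (editorial): a signed 64-bit '/' with the divisor in %rcx
   emits
     48 99       cqto        (sign-extend %rax into %rdx)
     48 f7 f9    idiv %rcx   (0xf8 + reg; quotient %rax, rem. %rdx)
   the unsigned variants clear %rdx with xor (0xd231) and select the
   div form 0xf0 + reg instead. */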
1967 default:
1968 opc = 7;
1969 goto gen_op8;
1973 void gen_opl(int op)
1975 gen_opi(op);
1978 /* generate a floating point operation 'v = t1 op t2' instruction. The
1979 two operands are guaranteed to have the same floating point type */
1980 /* XXX: need to use ST1 too */
1981 void gen_opf(int op)
1983 int a, ft, fc, swapped, r;
1984 int float_type =
1985 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1987 /* convert constants to memory references */
1988 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1989 vswap();
1990 gv(float_type);
1991 vswap();
1993 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1994 gv(float_type);
1996 /* must put at least one value in the floating point register */
1997 if ((vtop[-1].r & VT_LVAL) &&
1998 (vtop[0].r & VT_LVAL)) {
1999 vswap();
2000 gv(float_type);
2001 vswap();
2003 swapped = 0;
2004 /* swap the stack if needed so that t1 is the register and t2 is
2005 the memory reference */
2006 if (vtop[-1].r & VT_LVAL) {
2007 vswap();
2008 swapped = 1;
2010 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
2011 if (op >= TOK_ULT && op <= TOK_GT) {
2012 /* load on stack second operand */
2013 load(TREG_ST0, vtop);
2014 save_reg(TREG_RAX); /* eax is used by FP comparison code */
2015 if (op == TOK_GE || op == TOK_GT)
2016 swapped = !swapped;
2017 else if (op == TOK_EQ || op == TOK_NE)
2018 swapped = 0;
2019 if (swapped)
2020 o(0xc9d9); /* fxch %st(1) */
2021 if (op == TOK_EQ || op == TOK_NE)
2022 o(0xe9da); /* fucompp */
2023 else
2024 o(0xd9de); /* fcompp */
2025 o(0xe0df); /* fnstsw %ax */
2026 if (op == TOK_EQ) {
2027 o(0x45e480); /* and $0x45, %ah */
2028 o(0x40fC80); /* cmp $0x40, %ah */
2029 } else if (op == TOK_NE) {
2030 o(0x45e480); /* and $0x45, %ah */
2031 o(0x40f480); /* xor $0x40, %ah */
2032 op = TOK_NE;
2033 } else if (op == TOK_GE || op == TOK_LE) {
2034 o(0x05c4f6); /* test $0x05, %ah */
2035 op = TOK_EQ;
2036 } else {
2037 o(0x45c4f6); /* test $0x45, %ah */
2038 op = TOK_EQ;
2040 vtop--;
2041 vtop->r = VT_CMP;
2042 vtop->c.i = op;
2043 } else {
2044 /* no memory reference possible for long double operations */
2045 load(TREG_ST0, vtop);
2046 swapped = !swapped;
2048 switch(op) {
2049 default:
2050 case '+':
2051 a = 0;
2052 break;
2053 case '-':
2054 a = 4;
2055 if (swapped)
2056 a++;
2057 break;
2058 case '*':
2059 a = 1;
2060 break;
2061 case '/':
2062 a = 6;
2063 if (swapped)
2064 a++;
2065 break;
2067 ft = vtop->type.t;
2068 fc = vtop->c.ul;
2069 o(0xde); /* fxxxp %st, %st(1) */
2070 o(0xc1 + (a << 3));
2071 vtop--;
2073 } else {
2074 if (op >= TOK_ULT && op <= TOK_GT) {
2075 /* if saved lvalue, then we must reload it */
2076 r = vtop->r;
2077 fc = vtop->c.ul;
2078 if ((r & VT_VALMASK) == VT_LLOCAL) {
2079 SValue v1;
2080 r = get_reg(RC_INT);
2081 v1.type.t = VT_PTR;
2082 v1.r = VT_LOCAL | VT_LVAL;
2083 v1.c.ul = fc;
2084 load(r, &v1);
2085 fc = 0;
2088 if (op == TOK_EQ || op == TOK_NE) {
2089 swapped = 0;
2090 } else {
2091 if (op == TOK_LE || op == TOK_LT)
2092 swapped = !swapped;
2093 if (op == TOK_LE || op == TOK_GE) {
2094 op = 0x93; /* setae */
2095 } else {
2096 op = 0x97; /* seta */
2100 if (swapped) {
2101 gv(RC_FLOAT);
2102 vswap();
2104 assert(!(vtop[-1].r & VT_LVAL));
2106 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
2107 o(0x66);
2108 if (op == TOK_EQ || op == TOK_NE)
2109 o(0x2e0f); /* ucomisd */
2110 else
2111 o(0x2f0f); /* comisd */
2113 if (vtop->r & VT_LVAL) {
2114 gen_modrm(vtop[-1].r, r, vtop->sym, fc);
2115 } else {
2116 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
2119 vtop--;
2120 vtop->r = VT_CMP;
2121 vtop->c.i = op | 0x100;
2122 } else {
2123 assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
2124 switch(op) {
2125 default:
2126 case '+':
2127 a = 0;
2128 break;
2129 case '-':
2130 a = 4;
2131 break;
2132 case '*':
2133 a = 1;
2134 break;
2135 case '/':
2136 a = 6;
2137 break;
2139 ft = vtop->type.t;
2140 fc = vtop->c.ul;
2141 assert((ft & VT_BTYPE) != VT_LDOUBLE);
2143 r = vtop->r;
2144 /* if saved lvalue, then we must reload it */
2145 if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
2146 SValue v1;
2147 r = get_reg(RC_INT);
2148 v1.type.t = VT_PTR;
2149 v1.r = VT_LOCAL | VT_LVAL;
2150 v1.c.ul = fc;
2151 load(r, &v1);
2152 fc = 0;
2155 assert(!(vtop[-1].r & VT_LVAL));
2156 if (swapped) {
2157 assert(vtop->r & VT_LVAL);
2158 gv(RC_FLOAT);
2159 vswap();
2162 if ((ft & VT_BTYPE) == VT_DOUBLE) {
2163 o(0xf2);
2164 } else {
2165 o(0xf3);
2167 o(0x0f);
2168 o(0x58 + a);
2170 if (vtop->r & VT_LVAL) {
2171 gen_modrm(vtop[-1].r, r, vtop->sym, fc);
2172 } else {
2173 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
2176 vtop--;
2181 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
2182 and 'long long' cases. */
2183 void gen_cvt_itof(int t)
2185 if ((t & VT_BTYPE) == VT_LDOUBLE) {
2186 save_reg(TREG_ST0);
2187 gv(RC_INT);
2188 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
2189 /* signed long long to float/double/long double (unsigned case
2190 is handled generically) */
2191 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
2192 o(0x242cdf); /* fildll (%rsp) */
2193 o(0x08c48348); /* add $8, %rsp */
2194 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
2195 (VT_INT | VT_UNSIGNED)) {
2196 /* unsigned int to float/double/long double */
2197 o(0x6a); /* push $0 */
2198 g(0x00);
2199 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
2200 o(0x242cdf); /* fildll (%rsp) */
2201 o(0x10c48348); /* add $16, %rsp */
2202 } else {
2203 /* int to float/double/long double */
2204 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
2205 o(0x2404db); /* fildl (%rsp) */
2206 o(0x08c48348); /* add $8, %rsp */
2208 vtop->r = TREG_ST0;
2209 } else {
2210 int r = get_reg(RC_FLOAT);
2211 gv(RC_INT);
2212 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
2213 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
2214 (VT_INT | VT_UNSIGNED) ||
2215 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
2216 o(0x48); /* REX */
2218 o(0x2a0f);
2219 o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
2220 vtop->r = r;
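/* Example (editorial): converting a signed int in %ecx to a double in
   %xmm2 emits f2 0f 2a d1 (cvtsi2sd %ecx,%xmm2). The extra REX byte
   0x48 used for 'long long' and 'unsigned int' widens the convert to
   64 bits; for unsigned int this relies on the value being
   zero-extended in the full register. */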
2224 /* convert from one floating point type to another */
2225 void gen_cvt_ftof(int t)
2227 int ft, bt, tbt;
2229 ft = vtop->type.t;
2230 bt = ft & VT_BTYPE;
2231 tbt = t & VT_BTYPE;
2233 if (bt == VT_FLOAT) {
2234 gv(RC_FLOAT);
2235 if (tbt == VT_DOUBLE) {
2236 o(0x140f); /* unpcklps */
2237 o(0xc0 + REG_VALUE(vtop->r)*9);
2238 o(0x5a0f); /* cvtps2pd */
2239 o(0xc0 + REG_VALUE(vtop->r)*9);
2240 } else if (tbt == VT_LDOUBLE) {
2241 save_reg(TREG_ST0);
2242 /* movss %xmm0,-0x10(%rsp) */
2243 o(0x110ff3);
2244 o(0x44 + REG_VALUE(vtop->r)*8);
2245 o(0xf024);
2246 o(0xf02444d9); /* flds -0x10(%rsp) */
2247 vtop->r = TREG_ST0;
2249 } else if (bt == VT_DOUBLE) {
2250 gv(RC_FLOAT);
2251 if (tbt == VT_FLOAT) {
2252 o(0x140f66); /* unpcklpd */
2253 o(0xc0 + REG_VALUE(vtop->r)*9);
2254 o(0x5a0f66); /* cvtpd2ps */
2255 o(0xc0 + REG_VALUE(vtop->r)*9);
2256 } else if (tbt == VT_LDOUBLE) {
2257 save_reg(TREG_ST0);
2258 /* movsd %xmm0,-0x10(%rsp) */
2259 o(0x110ff2);
2260 o(0x44 + REG_VALUE(vtop->r)*8);
2261 o(0xf024);
2262 o(0xf02444dd); /* fldl -0x10(%rsp) */
2263 vtop->r = TREG_ST0;
2265 } else {
2266 int r;
2267 gv(RC_ST0);
2268 r = get_reg(RC_FLOAT);
2269 if (tbt == VT_DOUBLE) {
2270 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
2271 /* movsd -0x10(%rsp),%xmm0 */
2272 o(0x100ff2);
2273 o(0x44 + REG_VALUE(r)*8);
2274 o(0xf024);
2275 vtop->r = r;
2276 } else if (tbt == VT_FLOAT) {
2277 o(0xf0245cd9); /* fstps -0x10(%rsp) */
2278 /* movss -0x10(%rsp),%xmm0 */
2279 o(0x100ff3);
2280 o(0x44 + REG_VALUE(r)*8);
2281 o(0xf024);
2282 vtop->r = r;
2287 /* convert fp to int 't' type */
2288 void gen_cvt_ftoi(int t)
2290 int ft, bt, size, r;
2291 ft = vtop->type.t;
2292 bt = ft & VT_BTYPE;
2293 if (bt == VT_LDOUBLE) {
2294 gen_cvt_ftof(VT_DOUBLE);
2295 bt = VT_DOUBLE;
2298 gv(RC_FLOAT);
2299 if (t != VT_INT)
2300 size = 8;
2301 else
2302 size = 4;
2304 r = get_reg(RC_INT);
2305 if (bt == VT_FLOAT) {
2306 o(0xf3);
2307 } else if (bt == VT_DOUBLE) {
2308 o(0xf2);
2309 } else {
2310 assert(0);
2312 orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
2313 o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
2314 vtop->r = r;
2317 /* computed goto support */
2318 void ggoto(void)
2320 gcall_or_jmp(1);
2321 vtop--;
2324 /* Save the stack pointer at stack location 'addr' */
2325 ST_FUNC void gen_vla_sp_save(int addr) {
2326 /* mov %rsp,addr(%rbp)*/
2327 gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
2330 /* Restore the SP from a location on the stack */
2331 ST_FUNC void gen_vla_sp_restore(int addr) {
2332 gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
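/* Encoding sketch (editorial), taking addr = -8 as an example:
     gen_vla_sp_save(-8)    emits 48 89 65 f8  (mov %rsp,-0x8(%rbp))
     gen_vla_sp_restore(-8) emits 48 8b 65 f8  (mov -0x8(%rbp),%rsp) */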
2335 /* Subtract the allocation size (on vtop) from the stack pointer */
2336 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2337 #ifdef TCC_TARGET_PE
2338 /* alloca does more than just adjust %rsp on Windows */
2339 vpush_global_sym(&func_old_type, TOK_alloca);
2340 vswap(); /* Move alloca ref past allocation size */
2341 gfunc_call(1);
2342 vset(type, REG_IRET, 0);
2343 #else
2344 int r = gv(RC_INT); /* allocation size */
2345 /* sub r,%rsp */
2346 o(0x2b48);
2347 o(0xe0 | REG_VALUE(r));
2348 /* We align to 16 bytes rather than to 'align' */
2349 /* and ~15, %rsp */
2350 o(0xf0e48348);
2351 vpop();
2352 #endif
2356 /* end of x86-64 code generator */
2357 /*************************************************************/
2358 #endif /* ! TARGET_DEFS_ONLY */
2359 /******************************************************/