/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         25
#define NB_ASM_REGS     8
#define REG_ARGS_MAX    2 /* at most 2 registers used for each argument */
#ifdef TCC_TARGET_PE
typedef int RegArgs;
#else
/* This struct stores the struct offsets at which %rax, %rdx, %xmm0, and
 * %xmm1 are to be stored.
 *
 * struct { long long l; double x; }:     ireg = { 0, -1 }  freg = { 8, -1 }
 * struct { double x; long long l; }:     ireg = { 8, -1 }  freg = { 0, -1 }
 * struct { long long l; long long l2; }: ireg = { 0, 8 }   freg = { -1, -1 }
 * struct { double x; double x2; }:       ireg = { -1, -1 } freg = { 0, 8 }
 */
typedef struct {
    int ireg[REG_ARGS_MAX];
    int freg[REG_ARGS_MAX];
} RegArgs;
#endif
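/* Note (added for clarity): on TCC_TARGET_PE, RegArgs degenerates to a
   plain count. This is presumably sufficient because the Win64 convention
   never splits one argument across several registers: aggregates of size
   1, 2, 4 or 8 travel in a single register and larger ones are passed by
   reference, so the only values gfunc_sret() needs to report are 0 and 1. */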
/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_ST0     0x0080 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_LRET    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */
/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_ST0 = 24,

    TREG_MEM = 0x20,
};
#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16
/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x400000
#define ELF_PAGE_SIZE  0x200000

/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#include "tcc.h"
#include <assert.h>
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};
static unsigned long func_sub_sp_offset;
static int func_ret_sub;

/* XXX: make it faster ? */
void g(int c)
{
    int ind1;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
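/* Note (added for clarity): o() emits bytes starting from the least
   significant and stops once the remaining value is zero, so multi-byte
   opcodes are written as little-endian constants: o(0x7e0ff3) emits
   f3 0f 7e. One consequence: trailing 0x00 bytes would terminate the
   loop early, so sequences ending in zero bytes must use g() instead. */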
void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
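/* Note (added for clarity): orex() prepends an x86-64 REX prefix
   (0100WRXB) when one is required: bit W (from ll) selects 64-bit
   operand size, bit R (from r2) extends the ModRM reg field, and bit B
   (from r) extends the ModRM r/m or opcode register field to reach
   r8-r15. Pseudo values >= VT_CONST carry no register number, so they
   are cleared to 0 first. */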
/* output a symbol and patch all calls to it */
void gsym_addr(int t, int a)
{
    int n, *ptr;
    while (t) {
        ptr = (int *)(cur_text_section->data + t);
        n = *ptr; /* next value */
        *ptr = a - t - 4;
        t = n;
    }
}

void gsym(int t)
{
    gsym_addr(t, ind);
}
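/* Note (added for clarity): unresolved forward jumps are chained through
   their own 32-bit displacement fields: each pending site stores the
   offset of the next pending site, with 0 terminating the list.
   gsym_addr() walks that chain and overwrites every link with the final
   rel32 displacement a - t - 4, counted from the end of the 4-byte field. */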
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad

static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}

/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
    s = ind;
    ind = ind1;
    return s;
}
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_32);
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    gen_le32(c-4);
}
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    Section *sr;
    ElfW(Rela) *rel;
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
    rel->r_addend = -4;
#else
    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
              get_tok_str(sym->v, NULL), c, r,
              cur_text_section->data[ind-3],
              cur_text_section->data[ind-2],
              cur_text_section->data[ind-1]
              );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
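/* Note (added for clarity): the ModRM bytes built above follow the
   standard x86-64 encodings: 0x05 | op_reg is mod=00,r/m=101, which in
   64-bit mode means RIP-relative disp32; 0x45 and 0x85 select
   disp8/disp32 off %rbp for locals; and the plain mod=00 or mod=10
   register forms are used when the address is already held in a general
   register (values TREG_MEM and above). */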
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.ul;

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.ul = fc;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.ull);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100) {
                /* This was a float compare. If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE. */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8));
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
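/* Illustrative example (added, not part of the original source): loading
   a 32-bit local stored at -8(%rbp) into %eax takes the VT_LVAL path
   above with b = 0x8b and emits

       8b 45 f8        mov -0x8(%rbp),%eax

   i.e. no REX prefix from orex() (ll = 0, low registers only), the mov
   opcode, then the short %rbp-relative form from gen_modrm(). */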
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.ul;
    fr = v->r & VT_VALMASK;
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
        ((vtop->r & VT_SYM) || (vtop->c.ll-4) == (int)(vtop->c.ll-4))) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
#ifdef TCC_TARGET_PE
            greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32);
#else
            greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32);
#endif
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        }
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
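/* Note (added for clarity): direct calls use the 5-byte e8 rel32 form
   (0xe8 + is_jmp yields e9 for jumps) with a PC32/PLT32 relocation on
   the displacement. All other targets are first loaded into %r11: it is
   caller-saved and is not an argument register in either supported ABI,
   so the indirect 41 ff d3 (call *%r11) cannot clobber outgoing
   argument values. */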
#if defined(CONFIG_TCC_BCHECK)
#ifndef TCC_TARGET_PE
static addr_t func_bound_offset;
static unsigned long func_bound_ind;
#endif

static void gen_static_call(int v)
{
    Sym *sym = external_global_sym(v, &func_old_type, 0);
    oad(0xe8, -4);
    greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
}
/* generate a bounded pointer addition */
ST_FUNC void gen_bounded_ptr_add(void)
{
    /* save all temporary registers */
    save_regs(0);

    /* prepare fast x86_64 function call */
    gv(RC_RAX);
    o(0xc68948); // mov  %rax,%rsi ## second arg in %rsi, this must be size
    vtop--;

    gv(RC_RAX);
    o(0xc78948); // mov  %rax,%rdi ## first arg in %rdi, this must be ptr
    vtop--;

    /* do a fast function call */
    gen_static_call(TOK___bound_ptr_add);

    /* returned pointer is in rax */
    vtop++;
    vtop->r = TREG_RAX | VT_BOUNDED;

    /* relocation offset of the bounding function call point */
    vtop->c.ull = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
}
/* patch pointer addition in vtop so that pointer dereferencing is
   also tested */
ST_FUNC void gen_bounded_ptr_deref(void)
{
    addr_t func;
    int size, align;
    ElfW(Rela) *rel;
    Sym *sym;

    size = 0;
    /* XXX: put that code in generic part of tcc */
    if (!is_float(vtop->type.t)) {
        if (vtop->r & VT_LVAL_BYTE)
            size = 1;
        else if (vtop->r & VT_LVAL_SHORT)
            size = 2;
    }
    if (!size)
        size = type_size(&vtop->type, &align);
    switch(size) {
    case  1: func = TOK___bound_ptr_indir1; break;
    case  2: func = TOK___bound_ptr_indir2; break;
    case  4: func = TOK___bound_ptr_indir4; break;
    case  8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
    default:
        tcc_error("unhandled size when dereferencing bounded pointer");
        func = 0;
        break;
    }

    sym = external_global_sym(func, &func_old_type, 0);
    if (!sym->c)
        put_extern_sym(sym, NULL, 0, 0);

    /* patch relocation */
    /* XXX: find a better solution ? */

    rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.ull);
    rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
}
#endif
#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}
static int func_scratch;

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
ST_FUNC int regargs_nregs(RegArgs *args)
{
    return *args;
}

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize, RegArgs *args)
{
    int size, align;
    *regsize = 8;
    *ret_align = 1; // Never have to re-align return values for x86-64
    size = type_size(vt, &align);
    ret->ref = NULL;
    if (size > 8) {
        *args = 0;
    } else if (size > 4) {
        ret->t = VT_LLONG;
        *args = 1;
    } else if (size > 2) {
        ret->t = VT_INT;
        *args = 1;
    } else if (size > 1) {
        ret->t = VT_SHORT;
        *args = 1;
    } else {
        ret->t = VT_BYTE;
        *args = 1;
    }

    return *args != 0;
}
static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}

int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments we need to call memcpy, and that call would
       clobber the argument registers we are preparing. So we first
       process the arguments that will be passed on the stack. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (size > 8) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    func_var = (sym->c == FUNC_ELLIPSIS);
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148);   /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}

#else
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_sse,
    x86_64_mode_x87
} X86_64_Mode;
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
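/* Note (added for clarity): these merge rules mirror the System V AMD64
   ABI's classification merge step: MEMORY dominates everything, INTEGER
   wins over the SSE classes, and an x87 half forced to share an
   eightbyte with any other class pushes the whole argument into memory. */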
/* classify the x86 eightbytes from byte index start to byte index
 * end, at offset offset from the root struct */
static X86_64_Mode classify_x86_64_inner(CType *ty, int offset, int start, int end)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        mode = x86_64_mode_none;
        while ((f = f->next) != NULL) {
            if (f->c + offset >= start && f->c + offset < end)
                mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type, f->c + offset, start, end));
        }

        return mode;
    }

    assert(0);
}
static X86_64_Mode classify_x86_64_arg_eightbyte(CType *ty, int offset)
{
    X86_64_Mode mode;

    assert((ty->t & VT_BTYPE) == VT_STRUCT);

    mode = classify_x86_64_inner(ty, 0, offset, offset + 8);

    return mode;
}

static void regargs_init(RegArgs *args)
{
    int i;
    for(i=0; i<REG_ARGS_MAX; i++) {
        args->ireg[i] = -1;
        args->freg[i] = -1;
    }
}
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, RegArgs *args)
{
    X86_64_Mode mode = x86_64_mode_none;
    int size, align, ret_t = 0;
    int ireg = 0, freg = 0;

    if (args)
        regargs_init(args);

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        if (args)
            args->ireg[ireg++] = 0;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            int start;

            for(start=0; start < size; start += 8) {
                if ((ty->t & VT_BTYPE) == VT_STRUCT) {
                    mode = classify_x86_64_arg_eightbyte(ty, start);
                } else {
                    mode = classify_x86_64_inner(ty, 0, 0, size);
                }

                if (mode == x86_64_mode_integer) {
                    if (args)
                        args->ireg[ireg++] = start;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                } else if (mode == x86_64_mode_sse) {
                    if (args)
                        args->freg[freg++] = start;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                } else {
                    ret_t = VT_LDOUBLE;
                }
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}
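/* Worked example (illustrative, added): struct { long long l; double d; }
   is 16 bytes, so both eightbytes are classified. The first merges to
   x86_64_mode_integer and the second to x86_64_mode_sse, producing
   args->ireg = { 0, -1 } and args->freg = { 8, -1 }, exactly the first
   case listed in the RegArgs comment at the top of this file. */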
ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, NULL);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}
static int regargs_iregs(RegArgs *args)
{
    int i;
    int ret = 0;
    for(i=0; i<REG_ARGS_MAX; i++) {
        if(args->ireg[i] != -1)
            ret++;
    }

    return ret;
}

static int regargs_fregs(RegArgs *args)
{
    int i;
    int ret = 0;
    for(i=0; i<REG_ARGS_MAX; i++) {
        if(args->freg[i] != -1)
            ret++;
    }

    return ret;
}

/* Count the total number of registers used by args */
ST_FUNC int regargs_nregs(RegArgs *args)
{
    int i;
    int ret = 0;
    for(i=0; i<REG_ARGS_MAX; i++) {
        if(args->ireg[i] != -1)
            ret++;

        if(args->freg[i] != -1)
            ret++;
    }

    return ret;
}
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize, RegArgs *args)
{
    int size, align;
    X86_64_Mode mode;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;

    mode = classify_x86_64_arg(vt, ret, &size, &align, args);

    return mode != x86_64_mode_memory &&
           mode != x86_64_mode_none;
}
#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}
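/* Note (added for clarity): %rdx and %rcx (argument slots 2 and 3) are
   themselves handed out by gv(), so materializing a later argument could
   clobber an earlier one. Staging those two slots in %r10/%r11, which
   gv() never allocates, and copying them into place just before the call
   (see the "Copy R10 and R11" block at the end of gfunc_call) avoids
   that hazard. */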
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, run_start, run_end, i;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg = 0, gen_reg = 0;
    RegArgs *reg_args = alloca(nb_args * sizeof *reg_args);

    /* calculate the number of integer/float register arguments */
    for(i = nb_args - 1; i >= 0; i--) {
        int fregs, iregs;
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_args[i]);
        fregs = regargs_fregs(&reg_args[i]);
        iregs = regargs_iregs(&reg_args[i]);

        nb_sse_args += fregs;
        nb_reg_args += iregs;

        if (sse_reg + fregs > 8 || gen_reg + iregs > REGN) {
            regargs_init(&reg_args[i]);
        } else {
            sse_reg += fregs;
            gen_reg += iregs;
        }
    }

    /* arguments are collected in runs. Each run is a collection of 8-byte
       aligned arguments and ends with a 16-byte aligned argument. This is
       because, from the point of view of the callee, argument alignment
       is computed from the bottom up. */
    /* for struct arguments we need to call memcpy, and that call would
       clobber the argument registers we are preparing. So we first
       process the arguments that will be passed on the stack. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    run_start = 0;
    args_size = 0;
    while (run_start != nb_args) {
        int run_gen_reg = gen_reg, run_sse_reg = sse_reg;

        run_end = nb_args;
        stack_adjust = 0;
        for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
            int stack = regargs_nregs(&reg_args[i]) == 0;
            classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, NULL);

            if (stack) {
                if (align == 16)
                    run_end = i;
                else
                    stack_adjust += size;
            }
        }

        gen_reg = run_gen_reg;
        sse_reg = run_sse_reg;

        /* adjust stack to align SSE boundary */
        if (stack_adjust &= 15) {
            /* fetch cpu flag before the following sub will change the value */
            if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
                gv(RC_INT);

            stack_adjust = 16 - stack_adjust;
            o(0x48);
            oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
            args_size += stack_adjust;
        }

        for(i = run_start; i < run_end;) {
            int arg_stored = regargs_nregs(&reg_args[i]) == 0;
            SValue tmp;
            RegArgs args;

            if (!arg_stored) {
                ++i;
                continue;
            }

            /* Swap argument to top, it will possibly be changed here,
               and might use more temps. At the end of the loop we keep
               it on the stack and swap it back to its original position
               if it is a register. */
            tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            classify_x86_64_arg(&vtop->type, NULL, &size, &align, &args);

            switch (vtop->type.t & VT_BTYPE) {
            case VT_STRUCT:
                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore();
                args_size += size;
                break;

            case VT_LDOUBLE:
                assert(0);
                break;

            case VT_FLOAT:
            case VT_DOUBLE:
                r = gv(RC_FLOAT);
                o(0x50); /* push $rax */
                /* movq %xmmN, (%rsp) */
                o(0xd60f66);
                o(0x04 + REG_VALUE(r)*8);
                o(0x24);
                args_size += size;
                break;

            default:
                /* simple type */
                /* XXX: implicit cast ? */
                --gen_reg;
                r = gv(RC_INT);
                orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                args_size += size;
                break;
            }

            /* And swap the argument back to its original position. */
            tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            vrotb(i+1);
            assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
            vpop();
            memmove(reg_args + i, reg_args + i + 1, (nb_args - i - 1) * sizeof *reg_args);
            --nb_args;
            --run_end;
        }
        /* handle 16 byte aligned arguments at end of run */
        run_start = i = run_end;
        while (i < nb_args) {
            /* Rotate argument to top since it will always be popped */
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, NULL);
            if (align != 16)
                break;

            vrotb(i+1);

            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                gv(RC_ST0);
                oad(0xec8148, size); /* sub $xxx, %rsp */
                o(0x7cdb); /* fstpt 0(%rsp) */
                g(0x24);
                g(0x00);
                args_size += size;
            } else {
                assert(mode == x86_64_mode_memory);

                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore();
                args_size += size;
            }

            vpop();
            memmove(reg_args + i, reg_args + i + 1, (nb_args - i - 1) * sizeof *reg_args);
            --nb_args;
        }
    }

    /* XXX This should be superfluous. */
    save_regs(0); /* save used temporary registers */

    /* recalculate the number of register arguments there actually
     * are. This is slow but more obviously correct than using the
     * old counts. */
    gen_reg = 0;
    sse_reg = 0;
    for(i = 0; i < nb_args; i++) {
        gen_reg += regargs_iregs(&reg_args[i]);
        sse_reg += regargs_fregs(&reg_args[i]);
    }

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        RegArgs args;

        args = reg_args[i];

        /* Alter stack entry type so that gv() knows how to treat it */
        if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
            int k;

            for(k=REG_ARGS_MAX-1; k>=0; k--) {
                if (args.freg[k] == -1)
                    continue;

                sse_reg--;
                assert(sse_reg >= 0);

                vdup();
                vtop->type.t = VT_DOUBLE;
                vtop->c.ull += args.freg[k];
                gv(RC_XMM0 << sse_reg);
                vpop();
            }

            for(k=REG_ARGS_MAX-1; k>=0; k--) {
                int d;
                if (args.ireg[k] == -1)
                    continue;

                gen_reg--;

                vdup();
                vtop->type.t = VT_LLONG;
                vtop->c.ull += args.ireg[k];
                r = gv(RC_INT);
                d = arg_prepare_reg(gen_reg);
                orex(1,d,r,0x89); /* mov */
                o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                vpop();
            }
        } else {
            /* XXX is it really necessary to set vtop->type? */
            classify_x86_64_arg(&vtop->type, &type, &size, &align, NULL);
            vtop->type = type;
            if (args.freg[0] != -1) {
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            } else if (args.ireg[0] != -1) {
                int d;
                /* simple type */
                /* XXX: implicit cast ? */
                gen_reg--;
                r = gv(RC_INT);
                d = arg_prepare_reg(gen_reg);
                orex(1,d,r,0x89); /* mov */
                o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            } else {
                assert(0);
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here. */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            RegArgs args;

            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &args);

            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
            case x86_64_mode_sse: {
                int stack = 0;

                seen_sse_num += regargs_fregs(&args);
                seen_reg_num += regargs_iregs(&args);

                if (seen_reg_num > 8) {
                    seen_reg_num = 8;
                    stack = 1;
                }
                if (seen_sse_num > 8) {
                    seen_sse_num = 8;
                    stack = 1;
                }

                if (stack)
                    goto stack_arg;
                break;
            }
            }
        }

        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            o(0xd60f66); /* movq */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }
    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, NULL);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        RegArgs args;
        int reg_count_integer = 0;
        int reg_count_sse = 0;
        int arg_stored = 1;

        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &args);
        reg_count_integer = regargs_iregs(&args);
        reg_count_sse = regargs_fregs(&args);

        switch (mode) {
        case x86_64_mode_integer:
        case x86_64_mode_sse:
            if (reg_count_integer || reg_count_sse) {
                if ((reg_count_sse == 0 || sse_param_index + reg_count_sse <= 8) &&
                    (reg_count_integer == 0 || reg_param_index + reg_count_integer <= REGN)) {
                    /* argument fits into registers */
                    arg_stored = 0;
                }
            }

            if (!arg_stored) {
                /* save arguments passed by register */
                loc -= (reg_count_sse + reg_count_integer) * 8;
                param_addr = loc;
                for (i = 0; i < reg_count_sse; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + args.freg[i]);
                    ++sse_param_index;
                }
                for (i = 0; i < reg_count_integer; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + args.ireg[i]);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }

#ifdef CONFIG_TCC_BCHECK
    /* leave some room for bound checking code */
    if (tcc_state->do_bounds_check) {
        func_bound_offset = lbounds_section->data_offset;
        func_bound_ind = ind;
        oad(0xb8, 0); /* lbound section pointer */
        o(0xc78948);  /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
        oad(0xb8, 0); /* call to function */
    }
#endif
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check
        && func_bound_offset != lbounds_section->data_offset)
    {
        addr_t saved_ind;
        addr_t *bounds_ptr;
        Sym *sym_data;

        /* add end of table info */
        bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
        *bounds_ptr = 0;

        /* generate bound local allocation */
        sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                               func_bound_offset, lbounds_section->data_offset);
        saved_ind = ind;
        ind = func_bound_ind;
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);
        ind = ind + 5 + 3;
        gen_static_call(TOK___bound_local_new);
        ind = saved_ind;

        /* generate bound check local freeing */
        o(0x5250); /* save returned value, if any */
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);
        oad(0xb8, 0); /* mov xxx, %rax */
        o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
        gen_static_call(TOK___bound_local_delete);
        o(0x585a); /* restore returned value, if any */
    }
#endif
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
    o(0xec8148);   /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}

#endif /* not PE */
/* generate a jump to a label */
int gjmp(int t)
{
    return psym(0xe9, t);
}

/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
{
    int v, *p;

    v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100)
        {
            /* This was a float compare. If the parity flag is set
               the result was unordered. For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test. We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump. */
            vtop->c.i &= ~0x100;
            if (!inv == (vtop->c.i != TOK_NE))
                o(0x067a); /* jp +6 */
            else
            {
                g(0x0f);
                t = psym(0x8a, t); /* jp t */
            }
        }
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            p = &vtop->c.i;
            while (*p != 0)
                p = (int *)(cur_text_section->data + *p);
            *p = t;
            t = vtop->c.i;
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    }
    vtop--;
    return t;
}
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}
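/* Note (added for clarity): the opc values above are the /digit
   extensions of the 0x81/0x83 immediate-ALU opcode group: 0=add, 1=or,
   2=adc, 3=sbb, 4=and, 5=sub, 6=xor, 7=cmp. The same 3-bit code shifted
   into (opc << 3) | 0x01 selects the matching reg,reg instruction
   (01 add, 09 or, 11 adc, ..., 39 cmp), which is why the single
   gen_op8 label serves all of these operators. */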
void gen_opl(int op)
{
    gen_opi(op);
}
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.ul;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
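/* Note (added for clarity): for 'unsigned int' the SSE path emits
   cvtsi2ss/sd with a REX.W prefix: the 32-bit value sits zero-extended
   in its 64-bit register (32-bit operations on x86-64 clear the upper
   half), and a 64-bit signed conversion of such a value is exact, so no
   separate unsigned-conversion sequence is needed. Unsigned 64-bit
   sources are handled generically elsewhere, as the comment above notes. */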
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}

/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}
2336 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2337 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2338 #ifdef TCC_TARGET_PE
2339 /* alloca does more than just adjust %rsp on Windows */
2340 vpush_global_sym(&func_old_type, TOK_alloca);
2341 vswap(); /* Move alloca ref past allocation size */
2342 gfunc_call(1);
2343 vset(type, REG_IRET, 0);
2344 #else
2345 int r = gv(RC_INT); /* allocation size */
2346 /* sub r,%rsp */
2347 o(0x2b48);
2348 o(0xe0 | REG_VALUE(r));
2349 /* We align to 16 bytes rather than align */
2350 /* and ~15, %rsp */
2351 o(0xf0e48348);
2352 vpop();
2353 #endif
/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/