2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
27 #define NB_ASM_REGS 16
/* NOTE(review): presumably enables the integrated assembler for this
   target — confirm against the build configuration headers */
28 #define CONFIG_TCC_ASM
30 /* a register can belong to several classes. The classes must be
31 sorted from more general to more precise (see gv2() code which does
32 assumptions on it). */
33 #define RC_INT 0x0001 /* generic integer register */
34 #define RC_FLOAT 0x0002 /* generic float register */
38 #define RC_ST0 0x0080 /* only for long double */
/* one class bit per individual XMM register (the most precise classes),
   so a value can be pinned to a specific SSE register */
43 #define RC_XMM0 0x1000
44 #define RC_XMM1 0x2000
45 #define RC_XMM2 0x4000
46 #define RC_XMM3 0x8000
47 #define RC_XMM4 0x10000
48 #define RC_XMM5 0x20000
49 #define RC_XMM6 0x40000
50 #define RC_XMM7 0x80000
/* register classes used for function return values; RC_RAX/RC_RDX are
   defined in lines not shown in this chunk */
51 #define RC_IRET RC_RAX /* function return: integer register */
52 #define RC_IRE2 RC_RDX /* function return: second integer register */
53 #define RC_FRET RC_XMM0 /* function return: float register */
54 #define RC_FRE2 RC_XMM1 /* function return: second float register */
56 /* pretty names for the registers */
/* x86-64 instruction-encoding helpers: registers 8-15 need a REX prefix
   bit (REX_BASE yields that bit), while only the low 3 bits of the
   register number fit in the ModR/M byte (REG_VALUE extracts them) */
84 #define REX_BASE(reg) (((reg) >> 3) & 1)
85 #define REG_VALUE(reg) ((reg) & 7)
87 /* return registers for function */
88 #define REG_IRET TREG_RAX /* single word int return register */
89 #define REG_IRE2 TREG_RDX /* second word return register (for long long) */
90 #define REG_FRET TREG_XMM0 /* float return register */
91 #define REG_FRE2 TREG_XMM1 /* second float return register */
93 /* defined if function parameters must be evaluated in reverse order */
94 #define INVERT_FUNC_PARAMS
96 /* pointer size, in bytes */
99 /* long double size and alignment, in bytes */
100 #define LDOUBLE_SIZE 16
101 #define LDOUBLE_ALIGN 16
102 /* maximum alignment (for aligned attribute support) */
105 /* define if return values need to be extended explicitly
106 at caller side (for interfacing with non-TCC compilers) */
108 /******************************************************/
109 #else /* ! TARGET_DEFS_ONLY */
110 /******************************************************/
111 #define USING_GLOBALS
115 ST_DATA
const int reg_classes
[NB_REGS
] = {
116 /* eax */ RC_INT
| RC_RAX
,
117 /* ecx */ RC_INT
| RC_RCX
,
118 /* edx */ RC_INT
| RC_RDX
,
132 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
133 /* xmm1 */ RC_FLOAT
| RC_XMM1
,
134 /* xmm2 */ RC_FLOAT
| RC_XMM2
,
135 /* xmm3 */ RC_FLOAT
| RC_XMM3
,
136 /* xmm4 */ RC_FLOAT
| RC_XMM4
,
137 /* xmm5 */ RC_FLOAT
| RC_XMM5
,
138 /* xmm6 and xmm7 are included so gv() can be used on them,
139 but they are not tagged with RC_FLOAT because they are
140 callee saved on Windows */
146 static unsigned long func_sub_sp_offset
;
147 static int func_ret_sub
;
149 #if defined(CONFIG_TCC_BCHECK)
150 static addr_t func_bound_offset
;
151 static unsigned long func_bound_ind
;
152 ST_DATA
int func_bound_add_epilog
;
156 static int func_scratch
, func_alloca
;
159 /* XXX: make it faster ? */
160 ST_FUNC
void g(int c
)
166 if (ind1
> cur_text_section
->data_allocated
)
167 section_realloc(cur_text_section
, ind1
);
168 cur_text_section
->data
[ind
] = c
;
172 ST_FUNC
void o(unsigned int c
)
180 ST_FUNC
void gen_le16(int v
)
186 ST_FUNC
void gen_le32(int c
)
194 ST_FUNC
void gen_le64(int64_t c
)
206 static void orex(int ll
, int r
, int r2
, int b
)
208 if ((r
& VT_VALMASK
) >= VT_CONST
)
210 if ((r2
& VT_VALMASK
) >= VT_CONST
)
212 if (ll
|| REX_BASE(r
) || REX_BASE(r2
))
213 o(0x40 | REX_BASE(r
) | (REX_BASE(r2
) << 2) | (ll
<< 3));
217 /* output a symbol and patch all calls to it */
218 ST_FUNC
void gsym_addr(int t
, int a
)
221 unsigned char *ptr
= cur_text_section
->data
+ t
;
222 uint32_t n
= read32le(ptr
); /* next value */
223 write32le(ptr
, a
< 0 ? -a
: a
- t
- 4);
228 static int is64_type(int t
)
230 return ((t
& VT_BTYPE
) == VT_PTR
||
231 (t
& VT_BTYPE
) == VT_FUNC
||
232 (t
& VT_BTYPE
) == VT_LLONG
);
235 /* instruction + 4 bytes data. Return the address of the data */
236 static int oad(int c
, int s
)
247 /* generate jmp to a label */
/* oad() emits the opcode plus a 32-bit immediate and returns the address
   of that immediate, so the jump displacement can be back-patched later
   (see gsym_addr) */
248 #define gjmp2(instr,lbl) oad(instr,lbl)
250 ST_FUNC
void gen_addr32(int r
, Sym
*sym
, int c
)
253 greloca(cur_text_section
, sym
, ind
, R_X86_64_32S
, c
), c
=0;
257 /* output constant with relocation if 'r & VT_SYM' is true */
258 ST_FUNC
void gen_addr64(int r
, Sym
*sym
, int64_t c
)
261 greloca(cur_text_section
, sym
, ind
, R_X86_64_64
, c
), c
=0;
265 /* output constant with relocation if 'r & VT_SYM' is true */
266 ST_FUNC
void gen_addrpc32(int r
, Sym
*sym
, int c
)
269 greloca(cur_text_section
, sym
, ind
, R_X86_64_PC32
, c
-4), c
=4;
273 /* output got address with relocation */
274 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
277 tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
278 get_tok_str(sym
->v
, NULL
), c
, r
,
279 cur_text_section
->data
[ind
-3],
280 cur_text_section
->data
[ind
-2],
281 cur_text_section
->data
[ind
-1]
284 greloca(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
, -4);
287 /* we use add c, %xxx for displacement */
289 o(0xc0 + REG_VALUE(r
));
294 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
296 op_reg
= REG_VALUE(op_reg
) << 3;
297 if ((r
& VT_VALMASK
) == VT_CONST
) {
298 /* constant memory reference */
300 /* Absolute memory reference */
301 o(0x04 | op_reg
); /* [sib] | destreg */
302 oad(0x25, c
); /* disp32 */
304 o(0x05 | op_reg
); /* (%rip)+disp32 | destreg */
306 gen_gotpcrel(r
, sym
, c
);
308 gen_addrpc32(r
, sym
, c
);
311 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
312 /* currently, we use only ebp as base */
314 /* short reference */
318 oad(0x85 | op_reg
, c
);
320 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
322 g(0x80 | op_reg
| REG_VALUE(r
));
325 g(0x00 | op_reg
| REG_VALUE(r
));
328 g(0x00 | op_reg
| REG_VALUE(r
));
332 /* generate a modrm reference. 'op_reg' contains the additional 3
334 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
336 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
339 /* generate a modrm reference. 'op_reg' contains the additional 3
341 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
344 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
345 orex(1, r
, op_reg
, opcode
);
346 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
350 /* load 'r' from value 'sv' */
351 void load(int r
, SValue
*sv
)
353 int v
, t
, ft
, fc
, fr
;
358 sv
= pe_getimport(sv
, &v2
);
362 ft
= sv
->type
.t
& ~VT_DEFSIGN
;
364 if (fc
!= sv
->c
.i
&& (fr
& VT_SYM
))
365 tcc_error("64 bit addend in load");
367 ft
&= ~(VT_VOLATILE
| VT_CONSTANT
);
369 #ifndef TCC_TARGET_PE
370 /* we use indirect access via got */
371 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
372 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
373 /* use the result register as a temporal register */
374 int tr
= r
| TREG_MEM
;
376 /* we cannot use float registers as a temporal register */
377 tr
= get_reg(RC_INT
) | TREG_MEM
;
379 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
381 /* load from the temporal register */
389 if (v
== VT_LLOCAL
) {
391 v1
.r
= VT_LOCAL
| VT_LVAL
;
394 if (!(reg_classes
[fr
] & (RC_INT
|RC_R11
)))
395 fr
= get_reg(RC_INT
);
399 /* If the addends doesn't fit into a 32bit signed
400 we must use a 64bit move. We've checked above
401 that this doesn't have a sym associated. */
402 v1
.type
.t
= VT_LLONG
;
406 if (!(reg_classes
[fr
] & (RC_INT
|RC_R11
)))
407 fr
= get_reg(RC_INT
);
412 /* Like GCC we can load from small enough properly sized
413 structs and unions as well.
414 XXX maybe move to generic operand handling, but should
415 occur only with asm, so tccasm.c might also be a better place */
416 if ((ft
& VT_BTYPE
) == VT_STRUCT
) {
418 switch (type_size(&sv
->type
, &align
)) {
419 case 1: ft
= VT_BYTE
; break;
420 case 2: ft
= VT_SHORT
; break;
421 case 4: ft
= VT_INT
; break;
422 case 8: ft
= VT_LLONG
; break;
424 tcc_error("invalid aggregate type for register load");
428 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
430 r
= REG_VALUE(r
); /* movd */
431 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
432 b
= 0x7e0ff3; /* movq */
434 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
435 b
= 0xdb, r
= 5; /* fldt */
436 } else if ((ft
& VT_TYPE
) == VT_BYTE
|| (ft
& VT_TYPE
) == VT_BOOL
) {
437 b
= 0xbe0f; /* movsbl */
438 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
439 b
= 0xb60f; /* movzbl */
440 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
441 b
= 0xbf0f; /* movswl */
442 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
443 b
= 0xb70f; /* movzwl */
444 } else if ((ft
& VT_TYPE
) == (VT_VOID
)) {
445 /* Can happen with zero size structs */
448 assert(((ft
& VT_BTYPE
) == VT_INT
)
449 || ((ft
& VT_BTYPE
) == VT_LLONG
)
450 || ((ft
& VT_BTYPE
) == VT_PTR
)
451 || ((ft
& VT_BTYPE
) == VT_FUNC
)
457 gen_modrm64(b
, r
, fr
, sv
->sym
, fc
);
460 gen_modrm(r
, fr
, sv
->sym
, fc
);
467 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
468 gen_addrpc32(fr
, sv
->sym
, fc
);
470 if (sv
->sym
->type
.t
& VT_STATIC
) {
472 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
473 gen_addrpc32(fr
, sv
->sym
, fc
);
476 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
477 gen_gotpcrel(r
, sv
->sym
, fc
);
480 } else if (is64_type(ft
)) {
481 orex(1,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
484 orex(0,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
487 } else if (v
== VT_LOCAL
) {
488 orex(1,0,r
,0x8d); /* lea xxx(%ebp), r */
489 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
490 } else if (v
== VT_CMP
) {
495 /* This was a float compare. If the parity bit is
496 set the result was unordered, meaning false for everything
497 except TOK_NE, and true for TOK_NE. */
498 orex(0, r
, 0, 0xb0 + REG_VALUE(r
)); /* mov $0/1,%al */
499 g(v
^ fc
^ (v
== TOK_NE
));
500 o(0x037a + (REX_BASE(r
) << 8));
502 orex(0,r
,0, 0x0f); /* setxx %br */
504 o(0xc0 + REG_VALUE(r
));
506 o(0xc0b6 + REG_VALUE(r
) * 0x900); /* movzbl %al, %eax */
507 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
510 oad(0xb8 + REG_VALUE(r
), t
); /* mov $1, r */
511 o(0x05eb + (REX_BASE(r
) << 8)); /* jmp after */
514 oad(0xb8 + REG_VALUE(r
), t
^ 1); /* mov $0, r */
516 if ((r
>= TREG_XMM0
) && (r
<= TREG_XMM7
)) {
518 /* gen_cvt_ftof(VT_DOUBLE); */
519 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
520 /* movsd -0x10(%rsp),%xmmN */
522 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
525 assert((v
>= TREG_XMM0
) && (v
<= TREG_XMM7
));
526 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
529 assert((ft
& VT_BTYPE
) == VT_DOUBLE
);
532 o(0xc0 + REG_VALUE(v
) + REG_VALUE(r
)*8);
534 } else if (r
== TREG_ST0
) {
535 assert((v
>= TREG_XMM0
) && (v
<= TREG_XMM7
));
536 /* gen_cvt_ftof(VT_LDOUBLE); */
537 /* movsd %xmmN,-0x10(%rsp) */
539 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
541 o(0xf02444dd); /* fldl -0x10(%rsp) */
543 orex(is64_type(ft
), r
, v
, 0x89);
544 o(0xc0 + REG_VALUE(r
) + REG_VALUE(v
) * 8); /* mov v, r */
550 /* store register 'r' in lvalue 'v' */
551 void store(int r
, SValue
*v
)
555 /* store the REX prefix in this variable when PIC is enabled */
560 v
= pe_getimport(v
, &v2
);
563 fr
= v
->r
& VT_VALMASK
;
566 if (fc
!= v
->c
.i
&& (fr
& VT_SYM
))
567 tcc_error("64 bit addend in store");
568 ft
&= ~(VT_VOLATILE
| VT_CONSTANT
);
571 #ifndef TCC_TARGET_PE
572 /* we need to access the variable via got */
573 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
574 /* mov xx(%rip), %r11 */
576 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.i
);
577 pic
= is64_type(bt
) ? 0x49 : 0x41;
581 /* XXX: incorrect if float reg to reg */
582 if (bt
== VT_FLOAT
) {
585 o(0x7e0f); /* movd */
587 } else if (bt
== VT_DOUBLE
) {
590 o(0xd60f); /* movq */
592 } else if (bt
== VT_LDOUBLE
) {
593 o(0xc0d9); /* fld %st(0) */
601 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
603 else if (is64_type(bt
))
609 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
614 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
615 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
616 } else if (fr
!= r
) {
617 orex(1, fr
, r
, op64
);
618 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
621 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
622 gen_modrm(r
, v
->r
, v
->sym
, fc
);
623 } else if (fr
!= r
) {
624 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
629 /* 'is_jmp' is '1' if it is a jump */
630 static void gcall_or_jmp(int is_jmp
)
633 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
&&
634 ((vtop
->r
& VT_SYM
) && (vtop
->c
.i
-4) == (int)(vtop
->c
.i
-4))) {
635 /* constant symbolic case -> simple relocation */
637 greloca(cur_text_section
, vtop
->sym
, ind
+ 1, R_X86_64_PC32
, (int)(vtop
->c
.i
-4));
639 greloca(cur_text_section
, vtop
->sym
, ind
+ 1, R_X86_64_PLT32
, (int)(vtop
->c
.i
-4));
641 oad(0xe8 + is_jmp
, 0); /* call/jmp im */
643 /* otherwise, indirect call */
647 o(0xff); /* call/jmp *r */
648 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
652 #if defined(CONFIG_TCC_BCHECK)
654 static void gen_bounds_call(int v
)
656 Sym
*sym
= external_global_sym(v
, &func_old_type
);
659 greloca(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
, -4);
661 greloca(cur_text_section
, sym
, ind
-4, R_X86_64_PLT32
, -4);
665 /* generate a bounded pointer addition */
666 ST_FUNC
void gen_bounded_ptr_add(void)
668 vpush_global_sym(&func_old_type
, TOK___bound_ptr_add
);
672 /* returned pointer is in rax */
673 vtop
->r
= TREG_RAX
| VT_BOUNDED
;
676 /* relocation offset of the bounding function call point */
677 vtop
->c
.i
= (cur_text_section
->reloc
->data_offset
- sizeof(ElfW(Rela
)));
680 /* patch pointer addition in vtop so that pointer dereferencing is
682 ST_FUNC
void gen_bounded_ptr_deref(void)
692 size
= type_size(&vtop
->type
, &align
);
694 case 1: func
= TOK___bound_ptr_indir1
; break;
695 case 2: func
= TOK___bound_ptr_indir2
; break;
696 case 4: func
= TOK___bound_ptr_indir4
; break;
697 case 8: func
= TOK___bound_ptr_indir8
; break;
698 case 12: func
= TOK___bound_ptr_indir12
; break;
699 case 16: func
= TOK___bound_ptr_indir16
; break;
701 /* may happen with struct member access */
703 //tcc_error("unhandled size when dereferencing bounded pointer");
707 sym
= external_global_sym(func
, &func_old_type
);
709 put_extern_sym(sym
, NULL
, 0, 0);
710 /* patch relocation */
711 /* XXX: find a better solution ? */
712 rel
= (ElfW(Rela
) *)(cur_text_section
->reloc
->data
+ vtop
->c
.i
);
713 rel
->r_info
= ELF64_R_INFO(sym
->c
, ELF64_R_TYPE(rel
->r_info
));
717 # define TREG_FASTCALL_1 TREG_RCX
719 # define TREG_FASTCALL_1 TREG_RDI
722 static void gen_bounds_prolog(void)
724 /* leave some room for bound checking code */
725 func_bound_offset
= lbounds_section
->data_offset
;
726 func_bound_ind
= ind
;
727 func_bound_add_epilog
= 0;
728 o(0xb848 + TREG_FASTCALL_1
* 0x100); /*lbound section pointer */
730 oad(0xb8, 0); /* call to function */
733 static void gen_bounds_epilog(void)
738 int offset_modified
= func_bound_offset
!= lbounds_section
->data_offset
;
740 if (!offset_modified
&& !func_bound_add_epilog
)
743 /* add end of table info */
744 bounds_ptr
= section_ptr_add(lbounds_section
, sizeof(addr_t
));
747 sym_data
= get_sym_ref(&char_pointer_type
, lbounds_section
,
748 func_bound_offset
, lbounds_section
->data_offset
);
750 /* generate bound local allocation */
751 if (offset_modified
) {
753 ind
= func_bound_ind
;
754 greloca(cur_text_section
, sym_data
, ind
+ 2, R_X86_64_64
, 0);
756 gen_bounds_call(TOK___bound_local_new
);
760 /* generate bound check local freeing */
761 o(0x5250); /* save returned value, if any */
762 greloca(cur_text_section
, sym_data
, ind
+ 2, R_X86_64_64
, 0);
763 o(0xb848 + TREG_FASTCALL_1
* 0x100); /* mov xxx, %rcx/di */
765 gen_bounds_call(TOK___bound_local_delete
);
766 o(0x585a); /* restore returned value, if any */
773 static const uint8_t arg_regs
[REGN
] = {
774 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
777 /* Prepare arguments in R10 and R11 rather than RCX and RDX
778 because gv() will not ever use these */
779 static int arg_prepare_reg(int idx
) {
780 if (idx
== 0 || idx
== 1)
781 /* idx=0: r10, idx=1: r11 */
784 return arg_regs
[idx
];
787 /* Generate function call. The function address is pushed first, then
788 all the parameters in call order. This functions pops all the
789 parameters and the function address. */
791 static void gen_offs_sp(int b
, int r
, int d
)
793 orex(1,0,r
& 0x100 ? 0 : r
, b
);
795 o(0x2444 | (REG_VALUE(r
) << 3));
798 o(0x2484 | (REG_VALUE(r
) << 3));
803 static int using_regs(int size
)
805 return !(size
> 8 || (size
& (size
- 1)));
808 /* Return the number of registers needed to return the struct, or 0 if
809 returning via struct pointer. */
810 ST_FUNC
int gfunc_sret(CType
*vt
, int variadic
, CType
*ret
, int *ret_align
, int *regsize
)
813 *ret_align
= 1; // Never have to re-align return values for x86-64
815 size
= type_size(vt
, &align
);
816 if (!using_regs(size
))
830 static int is_sse_float(int t
) {
833 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
836 static int gfunc_arg_size(CType
*type
) {
838 if (type
->t
& (VT_ARRAY
|VT_BITFIELD
))
840 return type_size(type
, &align
);
843 void gfunc_call(int nb_args
)
845 int size
, r
, args_size
, i
, d
, bt
, struct_size
;
848 #ifdef CONFIG_TCC_BCHECK
849 if (tcc_state
->do_bounds_check
)
850 gbound_args(nb_args
);
853 args_size
= (nb_args
< REGN
? REGN
: nb_args
) * PTR_SIZE
;
856 /* for struct arguments, we need to call memcpy and the function
857 call breaks register passing arguments we are preparing.
858 So, we process arguments which will be passed by stack first. */
859 struct_size
= args_size
;
860 for(i
= 0; i
< nb_args
; i
++) {
865 bt
= (sv
->type
.t
& VT_BTYPE
);
866 size
= gfunc_arg_size(&sv
->type
);
868 if (using_regs(size
))
869 continue; /* arguments smaller than 8 bytes passed in registers or on stack */
871 if (bt
== VT_STRUCT
) {
872 /* align to stack align size */
873 size
= (size
+ 15) & ~15;
874 /* generate structure store */
876 gen_offs_sp(0x8d, r
, struct_size
);
879 /* generate memcpy call */
880 vset(&sv
->type
, r
| VT_LVAL
, 0);
884 } else if (bt
== VT_LDOUBLE
) {
886 gen_offs_sp(0xdb, 0x107, struct_size
);
891 if (func_scratch
< struct_size
)
892 func_scratch
= struct_size
;
895 struct_size
= args_size
;
897 for(i
= 0; i
< nb_args
; i
++) {
899 bt
= (vtop
->type
.t
& VT_BTYPE
);
901 size
= gfunc_arg_size(&vtop
->type
);
902 if (!using_regs(size
)) {
903 /* align to stack align size */
904 size
= (size
+ 15) & ~15;
907 gen_offs_sp(0x8d, d
, struct_size
);
908 gen_offs_sp(0x89, d
, arg
*8);
910 d
= arg_prepare_reg(arg
);
911 gen_offs_sp(0x8d, d
, struct_size
);
915 if (is_sse_float(vtop
->type
.t
)) {
916 if (tcc_state
->nosse
)
917 tcc_error("SSE disabled");
920 /* movq %xmm0, j*8(%rsp) */
921 gen_offs_sp(0xd60f66, 0x100, arg
*8);
923 /* Load directly to xmmN register */
925 d
= arg_prepare_reg(arg
);
926 /* mov %xmmN, %rxx */
929 o(0xc0 + arg
*8 + REG_VALUE(d
));
932 if (bt
== VT_STRUCT
) {
933 vtop
->type
.ref
= NULL
;
934 vtop
->type
.t
= size
> 4 ? VT_LLONG
: size
> 2 ? VT_INT
935 : size
> 1 ? VT_SHORT
: VT_BYTE
;
940 gen_offs_sp(0x89, r
, arg
*8);
942 d
= arg_prepare_reg(arg
);
943 orex(1,d
,r
,0x89); /* mov */
944 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
951 /* Copy R10 and R11 into RCX and RDX, respectively */
953 o(0xd1894c); /* mov %r10, %rcx */
955 o(0xda894c); /* mov %r11, %rdx */
961 if ((vtop
->r
& VT_SYM
) && vtop
->sym
->v
== TOK_alloca
) {
962 /* need to add the "func_scratch" area after alloca */
963 o(0x48); func_alloca
= oad(0x05, func_alloca
); /* add $NN, %rax */
964 #ifdef CONFIG_TCC_BCHECK
965 if (tcc_state
->do_bounds_check
)
966 gen_bounds_call(TOK___bound_alloca_nr
); /* new region */
/* number of bytes reserved at function entry for the prolog; the real
   prolog is emitted by back-patching in gfunc_epilog once the final
   stack size is known, nop-padded (0x90) to exactly this size */
973 #define FUNC_PROLOG_SIZE 11
975 /* generate function prolog of type 't' */
976 void gfunc_prolog(Sym
*func_sym
)
978 CType
*func_type
= &func_sym
->type
;
979 int addr
, reg_param_index
, bt
, size
;
989 ind
+= FUNC_PROLOG_SIZE
;
990 func_sub_sp_offset
= ind
;
993 sym
= func_type
->ref
;
995 /* if the function returns a structure, then add an
996 implicit pointer parameter */
997 size
= gfunc_arg_size(&func_vt
);
998 if (!using_regs(size
)) {
999 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
1005 /* define parameters */
1006 while ((sym
= sym
->next
) != NULL
) {
1008 bt
= type
->t
& VT_BTYPE
;
1009 size
= gfunc_arg_size(type
);
1010 if (!using_regs(size
)) {
1011 if (reg_param_index
< REGN
) {
1012 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
1014 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1015 VT_LLOCAL
| VT_LVAL
, addr
);
1017 if (reg_param_index
< REGN
) {
1018 /* save arguments passed by register */
1019 if ((bt
== VT_FLOAT
) || (bt
== VT_DOUBLE
)) {
1020 if (tcc_state
->nosse
)
1021 tcc_error("SSE disabled");
1022 o(0xd60f66); /* movq */
1023 gen_modrm(reg_param_index
, VT_LOCAL
, NULL
, addr
);
1025 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
1028 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1029 VT_LOCAL
| VT_LVAL
, addr
);
1035 while (reg_param_index
< REGN
) {
1037 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
1042 #ifdef CONFIG_TCC_BCHECK
1043 if (tcc_state
->do_bounds_check
)
1044 gen_bounds_prolog();
1048 /* generate function epilog */
1049 void gfunc_epilog(void)
1053 /* align local size to word & save local variables */
1054 func_scratch
= (func_scratch
+ 15) & -16;
1055 loc
= (loc
& -16) - func_scratch
;
1057 #ifdef CONFIG_TCC_BCHECK
1058 if (tcc_state
->do_bounds_check
)
1059 gen_bounds_epilog();
1062 o(0xc9); /* leave */
1063 if (func_ret_sub
== 0) {
1066 o(0xc2); /* ret n */
1068 g(func_ret_sub
>> 8);
1072 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1076 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
);
1077 oad(0xb8, v
); /* mov stacksize, %eax */
1078 oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */
1079 greloca(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
, -4);
1080 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
1082 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1083 o(0xec8148); /* sub rsp, stacksize */
1087 /* add the "func_scratch" area after each alloca seen */
1088 gsym_addr(func_alloca
, -func_scratch
);
1090 cur_text_section
->data_offset
= saved_ind
;
1091 pe_add_unwind_data(ind
, saved_ind
, v
);
1092 ind
= cur_text_section
->data_offset
;
1097 static void gadd_sp(int val
)
1099 if (val
== (char)val
) {
1103 oad(0xc48148, val
); /* add $xxx, %rsp */
1107 typedef enum X86_64_Mode
{
1110 x86_64_mode_integer
,
1115 static X86_64_Mode
classify_x86_64_merge(X86_64_Mode a
, X86_64_Mode b
)
1119 else if (a
== x86_64_mode_none
)
1121 else if (b
== x86_64_mode_none
)
1123 else if ((a
== x86_64_mode_memory
) || (b
== x86_64_mode_memory
))
1124 return x86_64_mode_memory
;
1125 else if ((a
== x86_64_mode_integer
) || (b
== x86_64_mode_integer
))
1126 return x86_64_mode_integer
;
1127 else if ((a
== x86_64_mode_x87
) || (b
== x86_64_mode_x87
))
1128 return x86_64_mode_memory
;
1130 return x86_64_mode_sse
;
1133 static X86_64_Mode
classify_x86_64_inner(CType
*ty
)
1138 switch (ty
->t
& VT_BTYPE
) {
1139 case VT_VOID
: return x86_64_mode_none
;
1148 return x86_64_mode_integer
;
1151 case VT_DOUBLE
: return x86_64_mode_sse
;
1153 case VT_LDOUBLE
: return x86_64_mode_x87
;
1158 mode
= x86_64_mode_none
;
1159 for (f
= f
->next
; f
; f
= f
->next
)
1160 mode
= classify_x86_64_merge(mode
, classify_x86_64_inner(&f
->type
));
1168 static X86_64_Mode
classify_x86_64_arg(CType
*ty
, CType
*ret
, int *psize
, int *palign
, int *reg_count
)
1171 int size
, align
, ret_t
= 0;
1173 if (ty
->t
& (VT_BITFIELD
|VT_ARRAY
)) {
1178 mode
= x86_64_mode_integer
;
1180 size
= type_size(ty
, &align
);
1181 *psize
= (size
+ 7) & ~7;
1182 *palign
= (align
+ 7) & ~7;
1185 mode
= x86_64_mode_memory
;
1187 mode
= classify_x86_64_inner(ty
);
1189 case x86_64_mode_integer
:
1203 if ((ty
->t
& VT_BTYPE
) == VT_STRUCT
|| (ty
->t
& VT_UNSIGNED
))
1204 ret_t
|= VT_UNSIGNED
;
1208 case x86_64_mode_x87
:
1213 case x86_64_mode_sse
:
1219 ret_t
= (size
> 4) ? VT_DOUBLE
: VT_FLOAT
;
1222 default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/
1235 ST_FUNC
int classify_x86_64_va_arg(CType
*ty
)
1237 /* This definition must be synced with stdarg.h */
1238 enum __va_arg_type
{
1239 __va_gen_reg
, __va_float_reg
, __va_stack
1241 int size
, align
, reg_count
;
1242 X86_64_Mode mode
= classify_x86_64_arg(ty
, NULL
, &size
, &align
, ®_count
);
1244 default: return __va_stack
;
1245 case x86_64_mode_integer
: return __va_gen_reg
;
1246 case x86_64_mode_sse
: return __va_float_reg
;
1250 /* Return the number of registers needed to return the struct, or 0 if
1251 returning via struct pointer. */
1252 ST_FUNC
int gfunc_sret(CType
*vt
, int variadic
, CType
*ret
, int *ret_align
, int *regsize
)
1254 int size
, align
, reg_count
;
1255 *ret_align
= 1; // Never have to re-align return values for x86-64
1257 return (classify_x86_64_arg(vt
, ret
, &size
, &align
, ®_count
) != x86_64_mode_memory
);
1261 static const uint8_t arg_regs
[REGN
] = {
1262 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
1265 static int arg_prepare_reg(int idx
) {
1266 if (idx
== 2 || idx
== 3)
1267 /* idx=2: r10, idx=3: r11 */
1270 return arg_regs
[idx
];
1273 /* Generate function call. The function address is pushed first, then
1274 all the parameters in call order. This functions pops all the
1275 parameters and the function address. */
1276 void gfunc_call(int nb_args
)
1280 int size
, align
, r
, args_size
, stack_adjust
, i
, reg_count
, k
;
1281 int nb_reg_args
= 0;
1282 int nb_sse_args
= 0;
1283 int sse_reg
, gen_reg
;
1284 char *onstack
= tcc_malloc((nb_args
+ 1) * sizeof (char));
1286 #ifdef CONFIG_TCC_BCHECK
1287 if (tcc_state
->do_bounds_check
)
1288 gbound_args(nb_args
);
1291 /* calculate the number of integer/float register arguments, remember
1292 arguments to be passed via stack (in onstack[]), and also remember
1293 if we have to align the stack pointer to 16 (onstack[i] == 2). Needs
1294 to be done in a left-to-right pass over arguments. */
1296 for(i
= nb_args
- 1; i
>= 0; i
--) {
1297 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1298 if (mode
== x86_64_mode_sse
&& nb_sse_args
+ reg_count
<= 8) {
1299 nb_sse_args
+= reg_count
;
1301 } else if (mode
== x86_64_mode_integer
&& nb_reg_args
+ reg_count
<= REGN
) {
1302 nb_reg_args
+= reg_count
;
1304 } else if (mode
== x86_64_mode_none
) {
1307 if (align
== 16 && (stack_adjust
&= 15)) {
1312 stack_adjust
+= size
;
1316 if (nb_sse_args
&& tcc_state
->nosse
)
1317 tcc_error("SSE disabled but floating point arguments passed");
1319 /* fetch cpu flag before generating any code */
1320 if ((vtop
->r
& VT_VALMASK
) == VT_CMP
)
1323 /* for struct arguments, we need to call memcpy and the function
1324 call breaks register passing arguments we are preparing.
1325 So, we process arguments which will be passed by stack first. */
1326 gen_reg
= nb_reg_args
;
1327 sse_reg
= nb_sse_args
;
1330 for (i
= k
= 0; i
< nb_args
;) {
1331 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1332 if (!onstack
[i
+ k
]) {
1336 /* Possibly adjust stack to align SSE boundary. We're processing
1337 args from right to left while allocating happens left to right
1338 (stack grows down), so the adjustment needs to happen _after_
1339 an argument that requires it. */
1341 o(0x50); /* push %rax; aka sub $8,%rsp */
1345 if (onstack
[i
+ k
] == 2)
1350 switch (vtop
->type
.t
& VT_BTYPE
) {
1352 /* allocate the necessary size on stack */
1354 oad(0xec81, size
); /* sub $xxx, %rsp */
1355 /* generate structure store */
1356 r
= get_reg(RC_INT
);
1357 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1358 o(0xe0 + REG_VALUE(r
));
1359 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1366 oad(0xec8148, size
); /* sub $xxx, %rsp */
1367 o(0x7cdb); /* fstpt 0(%rsp) */
1374 assert(mode
== x86_64_mode_sse
);
1376 o(0x50); /* push $rax */
1377 /* movq %xmmN, (%rsp) */
1379 o(0x04 + REG_VALUE(r
)*8);
1384 assert(mode
== x86_64_mode_integer
);
1386 /* XXX: implicit cast ? */
1388 orex(0,r
,0,0x50 + REG_VALUE(r
)); /* push r */
1400 /* XXX This should be superfluous. */
1401 save_regs(0); /* save used temporary registers */
1403 /* then, we prepare register passing arguments.
1404 Note that we cannot set RDX and RCX in this loop because gv()
1405 may break these temporary registers. Let's use R10 and R11
1407 assert(gen_reg
<= REGN
);
1408 assert(sse_reg
<= 8);
1409 for(i
= 0; i
< nb_args
; i
++) {
1410 mode
= classify_x86_64_arg(&vtop
->type
, &type
, &size
, &align
, ®_count
);
1411 /* Alter stack entry type so that gv() knows how to treat it */
1413 if (mode
== x86_64_mode_sse
) {
1414 if (reg_count
== 2) {
1416 gv(RC_FRET
); /* Use pair load into xmm0 & xmm1 */
1417 if (sse_reg
) { /* avoid redundant movaps %xmm0, %xmm0 */
1418 /* movaps %xmm1, %xmmN */
1420 o(0xc1 + ((sse_reg
+1) << 3));
1421 /* movaps %xmm0, %xmmN */
1423 o(0xc0 + (sse_reg
<< 3));
1426 assert(reg_count
== 1);
1428 /* Load directly to register */
1429 gv(RC_XMM0
<< sse_reg
);
1431 } else if (mode
== x86_64_mode_integer
) {
1433 /* XXX: implicit cast ? */
1435 gen_reg
-= reg_count
;
1437 d
= arg_prepare_reg(gen_reg
);
1438 orex(1,d
,r
,0x89); /* mov */
1439 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
1440 if (reg_count
== 2) {
1441 d
= arg_prepare_reg(gen_reg
+1);
1442 orex(1,d
,vtop
->r2
,0x89); /* mov */
1443 o(0xc0 + REG_VALUE(vtop
->r2
) * 8 + REG_VALUE(d
));
1448 assert(gen_reg
== 0);
1449 assert(sse_reg
== 0);
1451 /* We shouldn't have many operands on the stack anymore, but the
1452 call address itself is still there, and it might be in %eax
1453 (or edx/ecx) currently, which the below writes would clobber.
1454 So evict all remaining operands here. */
1457 /* Copy R10 and R11 into RDX and RCX, respectively */
1458 if (nb_reg_args
> 2) {
1459 o(0xd2894c); /* mov %r10, %rdx */
1460 if (nb_reg_args
> 3) {
1461 o(0xd9894c); /* mov %r11, %rcx */
1465 if (vtop
->type
.ref
->f
.func_type
!= FUNC_NEW
) /* implies FUNC_OLD or FUNC_ELLIPSIS */
1466 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
1473 #define FUNC_PROLOG_SIZE 11
1475 static void push_arg_reg(int i
) {
1477 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
1480 /* generate function prolog of type 't' */
1481 void gfunc_prolog(Sym
*func_sym
)
1483 CType
*func_type
= &func_sym
->type
;
1485 int i
, addr
, align
, size
, reg_count
;
1486 int param_addr
= 0, reg_param_index
, sse_param_index
;
1490 sym
= func_type
->ref
;
1491 addr
= PTR_SIZE
* 2;
1493 ind
+= FUNC_PROLOG_SIZE
;
1494 func_sub_sp_offset
= ind
;
1498 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
1499 seen_reg_num
= seen_sse_num
= 0;
1500 /* frame pointer and return address */
1501 seen_stack_size
= PTR_SIZE
* 2;
1502 /* count the number of seen parameters */
1503 sym
= func_type
->ref
;
1504 while ((sym
= sym
->next
) != NULL
) {
1506 mode
= classify_x86_64_arg(type
, NULL
, &size
, &align
, ®_count
);
1510 seen_stack_size
= ((seen_stack_size
+ align
- 1) & -align
) + size
;
1513 case x86_64_mode_integer
:
1514 if (seen_reg_num
+ reg_count
> REGN
)
1516 seen_reg_num
+= reg_count
;
1519 case x86_64_mode_sse
:
1520 if (seen_sse_num
+ reg_count
> 8)
1522 seen_sse_num
+= reg_count
;
1528 /* movl $0x????????, -0x18(%rbp) */
1530 gen_le32(seen_reg_num
* 8);
1531 /* movl $0x????????, -0x14(%rbp) */
1533 gen_le32(seen_sse_num
* 16 + 48);
1534 /* leaq $0x????????, %r11 */
1536 gen_le32(seen_stack_size
);
1537 /* movq %r11, -0x10(%rbp) */
1539 /* leaq $-192(%rbp), %r11 */
1541 gen_le32(-176 - 24);
1542 /* movq %r11, -0x8(%rbp) */
1545 /* save all register passing arguments */
1546 for (i
= 0; i
< 8; i
++) {
1548 if (!tcc_state
->nosse
) {
1549 o(0xd60f66); /* movq */
1550 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
1552 /* movq $0, loc+8(%rbp) */
1557 for (i
= 0; i
< REGN
; i
++) {
1558 push_arg_reg(REGN
-1-i
);
1562 sym
= func_type
->ref
;
1563 reg_param_index
= 0;
1564 sse_param_index
= 0;
1566 /* if the function returns a structure, then add an
1567 implicit pointer parameter */
1568 mode
= classify_x86_64_arg(&func_vt
, NULL
, &size
, &align
, ®_count
);
1569 if (mode
== x86_64_mode_memory
) {
1570 push_arg_reg(reg_param_index
);
1574 /* define parameters */
1575 while ((sym
= sym
->next
) != NULL
) {
1577 mode
= classify_x86_64_arg(type
, NULL
, &size
, &align
, ®_count
);
1579 case x86_64_mode_sse
:
1580 if (tcc_state
->nosse
)
1581 tcc_error("SSE disabled but floating point arguments used");
1582 if (sse_param_index
+ reg_count
<= 8) {
1583 /* save arguments passed by register */
1584 loc
-= reg_count
* 8;
1586 for (i
= 0; i
< reg_count
; ++i
) {
1587 o(0xd60f66); /* movq */
1588 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, param_addr
+ i
*8);
1592 addr
= (addr
+ align
- 1) & -align
;
1598 case x86_64_mode_memory
:
1599 case x86_64_mode_x87
:
1600 addr
= (addr
+ align
- 1) & -align
;
1605 case x86_64_mode_integer
: {
1606 if (reg_param_index
+ reg_count
<= REGN
) {
1607 /* save arguments passed by register */
1608 loc
-= reg_count
* 8;
1610 for (i
= 0; i
< reg_count
; ++i
) {
1611 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, param_addr
+ i
*8);
1615 addr
= (addr
+ align
- 1) & -align
;
1621 default: break; /* nothing to be done for x86_64_mode_none */
1623 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1624 VT_LOCAL
| VT_LVAL
, param_addr
);
1627 #ifdef CONFIG_TCC_BCHECK
1628 if (tcc_state
->do_bounds_check
)
1629 gen_bounds_prolog();
1633 /* generate function epilog */
1634 void gfunc_epilog(void)
1638 #ifdef CONFIG_TCC_BCHECK
1639 if (tcc_state
->do_bounds_check
)
1640 gen_bounds_epilog();
1642 o(0xc9); /* leave */
1643 if (func_ret_sub
== 0) {
1646 o(0xc2); /* ret n */
1648 g(func_ret_sub
>> 8);
1650 /* align local size to word & save local variables */
1651 v
= (-loc
+ 15) & -16;
1653 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1654 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1655 o(0xec8148); /* sub rsp, stacksize */
1662 ST_FUNC
void gen_fill_nops(int bytes
)
1668 /* generate a jump to a label */
1671 return gjmp2(0xe9, t
);
1674 /* generate a jump to a fixed address */
1675 void gjmp_addr(int a
)
1683 oad(0xe9, a
- ind
- 5);
1687 ST_FUNC
int gjmp_append(int n
, int t
)
1690 /* insert vtop->c jump list in t */
1692 uint32_t n1
= n
, n2
;
1693 while ((n2
= read32le(p
= cur_text_section
->data
+ n1
)))
1701 ST_FUNC
int gjmp_cond(int op
, int t
)
1705 /* This was a float compare. If the parity flag is set
1706 the result was unordered. For anything except != this
1707 means false and we don't jump (anding both conditions).
1708 For != this means true (oring both).
1709 Take care about inverting the test. We need to jump
1710 to our target if the result was unordered and test wasn't NE,
1711 otherwise if unordered we don't want to jump. */
1712 int v
= vtop
->cmp_r
;
1714 if (op
^ v
^ (v
!= TOK_NE
))
1715 o(0x067a); /* jp +6 */
1719 t
= gjmp2(0x8a, t
); /* jp t */
1723 t
= gjmp2(op
- 16, t
);
1727 /* generate an integer binary operation */
1728 void gen_opi(int op
)
1733 ll
= is64_type(vtop
[-1].type
.t
);
1734 uu
= (vtop
[-1].type
.t
& VT_UNSIGNED
) != 0;
1735 cc
= (vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
;
1739 case TOK_ADDC1
: /* add with carry generation */
1742 if (cc
&& (!ll
|| (int)vtop
->c
.i
== vtop
->c
.i
)) {
1749 /* XXX: generate inc and dec for smaller code ? */
1750 orex(ll
, r
, 0, 0x83);
1751 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1754 orex(ll
, r
, 0, 0x81);
1755 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1758 gv2(RC_INT
, RC_INT
);
1761 orex(ll
, r
, fr
, (opc
<< 3) | 0x01);
1762 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1765 if (op
>= TOK_ULT
&& op
<= TOK_GT
)
1769 case TOK_SUBC1
: /* sub with carry generation */
1772 case TOK_ADDC2
: /* add with carry use */
1775 case TOK_SUBC2
: /* sub with carry use */
1788 gv2(RC_INT
, RC_INT
);
1791 orex(ll
, fr
, r
, 0xaf0f); /* imul fr, r */
1792 o(0xc0 + REG_VALUE(fr
) + REG_VALUE(r
) * 8);
1804 opc
= 0xc0 | (opc
<< 3);
1810 orex(ll
, r
, 0, 0xc1); /* shl/shr/sar $xxx, r */
1811 o(opc
| REG_VALUE(r
));
1812 g(vtop
->c
.i
& (ll
? 63 : 31));
1814 /* we generate the shift in ecx */
1815 gv2(RC_INT
, RC_RCX
);
1817 orex(ll
, r
, 0, 0xd3); /* shl/shr/sar %cl, r */
1818 o(opc
| REG_VALUE(r
));
1831 /* first operand must be in eax */
1832 /* XXX: need better constraint for second operand */
1833 gv2(RC_RAX
, RC_RCX
);
1838 orex(ll
, 0, 0, uu
? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1839 orex(ll
, fr
, 0, 0xf7); /* div fr, %eax */
1840 o((uu
? 0xf0 : 0xf8) + REG_VALUE(fr
));
1841 if (op
== '%' || op
== TOK_UMOD
)
1853 void gen_opl(int op
)
1858 /* generate a floating point operation 'v = t1 op t2' instruction. The
1859 two operands are guaranteed to have the same floating point type */
1860 /* XXX: need to use ST1 too */
1861 void gen_opf(int op
)
1863 int a
, ft
, fc
, swapped
, r
;
1865 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1867 /* convert constants to memory references */
1868 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1873 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1876 /* must put at least one value in the floating point register */
1877 if ((vtop
[-1].r
& VT_LVAL
) &&
1878 (vtop
[0].r
& VT_LVAL
)) {
1884 /* swap the stack if needed so that t1 is the register and t2 is
1885 the memory reference */
1886 if (vtop
[-1].r
& VT_LVAL
) {
1890 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1891 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1892 /* load on stack second operand */
1893 load(TREG_ST0
, vtop
);
1894 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1895 if (op
== TOK_GE
|| op
== TOK_GT
)
1897 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1900 o(0xc9d9); /* fxch %st(1) */
1901 if (op
== TOK_EQ
|| op
== TOK_NE
)
1902 o(0xe9da); /* fucompp */
1904 o(0xd9de); /* fcompp */
1905 o(0xe0df); /* fnstsw %ax */
1907 o(0x45e480); /* and $0x45, %ah */
1908 o(0x40fC80); /* cmp $0x40, %ah */
1909 } else if (op
== TOK_NE
) {
1910 o(0x45e480); /* and $0x45, %ah */
1911 o(0x40f480); /* xor $0x40, %ah */
1913 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1914 o(0x05c4f6); /* test $0x05, %ah */
1917 o(0x45c4f6); /* test $0x45, %ah */
1923 /* no memory reference possible for long double operations */
1924 load(TREG_ST0
, vtop
);
1948 o(0xde); /* fxxxp %st, %st(1) */
1953 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1954 /* if saved lvalue, then we must reload it */
1957 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1959 r
= get_reg(RC_INT
);
1961 v1
.r
= VT_LOCAL
| VT_LVAL
;
1965 vtop
->r
= r
= r
| VT_LVAL
;
1968 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1971 if (op
== TOK_LE
|| op
== TOK_LT
)
1973 if (op
== TOK_LE
|| op
== TOK_GE
) {
1974 op
= 0x93; /* setae */
1976 op
= 0x97; /* seta */
1984 assert(!(vtop
[-1].r
& VT_LVAL
));
1986 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
)
1988 if (op
== TOK_EQ
|| op
== TOK_NE
)
1989 o(0x2e0f); /* ucomisd */
1991 o(0x2f0f); /* comisd */
1993 if (vtop
->r
& VT_LVAL
) {
1994 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
1996 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
2000 vset_VT_CMP(op
| 0x100);
2003 assert((vtop
->type
.t
& VT_BTYPE
) != VT_LDOUBLE
);
2021 assert((ft
& VT_BTYPE
) != VT_LDOUBLE
);
2024 /* if saved lvalue, then we must reload it */
2025 if ((vtop
->r
& VT_VALMASK
) == VT_LLOCAL
) {
2027 r
= get_reg(RC_INT
);
2029 v1
.r
= VT_LOCAL
| VT_LVAL
;
2033 vtop
->r
= r
= r
| VT_LVAL
;
2036 assert(!(vtop
[-1].r
& VT_LVAL
));
2038 assert(vtop
->r
& VT_LVAL
);
2043 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
2051 if (vtop
->r
& VT_LVAL
) {
2052 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
2054 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
2062 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
2063 and 'long long' cases. */
2064 void gen_cvt_itof(int t
)
2066 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
2069 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
2070 /* signed long long to float/double/long double (unsigned case
2071 is handled generically) */
2072 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
2073 o(0x242cdf); /* fildll (%rsp) */
2074 o(0x08c48348); /* add $8, %rsp */
2075 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
2076 (VT_INT
| VT_UNSIGNED
)) {
2077 /* unsigned int to float/double/long double */
2078 o(0x6a); /* push $0 */
2080 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
2081 o(0x242cdf); /* fildll (%rsp) */
2082 o(0x10c48348); /* add $16, %rsp */
2084 /* int to float/double/long double */
2085 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
2086 o(0x2404db); /* fildl (%rsp) */
2087 o(0x08c48348); /* add $8, %rsp */
2091 int r
= get_reg(RC_FLOAT
);
2093 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
?1:0));
2094 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
2095 (VT_INT
| VT_UNSIGNED
) ||
2096 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
2100 o(0xc0 + (vtop
->r
& VT_VALMASK
) + REG_VALUE(r
)*8); /* cvtsi2sd */
2105 /* convert from one floating point type to another */
2106 void gen_cvt_ftof(int t
)
2114 if (bt
== VT_FLOAT
) {
2116 if (tbt
== VT_DOUBLE
) {
2117 o(0x140f); /* unpcklps */
2118 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2119 o(0x5a0f); /* cvtps2pd */
2120 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2121 } else if (tbt
== VT_LDOUBLE
) {
2123 /* movss %xmm0,-0x10(%rsp) */
2125 o(0x44 + REG_VALUE(vtop
->r
)*8);
2127 o(0xf02444d9); /* flds -0x10(%rsp) */
2130 } else if (bt
== VT_DOUBLE
) {
2132 if (tbt
== VT_FLOAT
) {
2133 o(0x140f66); /* unpcklpd */
2134 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2135 o(0x5a0f66); /* cvtpd2ps */
2136 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2137 } else if (tbt
== VT_LDOUBLE
) {
2139 /* movsd %xmm0,-0x10(%rsp) */
2141 o(0x44 + REG_VALUE(vtop
->r
)*8);
2143 o(0xf02444dd); /* fldl -0x10(%rsp) */
2149 r
= get_reg(RC_FLOAT
);
2150 if (tbt
== VT_DOUBLE
) {
2151 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
2152 /* movsd -0x10(%rsp),%xmm0 */
2154 o(0x44 + REG_VALUE(r
)*8);
2157 } else if (tbt
== VT_FLOAT
) {
2158 o(0xf0245cd9); /* fstps -0x10(%rsp) */
2159 /* movss -0x10(%rsp),%xmm0 */
2161 o(0x44 + REG_VALUE(r
)*8);
2168 /* convert fp to int 't' type */
2169 void gen_cvt_ftoi(int t
)
2171 int ft
, bt
, size
, r
;
2174 if (bt
== VT_LDOUBLE
) {
2175 gen_cvt_ftof(VT_DOUBLE
);
2185 r
= get_reg(RC_INT
);
2186 if (bt
== VT_FLOAT
) {
2188 } else if (bt
== VT_DOUBLE
) {
2193 orex(size
== 8, r
, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
2194 o(0xc0 + REG_VALUE(vtop
->r
) + REG_VALUE(r
)*8);
2198 // Generate sign extension from 32 to 64 bits:
2199 ST_FUNC
void gen_cvt_sxtw(void)
2202 /* x86_64 specific: movslq */
2204 o(0xc0 + (REG_VALUE(r
) << 3) + REG_VALUE(r
));
2207 /* char/short to int conversion */
2208 ST_FUNC
void gen_cvt_csti(int t
)
2212 sz
= !(t
& VT_UNSIGNED
);
2213 xl
= (t
& VT_BTYPE
) == VT_SHORT
;
2214 ll
= (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
;
2215 orex(ll
, r
, 0, 0xc0b60f /* mov[sz] %a[xl], %eax */
2216 | (sz
<< 3 | xl
) << 8
2217 | (REG_VALUE(r
) << 3 | REG_VALUE(r
)) << 16
2221 /* computed goto support */
2228 /* Save the stack pointer onto the stack and return the location of its address */
2229 ST_FUNC
void gen_vla_sp_save(int addr
) {
2230 /* mov %rsp,addr(%rbp)*/
2231 gen_modrm64(0x89, TREG_RSP
, VT_LOCAL
, NULL
, addr
);
2234 /* Restore the SP from a location on the stack */
2235 ST_FUNC
void gen_vla_sp_restore(int addr
) {
2236 gen_modrm64(0x8b, TREG_RSP
, VT_LOCAL
, NULL
, addr
);
2239 #ifdef TCC_TARGET_PE
2240 /* Save result of gen_vla_alloc onto the stack */
2241 ST_FUNC
void gen_vla_result(int addr
) {
2242 /* mov %rax,addr(%rbp)*/
2243 gen_modrm64(0x89, TREG_RAX
, VT_LOCAL
, NULL
, addr
);
2247 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2248 ST_FUNC
void gen_vla_alloc(CType
*type
, int align
) {
2251 #if defined(CONFIG_TCC_BCHECK)
2252 use_call
= tcc_state
->do_bounds_check
;
2254 #ifdef TCC_TARGET_PE /* alloca does more than just adjust %rsp on Windows */
2259 vpush_global_sym(&func_old_type
, TOK_alloca
);
2260 vswap(); /* Move alloca ref past allocation size */
2265 r
= gv(RC_INT
); /* allocation size */
2268 o(0xe0 | REG_VALUE(r
));
2269 /* We align to 16 bytes rather than align */
2277 /* end of x86-64 code generator */
2278 /*************************************************************/
2279 #endif /* ! TARGET_DEFS_ONLY */
2280 /******************************************************/