/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_ASM_REGS 16

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_ST0     0x0080 /* only for long double */
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX /* function return: integer register */
#define RC_LRET    RC_RDX /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */
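/* Usage sketch (illustrative, not from the original source): because the
   precise classes are single-register bits, a caller can pin a value into one
   specific register, e.g. gv(RC_XMM0 << n) forces a load into %xmmN (used by
   gfunc_call below), while the more general gv(RC_FLOAT) accepts any register
   tagged with the RC_FLOAT class. */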
/* pretty names for the registers */

#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)
/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS
/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN 16
/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};
static unsigned long func_sub_sp_offset;
static int func_ret_sub;
/* XXX: make it faster ? */
ST_FUNC void g(int c)
{
    int ind1;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}
ST_FUNC void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}

ST_FUNC void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

ST_FUNC void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

ST_FUNC void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
static void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
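/* Worked example (illustrative, assuming the usual TREG_* numbering where
   %rax..%rdi are 0..7 and %r8..%r15 are 8..15): orex(1, TREG_R8, TREG_RDX, 0x89)
   emits REX = 0x40 | B(1) | (R(0) << 2) | (W(1) << 3) = 0x49 and then the 0x89
   opcode; with the ModRM byte 0xd0 that callers append, the stream "49 89 d0"
   is mov %rdx,%r8. 'r' supplies REX.B (extends the ModRM r/m field), 'r2'
   supplies REX.R (extends the ModRM reg field), and 'll' is REX.W. */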
/* output a symbol and patch all calls to it */
ST_FUNC void gsym_addr(int t, int a)
{
    while (t) {
        unsigned char *ptr = cur_text_section->data + t;
        uint32_t n = read32le(ptr); /* next value */
        write32le(ptr, a - t - 4);
        t = n;
    }
}
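/* Note (illustrative): forward jumps that cannot be resolved yet are chained
   through their own 4-byte displacement fields -- each field temporarily
   holds the offset of the previous unresolved site, with 0 terminating the
   chain. The loop above walks that chain and overwrites every link with the
   real PC-relative displacement "a - t - 4" (relative to the end of the
   displacement field). */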
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}
/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
{
    int ind1;
    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    write32le(cur_text_section->data + ind, s);
    s = ind;
    ind = ind1;
    return s;
}
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr32(int r, Sym *sym, long c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_32S, c), c=0;
    gen_le32(c);
}
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_64, c), c=0;
    gen_le64(c);
}
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, long c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_PC32, c-4), c=4;
    gen_le32(c-4);
}
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    greloca(cur_text_section, sym, ind, R_X86_64_GOTPCREL, -4);
#else
    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
        get_tok_str(sym->v, NULL), c, r,
        cur_text_section->data[ind-3],
        cur_text_section->data[ind-2],
        cur_text_section->data[ind-1]
        );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
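/* Worked example (illustrative sketch of the VT_LOCAL short-reference branch
   reconstructed above): gen_modrm_impl(TREG_RAX, VT_LOCAL, NULL, -8, 0) emits
   the ModRM byte 0x45 (mod=01, reg=000, rm=101, i.e. disp8(%rbp)) followed by
   the displacement byte 0xf8 (-8); together with a preceding 0x8b opcode the
   stream "8b 45 f8" is mov -0x8(%rbp),%eax. */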
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.i;
    if (fc != sv->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in load");

    ft &= ~(VT_VOLATILE | VT_CONSTANT);

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporal register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporal register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporal register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.i = fc;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        /* Like GCC we can load from small enough properly sized
           structs and unions as well.
           XXX maybe move to generic operand handling, but should
           occur only with asm, so tccasm.c might also be a better place */
        if ((ft & VT_BTYPE) == VT_STRUCT) {
            int align;
            switch (type_size(&sv->type, &align)) {
                case 1: ft = VT_BYTE; break;
                case 2: ft = VT_SHORT; break;
                case 4: ft = VT_INT; break;
                case 8: ft = VT_LLONG; break;
                default:
                    tcc_error("invalid aggregate type for register load");
                    break;
            }
        }
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.i);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100) {
                /* This was a float compare. If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE. */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8));
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    fr = v->r & VT_VALMASK;
    ft = v->type.t;
    fc = v->c.i;
    if (fc != v->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in store");
    ft &= ~(VT_VOLATILE | VT_CONSTANT);
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.i);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
        ((vtop->r & VT_SYM) || (vtop->c.i-4) == (int)(vtop->c.i-4))) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
#ifdef TCC_TARGET_PE
            greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32, (int)(vtop->c.i-4));
#else
            greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32, (int)(vtop->c.i-4));
#endif
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloca(symtab_section, cur_text_section,
                           ind + 1, R_X86_64_PC32, 0, (int)(vtop->c.i-4));
        }
        oad(0xe8 + is_jmp, 0); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
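/* Encoding note (illustrative): the direct form emits opcode 0xe8 (call
   rel32) or 0xe9 (jmp rel32, hence "0xe8 + is_jmp") with the relocation
   applied at ind + 1, i.e. on the displacement field. The indirect form with
   r = TREG_R11 emits "41 ff d3" (call *%r11) or "41 ff e3" (jmp *%r11):
   0xd0 + REG_VALUE(r) selects ff /2, and adding is_jmp << 4 turns it into
   ff /4. */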
#if defined(CONFIG_TCC_BCHECK)
#ifndef TCC_TARGET_PE
static addr_t func_bound_offset;
static unsigned long func_bound_ind;
#endif

static void gen_static_call(int v)
{
    Sym *sym = external_global_sym(v, &func_old_type, 0);
    oad(0xe8, 0);
    greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
}
/* generate a bounded pointer addition */
ST_FUNC void gen_bounded_ptr_add(void)
{
    /* save all temporary registers */
    save_regs(0);

    /* prepare fast x86_64 function call */
    gv(RC_RAX);
    o(0xc68948); // mov  %rax,%rsi ## second arg in %rsi, this must be size
    vswap();

    gv(RC_RAX);
    o(0xc78948); // mov  %rax,%rdi ## first arg in %rdi, this must be ptr
    vtop--;

    /* do a fast function call */
    gen_static_call(TOK___bound_ptr_add);

    /* returned pointer is in rax */
    vtop++;
    vtop->r = TREG_RAX | VT_BOUNDED;

    /* relocation offset of the bounding function call point */
    vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
}
/* patch pointer addition in vtop so that pointer dereferencing is
   also tested */
ST_FUNC void gen_bounded_ptr_deref(void)
{
    addr_t func;
    int size, align;
    ElfW(Rela) *rel;
    Sym *sym;

    size = 0;
    /* XXX: put that code in generic part of tcc */
    if (!is_float(vtop->type.t)) {
        if (vtop->r & VT_LVAL_BYTE)
            size = 1;
        else if (vtop->r & VT_LVAL_SHORT)
            size = 2;
    }
    if (!size)
        size = type_size(&vtop->type, &align);
    switch(size) {
    case  1: func = TOK___bound_ptr_indir1; break;
    case  2: func = TOK___bound_ptr_indir2; break;
    case  4: func = TOK___bound_ptr_indir4; break;
    case  8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
    default:
        tcc_error("unhandled size when dereferencing bounded pointer");
        func = 0;
        break;
    }

    sym = external_global_sym(func, &func_old_type, 0);
    if (!sym->c)
        put_extern_sym(sym, NULL, 0, 0);

    /* patch relocation */
    /* XXX: find a better solution ? */

    rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.i);
    rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
}
#endif /* defined(CONFIG_TCC_BCHECK) */

#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}

static int func_scratch;
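/* Mapping sketch (illustrative): for the Win64 convention above, argument
   index 0 is staged in %r10 and index 1 in %r11 (registers gv() never
   allocates), while indexes 2 and 3 go straight to arg_regs[], i.e. %r8 and
   %r9; the staged values are copied into %rcx and %rdx in gfunc_call just
   before the call instruction is emitted. */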
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    size = type_size(vt, &align);
    if (size > 8) {
        return 0;
    } else if (size > 4) {
        ret->ref = NULL;
        ret->t = VT_LLONG;
    } else if (size > 2) {
        ret->ref = NULL;
        ret->t = VT_INT;
    } else if (size > 1) {
        ret->ref = NULL;
        ret->t = VT_SHORT;
    } else {
        ret->ref = NULL;
        ret->t = VT_BYTE;
    }
    return 1;
}
static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}
int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
            size = gfunc_arg_size(&vtop->type);
            if (size > 8) {
                /* align to stack align size */
                size = (size + 15) & ~15;
                if (arg >= REGN) {
                    d = get_reg(RC_INT);
                    gen_offs_sp(0x8d, d, struct_size);
                    gen_offs_sp(0x89, d, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    gen_offs_sp(0x8d, d, struct_size);
                }
                struct_size += size;
            }
        }
        if (is_sse_float(vtop->type.t)) {
            if (tcc_state->nosse)
                tcc_error("SSE disabled");
            gv(RC_XMM0); /* only use one float register */
            if (arg >= REGN) {
                /* movq %xmm0, j*8(%rsp) */
                gen_offs_sp(0xd60f66, 0x100, arg*8);
            } else {
                /* movaps %xmm0, %xmmN */
                o(0x280f);
                o(0xc0 + (arg << 3));
                d = arg_prepare_reg(arg);
                /* mov %xmm0, %rxx */
                o(0x66);
                orex(1,d,0, 0x7e0f);
                o(0xc0 + REG_VALUE(d));
            }
        } else {
            if (bt == VT_STRUCT) {
                vtop->type.ref = NULL;
                vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                    : size > 1 ? VT_SHORT : VT_BYTE;
            }

            r = gv(RC_INT);
            if (arg >= REGN) {
                gen_offs_sp(0x89, r, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                orex(1,d,r,0x89); /* mov */
                o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);

    /* other compilers don't clear the upper bits when returning char/short */
    bt = vtop->type.ref->type.t & (VT_BTYPE | VT_UNSIGNED);
    if (bt == (VT_BYTE | VT_UNSIGNED))
        o(0xc0b60f);  /* movzbl %al, %eax */
    else if (bt == VT_BYTE)
        o(0xc0be0f); /* movsbl %al, %eax */
    else if (bt == VT_SHORT)
        o(0x98); /* cwtl */
    else if (bt == (VT_SHORT | VT_UNSIGNED))
        o(0xc0b70f);  /* movzwl %ax, %eax */
#if 0 /* handled in gen_cast() */
    else if (bt == VT_INT)
        o(0x9848); /* cltq */
    else if (bt == (VT_INT | VT_UNSIGNED))
        o(0xc089);  /* mov %eax,%eax */
#endif
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    func_var = (sym->c == FUNC_ELLIPSIS);
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    if (tcc_state->nosse)
                        tcc_error("SSE disabled");
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
994 void gfunc_epilog(void)
999 if (func_ret_sub
== 0) {
1002 o(0xc2); /* ret n */
1004 g(func_ret_sub
>> 8);
1008 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1009 /* align local size to word & save local variables */
1010 v
= (func_scratch
+ -loc
+ 15) & -16;
1013 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
1014 oad(0xb8, v
); /* mov stacksize, %eax */
1015 oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */
1016 greloca(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
, -4);
1017 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
1019 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1020 o(0xec8148); /* sub rsp, stacksize */
1024 cur_text_section
->data_offset
= saved_ind
;
1025 pe_add_unwind_data(ind
, saved_ind
, v
);
1026 ind
= cur_text_section
->data_offset
;
#else /* not TCC_TARGET_PE */

static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_sse,
    x86_64_mode_x87
} X86_64_Mode;

static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
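/* Worked example (illustrative): for struct { int i; float f; } the field
   classes are integer and sse, which merge to integer, so the whole struct
   travels in general-purpose registers; an sse field merged with an x87
   (long double) field yields memory, i.e. the aggregate is passed on the
   stack. */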
static X86_64_Mode classify_x86_64_inner(CType *ty)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        mode = x86_64_mode_none;
        for (f = f->next; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }
    assert(0);
    return 0;
}
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
{
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}
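/* Classification examples (illustrative): struct { float x, y; } is sse with
   size 8, so reg_count = 1 and ret_t = VT_DOUBLE -- it lives in one XMM
   register; struct { long a, b; } is integer with size 16, so reg_count = 2
   and ret_t = VT_QLONG -- two GP registers; anything larger than 16 bytes is
   x86_64_mode_memory and is passed (and returned) through memory. */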
ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
}
#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}
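/* Mapping sketch (illustrative): argument indexes 0, 1, 4 and 5 use
   arg_regs[] directly (%rdi, %rsi, %r8, %r9), while indexes 2 and 3 are
   staged in %r10 and %r11 (idx + 8) because gv() may clobber %rdx and %rcx
   while later arguments are evaluated; gfunc_call copies %r10 -> %rdx and
   %r11 -> %rcx just before emitting the call. */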
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, run_start, run_end, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float register arguments */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse)
            nb_sse_args += reg_count;
        else if (mode == x86_64_mode_integer)
            nb_reg_args += reg_count;
    }

    if (nb_sse_args && tcc_state->nosse)
        tcc_error("SSE disabled but floating point arguments passed");

    /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
       and ended by a 16-byte aligned argument. This is because, from the point of view of
       the callee, argument alignment is computed from the bottom up. */
    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    run_start = 0;
    args_size = 0;
    while (run_start != nb_args) {
        int run_gen_reg = gen_reg, run_sse_reg = sse_reg;

        run_end = nb_args;
        stack_adjust = 0;
        for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            switch (mode) {
            case x86_64_mode_memory:
            case x86_64_mode_x87:
            stack_arg:
                if (align == 16)
                    run_end = i;
                else
                    stack_adjust += size;
                break;

            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8) goto stack_arg;
                break;

            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN) goto stack_arg;
                break;
            default: break; /* nothing to be done for x86_64_mode_none */
            }
        }

        gen_reg = run_gen_reg;
        sse_reg = run_sse_reg;

        /* adjust stack to align SSE boundary */
        if (stack_adjust &= 15) {
            /* fetch cpu flag before the following sub will change the value */
            if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
                gv(RC_INT);

            stack_adjust = 16 - stack_adjust;
            o(0x48);
            oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
            args_size += stack_adjust;
        }

        for(i = run_start; i < run_end;) {
            /* Swap argument to top, it will possibly be changed here,
               and might use more temps. At the end of the loop we keep
               it on the stack and swap it back to its original position
               if it is a register. */
            SValue tmp = vtop[0];
            int arg_stored = 1;

            vtop[0] = vtop[-i];
            vtop[-i] = tmp;
            mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, &reg_count);

            switch (vtop->type.t & VT_BTYPE) {
            case VT_STRUCT:
                if (mode == x86_64_mode_sse) {
                    if (sse_reg > 8)
                        sse_reg -= reg_count;
                    else
                        arg_stored = 0;
                } else if (mode == x86_64_mode_integer) {
                    if (gen_reg > REGN)
                        gen_reg -= reg_count;
                    else
                        arg_stored = 0;
                }

                if (arg_stored) {
                    /* allocate the necessary size on stack */
                    o(0x48);
                    oad(0xec81, size); /* sub $xxx, %rsp */
                    /* generate structure store */
                    r = get_reg(RC_INT);
                    orex(1, r, 0, 0x89); /* mov %rsp, r */
                    o(0xe0 + REG_VALUE(r));
                    vset(&vtop->type, r | VT_LVAL, 0);
                    vswap();
                    vstore();
                    args_size += size;
                }
                break;

            case VT_LDOUBLE:
                assert(0);
                break;

            case VT_FLOAT:
            case VT_DOUBLE:
                assert(mode == x86_64_mode_sse);
                if (sse_reg > 8) {
                    --sse_reg;
                    r = gv(RC_FLOAT);
                    o(0x50); /* push $rax */
                    /* movq %xmmN, (%rsp) */
                    o(0xd60f66);
                    o(0x04 + REG_VALUE(r)*8);
                    o(0x24);
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;

            default:
                assert(mode == x86_64_mode_integer);
                /* simple type */
                /* XXX: implicit cast ? */
                if (gen_reg > REGN) {
                    --gen_reg;
                    r = gv(RC_INT);
                    orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;
            }

            /* And swap the argument back to its original position.  */
            tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            if (arg_stored) {
                vrotb(i+1);
                assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
                vpop();
                --nb_args;
                --run_end;
            } else {
                ++i;
            }
        }

        /* handle 16 byte aligned arguments at end of run */
        run_start = i = run_end;
        while (i < nb_args) {
            /* Rotate argument to top since it will always be popped */
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            if (align != 16)
                break;

            vrotb(i+1);

            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                gv(RC_ST0);
                oad(0xec8148, size); /* sub $xxx, %rsp */
                o(0x7cdb); /* fstpt 0(%rsp) */
                g(0x24);
                g(0x00);
                args_size += size;
            } else {
                assert(mode == x86_64_mode_memory);

                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore();
                args_size += size;
            }

            vpop();
            --nb_args;
        }
    }

    /* XXX This should be superfluous.  */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            int d;
            gen_reg -= reg_count;
            r = gv(RC_INT);
            d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here.  */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    if (vtop->type.ref->c != FUNC_NEW) /* implies FUNC_OLD or FUNC_ELLIPSIS */
        oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count <= 8) {
                    seen_reg_num += reg_count;
                } else {
                    seen_reg_num = 8;
                    goto stack_arg;
                }
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count <= 8) {
                    seen_sse_num += reg_count;
                } else {
                    seen_sse_num = 8;
                    goto stack_arg;
                }
                break;
            }
        }

        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            if (!tcc_state->nosse) {
                o(0xd60f66); /* movq */
                gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            }
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (tcc_state->nosse)
                tcc_error("SSE disabled but floating point arguments used");
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;
        }
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }

#ifdef CONFIG_TCC_BCHECK
    /* leave some room for bound checking code */
    if (tcc_state->do_bounds_check) {
        func_bound_offset = lbounds_section->data_offset;
        func_bound_ind = ind;
        oad(0xb8, 0); /* lbound section pointer */
        o(0xc78948);  /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
        oad(0xb8, 0); /* call to function */
    }
#endif
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check
        && func_bound_offset != lbounds_section->data_offset)
    {
        addr_t *bounds_ptr;
        Sym *sym_data;

        /* add end of table info */
        bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
        *bounds_ptr = 0;

        /* generate bound local allocation */
        sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                               func_bound_offset, lbounds_section->data_offset);
        saved_ind = ind;
        ind = func_bound_ind;
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);
        ind = ind + 5 + 3;
        gen_static_call(TOK___bound_local_new);
        ind = saved_ind;

        /* generate bound check local freeing */
        o(0x5250); /* save returned value, if any */
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);
        oad(0xb8, 0); /* mov xxx, %rax */
        o(0xc78948);  /* mov %rax,%rdi # first arg in %rdi, this must be ptr */
        gen_static_call(TOK___bound_local_delete);
        o(0x585a); /* restore returned value, if any */
    }
#endif
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}
#endif /* not TCC_TARGET_PE */

/* generate a jump to a label */
ST_FUNC int gjmp(int t)
{
    return psym(0xe9, t);
}

/* generate a jump to a fixed address */
ST_FUNC void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}
ST_FUNC void gtst_addr(int inv, int a)
{
    inv ^= (vtop--)->c.i;
    a -= ind + 2;
    if (a == (char)a) {
        g(inv - 32);
        g(a);
    } else {
        g(0x0f);
        oad(inv - 16, a - 4);
    }
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
ST_FUNC int gtst(int inv, int t)
{
    int v = vtop->r & VT_VALMASK;

    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100) {
            /* This was a float compare.  If the parity flag is set
               the result was unordered.  For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test.  We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump.  */
            vtop->c.i &= ~0x100;
            if (inv == (vtop->c.i == TOK_NE)) {
                o(0x067a);  /* jp +6 */
            } else {
                g(0x0f);
                t = psym(0x8a, t); /* jp t */
            }
        }
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            uint32_t n1, n = vtop->c.i;
            if (n) {
                while ((n1 = read32le(cur_text_section->data + n)))
                    n = n1;
                write32le(cur_text_section->data + n, t);
                t = vtop->c.i;
            }
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    }
    vtop--;
    return t;
}
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.i == vtop->c.i)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}
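/* Encoding example (illustrative): for a constant add on a 64-bit value the
   constant path above emits orex(1, r, 0, 0x83); o(0xc0 | (0 << 3) |
   REG_VALUE(r)); g(1); -- with r = TREG_RAX that is "48 83 c0 01", i.e.
   add $0x1,%rax (opcode 0x83 /0 with a sign-extended 8-bit immediate). */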
void gen_opl(int op)
{
    gen_opi(op);
}
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.i;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(TREG_ST0);
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(TREG_ST0);
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
/* computed goto support */
ST_FUNC void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}
/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}
/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b48);
    o(0xe0 | REG_VALUE(r));
    /* We align to 16 bytes rather than align */
    /* and ~15, %rsp */
    o(0xf0e48348);
    vpop();
#endif
}
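/* Note (illustrative): the byte sequence emitted by o(0xf0e48348) is
   "48 83 e4 f0", i.e. and $0xfffffffffffffff0,%rsp -- the allocation is
   always rounded to the 16-byte stack alignment the ABI requires, even when
   the requested 'align' is smaller. */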
/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/