2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
28 #define REG_ARGS_MAX 2 /* at most 2 registers used for each argument */
33 /* This struct stores the struct offsets at which %rax, %rdx, %xmm0, and
34 * %xmm1 are to be stored.
36 * struct { long long l; double x; }: ireg = { 0, -1 } freg = { 8, -1 }
37 * struct { double x; long long l; }: ireg = { 8, -1 } freg = { 0, -1 }
38 * struct { long long l; long long l2; }: ireg = { 0, 8 } freg = { -1, -1 }
39 * struct { double x; double x2; }: ireg = { -1, -1 } freg = { 0, 8 }
42 int ireg
[REG_ARGS_MAX
];
43 int freg
[REG_ARGS_MAX
];
47 /* a register can belong to several classes. The classes must be
48 sorted from more general to more precise (see gv2() code which does
49 assumptions on it). */
50 #define RC_INT 0x0001 /* generic integer register */
51 #define RC_FLOAT 0x0002 /* generic float register */
55 #define RC_ST0 0x0080 /* only for long double */
60 #define RC_XMM0 0x1000
61 #define RC_XMM1 0x2000
62 #define RC_XMM2 0x4000
63 #define RC_XMM3 0x8000
64 #define RC_XMM4 0x10000
65 #define RC_XMM5 0x20000
66 #define RC_XMM6 0x40000
67 #define RC_XMM7 0x80000
68 #define RC_IRET RC_RAX /* function return: integer register */
69 #define RC_LRET RC_RDX /* function return: second integer register */
70 #define RC_FRET RC_XMM0 /* function return: float register */
71 #define RC_QRET RC_XMM1 /* function return: second float register */
73 /* pretty names for the registers */
101 #define REX_BASE(reg) (((reg) >> 3) & 1)
102 #define REG_VALUE(reg) ((reg) & 7)
104 /* return registers for function */
105 #define REG_IRET TREG_RAX /* single word int return register */
106 #define REG_LRET TREG_RDX /* second word return register (for long long) */
107 #define REG_FRET TREG_XMM0 /* float return register */
108 #define REG_QRET TREG_XMM1 /* second float return register */
110 /* defined if function parameters must be evaluated in reverse order */
111 #define INVERT_FUNC_PARAMS
113 /* pointer size, in bytes */
116 /* long double size and alignment, in bytes */
117 #define LDOUBLE_SIZE 16
118 #define LDOUBLE_ALIGN 16
119 /* maximum alignment (for aligned attribute support) */
122 /******************************************************/
125 #define EM_TCC_TARGET EM_X86_64
127 /* relocation type for 32 bit data relocation */
128 #define R_DATA_32 R_X86_64_32
129 #define R_DATA_PTR R_X86_64_64
130 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
131 #define R_COPY R_X86_64_COPY
133 #define ELF_START_ADDR 0x400000
134 #define ELF_PAGE_SIZE 0x200000
136 /******************************************************/
137 #else /* ! TARGET_DEFS_ONLY */
138 /******************************************************/
142 ST_DATA
const int reg_classes
[NB_REGS
] = {
143 /* eax */ RC_INT
| RC_RAX
,
144 /* ecx */ RC_INT
| RC_RCX
,
145 /* edx */ RC_INT
| RC_RDX
,
159 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
160 /* xmm1 */ RC_FLOAT
| RC_XMM1
,
161 /* xmm2 */ RC_FLOAT
| RC_XMM2
,
162 /* xmm3 */ RC_FLOAT
| RC_XMM3
,
163 /* xmm4 */ RC_FLOAT
| RC_XMM4
,
164 /* xmm5 */ RC_FLOAT
| RC_XMM5
,
165 /* xmm6 and xmm7 are included so gv() can be used on them,
166 but they are not tagged with RC_FLOAT because they are
167 callee saved on Windows */
173 static unsigned long func_sub_sp_offset
;
174 static int func_ret_sub
;
176 /* XXX: make it faster ? */
181 if (ind1
> cur_text_section
->data_allocated
)
182 section_realloc(cur_text_section
, ind1
);
183 cur_text_section
->data
[ind
] = c
;
/* Emit the non-zero bytes of 'c', least significant first.  Multi-byte
   opcodes are stored little-endian in an int, so this writes them in
   instruction-stream order. */
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
/* Emit the 64-bit value 'c' as 8 little-endian bytes. */
void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
221 void orex(int ll
, int r
, int r2
, int b
)
223 if ((r
& VT_VALMASK
) >= VT_CONST
)
225 if ((r2
& VT_VALMASK
) >= VT_CONST
)
227 if (ll
|| REX_BASE(r
) || REX_BASE(r2
))
228 o(0x40 | REX_BASE(r
) | (REX_BASE(r2
) << 2) | (ll
<< 3));
232 /* output a symbol and patch all calls to it */
233 void gsym_addr(int t
, int a
)
237 ptr
= (int *)(cur_text_section
->data
+ t
);
238 n
= *ptr
; /* next value */
249 /* psym is used to put an instruction with a data field which is a
250 reference to a symbol. It is in fact the same as oad ! */
253 static int is64_type(int t
)
255 return ((t
& VT_BTYPE
) == VT_PTR
||
256 (t
& VT_BTYPE
) == VT_FUNC
||
257 (t
& VT_BTYPE
) == VT_LLONG
);
260 /* instruction + 4 bytes data. Return the address of the data */
261 ST_FUNC
int oad(int c
, int s
)
267 if (ind1
> cur_text_section
->data_allocated
)
268 section_realloc(cur_text_section
, ind1
);
269 *(int *)(cur_text_section
->data
+ ind
) = s
;
275 ST_FUNC
void gen_addr32(int r
, Sym
*sym
, int c
)
278 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
282 /* output constant with relocation if 'r & VT_SYM' is true */
283 ST_FUNC
void gen_addr64(int r
, Sym
*sym
, int64_t c
)
286 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
290 /* output constant with relocation if 'r & VT_SYM' is true */
291 ST_FUNC
void gen_addrpc32(int r
, Sym
*sym
, int c
)
294 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
298 /* output got address with relocation */
299 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
301 #ifndef TCC_TARGET_PE
304 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
305 sr
= cur_text_section
->reloc
;
306 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
309 tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
310 get_tok_str(sym
->v
, NULL
), c
, r
,
311 cur_text_section
->data
[ind
-3],
312 cur_text_section
->data
[ind
-2],
313 cur_text_section
->data
[ind
-1]
315 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
319 /* we use add c, %xxx for displacement */
321 o(0xc0 + REG_VALUE(r
));
326 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
328 op_reg
= REG_VALUE(op_reg
) << 3;
329 if ((r
& VT_VALMASK
) == VT_CONST
) {
330 /* constant memory reference */
333 gen_gotpcrel(r
, sym
, c
);
335 gen_addrpc32(r
, sym
, c
);
337 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
338 /* currently, we use only ebp as base */
340 /* short reference */
344 oad(0x85 | op_reg
, c
);
346 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
348 g(0x80 | op_reg
| REG_VALUE(r
));
351 g(0x00 | op_reg
| REG_VALUE(r
));
354 g(0x00 | op_reg
| REG_VALUE(r
));
358 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
360 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
362 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
365 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
367 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
370 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
371 orex(1, r
, op_reg
, opcode
);
372 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
376 /* load 'r' from value 'sv' */
377 void load(int r
, SValue
*sv
)
379 int v
, t
, ft
, fc
, fr
;
384 sv
= pe_getimport(sv
, &v2
);
388 ft
= sv
->type
.t
& ~VT_DEFSIGN
;
391 #ifndef TCC_TARGET_PE
392 /* we use indirect access via got */
393 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
394 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
395 /* use the result register as a temporary register */
396 int tr
= r
| TREG_MEM
;
398 /* we cannot use float registers as a temporary register */
399 tr
= get_reg(RC_INT
) | TREG_MEM
;
401 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
403 /* load from the temporary register */
411 if (v
== VT_LLOCAL
) {
413 v1
.r
= VT_LOCAL
| VT_LVAL
;
416 if (!(reg_classes
[fr
] & (RC_INT
|RC_R11
)))
417 fr
= get_reg(RC_INT
);
421 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
423 r
= REG_VALUE(r
); /* movd */
424 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
425 b
= 0x7e0ff3; /* movq */
427 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
428 b
= 0xdb, r
= 5; /* fldt */
429 } else if ((ft
& VT_TYPE
) == VT_BYTE
|| (ft
& VT_TYPE
) == VT_BOOL
) {
430 b
= 0xbe0f; /* movsbl */
431 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
432 b
= 0xb60f; /* movzbl */
433 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
434 b
= 0xbf0f; /* movswl */
435 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
436 b
= 0xb70f; /* movzwl */
438 assert(((ft
& VT_BTYPE
) == VT_INT
) || ((ft
& VT_BTYPE
) == VT_LLONG
)
439 || ((ft
& VT_BTYPE
) == VT_PTR
) || ((ft
& VT_BTYPE
) == VT_ENUM
)
440 || ((ft
& VT_BTYPE
) == VT_FUNC
));
445 gen_modrm64(b
, r
, fr
, sv
->sym
, fc
);
448 gen_modrm(r
, fr
, sv
->sym
, fc
);
455 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
456 gen_addrpc32(fr
, sv
->sym
, fc
);
458 if (sv
->sym
->type
.t
& VT_STATIC
) {
460 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
461 gen_addrpc32(fr
, sv
->sym
, fc
);
464 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
465 gen_gotpcrel(r
, sv
->sym
, fc
);
468 } else if (is64_type(ft
)) {
469 orex(1,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
472 orex(0,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
475 } else if (v
== VT_LOCAL
) {
476 orex(1,0,r
,0x8d); /* lea xxx(%ebp), r */
477 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
478 } else if (v
== VT_CMP
) {
480 if ((fc
& ~0x100) != TOK_NE
)
481 oad(0xb8 + REG_VALUE(r
), 0); /* mov $0, r */
483 oad(0xb8 + REG_VALUE(r
), 1); /* mov $1, r */
486 /* This was a float compare. If the parity bit is
487 set the result was unordered, meaning false for everything
488 except TOK_NE, and true for TOK_NE. */
490 o(0x037a + (REX_BASE(r
) << 8));
492 orex(0,r
,0, 0x0f); /* setxx %br */
494 o(0xc0 + REG_VALUE(r
));
495 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
498 oad(0xb8 + REG_VALUE(r
), t
); /* mov $1, r */
499 o(0x05eb + (REX_BASE(r
) << 8)); /* jmp after */
502 oad(0xb8 + REG_VALUE(r
), t
^ 1); /* mov $0, r */
504 if ((r
>= TREG_XMM0
) && (r
<= TREG_XMM7
)) {
506 /* gen_cvt_ftof(VT_DOUBLE); */
507 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
508 /* movsd -0x10(%rsp),%xmmN */
510 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
513 assert((v
>= TREG_XMM0
) && (v
<= TREG_XMM7
));
514 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
517 assert((ft
& VT_BTYPE
) == VT_DOUBLE
);
520 o(0xc0 + REG_VALUE(v
) + REG_VALUE(r
)*8);
522 } else if (r
== TREG_ST0
) {
523 assert((v
>= TREG_XMM0
) && (v
<= TREG_XMM7
));
524 /* gen_cvt_ftof(VT_LDOUBLE); */
525 /* movsd %xmmN,-0x10(%rsp) */
527 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
529 o(0xf02444dd); /* fldl -0x10(%rsp) */
532 o(0xc0 + REG_VALUE(r
) + REG_VALUE(v
) * 8); /* mov v, r */
538 /* store register 'r' in lvalue 'v' */
539 void store(int r
, SValue
*v
)
543 /* store the REX prefix in this variable when PIC is enabled */
548 v
= pe_getimport(v
, &v2
);
553 fr
= v
->r
& VT_VALMASK
;
556 #ifndef TCC_TARGET_PE
557 /* we need to access the variable via got */
558 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
559 /* mov xx(%rip), %r11 */
561 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
562 pic
= is64_type(bt
) ? 0x49 : 0x41;
566 /* XXX: incorrect if float reg to reg */
567 if (bt
== VT_FLOAT
) {
570 o(0x7e0f); /* movd */
572 } else if (bt
== VT_DOUBLE
) {
575 o(0xd60f); /* movq */
577 } else if (bt
== VT_LDOUBLE
) {
578 o(0xc0d9); /* fld %st(0) */
586 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
588 else if (is64_type(bt
))
594 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
599 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
600 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
601 } else if (fr
!= r
) {
602 /* XXX: don't we really come here? */
604 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
607 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
608 gen_modrm(r
, v
->r
, v
->sym
, fc
);
609 } else if (fr
!= r
) {
610 /* XXX: don't we really come here? */
612 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
617 /* 'is_jmp' is '1' if it is a jump */
618 static void gcall_or_jmp(int is_jmp
)
621 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
&&
622 ((vtop
->r
& VT_SYM
) || (vtop
->c
.ll
-4) == (int)(vtop
->c
.ll
-4))) {
624 if (vtop
->r
& VT_SYM
) {
625 /* relocation case */
627 greloc(cur_text_section
, vtop
->sym
, ind
+ 1, R_X86_64_PC32
);
629 greloc(cur_text_section
, vtop
->sym
, ind
+ 1, R_X86_64_PLT32
);
632 /* put an empty PC32 relocation */
633 put_elf_reloc(symtab_section
, cur_text_section
,
634 ind
+ 1, R_X86_64_PC32
, 0);
636 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
638 /* otherwise, indirect call */
642 o(0xff); /* call/jmp *r */
643 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
647 #if defined(CONFIG_TCC_BCHECK)
648 #ifndef TCC_TARGET_PE
649 static addr_t func_bound_offset
;
650 static unsigned long func_bound_ind
;
653 static void gen_static_call(int v
)
655 Sym
*sym
= external_global_sym(v
, &func_old_type
, 0);
657 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
660 /* generate a bounded pointer addition */
661 ST_FUNC
void gen_bounded_ptr_add(void)
663 /* save all temporary registers */
666 /* prepare fast x86_64 function call */
668 o(0xc68948); // mov %rax,%rsi ## second arg in %rsi, this must be size
672 o(0xc78948); // mov %rax,%rdi ## first arg in %rdi, this must be ptr
675 /* do a fast function call */
676 gen_static_call(TOK___bound_ptr_add
);
678 /* returned pointer is in rax */
680 vtop
->r
= TREG_RAX
| VT_BOUNDED
;
683 /* relocation offset of the bounding function call point */
684 vtop
->c
.ull
= (cur_text_section
->reloc
->data_offset
- sizeof(ElfW(Rela
)));
687 /* patch pointer addition in vtop so that pointer dereferencing is
689 ST_FUNC
void gen_bounded_ptr_deref(void)
697 /* XXX: put that code in generic part of tcc */
698 if (!is_float(vtop
->type
.t
)) {
699 if (vtop
->r
& VT_LVAL_BYTE
)
701 else if (vtop
->r
& VT_LVAL_SHORT
)
705 size
= type_size(&vtop
->type
, &align
);
707 case 1: func
= TOK___bound_ptr_indir1
; break;
708 case 2: func
= TOK___bound_ptr_indir2
; break;
709 case 4: func
= TOK___bound_ptr_indir4
; break;
710 case 8: func
= TOK___bound_ptr_indir8
; break;
711 case 12: func
= TOK___bound_ptr_indir12
; break;
712 case 16: func
= TOK___bound_ptr_indir16
; break;
714 tcc_error("unhandled size when dereferencing bounded pointer");
719 sym
= external_global_sym(func
, &func_old_type
, 0);
721 put_extern_sym(sym
, NULL
, 0, 0);
723 /* patch relocation */
724 /* XXX: find a better solution ? */
726 rel
= (ElfW(Rela
) *)(cur_text_section
->reloc
->data
+ vtop
->c
.ull
);
727 rel
->r_info
= ELF64_R_INFO(sym
->c
, ELF64_R_TYPE(rel
->r_info
));
734 static const uint8_t arg_regs
[REGN
] = {
735 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
738 /* Prepare arguments in R10 and R11 rather than RCX and RDX
739 because gv() will not ever use these */
740 static int arg_prepare_reg(int idx
) {
741 if (idx
== 0 || idx
== 1)
742 /* idx=0: r10, idx=1: r11 */
745 return arg_regs
[idx
];
748 static int func_scratch
;
750 /* Generate function call. The function address is pushed first, then
751 all the parameters in call order. This functions pops all the
752 parameters and the function address. */
/* Emit instruction 'b' addressing d(%rsp) with register field 'r'.
   The 0x100 flag in 'r' marks non-GP operands (e.g. x87) that must
   not contribute REX bits. */
void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        /* disp8 form */
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        /* disp32 form */
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
766 ST_FUNC
int regargs_nregs(RegArgs
*args
)
771 /* Return the number of registers needed to return the struct, or 0 if
772 returning via struct pointer. */
773 ST_FUNC
int gfunc_sret(CType
*vt
, int variadic
, CType
*ret
, int *ret_align
, int *regsize
, RegArgs
*args
)
777 *ret_align
= 1; // Never have to re-align return values for x86-64
778 size
= type_size(vt
, &align
);
782 } else if (size
> 4) {
785 } else if (size
> 2) {
788 } else if (size
> 1) {
799 static int is_sse_float(int t
) {
802 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
805 int gfunc_arg_size(CType
*type
) {
807 if (type
->t
& (VT_ARRAY
|VT_BITFIELD
))
809 return type_size(type
, &align
);
812 void gfunc_call(int nb_args
)
814 int size
, r
, args_size
, i
, d
, bt
, struct_size
;
817 args_size
= (nb_args
< REGN
? REGN
: nb_args
) * PTR_SIZE
;
820 /* for struct arguments, we need to call memcpy and the function
821 call breaks register passing arguments we are preparing.
822 So, we process arguments which will be passed by stack first. */
823 struct_size
= args_size
;
824 for(i
= 0; i
< nb_args
; i
++) {
829 bt
= (sv
->type
.t
& VT_BTYPE
);
830 size
= gfunc_arg_size(&sv
->type
);
833 continue; /* arguments smaller than 8 bytes passed in registers or on stack */
835 if (bt
== VT_STRUCT
) {
836 /* align to stack align size */
837 size
= (size
+ 15) & ~15;
838 /* generate structure store */
840 gen_offs_sp(0x8d, r
, struct_size
);
843 /* generate memcpy call */
844 vset(&sv
->type
, r
| VT_LVAL
, 0);
848 } else if (bt
== VT_LDOUBLE
) {
850 gen_offs_sp(0xdb, 0x107, struct_size
);
855 if (func_scratch
< struct_size
)
856 func_scratch
= struct_size
;
859 struct_size
= args_size
;
861 for(i
= 0; i
< nb_args
; i
++) {
863 bt
= (vtop
->type
.t
& VT_BTYPE
);
865 size
= gfunc_arg_size(&vtop
->type
);
867 /* align to stack align size */
868 size
= (size
+ 15) & ~15;
871 gen_offs_sp(0x8d, d
, struct_size
);
872 gen_offs_sp(0x89, d
, arg
*8);
874 d
= arg_prepare_reg(arg
);
875 gen_offs_sp(0x8d, d
, struct_size
);
879 if (is_sse_float(vtop
->type
.t
)) {
880 gv(RC_XMM0
); /* only use one float register */
882 /* movq %xmm0, j*8(%rsp) */
883 gen_offs_sp(0xd60f66, 0x100, arg
*8);
885 /* movaps %xmm0, %xmmN */
887 o(0xc0 + (arg
<< 3));
888 d
= arg_prepare_reg(arg
);
889 /* mov %xmm0, %rxx */
892 o(0xc0 + REG_VALUE(d
));
895 if (bt
== VT_STRUCT
) {
896 vtop
->type
.ref
= NULL
;
897 vtop
->type
.t
= size
> 4 ? VT_LLONG
: size
> 2 ? VT_INT
898 : size
> 1 ? VT_SHORT
: VT_BYTE
;
903 gen_offs_sp(0x89, r
, arg
*8);
905 d
= arg_prepare_reg(arg
);
906 orex(1,d
,r
,0x89); /* mov */
907 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
915 /* Copy R10 and R11 into RCX and RDX, respectively */
917 o(0xd1894c); /* mov %r10, %rcx */
919 o(0xda894c); /* mov %r11, %rdx */
928 #define FUNC_PROLOG_SIZE 11
930 /* generate function prolog of type 't' */
931 void gfunc_prolog(CType
*func_type
)
933 int addr
, reg_param_index
, bt
, size
;
942 ind
+= FUNC_PROLOG_SIZE
;
943 func_sub_sp_offset
= ind
;
946 sym
= func_type
->ref
;
948 /* if the function returns a structure, then add an
949 implicit pointer parameter */
951 func_var
= (sym
->c
== FUNC_ELLIPSIS
);
952 size
= gfunc_arg_size(&func_vt
);
954 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
960 /* define parameters */
961 while ((sym
= sym
->next
) != NULL
) {
963 bt
= type
->t
& VT_BTYPE
;
964 size
= gfunc_arg_size(type
);
966 if (reg_param_index
< REGN
) {
967 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
969 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
| VT_REF
, addr
);
971 if (reg_param_index
< REGN
) {
972 /* save arguments passed by register */
973 if ((bt
== VT_FLOAT
) || (bt
== VT_DOUBLE
)) {
974 o(0xd60f66); /* movq */
975 gen_modrm(reg_param_index
, VT_LOCAL
, NULL
, addr
);
977 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
980 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
986 while (reg_param_index
< REGN
) {
987 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
988 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
995 /* generate function epilog */
996 void gfunc_epilog(void)
1000 o(0xc9); /* leave */
1001 if (func_ret_sub
== 0) {
1004 o(0xc2); /* ret n */
1006 g(func_ret_sub
>> 8);
1010 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1011 /* align local size to word & save local variables */
1012 v
= (func_scratch
+ -loc
+ 15) & -16;
1015 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
1016 oad(0xb8, v
); /* mov stacksize, %eax */
1017 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
1018 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
1019 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
1021 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1022 o(0xec8148); /* sub rsp, stacksize */
1026 cur_text_section
->data_offset
= saved_ind
;
1027 pe_add_unwind_data(ind
, saved_ind
, v
);
1028 ind
= cur_text_section
->data_offset
;
/* Add 'val' to %rsp, using the short imm8 encoding when it fits. */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
/* System V AMD64 ABI parameter-passing classes used by the
   classification below. */
typedef enum X86_64_Mode {
  x86_64_mode_none,
  x86_64_mode_memory,
  x86_64_mode_integer,
  x86_64_mode_sse,
  x86_64_mode_x87
} X86_64_Mode;
1051 static X86_64_Mode
classify_x86_64_merge(X86_64_Mode a
, X86_64_Mode b
)
1055 else if (a
== x86_64_mode_none
)
1057 else if (b
== x86_64_mode_none
)
1059 else if ((a
== x86_64_mode_memory
) || (b
== x86_64_mode_memory
))
1060 return x86_64_mode_memory
;
1061 else if ((a
== x86_64_mode_integer
) || (b
== x86_64_mode_integer
))
1062 return x86_64_mode_integer
;
1063 else if ((a
== x86_64_mode_x87
) || (b
== x86_64_mode_x87
))
1064 return x86_64_mode_memory
;
1066 return x86_64_mode_sse
;
1069 /* classify the x86 eightbytes from byte index start to byte index
1070 * end, at offset offset from the root struct */
1071 static X86_64_Mode
classify_x86_64_inner(CType
*ty
, int offset
, int start
, int end
)
1076 switch (ty
->t
& VT_BTYPE
) {
1077 case VT_VOID
: return x86_64_mode_none
;
1086 case VT_ENUM
: return x86_64_mode_integer
;
1089 case VT_DOUBLE
: return x86_64_mode_sse
;
1091 case VT_LDOUBLE
: return x86_64_mode_x87
;
1096 mode
= x86_64_mode_none
;
1097 while ((f
= f
->next
) != NULL
) {
1098 if (f
->c
+ offset
>= start
&& f
->c
+ offset
< end
)
1099 mode
= classify_x86_64_merge(mode
, classify_x86_64_inner(&f
->type
, f
->c
+ offset
, start
, end
));
1108 static X86_64_Mode
classify_x86_64_arg_eightbyte(CType
*ty
, int offset
)
1112 assert((ty
->t
& VT_BTYPE
) == VT_STRUCT
);
1114 mode
= classify_x86_64_inner(ty
, 0, offset
, offset
+ 8);
1119 static void regargs_init(RegArgs
*args
)
1122 for(i
=0; i
<REG_ARGS_MAX
; i
++) {
1128 static X86_64_Mode
classify_x86_64_arg(CType
*ty
, CType
*ret
, int *psize
, int *palign
, RegArgs
*args
)
1130 X86_64_Mode mode
= x86_64_mode_none
;
1131 int size
, align
, ret_t
= 0;
1132 int ireg
= 0, freg
= 0;
1137 if (ty
->t
& (VT_BITFIELD
|VT_ARRAY
)) {
1141 args
->ireg
[ireg
++] = 0;
1143 mode
= x86_64_mode_integer
;
1145 size
= type_size(ty
, &align
);
1146 *psize
= (size
+ 7) & ~7;
1147 *palign
= (align
+ 7) & ~7;
1150 mode
= x86_64_mode_memory
;
1154 for(start
=0; start
< size
; start
+= 8) {
1155 if ((ty
->t
& VT_BTYPE
) == VT_STRUCT
) {
1156 mode
= classify_x86_64_arg_eightbyte(ty
, start
);
1158 mode
= classify_x86_64_inner(ty
, 0, 0, size
);
1161 if (mode
== x86_64_mode_integer
) {
1163 args
->ireg
[ireg
++] = start
;
1164 ret_t
= (size
> 4) ? VT_LLONG
: VT_INT
;
1165 } else if (mode
== x86_64_mode_sse
) {
1167 args
->freg
[freg
++] = start
;
1168 ret_t
= (size
> 4) ? VT_DOUBLE
: VT_FLOAT
;
1184 ST_FUNC
int classify_x86_64_va_arg(CType
*ty
)
1186 /* This definition must be synced with stdarg.h */
1187 enum __va_arg_type
{
1188 __va_gen_reg
, __va_float_reg
, __va_stack
1191 X86_64_Mode mode
= classify_x86_64_arg(ty
, NULL
, &size
, &align
, NULL
);
1193 default: return __va_stack
;
1194 case x86_64_mode_integer
: return __va_gen_reg
;
1195 case x86_64_mode_sse
: return __va_float_reg
;
1199 static int regargs_iregs(RegArgs
*args
)
1203 for(i
=0; i
<REG_ARGS_MAX
; i
++) {
1204 if(args
->ireg
[i
] != -1)
1211 static int regargs_fregs(RegArgs
*args
)
1215 for(i
=0; i
<REG_ARGS_MAX
; i
++) {
1216 if(args
->freg
[i
] != -1)
1223 /* Count the total number of registers used by args */
1224 ST_FUNC
int regargs_nregs(RegArgs
*args
)
1228 for(i
=0; i
<REG_ARGS_MAX
; i
++) {
1229 if(args
->ireg
[i
] != -1)
1232 if(args
->freg
[i
] != -1)
1239 ST_FUNC
int gfunc_sret(CType
*vt
, int variadic
, CType
*ret
, int *ret_align
, int *regsize
, RegArgs
*args
)
1243 *ret_align
= 1; // Never have to re-align return values for x86-64
1246 mode
= classify_x86_64_arg(vt
, ret
, &size
, &align
, args
);
1248 return mode
!= x86_64_mode_memory
&&
1249 mode
!= x86_64_mode_none
;
1253 static const uint8_t arg_regs
[REGN
] = {
1254 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
1257 static int arg_prepare_reg(int idx
) {
1258 if (idx
== 2 || idx
== 3)
1259 /* idx=2: r10, idx=3: r11 */
1262 return arg_regs
[idx
];
1265 /* Generate function call. The function address is pushed first, then
1266 all the parameters in call order. This functions pops all the
1267 parameters and the function address. */
1268 void gfunc_call(int nb_args
)
1272 int size
, align
, r
, args_size
, stack_adjust
, run_start
, run_end
, i
;
1273 int nb_reg_args
= 0;
1274 int nb_sse_args
= 0;
1275 int sse_reg
= 0, gen_reg
= 0;
1276 RegArgs
*reg_args
= alloca(nb_args
* sizeof *reg_args
);
1278 /* calculate the number of integer/float register arguments */
1279 for(i
= nb_args
- 1; i
>= 0; i
--) {
1281 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_args
[i
]);
1282 fregs
= regargs_fregs(®_args
[i
]);
1283 iregs
= regargs_iregs(®_args
[i
]);
1285 nb_sse_args
+= fregs
;
1286 nb_reg_args
+= iregs
;
1288 if (sse_reg
+ fregs
> 8 || gen_reg
+ iregs
> REGN
) {
1289 regargs_init(®_args
[i
]);
1296 /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
1297 and ended by a 16-byte aligned argument. This is because, from the point of view of
1298 the callee, argument alignment is computed from the bottom up. */
1299 /* for struct arguments, we need to call memcpy and the function
1300 call breaks register passing arguments we are preparing.
1301 So, we process arguments which will be passed by stack first. */
1302 gen_reg
= nb_reg_args
;
1303 sse_reg
= nb_sse_args
;
1306 while (run_start
!= nb_args
) {
1307 int run_gen_reg
= gen_reg
, run_sse_reg
= sse_reg
;
1311 for(i
= run_start
; (i
< nb_args
) && (run_end
== nb_args
); i
++) {
1312 int stack
= regargs_nregs(®_args
[i
]) == 0;
1313 classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, NULL
);
1319 stack_adjust
+= size
;
1323 gen_reg
= run_gen_reg
;
1324 sse_reg
= run_sse_reg
;
1326 /* adjust stack to align SSE boundary */
1327 if (stack_adjust
&= 15) {
1328 /* fetch cpu flag before the following sub will change the value */
1329 if (vtop
>= vstack
&& (vtop
->r
& VT_VALMASK
) == VT_CMP
)
1332 stack_adjust
= 16 - stack_adjust
;
1334 oad(0xec81, stack_adjust
); /* sub $xxx, %rsp */
1335 args_size
+= stack_adjust
;
1338 for(i
= run_start
; i
< run_end
;) {
1339 int arg_stored
= regargs_nregs(®_args
[i
]) == 0;
1348 /* Swap argument to top, it will possibly be changed here,
1349 and might use more temps. At the end of the loop we keep
1350 in on the stack and swap it back to its original position
1351 if it is a register. */
1356 classify_x86_64_arg(&vtop
->type
, NULL
, &size
, &align
, &args
);
1358 switch (vtop
->type
.t
& VT_BTYPE
) {
1360 /* allocate the necessary size on stack */
1362 oad(0xec81, size
); /* sub $xxx, %rsp */
1363 /* generate structure store */
1364 r
= get_reg(RC_INT
);
1365 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1366 o(0xe0 + REG_VALUE(r
));
1367 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1380 o(0x50); /* push $rax */
1381 /* movq %xmmN, (%rsp) */
1383 o(0x04 + REG_VALUE(r
)*8);
1390 /* XXX: implicit cast ? */
1393 orex(0,r
,0,0x50 + REG_VALUE(r
)); /* push r */
1398 /* And swap the argument back to its original position. */
1404 assert((vtop
->type
.t
== tmp
.type
.t
) && (vtop
->r
== tmp
.r
));
1406 memmove(reg_args
+ i
, reg_args
+ i
+ 1, (nb_args
- i
- 1) * sizeof *reg_args
);
1411 /* handle 16 byte aligned arguments at end of run */
1412 run_start
= i
= run_end
;
1413 while (i
< nb_args
) {
1414 /* Rotate argument to top since it will always be popped */
1415 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, NULL
);
1421 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1423 oad(0xec8148, size
); /* sub $xxx, %rsp */
1424 o(0x7cdb); /* fstpt 0(%rsp) */
1429 assert(mode
== x86_64_mode_memory
);
1431 /* allocate the necessary size on stack */
1433 oad(0xec81, size
); /* sub $xxx, %rsp */
1434 /* generate structure store */
1435 r
= get_reg(RC_INT
);
1436 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1437 o(0xe0 + REG_VALUE(r
));
1438 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1445 memmove(reg_args
+ i
, reg_args
+ i
+ 1, (nb_args
- i
- 1) * sizeof *reg_args
);
1450 /* XXX This should be superfluous. */
1451 save_regs(0); /* save used temporary registers */
1453 /* recalculate the number of register arguments there actually
1454 * are. This is slow but more obviously correct than using the
1458 for(i
= 0; i
< nb_args
; i
++) {
1459 gen_reg
+= regargs_iregs(®_args
[i
]);
1460 sse_reg
+= regargs_fregs(®_args
[i
]);
1463 /* then, we prepare register passing arguments.
1464 Note that we cannot set RDX and RCX in this loop because gv()
1465 may break these temporary registers. Let's use R10 and R11
1467 assert(gen_reg
<= REGN
);
1468 assert(sse_reg
<= 8);
1469 for(i
= 0; i
< nb_args
; i
++) {
1474 /* Alter stack entry type so that gv() knows how to treat it */
1475 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
1478 for(k
=REG_ARGS_MAX
-1; k
>=0; k
--) {
1479 if (args
.freg
[k
] == -1)
1483 assert(sse_reg
>= 0);
1486 vtop
->type
.t
= VT_DOUBLE
;
1487 vtop
->c
.ull
+= args
.freg
[k
];
1488 gv(RC_XMM0
<< sse_reg
);
1491 for(k
=REG_ARGS_MAX
-1; k
>=0; k
--) {
1493 if (args
.ireg
[k
] == -1)
1499 vtop
->type
.t
= VT_LLONG
;
1500 vtop
->c
.ull
+= args
.ireg
[k
];
1502 d
= arg_prepare_reg(gen_reg
);
1503 orex(1,d
,r
,0x89); /* mov */
1504 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
1508 /* XXX is it really necessary to set vtop->type? */
1509 classify_x86_64_arg(&vtop
->type
, &type
, &size
, &align
, NULL
);
1511 if (args
.freg
[0] != -1) {
1513 /* Load directly to register */
1514 gv(RC_XMM0
<< sse_reg
);
1515 } else if (args
.ireg
[0] != -1) {
1518 /* XXX: implicit cast ? */
1521 d
= arg_prepare_reg(gen_reg
);
1522 orex(1,d
,r
,0x89); /* mov */
1523 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
1530 assert(gen_reg
== 0);
1531 assert(sse_reg
== 0);
1533 /* We shouldn't have many operands on the stack anymore, but the
1534 call address itself is still there, and it might be in %eax
1535 (or edx/ecx) currently, which the below writes would clobber.
1536 So evict all remaining operands here. */
1539 /* Copy R10 and R11 into RDX and RCX, respectively */
1540 if (nb_reg_args
> 2) {
1541 o(0xd2894c); /* mov %r10, %rdx */
1542 if (nb_reg_args
> 3) {
1543 o(0xd9894c); /* mov %r11, %rcx */
1547 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
1555 #define FUNC_PROLOG_SIZE 11
1557 static void push_arg_reg(int i
) {
1559 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
/* NOTE(review): this chunk is a fragmented extraction -- original source
 * lines are missing and statements are split across lines. All tokens
 * below are preserved verbatim; only comments were added.
 *
 * gfunc_prolog: emit the prologue for a function of type 'func_type'.
 * Reserves FUNC_PROLOG_SIZE bytes at 'ind' (the actual push/sub pair is
 * patched in later by gfunc_epilog), classifies and spills the
 * register-passed parameters into the frame, and handles the hidden
 * pointer parameter when the return value is of memory class. */
1562 /* generate function prolog of type 't' */
1563 void gfunc_prolog(CType
*func_type
)
1566 int i
, addr
, align
, size
;
1567 int param_addr
= 0, reg_param_index
, sse_param_index
;
1571 sym
= func_type
->ref
;
/* parameters start above the saved frame pointer and return address */
1572 addr
= PTR_SIZE
* 2;
1574 ind
+= FUNC_PROLOG_SIZE
;
1575 func_sub_sp_offset
= ind
;
/* Variadic function: walk the named parameters to compute how many
 * integer/SSE registers and how much stack they consume. The values
 * stored below (seen_reg_num * 8, seen_sse_num * 16 + 48,
 * seen_stack_size) look like the gp_offset / fp_offset /
 * overflow_arg_area seeds of a SysV va_list save area -- TODO(review):
 * confirm against the ABI; the va_start consumer is not visible here. */
1578 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
1579 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
1580 seen_reg_num
= seen_sse_num
= 0;
1581 /* frame pointer and return address */
1582 seen_stack_size
= PTR_SIZE
* 2;
1583 /* count the number of seen parameters */
1584 sym
= func_type
->ref
;
1585 while ((sym
= sym
->next
) != NULL
) {
1589 mode
= classify_x86_64_arg(type
, NULL
, &size
, &align
, &args
);
/* stack-passed portion: align, then account for the argument size */
1594 seen_stack_size
= ((seen_stack_size
+ align
- 1) & -align
) + size
;
1597 case x86_64_mode_integer
:
1598 case x86_64_mode_sse
: {
1601 seen_sse_num
+= regargs_fregs(&args
);
1602 seen_reg_num
+= regargs_iregs(&args
);
1604 if (seen_reg_num
> 8) {
1608 if (seen_sse_num
> 8) {
1621 /* movl $0x????????, -0x10(%rbp) */
1623 gen_le32(seen_reg_num
* 8);
1624 /* movl $0x????????, -0xc(%rbp) */
1626 gen_le32(seen_sse_num
* 16 + 48);
1627 /* movl $0x????????, -0x8(%rbp) */
1629 gen_le32(seen_stack_size
);
1631 /* save all register passing arguments */
/* spill the 8 SSE argument registers (movq %xmmN, loc(%rbp)),
 * highest register first (7 - i) */
1632 for (i
= 0; i
< 8; i
++) {
1634 o(0xd60f66); /* movq */
1635 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
1636 /* movq $0, loc+8(%rbp) */
/* spill the integer argument registers, highest first */
1641 for (i
= 0; i
< REGN
; i
++) {
1642 push_arg_reg(REGN
-1-i
);
1646 sym
= func_type
->ref
;
1647 reg_param_index
= 0;
1648 sse_param_index
= 0;
1650 /* if the function returns a structure, then add an
1651 implicit pointer parameter */
1652 func_vt
= sym
->type
;
1653 mode
= classify_x86_64_arg(&func_vt
, NULL
, &size
, &align
, NULL
);
1654 if (mode
== x86_64_mode_memory
) {
/* memory-class return: the caller passed the result address in the
 * first integer argument register; save it in the frame */
1655 push_arg_reg(reg_param_index
);
1659 /* define parameters */
1660 while ((sym
= sym
->next
) != NULL
) {
1662 int reg_count_integer
= 0;
1663 int reg_count_sse
= 0;
1667 mode
= classify_x86_64_arg(type
, NULL
, &size
, &align
, &args
);
1668 reg_count_integer
= regargs_iregs(&args
);
1669 reg_count_sse
= regargs_fregs(&args
);
1672 case x86_64_mode_integer
:
1673 case x86_64_mode_sse
:
1674 if (reg_count_integer
|| reg_count_sse
) {
/* argument fits only if both its SSE part and integer part still
 * have free registers (8 SSE regs, REGN integer regs) */
1675 if ((reg_count_sse
== 0 || sse_param_index
+ reg_count_sse
<= 8) &&
1676 (reg_count_integer
== 0 || reg_param_index
+ reg_count_integer
<= REGN
)) {
1677 /* argument fits into registers */
1683 /* save arguments passed by register */
1684 loc
-= (reg_count_sse
+ reg_count_integer
) * 8;
/* store each SSE piece at the struct offset recorded in args.freg[i] */
1686 for (i
= 0; i
< reg_count_sse
; ++i
) {
1687 o(0xd60f66); /* movq */
1688 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, param_addr
+ args
.freg
[i
]);
/* store each integer piece at the struct offset in args.ireg[i] */
1691 for (i
= 0; i
< reg_count_integer
; ++i
) {
1692 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, param_addr
+ args
.ireg
[i
]);
/* argument did not fit in registers: it lives on the stack */
1696 addr
= (addr
+ align
- 1) & -align
;
1702 case x86_64_mode_memory
:
1703 case x86_64_mode_x87
:
1704 addr
= (addr
+ align
- 1) & -align
;
1708 default: break; /* nothing to be done for x86_64_mode_none */
/* register the parameter symbol at its frame offset */
1710 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1711 VT_LOCAL
| VT_LVAL
, param_addr
);
1714 #ifdef CONFIG_TCC_BCHECK
1715 /* leave some room for bound checking code */
1716 if (tcc_state
->do_bounds_check
) {
1717 func_bound_offset
= lbounds_section
->data_offset
;
1718 func_bound_ind
= ind
;
1719 oad(0xb8, 0); /* lbound section pointer */
1720 o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
1721 oad(0xb8, 0); /* call to function */
/* NOTE(review): fragmented extraction -- tokens preserved verbatim,
 * comments only added.
 *
 * gfunc_epilog: emit the epilogue -- optional bounds-checking teardown
 * (CONFIG_TCC_BCHECK), then "leave" and "ret"/"ret n", and finally
 * rewind 'ind' to the FUNC_PROLOG_SIZE slot reserved by gfunc_prolog
 * to write the real "push %rbp; mov %rsp,%rbp; sub $stacksize,%rsp"
 * now that the frame size (-loc, rounded to 16) is known. */
1726 /* generate function epilog */
1727 void gfunc_epilog(void)
1731 #ifdef CONFIG_TCC_BCHECK
/* only emit the bounds teardown if this function actually registered
 * local bounds (the lbounds section grew since the prologue) */
1732 if (tcc_state
->do_bounds_check
1733 && func_bound_offset
!= lbounds_section
->data_offset
)
1739 /* add end of table info */
1740 bounds_ptr
= section_ptr_add(lbounds_section
, sizeof(addr_t
));
1743 /* generate bound local allocation */
1744 sym_data
= get_sym_ref(&char_pointer_type
, lbounds_section
,
1745 func_bound_offset
, lbounds_section
->data_offset
);
/* patch the placeholder emitted in gfunc_prolog: point the mov-imm
 * at the bounds table and call __bound_local_new */
1747 ind
= func_bound_ind
;
1748 greloc(cur_text_section
, sym_data
, ind
+ 1, R_386_32
);
1750 gen_static_call(TOK___bound_local_new
);
1753 /* generate bound check local freeing */
1754 o(0x5250); /* save returned value, if any */
1755 greloc(cur_text_section
, sym_data
, ind
+ 1, R_386_32
);
1756 oad(0xb8, 0); /* mov xxx, %rax */
1757 o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
1758 gen_static_call(TOK___bound_local_delete
);
1759 o(0x585a); /* restore returned value, if any */
1762 o(0xc9); /* leave */
/* func_ret_sub != 0: callee pops its arguments ("ret n") --
 * presumably only used for special calling conventions */
1763 if (func_ret_sub
== 0) {
1766 o(0xc2); /* ret n */
1768 g(func_ret_sub
>> 8);
1770 /* align local size to word & save local variables */
1771 v
= (-loc
+ 15) & -16;
1773 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1774 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1775 o(0xec8148); /* sub rsp, stacksize */
/* gjmp fragment: emits an unconditional jmp rel32 (opcode 0xe9) via
 * psym(), which chains the unresolved target into the label patch list
 * and returns the new list head.
 * NOTE(review): the function header line ("int gjmp(int t)") is missing
 * from this extraction. */
1782 /* generate a jump to a label */
1785 return psym(0xe9, t
);
/* gjmp_addr: jump to the fixed code address 'a'. Emits jmp rel32
 * (0xe9) with displacement a - ind - 5 (5 bytes = opcode + rel32).
 * NOTE(review): a short-jump (rel8) fast path appears to be missing
 * from this extraction (original lines 1790-1796). */
1788 /* generate a jump to a fixed address */
1789 void gjmp_addr(int a
)
1797 oad(0xe9, a
- ind
- 5);
/* NOTE(review): fragmented extraction -- tokens preserved verbatim.
 *
 * gtst: generate a conditional jump on the top stack value and pop it.
 * 'inv' inverts the sense of the test; 't' is the current jump chain.
 * Handles three cases visible below: CPU flags already set by a
 * comparison (VT_CMP encoded in vtop->c.i, with bit 0x100 marking a
 * float compare that must also check the parity flag), an existing
 * jump chain (VT_JMP/VT_JMPI, the && / || optimization), and -- in
 * lines not visible here -- a plain value. */
1801 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1802 int gtst(int inv
, int t
)
1806 v
= vtop
->r
& VT_VALMASK
;
1808 /* fast case : can jump directly since flags are set */
1809 if (vtop
->c
.i
& 0x100)
1811 /* This was a float compare. If the parity flag is set
1812 the result was unordered. For anything except != this
1813 means false and we don't jump (anding both conditions).
1814 For != this means true (oring both).
1815 Take care about inverting the test. We need to jump
1816 to our target if the result was unordered and test wasn't NE,
1817 otherwise if unordered we don't want to jump. */
1818 vtop
->c
.i
&= ~0x100;
1819 if (!inv
== (vtop
->c
.i
!= TOK_NE
))
1820 o(0x067a); /* jp +6 */
1824 t
= psym(0x8a, t
); /* jp t */
/* conditional jcc: condition code derived from the stored comparison
 * token, xor'ed with 'inv' to flip the sense */
1828 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
1829 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
1830 /* && or || optimization */
1831 if ((v
& 1) == inv
) {
1832 /* insert vtop->c jump list in t */
/* walk to the end of the existing patch chain in the code buffer */
1835 p
= (int *)(cur_text_section
->data
+ *p
);
/* NOTE(review): fragmented extraction -- tokens preserved verbatim.
 *
 * gen_opi: generate an integer binary operation 'op' on the two top
 * stack values. 'll' selects 64-bit operand size (REX.W via orex),
 * 'uu' unsignedness, 'cc' whether the top operand is a plain constant.
 * Visible paths: ALU ops with sign-extendable imm8 (0x83) or imm32
 * (0x81) immediates, reg-reg ALU forms, imul, shift by constant (0xc1)
 * or by %cl (0xd3), and div/idiv through %rax with cqto / zeroed %rdx. */
1847 /* generate an integer binary operation */
1848 void gen_opi(int op
)
1853 ll
= is64_type(vtop
[-1].type
.t
);
1854 uu
= (vtop
[-1].type
.t
& VT_UNSIGNED
) != 0;
1855 cc
= (vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
;
1859 case TOK_ADDC1
: /* add with carry generation */
/* constant operand that fits in 32 bits (even for 64-bit ops) */
1862 if (cc
&& (!ll
|| (int)vtop
->c
.ll
== vtop
->c
.ll
)) {
1869 /* XXX: generate inc and dec for smaller code ? */
1870 orex(ll
, r
, 0, 0x83);
1871 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1874 orex(ll
, r
, 0, 0x81);
1875 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1878 gv2(RC_INT
, RC_INT
);
1881 orex(ll
, r
, fr
, (opc
<< 3) | 0x01);
1882 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
/* comparison operators: flags are now set; result handled elsewhere */
1885 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1891 case TOK_SUBC1
: /* sub with carry generation */
1894 case TOK_ADDC2
: /* add with carry use */
1897 case TOK_SUBC2
: /* sub with carry use */
1910 gv2(RC_INT
, RC_INT
);
1913 orex(ll
, fr
, r
, 0xaf0f); /* imul fr, r */
1914 o(0xc0 + REG_VALUE(fr
) + REG_VALUE(r
) * 8);
/* shifts: opc selects shl/shr/sar in the ModRM reg field */
1926 opc
= 0xc0 | (opc
<< 3);
1932 orex(ll
, r
, 0, 0xc1); /* shl/shr/sar $xxx, r */
1933 o(opc
| REG_VALUE(r
));
/* mask the shift count to the operand width (63 or 31) */
1934 g(vtop
->c
.i
& (ll
? 63 : 31));
1936 /* we generate the shift in ecx */
1937 gv2(RC_INT
, RC_RCX
);
1939 orex(ll
, r
, 0, 0xd3); /* shl/shr/sar %cl, r */
1940 o(opc
| REG_VALUE(r
));
1953 /* first operand must be in eax */
1954 /* XXX: need better constraint for second operand */
1955 gv2(RC_RAX
, RC_RCX
);
/* unsigned: zero %rdx; signed: sign-extend %rax into %rdx (cqto) */
1960 orex(ll
, 0, 0, uu
? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1961 orex(ll
, fr
, 0, 0xf7); /* div fr, %eax */
1962 o((uu
? 0xf0 : 0xf8) + REG_VALUE(fr
));
/* remainder lives in %rdx, quotient in %rax */
1963 if (op
== '%' || op
== TOK_UMOD
)
/* gen_opl: long-long binary operation entry point.
 * NOTE(review): the body is missing from this extraction -- in
 * comparable code generators it simply forwards to gen_opi; confirm
 * against the original source. */
1975 void gen_opl(int op
)
/* NOTE(review): fragmented extraction -- tokens preserved verbatim.
 *
 * gen_opf: float binary op on the two top stack values, which share the
 * same FP type. Two regimes are visible: long double via the x87 stack
 * (TREG_ST0, fcompp/fucompp + fnstsw for compares, fxxxp for
 * arithmetic) and float/double via SSE (ucomisd/comisd for compares,
 * reg or memory second operand for arithmetic). Comparison results are
 * flagged with 0x100 in vtop->c.i so gtst() knows to test parity. */
1980 /* generate a floating point operation 'v = t1 op t2' instruction. The
1981 two operands are guaranted to have the same floating point type */
1982 /* XXX: need to use ST1 too */
1983 void gen_opf(int op
)
1985 int a
, ft
, fc
, swapped
, r
;
/* pick the register class: x87 ST0 for long double, XMM otherwise */
1987 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1989 /* convert constants to memory references */
1990 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1995 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1998 /* must put at least one value in the floating point register */
1999 if ((vtop
[-1].r
& VT_LVAL
) &&
2000 (vtop
[0].r
& VT_LVAL
)) {
2006 /* swap the stack if needed so that t1 is the register and t2 is
2007 the memory reference */
2008 if (vtop
[-1].r
& VT_LVAL
) {
2012 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
2013 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
2014 /* load on stack second operand */
2015 load(TREG_ST0
, vtop
);
2016 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
2017 if (op
== TOK_GE
|| op
== TOK_GT
)
2019 else if (op
== TOK_EQ
|| op
== TOK_NE
)
2022 o(0xc9d9); /* fxch %st(1) */
2023 if (op
== TOK_EQ
|| op
== TOK_NE
)
2024 o(0xe9da); /* fucompp */
2026 o(0xd9de); /* fcompp */
2027 o(0xe0df); /* fnstsw %ax */
/* decode the x87 status word (C0/C2/C3 in %ah) per comparison kind */
2029 o(0x45e480); /* and $0x45, %ah */
2030 o(0x40fC80); /* cmp $0x40, %ah */
2031 } else if (op
== TOK_NE
) {
2032 o(0x45e480); /* and $0x45, %ah */
2033 o(0x40f480); /* xor $0x40, %ah */
2035 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
2036 o(0x05c4f6); /* test $0x05, %ah */
2039 o(0x45c4f6); /* test $0x45, %ah */
2046 /* no memory reference possible for long double operations */
2047 load(TREG_ST0
, vtop
);
2071 o(0xde); /* fxxxp %st, %st(1) */
/* ---- SSE (float/double) path ---- */
2076 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
2077 /* if saved lvalue, then we must reload it */
2080 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
2082 r
= get_reg(RC_INT
);
2084 v1
.r
= VT_LOCAL
| VT_LVAL
;
2090 if (op
== TOK_EQ
|| op
== TOK_NE
) {
/* for </<= swap operands so only setae/seta are needed */
2093 if (op
== TOK_LE
|| op
== TOK_LT
)
2095 if (op
== TOK_LE
|| op
== TOK_GE
) {
2096 op
= 0x93; /* setae */
2098 op
= 0x97; /* seta */
2106 assert(!(vtop
[-1].r
& VT_LVAL
));
2108 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
)
2110 if (op
== TOK_EQ
|| op
== TOK_NE
)
2111 o(0x2e0f); /* ucomisd */
2113 o(0x2f0f); /* comisd */
2115 if (vtop
->r
& VT_LVAL
) {
2116 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
2118 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
/* mark as float compare (0x100) so gtst() also checks parity */
2123 vtop
->c
.i
= op
| 0x100;
2125 assert((vtop
->type
.t
& VT_BTYPE
) != VT_LDOUBLE
);
/* ---- SSE arithmetic ---- */
2143 assert((ft
& VT_BTYPE
) != VT_LDOUBLE
);
2146 /* if saved lvalue, then we must reload it */
2147 if ((vtop
->r
& VT_VALMASK
) == VT_LLOCAL
) {
2149 r
= get_reg(RC_INT
);
2151 v1
.r
= VT_LOCAL
| VT_LVAL
;
2157 assert(!(vtop
[-1].r
& VT_LVAL
));
2159 assert(vtop
->r
& VT_LVAL
);
2164 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
2172 if (vtop
->r
& VT_LVAL
) {
2173 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
2175 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
/* NOTE(review): fragmented extraction -- tokens preserved verbatim.
 *
 * gen_cvt_itof: integer -> floating point conversion to type 't'.
 * Long double targets go through the x87 stack by pushing the integer
 * and using fild; unsigned int is zero-extended by pushing a 0 qword
 * first. float/double targets use SSE cvtsi2ss/cvtsi2sd (prefix 0xf3
 * for float, 0xf2 for double). */
2183 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
2184 and 'long long' cases. */
2185 void gen_cvt_itof(int t
)
2187 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
2190 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
2191 /* signed long long to float/double/long double (unsigned case
2192 is handled generically) */
2193 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
2194 o(0x242cdf); /* fildll (%rsp) */
2195 o(0x08c48348); /* add $8, %rsp */
2196 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
2197 (VT_INT
| VT_UNSIGNED
)) {
2198 /* unsigned int to float/double/long double */
/* push an extra zero so the 32-bit value reads as an unsigned
 * 64-bit quantity for fildll */
2199 o(0x6a); /* push $0 */
2201 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
2202 o(0x242cdf); /* fildll (%rsp) */
2203 o(0x10c48348); /* add $16, %rsp */
2205 /* int to float/double/long double */
2206 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
2207 o(0x2404db); /* fildl (%rsp) */
2208 o(0x08c48348); /* add $8, %rsp */
/* SSE path: convert into a fresh XMM register */
2212 int r
= get_reg(RC_FLOAT
);
2214 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
?1:0));
/* 64-bit source operand needed for unsigned int and long long */
2215 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
2216 (VT_INT
| VT_UNSIGNED
) ||
2217 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
2221 o(0xc0 + (vtop
->r
& VT_VALMASK
) + REG_VALUE(r
)*8); /* cvtsi2sd */
/* NOTE(review): fragmented extraction -- tokens preserved verbatim.
 *
 * gen_cvt_ftof: convert between float / double / long double.
 * float<->double use SSE cvtps2pd / cvtpd2ps (after unpckl to
 * duplicate the scalar); conversions to/from long double bounce the
 * value through a scratch slot at -0x10(%rsp) so the x87 unit can
 * load/store it. */
2226 /* convert from one floating point type to another */
2227 void gen_cvt_ftof(int t
)
2235 if (bt
== VT_FLOAT
) {
2237 if (tbt
== VT_DOUBLE
) {
2238 o(0x140f); /* unpcklps */
2239 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2240 o(0x5a0f); /* cvtps2pd */
2241 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2242 } else if (tbt
== VT_LDOUBLE
) {
2244 /* movss %xmm0,-0x10(%rsp) */
2246 o(0x44 + REG_VALUE(vtop
->r
)*8);
2248 o(0xf02444d9); /* flds -0x10(%rsp) */
2251 } else if (bt
== VT_DOUBLE
) {
2253 if (tbt
== VT_FLOAT
) {
2254 o(0x140f66); /* unpcklpd */
2255 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2256 o(0x5a0f66); /* cvtpd2ps */
2257 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2258 } else if (tbt
== VT_LDOUBLE
) {
2260 /* movsd %xmm0,-0x10(%rsp) */
2262 o(0x44 + REG_VALUE(vtop
->r
)*8);
2264 o(0xf02444dd); /* fldl -0x10(%rsp) */
/* long double source: fstp to the scratch slot, reload into XMM */
2270 r
= get_reg(RC_FLOAT
);
2271 if (tbt
== VT_DOUBLE
) {
2272 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
2273 /* movsd -0x10(%rsp),%xmm0 */
2275 o(0x44 + REG_VALUE(r
)*8);
2278 } else if (tbt
== VT_FLOAT
) {
2279 o(0xf0245cd9); /* fstps -0x10(%rsp) */
2280 /* movss -0x10(%rsp),%xmm0 */
2282 o(0x44 + REG_VALUE(r
)*8);
/* NOTE(review): fragmented extraction -- tokens preserved verbatim.
 *
 * gen_cvt_ftoi: floating point -> integer conversion to type 't'.
 * Long double is first narrowed to double via gen_cvt_ftof, then
 * float/double are converted with SSE cvttss2si / cvttsd2si
 * (truncating); REX.W is emitted when the destination is 8 bytes. */
2289 /* convert fp to int 't' type */
2290 void gen_cvt_ftoi(int t
)
2292 int ft
, bt
, size
, r
;
2295 if (bt
== VT_LDOUBLE
) {
2296 gen_cvt_ftof(VT_DOUBLE
);
2306 r
= get_reg(RC_INT
);
2307 if (bt
== VT_FLOAT
) {
2309 } else if (bt
== VT_DOUBLE
) {
2314 orex(size
== 8, r
, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
2315 o(0xc0 + REG_VALUE(vtop
->r
) + REG_VALUE(r
)*8);
2319 /* computed goto support */
2326 /* Save the stack pointer onto the stack and return the location of its address */
2327 ST_FUNC
void gen_vla_sp_save(int addr
) {
2328 /* mov %rsp,addr(%rbp)*/
2329 gen_modrm64(0x89, TREG_RSP
, VT_LOCAL
, NULL
, addr
);
2332 /* Restore the SP from a location on the stack */
2333 ST_FUNC
void gen_vla_sp_restore(int addr
) {
2334 gen_modrm64(0x8b, TREG_RSP
, VT_LOCAL
, NULL
, addr
);
/* NOTE(review): fragmented extraction -- tokens preserved verbatim;
 * several original lines (including the actual sub-from-%rsp emission
 * and the alignment mask) are missing here.
 *
 * gen_vla_alloc: allocate 'type'-sized VLA storage by adjusting %rsp.
 * On PE targets this must call alloca() instead, since Windows stack
 * growth needs page probing beyond a plain %rsp adjustment. */
2337 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2338 ST_FUNC
void gen_vla_alloc(CType
*type
, int align
) {
2339 #ifdef TCC_TARGET_PE
2340 /* alloca does more than just adjust %rsp on Windows */
2341 vpush_global_sym(&func_old_type
, TOK_alloca
);
2342 vswap(); /* Move alloca ref past allocation size */
/* result of alloca becomes the value on the stack */
2344 vset(type
, REG_IRET
, 0);
2346 int r
= gv(RC_INT
); /* allocation size */
2349 o(0xe0 | REG_VALUE(r
));
2350 /* We align to 16 bytes rather than align */
2358 /* end of x86-64 code generator */
2359 /*************************************************************/
2360 #endif /* ! TARGET_DEFS_ONLY */
2361 /******************************************************/