/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */

#define REG_ARGS_MAX 2 /* at most 2 registers used for each argument */
/* This struct stores the struct offsets at which %rax, %rdx, %xmm0, and
 * %xmm1 are to be stored.
 *
 * struct { long long l; double x; }:     ireg = { 0, -1 }  freg = { 8, -1 }
 * struct { double x; long long l; }:     ireg = { 8, -1 }  freg = { 0, -1 }
 * struct { long long l; long long l2; }: ireg = { 0, 8 }   freg = { -1, -1 }
 * struct { double x; double x2; }:       ireg = { -1, -1 } freg = { 0, 8 }
 */
    int ireg[REG_ARGS_MAX];
    int freg[REG_ARGS_MAX];
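/* classify_x86_64_arg() below fills these offsets while walking the
   eightbytes of a struct; e.g. for struct { double x; long long l; }
   the first eightbyte is SSE and the second INTEGER, giving
   freg = { 0, -1 } and ireg = { 8, -1 } as in the table above. */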
/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which
   makes assumptions about it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_ST0     0x0080 /* only for long double */
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_LRET    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */
/* pretty names for the registers */

#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)
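/* For example, %r10 is hardware register number 10 (binary 1010):
   REX_BASE() yields 1 (the extension bit that ends up in a REX prefix)
   and REG_VALUE() yields 2 (the low three bits encoded in the ModRM
   byte). Registers 0-7 (%rax..%rdi, %xmm0..%xmm7) have REX_BASE 0. */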
/* return registers for function */
#define REG_IRET TREG_RAX  /* single word int return register */
#define REG_LRET TREG_RDX  /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
/******************************************************/

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32  R_X86_64_32
#define R_DATA_PTR R_X86_64_64
#define R_JMP_SLOT R_X86_64_JUMP_SLOT
#define R_COPY     R_X86_64_COPY

#define ELF_START_ADDR 0x400000
#define ELF_PAGE_SIZE  0x200000

/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
static unsigned long func_sub_sp_offset;
static int func_ret_sub;
/* XXX: make it faster ? */
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
void o(unsigned int c)

void gen_le64(int64_t c)

void orex(int ll, int r, int r2, int b)
    if ((r & VT_VALMASK) >= VT_CONST)
    if ((r2 & VT_VALMASK) >= VT_CONST)
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
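/* The byte built above is a REX prefix, 0100WRXB: bit 3 (W, 64-bit
   operand size) is driven by 'll', bit 2 (R, extends the ModRM reg
   field) by 'r2', and bit 0 (B, extends the ModRM r/m field) by 'r'.
   The X bit (SIB index extension) is never needed here. */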
/* output a symbol and patch all calls to it */
void gsym_addr(int t, int a)
    ptr = (int *)(cur_text_section->data + t);
    n = *ptr; /* next value */
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */

static int is64_type(int t)
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
    greloc(cur_text_section, sym, ind, R_X86_64_32);

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
    greloc(cur_text_section, sym, ind, R_X86_64_64);

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
#ifndef TCC_TARGET_PE
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));

    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
              get_tok_str(sym->v, NULL), c, r,
              cur_text_section->data[ind-3],
              cur_text_section->data[ind-2],
              cur_text_section->data[ind-1]
              );

    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    /* we use add c, %xxx for displacement */
    o(0xc0 + REG_VALUE(r));
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
            gen_gotpcrel(r, sym, c);
            gen_addrpc32(r, sym, c);
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
            /* short reference */
            oad(0x85 | op_reg, c);
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
            g(0x80 | op_reg | REG_VALUE(r));
            g(0x00 | op_reg | REG_VALUE(r));
        g(0x00 | op_reg | REG_VALUE(r));
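/* A ModRM byte is mod(7:6) reg(5:3) r/m(2:0), and op_reg has already
   been shifted into the reg field above. For the VT_LOCAL case,
   0x85 | op_reg is mod=10, r/m=101, i.e. disp32(%rbp) addressing with
   the 32-bit displacement emitted by oad(); the 0x80 and 0x00 forms
   select disp32 and no-displacement addressing off the base register
   held in r/m. */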
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
    gen_modrm_impl(op_reg, r, sym, c, 0);
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
    int v, t, ft, fc, fr;

    sv = pe_getimport(sv, &v2);

    ft = sv->type.t & ~VT_DEFSIGN;
#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);
        /* load from the temporary register */
    if (v == VT_LLOCAL) {
        v1.r = VT_LOCAL | VT_LVAL;
        if (!(reg_classes[fr] & (RC_INT|RC_R11)))
            fr = get_reg(RC_INT);
    if ((ft & VT_BTYPE) == VT_FLOAT) {
        r = REG_VALUE(r); /* movd */
    } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
        b = 0x7e0ff3; /* movq */
    } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
        b = 0xdb, r = 5; /* fldt */
    } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
        b = 0xbe0f;   /* movsbl */
    } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
        b = 0xb60f;   /* movzbl */
    } else if ((ft & VT_TYPE) == VT_SHORT) {
        b = 0xbf0f;   /* movswl */
    } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
        b = 0xb70f;   /* movzwl */

    assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
           || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
           || ((ft & VT_BTYPE) == VT_FUNC));

        gen_modrm64(b, r, fr, sv->sym, fc);

        gen_modrm(r, fr, sv->sym, fc);
        o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
        gen_addrpc32(fr, sv->sym, fc);

        if (sv->sym->type.t & VT_STATIC) {
            o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
            gen_addrpc32(fr, sv->sym, fc);

            o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
            gen_gotpcrel(r, sv->sym, fc);
    } else if (is64_type(ft)) {
        orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */

        orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */

    } else if (v == VT_LOCAL) {
        orex(1,0,r,0x8d); /* lea xxx(%rbp), r */
        gen_modrm(r, VT_LOCAL, sv->sym, fc);
    } else if (v == VT_CMP) {
        if ((fc & ~0x100) != TOK_NE)
            oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */

            /* This was a float compare. If the parity bit is
             * set the result was unordered, meaning false for everything
             * except TOK_NE, and true for TOK_NE. */
            o(0x037a + (REX_BASE(r) << 8));

        orex(0,r,0, 0x0f); /* setxx %br */
        o(0xc0 + REG_VALUE(r));
    } else if (v == VT_JMP || v == VT_JMPI) {
        oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
        o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
        oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                /* gen_cvt_ftof(VT_DOUBLE); */
                o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                /* movsd -0x10(%rsp),%xmmN */
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */

                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                if ((ft & VT_BTYPE) == VT_FLOAT) {
                    assert((ft & VT_BTYPE) == VT_DOUBLE);
                o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);

        } else if (r == TREG_ST0) {
            assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
            /* gen_cvt_ftof(VT_LDOUBLE); */
            /* movsd %xmmN,-0x10(%rsp) */
            o(0x44 + REG_VALUE(r)*8); /* %xmmN */
            o(0xf02444dd); /* fldl -0x10(%rsp) */

            o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
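/* The ST0<->XMM moves above bounce the value through scratch memory
   at -0x10(%rsp) because there is no direct x87<->SSE register move
   instruction; presumably that slot is safe to use here (it lies in
   the SysV red zone below %rsp). */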
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
    /* store the REX prefix in this variable when PIC is enabled */

    v = pe_getimport(v, &v2);

    fr = v->r & VT_VALMASK;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        pic = is64_type(bt) ? 0x49 : 0x41;
    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x7e0f); /* movd */
    } else if (bt == VT_DOUBLE) {
        o(0xd60f); /* movq */
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */

    if (bt == VT_BYTE || bt == VT_BOOL)
    else if (is64_type(bt))

    /* xxx r, (%r11) where xxx is mov, movq, fld, etc. */
    if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
        gen_modrm64(op64, r, v->r, v->sym, fc);
    } else if (fr != r) {
        /* XXX: do we ever really come here? */
        o(0xc0 + fr + r * 8); /* mov r, fr */

    if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
        gen_modrm(r, v->r, v->sym, fc);
    } else if (fr != r) {
        /* XXX: do we ever really come here? */
        o(0xc0 + fr + r * 8); /* mov r, fr */
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
        ((vtop->r & VT_SYM) || (vtop->c.ll-4) == (int)(vtop->c.ll-4))) {
        if (vtop->r & VT_SYM) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32);
            greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32);

            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);

        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */

        /* otherwise, indirect call */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
#if defined(CONFIG_TCC_BCHECK)
#ifndef TCC_TARGET_PE
static addr_t func_bound_offset;
static unsigned long func_bound_ind;

static void gen_static_call(int v)
    Sym *sym = external_global_sym(v, &func_old_type, 0);
    greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
/* generate a bounded pointer addition */
ST_FUNC void gen_bounded_ptr_add(void)
    /* save all temporary registers */

    /* prepare fast x86_64 function call */
    o(0xc68948); // mov %rax,%rsi ## second arg in %rsi, this must be size

    o(0xc78948); // mov %rax,%rdi ## first arg in %rdi, this must be ptr

    /* do a fast function call */
    gen_static_call(TOK___bound_ptr_add);

    /* returned pointer is in rax */
    vtop->r = TREG_RAX | VT_BOUNDED;

    /* relocation offset of the bounding function call point */
    vtop->c.ull = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
/* patch pointer addition in vtop so that pointer dereferencing is
   also tested */
ST_FUNC void gen_bounded_ptr_deref(void)
    /* XXX: put that code in generic part of tcc */
    if (!is_float(vtop->type.t)) {
        if (vtop->r & VT_LVAL_BYTE)
        else if (vtop->r & VT_LVAL_SHORT)

    size = type_size(&vtop->type, &align);

    case 1: func = TOK___bound_ptr_indir1; break;
    case 2: func = TOK___bound_ptr_indir2; break;
    case 4: func = TOK___bound_ptr_indir4; break;
    case 8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
        tcc_error("unhandled size when dereferencing bounded pointer");

    sym = external_global_sym(func, &func_old_type, 0);
        put_extern_sym(sym, NULL, 0, 0);

    /* patch relocation */
    /* XXX: find a better solution ? */
    rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.ull);
    rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
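/* This rewrites the Rela entry recorded by gen_bounded_ptr_add()
   (its offset was saved in vtop->c.ull) so that the earlier call to
   __bound_ptr_add now targets the size-specific __bound_ptr_indirN
   helper, making the dereference itself bounds-checked as well. */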
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
    return arg_regs[idx];
}

static int func_scratch;
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

void gen_offs_sp(int b, int r, int d)
    orex(1,0,r & 0x100 ? 0 : r, b);
        o(0x2444 | (REG_VALUE(r) << 3));
        o(0x2484 | (REG_VALUE(r) << 3));
ST_FUNC int regargs_nregs(RegArgs *args)

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize, RegArgs *args)
    *ret_align = 1; // Never have to re-align return values for x86-64
    size = type_size(vt, &align);
    } else if (size > 4) {
    } else if (size > 2) {
    } else if (size > 1) {
static int is_sse_float(int t) {
    return bt == VT_DOUBLE || bt == VT_FLOAT;

int gfunc_arg_size(CType *type) {
    if (type->t & (VT_ARRAY|VT_BITFIELD))
    return type_size(type, &align);
void gfunc_call(int nb_args)
    int size, r, args_size, i, d, bt, struct_size;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            gen_offs_sp(0x8d, r, struct_size);

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
        } else if (bt == VT_LDOUBLE) {
            gen_offs_sp(0xdb, 0x107, struct_size);

    if (func_scratch < struct_size)
        func_scratch = struct_size;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
            /* align to stack align size */
            size = (size + 15) & ~15;

            gen_offs_sp(0x8d, d, struct_size);
            gen_offs_sp(0x89, d, arg*8);

                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);

        if (is_sse_float(vtop->type.t)) {
            gv(RC_XMM0); /* only use one float register */
                /* movq %xmm0, j*8(%rsp) */
                gen_offs_sp(0xd60f66, 0x100, arg*8);

                /* movaps %xmm0, %xmmN */
                o(0xc0 + (arg << 3));
                d = arg_prepare_reg(arg);
                /* mov %xmm0, %rxx */
                o(0xc0 + REG_VALUE(d));

            if (bt == VT_STRUCT) {
                vtop->type.ref = NULL;
                vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                    : size > 1 ? VT_SHORT : VT_BYTE;

                gen_offs_sp(0x89, r, arg*8);

                d = arg_prepare_reg(arg);
                orex(1,d,r,0x89); /* mov */
                o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));

    /* Copy R10 and R11 into RCX and RDX, respectively */
        o(0xd1894c); /* mov %r10, %rcx */
        o(0xda894c); /* mov %r11, %rdx */
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
    int addr, reg_param_index, bt, size;

    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_var = (sym->c == FUNC_ELLIPSIS);
    size = gfunc_arg_size(&func_vt);
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);

            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
/* generate function epilog */
void gfunc_epilog(void)
    if (func_ret_sub == 0) {
        o(0xc2); /* ret n */
        g(func_ret_sub >> 8);

    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */

        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
static void gadd_sp(int val)
    if (val == (char)val) {
        oad(0xc48148, val); /* add $xxx, %rsp */
typedef enum X86_64_Mode {
    x86_64_mode_integer,

static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
    else if (a == x86_64_mode_none)
    else if (b == x86_64_mode_none)
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    return x86_64_mode_sse;
/* classify the x86 eightbytes from byte index start to byte index
 * end, at offset offset from the root struct */
static X86_64_Mode classify_x86_64_inner(CType *ty, int offset, int start, int end)
    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_ENUM: return x86_64_mode_integer;

    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

        mode = x86_64_mode_none;
        while ((f = f->next) != NULL) {
            if (f->c + offset >= start && f->c + offset < end)
                mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type, f->c + offset, start, end));
static X86_64_Mode classify_x86_64_arg_eightbyte(CType *ty, int offset)
    assert((ty->t & VT_BTYPE) == VT_STRUCT);

    mode = classify_x86_64_inner(ty, 0, offset, offset + 8);

static void regargs_init(RegArgs *args)
    for(i=0; i<REG_ARGS_MAX; i++) {
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, RegArgs *args)
    X86_64_Mode mode = x86_64_mode_none;
    int size, align, ret_t = 0;
    int ireg = 0, freg = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        args->ireg[ireg++] = 0;
        mode = x86_64_mode_integer;

    size = type_size(ty, &align);
    *psize = (size + 7) & ~7;
    *palign = (align + 7) & ~7;

        mode = x86_64_mode_memory;

        for(start=0; start < size; start += 8) {
            if ((ty->t & VT_BTYPE) == VT_STRUCT) {
                mode = classify_x86_64_arg_eightbyte(ty, start);
                mode = classify_x86_64_inner(ty, 0, 0, size);

            if (mode == x86_64_mode_integer) {
                args->ireg[ireg++] = start;
                ret_t = (size > 4) ? VT_LLONG : VT_INT;
            } else if (mode == x86_64_mode_sse) {
                args->freg[freg++] = start;
                ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
ST_FUNC int classify_x86_64_va_arg(CType *ty)
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack

    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, NULL);
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
static int regargs_iregs(RegArgs *args)
    for(i=0; i<REG_ARGS_MAX; i++) {
        if(args->ireg[i] != -1)

static int regargs_fregs(RegArgs *args)
    for(i=0; i<REG_ARGS_MAX; i++) {
        if(args->freg[i] != -1)

/* Count the total number of registers used by args */
ST_FUNC int regargs_nregs(RegArgs *args)
    for(i=0; i<REG_ARGS_MAX; i++) {
        if(args->ireg[i] != -1)

        if(args->freg[i] != -1)
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize, RegArgs *args)
    *ret_align = 1; // Never have to re-align return values for x86-64

    mode = classify_x86_64_arg(vt, ret, &size, &align, args);

    return mode != x86_64_mode_memory &&
           mode != x86_64_mode_none;
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
    return arg_regs[idx];
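/* %rdx and %rcx (the 3rd and 4th SysV argument registers) are also
   scratch registers that gv() may allocate while later arguments are
   still being evaluated, so values destined for them are staged in
   %r10/%r11 here and only copied into %rdx/%rcx just before the call
   (see the "Copy R10 and R11 into RDX and RCX" step in gfunc_call). */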
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
    int size, align, r, args_size, stack_adjust, run_start, run_end, i;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg = 0, gen_reg = 0;
    RegArgs *reg_args = alloca(nb_args * sizeof *reg_args);

    /* calculate the number of integer/float register arguments */
    for(i = nb_args - 1; i >= 0; i--) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_args[i]);
        fregs = regargs_fregs(&reg_args[i]);
        iregs = regargs_iregs(&reg_args[i]);

        nb_sse_args += fregs;
        nb_reg_args += iregs;

        if (sse_reg + fregs > 8 || gen_reg + iregs > REGN) {
            regargs_init(&reg_args[i]);
    /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
       and ended by a 16-byte aligned argument. This is because, from the point of view of
       the callee, argument alignment is computed from the bottom up. */
    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;

    while (run_start != nb_args) {
        int run_gen_reg = gen_reg, run_sse_reg = sse_reg;

        for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
            int stack = regargs_nregs(&reg_args[i]) == 0;
            classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, NULL);

                stack_adjust += size;

        gen_reg = run_gen_reg;
        sse_reg = run_sse_reg;

        /* adjust stack to align SSE boundary */
        if (stack_adjust &= 15) {
            /* fetch cpu flag before the following sub will change the value */
            if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)

            stack_adjust = 16 - stack_adjust;
            oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
            args_size += stack_adjust;
= run_start
; i
< run_end
;) {
1338 int arg_stored
= regargs_nregs(®_args
[i
]) == 0;
1347 /* Swap argument to top, it will possibly be changed here,
1348 and might use more temps. At the end of the loop we keep
1349 in on the stack and swap it back to its original position
1350 if it is a register. */
1355 classify_x86_64_arg(&vtop
->type
, NULL
, &size
, &align
, &args
);
1357 switch (vtop
->type
.t
& VT_BTYPE
) {
1359 /* allocate the necessary size on stack */
1361 oad(0xec81, size
); /* sub $xxx, %rsp */
1362 /* generate structure store */
1363 r
= get_reg(RC_INT
);
1364 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1365 o(0xe0 + REG_VALUE(r
));
1366 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1379 o(0x50); /* push $rax */
1380 /* movq %xmmN, (%rsp) */
1382 o(0x04 + REG_VALUE(r
)*8);
1389 /* XXX: implicit cast ? */
1392 orex(0,r
,0,0x50 + REG_VALUE(r
)); /* push r */
1397 /* And swap the argument back to its original position. */
1403 assert((vtop
->type
.t
== tmp
.type
.t
) && (vtop
->r
== tmp
.r
));
1405 memmove(reg_args
+ i
, reg_args
+ i
+ 1, (nb_args
- i
- 1) * sizeof *reg_args
);
        /* handle 16 byte aligned arguments at end of run */
        run_start = i = run_end;
        while (i < nb_args) {
            /* Rotate argument to top since it will always be popped */
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, NULL);

            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                oad(0xec8148, size); /* sub $xxx, %rsp */
                o(0x7cdb); /* fstpt 0(%rsp) */

                assert(mode == x86_64_mode_memory);

                /* allocate the necessary size on stack */
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);

            memmove(reg_args + i, reg_args + i + 1, (nb_args - i - 1) * sizeof *reg_args);
    /* XXX This should be superfluous. */
    save_regs(0); /* save used temporary registers */

    /* recalculate the number of register arguments there actually
     * are. This is slow but more obviously correct than using the
    for(i = 0; i < nb_args; i++) {
        gen_reg += regargs_iregs(&reg_args[i]);
        sse_reg += regargs_fregs(&reg_args[i]);
    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead. */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        /* Alter stack entry type so that gv() knows how to treat it */
        if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
            for(k=REG_ARGS_MAX-1; k>=0; k--) {
                if (args.freg[k] == -1)

            assert(sse_reg >= 0);

            vtop->type.t = VT_DOUBLE;
            vtop->c.ull += args.freg[k];
            gv(RC_XMM0 << sse_reg);

            for(k=REG_ARGS_MAX-1; k>=0; k--) {
                if (args.ireg[k] == -1)

            vtop->type.t = VT_LLONG;
            vtop->c.ull += args.ireg[k];

            d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
    /* XXX is it really necessary to set vtop->type? */
    classify_x86_64_arg(&vtop->type, &type, &size, &align, NULL);

    if (args.freg[0] != -1) {
        /* Load directly to register */
        gv(RC_XMM0 << sse_reg);
    } else if (args.ireg[0] != -1) {
        /* XXX: implicit cast ? */

        d = arg_prepare_reg(gen_reg);
        orex(1,d,r,0x89); /* mov */
        o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));

    assert(gen_reg == 0);
    assert(sse_reg == 0);
    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here. */

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */

    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
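/* The SysV ABI requires %al to hold an upper bound on the number of
   vector registers used when calling a variadic function; the mov
   into %eax above supplies it, clamped to 8. */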
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
    int i, addr, align, size;
    int param_addr = 0, reg_param_index, sse_param_index;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;

    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            mode = classify_x86_64_arg(type, NULL, &size, &align, &args);

                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;

            case x86_64_mode_integer:
            case x86_64_mode_sse: {
                seen_sse_num += regargs_fregs(&args);
                seen_reg_num += regargs_iregs(&args);

                if (seen_reg_num > 8) {

                if (seen_sse_num > 8) {

        /* movl $0x????????, -0x10(%rbp) */
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        gen_le32(seen_stack_size);
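        /* These three 32-bit values are what va_start() will read
           later: the gp_offset into the register save area (8 bytes
           per integer register already consumed), the fp_offset (the
           48-byte integer area plus 16 bytes per consumed SSE
           register), and the size of the named arguments used to find
           the stack overflow area; presumably this mirrors the
           va_list layout in this compiler's stdarg.h. */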
        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            o(0xd60f66); /* movq */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            /* movq $0, loc+8(%rbp) */

    for (i = 0; i < REGN; i++) {
        push_arg_reg(REGN-1-i);
    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, NULL);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        int reg_count_integer = 0;
        int reg_count_sse = 0;

        mode = classify_x86_64_arg(type, NULL, &size, &align, &args);
        reg_count_integer = regargs_iregs(&args);
        reg_count_sse = regargs_fregs(&args);

        case x86_64_mode_integer:
        case x86_64_mode_sse:
            if (reg_count_integer || reg_count_sse) {
                if ((reg_count_sse == 0 || sse_param_index + reg_count_sse <= 8) &&
                    (reg_count_integer == 0 || reg_param_index + reg_count_integer <= REGN)) {
                    /* argument fits into registers */

                    /* save arguments passed by register */
                    loc -= (reg_count_sse + reg_count_integer) * 8;

                    for (i = 0; i < reg_count_sse; ++i) {
                        o(0xd60f66); /* movq */
                        gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + args.freg[i]);

                    for (i = 0; i < reg_count_integer; ++i) {
                        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + args.ireg[i]);

                    addr = (addr + align - 1) & -align;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;

        default: break; /* nothing to be done for x86_64_mode_none */

        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
#ifdef CONFIG_TCC_BCHECK
    /* leave some room for bound checking code */
    if (tcc_state->do_bounds_check) {
        func_bound_offset = lbounds_section->data_offset;
        func_bound_ind = ind;
        oad(0xb8, 0); /* lbound section pointer */
        o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
        oad(0xb8, 0); /* call to function */
/* generate function epilog */
void gfunc_epilog(void)
#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check
        && func_bound_offset != lbounds_section->data_offset)

        /* add end of table info */
        bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));

        /* generate bound local allocation */
        sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                               func_bound_offset, lbounds_section->data_offset);

        ind = func_bound_ind;
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);

        gen_static_call(TOK___bound_local_new);

        /* generate bound check local freeing */
        o(0x5250); /* save returned value, if any */
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);
        oad(0xb8, 0); /* mov xxx, %rax */
        o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
        gen_static_call(TOK___bound_local_delete);
        o(0x585a); /* restore returned value, if any */

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc2); /* ret n */
        g(func_ret_sub >> 8);

    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;

    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
/* generate a jump to a label */
    return psym(0xe9, t);

/* generate a jump to a fixed address */
void gjmp_addr(int a)
        oad(0xe9, a - ind - 5);
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
    v = vtop->r & VT_VALMASK;

        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100)
            /* This was a float compare. If the parity flag is set
               the result was unordered. For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test. We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump. */
            vtop->c.i &= ~0x100;
            if (!inv == (vtop->c.i != TOK_NE))
                o(0x067a); /* jp +6 */

                t = psym(0x8a, t); /* jp t */

        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */

            p = (int *)(cur_text_section->data + *p);
/* generate an integer binary operation */
void gen_opi(int op)
    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    case TOK_ADDC1: /* add with carry generation */
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));

                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);

            gv2(RC_INT, RC_INT);
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);

        if (op >= TOK_ULT && op <= TOK_GT) {

    case TOK_SUBC1: /* sub with carry generation */

    case TOK_ADDC2: /* add with carry use */

    case TOK_SUBC2: /* sub with carry use */

        gv2(RC_INT, RC_INT);
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);

        opc = 0xc0 | (opc << 3);

            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));

            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));

        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)

void gen_opl(int op)
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
    int a, ft, fc, swapped, r;
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {

    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {

    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {

    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)

            else if (op == TOK_EQ || op == TOK_NE)

                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */

                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */

                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */

            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */

                o(0x45c4f6); /* test $0x45, %ah */

            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);

        o(0xde); /* fxxxp %st, %st(1) */

        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                r = get_reg(RC_INT);
                v1.r = VT_LOCAL | VT_LVAL;

            if (op == TOK_EQ || op == TOK_NE) {

                if (op == TOK_LE || op == TOK_LT)

                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */

                    op = 0x97; /* seta */

            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)

            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */

                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);

                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);

            vtop->c.i = op | 0x100;

            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);

            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                r = get_reg(RC_INT);
                v1.r = VT_LOCAL | VT_LVAL;

            assert(!(vtop[-1].r & VT_LVAL));
            assert(vtop->r & VT_LVAL);

            if ((ft & VT_BTYPE) == VT_DOUBLE) {

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);

                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
    if ((t & VT_BTYPE) == VT_LDOUBLE) {

        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */

            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */

        int r = get_reg(RC_FLOAT);

        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT ? 1 : 0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {

        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
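/* In the SSE path above, the 0xf2 prefix selects the scalar-double
   conversion (cvtsi2sd) and 0xf3 the scalar-single one (cvtsi2ss);
   for unsigned int and long long sources the branch above presumably
   adds a REX.W prefix so the full 64-bit source value is converted. */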
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
    if (bt == VT_FLOAT) {

        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            /* movss %xmm0,-0x10(%rsp) */
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf02444d9); /* flds -0x10(%rsp) */

    } else if (bt == VT_DOUBLE) {

        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf02444dd); /* fldl -0x10(%rsp) */

        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x44 + REG_VALUE(r)*8);
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x44 + REG_VALUE(r)*8);
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
    int ft, bt, size, r;

    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
    } else if (bt == VT_DOUBLE) {

    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
/* computed goto support */

/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp) */
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}
/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */

    vset(type, REG_IRET, 0);

    int r = gv(RC_INT); /* allocation size */

    o(0xe0 | REG_VALUE(r));
    /* We align to 16 bytes rather than align */
/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/