2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which does
31 assumptions on it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
37 #define RC_ST0 0x0080 /* only for long double */
42 #define RC_XMM0 0x1000
43 #define RC_XMM1 0x2000
44 #define RC_XMM2 0x4000
45 #define RC_XMM3 0x8000
46 #define RC_XMM4 0x10000
47 #define RC_XMM5 0x20000
48 #define RC_XMM6 0x40000
49 #define RC_XMM7 0x80000
50 #define RC_IRET RC_RAX /* function return: integer register */
51 #define RC_LRET RC_RDX /* function return: second integer register */
52 #define RC_FRET RC_XMM0 /* function return: float register */
53 #define RC_QRET RC_XMM1 /* function return: second float register */
55 /* pretty names for the registers */
83 #define REX_BASE(reg) (((reg) >> 3) & 1)
84 #define REG_VALUE(reg) ((reg) & 7)
86 /* return registers for function */
87 #define REG_IRET TREG_RAX /* single word int return register */
88 #define REG_LRET TREG_RDX /* second word return register (for long long) */
89 #define REG_FRET TREG_XMM0 /* float return register */
90 #define REG_QRET TREG_XMM1 /* second float return register */
92 /* defined if function parameters must be evaluated in reverse order */
93 #define INVERT_FUNC_PARAMS
95 /* pointer size, in bytes */
98 /* long double size and alignment, in bytes */
99 #define LDOUBLE_SIZE 16
100 #define LDOUBLE_ALIGN 16
101 /* maximum alignment (for aligned attribute support) */
104 /******************************************************/
107 #define EM_TCC_TARGET EM_X86_64
109 /* relocation type for 32 bit data relocation */
110 #define R_DATA_32 R_X86_64_32
111 #define R_DATA_PTR R_X86_64_64
112 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
113 #define R_COPY R_X86_64_COPY
115 #define ELF_START_ADDR 0x400000
116 #define ELF_PAGE_SIZE 0x200000
118 /******************************************************/
119 #else /* ! TARGET_DEFS_ONLY */
120 /******************************************************/
124 ST_DATA
const int reg_classes
[NB_REGS
] = {
125 /* eax */ RC_INT
| RC_RAX
,
126 /* ecx */ RC_INT
| RC_RCX
,
127 /* edx */ RC_INT
| RC_RDX
,
141 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
142 /* xmm1 */ RC_FLOAT
| RC_XMM1
,
143 /* xmm2 */ RC_FLOAT
| RC_XMM2
,
144 /* xmm3 */ RC_FLOAT
| RC_XMM3
,
145 /* xmm4 */ RC_FLOAT
| RC_XMM4
,
146 /* xmm5 */ RC_FLOAT
| RC_XMM5
,
147 /* xmm6 and xmm7 are included so gv() can be used on them,
148 but they are not tagged with RC_FLOAT because they are
149 callee saved on Windows */
155 static unsigned long func_sub_sp_offset
;
156 static int func_ret_sub
;
158 /* XXX: make it faster ? */
159 ST_FUNC
void g(int c
)
163 if (ind1
> cur_text_section
->data_allocated
)
164 section_realloc(cur_text_section
, ind1
);
165 cur_text_section
->data
[ind
] = c
;
169 ST_FUNC
void o(unsigned int c
)
177 ST_FUNC
void gen_le16(int v
)
183 ST_FUNC
void gen_le32(int c
)
191 ST_FUNC
void gen_le64(int64_t c
)
203 static void orex(int ll
, int r
, int r2
, int b
)
205 if ((r
& VT_VALMASK
) >= VT_CONST
)
207 if ((r2
& VT_VALMASK
) >= VT_CONST
)
209 if (ll
|| REX_BASE(r
) || REX_BASE(r2
))
210 o(0x40 | REX_BASE(r
) | (REX_BASE(r2
) << 2) | (ll
<< 3));
214 /* output a symbol and patch all calls to it */
215 ST_FUNC
void gsym_addr(int t
, int a
)
218 unsigned char *ptr
= cur_text_section
->data
+ t
;
219 uint32_t n
= read32le(ptr
); /* next value */
220 write32le(ptr
, a
- t
- 4);
230 /* psym is used to put an instruction with a data field which is a
231 reference to a symbol. It is in fact the same as oad ! */
234 static int is64_type(int t
)
236 return ((t
& VT_BTYPE
) == VT_PTR
||
237 (t
& VT_BTYPE
) == VT_FUNC
||
238 (t
& VT_BTYPE
) == VT_LLONG
);
241 /* instruction + 4 bytes data. Return the address of the data */
242 ST_FUNC
int oad(int c
, int s
)
248 if (ind1
> cur_text_section
->data_allocated
)
249 section_realloc(cur_text_section
, ind1
);
250 write32le(cur_text_section
->data
+ ind
, s
);
256 ST_FUNC
void gen_addr32(int r
, Sym
*sym
, int c
)
259 greloca(cur_text_section
, sym
, ind
, R_X86_64_32
, c
), c
=0;
263 /* output constant with relocation if 'r & VT_SYM' is true */
264 ST_FUNC
void gen_addr64(int r
, Sym
*sym
, int64_t c
)
267 greloca(cur_text_section
, sym
, ind
, R_X86_64_64
, c
), c
=0;
271 /* output constant with relocation if 'r & VT_SYM' is true */
272 ST_FUNC
void gen_addrpc32(int r
, Sym
*sym
, int c
)
275 greloca(cur_text_section
, sym
, ind
, R_X86_64_PC32
, c
-4), c
=4;
279 /* output got address with relocation */
280 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
282 #ifndef TCC_TARGET_PE
283 greloca(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
, -4);
285 tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
286 get_tok_str(sym
->v
, NULL
), c
, r
,
287 cur_text_section
->data
[ind
-3],
288 cur_text_section
->data
[ind
-2],
289 cur_text_section
->data
[ind
-1]
291 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
295 /* we use add c, %xxx for displacement */
297 o(0xc0 + REG_VALUE(r
));
302 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
304 op_reg
= REG_VALUE(op_reg
) << 3;
305 if ((r
& VT_VALMASK
) == VT_CONST
) {
306 /* constant memory reference */
309 gen_gotpcrel(r
, sym
, c
);
311 gen_addrpc32(r
, sym
, c
);
313 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
314 /* currently, we use only ebp as base */
316 /* short reference */
320 oad(0x85 | op_reg
, c
);
322 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
324 g(0x80 | op_reg
| REG_VALUE(r
));
327 g(0x00 | op_reg
| REG_VALUE(r
));
330 g(0x00 | op_reg
| REG_VALUE(r
));
334 /* generate a modrm reference. 'op_reg' contains the additional 3
336 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
338 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
341 /* generate a modrm reference. 'op_reg' contains the additional 3
343 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
346 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
347 orex(1, r
, op_reg
, opcode
);
348 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
352 /* load 'r' from value 'sv' */
353 void load(int r
, SValue
*sv
)
355 int v
, t
, ft
, fc
, fr
;
360 sv
= pe_getimport(sv
, &v2
);
364 ft
= sv
->type
.t
& ~VT_DEFSIGN
;
367 ft
&= ~(VT_VOLATILE
| VT_CONSTANT
);
369 #ifndef TCC_TARGET_PE
370 /* we use indirect access via got */
371 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
372 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
373 /* use the result register as a temporary register */
374 int tr
= r
| TREG_MEM
;
376 /* we cannot use float registers as a temporary register */
377 tr
= get_reg(RC_INT
) | TREG_MEM
;
379 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
381 /* load from the temporary register */
389 if (v
== VT_LLOCAL
) {
391 v1
.r
= VT_LOCAL
| VT_LVAL
;
394 if (!(reg_classes
[fr
] & (RC_INT
|RC_R11
)))
395 fr
= get_reg(RC_INT
);
399 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
401 r
= REG_VALUE(r
); /* movd */
402 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
403 b
= 0x7e0ff3; /* movq */
405 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
406 b
= 0xdb, r
= 5; /* fldt */
407 } else if ((ft
& VT_TYPE
) == VT_BYTE
|| (ft
& VT_TYPE
) == VT_BOOL
) {
408 b
= 0xbe0f; /* movsbl */
409 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
410 b
= 0xb60f; /* movzbl */
411 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
412 b
= 0xbf0f; /* movswl */
413 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
414 b
= 0xb70f; /* movzwl */
416 assert(((ft
& VT_BTYPE
) == VT_INT
) || ((ft
& VT_BTYPE
) == VT_LLONG
)
417 || ((ft
& VT_BTYPE
) == VT_PTR
) || ((ft
& VT_BTYPE
) == VT_ENUM
)
418 || ((ft
& VT_BTYPE
) == VT_FUNC
));
423 gen_modrm64(b
, r
, fr
, sv
->sym
, fc
);
426 gen_modrm(r
, fr
, sv
->sym
, fc
);
433 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
434 gen_addrpc32(fr
, sv
->sym
, fc
);
436 if (sv
->sym
->type
.t
& VT_STATIC
) {
438 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
439 gen_addrpc32(fr
, sv
->sym
, fc
);
442 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
443 gen_gotpcrel(r
, sv
->sym
, fc
);
446 } else if (is64_type(ft
)) {
447 orex(1,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
450 orex(0,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
453 } else if (v
== VT_LOCAL
) {
454 orex(1,0,r
,0x8d); /* lea xxx(%ebp), r */
455 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
456 } else if (v
== VT_CMP
) {
458 if ((fc
& ~0x100) != TOK_NE
)
459 oad(0xb8 + REG_VALUE(r
), 0); /* mov $0, r */
461 oad(0xb8 + REG_VALUE(r
), 1); /* mov $1, r */
464 /* This was a float compare. If the parity bit is
465 set the result was unordered, meaning false for everything
466 except TOK_NE, and true for TOK_NE. */
468 o(0x037a + (REX_BASE(r
) << 8));
470 orex(0,r
,0, 0x0f); /* setxx %br */
472 o(0xc0 + REG_VALUE(r
));
473 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
476 oad(0xb8 + REG_VALUE(r
), t
); /* mov $1, r */
477 o(0x05eb + (REX_BASE(r
) << 8)); /* jmp after */
480 oad(0xb8 + REG_VALUE(r
), t
^ 1); /* mov $0, r */
482 if ((r
>= TREG_XMM0
) && (r
<= TREG_XMM7
)) {
484 /* gen_cvt_ftof(VT_DOUBLE); */
485 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
486 /* movsd -0x10(%rsp),%xmmN */
488 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
491 assert((v
>= TREG_XMM0
) && (v
<= TREG_XMM7
));
492 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
495 assert((ft
& VT_BTYPE
) == VT_DOUBLE
);
498 o(0xc0 + REG_VALUE(v
) + REG_VALUE(r
)*8);
500 } else if (r
== TREG_ST0
) {
501 assert((v
>= TREG_XMM0
) && (v
<= TREG_XMM7
));
502 /* gen_cvt_ftof(VT_LDOUBLE); */
503 /* movsd %xmmN,-0x10(%rsp) */
505 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
507 o(0xf02444dd); /* fldl -0x10(%rsp) */
510 o(0xc0 + REG_VALUE(r
) + REG_VALUE(v
) * 8); /* mov v, r */
516 /* store register 'r' in lvalue 'v' */
517 void store(int r
, SValue
*v
)
521 /* store the REX prefix in this variable when PIC is enabled */
526 v
= pe_getimport(v
, &v2
);
531 fr
= v
->r
& VT_VALMASK
;
532 ft
&= ~(VT_VOLATILE
| VT_CONSTANT
);
535 #ifndef TCC_TARGET_PE
536 /* we need to access the variable via got */
537 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
538 /* mov xx(%rip), %r11 */
540 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.i
);
541 pic
= is64_type(bt
) ? 0x49 : 0x41;
545 /* XXX: incorrect if float reg to reg */
546 if (bt
== VT_FLOAT
) {
549 o(0x7e0f); /* movd */
551 } else if (bt
== VT_DOUBLE
) {
554 o(0xd60f); /* movq */
556 } else if (bt
== VT_LDOUBLE
) {
557 o(0xc0d9); /* fld %st(0) */
565 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
567 else if (is64_type(bt
))
573 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
578 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
579 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
580 } else if (fr
!= r
) {
581 /* XXX: can this case actually be reached? */
583 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
586 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
587 gen_modrm(r
, v
->r
, v
->sym
, fc
);
588 } else if (fr
!= r
) {
589 /* XXX: can this case actually be reached? */
591 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
596 /* 'is_jmp' is '1' if it is a jump */
597 static void gcall_or_jmp(int is_jmp
)
600 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
&&
601 ((vtop
->r
& VT_SYM
) || (vtop
->c
.i
-4) == (int)(vtop
->c
.i
-4))) {
603 if (vtop
->r
& VT_SYM
) {
604 /* relocation case */
606 greloca(cur_text_section
, vtop
->sym
, ind
+ 1, R_X86_64_PC32
, (int)(vtop
->c
.i
-4));
608 greloca(cur_text_section
, vtop
->sym
, ind
+ 1, R_X86_64_PLT32
, (int)(vtop
->c
.i
-4));
611 /* put an empty PC32 relocation */
612 put_elf_reloca(symtab_section
, cur_text_section
,
613 ind
+ 1, R_X86_64_PC32
, 0, (int)(vtop
->c
.i
-4));
615 oad(0xe8 + is_jmp
, 0); /* call/jmp im */
617 /* otherwise, indirect call */
621 o(0xff); /* call/jmp *r */
622 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
626 #if defined(CONFIG_TCC_BCHECK)
627 #ifndef TCC_TARGET_PE
628 static addr_t func_bound_offset
;
629 static unsigned long func_bound_ind
;
632 static void gen_static_call(int v
)
634 Sym
*sym
= external_global_sym(v
, &func_old_type
, 0);
636 greloca(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
, -4);
639 /* generate a bounded pointer addition */
640 ST_FUNC
void gen_bounded_ptr_add(void)
642 /* save all temporary registers */
645 /* prepare fast x86_64 function call */
647 o(0xc68948); // mov %rax,%rsi ## second arg in %rsi, this must be size
651 o(0xc78948); // mov %rax,%rdi ## first arg in %rdi, this must be ptr
654 /* do a fast function call */
655 gen_static_call(TOK___bound_ptr_add
);
657 /* returned pointer is in rax */
659 vtop
->r
= TREG_RAX
| VT_BOUNDED
;
662 /* relocation offset of the bounding function call point */
663 vtop
->c
.i
= (cur_text_section
->reloc
->data_offset
- sizeof(ElfW(Rela
)));
666 /* patch pointer addition in vtop so that pointer dereferencing is
668 ST_FUNC
void gen_bounded_ptr_deref(void)
676 /* XXX: put that code in generic part of tcc */
677 if (!is_float(vtop
->type
.t
)) {
678 if (vtop
->r
& VT_LVAL_BYTE
)
680 else if (vtop
->r
& VT_LVAL_SHORT
)
684 size
= type_size(&vtop
->type
, &align
);
686 case 1: func
= TOK___bound_ptr_indir1
; break;
687 case 2: func
= TOK___bound_ptr_indir2
; break;
688 case 4: func
= TOK___bound_ptr_indir4
; break;
689 case 8: func
= TOK___bound_ptr_indir8
; break;
690 case 12: func
= TOK___bound_ptr_indir12
; break;
691 case 16: func
= TOK___bound_ptr_indir16
; break;
693 tcc_error("unhandled size when dereferencing bounded pointer");
698 sym
= external_global_sym(func
, &func_old_type
, 0);
700 put_extern_sym(sym
, NULL
, 0, 0);
702 /* patch relocation */
703 /* XXX: find a better solution ? */
705 rel
= (ElfW(Rela
) *)(cur_text_section
->reloc
->data
+ vtop
->c
.i
);
706 rel
->r_info
= ELF64_R_INFO(sym
->c
, ELF64_R_TYPE(rel
->r_info
));
713 static const uint8_t arg_regs
[REGN
] = {
714 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
717 /* Prepare arguments in R10 and R11 rather than RCX and RDX
718 because gv() will not ever use these */
719 static int arg_prepare_reg(int idx
) {
720 if (idx
== 0 || idx
== 1)
721 /* idx=0: r10, idx=1: r11 */
724 return arg_regs
[idx
];
727 static int func_scratch
;
729 /* Generate function call. The function address is pushed first, then
730 all the parameters in call order. This functions pops all the
731 parameters and the function address. */
733 void gen_offs_sp(int b
, int r
, int d
)
735 orex(1,0,r
& 0x100 ? 0 : r
, b
);
737 o(0x2444 | (REG_VALUE(r
) << 3));
740 o(0x2484 | (REG_VALUE(r
) << 3));
745 /* Return the number of registers needed to return the struct, or 0 if
746 returning via struct pointer. */
747 ST_FUNC
int gfunc_sret(CType
*vt
, int variadic
, CType
*ret
, int *ret_align
, int *regsize
)
751 *ret_align
= 1; // Never have to re-align return values for x86-64
752 size
= type_size(vt
, &align
);
756 } else if (size
> 4) {
759 } else if (size
> 2) {
762 } else if (size
> 1) {
771 static int is_sse_float(int t
) {
774 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
777 int gfunc_arg_size(CType
*type
) {
779 if (type
->t
& (VT_ARRAY
|VT_BITFIELD
))
781 return type_size(type
, &align
);
784 void gfunc_call(int nb_args
)
786 int size
, r
, args_size
, i
, d
, bt
, struct_size
;
789 args_size
= (nb_args
< REGN
? REGN
: nb_args
) * PTR_SIZE
;
792 /* for struct arguments, we need to call memcpy and the function
793 call breaks register passing arguments we are preparing.
794 So, we process arguments which will be passed by stack first. */
795 struct_size
= args_size
;
796 for(i
= 0; i
< nb_args
; i
++) {
801 bt
= (sv
->type
.t
& VT_BTYPE
);
802 size
= gfunc_arg_size(&sv
->type
);
805 continue; /* arguments smaller than 8 bytes passed in registers or on stack */
807 if (bt
== VT_STRUCT
) {
808 /* align to stack align size */
809 size
= (size
+ 15) & ~15;
810 /* generate structure store */
812 gen_offs_sp(0x8d, r
, struct_size
);
815 /* generate memcpy call */
816 vset(&sv
->type
, r
| VT_LVAL
, 0);
820 } else if (bt
== VT_LDOUBLE
) {
822 gen_offs_sp(0xdb, 0x107, struct_size
);
827 if (func_scratch
< struct_size
)
828 func_scratch
= struct_size
;
831 struct_size
= args_size
;
833 for(i
= 0; i
< nb_args
; i
++) {
835 bt
= (vtop
->type
.t
& VT_BTYPE
);
837 size
= gfunc_arg_size(&vtop
->type
);
839 /* align to stack align size */
840 size
= (size
+ 15) & ~15;
843 gen_offs_sp(0x8d, d
, struct_size
);
844 gen_offs_sp(0x89, d
, arg
*8);
846 d
= arg_prepare_reg(arg
);
847 gen_offs_sp(0x8d, d
, struct_size
);
851 if (is_sse_float(vtop
->type
.t
)) {
852 gv(RC_XMM0
); /* only use one float register */
854 /* movq %xmm0, j*8(%rsp) */
855 gen_offs_sp(0xd60f66, 0x100, arg
*8);
857 /* movaps %xmm0, %xmmN */
859 o(0xc0 + (arg
<< 3));
860 d
= arg_prepare_reg(arg
);
861 /* mov %xmm0, %rxx */
864 o(0xc0 + REG_VALUE(d
));
867 if (bt
== VT_STRUCT
) {
868 vtop
->type
.ref
= NULL
;
869 vtop
->type
.t
= size
> 4 ? VT_LLONG
: size
> 2 ? VT_INT
870 : size
> 1 ? VT_SHORT
: VT_BYTE
;
875 gen_offs_sp(0x89, r
, arg
*8);
877 d
= arg_prepare_reg(arg
);
878 orex(1,d
,r
,0x89); /* mov */
879 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
887 /* Copy R10 and R11 into RCX and RDX, respectively */
889 o(0xd1894c); /* mov %r10, %rcx */
891 o(0xda894c); /* mov %r11, %rdx */
896 /* other compilers don't clear the upper bits when returning char/short */
897 bt
= vtop
->type
.ref
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
);
898 if (bt
== (VT_BYTE
| VT_UNSIGNED
))
899 o(0xc0b60f); /* movzbl %al, %eax */
900 else if (bt
== VT_BYTE
)
901 o(0xc0be0f); /* movsbl %al, %eax */
902 else if (bt
== VT_SHORT
)
904 else if (bt
== (VT_SHORT
| VT_UNSIGNED
))
905 o(0xc0b70f); /* movzbl %al, %eax */
906 #if 0 /* handled in gen_cast() */
907 else if (bt
== VT_INT
)
908 o(0x9848); /* cltq */
909 else if (bt
== (VT_INT
| VT_UNSIGNED
))
910 o(0xc089); /* mov %eax,%eax */
916 #define FUNC_PROLOG_SIZE 11
918 /* generate function prolog of type 't' */
919 void gfunc_prolog(CType
*func_type
)
921 int addr
, reg_param_index
, bt
, size
;
930 ind
+= FUNC_PROLOG_SIZE
;
931 func_sub_sp_offset
= ind
;
934 sym
= func_type
->ref
;
936 /* if the function returns a structure, then add an
937 implicit pointer parameter */
939 func_var
= (sym
->c
== FUNC_ELLIPSIS
);
940 size
= gfunc_arg_size(&func_vt
);
942 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
948 /* define parameters */
949 while ((sym
= sym
->next
) != NULL
) {
951 bt
= type
->t
& VT_BTYPE
;
952 size
= gfunc_arg_size(type
);
954 if (reg_param_index
< REGN
) {
955 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
957 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
| VT_REF
, addr
);
959 if (reg_param_index
< REGN
) {
960 /* save arguments passed by register */
961 if ((bt
== VT_FLOAT
) || (bt
== VT_DOUBLE
)) {
962 o(0xd60f66); /* movq */
963 gen_modrm(reg_param_index
, VT_LOCAL
, NULL
, addr
);
965 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
968 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
974 while (reg_param_index
< REGN
) {
975 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
976 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
983 /* generate function epilog */
984 void gfunc_epilog(void)
989 if (func_ret_sub
== 0) {
994 g(func_ret_sub
>> 8);
998 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
999 /* align local size to word & save local variables */
1000 v
= (func_scratch
+ -loc
+ 15) & -16;
1003 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
1004 oad(0xb8, v
); /* mov stacksize, %eax */
1005 oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */
1006 greloca(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
, -4);
1007 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
1009 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1010 o(0xec8148); /* sub rsp, stacksize */
1014 cur_text_section
->data_offset
= saved_ind
;
1015 pe_add_unwind_data(ind
, saved_ind
, v
);
1016 ind
= cur_text_section
->data_offset
;
1021 static void gadd_sp(int val
)
1023 if (val
== (char)val
) {
1027 oad(0xc48148, val
); /* add $xxx, %rsp */
1031 typedef enum X86_64_Mode
{
1034 x86_64_mode_integer
,
1039 static X86_64_Mode
classify_x86_64_merge(X86_64_Mode a
, X86_64_Mode b
)
1043 else if (a
== x86_64_mode_none
)
1045 else if (b
== x86_64_mode_none
)
1047 else if ((a
== x86_64_mode_memory
) || (b
== x86_64_mode_memory
))
1048 return x86_64_mode_memory
;
1049 else if ((a
== x86_64_mode_integer
) || (b
== x86_64_mode_integer
))
1050 return x86_64_mode_integer
;
1051 else if ((a
== x86_64_mode_x87
) || (b
== x86_64_mode_x87
))
1052 return x86_64_mode_memory
;
1054 return x86_64_mode_sse
;
1057 static X86_64_Mode
classify_x86_64_inner(CType
*ty
)
1062 switch (ty
->t
& VT_BTYPE
) {
1063 case VT_VOID
: return x86_64_mode_none
;
1072 case VT_ENUM
: return x86_64_mode_integer
;
1075 case VT_DOUBLE
: return x86_64_mode_sse
;
1077 case VT_LDOUBLE
: return x86_64_mode_x87
;
1082 mode
= x86_64_mode_none
;
1083 for (f
= f
->next
; f
; f
= f
->next
)
1084 mode
= classify_x86_64_merge(mode
, classify_x86_64_inner(&f
->type
));
1092 static X86_64_Mode
classify_x86_64_arg(CType
*ty
, CType
*ret
, int *psize
, int *palign
, int *reg_count
)
1095 int size
, align
, ret_t
= 0;
1097 if (ty
->t
& (VT_BITFIELD
|VT_ARRAY
)) {
1102 mode
= x86_64_mode_integer
;
1104 size
= type_size(ty
, &align
);
1105 *psize
= (size
+ 7) & ~7;
1106 *palign
= (align
+ 7) & ~7;
1109 mode
= x86_64_mode_memory
;
1111 mode
= classify_x86_64_inner(ty
);
1113 case x86_64_mode_integer
:
1119 ret_t
= (size
> 4) ? VT_LLONG
: VT_INT
;
1123 case x86_64_mode_x87
:
1128 case x86_64_mode_sse
:
1134 ret_t
= (size
> 4) ? VT_DOUBLE
: VT_FLOAT
;
1137 default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/
1150 ST_FUNC
int classify_x86_64_va_arg(CType
*ty
)
1152 /* This definition must be synced with stdarg.h */
1153 enum __va_arg_type
{
1154 __va_gen_reg
, __va_float_reg
, __va_stack
1156 int size
, align
, reg_count
;
1157 X86_64_Mode mode
= classify_x86_64_arg(ty
, NULL
, &size
, &align
, ®_count
);
1159 default: return __va_stack
;
1160 case x86_64_mode_integer
: return __va_gen_reg
;
1161 case x86_64_mode_sse
: return __va_float_reg
;
1165 /* Return the number of registers needed to return the struct, or 0 if
1166 returning via struct pointer. */
1167 ST_FUNC
int gfunc_sret(CType
*vt
, int variadic
, CType
*ret
, int *ret_align
, int *regsize
)
1169 int size
, align
, reg_count
;
1170 *ret_align
= 1; // Never have to re-align return values for x86-64
1172 return (classify_x86_64_arg(vt
, ret
, &size
, &align
, ®_count
) != x86_64_mode_memory
);
1176 static const uint8_t arg_regs
[REGN
] = {
1177 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
1180 static int arg_prepare_reg(int idx
) {
1181 if (idx
== 2 || idx
== 3)
1182 /* idx=2: r10, idx=3: r11 */
1185 return arg_regs
[idx
];
1188 /* Generate function call. The function address is pushed first, then
1189 all the parameters in call order. This functions pops all the
1190 parameters and the function address. */
1191 void gfunc_call(int nb_args
)
1195 int size
, align
, r
, args_size
, stack_adjust
, run_start
, run_end
, i
, reg_count
;
1196 int nb_reg_args
= 0;
1197 int nb_sse_args
= 0;
1198 int sse_reg
, gen_reg
;
1200 /* calculate the number of integer/float register arguments */
1201 for(i
= 0; i
< nb_args
; i
++) {
1202 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1203 if (mode
== x86_64_mode_sse
)
1204 nb_sse_args
+= reg_count
;
1205 else if (mode
== x86_64_mode_integer
)
1206 nb_reg_args
+= reg_count
;
1209 /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
1210 and ended by a 16-byte aligned argument. This is because, from the point of view of
1211 the callee, argument alignment is computed from the bottom up. */
1212 /* for struct arguments, we need to call memcpy and the function
1213 call breaks register passing arguments we are preparing.
1214 So, we process arguments which will be passed by stack first. */
1215 gen_reg
= nb_reg_args
;
1216 sse_reg
= nb_sse_args
;
1219 while (run_start
!= nb_args
) {
1220 int run_gen_reg
= gen_reg
, run_sse_reg
= sse_reg
;
1224 for(i
= run_start
; (i
< nb_args
) && (run_end
== nb_args
); i
++) {
1225 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1227 case x86_64_mode_memory
:
1228 case x86_64_mode_x87
:
1233 stack_adjust
+= size
;
1236 case x86_64_mode_sse
:
1237 sse_reg
-= reg_count
;
1238 if (sse_reg
+ reg_count
> 8) goto stack_arg
;
1241 case x86_64_mode_integer
:
1242 gen_reg
-= reg_count
;
1243 if (gen_reg
+ reg_count
> REGN
) goto stack_arg
;
1245 default: break; /* nothing to be done for x86_64_mode_none */
1249 gen_reg
= run_gen_reg
;
1250 sse_reg
= run_sse_reg
;
1252 /* adjust stack to align SSE boundary */
1253 if (stack_adjust
&= 15) {
1254 /* fetch cpu flag before the following sub will change the value */
1255 if (vtop
>= vstack
&& (vtop
->r
& VT_VALMASK
) == VT_CMP
)
1258 stack_adjust
= 16 - stack_adjust
;
1260 oad(0xec81, stack_adjust
); /* sub $xxx, %rsp */
1261 args_size
+= stack_adjust
;
1264 for(i
= run_start
; i
< run_end
;) {
1265 /* Swap argument to top, it will possibly be changed here,
1266 and might use more temps. At the end of the loop we keep
1267 it on the stack and swap it back to its original position
1268 if it is a register. */
1269 SValue tmp
= vtop
[0];
1274 mode
= classify_x86_64_arg(&vtop
->type
, NULL
, &size
, &align
, ®_count
);
1276 switch (vtop
->type
.t
& VT_BTYPE
) {
1278 if (mode
== x86_64_mode_sse
) {
1280 sse_reg
-= reg_count
;
1283 } else if (mode
== x86_64_mode_integer
) {
1285 gen_reg
-= reg_count
;
1291 /* allocate the necessary size on stack */
1293 oad(0xec81, size
); /* sub $xxx, %rsp */
1294 /* generate structure store */
1295 r
= get_reg(RC_INT
);
1296 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1297 o(0xe0 + REG_VALUE(r
));
1298 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1311 assert(mode
== x86_64_mode_sse
);
1315 o(0x50); /* push $rax */
1316 /* movq %xmmN, (%rsp) */
1318 o(0x04 + REG_VALUE(r
)*8);
1327 assert(mode
== x86_64_mode_integer
);
1329 /* XXX: implicit cast ? */
1330 if (gen_reg
> REGN
) {
1333 orex(0,r
,0,0x50 + REG_VALUE(r
)); /* push r */
1341 /* And swap the argument back to its original position. */
1348 assert((vtop
->type
.t
== tmp
.type
.t
) && (vtop
->r
== tmp
.r
));
1357 /* handle 16 byte aligned arguments at end of run */
1358 run_start
= i
= run_end
;
1359 while (i
< nb_args
) {
1360 /* Rotate argument to top since it will always be popped */
1361 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1367 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1369 oad(0xec8148, size
); /* sub $xxx, %rsp */
1370 o(0x7cdb); /* fstpt 0(%rsp) */
1375 assert(mode
== x86_64_mode_memory
);
1377 /* allocate the necessary size on stack */
1379 oad(0xec81, size
); /* sub $xxx, %rsp */
1380 /* generate structure store */
1381 r
= get_reg(RC_INT
);
1382 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1383 o(0xe0 + REG_VALUE(r
));
1384 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1395 /* XXX This should be superfluous. */
1396 save_regs(0); /* save used temporary registers */
1398 /* then, we prepare register passing arguments.
1399 Note that we cannot set RDX and RCX in this loop because gv()
1400 may break these temporary registers. Let's use R10 and R11
1402 assert(gen_reg
<= REGN
);
1403 assert(sse_reg
<= 8);
1404 for(i
= 0; i
< nb_args
; i
++) {
1405 mode
= classify_x86_64_arg(&vtop
->type
, &type
, &size
, &align
, ®_count
);
1406 /* Alter stack entry type so that gv() knows how to treat it */
1408 if (mode
== x86_64_mode_sse
) {
1409 if (reg_count
== 2) {
1411 gv(RC_FRET
); /* Use pair load into xmm0 & xmm1 */
1412 if (sse_reg
) { /* avoid redundant movaps %xmm0, %xmm0 */
1413 /* movaps %xmm0, %xmmN */
1415 o(0xc0 + (sse_reg
<< 3));
1416 /* movaps %xmm1, %xmmN */
1418 o(0xc1 + ((sse_reg
+1) << 3));
1421 assert(reg_count
== 1);
1423 /* Load directly to register */
1424 gv(RC_XMM0
<< sse_reg
);
1426 } else if (mode
== x86_64_mode_integer
) {
1428 /* XXX: implicit cast ? */
1430 gen_reg
-= reg_count
;
1432 d
= arg_prepare_reg(gen_reg
);
1433 orex(1,d
,r
,0x89); /* mov */
1434 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
1435 if (reg_count
== 2) {
1436 d
= arg_prepare_reg(gen_reg
+1);
1437 orex(1,d
,vtop
->r2
,0x89); /* mov */
1438 o(0xc0 + REG_VALUE(vtop
->r2
) * 8 + REG_VALUE(d
));
1443 assert(gen_reg
== 0);
1444 assert(sse_reg
== 0);
1446 /* We shouldn't have many operands on the stack anymore, but the
1447 call address itself is still there, and it might be in %eax
1448 (or edx/ecx) currently, which the below writes would clobber.
1449 So evict all remaining operands here. */
1452 /* Copy R10 and R11 into RDX and RCX, respectively */
1453 if (nb_reg_args
> 2) {
1454 o(0xd2894c); /* mov %r10, %rdx */
1455 if (nb_reg_args
> 3) {
1456 o(0xd9894c); /* mov %r11, %rcx */
1460 if (vtop
->type
.ref
->c
!= FUNC_NEW
) /* implies FUNC_OLD or FUNC_ELLIPSIS */
1461 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
1469 #define FUNC_PROLOG_SIZE 11
1471 static void push_arg_reg(int i
) {
1473 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
1476 /* generate function prolog of type 't' */
1477 void gfunc_prolog(CType
*func_type
)
1480 int i
, addr
, align
, size
, reg_count
;
1481 int param_addr
= 0, reg_param_index
, sse_param_index
;
1485 sym
= func_type
->ref
;
1486 addr
= PTR_SIZE
* 2;
1488 ind
+= FUNC_PROLOG_SIZE
;
1489 func_sub_sp_offset
= ind
;
1492 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
1493 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
1494 seen_reg_num
= seen_sse_num
= 0;
1495 /* frame pointer and return address */
1496 seen_stack_size
= PTR_SIZE
* 2;
1497 /* count the number of seen parameters */
1498 sym
= func_type
->ref
;
1499 while ((sym
= sym
->next
) != NULL
) {
1501 mode
= classify_x86_64_arg(type
, NULL
, &size
, &align
, ®_count
);
1505 seen_stack_size
= ((seen_stack_size
+ align
- 1) & -align
) + size
;
1508 case x86_64_mode_integer
:
1509 if (seen_reg_num
+ reg_count
<= 8) {
1510 seen_reg_num
+= reg_count
;
1517 case x86_64_mode_sse
:
1518 if (seen_sse_num
+ reg_count
<= 8) {
1519 seen_sse_num
+= reg_count
;
1529 /* movl $0x????????, -0x10(%rbp) */
1531 gen_le32(seen_reg_num
* 8);
1532 /* movl $0x????????, -0xc(%rbp) */
1534 gen_le32(seen_sse_num
* 16 + 48);
1535 /* movl $0x????????, -0x8(%rbp) */
1537 gen_le32(seen_stack_size
);
1539 /* save all register passing arguments */
1540 for (i
= 0; i
< 8; i
++) {
1542 o(0xd60f66); /* movq */
1543 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
1544 /* movq $0, loc+8(%rbp) */
1549 for (i
= 0; i
< REGN
; i
++) {
1550 push_arg_reg(REGN
-1-i
);
1554 sym
= func_type
->ref
;
1555 reg_param_index
= 0;
1556 sse_param_index
= 0;
1558 /* if the function returns a structure, then add an
1559 implicit pointer parameter */
1560 func_vt
= sym
->type
;
1561 mode
= classify_x86_64_arg(&func_vt
, NULL
, &size
, &align
, ®_count
);
1562 if (mode
== x86_64_mode_memory
) {
1563 push_arg_reg(reg_param_index
);
1567 /* define parameters */
1568 while ((sym
= sym
->next
) != NULL
) {
1570 mode
= classify_x86_64_arg(type
, NULL
, &size
, &align
, ®_count
);
1572 case x86_64_mode_sse
:
1573 if (sse_param_index
+ reg_count
<= 8) {
1574 /* save arguments passed by register */
1575 loc
-= reg_count
* 8;
1577 for (i
= 0; i
< reg_count
; ++i
) {
1578 o(0xd60f66); /* movq */
1579 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, param_addr
+ i
*8);
1583 addr
= (addr
+ align
- 1) & -align
;
1589 case x86_64_mode_memory
:
1590 case x86_64_mode_x87
:
1591 addr
= (addr
+ align
- 1) & -align
;
1596 case x86_64_mode_integer
: {
1597 if (reg_param_index
+ reg_count
<= REGN
) {
1598 /* save arguments passed by register */
1599 loc
-= reg_count
* 8;
1601 for (i
= 0; i
< reg_count
; ++i
) {
1602 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, param_addr
+ i
*8);
1606 addr
= (addr
+ align
- 1) & -align
;
1612 default: break; /* nothing to be done for x86_64_mode_none */
1614 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1615 VT_LOCAL
| VT_LVAL
, param_addr
);
1618 #ifdef CONFIG_TCC_BCHECK
1619 /* leave some room for bound checking code */
1620 if (tcc_state
->do_bounds_check
) {
1621 func_bound_offset
= lbounds_section
->data_offset
;
1622 func_bound_ind
= ind
;
1623 oad(0xb8, 0); /* lbound section pointer */
1624 o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
1625 oad(0xb8, 0); /* call to function */
1630 /* generate function epilog */
1631 void gfunc_epilog(void)
1635 #ifdef CONFIG_TCC_BCHECK
1636 if (tcc_state
->do_bounds_check
1637 && func_bound_offset
!= lbounds_section
->data_offset
)
1643 /* add end of table info */
1644 bounds_ptr
= section_ptr_add(lbounds_section
, sizeof(addr_t
));
1647 /* generate bound local allocation */
1648 sym_data
= get_sym_ref(&char_pointer_type
, lbounds_section
,
1649 func_bound_offset
, lbounds_section
->data_offset
);
1651 ind
= func_bound_ind
;
1652 greloc(cur_text_section
, sym_data
, ind
+ 1, R_386_32
);
1654 gen_static_call(TOK___bound_local_new
);
1657 /* generate bound check local freeing */
1658 o(0x5250); /* save returned value, if any */
1659 greloc(cur_text_section
, sym_data
, ind
+ 1, R_386_32
);
1660 oad(0xb8, 0); /* mov xxx, %rax */
1661 o(0xc78948); /* mov %rax,%rdi # first arg in %rdi, this must be ptr */
1662 gen_static_call(TOK___bound_local_delete
);
1663 o(0x585a); /* restore returned value, if any */
1666 o(0xc9); /* leave */
1667 if (func_ret_sub
== 0) {
1670 o(0xc2); /* ret n */
1672 g(func_ret_sub
>> 8);
1674 /* align local size to word & save local variables */
1675 v
= (-loc
+ 15) & -16;
1677 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1678 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1679 o(0xec8148); /* sub rsp, stacksize */
1686 /* generate a jump to a label */
1689 return psym(0xe9, t
);
1692 /* generate a jump to a fixed address */
1693 void gjmp_addr(int a
)
1701 oad(0xe9, a
- ind
- 5);
/* Generate a conditional jump to fixed address 'a'; 'inv' inverts the
   test. Pops the value-stack entry whose c.i holds the condition code. */
1705 ST_FUNC
void gtst_addr(int inv
, int a
)
1707 inv
^= (vtop
--)->c
.i
;
/* NOTE(review): orig. 1708-1713 missing (likely the short jcc-rel8 path).
   The visible path presumably emits the 32-bit '0F 8x' jcc form: 'inv - 16'
   maps the condition code to the second opcode byte, rel32 = a - 4 counted
   past the 4-byte operand -- confirm against full source. */
1714 oad(inv
- 16, a
- 4);
/* NOTE(review): lossy extraction -- leading numbers are original file
   lines; gaps (1720, 1722, 1725, 1736-1738, 1740-1742, 1749, 1751,
   1753 onward) mean missing code, including the function's closing lines. */
1718 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1719 ST_FUNC
int gtst(int inv
, int t
)
1721 int v
= vtop
->r
& VT_VALMASK
;
1723 /* fast case : can jump directly since flags are set */
1724 if (vtop
->c
.i
& 0x100)
1726 /* This was a float compare. If the parity flag is set
1727 the result was unordered. For anything except != this
1728 means false and we don't jump (anding both conditions).
1729 For != this means true (oring both).
1730 Take care about inverting the test. We need to jump
1731 to our target if the result was unordered and test wasn't NE,
1732 otherwise if unordered we don't want to jump. */
/* Clear the float-compare marker (set as op | 0x100 by gen_opf), then
   fold in the parity flag: either hop over the main jump (jp +6) or
   jump on parity straight to the target (jp t). */
1733 vtop
->c
.i
&= ~0x100;
1734 if (inv
== (vtop
->c
.i
== TOK_NE
))
1735 o(0x067a); /* jp +6 */
1739 t
= psym(0x8a, t
); /* jp t */
/* The condition is stored as a TOK_* value in c.i; '- 16' presumably
   maps it to the jcc opcode byte and '^ inv' flips the low bit to
   invert the condition -- confirm against full source. */
1743 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
1744 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
1745 /* && or || optimization */
1746 if ((v
& 1) == inv
) {
1747 /* insert vtop->c jump list in t */
/* Chase the linked list of 32-bit little-endian patch slots stored in
   the code section (terminated by a 0 entry; the advance step is on an
   elided line), then splice 't' into the final slot. */
1748 uint32_t n1
, n
= vtop
->c
.i
;
1750 while ((n1
= read32le(cur_text_section
->data
+ n
)))
1752 write32le(cur_text_section
->data
+ n
, t
);
/* NOTE(review): lossy extraction -- the leading numbers are original file
   line numbers and gaps in them (1766-1769, 1773-1775, 1777-1778, ...)
   mean code is missing, including the switch(op) header, several cases
   and most closing braces. Comments describe only the visible fragment. */
1764 /* generate an integer binary operation */
1765 void gen_opi(int op
)
/* ll: operation is 64-bit; uu: operands unsigned; cc: top-of-stack
   operand is a plain constant (no register, lvalue or symbol). */
1770 ll
= is64_type(vtop
[-1].type
.t
);
1771 uu
= (vtop
[-1].type
.t
& VT_UNSIGNED
) != 0;
1772 cc
= (vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
;
1776 case TOK_ADDC1
: /* add with carry generation */
/* Constant operand that fits a sign-extended 32-bit immediate */
1779 if (cc
&& (!ll
|| (int)vtop
->c
.i
== vtop
->c
.i
)) {
1786 /* XXX: generate inc and dec for smaller code ? */
/* 0x83 = ALU r/m,imm8 vs 0x81 = ALU r/m,imm32; 'opc' selects the ALU
   operation through the modrm reg field (opc << 3). */
1787 orex(ll
, r
, 0, 0x83);
1788 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1791 orex(ll
, r
, 0, 0x81);
1792 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
/* register/register form: (opc << 3) | 0x01 is the 'ALU r/m,r' opcode */
1795 gv2(RC_INT
, RC_INT
);
1798 orex(ll
, r
, fr
, (opc
<< 3) | 0x01);
1799 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1802 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1808 case TOK_SUBC1
: /* sub with carry generation */
1811 case TOK_ADDC2
: /* add with carry use */
1814 case TOK_SUBC2
: /* sub with carry use */
/* multiplication: imul r,fr (opcode 0F AF) */
1827 gv2(RC_INT
, RC_INT
);
1830 orex(ll
, fr
, r
, 0xaf0f); /* imul fr, r */
1831 o(0xc0 + REG_VALUE(fr
) + REG_VALUE(r
) * 8);
/* shifts: immediate count (C1 /opc, count masked to operand width) or
   variable count in %cl (D3 /opc) */
1843 opc
= 0xc0 | (opc
<< 3);
1849 orex(ll
, r
, 0, 0xc1); /* shl/shr/sar $xxx, r */
1850 o(opc
| REG_VALUE(r
));
1851 g(vtop
->c
.i
& (ll
? 63 : 31));
1853 /* we generate the shift in ecx */
1854 gv2(RC_INT
, RC_RCX
);
1856 orex(ll
, r
, 0, 0xd3); /* shl/shr/sar %cl, r */
1857 o(opc
| REG_VALUE(r
));
/* division: dividend in %rax; %rdx is zeroed (unsigned) or filled by
   cqto (signed) before F7 /6 div (modrm 0xf0+) or /7 idiv (0xf8+).
   '%'/TOK_UMOD presumably takes the remainder from %rdx -- confirm. */
1870 /* first operand must be in eax */
1871 /* XXX: need better constraint for second operand */
1872 gv2(RC_RAX
, RC_RCX
);
1877 orex(ll
, 0, 0, uu
? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1878 orex(ll
, fr
, 0, 0xf7); /* div fr, %eax */
1879 o((uu
? 0xf0 : 0xf8) + REG_VALUE(fr
));
1880 if (op
== '%' || op
== TOK_UMOD
)
/* NOTE(review): only the signature survives in this extraction; the body
   (orig. 1893 onward) is missing. Presumably handles "long" (64-bit)
   integer ops, likely by delegating to gen_opi -- confirm in full source. */
1892 void gen_opl(int op
)
/* NOTE(review): lossy extraction -- leading numbers are original file
   lines; gaps (1901, 1903, 1905, 1908-1911, 1913-1914, 1918-1922, ...)
   mean missing code, including most block-closing braces. Comments
   describe only what is visible. */
1897 /* generate a floating point operation 'v = t1 op t2' instruction. The
1898 two operands are guaranted to have the same floating point type */
1899 /* XXX: need to use ST1 too */
1900 void gen_opf(int op
)
1902 int a
, ft
, fc
, swapped
, r
;
/* long doubles live in the x87 stack (RC_ST0); float/double use SSE */
1904 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1906 /* convert constants to memory references */
1907 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1912 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1915 /* must put at least one value in the floating point register */
1916 if ((vtop
[-1].r
& VT_LVAL
) &&
1917 (vtop
[0].r
& VT_LVAL
)) {
1923 /* swap the stack if needed so that t1 is the register and t2 is
1924 the memory reference */
1925 if (vtop
[-1].r
& VT_LVAL
) {
/* ---- x87 path (long double) ---- */
1929 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1930 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1931 /* load on stack second operand */
1932 load(TREG_ST0
, vtop
);
1933 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1934 if (op
== TOK_GE
|| op
== TOK_GT
)
1936 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1939 o(0xc9d9); /* fxch %st(1) */
1940 if (op
== TOK_EQ
|| op
== TOK_NE
)
1941 o(0xe9da); /* fucompp */
1943 o(0xd9de); /* fcompp */
1944 o(0xe0df); /* fnstsw %ax */
/* map the x87 status bits (now in %ah) onto CPU flags per operator */
1946 o(0x45e480); /* and $0x45, %ah */
1947 o(0x40fC80); /* cmp $0x40, %ah */
1948 } else if (op
== TOK_NE
) {
1949 o(0x45e480); /* and $0x45, %ah */
1950 o(0x40f480); /* xor $0x40, %ah */
1952 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1953 o(0x05c4f6); /* test $0x05, %ah */
1956 o(0x45c4f6); /* test $0x45, %ah */
1963 /* no memory reference possible for long double operations */
1964 load(TREG_ST0
, vtop
);
1988 o(0xde); /* fxxxp %st, %st(1) */
/* ---- SSE path (float/double) ---- */
1993 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1994 /* if saved lvalue, then we must reload it */
1997 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1999 r
= get_reg(RC_INT
);
2001 v1
.r
= VT_LOCAL
| VT_LVAL
;
/* pick the setcc opcode; LE/LT appear to be handled by swapping the
   operands first so only setae/seta are needed -- confirm (swap code
   is on elided lines) */
2007 if (op
== TOK_EQ
|| op
== TOK_NE
) {
2010 if (op
== TOK_LE
|| op
== TOK_LT
)
2012 if (op
== TOK_LE
|| op
== TOK_GE
) {
2013 op
= 0x93; /* setae */
2015 op
= 0x97; /* seta */
2023 assert(!(vtop
[-1].r
& VT_LVAL
));
/* ucomis{s,d} for ==/!=, comis{s,d} otherwise; the 0x66 prefix that
   selects the double variant is presumably emitted on the elided
   line after the VT_DOUBLE test -- confirm */
2025 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
)
2027 if (op
== TOK_EQ
|| op
== TOK_NE
)
2028 o(0x2e0f); /* ucomisd */
2030 o(0x2f0f); /* comisd */
2032 if (vtop
->r
& VT_LVAL
) {
2033 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
2035 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
/* tag the result as a float compare (0x100) so gtst adds the parity
   (unordered) handling */
2040 vtop
->c
.i
= op
| 0x100;
2042 assert((vtop
->type
.t
& VT_BTYPE
) != VT_LDOUBLE
);
/* ---- SSE arithmetic (non-compare) path ---- */
2060 assert((ft
& VT_BTYPE
) != VT_LDOUBLE
);
2063 /* if saved lvalue, then we must reload it */
2064 if ((vtop
->r
& VT_VALMASK
) == VT_LLOCAL
) {
2066 r
= get_reg(RC_INT
);
2068 v1
.r
= VT_LOCAL
| VT_LVAL
;
2074 assert(!(vtop
[-1].r
& VT_LVAL
));
2076 assert(vtop
->r
& VT_LVAL
);
2081 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
2089 if (vtop
->r
& VT_LVAL
) {
2090 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
2092 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
/* NOTE(review): lossy extraction -- leading numbers are original file
   lines; gaps (2103, 2105-2106, 2117, 2121, 2126-2128, 2130, 2135-2137,
   2139+) mean missing code, including most closing braces. */
2100 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
2101 and 'long long' cases. */
2102 void gen_cvt_itof(int t
)
/* ---- target is long double: go through the x87 FPU via the stack ---- */
2104 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
2107 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
2108 /* signed long long to float/double/long double (unsigned case
2109 is handled generically) */
2110 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
2111 o(0x242cdf); /* fildll (%rsp) */
2112 o(0x08c48348); /* add $8, %rsp */
2113 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
2114 (VT_INT
| VT_UNSIGNED
)) {
2115 /* unsigned int to float/double/long double */
2116 o(0x6a); /* push $0 */
2118 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
2119 o(0x242cdf); /* fildll (%rsp) */
2120 o(0x10c48348); /* add $16, %rsp */
2122 /* int to float/double/long double */
2123 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
2124 o(0x2404db); /* fildl (%rsp) */
2125 o(0x08c48348); /* add $8, %rsp */
/* ---- target is float/double: SSE conversion ----
   prefix 0xf3 selects cvtsi2ss (float), 0xf2 selects cvtsi2sd (double) */
2129 int r
= get_reg(RC_FLOAT
);
2131 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
?1:0));
/* 64-bit source (unsigned int widened, or long long) presumably gets a
   REX.W prefix on an elided line (2135-2137) -- confirm */
2132 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
2133 (VT_INT
| VT_UNSIGNED
) ||
2134 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
2138 o(0xc0 + (vtop
->r
& VT_VALMASK
) + REG_VALUE(r
)*8); /* cvtsi2sd */
/* NOTE(review): lossy extraction -- leading numbers are original file
   lines; gaps (2145-2151, 2153, 2160, 2162, 2164, 2166-2167, ...) mean
   missing code: the bt/tbt setup, the movss/movsd opcode bytes that
   precede the visible modrm bytes, and closing braces. */
2143 /* convert from one floating point type to another */
2144 void gen_cvt_ftof(int t
)
/* ---- source is float ---- */
2152 if (bt
== VT_FLOAT
) {
2154 if (tbt
== VT_DOUBLE
) {
/* modrm 0xc0 + r*9 puts the same register in both reg and r/m fields
   (r*8 + r), i.e. converts the XMM register in place */
2155 o(0x140f); /* unpcklps */
2156 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2157 o(0x5a0f); /* cvtps2pd */
2158 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2159 } else if (tbt
== VT_LDOUBLE
) {
/* float -> long double: spill to a scratch slot at -0x10(%rsp) and
   reload through the x87 stack */
2161 /* movss %xmm0,-0x10(%rsp) */
2163 o(0x44 + REG_VALUE(vtop
->r
)*8);
2165 o(0xf02444d9); /* flds -0x10(%rsp) */
/* ---- source is double ---- */
2168 } else if (bt
== VT_DOUBLE
) {
2170 if (tbt
== VT_FLOAT
) {
2171 o(0x140f66); /* unpcklpd */
2172 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2173 o(0x5a0f66); /* cvtpd2ps */
2174 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2175 } else if (tbt
== VT_LDOUBLE
) {
2177 /* movsd %xmm0,-0x10(%rsp) */
2179 o(0x44 + REG_VALUE(vtop
->r
)*8);
2181 o(0xf02444dd); /* fldl -0x10(%rsp) */
/* ---- source is long double (x87): pop to -0x10(%rsp), reload as SSE ---- */
2187 r
= get_reg(RC_FLOAT
);
2188 if (tbt
== VT_DOUBLE
) {
2189 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
2190 /* movsd -0x10(%rsp),%xmm0 */
2192 o(0x44 + REG_VALUE(r
)*8);
2195 } else if (tbt
== VT_FLOAT
) {
2196 o(0xf0245cd9); /* fstps -0x10(%rsp) */
2197 /* movss -0x10(%rsp),%xmm0 */
2199 o(0x44 + REG_VALUE(r
)*8);
/* NOTE(review): lossy extraction -- gaps (2208, 2210-2211, 2214-2222,
   2225, 2227-2230, 2233+) mean missing code: the ft/bt/size setup, the
   0xf3/0xf2 prefix emission per source type, and closing braces. */
2206 /* convert fp to int 't' type */
2207 void gen_cvt_ftoi(int t
)
2209 int ft
, bt
, size
, r
;
/* long double has no direct SSE conversion: narrow to double first */
2212 if (bt
== VT_LDOUBLE
) {
2213 gen_cvt_ftof(VT_DOUBLE
);
2223 r
= get_reg(RC_INT
);
2224 if (bt
== VT_FLOAT
) {
2226 } else if (bt
== VT_DOUBLE
) {
/* orex(size == 8, ...) adds REX.W for a 64-bit integer destination */
2231 orex(size
== 8, r
, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
2232 o(0xc0 + REG_VALUE(vtop
->r
) + REG_VALUE(r
)*8);
2236 /* computed goto support */
/* Saves %rsp into the local frame slot at addr(%rbp) (opcode 0x89 =
   mov r/m64, r64). NOTE(review): the closing brace (orig. 2247) is
   elided in this extraction. */
2243 /* Save the stack pointer onto the stack and return the location of its address */
2244 ST_FUNC
void gen_vla_sp_save(int addr
) {
2245 /* mov %rsp,addr(%rbp)*/
2246 gen_modrm64(0x89, TREG_RSP
, VT_LOCAL
, NULL
, addr
);
/* Reloads %rsp from the local frame slot at addr(%rbp): opcode 0x8b is
   'mov r64, r/m64', the inverse of the 0x89 store in gen_vla_sp_save.
   NOTE(review): the closing brace (orig. 2252) is elided in this chunk. */
2249 /* Restore the SP from a location on the stack */
2250 ST_FUNC
void gen_vla_sp_restore(int addr
) {
2251 gen_modrm64(0x8b, TREG_RSP
, VT_LOCAL
, NULL
, addr
);
/* NOTE(review): lossy extraction -- gaps (2260-2262, 2264-2265, 2268+)
   mean missing code: the end of the PE branch, the opcode bytes of the
   'sub r,%rsp' preceding the visible modrm byte, and the final 16-byte
   alignment / value-stack pops. */
2254 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2255 ST_FUNC
void gen_vla_alloc(CType
*type
, int align
) {
2256 #ifdef TCC_TARGET_PE
2257 /* alloca does more than just adjust %rsp on Windows */
2258 vpush_global_sym(&func_old_type
, TOK_alloca
);
2259 vswap(); /* Move alloca ref past allocation size */
2263 r
= gv(RC_INT
); /* allocation size */
/* modrm byte (0xe0 | reg) for the 'sub r,%rsp' whose opcode bytes are
   on an elided line (orig. 2264-2265) -- confirm */
2266 o(0xe0 | REG_VALUE(r
));
2267 /* We align to 16 bytes rather than align */
2275 /* end of x86-64 code generator */
2276 /*************************************************************/
2277 #endif /* ! TARGET_DEFS_ONLY */
2278 /******************************************************/