 * x86-64 code generator for TCC
 *
 * Copyright (c) 2008 Shinichiro Hamaji
 *
 * Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which does
31 assumptions on it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
37 #define RC_ST0 0x0080 /* only for long double */
42 #define RC_XMM0 0x1000
43 #define RC_XMM1 0x2000
44 #define RC_XMM2 0x4000
45 #define RC_XMM3 0x8000
46 #define RC_XMM4 0x10000
47 #define RC_XMM5 0x20000
48 #define RC_XMM6 0x40000
49 #define RC_XMM7 0x80000
50 #define RC_IRET RC_RAX /* function return: integer register */
51 #define RC_LRET RC_RDX /* function return: second integer register */
52 #define RC_FRET RC_XMM0 /* function return: float register */
53 #define RC_QRET RC_XMM1 /* function return: second float register */
55 /* pretty names for the registers */
83 #define REX_BASE(reg) (((reg) >> 3) & 1)
84 #define REG_VALUE(reg) ((reg) & 7)
86 /* return registers for function */
87 #define REG_IRET TREG_RAX /* single word int return register */
88 #define REG_LRET TREG_RDX /* second word return register (for long long) */
89 #define REG_FRET TREG_XMM0 /* float return register */
90 #define REG_QRET TREG_XMM1 /* second float return register */
92 /* defined if function parameters must be evaluated in reverse order */
93 #define INVERT_FUNC_PARAMS
95 /* pointer size, in bytes */
98 /* long double size and alignment, in bytes */
99 #define LDOUBLE_SIZE 16
100 #define LDOUBLE_ALIGN 16
101 /* maximum alignment (for aligned attribute support) */
104 /******************************************************/
105 #else /* ! TARGET_DEFS_ONLY */
106 /******************************************************/
110 ST_DATA
const int reg_classes
[NB_REGS
] = {
111 /* eax */ RC_INT
| RC_RAX
,
112 /* ecx */ RC_INT
| RC_RCX
,
113 /* edx */ RC_INT
| RC_RDX
,
127 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
128 /* xmm1 */ RC_FLOAT
| RC_XMM1
,
129 /* xmm2 */ RC_FLOAT
| RC_XMM2
,
130 /* xmm3 */ RC_FLOAT
| RC_XMM3
,
131 /* xmm4 */ RC_FLOAT
| RC_XMM4
,
132 /* xmm5 */ RC_FLOAT
| RC_XMM5
,
133 /* xmm6 and xmm7 are included so gv() can be used on them,
134 but they are not tagged with RC_FLOAT because they are
135 callee saved on Windows */
141 static unsigned long func_sub_sp_offset
;
142 static int func_ret_sub
;
144 /* XXX: make it faster ? */
145 ST_FUNC
void g(int c
)
149 if (ind1
> cur_text_section
->data_allocated
)
150 section_realloc(cur_text_section
, ind1
);
151 cur_text_section
->data
[ind
] = c
;
155 ST_FUNC
void o(unsigned int c
)
163 ST_FUNC
void gen_le16(int v
)
169 ST_FUNC
void gen_le32(int c
)
177 ST_FUNC
void gen_le64(int64_t c
)
/* Emit a REX prefix byte (0x40 | W | R | B) when one is required:
   'll' selects 64-bit operand size (REX.W), while r and r2 contribute
   the high bit of extended registers (r8-r15) via REX_BASE().
   NOTE(review): damaged extraction — original source line numbers are
   embedded in the text; the function braces, the statements executed
   when r or r2 does not name a machine register (presumably clearing
   them to 0 — confirm upstream), and the final emission of opcode
   byte 'b' are missing.  Code lines kept byte-identical. */
189 static void orex(int ll
, int r
, int r2
, int b
)
/* r is only meaningful as a register below VT_CONST */
191 if ((r
& VT_VALMASK
) >= VT_CONST
)
/* likewise for r2 */
193 if ((r2
& VT_VALMASK
) >= VT_CONST
)
/* prefix needed for 64-bit operand size or any extended register */
195 if (ll
|| REX_BASE(r
) || REX_BASE(r2
))
196 o(0x40 | REX_BASE(r
) | (REX_BASE(r2
) << 2) | (ll
<< 3));
200 /* output a symbol and patch all calls to it */
/* Walk the forward-reference chain rooted at code offset 't' and
   rewrite each 32-bit slot as a pc-relative displacement to 'a'.
   NOTE(review): damaged extraction — the loop that follows the chain
   (each slot previously stores the offset 'n' of the next patch
   site; presumably "t = n" iterates until 0 — confirm upstream) is
   missing.  Code lines kept byte-identical. */
201 ST_FUNC
void gsym_addr(int t
, int a
)
/* address of the 32-bit field to patch inside the code section */
204 unsigned char *ptr
= cur_text_section
->data
+ t
;
205 uint32_t n
= read32le(ptr
); /* next value */
/* rel32 is measured from the end of the 4-byte field, hence -4 */
206 write32le(ptr
, a
- t
- 4);
216 /* psym is used to put an instruction with a data field which is a
217 reference to a symbol. It is in fact the same as oad ! */
220 static int is64_type(int t
)
222 return ((t
& VT_BTYPE
) == VT_PTR
||
223 (t
& VT_BTYPE
) == VT_FUNC
||
224 (t
& VT_BTYPE
) == VT_LLONG
);
227 /* instruction + 4 bytes data. Return the address of the data */
228 ST_FUNC
int oad(int c
, int s
)
234 if (ind1
> cur_text_section
->data_allocated
)
235 section_realloc(cur_text_section
, ind1
);
236 write32le(cur_text_section
->data
+ ind
, s
);
242 ST_FUNC
void gen_addr32(int r
, Sym
*sym
, int c
)
245 greloca(cur_text_section
, sym
, ind
, R_X86_64_32
, c
), c
=0;
249 /* output constant with relocation if 'r & VT_SYM' is true */
250 ST_FUNC
void gen_addr64(int r
, Sym
*sym
, int64_t c
)
253 greloca(cur_text_section
, sym
, ind
, R_X86_64_64
, c
), c
=0;
257 /* output constant with relocation if 'r & VT_SYM' is true */
258 ST_FUNC
void gen_addrpc32(int r
, Sym
*sym
, int c
)
261 greloca(cur_text_section
, sym
, ind
, R_X86_64_PC32
, c
-4), c
=4;
265 /* output got address with relocation */
266 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
268 #ifndef TCC_TARGET_PE
269 greloca(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
, -4);
271 tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
272 get_tok_str(sym
->v
, NULL
), c
, r
,
273 cur_text_section
->data
[ind
-3],
274 cur_text_section
->data
[ind
-2],
275 cur_text_section
->data
[ind
-1]
277 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
281 /* we use add c, %xxx for displacement */
283 o(0xc0 + REG_VALUE(r
));
288 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
290 op_reg
= REG_VALUE(op_reg
) << 3;
291 if ((r
& VT_VALMASK
) == VT_CONST
) {
292 /* constant memory reference */
295 gen_gotpcrel(r
, sym
, c
);
297 gen_addrpc32(r
, sym
, c
);
299 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
300 /* currently, we use only ebp as base */
302 /* short reference */
306 oad(0x85 | op_reg
, c
);
308 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
310 g(0x80 | op_reg
| REG_VALUE(r
));
313 g(0x00 | op_reg
| REG_VALUE(r
));
316 g(0x00 | op_reg
| REG_VALUE(r
));
320 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
322 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
324 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
327 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
329 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
332 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
333 orex(1, r
, op_reg
, opcode
);
334 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
338 /* load 'r' from value 'sv' */
339 void load(int r
, SValue
*sv
)
341 int v
, t
, ft
, fc
, fr
;
346 sv
= pe_getimport(sv
, &v2
);
350 ft
= sv
->type
.t
& ~VT_DEFSIGN
;
353 ft
&= ~(VT_VOLATILE
| VT_CONSTANT
);
355 #ifndef TCC_TARGET_PE
356 /* we use indirect access via got */
357 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
358 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
359 /* use the result register as a temporal register */
360 int tr
= r
| TREG_MEM
;
362 /* we cannot use float registers as a temporal register */
363 tr
= get_reg(RC_INT
) | TREG_MEM
;
365 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
367 /* load from the temporal register */
375 if (v
== VT_LLOCAL
) {
377 v1
.r
= VT_LOCAL
| VT_LVAL
;
380 if (!(reg_classes
[fr
] & (RC_INT
|RC_R11
)))
381 fr
= get_reg(RC_INT
);
385 /* Like GCC we can load from small enough properly sized
386 structs and unions as well.
387 XXX maybe move to generic operand handling, but should
388 occur only with asm, so tccasm.c might also be a better place */
389 if ((ft
& VT_BTYPE
) == VT_STRUCT
) {
391 switch (type_size(&sv
->type
, &align
)) {
392 case 1: ft
= VT_BYTE
; break;
393 case 2: ft
= VT_SHORT
; break;
394 case 4: ft
= VT_INT
; break;
395 case 8: ft
= VT_LLONG
; break;
397 tcc_error("invalid aggregate type for register load");
401 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
403 r
= REG_VALUE(r
); /* movd */
404 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
405 b
= 0x7e0ff3; /* movq */
407 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
408 b
= 0xdb, r
= 5; /* fldt */
409 } else if ((ft
& VT_TYPE
) == VT_BYTE
|| (ft
& VT_TYPE
) == VT_BOOL
) {
410 b
= 0xbe0f; /* movsbl */
411 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
412 b
= 0xb60f; /* movzbl */
413 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
414 b
= 0xbf0f; /* movswl */
415 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
416 b
= 0xb70f; /* movzwl */
418 assert(((ft
& VT_BTYPE
) == VT_INT
) || ((ft
& VT_BTYPE
) == VT_LLONG
)
419 || ((ft
& VT_BTYPE
) == VT_PTR
) || ((ft
& VT_BTYPE
) == VT_ENUM
)
420 || ((ft
& VT_BTYPE
) == VT_FUNC
));
425 gen_modrm64(b
, r
, fr
, sv
->sym
, fc
);
428 gen_modrm(r
, fr
, sv
->sym
, fc
);
435 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
436 gen_addrpc32(fr
, sv
->sym
, fc
);
438 if (sv
->sym
->type
.t
& VT_STATIC
) {
440 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
441 gen_addrpc32(fr
, sv
->sym
, fc
);
444 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
445 gen_gotpcrel(r
, sv
->sym
, fc
);
448 } else if (is64_type(ft
)) {
449 orex(1,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
452 orex(0,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
455 } else if (v
== VT_LOCAL
) {
456 orex(1,0,r
,0x8d); /* lea xxx(%ebp), r */
457 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
458 } else if (v
== VT_CMP
) {
460 if ((fc
& ~0x100) != TOK_NE
)
461 oad(0xb8 + REG_VALUE(r
), 0); /* mov $0, r */
463 oad(0xb8 + REG_VALUE(r
), 1); /* mov $1, r */
466 /* This was a float compare. If the parity bit is
467 set the result was unordered, meaning false for everything
468 except TOK_NE, and true for TOK_NE. */
470 o(0x037a + (REX_BASE(r
) << 8));
472 orex(0,r
,0, 0x0f); /* setxx %br */
474 o(0xc0 + REG_VALUE(r
));
475 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
478 oad(0xb8 + REG_VALUE(r
), t
); /* mov $1, r */
479 o(0x05eb + (REX_BASE(r
) << 8)); /* jmp after */
482 oad(0xb8 + REG_VALUE(r
), t
^ 1); /* mov $0, r */
484 if ((r
>= TREG_XMM0
) && (r
<= TREG_XMM7
)) {
486 /* gen_cvt_ftof(VT_DOUBLE); */
487 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
488 /* movsd -0x10(%rsp),%xmmN */
490 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
493 assert((v
>= TREG_XMM0
) && (v
<= TREG_XMM7
));
494 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
497 assert((ft
& VT_BTYPE
) == VT_DOUBLE
);
500 o(0xc0 + REG_VALUE(v
) + REG_VALUE(r
)*8);
502 } else if (r
== TREG_ST0
) {
503 assert((v
>= TREG_XMM0
) && (v
<= TREG_XMM7
));
504 /* gen_cvt_ftof(VT_LDOUBLE); */
505 /* movsd %xmmN,-0x10(%rsp) */
507 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
509 o(0xf02444dd); /* fldl -0x10(%rsp) */
512 o(0xc0 + REG_VALUE(r
) + REG_VALUE(v
) * 8); /* mov v, r */
518 /* store register 'r' in lvalue 'v' */
519 void store(int r
, SValue
*v
)
523 /* store the REX prefix in this variable when PIC is enabled */
528 v
= pe_getimport(v
, &v2
);
533 fr
= v
->r
& VT_VALMASK
;
534 ft
&= ~(VT_VOLATILE
| VT_CONSTANT
);
537 #ifndef TCC_TARGET_PE
538 /* we need to access the variable via got */
539 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
540 /* mov xx(%rip), %r11 */
542 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.i
);
543 pic
= is64_type(bt
) ? 0x49 : 0x41;
547 /* XXX: incorrect if float reg to reg */
548 if (bt
== VT_FLOAT
) {
551 o(0x7e0f); /* movd */
553 } else if (bt
== VT_DOUBLE
) {
556 o(0xd60f); /* movq */
558 } else if (bt
== VT_LDOUBLE
) {
559 o(0xc0d9); /* fld %st(0) */
567 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
569 else if (is64_type(bt
))
575 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
580 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
581 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
582 } else if (fr
!= r
) {
583 /* XXX: don't we really come here? */
585 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
588 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
589 gen_modrm(r
, v
->r
, v
->sym
, fc
);
590 } else if (fr
!= r
) {
591 /* XXX: don't we really come here? */
593 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
598 /* 'is_jmp' is '1' if it is a jump */
599 static void gcall_or_jmp(int is_jmp
)
602 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
&&
603 ((vtop
->r
& VT_SYM
) || (vtop
->c
.i
-4) == (int)(vtop
->c
.i
-4))) {
605 if (vtop
->r
& VT_SYM
) {
606 /* relocation case */
608 greloca(cur_text_section
, vtop
->sym
, ind
+ 1, R_X86_64_PC32
, (int)(vtop
->c
.i
-4));
610 greloca(cur_text_section
, vtop
->sym
, ind
+ 1, R_X86_64_PLT32
, (int)(vtop
->c
.i
-4));
613 /* put an empty PC32 relocation */
614 put_elf_reloca(symtab_section
, cur_text_section
,
615 ind
+ 1, R_X86_64_PC32
, 0, (int)(vtop
->c
.i
-4));
617 oad(0xe8 + is_jmp
, 0); /* call/jmp im */
619 /* otherwise, indirect call */
623 o(0xff); /* call/jmp *r */
624 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
628 #if defined(CONFIG_TCC_BCHECK)
629 #ifndef TCC_TARGET_PE
630 static addr_t func_bound_offset
;
631 static unsigned long func_bound_ind
;
/* Emit a direct call to the runtime helper named by token 'v',
   resolved lazily as an external global with old-style function type.
   NOTE(review): damaged extraction — the instruction emission itself
   (presumably oad(0xe8, ...) producing the call with a rel32 slot;
   confirm upstream) is missing; the relocation below patches the four
   bytes just emitted, at ind-4.  Code lines kept byte-identical. */
634 static void gen_static_call(int v
)
636 Sym
*sym
= external_global_sym(v
, &func_old_type
, 0);
/* R_X86_64_PC32 with addend -4: rel32 measured from end of field */
638 greloca(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
, -4);
641 /* generate a bounded pointer addition */
642 ST_FUNC
void gen_bounded_ptr_add(void)
644 /* save all temporary registers */
647 /* prepare fast x86_64 function call */
649 o(0xc68948); // mov %rax,%rsi ## second arg in %rsi, this must be size
653 o(0xc78948); // mov %rax,%rdi ## first arg in %rdi, this must be ptr
656 /* do a fast function call */
657 gen_static_call(TOK___bound_ptr_add
);
659 /* returned pointer is in rax */
661 vtop
->r
= TREG_RAX
| VT_BOUNDED
;
664 /* relocation offset of the bounding function call point */
665 vtop
->c
.i
= (cur_text_section
->reloc
->data_offset
- sizeof(ElfW(Rela
)));
668 /* patch pointer addition in vtop so that pointer dereferencing is
670 ST_FUNC
void gen_bounded_ptr_deref(void)
678 /* XXX: put that code in generic part of tcc */
679 if (!is_float(vtop
->type
.t
)) {
680 if (vtop
->r
& VT_LVAL_BYTE
)
682 else if (vtop
->r
& VT_LVAL_SHORT
)
686 size
= type_size(&vtop
->type
, &align
);
688 case 1: func
= TOK___bound_ptr_indir1
; break;
689 case 2: func
= TOK___bound_ptr_indir2
; break;
690 case 4: func
= TOK___bound_ptr_indir4
; break;
691 case 8: func
= TOK___bound_ptr_indir8
; break;
692 case 12: func
= TOK___bound_ptr_indir12
; break;
693 case 16: func
= TOK___bound_ptr_indir16
; break;
695 tcc_error("unhandled size when dereferencing bounded pointer");
700 sym
= external_global_sym(func
, &func_old_type
, 0);
702 put_extern_sym(sym
, NULL
, 0, 0);
704 /* patch relocation */
705 /* XXX: find a better solution ? */
707 rel
= (ElfW(Rela
) *)(cur_text_section
->reloc
->data
+ vtop
->c
.i
);
708 rel
->r_info
= ELF64_R_INFO(sym
->c
, ELF64_R_TYPE(rel
->r_info
));
715 static const uint8_t arg_regs
[REGN
] = {
716 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
719 /* Prepare arguments in R10 and R11 rather than RCX and RDX
720 because gv() will not ever use these */
/* Map an outgoing integer-argument index to the register used while
   the call is being built.  NOTE(review): damaged extraction — the
   return statement for the idx 0/1 branch (r10/r11 per the comment
   below — confirm upstream) is missing.  Code lines kept
   byte-identical. */
721 static int arg_prepare_reg(int idx
) {
722 if (idx
== 0 || idx
== 1)
723 /* idx=0: r10, idx=1: r11 */
726 return arg_regs
[idx
];
729 static int func_scratch
;
731 /* Generate function call. The function address is pushed first, then
732 all the parameters in call order. This functions pops all the
733 parameters and the function address. */
735 void gen_offs_sp(int b
, int r
, int d
)
737 orex(1,0,r
& 0x100 ? 0 : r
, b
);
739 o(0x2444 | (REG_VALUE(r
) << 3));
742 o(0x2484 | (REG_VALUE(r
) << 3));
747 /* Return the number of registers needed to return the struct, or 0 if
748 returning via struct pointer. */
749 ST_FUNC
int gfunc_sret(CType
*vt
, int variadic
, CType
*ret
, int *ret_align
, int *regsize
)
753 *ret_align
= 1; // Never have to re-align return values for x86-64
754 size
= type_size(vt
, &align
);
758 } else if (size
> 4) {
761 } else if (size
> 2) {
764 } else if (size
> 1) {
/* Return non-zero when type 't' is passed/returned in SSE registers
   (plain float or double; long double is x87 and handled elsewhere).
   NOTE(review): damaged extraction — the declaration/derivation of
   'bt' (presumably bt = t & VT_BTYPE — confirm upstream) is missing.
   Code lines kept byte-identical. */
773 static int is_sse_float(int t
) {
776 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
/* Size in bytes that argument 'type' occupies for the Win64 calling
   convention.  NOTE(review): damaged extraction — the branch body for
   arrays/bitfields (presumably returning the pointer size 8, i.e.
   such arguments are passed by reference — confirm upstream) and the
   declaration of 'align' are missing.  Code lines kept
   byte-identical. */
779 int gfunc_arg_size(CType
*type
) {
781 if (type
->t
& (VT_ARRAY
|VT_BITFIELD
))
783 return type_size(type
, &align
);
786 void gfunc_call(int nb_args
)
788 int size
, r
, args_size
, i
, d
, bt
, struct_size
;
791 args_size
= (nb_args
< REGN
? REGN
: nb_args
) * PTR_SIZE
;
794 /* for struct arguments, we need to call memcpy and the function
795 call breaks register passing arguments we are preparing.
796 So, we process arguments which will be passed by stack first. */
797 struct_size
= args_size
;
798 for(i
= 0; i
< nb_args
; i
++) {
803 bt
= (sv
->type
.t
& VT_BTYPE
);
804 size
= gfunc_arg_size(&sv
->type
);
807 continue; /* arguments smaller than 8 bytes passed in registers or on stack */
809 if (bt
== VT_STRUCT
) {
810 /* align to stack align size */
811 size
= (size
+ 15) & ~15;
812 /* generate structure store */
814 gen_offs_sp(0x8d, r
, struct_size
);
817 /* generate memcpy call */
818 vset(&sv
->type
, r
| VT_LVAL
, 0);
822 } else if (bt
== VT_LDOUBLE
) {
824 gen_offs_sp(0xdb, 0x107, struct_size
);
829 if (func_scratch
< struct_size
)
830 func_scratch
= struct_size
;
833 struct_size
= args_size
;
835 for(i
= 0; i
< nb_args
; i
++) {
837 bt
= (vtop
->type
.t
& VT_BTYPE
);
839 size
= gfunc_arg_size(&vtop
->type
);
841 /* align to stack align size */
842 size
= (size
+ 15) & ~15;
845 gen_offs_sp(0x8d, d
, struct_size
);
846 gen_offs_sp(0x89, d
, arg
*8);
848 d
= arg_prepare_reg(arg
);
849 gen_offs_sp(0x8d, d
, struct_size
);
853 if (is_sse_float(vtop
->type
.t
)) {
854 gv(RC_XMM0
); /* only use one float register */
856 /* movq %xmm0, j*8(%rsp) */
857 gen_offs_sp(0xd60f66, 0x100, arg
*8);
859 /* movaps %xmm0, %xmmN */
861 o(0xc0 + (arg
<< 3));
862 d
= arg_prepare_reg(arg
);
863 /* mov %xmm0, %rxx */
866 o(0xc0 + REG_VALUE(d
));
869 if (bt
== VT_STRUCT
) {
870 vtop
->type
.ref
= NULL
;
871 vtop
->type
.t
= size
> 4 ? VT_LLONG
: size
> 2 ? VT_INT
872 : size
> 1 ? VT_SHORT
: VT_BYTE
;
877 gen_offs_sp(0x89, r
, arg
*8);
879 d
= arg_prepare_reg(arg
);
880 orex(1,d
,r
,0x89); /* mov */
881 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
889 /* Copy R10 and R11 into RCX and RDX, respectively */
891 o(0xd1894c); /* mov %r10, %rcx */
893 o(0xda894c); /* mov %r11, %rdx */
898 /* other compilers don't clear the upper bits when returning char/short */
899 bt
= vtop
->type
.ref
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
);
900 if (bt
== (VT_BYTE
| VT_UNSIGNED
))
901 o(0xc0b60f); /* movzbl %al, %eax */
902 else if (bt
== VT_BYTE
)
903 o(0xc0be0f); /* movsbl %al, %eax */
904 else if (bt
== VT_SHORT
)
906 else if (bt
== (VT_SHORT
| VT_UNSIGNED
))
907 o(0xc0b70f); /* movzwl %ax, %eax */
908 #if 0 /* handled in gen_cast() */
909 else if (bt
== VT_INT
)
910 o(0x9848); /* cltq */
911 else if (bt
== (VT_INT
| VT_UNSIGNED
))
912 o(0xc089); /* mov %eax,%eax */
918 #define FUNC_PROLOG_SIZE 11
920 /* generate function prolog of type 't' */
921 void gfunc_prolog(CType
*func_type
)
923 int addr
, reg_param_index
, bt
, size
;
932 ind
+= FUNC_PROLOG_SIZE
;
933 func_sub_sp_offset
= ind
;
936 sym
= func_type
->ref
;
938 /* if the function returns a structure, then add an
939 implicit pointer parameter */
941 func_var
= (sym
->c
== FUNC_ELLIPSIS
);
942 size
= gfunc_arg_size(&func_vt
);
944 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
950 /* define parameters */
951 while ((sym
= sym
->next
) != NULL
) {
953 bt
= type
->t
& VT_BTYPE
;
954 size
= gfunc_arg_size(type
);
956 if (reg_param_index
< REGN
) {
957 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
959 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
| VT_REF
, addr
);
961 if (reg_param_index
< REGN
) {
962 /* save arguments passed by register */
963 if ((bt
== VT_FLOAT
) || (bt
== VT_DOUBLE
)) {
964 o(0xd60f66); /* movq */
965 gen_modrm(reg_param_index
, VT_LOCAL
, NULL
, addr
);
967 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
970 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
976 while (reg_param_index
< REGN
) {
977 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
978 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
985 /* generate function epilog */
986 void gfunc_epilog(void)
991 if (func_ret_sub
== 0) {
996 g(func_ret_sub
>> 8);
1000 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1001 /* align local size to word & save local variables */
1002 v
= (func_scratch
+ -loc
+ 15) & -16;
1005 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
1006 oad(0xb8, v
); /* mov stacksize, %eax */
1007 oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */
1008 greloca(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
, -4);
1009 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
1011 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1012 o(0xec8148); /* sub rsp, stacksize */
1016 cur_text_section
->data_offset
= saved_ind
;
1017 pe_add_unwind_data(ind
, saved_ind
, v
);
1018 ind
= cur_text_section
->data_offset
;
/* Add 'val' to %rsp, i.e. pop call arguments off the stack.
   NOTE(review): damaged extraction — the body of the short-immediate
   branch (when val fits in a signed byte) and the closing braces are
   missing; only the 32-bit-immediate form survives here.  Code lines
   kept byte-identical. */
1023 static void gadd_sp(int val
)
/* prefer the 8-bit immediate encoding when val fits in a char */
1025 if (val
== (char)val
) {
1029 oad(0xc48148, val
); /* add $xxx, %rsp */
1033 typedef enum X86_64_Mode
{
1036 x86_64_mode_integer
,
/* Merge two SysV AMD64 ABI classification results for the eightbytes
   of an aggregate, following the ABI's merging rules: MEMORY
   dominates, then INTEGER; x87 mixed with anything else degrades to
   MEMORY; otherwise the pair is SSE.
   NOTE(review): damaged extraction — the leading 'a == b' case and
   the return statements of the two *_none branches (presumably
   returning the other operand — confirm upstream) are missing.
   Code lines kept byte-identical. */
1041 static X86_64_Mode
classify_x86_64_merge(X86_64_Mode a
, X86_64_Mode b
)
1045 else if (a
== x86_64_mode_none
)
1047 else if (b
== x86_64_mode_none
)
/* any MEMORY operand forces the whole object to MEMORY */
1049 else if ((a
== x86_64_mode_memory
) || (b
== x86_64_mode_memory
))
1050 return x86_64_mode_memory
;
1051 else if ((a
== x86_64_mode_integer
) || (b
== x86_64_mode_integer
))
1052 return x86_64_mode_integer
;
/* x87 mixed with a different class cannot stay in registers */
1053 else if ((a
== x86_64_mode_x87
) || (b
== x86_64_mode_x87
))
1054 return x86_64_mode_memory
;
1056 return x86_64_mode_sse
;
1059 static X86_64_Mode
classify_x86_64_inner(CType
*ty
)
1064 switch (ty
->t
& VT_BTYPE
) {
1065 case VT_VOID
: return x86_64_mode_none
;
1074 case VT_ENUM
: return x86_64_mode_integer
;
1077 case VT_DOUBLE
: return x86_64_mode_sse
;
1079 case VT_LDOUBLE
: return x86_64_mode_x87
;
1084 mode
= x86_64_mode_none
;
1085 for (f
= f
->next
; f
; f
= f
->next
)
1086 mode
= classify_x86_64_merge(mode
, classify_x86_64_inner(&f
->type
));
1094 static X86_64_Mode
classify_x86_64_arg(CType
*ty
, CType
*ret
, int *psize
, int *palign
, int *reg_count
)
1097 int size
, align
, ret_t
= 0;
1099 if (ty
->t
& (VT_BITFIELD
|VT_ARRAY
)) {
1104 mode
= x86_64_mode_integer
;
1106 size
= type_size(ty
, &align
);
1107 *psize
= (size
+ 7) & ~7;
1108 *palign
= (align
+ 7) & ~7;
1111 mode
= x86_64_mode_memory
;
1113 mode
= classify_x86_64_inner(ty
);
1115 case x86_64_mode_integer
:
1121 ret_t
= (size
> 4) ? VT_LLONG
: VT_INT
;
1125 case x86_64_mode_x87
:
1130 case x86_64_mode_sse
:
1136 ret_t
= (size
> 4) ? VT_DOUBLE
: VT_FLOAT
;
1139 default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/
1152 ST_FUNC
int classify_x86_64_va_arg(CType
*ty
)
1154 /* This definition must be synced with stdarg.h */
1155 enum __va_arg_type
{
1156 __va_gen_reg
, __va_float_reg
, __va_stack
1158 int size
, align
, reg_count
;
1159 X86_64_Mode mode
= classify_x86_64_arg(ty
, NULL
, &size
, &align
, ®_count
);
1161 default: return __va_stack
;
1162 case x86_64_mode_integer
: return __va_gen_reg
;
1163 case x86_64_mode_sse
: return __va_float_reg
;
1167 /* Return the number of registers needed to return the struct, or 0 if
1168 returning via struct pointer. */
1169 ST_FUNC
int gfunc_sret(CType
*vt
, int variadic
, CType
*ret
, int *ret_align
, int *regsize
)
1171 int size
, align
, reg_count
;
1172 *ret_align
= 1; // Never have to re-align return values for x86-64
1174 return (classify_x86_64_arg(vt
, ret
, &size
, &align
, ®_count
) != x86_64_mode_memory
);
1178 static const uint8_t arg_regs
[REGN
] = {
1179 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
1182 static int arg_prepare_reg(int idx
) {
1183 if (idx
== 2 || idx
== 3)
1184 /* idx=2: r10, idx=3: r11 */
1187 return arg_regs
[idx
];
1190 /* Generate function call. The function address is pushed first, then
1191 all the parameters in call order. This functions pops all the
1192 parameters and the function address. */
1193 void gfunc_call(int nb_args
)
1197 int size
, align
, r
, args_size
, stack_adjust
, run_start
, run_end
, i
, reg_count
;
1198 int nb_reg_args
= 0;
1199 int nb_sse_args
= 0;
1200 int sse_reg
, gen_reg
;
1202 /* calculate the number of integer/float register arguments */
1203 for(i
= 0; i
< nb_args
; i
++) {
1204 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1205 if (mode
== x86_64_mode_sse
)
1206 nb_sse_args
+= reg_count
;
1207 else if (mode
== x86_64_mode_integer
)
1208 nb_reg_args
+= reg_count
;
1211 /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
1212 and ended by a 16-byte aligned argument. This is because, from the point of view of
1213 the callee, argument alignment is computed from the bottom up. */
1214 /* for struct arguments, we need to call memcpy and the function
1215 call breaks register passing arguments we are preparing.
1216 So, we process arguments which will be passed by stack first. */
1217 gen_reg
= nb_reg_args
;
1218 sse_reg
= nb_sse_args
;
1221 while (run_start
!= nb_args
) {
1222 int run_gen_reg
= gen_reg
, run_sse_reg
= sse_reg
;
1226 for(i
= run_start
; (i
< nb_args
) && (run_end
== nb_args
); i
++) {
1227 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1229 case x86_64_mode_memory
:
1230 case x86_64_mode_x87
:
1235 stack_adjust
+= size
;
1238 case x86_64_mode_sse
:
1239 sse_reg
-= reg_count
;
1240 if (sse_reg
+ reg_count
> 8) goto stack_arg
;
1243 case x86_64_mode_integer
:
1244 gen_reg
-= reg_count
;
1245 if (gen_reg
+ reg_count
> REGN
) goto stack_arg
;
1247 default: break; /* nothing to be done for x86_64_mode_none */
1251 gen_reg
= run_gen_reg
;
1252 sse_reg
= run_sse_reg
;
1254 /* adjust stack to align SSE boundary */
1255 if (stack_adjust
&= 15) {
1256 /* fetch cpu flag before the following sub will change the value */
1257 if (vtop
>= vstack
&& (vtop
->r
& VT_VALMASK
) == VT_CMP
)
1260 stack_adjust
= 16 - stack_adjust
;
1262 oad(0xec81, stack_adjust
); /* sub $xxx, %rsp */
1263 args_size
+= stack_adjust
;
1266 for(i
= run_start
; i
< run_end
;) {
1267 /* Swap argument to top, it will possibly be changed here,
1268 and might use more temps. At the end of the loop we keep
1269 in on the stack and swap it back to its original position
1270 if it is a register. */
1271 SValue tmp
= vtop
[0];
1276 mode
= classify_x86_64_arg(&vtop
->type
, NULL
, &size
, &align
, ®_count
);
1278 switch (vtop
->type
.t
& VT_BTYPE
) {
1280 if (mode
== x86_64_mode_sse
) {
1282 sse_reg
-= reg_count
;
1285 } else if (mode
== x86_64_mode_integer
) {
1287 gen_reg
-= reg_count
;
1293 /* allocate the necessary size on stack */
1295 oad(0xec81, size
); /* sub $xxx, %rsp */
1296 /* generate structure store */
1297 r
= get_reg(RC_INT
);
1298 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1299 o(0xe0 + REG_VALUE(r
));
1300 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1313 assert(mode
== x86_64_mode_sse
);
1317 o(0x50); /* push $rax */
1318 /* movq %xmmN, (%rsp) */
1320 o(0x04 + REG_VALUE(r
)*8);
1329 assert(mode
== x86_64_mode_integer
);
1331 /* XXX: implicit cast ? */
1332 if (gen_reg
> REGN
) {
1335 orex(0,r
,0,0x50 + REG_VALUE(r
)); /* push r */
1343 /* And swap the argument back to it's original position. */
1350 assert((vtop
->type
.t
== tmp
.type
.t
) && (vtop
->r
== tmp
.r
));
1359 /* handle 16 byte aligned arguments at end of run */
1360 run_start
= i
= run_end
;
1361 while (i
< nb_args
) {
1362 /* Rotate argument to top since it will always be popped */
1363 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1369 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1371 oad(0xec8148, size
); /* sub $xxx, %rsp */
1372 o(0x7cdb); /* fstpt 0(%rsp) */
1377 assert(mode
== x86_64_mode_memory
);
1379 /* allocate the necessary size on stack */
1381 oad(0xec81, size
); /* sub $xxx, %rsp */
1382 /* generate structure store */
1383 r
= get_reg(RC_INT
);
1384 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1385 o(0xe0 + REG_VALUE(r
));
1386 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1397 /* XXX This should be superfluous. */
1398 save_regs(0); /* save used temporary registers */
1400 /* then, we prepare register passing arguments.
1401 Note that we cannot set RDX and RCX in this loop because gv()
1402 may break these temporary registers. Let's use R10 and R11
1404 assert(gen_reg
<= REGN
);
1405 assert(sse_reg
<= 8);
1406 for(i
= 0; i
< nb_args
; i
++) {
1407 mode
= classify_x86_64_arg(&vtop
->type
, &type
, &size
, &align
, ®_count
);
1408 /* Alter stack entry type so that gv() knows how to treat it */
1410 if (mode
== x86_64_mode_sse
) {
1411 if (reg_count
== 2) {
1413 gv(RC_FRET
); /* Use pair load into xmm0 & xmm1 */
1414 if (sse_reg
) { /* avoid redundant movaps %xmm0, %xmm0 */
1415 /* movaps %xmm0, %xmmN */
1417 o(0xc0 + (sse_reg
<< 3));
1418 /* movaps %xmm1, %xmmN */
1420 o(0xc1 + ((sse_reg
+1) << 3));
1423 assert(reg_count
== 1);
1425 /* Load directly to register */
1426 gv(RC_XMM0
<< sse_reg
);
1428 } else if (mode
== x86_64_mode_integer
) {
1430 /* XXX: implicit cast ? */
1432 gen_reg
-= reg_count
;
1434 d
= arg_prepare_reg(gen_reg
);
1435 orex(1,d
,r
,0x89); /* mov */
1436 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
1437 if (reg_count
== 2) {
1438 d
= arg_prepare_reg(gen_reg
+1);
1439 orex(1,d
,vtop
->r2
,0x89); /* mov */
1440 o(0xc0 + REG_VALUE(vtop
->r2
) * 8 + REG_VALUE(d
));
1445 assert(gen_reg
== 0);
1446 assert(sse_reg
== 0);
1448 /* We shouldn't have many operands on the stack anymore, but the
1449 call address itself is still there, and it might be in %eax
1450 (or edx/ecx) currently, which the below writes would clobber.
1451 So evict all remaining operands here. */
1454 /* Copy R10 and R11 into RDX and RCX, respectively */
1455 if (nb_reg_args
> 2) {
1456 o(0xd2894c); /* mov %r10, %rdx */
1457 if (nb_reg_args
> 3) {
1458 o(0xd9894c); /* mov %r11, %rcx */
1462 if (vtop
->type
.ref
->c
!= FUNC_NEW
) /* implies FUNC_OLD or FUNC_ELLIPSIS */
1463 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
1471 #define FUNC_PROLOG_SIZE 11
1473 static void push_arg_reg(int i
) {
1475 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
1478 /* generate function prolog of type 't' */
1479 void gfunc_prolog(CType
*func_type
)
1482 int i
, addr
, align
, size
, reg_count
;
1483 int param_addr
= 0, reg_param_index
, sse_param_index
;
1487 sym
= func_type
->ref
;
1488 addr
= PTR_SIZE
* 2;
1490 ind
+= FUNC_PROLOG_SIZE
;
1491 func_sub_sp_offset
= ind
;
1494 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
1495 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
1496 seen_reg_num
= seen_sse_num
= 0;
1497 /* frame pointer and return address */
1498 seen_stack_size
= PTR_SIZE
* 2;
1499 /* count the number of seen parameters */
1500 sym
= func_type
->ref
;
1501 while ((sym
= sym
->next
) != NULL
) {
1503 mode
= classify_x86_64_arg(type
, NULL
, &size
, &align
, ®_count
);
1507 seen_stack_size
= ((seen_stack_size
+ align
- 1) & -align
) + size
;
1510 case x86_64_mode_integer
:
1511 if (seen_reg_num
+ reg_count
<= 8) {
1512 seen_reg_num
+= reg_count
;
1519 case x86_64_mode_sse
:
1520 if (seen_sse_num
+ reg_count
<= 8) {
1521 seen_sse_num
+= reg_count
;
1531 /* movl $0x????????, -0x10(%rbp) */
1533 gen_le32(seen_reg_num
* 8);
1534 /* movl $0x????????, -0xc(%rbp) */
1536 gen_le32(seen_sse_num
* 16 + 48);
1537 /* movl $0x????????, -0x8(%rbp) */
1539 gen_le32(seen_stack_size
);
1541 /* save all register passing arguments */
1542 for (i
= 0; i
< 8; i
++) {
1544 o(0xd60f66); /* movq */
1545 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
1546 /* movq $0, loc+8(%rbp) */
1551 for (i
= 0; i
< REGN
; i
++) {
1552 push_arg_reg(REGN
-1-i
);
1556 sym
= func_type
->ref
;
1557 reg_param_index
= 0;
1558 sse_param_index
= 0;
1560 /* if the function returns a structure, then add an
1561 implicit pointer parameter */
1562 func_vt
= sym
->type
;
1563 mode
= classify_x86_64_arg(&func_vt
, NULL
, &size
, &align
, ®_count
);
1564 if (mode
== x86_64_mode_memory
) {
1565 push_arg_reg(reg_param_index
);
1569 /* define parameters */
1570 while ((sym
= sym
->next
) != NULL
) {
1572 mode
= classify_x86_64_arg(type
, NULL
, &size
, &align
, ®_count
);
1574 case x86_64_mode_sse
:
1575 if (sse_param_index
+ reg_count
<= 8) {
1576 /* save arguments passed by register */
1577 loc
-= reg_count
* 8;
1579 for (i
= 0; i
< reg_count
; ++i
) {
1580 o(0xd60f66); /* movq */
1581 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, param_addr
+ i
*8);
1585 addr
= (addr
+ align
- 1) & -align
;
1591 case x86_64_mode_memory
:
1592 case x86_64_mode_x87
:
1593 addr
= (addr
+ align
- 1) & -align
;
1598 case x86_64_mode_integer
: {
1599 if (reg_param_index
+ reg_count
<= REGN
) {
1600 /* save arguments passed by register */
1601 loc
-= reg_count
* 8;
1603 for (i
= 0; i
< reg_count
; ++i
) {
1604 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, param_addr
+ i
*8);
1608 addr
= (addr
+ align
- 1) & -align
;
1614 default: break; /* nothing to be done for x86_64_mode_none */
1616 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1617 VT_LOCAL
| VT_LVAL
, param_addr
);
1620 #ifdef CONFIG_TCC_BCHECK
1621 /* leave some room for bound checking code */
1622 if (tcc_state
->do_bounds_check
) {
1623 func_bound_offset
= lbounds_section
->data_offset
;
1624 func_bound_ind
= ind
;
1625 oad(0xb8, 0); /* lbound section pointer */
1626 o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
1627 oad(0xb8, 0); /* call to function */
1632 /* generate function epilog */
1633 void gfunc_epilog(void)
1637 #ifdef CONFIG_TCC_BCHECK
1638 if (tcc_state
->do_bounds_check
1639 && func_bound_offset
!= lbounds_section
->data_offset
)
1645 /* add end of table info */
1646 bounds_ptr
= section_ptr_add(lbounds_section
, sizeof(addr_t
));
1649 /* generate bound local allocation */
1650 sym_data
= get_sym_ref(&char_pointer_type
, lbounds_section
,
1651 func_bound_offset
, lbounds_section
->data_offset
);
1653 ind
= func_bound_ind
;
1654 greloc(cur_text_section
, sym_data
, ind
+ 1, R_386_32
);
1656 gen_static_call(TOK___bound_local_new
);
1659 /* generate bound check local freeing */
1660 o(0x5250); /* save returned value, if any */
1661 greloc(cur_text_section
, sym_data
, ind
+ 1, R_386_32
);
1662 oad(0xb8, 0); /* mov xxx, %rax */
1663 o(0xc78948); /* mov %rax,%rdi # first arg in %rdi, this must be ptr */
1664 gen_static_call(TOK___bound_local_delete
);
1665 o(0x585a); /* restore returned value, if any */
1668 o(0xc9); /* leave */
1669 if (func_ret_sub
== 0) {
1672 o(0xc2); /* ret n */
1674 g(func_ret_sub
>> 8);
1676 /* align local size to word & save local variables */
1677 v
= (-loc
+ 15) & -16;
1679 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1680 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1681 o(0xec8148); /* sub rsp, stacksize */
1688 /* generate a jump to a label */
1691 return psym(0xe9, t
);
1694 /* generate a jump to a fixed address */
1695 void gjmp_addr(int a
)
1703 oad(0xe9, a
- ind
- 5);
1707 ST_FUNC
void gtst_addr(int inv
, int a
)
/* Pop the comparison token from the value stack; XOR with 'inv' flips the
   condition when an inverted test was requested. */
1709 inv
^= (vtop
--)->c
.i
;
/* NOTE(review): original lines 1710-1715 are missing (presumably the
   short-displacement branch path). The surviving statement emits the long
   conditional-jump form; displacement relative to instruction end (-4). */
1716 oad(inv
- 16, a
- 4);
1720 /* generate a test. set 'inv' to invert test. Stack entry is popped */
/* NOTE(review): garbled extraction with missing original lines (1722, 1724,
   1727, 1738-1740, 1742-1744, 1751, 1753, 1755-1765). Code bytes untouched. */
1721 ST_FUNC
int gtst(int inv
, int t
)
1723 int v
= vtop
->r
& VT_VALMASK
;
1725 /* fast case : can jump directly since flags are set */
/* Bit 0x100 in c.i marks the pending comparison as a float compare (set by
   gen_opf); it needs extra parity-flag handling for unordered results. */
1726 if (vtop
->c
.i
& 0x100)
1728 /* This was a float compare. If the parity flag is set
1729 the result was unordered. For anything except != this
1730 means false and we don't jump (anding both conditions).
1731 For != this means true (oring both).
1732 Take care about inverting the test. We need to jump
1733 to our target if the result was unordered and test wasn't NE,
1734 otherwise if unordered we don't want to jump. */
1735 vtop
->c
.i
&= ~0x100;
1736 if (inv
== (vtop
->c
.i
== TOK_NE
))
1737 o(0x067a); /* jp +6 */
1741 t
= psym(0x8a, t
); /* jp t */
/* Emit the conditional jump: the comparison token minus 16 yields the jcc
   sub-opcode; XOR with 'inv' selects the complementary condition. */
1745 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
1746 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
1747 /* && or || optimization */
1748 if ((v
& 1) == inv
) {
1749 /* insert vtop->c jump list in t */
1750 uint32_t n1
, n
= vtop
->c
.i
;
/* Walk to the tail of the existing patch chain (each slot holds the next
   offset, 0 terminates), then splice 't' onto the end. */
1752 while ((n1
= read32le(cur_text_section
->data
+ n
)))
1754 write32le(cur_text_section
->data
+ n
, t
);
1766 /* generate an integer binary operation */
/* NOTE(review): garbled extraction — the switch statement skeleton, local
   declarations, and many case bodies are missing (numbering gaps 1768-1771,
   1775-1777, 1779-1780, 1782-1787, 1791-1792, 1795-1796, 1798-1799,
   1802-1803, 1805-1809, 1811-1828, 1830-1831, 1834-1850, 1854, 1857,
   1860-1871, 1875-1878, 1883-1893). Code bytes untouched. */
1767 void gen_opi(int op
)
/* ll: operate in 64-bit width; uu: unsigned semantics; cc: second operand is
   a plain constant (no lvalue/symbol), eligible for immediate encoding. */
1772 ll
= is64_type(vtop
[-1].type
.t
);
1773 uu
= (vtop
[-1].type
.t
& VT_UNSIGNED
) != 0;
1774 cc
= (vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
;
1778 case TOK_ADDC1
: /* add with carry generation */
/* Immediate form only if the 64-bit constant fits in a sign-extended imm32. */
1781 if (cc
&& (!ll
|| (int)vtop
->c
.i
== vtop
->c
.i
)) {
1788 /* XXX: generate inc and dec for smaller code ? */
/* 0x83 /opc = op r, imm8 (sign-extended); 0x81 /opc = op r, imm32. */
1789 orex(ll
, r
, 0, 0x83);
1790 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1793 orex(ll
, r
, 0, 0x81);
1794 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1797 gv2(RC_INT
, RC_INT
);
/* Register-register form: (opc<<3)|0x01 selects the ALU opcode family. */
1800 orex(ll
, r
, fr
, (opc
<< 3) | 0x01);
1801 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1804 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1810 case TOK_SUBC1
: /* sub with carry generation */
1813 case TOK_ADDC2
: /* add with carry use */
1816 case TOK_SUBC2
: /* sub with carry use */
1829 gv2(RC_INT
, RC_INT
);
1832 orex(ll
, fr
, r
, 0xaf0f); /* imul fr, r */
1833 o(0xc0 + REG_VALUE(fr
) + REG_VALUE(r
) * 8);
/* Shift handling: constant count uses 0xc1 /opc with an imm8 masked to the
   operand width; variable count must live in %cl (0xd3 /opc). */
1845 opc
= 0xc0 | (opc
<< 3);
1851 orex(ll
, r
, 0, 0xc1); /* shl/shr/sar $xxx, r */
1852 o(opc
| REG_VALUE(r
));
1853 g(vtop
->c
.i
& (ll
? 63 : 31));
1855 /* we generate the shift in ecx */
1856 gv2(RC_INT
, RC_RCX
);
1858 orex(ll
, r
, 0, 0xd3); /* shl/shr/sar %cl, r */
1859 o(opc
| REG_VALUE(r
));
1872 /* first operand must be in eax */
1873 /* XXX: need better constraint for second operand */
1874 gv2(RC_RAX
, RC_CX
);
1879 orex(ll
, 0, 0, uu
? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1880 orex(ll
, fr
, 0, 0xf7); /* div fr, %eax */
1881 o((uu
? 0xf0 : 0xf8) + REG_VALUE(fr
));
/* Modulo results land in %rdx (div leaves quotient in rax, remainder rdx). */
1882 if (op
== '%' || op
== TOK_UMOD
)
1894 void gen_opl(int op
)
/* NOTE(review): body missing from this extraction (original lines 1895-1897).
   Upstream TCC this is a one-liner forwarding 64-bit ops to gen_opi(op) —
   confirm against the full source before relying on this. */
1899 /* generate a floating point operation 'v = t1 op t2' instruction. The
1900 two operands are guaranted to have the same floating point type */
1901 /* XXX: need to use ST1 too */
/* NOTE(review): garbled extraction with many missing original lines (gaps at
   1903, 1905, 1907, 1910-1913, 1915-1916, 1920-1924, 1928-1930, 1937,
   1939-1940, 1944, 1947, 1953, 1956-1964, 1967-1989, 1991-1994, 1997-1998,
   2000, 2002, 2004-2008, 2010-2011, 2013, 2016, 2018-2024, 2026, 2028, 2031,
   2033, 2036, 2038-2041, 2043, 2045-2061, 2063-2064, 2067, 2069, 2071-2075,
   2077, 2079-2082, 2084-2090, 2093, 2095-2101). Code bytes untouched. */
1902 void gen_opf(int op
)
1904 int a
, ft
, fc
, swapped
, r
;
/* Target register class: long double lives on the x87 stack (ST0), float and
   double in SSE registers. */
1906 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1908 /* convert constants to memory references */
1909 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1914 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1917 /* must put at least one value in the floating point register */
1918 if ((vtop
[-1].r
& VT_LVAL
) &&
1919 (vtop
[0].r
& VT_LVAL
)) {
1925 /* swap the stack if needed so that t1 is the register and t2 is
1926 the memory reference */
1927 if (vtop
[-1].r
& VT_LVAL
) {
/* ---- long double path: x87 stack arithmetic / comparison ---- */
1931 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1932 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1933 /* load on stack second operand */
1934 load(TREG_ST0
, vtop
);
1935 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1936 if (op
== TOK_GE
|| op
== TOK_GT
)
1938 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1941 o(0xc9d9); /* fxch %st(1) */
/* fucompp tolerates NaNs for ==/!=; fcompp for ordered relations. Result
   flags are fetched through %ax via fnstsw and masked in %ah. */
1942 if (op
== TOK_EQ
|| op
== TOK_NE
)
1943 o(0xe9da); /* fucompp */
1945 o(0xd9de); /* fcompp */
1946 o(0xe0df); /* fnstsw %ax */
1948 o(0x45e480); /* and $0x45, %ah */
1949 o(0x40fC80); /* cmp $0x40, %ah */
1950 } else if (op
== TOK_NE
) {
1951 o(0x45e480); /* and $0x45, %ah */
1952 o(0x40f480); /* xor $0x40, %ah */
1954 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1955 o(0x05c4f6); /* test $0x05, %ah */
1958 o(0x45c4f6); /* test $0x45, %ah */
1965 /* no memory reference possible for long double operations */
1966 load(TREG_ST0
, vtop
);
1990 o(0xde); /* fxxxp %st, %st(1) */
/* ---- SSE path: float/double comparison and arithmetic ---- */
1995 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1996 /* if saved lvalue, then we must reload it */
1999 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
2001 r
= get_reg(RC_INT
);
2003 v1
.r
= VT_LOCAL
| VT_LVAL
;
2009 if (op
== TOK_EQ
|| op
== TOK_NE
) {
2012 if (op
== TOK_LE
|| op
== TOK_LT
)
2014 if (op
== TOK_LE
|| op
== TOK_GE
) {
2015 op
= 0x93; /* setae */
2017 op
= 0x97; /* seta */
2025 assert(!(vtop
[-1].r
& VT_LVAL
));
2027 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
)
/* ucomisd is quiet on NaN (used for ==/!=); comisd signals (ordered). */
2029 o(0x2e0f); /* ucomisd */
2032 o(0x2f0f); /* comisd */
2034 if (vtop
->r
& VT_LVAL
) {
2035 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
2037 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
/* Mark the pending compare as a float compare (0x100) so gtst() adds the
   parity-flag handling for unordered results. */
2042 vtop
->c
.i
= op
| 0x100;
2044 assert((vtop
->type
.t
& VT_BTYPE
) != VT_LDOUBLE
);
2062 assert((ft
& VT_BTYPE
) != VT_LDOUBLE
);
2065 /* if saved lvalue, then we must reload it */
2066 if ((vtop
->r
& VT_VALMASK
) == VT_LLOCAL
) {
2068 r
= get_reg(RC_INT
);
2070 v1
.r
= VT_LOCAL
| VT_LVAL
;
2076 assert(!(vtop
[-1].r
& VT_LVAL
));
2078 assert(vtop
->r
& VT_LVAL
);
2083 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
2091 if (vtop
->r
& VT_LVAL
) {
2092 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
2094 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
2102 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
2103 and 'long long' cases. */
/* NOTE(review): garbled extraction; original lines 2105, 2107-2108, 2119,
   2123, 2128-2130, 2132, 2137-2139, 2141-2143 are missing. Bytes untouched. */
2104 void gen_cvt_itof(int t
)
/* ---- long double target: go through the x87 FPU via push + fild ---- */
2106 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
2109 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
2110 /* signed long long to float/double/long double (unsigned case
2111 is handled generically) */
2112 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
2113 o(0x242cdf); /* fildll (%rsp) */
2114 o(0x08c48348); /* add $8, %rsp */
2115 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
2116 (VT_INT
| VT_UNSIGNED
)) {
2117 /* unsigned int to float/double/long double */
/* A zero qword is pushed above the value so the 32-bit unsigned value reads
   as a non-negative 64-bit integer for fildll. */
2118 o(0x6a); /* push $0 */
2120 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
2121 o(0x242cdf); /* fildll (%rsp) */
2122 o(0x10c48348); /* add $16, %rsp */
2124 /* int to float/double/long double */
2125 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
2126 o(0x2404db); /* fildl (%rsp) */
2127 o(0x08c48348); /* add $8, %rsp */
/* ---- float/double target: SSE cvtsi2ss/cvtsi2sd ---- */
2131 int r
= get_reg(RC_FLOAT
);
/* Prefix selects destination width: 0xf3 = cvtsi2ss (float), 0xf2 = cvtsi2sd
   (double). */
2133 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
?1:0));
/* 64-bit source operand (long long, or unsigned int widened) — presumably a
   REX.W prefix is emitted in the missing lines 2137-2139; confirm upstream. */
2134 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
2135 (VT_INT
| VT_UNSIGNED
) ||
2136 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
2140 o(0xc0 + (vtop
->r
& VT_VALMASK
) + REG_VALUE(r
)*8); /* cvtsi2sd */
2145 /* convert from one floating point type to another */
/* NOTE(review): garbled extraction; original lines 2147-2153, 2155, 2162,
   2164, 2166, 2168-2169, 2171, 2178, 2180, 2182, 2184-2188, 2193, 2195-2196,
   2200, 2202-2206 are missing (locals bt/tbt, gv() calls, some opcode bytes
   of the mov sequences, vtop bookkeeping). Code bytes untouched. */
2146 void gen_cvt_ftof(int t
)
/* ---- source is float ---- */
2154 if (bt
== VT_FLOAT
) {
2156 if (tbt
== VT_DOUBLE
) {
/* REG_VALUE(r)*9 encodes modrm 0xC0 | reg<<3 | reg, i.e. src == dst. */
2157 o(0x140f); /* unpcklps */
2158 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2159 o(0x5a0f); /* cvtps2pd */
2160 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2161 } else if (tbt
== VT_LDOUBLE
) {
/* float -> long double goes through memory: spill the xmm to the red zone
   below %rsp, then reload onto the x87 stack. */
2163 /* movss %xmm0,-0x10(%rsp) */
2165 o(0x44 + REG_VALUE(vtop
->r
)*8);
2167 o(0xf02444d9); /* flds -0x10(%rsp) */
/* ---- source is double ---- */
2170 } else if (bt
== VT_DOUBLE
) {
2172 if (tbt
== VT_FLOAT
) {
2173 o(0x140f66); /* unpcklpd */
2174 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2175 o(0x5a0f66); /* cvtpd2ps */
2176 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2177 } else if (tbt
== VT_LDOUBLE
) {
2179 /* movsd %xmm0,-0x10(%rsp) */
2181 o(0x44 + REG_VALUE(vtop
->r
)*8);
2183 o(0xf02444dd); /* fldl -0x10(%rsp) */
/* ---- source is long double (x87): store from ST0 and reload into SSE ---- */
2189 r
= get_reg(RC_FLOAT
);
2190 if (tbt
== VT_DOUBLE
) {
2191 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
2192 /* movsd -0x10(%rsp),%xmm0 */
2194 o(0x44 + REG_VALUE(r
)*8);
2197 } else if (tbt
== VT_FLOAT
) {
2198 o(0xf0245cd9); /* fstps -0x10(%rsp) */
2199 /* movss -0x10(%rsp),%xmm0 */
2201 o(0x44 + REG_VALUE(r
)*8);
2208 /* convert fp to int 't' type */
/* NOTE(review): garbled extraction; original lines 2210, 2212-2213,
   2216-2224, 2227, 2229-2232, 2235-2236 are missing (size computation, gv()
   of the source, the per-type opcode prefixes, result vset). Bytes untouched. */
2209 void gen_cvt_ftoi(int t
)
2211 int ft
, bt
, size
, r
;
/* No direct x87 -> integer truncation path here: demote long double to
   double first, then fall through to the SSE conversion. */
2214 if (bt
== VT_LDOUBLE
) {
2215 gen_cvt_ftof(VT_DOUBLE
);
2225 r
= get_reg(RC_INT
);
2226 if (bt
== VT_FLOAT
) {
2228 } else if (bt
== VT_DOUBLE
) {
/* REX.W when the destination integer is 8 bytes (size == 8). */
2233 orex(size
== 8, r
, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
2234 o(0xc0 + REG_VALUE(vtop
->r
) + REG_VALUE(r
)*8);
2238 /* computed goto support */
2245 /* Save the stack pointer onto the stack and return the location of its address */
2246 ST_FUNC
void gen_vla_sp_save(int addr
) {
2247 /* mov %rsp,addr(%rbp)*/
2248 gen_modrm64(0x89, TREG_RSP
, VT_LOCAL
, NULL
, addr
);
2251 /* Restore the SP from a location on the stack */
2252 ST_FUNC
void gen_vla_sp_restore(int addr
) {
2253 gen_modrm64(0x8b, TREG_RSP
, VT_LOCAL
, NULL
, addr
);
2256 /* Subtract from the stack pointer, and push the resulting value onto the stack */
/* NOTE(review): garbled extraction; original lines 2262-2264 (the PE-path
   gfunc_call/vset), 2266-2267 (the sub-from-%rsp opcode bytes), and 2270-2276
   (the 16-byte alignment mask, result mov, and vset) are missing. Code bytes
   untouched; restore from upstream TCC before compiling. */
2257 ST_FUNC
void gen_vla_alloc(CType
*type
, int align
) {
2258 #ifdef TCC_TARGET_PE
2259 /* alloca does more than just adjust %rsp on Windows */
/* On PE targets, delegate to the runtime alloca so stack probing happens. */
2260 vpush_global_sym(&func_old_type
, TOK_alloca
);
2261 vswap(); /* Move alloca ref past allocation size */
2265 r
= gv(RC_INT
); /* allocation size */
2268 o(0xe0 | REG_VALUE(r
));
2269 /* We align to 16 bytes rather than align */
2277 /* end of x86-64 code generator */
2278 /*************************************************************/
2279 #endif /* ! TARGET_DEFS_ONLY */
2280 /******************************************************/