2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which does
31 assumptions on it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
37 #define RC_ST0 0x0080 /* only for long double */
42 #define RC_XMM0 0x1000
43 #define RC_XMM1 0x2000
44 #define RC_XMM2 0x4000
45 #define RC_XMM3 0x8000
46 #define RC_XMM4 0x10000
47 #define RC_XMM5 0x20000
48 #define RC_XMM6 0x40000
49 #define RC_XMM7 0x80000
50 #define RC_IRET RC_RAX /* function return: integer register */
51 #define RC_LRET RC_RDX /* function return: second integer register */
52 #define RC_FRET RC_XMM0 /* function return: float register */
53 #define RC_QRET RC_XMM1 /* function return: second float register */
55 /* pretty names for the registers */
83 #define REX_BASE(reg) (((reg) >> 3) & 1)
84 #define REG_VALUE(reg) ((reg) & 7)
86 /* return registers for function */
87 #define REG_IRET TREG_RAX /* single word int return register */
88 #define REG_LRET TREG_RDX /* second word return register (for long long) */
89 #define REG_FRET TREG_XMM0 /* float return register */
90 #define REG_QRET TREG_XMM1 /* second float return register */
92 /* defined if function parameters must be evaluated in reverse order */
93 #define INVERT_FUNC_PARAMS
95 /* pointer size, in bytes */
98 /* long double size and alignment, in bytes */
99 #define LDOUBLE_SIZE 16
100 #define LDOUBLE_ALIGN 16
101 /* maximum alignment (for aligned attribute support) */
104 /******************************************************/
107 #define EM_TCC_TARGET EM_X86_64
109 /* relocation type for 32 bit data relocation */
110 #define R_DATA_32 R_X86_64_32
111 #define R_DATA_PTR R_X86_64_64
112 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
113 #define R_COPY R_X86_64_COPY
115 #define ELF_START_ADDR 0x08048000
116 #define ELF_PAGE_SIZE 0x1000
118 /******************************************************/
119 #else /* ! TARGET_DEFS_ONLY */
120 /******************************************************/
124 ST_DATA
const int reg_classes
[NB_REGS
] = {
125 /* eax */ RC_INT
| RC_RAX
,
126 /* ecx */ RC_INT
| RC_RCX
,
127 /* edx */ RC_INT
| RC_RDX
,
141 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
142 /* xmm1 */ RC_FLOAT
| RC_XMM1
,
143 /* xmm2 */ RC_FLOAT
| RC_XMM2
,
144 /* xmm3 */ RC_FLOAT
| RC_XMM3
,
145 /* xmm4 */ RC_FLOAT
| RC_XMM4
,
146 /* xmm5 */ RC_FLOAT
| RC_XMM5
,
147 /* xmm6 an xmm7 are included so gv() can be used on them,
148 but they are not tagged with RC_FLOAT because they are
149 callee saved on Windows */
155 static unsigned long func_sub_sp_offset
;
156 static int func_ret_sub
;
158 /* XXX: make it faster ? */
163 if (ind1
> cur_text_section
->data_allocated
)
164 section_realloc(cur_text_section
, ind1
);
165 cur_text_section
->data
[ind
] = c
;
169 void o(unsigned int c
)
191 void gen_le64(int64_t c
)
203 void orex(int ll
, int r
, int r2
, int b
)
205 if ((r
& VT_VALMASK
) >= VT_CONST
)
207 if ((r2
& VT_VALMASK
) >= VT_CONST
)
209 if (ll
|| REX_BASE(r
) || REX_BASE(r2
))
210 o(0x40 | REX_BASE(r
) | (REX_BASE(r2
) << 2) | (ll
<< 3));
214 /* output a symbol and patch all calls to it */
215 void gsym_addr(int t
, int a
)
219 ptr
= (int *)(cur_text_section
->data
+ t
);
220 n
= *ptr
; /* next value */
231 /* psym is used to put an instruction with a data field which is a
232 reference to a symbol. It is in fact the same as oad ! */
235 static int is64_type(int t
)
237 return ((t
& VT_BTYPE
) == VT_PTR
||
238 (t
& VT_BTYPE
) == VT_FUNC
||
239 (t
& VT_BTYPE
) == VT_LLONG
);
242 static int is_sse_float(int t
) {
245 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
249 /* instruction + 4 bytes data. Return the address of the data */
250 ST_FUNC
int oad(int c
, int s
)
256 if (ind1
> cur_text_section
->data_allocated
)
257 section_realloc(cur_text_section
, ind1
);
258 *(int *)(cur_text_section
->data
+ ind
) = s
;
264 ST_FUNC
void gen_addr32(int r
, Sym
*sym
, int c
)
267 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
271 /* output constant with relocation if 'r & VT_SYM' is true */
272 ST_FUNC
void gen_addr64(int r
, Sym
*sym
, int64_t c
)
275 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
279 /* output constant with relocation if 'r & VT_SYM' is true */
280 ST_FUNC
void gen_addrpc32(int r
, Sym
*sym
, int c
)
283 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
287 /* output got address with relocation */
288 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
290 #ifndef TCC_TARGET_PE
293 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
294 sr
= cur_text_section
->reloc
;
295 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
298 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
299 cur_text_section
->data
[ind
-3],
300 cur_text_section
->data
[ind
-2],
301 cur_text_section
->data
[ind
-1]
303 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
307 /* we use add c, %xxx for displacement */
309 o(0xc0 + REG_VALUE(r
));
314 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
316 op_reg
= REG_VALUE(op_reg
) << 3;
317 if ((r
& VT_VALMASK
) == VT_CONST
) {
318 /* constant memory reference */
321 gen_gotpcrel(r
, sym
, c
);
323 gen_addrpc32(r
, sym
, c
);
325 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
326 /* currently, we use only ebp as base */
328 /* short reference */
332 oad(0x85 | op_reg
, c
);
334 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
336 g(0x80 | op_reg
| REG_VALUE(r
));
339 g(0x00 | op_reg
| REG_VALUE(r
));
342 g(0x00 | op_reg
| REG_VALUE(r
));
346 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
348 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
350 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
353 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
355 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
358 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
359 orex(1, r
, op_reg
, opcode
);
360 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
364 /* load 'r' from value 'sv' */
365 void load(int r
, SValue
*sv
)
367 int v
, t
, ft
, fc
, fr
;
372 sv
= pe_getimport(sv
, &v2
);
379 #ifndef TCC_TARGET_PE
380 /* we use indirect access via got */
381 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
382 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
383 /* use the result register as a temporal register */
384 int tr
= r
| TREG_MEM
;
386 /* we cannot use float registers as a temporal register */
387 tr
= get_reg(RC_INT
) | TREG_MEM
;
389 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
391 /* load from the temporal register */
399 if (v
== VT_LLOCAL
) {
401 v1
.r
= VT_LOCAL
| VT_LVAL
;
404 if (!(reg_classes
[fr
] & RC_INT
))
405 fr
= get_reg(RC_INT
);
409 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
411 r
= REG_VALUE(r
); /* movd */
412 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
413 b
= 0x7e0ff3; /* movq */
415 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
416 b
= 0xdb, r
= 5; /* fldt */
417 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
418 b
= 0xbe0f; /* movsbl */
419 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
420 b
= 0xb60f; /* movzbl */
421 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
422 b
= 0xbf0f; /* movswl */
423 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
424 b
= 0xb70f; /* movzwl */
426 assert(((ft
& VT_BTYPE
) == VT_INT
) || ((ft
& VT_BTYPE
) == VT_LLONG
)
427 || ((ft
& VT_BTYPE
) == VT_PTR
) || ((ft
& VT_BTYPE
) == VT_ENUM
)
428 || ((ft
& VT_BTYPE
) == VT_FUNC
));
433 gen_modrm64(b
, r
, fr
, sv
->sym
, fc
);
436 gen_modrm(r
, fr
, sv
->sym
, fc
);
443 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
444 gen_addrpc32(fr
, sv
->sym
, fc
);
446 if (sv
->sym
->type
.t
& VT_STATIC
) {
448 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
449 gen_addrpc32(fr
, sv
->sym
, fc
);
452 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
453 gen_gotpcrel(r
, sv
->sym
, fc
);
456 } else if (is64_type(ft
)) {
457 orex(1,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
460 orex(0,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
463 } else if (v
== VT_LOCAL
) {
464 orex(1,0,r
,0x8d); /* lea xxx(%ebp), r */
465 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
466 } else if (v
== VT_CMP
) {
468 if ((fc
& ~0x100) != TOK_NE
)
469 oad(0xb8 + REG_VALUE(r
), 0); /* mov $0, r */
471 oad(0xb8 + REG_VALUE(r
), 1); /* mov $1, r */
474 /* This was a float compare. If the parity bit is
475 set the result was unordered, meaning false for everything
476 except TOK_NE, and true for TOK_NE. */
478 o(0x037a + (REX_BASE(r
) << 8));
480 orex(0,r
,0, 0x0f); /* setxx %br */
482 o(0xc0 + REG_VALUE(r
));
483 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
486 oad(0xb8 + REG_VALUE(r
), t
); /* mov $1, r */
487 o(0x05eb + (REX_BASE(r
) << 8)); /* jmp after */
490 oad(0xb8 + REG_VALUE(r
), t
^ 1); /* mov $0, r */
492 if ((r
>= TREG_XMM0
) && (r
<= TREG_XMM7
)) {
494 /* gen_cvt_ftof(VT_DOUBLE); */
495 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
496 /* movsd -0x10(%rsp),%xmmN */
498 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
501 assert((v
>= TREG_XMM0
) && (v
<= TREG_XMM7
));
502 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
505 assert((ft
& VT_BTYPE
) == VT_DOUBLE
);
508 o(0xc0 + REG_VALUE(v
) + REG_VALUE(r
)*8);
510 } else if (r
== TREG_ST0
) {
511 assert((v
>= TREG_XMM0
) || (v
<= TREG_XMM7
));
512 /* gen_cvt_ftof(VT_LDOUBLE); */
513 /* movsd %xmmN,-0x10(%rsp) */
515 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
517 o(0xf02444dd); /* fldl -0x10(%rsp) */
520 o(0xc0 + REG_VALUE(r
) + REG_VALUE(v
) * 8); /* mov v, r */
526 /* store register 'r' in lvalue 'v' */
527 void store(int r
, SValue
*v
)
531 /* store the REX prefix in this variable when PIC is enabled */
536 v
= pe_getimport(v
, &v2
);
541 fr
= v
->r
& VT_VALMASK
;
544 #ifndef TCC_TARGET_PE
545 /* we need to access the variable via got */
546 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
547 /* mov xx(%rip), %r11 */
549 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
550 pic
= is64_type(bt
) ? 0x49 : 0x41;
554 /* XXX: incorrect if float reg to reg */
555 if (bt
== VT_FLOAT
) {
558 o(0x7e0f); /* movd */
560 } else if (bt
== VT_DOUBLE
) {
563 o(0xd60f); /* movq */
565 } else if (bt
== VT_LDOUBLE
) {
566 o(0xc0d9); /* fld %st(0) */
574 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
576 else if (is64_type(bt
))
582 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
587 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
588 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
589 } else if (fr
!= r
) {
590 /* XXX: don't we really come here? */
592 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
595 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
596 gen_modrm(r
, v
->r
, v
->sym
, fc
);
597 } else if (fr
!= r
) {
598 /* XXX: don't we really come here? */
600 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
605 /* 'is_jmp' is '1' if it is a jump */
606 static void gcall_or_jmp(int is_jmp
)
609 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
611 if (vtop
->r
& VT_SYM
) {
612 /* relocation case */
613 greloc(cur_text_section
, vtop
->sym
,
614 ind
+ 1, R_X86_64_PC32
);
616 /* put an empty PC32 relocation */
617 put_elf_reloc(symtab_section
, cur_text_section
,
618 ind
+ 1, R_X86_64_PC32
, 0);
620 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
622 /* otherwise, indirect call */
626 o(0xff); /* call/jmp *r */
627 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
634 static const uint8_t arg_regs
[REGN
] = {
635 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
638 /* Prepare arguments in R10 and R11 rather than RCX and RDX
639 because gv() will not ever use these */
640 static int arg_prepare_reg(int idx
) {
641 if (idx
== 0 || idx
== 1)
642 /* idx=0: r10, idx=1: r11 */
645 return arg_regs
[idx
];
648 static int func_scratch
;
650 /* Generate function call. The function address is pushed first, then
651 all the parameters in call order. This functions pops all the
652 parameters and the function address. */
654 void gen_offs_sp(int b
, int r
, int d
)
656 orex(1,0,r
& 0x100 ? 0 : r
, b
);
658 o(0x2444 | (REG_VALUE(r
) << 3));
661 o(0x2484 | (REG_VALUE(r
) << 3));
666 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
667 ST_FUNC
int gfunc_sret(CType
*vt
, CType
*ret
, int *ret_align
)
670 *ret_align
= 1; // Never have to re-align return values for x86-64
671 size
= type_size(vt
, &align
);
675 } else if (size
> 4) {
678 } else if (size
> 2) {
681 } else if (size
> 1) {
690 int gfunc_arg_size(CType
*type
) {
692 if (type
->t
& (VT_ARRAY
|VT_BITFIELD
))
694 return type_size(type
, &align
);
697 void gfunc_call(int nb_args
)
699 int size
, r
, args_size
, i
, d
, bt
, struct_size
;
702 args_size
= (nb_args
< REGN
? REGN
: nb_args
) * PTR_SIZE
;
705 /* for struct arguments, we need to call memcpy and the function
706 call breaks register passing arguments we are preparing.
707 So, we process arguments which will be passed by stack first. */
708 struct_size
= args_size
;
709 for(i
= 0; i
< nb_args
; i
++) {
714 bt
= (sv
->type
.t
& VT_BTYPE
);
715 size
= gfunc_arg_size(&sv
->type
);
718 continue; /* arguments smaller than 8 bytes passed in registers or on stack */
720 if (bt
== VT_STRUCT
) {
721 /* align to stack align size */
722 size
= (size
+ 15) & ~15;
723 /* generate structure store */
725 gen_offs_sp(0x8d, r
, struct_size
);
728 /* generate memcpy call */
729 vset(&sv
->type
, r
| VT_LVAL
, 0);
733 } else if (bt
== VT_LDOUBLE
) {
735 gen_offs_sp(0xdb, 0x107, struct_size
);
740 if (func_scratch
< struct_size
)
741 func_scratch
= struct_size
;
744 struct_size
= args_size
;
746 for(i
= 0; i
< nb_args
; i
++) {
748 bt
= (vtop
->type
.t
& VT_BTYPE
);
750 size
= gfunc_arg_size(&vtop
->type
);
752 /* align to stack align size */
753 size
= (size
+ 15) & ~15;
756 gen_offs_sp(0x8d, d
, struct_size
);
757 gen_offs_sp(0x89, d
, arg
*8);
759 d
= arg_prepare_reg(arg
);
760 gen_offs_sp(0x8d, d
, struct_size
);
764 if (is_sse_float(vtop
->type
.t
)) {
765 gv(RC_XMM0
); /* only use one float register */
767 /* movq %xmm0, j*8(%rsp) */
768 gen_offs_sp(0xd60f66, 0x100, arg
*8);
770 /* movaps %xmm0, %xmmN */
772 o(0xc0 + (arg
<< 3));
773 d
= arg_prepare_reg(arg
);
774 /* mov %xmm0, %rxx */
777 o(0xc0 + REG_VALUE(d
));
780 if (bt
== VT_STRUCT
) {
781 vtop
->type
.ref
= NULL
;
782 vtop
->type
.t
= size
> 4 ? VT_LLONG
: size
> 2 ? VT_INT
783 : size
> 1 ? VT_SHORT
: VT_BYTE
;
788 gen_offs_sp(0x89, r
, arg
*8);
790 d
= arg_prepare_reg(arg
);
791 orex(1,d
,r
,0x89); /* mov */
792 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
800 /* Copy R10 and R11 into RCX and RDX, respectively */
802 o(0xd1894c); /* mov %r10, %rcx */
804 o(0xda894c); /* mov %r11, %rdx */
813 #define FUNC_PROLOG_SIZE 11
815 /* generate function prolog of type 't' */
816 void gfunc_prolog(CType
*func_type
)
818 int addr
, reg_param_index
, bt
, size
;
827 ind
+= FUNC_PROLOG_SIZE
;
828 func_sub_sp_offset
= ind
;
831 sym
= func_type
->ref
;
833 /* if the function returns a structure, then add an
834 implicit pointer parameter */
836 size
= gfunc_arg_size(&func_vt
);
838 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
844 /* define parameters */
845 while ((sym
= sym
->next
) != NULL
) {
847 bt
= type
->t
& VT_BTYPE
;
848 size
= gfunc_arg_size(type
);
850 if (reg_param_index
< REGN
) {
851 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
853 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
| VT_REF
, addr
);
855 if (reg_param_index
< REGN
) {
856 /* save arguments passed by register */
857 if ((bt
== VT_FLOAT
) || (bt
== VT_DOUBLE
)) {
858 o(0xd60f66); /* movq */
859 gen_modrm(reg_param_index
, VT_LOCAL
, NULL
, addr
);
861 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
864 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
870 while (reg_param_index
< REGN
) {
871 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
872 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
879 /* generate function epilog */
880 void gfunc_epilog(void)
885 if (func_ret_sub
== 0) {
890 g(func_ret_sub
>> 8);
894 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
895 /* align local size to word & save local variables */
896 v
= (func_scratch
+ -loc
+ 15) & -16;
899 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
900 oad(0xb8, v
); /* mov stacksize, %eax */
901 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
902 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
903 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
905 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
906 o(0xec8148); /* sub rsp, stacksize */
910 cur_text_section
->data_offset
= saved_ind
;
911 pe_add_unwind_data(ind
, saved_ind
, v
);
912 ind
= cur_text_section
->data_offset
;
/* Add 'val' to %rsp, preferring the short imm8 encoding when the
   value fits in a signed byte. */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc483);          /* add $imm8, %rsp */
        g(val);
    } else {
        oad(0xc48148, val); /* add $imm32, %rsp */
    }
}
927 typedef enum X86_64_Mode
{
935 static X86_64_Mode
classify_x86_64_merge(X86_64_Mode a
, X86_64_Mode b
) {
938 else if (a
== x86_64_mode_none
)
940 else if (b
== x86_64_mode_none
)
942 else if ((a
== x86_64_mode_memory
) || (b
== x86_64_mode_memory
))
943 return x86_64_mode_memory
;
944 else if ((a
== x86_64_mode_integer
) || (b
== x86_64_mode_integer
))
945 return x86_64_mode_integer
;
946 else if ((a
== x86_64_mode_x87
) || (b
== x86_64_mode_x87
))
947 return x86_64_mode_memory
;
949 return x86_64_mode_sse
;
952 static X86_64_Mode
classify_x86_64_inner(CType
*ty
) {
956 switch (ty
->t
& VT_BTYPE
) {
957 case VT_VOID
: return x86_64_mode_none
;
966 case VT_ENUM
: return x86_64_mode_integer
;
969 case VT_DOUBLE
: return x86_64_mode_sse
;
971 case VT_LDOUBLE
: return x86_64_mode_x87
;
977 if (f
->next
&& (f
->c
== f
->next
->c
))
978 return x86_64_mode_memory
;
980 mode
= x86_64_mode_none
;
981 for (; f
; f
= f
->next
)
982 mode
= classify_x86_64_merge(mode
, classify_x86_64_inner(&f
->type
));
990 static X86_64_Mode
classify_x86_64_arg(CType
*ty
, CType
*ret
, int *psize
, int *palign
, int *reg_count
) {
992 int size
, align
, ret_t
;
994 if (ty
->t
& (VT_BITFIELD
|VT_ARRAY
)) {
998 mode
= x86_64_mode_integer
;
1000 size
= type_size(ty
, &align
);
1001 *psize
= (size
+ 7) & ~7;
1002 *palign
= (align
+ 7) & ~7;
1005 mode
= x86_64_mode_memory
;
1007 mode
= classify_x86_64_inner(ty
);
1009 case x86_64_mode_integer
:
1015 ret_t
= (size
> 4) ? VT_LLONG
: VT_INT
;
1019 case x86_64_mode_x87
:
1024 case x86_64_mode_sse
:
1030 ret_t
= (size
> 4) ? VT_DOUBLE
: VT_FLOAT
;
1045 ST_FUNC
int classify_x86_64_va_arg(CType
*ty
) {
1046 /* This definition must be synced with stdarg.h */
1047 enum __va_arg_type
{
1048 __va_gen_reg
, __va_float_reg
, __va_stack
1050 int size
, align
, reg_count
;
1051 X86_64_Mode mode
= classify_x86_64_arg(ty
, NULL
, &size
, &align
, ®_count
);
1053 default: return __va_stack
;
1054 case x86_64_mode_integer
: return __va_gen_reg
;
1055 case x86_64_mode_sse
: return __va_float_reg
;
1059 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
1060 int gfunc_sret(CType
*vt
, CType
*ret
, int *ret_align
) {
1061 int size
, align
, reg_count
;
1062 *ret_align
= 1; // Never have to re-align return values for x86-64
1063 return (classify_x86_64_arg(vt
, ret
, &size
, &align
, ®_count
) == x86_64_mode_memory
);
1067 static const uint8_t arg_regs
[REGN
] = {
1068 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
1071 static int arg_prepare_reg(int idx
) {
1072 if (idx
== 2 || idx
== 3)
1073 /* idx=2: r10, idx=3: r11 */
1076 return arg_regs
[idx
];
1079 /* Generate function call. The function address is pushed first, then
1080 all the parameters in call order. This functions pops all the
1081 parameters and the function address. */
1082 void gfunc_call(int nb_args
)
1086 int size
, align
, r
, args_size
, stack_adjust
, run_start
, run_end
, i
, j
, reg_count
;
1087 int nb_reg_args
= 0;
1088 int nb_sse_args
= 0;
1089 int sse_reg
, gen_reg
;
1091 /* calculate the number of integer/float register arguments */
1092 for(i
= 0; i
< nb_args
; i
++) {
1093 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1094 if (mode
== x86_64_mode_sse
)
1095 nb_sse_args
+= reg_count
;
1096 else if (mode
== x86_64_mode_integer
)
1097 nb_reg_args
+= reg_count
;
1100 /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
1101 and ended by a 16-byte aligned argument. This is because, from the point of view of
1102 the callee, argument alignment is computed from the bottom up. */
1103 /* for struct arguments, we need to call memcpy and the function
1104 call breaks register passing arguments we are preparing.
1105 So, we process arguments which will be passed by stack first. */
1106 gen_reg
= nb_reg_args
;
1107 sse_reg
= nb_sse_args
;
1110 while (run_start
!= nb_args
) {
1111 int run_gen_reg
= gen_reg
, run_sse_reg
= sse_reg
;
1115 for(i
= run_start
; (i
< nb_args
) && (run_end
== nb_args
); i
++) {
1116 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1118 case x86_64_mode_memory
:
1119 case x86_64_mode_x87
:
1124 stack_adjust
+= size
;
1127 case x86_64_mode_sse
:
1128 sse_reg
-= reg_count
;
1129 if (sse_reg
+ reg_count
> 8) goto stack_arg
;
1132 case x86_64_mode_integer
:
1133 gen_reg
-= reg_count
;
1134 if (gen_reg
+ reg_count
> REGN
) goto stack_arg
;
1139 gen_reg
= run_gen_reg
;
1140 sse_reg
= run_sse_reg
;
1142 /* adjust stack to align SSE boundary */
1143 if (stack_adjust
&= 15) {
1144 /* fetch cpu flag before the following sub will change the value */
1145 if (vtop
>= vstack
&& (vtop
->r
& VT_VALMASK
) == VT_CMP
)
1148 stack_adjust
= 16 - stack_adjust
;
1150 oad(0xec81, stack_adjust
); /* sub $xxx, %rsp */
1151 args_size
+= stack_adjust
;
1154 for(i
= run_start
; i
< run_end
;) {
1155 /* Swap argument to top, it will possibly be changed here,
1156 and might use more temps. At the end of the loop we keep
1157 in on the stack and swap it back to its original position
1158 if it is a register. */
1159 SValue tmp
= vtop
[0];
1163 mode
= classify_x86_64_arg(&vtop
->type
, NULL
, &size
, &align
, ®_count
);
1166 switch (vtop
->type
.t
& VT_BTYPE
) {
1168 if (mode
== x86_64_mode_sse
) {
1170 sse_reg
-= reg_count
;
1173 } else if (mode
== x86_64_mode_integer
) {
1175 gen_reg
-= reg_count
;
1181 /* allocate the necessary size on stack */
1183 oad(0xec81, size
); /* sub $xxx, %rsp */
1184 /* generate structure store */
1185 r
= get_reg(RC_INT
);
1186 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1187 o(0xe0 + REG_VALUE(r
));
1188 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1201 assert(mode
== x86_64_mode_sse
);
1205 o(0x50); /* push $rax */
1206 /* movq %xmmN, (%rsp) */
1208 o(0x04 + REG_VALUE(r
)*8);
1217 assert(mode
== x86_64_mode_integer
);
1219 /* XXX: implicit cast ? */
1220 if (gen_reg
> REGN
) {
1223 orex(0,r
,0,0x50 + REG_VALUE(r
)); /* push r */
1231 /* And swap the argument back to it's original position. */
1238 assert((vtop
->type
.t
== tmp
.type
.t
) && (vtop
->r
== tmp
.r
));
1247 /* handle 16 byte aligned arguments at end of run */
1248 run_start
= i
= run_end
;
1249 while (i
< nb_args
) {
1250 /* Rotate argument to top since it will always be popped */
1251 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1257 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1259 oad(0xec8148, size
); /* sub $xxx, %rsp */
1260 o(0x7cdb); /* fstpt 0(%rsp) */
1265 assert(mode
== x86_64_mode_memory
);
1267 /* allocate the necessary size on stack */
1269 oad(0xec81, size
); /* sub $xxx, %rsp */
1270 /* generate structure store */
1271 r
= get_reg(RC_INT
);
1272 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1273 o(0xe0 + REG_VALUE(r
));
1274 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1285 /* XXX This should be superfluous. */
1286 save_regs(0); /* save used temporary registers */
1288 /* then, we prepare register passing arguments.
1289 Note that we cannot set RDX and RCX in this loop because gv()
1290 may break these temporary registers. Let's use R10 and R11
1292 assert(gen_reg
<= REGN
);
1293 assert(sse_reg
<= 8);
1294 for(i
= 0; i
< nb_args
; i
++) {
1295 mode
= classify_x86_64_arg(&vtop
->type
, &type
, &size
, &align
, ®_count
);
1296 /* Alter stack entry type so that gv() knows how to treat it */
1298 if (mode
== x86_64_mode_sse
) {
1299 if (reg_count
== 2) {
1301 gv(RC_FRET
); /* Use pair load into xmm0 & xmm1 */
1302 if (sse_reg
) { /* avoid redundant movaps %xmm0, %xmm0 */
1303 /* movaps %xmm0, %xmmN */
1305 o(0xc0 + (sse_reg
<< 3));
1306 /* movaps %xmm1, %xmmN */
1308 o(0xc1 + ((sse_reg
+1) << 3));
1311 assert(reg_count
== 1);
1313 /* Load directly to register */
1314 gv(RC_XMM0
<< sse_reg
);
1316 } else if (mode
== x86_64_mode_integer
) {
1318 /* XXX: implicit cast ? */
1319 gen_reg
-= reg_count
;
1321 int d
= arg_prepare_reg(gen_reg
);
1322 orex(1,d
,r
,0x89); /* mov */
1323 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
1324 if (reg_count
== 2) {
1325 d
= arg_prepare_reg(gen_reg
+1);
1326 orex(1,d
,vtop
->r2
,0x89); /* mov */
1327 o(0xc0 + REG_VALUE(vtop
->r2
) * 8 + REG_VALUE(d
));
1332 assert(gen_reg
== 0);
1333 assert(sse_reg
== 0);
1335 /* We shouldn't have many operands on the stack anymore, but the
1336 call address itself is still there, and it might be in %eax
1337 (or edx/ecx) currently, which the below writes would clobber.
1338 So evict all remaining operands here. */
1341 /* Copy R10 and R11 into RDX and RCX, respectively */
1342 if (nb_reg_args
> 2) {
1343 o(0xd2894c); /* mov %r10, %rdx */
1344 if (nb_reg_args
> 3) {
1345 o(0xd9894c); /* mov %r11, %rcx */
1349 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
1357 #define FUNC_PROLOG_SIZE 11
1359 static void push_arg_reg(int i
) {
1361 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
1364 /* generate function prolog of type 't' */
1365 void gfunc_prolog(CType
*func_type
)
1368 int i
, addr
, align
, size
, reg_count
;
1369 int param_addr
, reg_param_index
, sse_param_index
;
1373 sym
= func_type
->ref
;
1374 addr
= PTR_SIZE
* 2;
1376 ind
+= FUNC_PROLOG_SIZE
;
1377 func_sub_sp_offset
= ind
;
1380 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
1381 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
1382 seen_reg_num
= seen_sse_num
= 0;
1383 /* frame pointer and return address */
1384 seen_stack_size
= PTR_SIZE
* 2;
1385 /* count the number of seen parameters */
1386 sym
= func_type
->ref
;
1387 while ((sym
= sym
->next
) != NULL
) {
1389 mode
= classify_x86_64_arg(type
, NULL
, &size
, &align
, ®_count
);
1393 seen_stack_size
= ((seen_stack_size
+ align
- 1) & -align
) + size
;
1396 case x86_64_mode_integer
:
1397 if (seen_reg_num
+ reg_count
<= 8) {
1398 seen_reg_num
+= reg_count
;
1405 case x86_64_mode_sse
:
1406 if (seen_sse_num
+ reg_count
<= 8) {
1407 seen_sse_num
+= reg_count
;
1417 /* movl $0x????????, -0x10(%rbp) */
1419 gen_le32(seen_reg_num
* 8);
1420 /* movl $0x????????, -0xc(%rbp) */
1422 gen_le32(seen_sse_num
* 16 + 48);
1423 /* movl $0x????????, -0x8(%rbp) */
1425 gen_le32(seen_stack_size
);
1427 /* save all register passing arguments */
1428 for (i
= 0; i
< 8; i
++) {
1430 o(0xd60f66); /* movq */
1431 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
1432 /* movq $0, loc+8(%rbp) */
1437 for (i
= 0; i
< REGN
; i
++) {
1438 push_arg_reg(REGN
-1-i
);
1442 sym
= func_type
->ref
;
1443 reg_param_index
= 0;
1444 sse_param_index
= 0;
1446 /* if the function returns a structure, then add an
1447 implicit pointer parameter */
1448 func_vt
= sym
->type
;
1449 mode
= classify_x86_64_arg(&func_vt
, NULL
, &size
, &align
, ®_count
);
1450 if (mode
== x86_64_mode_memory
) {
1451 push_arg_reg(reg_param_index
);
1455 /* define parameters */
1456 while ((sym
= sym
->next
) != NULL
) {
1458 mode
= classify_x86_64_arg(type
, NULL
, &size
, &align
, ®_count
);
1460 case x86_64_mode_sse
:
1461 if (sse_param_index
+ reg_count
<= 8) {
1462 /* save arguments passed by register */
1463 loc
-= reg_count
* 8;
1465 for (i
= 0; i
< reg_count
; ++i
) {
1466 o(0xd60f66); /* movq */
1467 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, param_addr
+ i
*8);
1471 addr
= (addr
+ align
- 1) & -align
;
1474 sse_param_index
+= reg_count
;
1478 case x86_64_mode_memory
:
1479 case x86_64_mode_x87
:
1480 addr
= (addr
+ align
- 1) & -align
;
1485 case x86_64_mode_integer
: {
1486 if (reg_param_index
+ reg_count
<= REGN
) {
1487 /* save arguments passed by register */
1488 loc
-= reg_count
* 8;
1490 for (i
= 0; i
< reg_count
; ++i
) {
1491 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, param_addr
+ i
*8);
1495 addr
= (addr
+ align
- 1) & -align
;
1498 reg_param_index
+= reg_count
;
1503 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1504 VT_LOCAL
| VT_LVAL
, param_addr
);
1508 /* generate function epilog */
1509 void gfunc_epilog(void)
1513 o(0xc9); /* leave */
1514 if (func_ret_sub
== 0) {
1517 o(0xc2); /* ret n */
1519 g(func_ret_sub
>> 8);
1521 /* align local size to word & save local variables */
1522 v
= (-loc
+ 15) & -16;
1524 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1525 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1526 o(0xec8148); /* sub rsp, stacksize */
1533 /* generate a jump to a label */
1536 return psym(0xe9, t
);
1539 /* generate a jump to a fixed address */
1540 void gjmp_addr(int a
)
1548 oad(0xe9, a
- ind
- 5);
1552 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1553 int gtst(int inv
, int t
)
1557 v
= vtop
->r
& VT_VALMASK
;
1559 /* fast case : can jump directly since flags are set */
1560 if (vtop
->c
.i
& 0x100)
1562 /* This was a float compare. If the parity flag is set
1563 the result was unordered. For anything except != this
1564 means false and we don't jump (anding both conditions).
1565 For != this means true (oring both).
1566 Take care about inverting the test. We need to jump
1567 to our target if the result was unordered and test wasn't NE,
1568 otherwise if unordered we don't want to jump. */
1569 vtop
->c
.i
&= ~0x100;
1570 if (!inv
== (vtop
->c
.i
!= TOK_NE
))
1571 o(0x067a); /* jp +6 */
1575 t
= psym(0x8a, t
); /* jp t */
1579 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
1580 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
1581 /* && or || optimization */
1582 if ((v
& 1) == inv
) {
1583 /* insert vtop->c jump list in t */
1586 p
= (int *)(cur_text_section
->data
+ *p
);
1594 if (is_float(vtop
->type
.t
) ||
1595 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1599 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1600 /* constant jmp optimization */
1601 if ((vtop
->c
.i
!= 0) != inv
)
1606 o(0xc0 + REG_VALUE(v
) * 9);
1608 t
= psym(0x85 ^ inv
, t
);
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    /* ll: 64-bit operation; uu: unsigned operands;
       cc: second operand is a plain constant */
    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        /* constant path only if the immediate fits in 32 bits */
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83); /* op $imm8, r */
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81); /* op $imm32, r */
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01); /* op fr, r */
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* comparison: result lives in the flags */
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case: mask the count to the operand width */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX); /* div clobbers rdx */
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        /* quotient in rax, remainder in rdx */
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        /* comparisons: cmp is ALU sub-opcode 7 */
        opc = 7;
        goto gen_op8;
    }
}
/* 64-bit integer operation: on x86-64 the plain integer path already
   handles 64-bit operands, so just delegate */
void gen_opl(int op)
{
    gen_opi(op);
}
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranted to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    /* long double lives in the x87 stack; float/double in SSE regs */
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            /* normalise the operand order for the x87 compare */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            o(0xe9da); /* fucompp */
            o(0xe0df); /* fnstsw %ax */
            /* decode C0/C2/C3 from %ah; 0x45 masks those bits */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            /* select the x87 arithmetic sub-opcode; subtraction and
               division have reversed variants for swapped operands */
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        /* float/double: SSE path */
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.ul;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            /* 0x66 prefix selects ucomisd over ucomiss */
            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            o(0x2e0f); /* ucomisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            /* 0x100 marks a float compare for gtst's parity handling */
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            /* SSE arithmetic: opcode is 0x58 + a (addss/subss/...) */
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            /* 0xf2 = scalar double prefix, 0xf3 = scalar float */
            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        /* target is long double: go through the x87 fild path */
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double: push a zero
               high qword so fildll sees a non-negative 64-bit value */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        /* SSE cvtsi2ss/cvtsi2sd */
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        /* 0xf3 prefix for float target, 0xf2 for double */
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* 64-bit source operand (also used for unsigned int, whose
               zero-extended value fits a signed 64-bit convert) */
            o(0x48); /* REX.W */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            /* widen in place in the same xmm register */
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            /* bounce through the red zone into the x87 stack */
            save_reg(RC_ST0); /* NOTE(review): looks like a register class,
                                 not a register — confirm against save_reg */
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            /* narrow in place in the same xmm register */
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0); /* NOTE(review): see note above */
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        /* source is long double on the x87 stack */
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        /* no direct x87->int truncating convert here: demote to double
           first, then use the SSE convert below */
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    /* any non-int target (long long, pointer-sized) converts via a
       64-bit destination register */
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    /* scalar prefix selects between the ss and sd forms */
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
2081 /* computed goto support */
/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}
/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    /* mov addr(%rbp),%rsp — inverse of gen_vla_sp_save */
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}
/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
    vset(type, REG_IRET, 0);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b48);
    o(0xe0 | REG_VALUE(r));
    /* We align to 16 bytes rather than align */
    /* and ~15, %rsp */
    o(0xf0e48348);
    /* mov %rsp, r */
    o(0x8948);
    o(0xe0 | REG_VALUE(r));
    vpop();
    vset(type, r, 0);
#endif
}
2125 /* end of x86-64 code generator */
2126 /*************************************************************/
2127 #endif /* ! TARGET_DEFS_ONLY */
2128 /******************************************************/