2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which does
31 assumptions on it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
37 #define RC_ST0 0x0080 /* only for long double */
42 #define RC_XMM0 0x1000
43 #define RC_XMM1 0x2000
44 #define RC_XMM2 0x4000
45 #define RC_XMM3 0x8000
46 #define RC_XMM4 0x10000
47 #define RC_XMM5 0x20000
48 #define RC_XMM6 0x40000
49 #define RC_XMM7 0x80000
50 #define RC_IRET RC_RAX /* function return: integer register */
51 #define RC_LRET RC_RDX /* function return: second integer register */
52 #define RC_FRET RC_XMM0 /* function return: float register */
53 #define RC_QRET RC_XMM1 /* function return: second float register */
55 /* pretty names for the registers */
83 #define REX_BASE(reg) (((reg) >> 3) & 1)
84 #define REG_VALUE(reg) ((reg) & 7)
86 /* return registers for function */
87 #define REG_IRET TREG_RAX /* single word int return register */
88 #define REG_LRET TREG_RDX /* second word return register (for long long) */
89 #define REG_FRET TREG_XMM0 /* float return register */
90 #define REG_QRET TREG_XMM1 /* second float return register */
92 /* defined if function parameters must be evaluated in reverse order */
93 #define INVERT_FUNC_PARAMS
95 /* pointer size, in bytes */
98 /* long double size and alignment, in bytes */
99 #define LDOUBLE_SIZE 16
100 #define LDOUBLE_ALIGN 16
101 /* maximum alignment (for aligned attribute support) */
104 /******************************************************/
107 #define EM_TCC_TARGET EM_X86_64
109 /* relocation type for 32 bit data relocation */
110 #define R_DATA_32 R_X86_64_32
111 #define R_DATA_PTR R_X86_64_64
112 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
113 #define R_COPY R_X86_64_COPY
115 #define ELF_START_ADDR 0x08048000
116 #define ELF_PAGE_SIZE 0x1000
118 /******************************************************/
119 #else /* ! TARGET_DEFS_ONLY */
120 /******************************************************/
124 ST_DATA
const int reg_classes
[NB_REGS
] = {
125 /* eax */ RC_INT
| RC_RAX
,
126 /* ecx */ RC_INT
| RC_RCX
,
127 /* edx */ RC_INT
| RC_RDX
,
141 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
142 /* xmm1 */ RC_FLOAT
| RC_XMM1
,
143 /* xmm2 */ RC_FLOAT
| RC_XMM2
,
144 /* xmm3 */ RC_FLOAT
| RC_XMM3
,
145 /* xmm4 */ RC_FLOAT
| RC_XMM4
,
146 /* xmm5 */ RC_FLOAT
| RC_XMM5
,
147 /* xmm6 an xmm7 are included so gv() can be used on them,
148 but they are not tagged with RC_FLOAT because they are
149 callee saved on Windows */
/* offset (in the text section) of the "sub $xx,%rsp" emitted by the
   prolog; patched by gfunc_epilog() once the frame size is known */
static unsigned long func_sub_sp_offset;
/* "ret n" argument for functions that pop their own arguments
   (0 means a plain "ret") */
static int func_ret_sub;
158 /* XXX: make it faster ? */
163 if (ind1
> cur_text_section
->data_allocated
)
164 section_realloc(cur_text_section
, ind1
);
165 cur_text_section
->data
[ind
] = c
;
/* Emit a little-endian opcode: output the low byte of 'c', then shift,
   until 'c' is exhausted.  Multi-byte opcodes are therefore written with
   their bytes pre-swapped (e.g. o(0xbe0f) emits 0f be).
   NOTE(review): body was absent from this mangled view; reconstructed
   from the i386/x86-64 generator convention — confirm against upstream. */
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
/* Emit a 64-bit little-endian immediate.
   NOTE(review): body was absent from this mangled view; reconstructed
   byte-by-byte like gen_le32 — confirm against upstream. */
void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
203 void orex(int ll
, int r
, int r2
, int b
)
205 if ((r
& VT_VALMASK
) >= VT_CONST
)
207 if ((r2
& VT_VALMASK
) >= VT_CONST
)
209 if (ll
|| REX_BASE(r
) || REX_BASE(r2
))
210 o(0x40 | REX_BASE(r
) | (REX_BASE(r2
) << 2) | (ll
<< 3));
214 /* output a symbol and patch all calls to it */
215 void gsym_addr(int t
, int a
)
219 ptr
= (int *)(cur_text_section
->data
+ t
);
220 n
= *ptr
; /* next value */
231 /* psym is used to put an instruction with a data field which is a
232 reference to a symbol. It is in fact the same as oad ! */
235 static int is64_type(int t
)
237 return ((t
& VT_BTYPE
) == VT_PTR
||
238 (t
& VT_BTYPE
) == VT_FUNC
||
239 (t
& VT_BTYPE
) == VT_LLONG
);
242 static int is_sse_float(int t
) {
245 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
249 /* instruction + 4 bytes data. Return the address of the data */
250 ST_FUNC
int oad(int c
, int s
)
256 if (ind1
> cur_text_section
->data_allocated
)
257 section_realloc(cur_text_section
, ind1
);
258 *(int *)(cur_text_section
->data
+ ind
) = s
;
264 ST_FUNC
void gen_addr32(int r
, Sym
*sym
, int c
)
267 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
271 /* output constant with relocation if 'r & VT_SYM' is true */
272 ST_FUNC
void gen_addr64(int r
, Sym
*sym
, int64_t c
)
275 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
279 /* output constant with relocation if 'r & VT_SYM' is true */
280 ST_FUNC
void gen_addrpc32(int r
, Sym
*sym
, int c
)
283 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
287 /* output got address with relocation */
288 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
290 #ifndef TCC_TARGET_PE
293 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
294 sr
= cur_text_section
->reloc
;
295 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
298 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
299 cur_text_section
->data
[ind
-3],
300 cur_text_section
->data
[ind
-2],
301 cur_text_section
->data
[ind
-1]
303 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
307 /* we use add c, %xxx for displacement */
309 o(0xc0 + REG_VALUE(r
));
314 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
316 op_reg
= REG_VALUE(op_reg
) << 3;
317 if ((r
& VT_VALMASK
) == VT_CONST
) {
318 /* constant memory reference */
321 gen_gotpcrel(r
, sym
, c
);
323 gen_addrpc32(r
, sym
, c
);
325 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
326 /* currently, we use only ebp as base */
328 /* short reference */
332 oad(0x85 | op_reg
, c
);
334 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
336 g(0x80 | op_reg
| REG_VALUE(r
));
339 g(0x00 | op_reg
| REG_VALUE(r
));
342 g(0x00 | op_reg
| REG_VALUE(r
));
346 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
348 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
350 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
353 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
355 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
358 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
359 orex(1, r
, op_reg
, opcode
);
360 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
364 /* load 'r' from value 'sv' */
365 void load(int r
, SValue
*sv
)
367 int v
, t
, ft
, fc
, fr
;
372 sv
= pe_getimport(sv
, &v2
);
379 #ifndef TCC_TARGET_PE
380 /* we use indirect access via got */
381 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
382 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
383 /* use the result register as a temporal register */
384 int tr
= r
| TREG_MEM
;
386 /* we cannot use float registers as a temporal register */
387 tr
= get_reg(RC_INT
) | TREG_MEM
;
389 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
391 /* load from the temporal register */
399 if (v
== VT_LLOCAL
) {
401 v1
.r
= VT_LOCAL
| VT_LVAL
;
404 if (!(reg_classes
[fr
] & RC_INT
))
405 fr
= get_reg(RC_INT
);
409 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
411 r
= REG_VALUE(r
); /* movd */
412 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
413 b
= 0x7e0ff3; /* movq */
415 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
416 b
= 0xdb, r
= 5; /* fldt */
417 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
418 b
= 0xbe0f; /* movsbl */
419 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
420 b
= 0xb60f; /* movzbl */
421 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
422 b
= 0xbf0f; /* movswl */
423 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
424 b
= 0xb70f; /* movzwl */
426 assert(((ft
& VT_BTYPE
) == VT_INT
) || ((ft
& VT_BTYPE
) == VT_LLONG
)
427 || ((ft
& VT_BTYPE
) == VT_PTR
) || ((ft
& VT_BTYPE
) == VT_ENUM
)
428 || ((ft
& VT_BTYPE
) == VT_FUNC
));
433 gen_modrm64(b
, r
, fr
, sv
->sym
, fc
);
436 gen_modrm(r
, fr
, sv
->sym
, fc
);
443 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
444 gen_addrpc32(fr
, sv
->sym
, fc
);
446 if (sv
->sym
->type
.t
& VT_STATIC
) {
448 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
449 gen_addrpc32(fr
, sv
->sym
, fc
);
452 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
453 gen_gotpcrel(r
, sv
->sym
, fc
);
456 } else if (is64_type(ft
)) {
457 orex(1,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
460 orex(0,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
463 } else if (v
== VT_LOCAL
) {
464 orex(1,0,r
,0x8d); /* lea xxx(%ebp), r */
465 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
466 } else if (v
== VT_CMP
) {
468 if ((fc
& ~0x100) != TOK_NE
)
469 oad(0xb8 + REG_VALUE(r
), 0); /* mov $0, r */
471 oad(0xb8 + REG_VALUE(r
), 1); /* mov $1, r */
474 /* This was a float compare. If the parity bit is
475 set the result was unordered, meaning false for everything
476 except TOK_NE, and true for TOK_NE. */
478 o(0x037a + (REX_BASE(r
) << 8));
480 orex(0,r
,0, 0x0f); /* setxx %br */
482 o(0xc0 + REG_VALUE(r
));
483 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
486 oad(0xb8 + REG_VALUE(r
), t
); /* mov $1, r */
487 o(0x05eb + (REX_BASE(r
) << 8)); /* jmp after */
490 oad(0xb8 + REG_VALUE(r
), t
^ 1); /* mov $0, r */
492 if ((r
>= TREG_XMM0
) && (r
<= TREG_XMM7
)) {
494 /* gen_cvt_ftof(VT_DOUBLE); */
495 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
496 /* movsd -0x10(%rsp),%xmmN */
498 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
501 assert((v
>= TREG_XMM0
) && (v
<= TREG_XMM7
));
502 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
505 assert((ft
& VT_BTYPE
) == VT_DOUBLE
);
508 o(0xc0 + REG_VALUE(v
) + REG_VALUE(r
)*8);
510 } else if (r
== TREG_ST0
) {
511 assert((v
>= TREG_XMM0
) || (v
<= TREG_XMM7
));
512 /* gen_cvt_ftof(VT_LDOUBLE); */
513 /* movsd %xmmN,-0x10(%rsp) */
515 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
517 o(0xf02444dd); /* fldl -0x10(%rsp) */
520 o(0xc0 + REG_VALUE(r
) + REG_VALUE(v
) * 8); /* mov v, r */
526 /* store register 'r' in lvalue 'v' */
527 void store(int r
, SValue
*v
)
531 /* store the REX prefix in this variable when PIC is enabled */
536 v
= pe_getimport(v
, &v2
);
541 fr
= v
->r
& VT_VALMASK
;
544 #ifndef TCC_TARGET_PE
545 /* we need to access the variable via got */
546 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
547 /* mov xx(%rip), %r11 */
549 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
550 pic
= is64_type(bt
) ? 0x49 : 0x41;
554 /* XXX: incorrect if float reg to reg */
555 if (bt
== VT_FLOAT
) {
558 o(0x7e0f); /* movd */
560 } else if (bt
== VT_DOUBLE
) {
563 o(0xd60f); /* movq */
565 } else if (bt
== VT_LDOUBLE
) {
566 o(0xc0d9); /* fld %st(0) */
574 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
576 else if (is64_type(bt
))
582 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
587 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
588 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
589 } else if (fr
!= r
) {
590 /* XXX: don't we really come here? */
592 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
595 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
596 gen_modrm(r
, v
->r
, v
->sym
, fc
);
597 } else if (fr
!= r
) {
598 /* XXX: don't we really come here? */
600 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
605 /* 'is_jmp' is '1' if it is a jump */
606 static void gcall_or_jmp(int is_jmp
)
609 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
611 if (vtop
->r
& VT_SYM
) {
612 /* relocation case */
613 greloc(cur_text_section
, vtop
->sym
,
614 ind
+ 1, R_X86_64_PC32
);
616 /* put an empty PC32 relocation */
617 put_elf_reloc(symtab_section
, cur_text_section
,
618 ind
+ 1, R_X86_64_PC32
, 0);
620 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
622 /* otherwise, indirect call */
626 o(0xff); /* call/jmp *r */
627 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
634 static const uint8_t arg_regs
[REGN
] = {
635 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
638 /* Prepare arguments in R10 and R11 rather than RCX and RDX
639 because gv() will not ever use these */
640 static int arg_prepare_reg(int idx
) {
641 if (idx
== 0 || idx
== 1)
642 /* idx=0: r10, idx=1: r11 */
645 return arg_regs
[idx
];
/* size of the Win64 scratch area needed below the locals (set while
   generating calls, consumed by gfunc_epilog when sizing the frame) */
static int func_scratch;
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This functions pops all the
   parameters and the function address. */

/* Emit opcode 'b' with an %rsp-relative displacement 'd'; register 'r'
   supplies the reg field (r & 0x100 suppresses its REX contribution). */
void gen_offs_sp(int b, int r, int d)
{
    orex(1, 0, r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        /* disp8 form */
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        /* disp32 form */
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
666 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
667 ST_FUNC
int gfunc_sret(CType
*vt
, CType
*ret
, int *ret_align
) {
668 *ret_align
= 1; // Never have to re-align return values for x86-64
670 size
= type_size(vt
, &align
);
674 } else if (size
> 4) {
677 } else if (size
> 2) {
680 } else if (size
> 1) {
689 int gfunc_arg_size(CType
*type
) {
690 if (type
->t
& (VT_ARRAY
|VT_BITFIELD
))
693 return type_size(type
, &align
);
696 void gfunc_call(int nb_args
)
698 int size
, r
, args_size
, i
, d
, bt
, struct_size
;
701 args_size
= (nb_args
< REGN
? REGN
: nb_args
) * PTR_SIZE
;
704 /* for struct arguments, we need to call memcpy and the function
705 call breaks register passing arguments we are preparing.
706 So, we process arguments which will be passed by stack first. */
707 struct_size
= args_size
;
708 for(i
= 0; i
< nb_args
; i
++) {
711 SValue
*sv
= &vtop
[-i
];
712 bt
= (sv
->type
.t
& VT_BTYPE
);
713 size
= gfunc_arg_size(&sv
->type
);
716 continue; /* arguments smaller than 8 bytes passed in registers or on stack */
718 if (bt
== VT_STRUCT
) {
719 /* align to stack align size */
720 size
= (size
+ 15) & ~15;
721 /* generate structure store */
723 gen_offs_sp(0x8d, r
, struct_size
);
726 /* generate memcpy call */
727 vset(&sv
->type
, r
| VT_LVAL
, 0);
731 } else if (bt
== VT_LDOUBLE
) {
733 gen_offs_sp(0xdb, 0x107, struct_size
);
738 if (func_scratch
< struct_size
)
739 func_scratch
= struct_size
;
742 struct_size
= args_size
;
744 for(i
= 0; i
< nb_args
; i
++) {
746 bt
= (vtop
->type
.t
& VT_BTYPE
);
748 size
= gfunc_arg_size(&vtop
->type
);
750 /* align to stack align size */
751 size
= (size
+ 15) & ~15;
754 gen_offs_sp(0x8d, d
, struct_size
);
755 gen_offs_sp(0x89, d
, arg
*8);
757 d
= arg_prepare_reg(arg
);
758 gen_offs_sp(0x8d, d
, struct_size
);
762 if (is_sse_float(vtop
->type
.t
)) {
763 gv(RC_XMM0
); /* only use one float register */
765 /* movq %xmm0, j*8(%rsp) */
766 gen_offs_sp(0xd60f66, 0x100, arg
*8);
768 /* movaps %xmm0, %xmmN */
770 o(0xc0 + (arg
<< 3));
771 d
= arg_prepare_reg(arg
);
772 /* mov %xmm0, %rxx */
775 o(0xc0 + REG_VALUE(d
));
778 if (bt
== VT_STRUCT
) {
779 vtop
->type
.ref
= NULL
;
780 vtop
->type
.t
= size
> 4 ? VT_LLONG
: size
> 2 ? VT_INT
781 : size
> 1 ? VT_SHORT
: VT_BYTE
;
786 gen_offs_sp(0x89, r
, arg
*8);
788 d
= arg_prepare_reg(arg
);
789 orex(1,d
,r
,0x89); /* mov */
790 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
798 /* Copy R10 and R11 into RCX and RDX, respectively */
800 o(0xd1894c); /* mov %r10, %rcx */
802 o(0xda894c); /* mov %r11, %rdx */
811 #define FUNC_PROLOG_SIZE 11
813 /* generate function prolog of type 't' */
814 void gfunc_prolog(CType
*func_type
)
816 int addr
, reg_param_index
, bt
, size
;
825 ind
+= FUNC_PROLOG_SIZE
;
826 func_sub_sp_offset
= ind
;
829 sym
= func_type
->ref
;
831 /* if the function returns a structure, then add an
832 implicit pointer parameter */
834 size
= gfunc_arg_size(&func_vt
);
836 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
842 /* define parameters */
843 while ((sym
= sym
->next
) != NULL
) {
845 bt
= type
->t
& VT_BTYPE
;
846 size
= gfunc_arg_size(type
);
848 if (reg_param_index
< REGN
) {
849 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
851 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
| VT_REF
, addr
);
853 if (reg_param_index
< REGN
) {
854 /* save arguments passed by register */
855 if ((bt
== VT_FLOAT
) || (bt
== VT_DOUBLE
)) {
856 o(0xd60f66); /* movq */
857 gen_modrm(reg_param_index
, VT_LOCAL
, NULL
, addr
);
859 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
862 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
868 while (reg_param_index
< REGN
) {
869 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
870 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
877 /* generate function epilog */
878 void gfunc_epilog(void)
883 if (func_ret_sub
== 0) {
888 g(func_ret_sub
>> 8);
892 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
893 /* align local size to word & save local variables */
894 v
= (func_scratch
+ -loc
+ 15) & -16;
897 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
898 oad(0xb8, v
); /* mov stacksize, %eax */
899 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
900 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
901 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
903 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
904 o(0xec8148); /* sub rsp, stacksize */
908 cur_text_section
->data_offset
= saved_ind
;
909 pe_add_unwind_data(ind
, saved_ind
, v
);
910 ind
= cur_text_section
->data_offset
;
/* Add 'val' to %rsp, using the short imm8 encoding when it fits. */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348); /* add $imm8, %rsp */
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
/* SysV AMD64 argument classes (simplified eightbyte classification).
   NOTE(review): enumerator list was missing from the mangled view;
   reconstructed from the names used below — confirm order against
   upstream (comparisons below never rely on ordering). */
typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_sse,
    x86_64_mode_x87
} X86_64_Mode;
933 static X86_64_Mode
classify_x86_64_merge(X86_64_Mode a
, X86_64_Mode b
) {
936 else if (a
== x86_64_mode_none
)
938 else if (b
== x86_64_mode_none
)
940 else if ((a
== x86_64_mode_memory
) || (b
== x86_64_mode_memory
))
941 return x86_64_mode_memory
;
942 else if ((a
== x86_64_mode_integer
) || (b
== x86_64_mode_integer
))
943 return x86_64_mode_integer
;
944 else if ((a
== x86_64_mode_x87
) || (b
== x86_64_mode_x87
))
945 return x86_64_mode_memory
;
947 return x86_64_mode_sse
;
950 static X86_64_Mode
classify_x86_64_inner(CType
*ty
) {
954 switch (ty
->t
& VT_BTYPE
) {
955 case VT_VOID
: return x86_64_mode_none
;
964 case VT_ENUM
: return x86_64_mode_integer
;
967 case VT_DOUBLE
: return x86_64_mode_sse
;
969 case VT_LDOUBLE
: return x86_64_mode_x87
;
975 if (f
->next
&& (f
->c
== f
->next
->c
))
976 return x86_64_mode_memory
;
978 mode
= x86_64_mode_none
;
979 for (; f
; f
= f
->next
)
980 mode
= classify_x86_64_merge(mode
, classify_x86_64_inner(&f
->type
));
988 static X86_64_Mode
classify_x86_64_arg(CType
*ty
, CType
*ret
, int *psize
, int *palign
, int *reg_count
) {
990 int size
, align
, ret_t
;
992 if (ty
->t
& (VT_BITFIELD
|VT_ARRAY
)) {
996 mode
= x86_64_mode_integer
;
998 size
= type_size(ty
, &align
);
999 *psize
= (size
+ 7) & ~7;
1000 *palign
= (align
+ 7) & ~7;
1003 mode
= x86_64_mode_memory
;
1005 mode
= classify_x86_64_inner(ty
);
1007 case x86_64_mode_integer
:
1013 ret_t
= (size
> 4) ? VT_LLONG
: VT_INT
;
1017 case x86_64_mode_x87
:
1022 case x86_64_mode_sse
:
1028 ret_t
= (size
> 4) ? VT_DOUBLE
: VT_FLOAT
;
1043 ST_FUNC
int classify_x86_64_va_arg(CType
*ty
) {
1044 /* This definition must be synced with stdarg.h */
1045 enum __va_arg_type
{
1046 __va_gen_reg
, __va_float_reg
, __va_stack
1048 int size
, align
, reg_count
;
1049 X86_64_Mode mode
= classify_x86_64_arg(ty
, NULL
, &size
, &align
, ®_count
);
1051 default: return __va_stack
;
1052 case x86_64_mode_integer
: return __va_gen_reg
;
1053 case x86_64_mode_sse
: return __va_float_reg
;
1057 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
1058 int gfunc_sret(CType
*vt
, CType
*ret
, int *ret_align
) {
1059 int size
, align
, reg_count
;
1060 *ret_align
= 1; // Never have to re-align return values for x86-64
1061 return (classify_x86_64_arg(vt
, ret
, &size
, &align
, ®_count
) == x86_64_mode_memory
);
1065 static const uint8_t arg_regs
[REGN
] = {
1066 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
1069 static int arg_prepare_reg(int idx
) {
1070 if (idx
== 2 || idx
== 3)
1071 /* idx=2: r10, idx=3: r11 */
1074 return arg_regs
[idx
];
1077 /* Generate function call. The function address is pushed first, then
1078 all the parameters in call order. This functions pops all the
1079 parameters and the function address. */
1080 void gfunc_call(int nb_args
)
1084 int size
, align
, r
, args_size
, stack_adjust
, run_start
, run_end
, i
, j
, reg_count
;
1085 int nb_reg_args
= 0;
1086 int nb_sse_args
= 0;
1087 int sse_reg
, gen_reg
;
1089 /* calculate the number of integer/float register arguments */
1090 for(i
= 0; i
< nb_args
; i
++) {
1091 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1092 if (mode
== x86_64_mode_sse
)
1093 nb_sse_args
+= reg_count
;
1094 else if (mode
== x86_64_mode_integer
)
1095 nb_reg_args
+= reg_count
;
1098 /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
1099 and ended by a 16-byte aligned argument. This is because, from the point of view of
1100 the callee, argument alignment is computed from the bottom up. */
1101 /* for struct arguments, we need to call memcpy and the function
1102 call breaks register passing arguments we are preparing.
1103 So, we process arguments which will be passed by stack first. */
1104 gen_reg
= nb_reg_args
;
1105 sse_reg
= nb_sse_args
;
1108 while (run_start
!= nb_args
) {
1109 int run_gen_reg
= gen_reg
, run_sse_reg
= sse_reg
;
1113 for(i
= run_start
; (i
< nb_args
) && (run_end
== nb_args
); i
++) {
1114 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1116 case x86_64_mode_memory
:
1117 case x86_64_mode_x87
:
1122 stack_adjust
+= size
;
1125 case x86_64_mode_sse
:
1126 sse_reg
-= reg_count
;
1127 if (sse_reg
+ reg_count
> 8) goto stack_arg
;
1130 case x86_64_mode_integer
:
1131 gen_reg
-= reg_count
;
1132 if (gen_reg
+ reg_count
> REGN
) goto stack_arg
;
1137 gen_reg
= run_gen_reg
;
1138 sse_reg
= run_sse_reg
;
1140 /* adjust stack to align SSE boundary */
1141 if (stack_adjust
&= 15) {
1142 /* fetch cpu flag before the following sub will change the value */
1143 if (vtop
>= vstack
&& (vtop
->r
& VT_VALMASK
) == VT_CMP
)
1146 stack_adjust
= 16 - stack_adjust
;
1148 oad(0xec81, stack_adjust
); /* sub $xxx, %rsp */
1149 args_size
+= stack_adjust
;
1152 for(i
= run_start
; i
< run_end
;) {
1153 /* Swap argument to top, it will possibly be changed here,
1154 and might use more temps. At the end of the loop we keep
1155 in on the stack and swap it back to its original position
1156 if it is a register. */
1157 SValue tmp
= vtop
[0];
1161 mode
= classify_x86_64_arg(&vtop
->type
, NULL
, &size
, &align
, ®_count
);
1164 switch (vtop
->type
.t
& VT_BTYPE
) {
1166 if (mode
== x86_64_mode_sse
) {
1168 sse_reg
-= reg_count
;
1171 } else if (mode
== x86_64_mode_integer
) {
1173 gen_reg
-= reg_count
;
1179 /* allocate the necessary size on stack */
1181 oad(0xec81, size
); /* sub $xxx, %rsp */
1182 /* generate structure store */
1183 r
= get_reg(RC_INT
);
1184 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1185 o(0xe0 + REG_VALUE(r
));
1186 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1199 assert(mode
== x86_64_mode_sse
);
1203 o(0x50); /* push $rax */
1204 /* movq %xmmN, (%rsp) */
1206 o(0x04 + REG_VALUE(r
)*8);
1215 assert(mode
== x86_64_mode_integer
);
1217 /* XXX: implicit cast ? */
1218 if (gen_reg
> REGN
) {
1221 orex(0,r
,0,0x50 + REG_VALUE(r
)); /* push r */
1229 /* And swap the argument back to it's original position. */
1236 assert((vtop
->type
.t
== tmp
.type
.t
) && (vtop
->r
== tmp
.r
));
1245 /* handle 16 byte aligned arguments at end of run */
1246 run_start
= i
= run_end
;
1247 while (i
< nb_args
) {
1248 /* Rotate argument to top since it will always be popped */
1249 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &align
, ®_count
);
1255 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1257 oad(0xec8148, size
); /* sub $xxx, %rsp */
1258 o(0x7cdb); /* fstpt 0(%rsp) */
1263 assert(mode
== x86_64_mode_memory
);
1265 /* allocate the necessary size on stack */
1267 oad(0xec81, size
); /* sub $xxx, %rsp */
1268 /* generate structure store */
1269 r
= get_reg(RC_INT
);
1270 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1271 o(0xe0 + REG_VALUE(r
));
1272 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1283 /* XXX This should be superfluous. */
1284 save_regs(0); /* save used temporary registers */
1286 /* then, we prepare register passing arguments.
1287 Note that we cannot set RDX and RCX in this loop because gv()
1288 may break these temporary registers. Let's use R10 and R11
1290 assert(gen_reg
<= REGN
);
1291 assert(sse_reg
<= 8);
1292 for(i
= 0; i
< nb_args
; i
++) {
1293 mode
= classify_x86_64_arg(&vtop
->type
, &type
, &size
, &align
, ®_count
);
1294 /* Alter stack entry type so that gv() knows how to treat it */
1296 if (mode
== x86_64_mode_sse
) {
1297 if (reg_count
== 2) {
1299 gv(RC_FRET
); /* Use pair load into xmm0 & xmm1 */
1300 if (sse_reg
) { /* avoid redundant movaps %xmm0, %xmm0 */
1301 /* movaps %xmm0, %xmmN */
1303 o(0xc0 + (sse_reg
<< 3));
1304 /* movaps %xmm1, %xmmN */
1306 o(0xc1 + ((sse_reg
+1) << 3));
1309 assert(reg_count
== 1);
1311 /* Load directly to register */
1312 gv(RC_XMM0
<< sse_reg
);
1314 } else if (mode
== x86_64_mode_integer
) {
1316 /* XXX: implicit cast ? */
1317 gen_reg
-= reg_count
;
1319 int d
= arg_prepare_reg(gen_reg
);
1320 orex(1,d
,r
,0x89); /* mov */
1321 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
1322 if (reg_count
== 2) {
1323 d
= arg_prepare_reg(gen_reg
+1);
1324 orex(1,d
,vtop
->r2
,0x89); /* mov */
1325 o(0xc0 + REG_VALUE(vtop
->r2
) * 8 + REG_VALUE(d
));
1330 assert(gen_reg
== 0);
1331 assert(sse_reg
== 0);
1333 /* We shouldn't have many operands on the stack anymore, but the
1334 call address itself is still there, and it might be in %eax
1335 (or edx/ecx) currently, which the below writes would clobber.
1336 So evict all remaining operands here. */
1339 /* Copy R10 and R11 into RDX and RCX, respectively */
1340 if (nb_reg_args
> 2) {
1341 o(0xd2894c); /* mov %r10, %rdx */
1342 if (nb_reg_args
> 3) {
1343 o(0xd9894c); /* mov %r11, %rcx */
1347 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
1355 #define FUNC_PROLOG_SIZE 11
1357 static void push_arg_reg(int i
) {
1359 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
1362 /* generate function prolog of type 't' */
1363 void gfunc_prolog(CType
*func_type
)
1366 int i
, addr
, align
, size
, reg_count
;
1367 int param_addr
, reg_param_index
, sse_param_index
;
1371 sym
= func_type
->ref
;
1372 addr
= PTR_SIZE
* 2;
1374 ind
+= FUNC_PROLOG_SIZE
;
1375 func_sub_sp_offset
= ind
;
1378 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
1379 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
1380 seen_reg_num
= seen_sse_num
= 0;
1381 /* frame pointer and return address */
1382 seen_stack_size
= PTR_SIZE
* 2;
1383 /* count the number of seen parameters */
1384 sym
= func_type
->ref
;
1385 while ((sym
= sym
->next
) != NULL
) {
1387 mode
= classify_x86_64_arg(type
, NULL
, &size
, &align
, ®_count
);
1391 seen_stack_size
= ((seen_stack_size
+ align
- 1) & -align
) + size
;
1394 case x86_64_mode_integer
:
1395 if (seen_reg_num
+ reg_count
<= 8) {
1396 seen_reg_num
+= reg_count
;
1403 case x86_64_mode_sse
:
1404 if (seen_sse_num
+ reg_count
<= 8) {
1405 seen_sse_num
+= reg_count
;
1415 /* movl $0x????????, -0x10(%rbp) */
1417 gen_le32(seen_reg_num
* 8);
1418 /* movl $0x????????, -0xc(%rbp) */
1420 gen_le32(seen_sse_num
* 16 + 48);
1421 /* movl $0x????????, -0x8(%rbp) */
1423 gen_le32(seen_stack_size
);
1425 /* save all register passing arguments */
1426 for (i
= 0; i
< 8; i
++) {
1428 o(0xd60f66); /* movq */
1429 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
1430 /* movq $0, loc+8(%rbp) */
1435 for (i
= 0; i
< REGN
; i
++) {
1436 push_arg_reg(REGN
-1-i
);
1440 sym
= func_type
->ref
;
1441 reg_param_index
= 0;
1442 sse_param_index
= 0;
1444 /* if the function returns a structure, then add an
1445 implicit pointer parameter */
1446 func_vt
= sym
->type
;
1447 mode
= classify_x86_64_arg(&func_vt
, NULL
, &size
, &align
, ®_count
);
1448 if (mode
== x86_64_mode_memory
) {
1449 push_arg_reg(reg_param_index
);
1453 /* define parameters */
1454 while ((sym
= sym
->next
) != NULL
) {
1456 mode
= classify_x86_64_arg(type
, NULL
, &size
, &align
, ®_count
);
1458 case x86_64_mode_sse
:
1459 if (sse_param_index
+ reg_count
<= 8) {
1460 /* save arguments passed by register */
1461 loc
-= reg_count
* 8;
1463 for (i
= 0; i
< reg_count
; ++i
) {
1464 o(0xd60f66); /* movq */
1465 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, param_addr
+ i
*8);
1469 addr
= (addr
+ align
- 1) & -align
;
1472 sse_param_index
+= reg_count
;
1476 case x86_64_mode_memory
:
1477 case x86_64_mode_x87
:
1478 addr
= (addr
+ align
- 1) & -align
;
1483 case x86_64_mode_integer
: {
1484 if (reg_param_index
+ reg_count
<= REGN
) {
1485 /* save arguments passed by register */
1486 loc
-= reg_count
* 8;
1488 for (i
= 0; i
< reg_count
; ++i
) {
1489 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, param_addr
+ i
*8);
1493 addr
= (addr
+ align
- 1) & -align
;
1496 reg_param_index
+= reg_count
;
1501 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1502 VT_LOCAL
| VT_LVAL
, param_addr
);
1506 /* generate function epilog */
1507 void gfunc_epilog(void)
1511 o(0xc9); /* leave */
1512 if (func_ret_sub
== 0) {
1515 o(0xc2); /* ret n */
1517 g(func_ret_sub
>> 8);
1519 /* align local size to word & save local variables */
1520 v
= (-loc
+ 15) & -16;
1522 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1523 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1524 o(0xec8148); /* sub rsp, stacksize */
1531 /* generate a jump to a label */
1534 return psym(0xe9, t
);
1537 /* generate a jump to a fixed address */
1538 void gjmp_addr(int a
)
1546 oad(0xe9, a
- ind
- 5);
1550 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1551 int gtst(int inv
, int t
)
1555 v
= vtop
->r
& VT_VALMASK
;
1557 /* fast case : can jump directly since flags are set */
1558 if (vtop
->c
.i
& 0x100)
1560 /* This was a float compare. If the parity flag is set
1561 the result was unordered. For anything except != this
1562 means false and we don't jump (anding both conditions).
1563 For != this means true (oring both).
1564 Take care about inverting the test. We need to jump
1565 to our target if the result was unordered and test wasn't NE,
1566 otherwise if unordered we don't want to jump. */
1567 vtop
->c
.i
&= ~0x100;
1568 if (!inv
== (vtop
->c
.i
!= TOK_NE
))
1569 o(0x067a); /* jp +6 */
1573 t
= psym(0x8a, t
); /* jp t */
1577 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
1578 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
1579 /* && or || optimization */
1580 if ((v
& 1) == inv
) {
1581 /* insert vtop->c jump list in t */
1584 p
= (int *)(cur_text_section
->data
+ *p
);
1592 if (is_float(vtop
->type
.t
) ||
1593 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1597 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1598 /* constant jmp optimization */
1599 if ((vtop
->c
.i
!= 0) != inv
)
1604 o(0xc0 + REG_VALUE(v
) * 9);
1606 t
= psym(0x85 ^ inv
, t
);
/* NOTE(review): fragmentary listing — numbering gaps (1615-1618,
   1622-1624, 1626-1627, 1629-1634, 1638-1639, 1642-1643, 1645-1646,
   1649-1650, 1652-1656, 1658-1675, 1677-1678, 1681-1691, 1693-1697,
   1701, 1704, 1707-1718, 1722-1725) mark dropped lines, including the
   switch scaffolding and several case labels.  Tokens kept byte-identical.
   ll = operand is 64-bit, uu = unsigned, cc = rhs is a plain constant. */
1613 /* generate an integer binary operation */
1614 void gen_opi(int op
)
1619 ll
= is64_type(vtop
[-1].type
.t
);
1620 uu
= (vtop
[-1].type
.t
& VT_UNSIGNED
) != 0;
1621 cc
= (vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
;
1625 case TOK_ADDC1
: /* add with carry generation */
/* constant rhs path: usable only if the 64-bit constant fits in 32 bits */
1628 if (cc
&& (!ll
|| (int)vtop
->c
.ll
== vtop
->c
.ll
)) {
1635 /* XXX: generate inc and dec for smaller code ? */
/* 0x83 = group-1 op with sign-extended imm8; opc selects add/or/adc/... */
1636 orex(ll
, r
, 0, 0x83);
1637 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
/* 0x81 = group-1 op with imm32 */
1640 orex(ll
, r
, 0, 0x81);
1641 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1644 gv2(RC_INT
, RC_INT
);
/* register-register form: (opc<<3)|0x01 is the /r opcode for this group */
1647 orex(ll
, r
, fr
, (opc
<< 3) | 0x01);
1648 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1651 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1657 case TOK_SUBC1
: /* sub with carry generation */
1660 case TOK_ADDC2
: /* add with carry use */
1663 case TOK_SUBC2
: /* sub with carry use */
1676 gv2(RC_INT
, RC_INT
);
1679 orex(ll
, fr
, r
, 0xaf0f); /* imul fr, r */
1680 o(0xc0 + REG_VALUE(fr
) + REG_VALUE(r
) * 8);
1692 opc
= 0xc0 | (opc
<< 3);
/* shift by immediate, masked to the operand width */
1698 orex(ll
, r
, 0, 0xc1); /* shl/shr/sar $xxx, r */
1699 o(opc
| REG_VALUE(r
));
1700 g(vtop
->c
.i
& (ll
? 63 : 31));
1702 /* we generate the shift in ecx */
1703 gv2(RC_INT
, RC_RCX
);
1705 orex(ll
, r
, 0, 0xd3); /* shl/shr/sar %cl, r */
1706 o(opc
| REG_VALUE(r
));
1719 /* first operand must be in eax */
1720 /* XXX: need better constraint for second operand */
1721 gv2(RC_RAX
, RC_RCX
);
/* unsigned: zero %edx; signed: sign-extend %eax into %edx (cdq/cqto) */
1726 orex(ll
, 0, 0, uu
? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1727 orex(ll
, fr
, 0, 0xf7); /* div fr, %eax */
1728 o((uu
? 0xf0 : 0xf8) + REG_VALUE(fr
));
/* remainder is left in %edx/%rdx for '%' and unsigned mod */
1729 if (op
== '%' || op
== TOK_UMOD
)
/* NOTE(review): only the header of gen_opl() survives in this listing;
   its body (original lines 1742-1745) was dropped by the extraction.
   From the name it handles two-word integer ops — confirm upstream. */
1741 void gen_opl(int op
)
/* NOTE(review): fragmentary listing — the embedded numbers are original
   source lines; the many numbering gaps (1750, 1752, 1754, 1757-1760,
   1762-1763, 1767-1771, 1775-1777, 1784, 1786-1787, 1791, 1797,
   1800-1801, 1803-1808, 1811-1833, 1835-1838, 1841-1842, 1844, 1846,
   1848-1852, 1854-1855, 1857, 1860, 1862-1868, 1870, 1872, 1874, 1877,
   1879-1882, 1884, 1886-1902, 1904-1905, 1908, 1910, 1912-1916, 1918,
   1920-1923, 1925-1931, 1934, 1936-1941) mark dropped lines.  Tokens are
   kept byte-identical.  Long doubles go through the x87 stack (ST0);
   float/double use SSE (ucomiss/ucomisd for compares). */
1746 /* generate a floating point operation 'v = t1 op t2' instruction. The
1747 two operands are guaranteed to have the same floating point type */
1748 /* XXX: need to use ST1 too */
1749 void gen_opf(int op
)
1751 int a
, ft
, fc
, swapped
, r
;
/* pick the register class: x87 ST0 for long double, SSE otherwise */
1753 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1755 /* convert constants to memory references */
1756 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1761 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1764 /* must put at least one value in the floating point register */
1765 if ((vtop
[-1].r
& VT_LVAL
) &&
1766 (vtop
[0].r
& VT_LVAL
)) {
1772 /* swap the stack if needed so that t1 is the register and t2 is
1773 the memory reference */
1774 if (vtop
[-1].r
& VT_LVAL
) {
1778 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1779 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1780 /* load on stack second operand */
1781 load(TREG_ST0
, vtop
);
1782 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1783 if (op
== TOK_GE
|| op
== TOK_GT
)
1785 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1788 o(0xc9d9); /* fxch %st(1) */
1789 o(0xe9da); /* fucompp */
1790 o(0xe0df); /* fnstsw %ax */
/* decode the x87 condition bits (C0/C2/C3 live in %ah) per operator */
1792 o(0x45e480); /* and $0x45, %ah */
1793 o(0x40fC80); /* cmp $0x40, %ah */
1794 } else if (op
== TOK_NE
) {
1795 o(0x45e480); /* and $0x45, %ah */
1796 o(0x40f480); /* xor $0x40, %ah */
1798 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1799 o(0x05c4f6); /* test $0x05, %ah */
1802 o(0x45c4f6); /* test $0x45, %ah */
1809 /* no memory reference possible for long double operations */
1810 load(TREG_ST0
, vtop
);
1834 o(0xde); /* fxxxp %st, %st(1) */
1839 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1840 /* if saved lvalue, then we must reload it */
1843 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1845 r
= get_reg(RC_INT
);
1847 v1
.r
= VT_LOCAL
| VT_LVAL
;
1853 if (op
== TOK_EQ
|| op
== TOK_NE
) {
/* for ordered compares swap operands so only ae/a setcc forms are needed */
1856 if (op
== TOK_LE
|| op
== TOK_LT
)
1858 if (op
== TOK_LE
|| op
== TOK_GE
) {
1859 op
= 0x93; /* setae */
1861 op
= 0x97; /* seta */
1869 assert(!(vtop
[-1].r
& VT_LVAL
));
1871 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
)
1873 o(0x2e0f); /* ucomisd */
1875 if (vtop
->r
& VT_LVAL
) {
1876 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
1878 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
/* 0x100 flags a float compare so gtst() folds in the parity bit */
1883 vtop
->c
.i
= op
| 0x100;
1885 assert((vtop
->type
.t
& VT_BTYPE
) != VT_LDOUBLE
);
1903 assert((ft
& VT_BTYPE
) != VT_LDOUBLE
);
1906 /* if saved lvalue, then we must reload it */
1907 if ((vtop
->r
& VT_VALMASK
) == VT_LLOCAL
) {
1909 r
= get_reg(RC_INT
);
1911 v1
.r
= VT_LOCAL
| VT_LVAL
;
1917 assert(!(vtop
[-1].r
& VT_LVAL
));
1919 assert(vtop
->r
& VT_LVAL
);
1924 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1932 if (vtop
->r
& VT_LVAL
) {
1933 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
1935 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
/* NOTE(review): fragmentary listing — numbering gaps (1946, 1948-1949,
   1960, 1964, 1969-1971, 1973, 1978-1980, 1982-1984) mark dropped lines.
   Tokens kept byte-identical.  Conversions to long double go through the
   x87 fild* path via a push through the stack; float/double use SSE
   cvtsi2ss/cvtsi2sd. */
1943 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1944 and 'long long' cases. */
1945 void gen_cvt_itof(int t
)
1947 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1950 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1951 /* signed long long to float/double/long double (unsigned case
1952 is handled generically) */
1953 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1954 o(0x242cdf); /* fildll (%rsp) */
1955 o(0x08c48348); /* add $8, %rsp */
1956 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1957 (VT_INT
| VT_UNSIGNED
)) {
1958 /* unsigned int to float/double/long double */
/* a zero high word makes the pushed 64-bit value non-negative, so
   fildll reads the unsigned 32-bit value correctly */
1959 o(0x6a); /* push $0 */
1961 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1962 o(0x242cdf); /* fildll (%rsp) */
1963 o(0x10c48348); /* add $16, %rsp */
1965 /* int to float/double/long double */
1966 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1967 o(0x2404db); /* fildl (%rsp) */
1968 o(0x08c48348); /* add $8, %rsp */
1972 int r
= get_reg(RC_FLOAT
);
/* SSE prefix: 0xf3 for float (cvtsi2ss), 0xf2 for double (cvtsi2sd) */
1974 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
?1:0));
1975 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1976 (VT_INT
| VT_UNSIGNED
) ||
1977 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1981 o(0xc0 + (vtop
->r
& VT_VALMASK
) + REG_VALUE(r
)*8); /* cvtsi2sd */
/* NOTE(review): fragmentary listing — numbering gaps (1988-1994, 1996,
   2003, 2005, 2007, 2009-2010, 2012, 2019, 2021, 2023, 2025-2028, 2033,
   2035-2036, 2040, 2042-2046) mark dropped lines.  Tokens kept
   byte-identical.  SSE<->SSE widening/narrowing uses cvtps2pd/cvtpd2ps;
   conversions involving long double spill through -0x10(%rsp) and the
   x87 flds/fldl/fstps/fstpl instructions. */
1986 /* convert from one floating point type to another */
1987 void gen_cvt_ftof(int t
)
1995 if (bt
== VT_FLOAT
) {
1997 if (tbt
== VT_DOUBLE
) {
1998 o(0x140f); /* unpcklps */
1999 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2000 o(0x5a0f); /* cvtps2pd */
2001 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2002 } else if (tbt
== VT_LDOUBLE
) {
2004 /* movss %xmm0,-0x10(%rsp) */
2006 o(0x44 + REG_VALUE(vtop
->r
)*8);
2008 o(0xf02444d9); /* flds -0x10(%rsp) */
2011 } else if (bt
== VT_DOUBLE
) {
2013 if (tbt
== VT_FLOAT
) {
2014 o(0x140f66); /* unpcklpd */
2015 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2016 o(0x5a0f66); /* cvtpd2ps */
2017 o(0xc0 + REG_VALUE(vtop
->r
)*9);
2018 } else if (tbt
== VT_LDOUBLE
) {
2020 /* movsd %xmm0,-0x10(%rsp) */
2022 o(0x44 + REG_VALUE(vtop
->r
)*8);
2024 o(0xf02444dd); /* fldl -0x10(%rsp) */
/* remaining cases: long double source, popped from the x87 stack into
   a fresh SSE register via the same scratch slot */
2029 int r
= get_reg(RC_FLOAT
);
2030 if (tbt
== VT_DOUBLE
) {
2031 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
2032 /* movsd -0x10(%rsp),%xmm0 */
2034 o(0x44 + REG_VALUE(r
)*8);
2037 } else if (tbt
== VT_FLOAT
) {
2038 o(0xf0245cd9); /* fstps -0x10(%rsp) */
2039 /* movss -0x10(%rsp),%xmm0 */
2041 o(0x44 + REG_VALUE(r
)*8);
/* NOTE(review): fragmentary listing — numbering gaps (2050, 2052-2053,
   2056-2064, 2067, 2069-2072, 2075-2077) mark dropped lines.  Tokens
   kept byte-identical.  Long double is first demoted to double (x86-64
   has no direct x87->SSE truncating convert here); then cvttss2si or
   cvttsd2si truncates into an integer register. */
2048 /* convert fp to int 't' type */
2049 void gen_cvt_ftoi(int t
)
2051 int ft
, bt
, size
, r
;
2054 if (bt
== VT_LDOUBLE
) {
2055 gen_cvt_ftof(VT_DOUBLE
);
2065 r
= get_reg(RC_INT
);
2066 if (bt
== VT_FLOAT
) {
2068 } else if (bt
== VT_DOUBLE
) {
/* REX.W when the destination integer is 8 bytes */
2073 orex(size
== 8, r
, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
2074 o(0xc0 + REG_VALUE(vtop
->r
) + REG_VALUE(r
)*8);
2078 /* computed goto support */
2085 /* Save the stack pointer into a stack slot at frame offset 'addr' (the function returns nothing; the caller chose 'addr') */
2086 ST_FUNC
void gen_vla_sp_save(int addr
) {
2087 /* mov %rsp,addr(%rbp)*/
2088 gen_modrm64(0x89, TREG_RSP
, VT_LOCAL
, NULL
, addr
);
2091 /* Restore the SP from a location on the stack */
2092 ST_FUNC
void gen_vla_sp_restore(int addr
) {
2093 gen_modrm64(0x8b, TREG_RSP
, VT_LOCAL
, NULL
, addr
);
/* NOTE(review): fragmentary listing — numbering gaps (2102, 2104-2105,
   2107-2108, 2111-2114, 2116-2121) mark dropped lines, including the
   #else/#endif structure implied by the TCC_TARGET_PE branch.  Tokens
   kept byte-identical.  On Windows the allocation is delegated to
   alloca() (which also probes the stack); otherwise %rsp is adjusted
   directly and rounded to 16 bytes. */
2096 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2097 ST_FUNC
void gen_vla_alloc(CType
*type
, int align
) {
2098 #ifdef TCC_TARGET_PE
2099 /* alloca does more than just adjust %rsp on Windows */
2100 vpush_global_sym(&func_old_type
, TOK_alloca
);
2101 vswap(); /* Move alloca ref past allocation size */
2103 vset(type
, REG_IRET
, 0);
2106 r
= gv(RC_INT
); /* allocation size */
2109 o(0xe0 | REG_VALUE(r
));
2110 /* We align to 16 bytes rather than align */
2115 o(0xe0 | REG_VALUE(r
));
2122 /* end of x86-64 code generator */
2123 /*************************************************************/
2124 #endif /* ! TARGET_DEFS_ONLY */
2125 /******************************************************/