/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         25

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which
   makes assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_ST0     0x0080 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_LRET    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */
/* pretty names for the registers */
enum {
    TREG_RAX = 0, TREG_RCX = 1, TREG_RDX = 2,
    TREG_RSP = 4, TREG_RSI = 6, TREG_RDI = 7,
    TREG_R8 = 8, TREG_R9 = 9, TREG_R10 = 10, TREG_R11 = 11,
    TREG_XMM0 = 16, TREG_XMM1 = 17, TREG_XMM2 = 18, TREG_XMM3 = 19,
    TREG_XMM4 = 20, TREG_XMM5 = 21, TREG_XMM6 = 22, TREG_XMM7 = 23,
    TREG_ST0 = 24,
    TREG_MEM = 0x20
};

#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX  /* single word int return register */
#define REG_LRET TREG_RDX  /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16
/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x400000
#define ELF_PAGE_SIZE  0x200000

/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#include "tcc.h"
#include <assert.h>
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6, RC_XMM7,
    /* st0 */ RC_ST0
};
static unsigned long func_sub_sp_offset;
static int func_ret_sub;
/* XXX: make it faster ? */
void g(int c)
{
    int ind1;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}

void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
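/* Worked example of the REX encoding above: orex(1, TREG_R8, 0, 0x89)
   emits 0x49 0x89, i.e. a REX.W|REX.B prefix followed by the mov opcode,
   while orex(0, TREG_RAX, 0, 0x89) emits just 0x89 because no REX bits
   are needed. */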
/* output a symbol and patch all calls to it */
void gsym_addr(int t, int a)
{
    int n, *ptr;
    while (t) {
        ptr = (int *)(cur_text_section->data + t);
        n = *ptr; /* next value */
        *ptr = a - t - 4;
        t = n;
    }
}
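/* Forward jumps are kept as a linked list threaded through the 32-bit
   displacement fields themselves: each pending jump site stores the
   offset of the previous pending site (0 terminates the list).
   gsym_addr() walks that chain and rewrites every displacement so it
   reaches the now-known target 'a'. */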
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad

static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}
/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
    s = ind;
    ind = ind1;
    return s;
}
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_32);
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    gen_le32(c-4);
}
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    Section *sr;
    ElfW(Rela) *rel;
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
    rel->r_addend = -4;
#else
    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
        get_tok_str(sym->v, NULL), c, r,
        cur_text_section->data[ind-3],
        cur_text_section->data[ind-2],
        cur_text_section->data[ind-1]
        );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
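/* Example of the addressing forms above: with op_reg = 0 (%rax),
   r = VT_LOCAL and c = -8, this emits 0x45 0xf8, which together with a
   preceding 0x8b opcode byte encodes "mov -8(%rbp),%eax". */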
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.ul;

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.ul = fc;
            fr = r;
            if (!(reg_classes[fr] & RC_INT))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.ull);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100) {
                /* This was a float compare. If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE. */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8));
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
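/* Worked example: load(TREG_RAX, sv) for a 4-byte int local at offset -4
   (sv->r == VT_LOCAL | VT_LVAL) takes the lvalue path with b = 0x8b and
   emits 8b 45 fc, i.e. "mov -4(%rbp),%eax". */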
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.ul;
    fr = v->r & VT_VALMASK;
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: do we ever come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: do we ever come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
#ifdef TCC_TARGET_PE
            greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32);
#else
            greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32);
#endif
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        }
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}

static int func_scratch;
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
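/* Worked example: gen_offs_sp(0x89, TREG_RAX, 0x10) emits
   48 89 44 24 10, i.e. "mov %rax,0x10(%rsp)". Callers or-in 0x100 on 'r'
   for pseudo operands (x87/xmm forms) so that orex() does not derive REX
   bits from the fake register value. */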
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align)
{
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86-64
    size = type_size(vt, &align);
    ret->ref = NULL;
    if (size > 8) {
        return 0;
    } else if (size > 4) {
        ret->t = VT_LLONG;
        return 1;
    } else if (size > 2) {
        ret->t = VT_INT;
        return 1;
    } else if (size > 1) {
        ret->t = VT_SHORT;
        return 1;
    } else {
        ret->t = VT_BYTE;
        return 1;
    }
}
static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}

int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (size > 8) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    func_var = (sym->c == FUNC_ELLIPSIS);
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148);   /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
#else

static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
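/* Worked example: gadd_sp(8) emits 48 83 c4 08 ("add $0x8,%rsp");
   values outside the signed 8-bit range take the 4-byte immediate form. */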
typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_sse,
    x86_64_mode_x87
} X86_64_Mode;

static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
static X86_64_Mode classify_x86_64_inner(CType *ty)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        /* detect union: two fields at the same offset */
        if (f->next && (f->c == f->next->c))
            return x86_64_mode_memory;

        mode = x86_64_mode_none;
        for (; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }

    assert(0);
    return x86_64_mode_none; /* not reached */
}
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
{
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none */
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}
ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align)
{
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
}
#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, run_start, run_end, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float register arguments */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse)
            nb_sse_args += reg_count;
        else if (mode == x86_64_mode_integer)
            nb_reg_args += reg_count;
    }

    /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
       and ended by a 16-byte aligned argument. This is because, from the point of view of
       the callee, argument alignment is computed from the bottom up. */
    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    run_start = 0;
    args_size = 0;
    while (run_start != nb_args) {
        int run_gen_reg = gen_reg, run_sse_reg = sse_reg;

        run_end = nb_args;
        stack_adjust = 0;
        for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            switch (mode) {
            case x86_64_mode_memory:
            case x86_64_mode_x87:
            stack_arg:
                if (align == 16)
                    run_end = i;
                else
                    stack_adjust += size;
                break;

            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8) goto stack_arg;
                break;

            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN) goto stack_arg;
                break;
            default: break; /* nothing to be done for x86_64_mode_none */
            }
        }

        gen_reg = run_gen_reg;
        sse_reg = run_sse_reg;

        /* adjust stack to align SSE boundary */
        if (stack_adjust &= 15) {
            /* fetch cpu flag before the following sub will change the value */
            if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
                gv(RC_INT);

            stack_adjust = 16 - stack_adjust;
            o(0x48);
            oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
            args_size += stack_adjust;
        }

        for(i = run_start; i < run_end;) {
            /* Swap argument to top, it will possibly be changed here,
               and might use more temps. At the end of the loop we keep
               it on the stack and swap it back to its original position
               if it is a register. */
            SValue tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, &reg_count);

            int arg_stored = 1;
            switch (vtop->type.t & VT_BTYPE) {
            case VT_STRUCT:
                if (mode == x86_64_mode_sse) {
                    if (sse_reg > 8)
                        sse_reg -= reg_count;
                    else
                        arg_stored = 0;
                } else if (mode == x86_64_mode_integer) {
                    if (gen_reg > REGN)
                        gen_reg -= reg_count;
                    else
                        arg_stored = 0;
                }

                if (arg_stored) {
                    /* allocate the necessary size on stack */
                    o(0x48);
                    oad(0xec81, size); /* sub $xxx, %rsp */
                    /* generate structure store */
                    r = get_reg(RC_INT);
                    orex(1, r, 0, 0x89); /* mov %rsp, r */
                    o(0xe0 + REG_VALUE(r));
                    vset(&vtop->type, r | VT_LVAL, 0);
                    vswap();
                    vstore();
                    args_size += size;
                }
                break;

            case VT_LDOUBLE:
                assert(0);
                break;

            case VT_FLOAT:
            case VT_DOUBLE:
                assert(mode == x86_64_mode_sse);
                if (sse_reg > 8) {
                    --sse_reg;
                    r = gv(RC_FLOAT);
                    o(0x50); /* push %rax */
                    /* movq %xmmN, (%rsp) */
                    o(0xd60f66);
                    o(0x04 + REG_VALUE(r)*8);
                    o(0x24);
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;

            default:
                assert(mode == x86_64_mode_integer);
                /* simple type */
                /* XXX: implicit cast ? */
                if (gen_reg > REGN) {
                    --gen_reg;
                    r = gv(RC_INT);
                    orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;
            }

            /* And swap the argument back to its original position. */
            tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            if (arg_stored) {
                vrotb(i+1);
                assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
                vpop();
                --nb_args;
                --run_end;
            } else {
                ++i;
            }
        }

        /* handle 16 byte aligned arguments at end of run */
        run_start = i = run_end;
        while (i < nb_args) {
            /* Rotate argument to top since it will always be popped */
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            if (align != 16)
                break;

            vrotb(i+1);

            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                gv(RC_ST0);
                oad(0xec8148, size); /* sub $xxx, %rsp */
                o(0x7cdb); /* fstpt 0(%rsp) */
                g(0x24);
                g(0x00);
                args_size += size;
            } else {
                assert(mode == x86_64_mode_memory);

                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore();
                args_size += size;
            }

            vpop();
            --nb_args;
        }
    }

    /* XXX This should be superfluous. */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            gen_reg -= reg_count;
            r = gv(RC_INT);
            int d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here. */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}
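/* Note on the "mov nb_sse_args,%eax" emitted just before the call above:
   for variadic functions the System V ABI uses %al as an upper bound on
   the number of vector registers actually used, e.g. printf("%f\n", x)
   is called with %eax = 1. */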
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count <= 8) {
                    seen_reg_num += reg_count;
                } else {
                    seen_reg_num = 8;
                    goto stack_arg;
                }
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count <= 8) {
                    seen_sse_num += reg_count;
                } else {
                    seen_sse_num = 8;
                    goto stack_arg;
                }
                break;
            }
        }

        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            o(0xd60f66); /* movq */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
                sse_param_index += reg_count;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
                reg_param_index += reg_count;
            }
            break;
        }
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }
}
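/* The FUNC_ELLIPSIS block above materializes the va_list bookkeeping that
   the matching stdarg.h expects: gp_offset = seen_reg_num * 8, fp_offset =
   48 + seen_sse_num * 16, the overflow area starting seen_stack_size bytes
   above the frame pointer, and the register save area itself. */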
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
    o(0xec8148);   /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}

#endif /* not PE */
/* generate a jump to a label */
int gjmp(int t)
{
    return psym(0xe9, t);
}

/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
{
    int v, *p;

    v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100) {
            /* This was a float compare. If the parity flag is set
               the result was unordered. For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test. We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump. */
            vtop->c.i &= ~0x100;
            if (!inv == (vtop->c.i != TOK_NE))
                o(0x067a); /* jp +6 */
            else {
                g(0x0f);
                t = psym(0x8a, t); /* jp t */
            }
        }
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            p = &vtop->c.i;
            while (*p != 0)
                p = (int *)(cur_text_section->data + *p);
            *p = t;
            t = vtop->c.i;
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    }
    vtop--;
    return t;
}
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}
void gen_opl(int op)
{
    gen_opi(op);
}
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.ul;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = r;
        }
    }
}
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
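/* Worked example: converting a signed int in %eax to a double in %xmm0
   takes the SSE path above and emits f2 0f 2a c0 ("cvtsi2sd %eax,%xmm0");
   unsigned int and long long sources get a REX.W prefix first. */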
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(TREG_ST0);
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(TREG_ST0);
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}

/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}
/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}
/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
    vset(type, REG_IRET, 0);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b48);
    o(0xe0 | REG_VALUE(r));
    /* We align to 16 bytes rather than align */
    /* and ~15, %rsp */
    o(0xf0e48348);
    /* mov %rsp, r */
    o(0x8948);
    o(0xe0 | REG_VALUE(r));
    vpop();
    vset(type, r, 0);
#endif
}
/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/