2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which makes
31 assumptions on it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
37 #define RC_ST0 0x0080 /* only for long double */
42 #define RC_XMM0 0x1000
43 #define RC_XMM1 0x2000
44 #define RC_XMM2 0x4000
45 #define RC_XMM3 0x8000
46 #define RC_XMM4 0x10000
47 #define RC_XMM5 0x20000
48 #define RC_IRET RC_RAX /* function return: integer register */
49 #define RC_LRET RC_RDX /* function return: second integer register */
50 #define RC_FRET RC_XMM0 /* function return: float register */
51 #define RC_QRET RC_XMM1 /* function return: second float register */
53 /* pretty names for the registers */
75 TREG_ST0
= 4, // SP slot won't be used
80 #define REX_BASE(reg) (((reg) >> 3) & 1)
81 #define REG_VALUE(reg) ((reg) & 7)
83 /* return registers for function */
84 #define REG_IRET TREG_RAX /* single word int return register */
85 #define REG_LRET TREG_RDX /* second word return register (for long long) */
86 #define REG_FRET TREG_XMM0 /* float return register */
87 #define REG_QRET TREG_XMM1 /* second float return register */
89 /* defined if function parameters must be evaluated in reverse order */
90 #define INVERT_FUNC_PARAMS
92 /* pointer size, in bytes */
95 /* long double size and alignment, in bytes */
96 #define LDOUBLE_SIZE 16
97 #define LDOUBLE_ALIGN 8
98 /* maximum alignment (for aligned attribute support) */
101 /******************************************************/
104 #define EM_TCC_TARGET EM_X86_64
106 /* relocation type for 32 bit data relocation */
107 #define R_DATA_32 R_X86_64_32
108 #define R_DATA_PTR R_X86_64_64
109 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
110 #define R_COPY R_X86_64_COPY
112 #define ELF_START_ADDR 0x08048000
113 #define ELF_PAGE_SIZE 0x1000
115 /******************************************************/
116 #else /* ! TARGET_DEFS_ONLY */
117 /******************************************************/
121 ST_DATA
const int reg_classes
[NB_REGS
] = {
122 /* eax */ RC_INT
| RC_RAX
,
123 /* ecx */ RC_INT
| RC_RCX
,
124 /* edx */ RC_INT
| RC_RDX
,
138 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
139 /* xmm1 */ RC_FLOAT
| RC_XMM1
,
140 /* xmm2 */ RC_FLOAT
| RC_XMM2
,
141 /* xmm3 */ RC_FLOAT
| RC_XMM3
,
142 /* xmm4 */ RC_FLOAT
| RC_XMM4
,
143 /* xmm5 */ RC_FLOAT
| RC_XMM5
/* only up to xmm5: xmm6-15 must be callee saved on Win64 */
146 static unsigned long func_sub_sp_offset
;
147 static int func_ret_sub
;
149 /* XXX: make it faster ? */
154 if (ind1
> cur_text_section
->data_allocated
)
155 section_realloc(cur_text_section
, ind1
);
156 cur_text_section
->data
[ind
] = c
;
160 void o(unsigned int c
)
182 void gen_le64(int64_t c
)
194 void orex(int ll
, int r
, int r2
, int b
)
196 if ((r
& VT_VALMASK
) >= VT_CONST
)
198 if ((r2
& VT_VALMASK
) >= VT_CONST
)
200 if (ll
|| REX_BASE(r
) || REX_BASE(r2
))
201 o(0x40 | REX_BASE(r
) | (REX_BASE(r2
) << 2) | (ll
<< 3));
205 /* output a symbol and patch all calls to it */
206 void gsym_addr(int t
, int a
)
210 ptr
= (int *)(cur_text_section
->data
+ t
);
211 n
= *ptr
; /* next value */
222 /* psym is used to put an instruction with a data field which is a
223 reference to a symbol. It is in fact the same as oad ! */
226 static int is64_type(int t
)
228 return ((t
& VT_BTYPE
) == VT_PTR
||
229 (t
& VT_BTYPE
) == VT_FUNC
||
230 (t
& VT_BTYPE
) == VT_LLONG
);
233 static int is_sse_float(int t
) {
236 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
240 /* instruction + 4 bytes data. Return the address of the data */
241 ST_FUNC
int oad(int c
, int s
)
247 if (ind1
> cur_text_section
->data_allocated
)
248 section_realloc(cur_text_section
, ind1
);
249 *(int *)(cur_text_section
->data
+ ind
) = s
;
255 ST_FUNC
void gen_addr32(int r
, Sym
*sym
, int c
)
258 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
262 /* output constant with relocation if 'r & VT_SYM' is true */
263 ST_FUNC
void gen_addr64(int r
, Sym
*sym
, int64_t c
)
266 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
270 /* output constant with relocation if 'r & VT_SYM' is true */
271 ST_FUNC
void gen_addrpc32(int r
, Sym
*sym
, int c
)
274 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
278 /* output got address with relocation */
279 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
281 #ifndef TCC_TARGET_PE
284 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
285 sr
= cur_text_section
->reloc
;
286 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
289 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
290 cur_text_section
->data
[ind
-3],
291 cur_text_section
->data
[ind
-2],
292 cur_text_section
->data
[ind
-1]
294 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
298 /* we use add c, %xxx for displacement */
300 o(0xc0 + REG_VALUE(r
));
305 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
307 op_reg
= REG_VALUE(op_reg
) << 3;
308 if ((r
& VT_VALMASK
) == VT_CONST
) {
309 /* constant memory reference */
312 gen_gotpcrel(r
, sym
, c
);
314 gen_addrpc32(r
, sym
, c
);
316 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
317 /* currently, we use only ebp as base */
319 /* short reference */
323 oad(0x85 | op_reg
, c
);
325 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
327 g(0x80 | op_reg
| REG_VALUE(r
));
330 g(0x00 | op_reg
| REG_VALUE(r
));
333 g(0x00 | op_reg
| REG_VALUE(r
));
337 /* generate a modrm reference. 'op_reg' contains the additional 3
339 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
341 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
344 /* generate a modrm reference. 'op_reg' contains the additional 3
346 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
349 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
350 orex(1, r
, op_reg
, opcode
);
351 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
355 /* load 'r' from value 'sv' */
356 void load(int r
, SValue
*sv
)
358 int v
, t
, ft
, fc
, fr
;
363 sv
= pe_getimport(sv
, &v2
);
370 #ifndef TCC_TARGET_PE
371 /* we use indirect access via got */
372 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
373 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
374 /* use the result register as a temporary register */
375 int tr
= r
| TREG_MEM
;
377 /* we cannot use float registers as a temporary register */
378 tr
= get_reg(RC_INT
) | TREG_MEM
;
380 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
382 /* load from the temporary register */
390 if (v
== VT_LLOCAL
) {
392 v1
.r
= VT_LOCAL
| VT_LVAL
;
395 if (!(reg_classes
[fr
] & RC_INT
))
396 fr
= get_reg(RC_INT
);
400 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
402 r
= REG_VALUE(r
); /* movd */
403 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
404 b
= 0x7e0ff3; /* movq */
406 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
407 b
= 0xdb, r
= 5; /* fldt */
408 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
409 b
= 0xbe0f; /* movsbl */
410 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
411 b
= 0xb60f; /* movzbl */
412 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
413 b
= 0xbf0f; /* movswl */
414 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
415 b
= 0xb70f; /* movzwl */
417 assert(((ft
& VT_BTYPE
) == VT_INT
) || ((ft
& VT_BTYPE
) == VT_LLONG
)
418 || ((ft
& VT_BTYPE
) == VT_PTR
) || ((ft
& VT_BTYPE
) == VT_ENUM
)
419 || ((ft
& VT_BTYPE
) == VT_FUNC
));
424 gen_modrm64(b
, r
, fr
, sv
->sym
, fc
);
427 gen_modrm(r
, fr
, sv
->sym
, fc
);
434 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
435 gen_addrpc32(fr
, sv
->sym
, fc
);
437 if (sv
->sym
->type
.t
& VT_STATIC
) {
439 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
440 gen_addrpc32(fr
, sv
->sym
, fc
);
443 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
444 gen_gotpcrel(r
, sv
->sym
, fc
);
447 } else if (is64_type(ft
)) {
448 orex(1,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
451 orex(0,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
454 } else if (v
== VT_LOCAL
) {
455 orex(1,0,r
,0x8d); /* lea xxx(%ebp), r */
456 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
457 } else if (v
== VT_CMP
) {
459 if ((fc
& ~0x100) != TOK_NE
)
460 oad(0xb8 + REG_VALUE(r
), 0); /* mov $0, r */
462 oad(0xb8 + REG_VALUE(r
), 1); /* mov $1, r */
465 /* This was a float compare. If the parity bit is
466 set the result was unordered, meaning false for everything
467 except TOK_NE, and true for TOK_NE. */
469 o(0x037a + (REX_BASE(r
) << 8));
471 orex(0,r
,0, 0x0f); /* setxx %br */
473 o(0xc0 + REG_VALUE(r
));
474 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
477 oad(0xb8 + REG_VALUE(r
), t
); /* mov $1, r */
478 o(0x05eb + (REX_BASE(r
) << 8)); /* jmp after */
481 oad(0xb8 + REG_VALUE(r
), t
^ 1); /* mov $0, r */
483 if ((r
>= TREG_XMM0
) && (r
<= TREG_XMM7
)) {
485 /* gen_cvt_ftof(VT_DOUBLE); */
486 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
487 /* movsd -0x10(%rsp),%xmmN */
489 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
492 assert((v
>= TREG_XMM0
) && (v
<= TREG_XMM7
));
493 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
496 assert((ft
& VT_BTYPE
) == VT_DOUBLE
);
499 o(0xc0 + REG_VALUE(v
) + REG_VALUE(r
)*8);
501 } else if (r
== TREG_ST0
) {
502 assert((v
== TREG_XMM0
) || (v
== TREG_XMM1
));
503 /* gen_cvt_ftof(VT_LDOUBLE); */
504 /* movsd %xmmN,-0x10(%rsp) */
506 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
508 o(0xf02444dd); /* fldl -0x10(%rsp) */
511 o(0xc0 + REG_VALUE(r
) + REG_VALUE(v
) * 8); /* mov v, r */
517 /* store register 'r' in lvalue 'v' */
518 void store(int r
, SValue
*v
)
522 /* store the REX prefix in this variable when PIC is enabled */
527 v
= pe_getimport(v
, &v2
);
532 fr
= v
->r
& VT_VALMASK
;
535 #ifndef TCC_TARGET_PE
536 /* we need to access the variable via got */
537 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
538 /* mov xx(%rip), %r11 */
540 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
541 pic
= is64_type(bt
) ? 0x49 : 0x41;
545 /* XXX: incorrect if float reg to reg */
546 if (bt
== VT_FLOAT
) {
549 o(0x7e0f); /* movd */
551 } else if (bt
== VT_DOUBLE
) {
554 o(0xd60f); /* movq */
556 } else if (bt
== VT_LDOUBLE
) {
557 o(0xc0d9); /* fld %st(0) */
565 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
567 else if (is64_type(bt
))
573 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
578 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
579 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
580 } else if (fr
!= r
) {
581 /* XXX: don't we really come here? */
583 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
586 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
587 gen_modrm(r
, v
->r
, v
->sym
, fc
);
588 } else if (fr
!= r
) {
589 /* XXX: don't we really come here? */
591 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
596 /* 'is_jmp' is '1' if it is a jump */
597 static void gcall_or_jmp(int is_jmp
)
600 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
602 if (vtop
->r
& VT_SYM
) {
603 /* relocation case */
604 greloc(cur_text_section
, vtop
->sym
,
605 ind
+ 1, R_X86_64_PC32
);
607 /* put an empty PC32 relocation */
608 put_elf_reloc(symtab_section
, cur_text_section
,
609 ind
+ 1, R_X86_64_PC32
, 0);
611 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
613 /* otherwise, indirect call */
617 o(0xff); /* call/jmp *r */
618 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
625 static const uint8_t arg_regs
[REGN
] = {
626 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
629 /* Prepare arguments in R10 and R11 rather than RCX and RDX
630 because gv() will not ever use these */
631 static int arg_prepare_reg(int idx
) {
632 if (idx
== 0 || idx
== 1)
633 /* idx=0: r10, idx=1: r11 */
636 return arg_regs
[idx
];
639 static int func_scratch
;
641 /* Generate function call. The function address is pushed first, then
642 all the parameters in call order. This function pops all the
643 parameters and the function address. */
645 void gen_offs_sp(int b
, int r
, int d
)
647 orex(1,0,r
& 0x100 ? 0 : r
, b
);
649 o(0x2444 | (REG_VALUE(r
) << 3));
652 o(0x2484 | (REG_VALUE(r
) << 3));
657 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
658 ST_FUNC
int gfunc_sret(CType
*vt
, CType
*ret
, int *ret_align
) {
659 *ret_align
= 1; // Never have to re-align return values for x86-64
661 size
= type_size(vt
, &align
);
665 } else if (size
> 4) {
668 } else if (size
> 2) {
671 } else if (size
> 1) {
680 int gfunc_arg_size(CType
*type
) {
681 if (type
->t
& (VT_ARRAY
|VT_BITFIELD
))
684 return type_size(type
, &align
);
687 void gfunc_call(int nb_args
)
689 int size
, r
, args_size
, i
, d
, bt
, struct_size
;
692 args_size
= (nb_args
< REGN
? REGN
: nb_args
) * PTR_SIZE
;
695 /* for struct arguments, we need to call memcpy and the function
696 call breaks register passing arguments we are preparing.
697 So, we process arguments which will be passed by stack first. */
698 struct_size
= args_size
;
699 for(i
= 0; i
< nb_args
; i
++) {
702 SValue
*sv
= &vtop
[-i
];
703 bt
= (sv
->type
.t
& VT_BTYPE
);
704 size
= gfunc_arg_size(&sv
->type
);
707 continue; /* arguments smaller than 8 bytes passed in registers or on stack */
709 if (bt
== VT_STRUCT
) {
710 /* align to stack align size */
711 size
= (size
+ 15) & ~15;
712 /* generate structure store */
714 gen_offs_sp(0x8d, r
, struct_size
);
717 /* generate memcpy call */
718 vset(&sv
->type
, r
| VT_LVAL
, 0);
722 } else if (bt
== VT_LDOUBLE
) {
724 gen_offs_sp(0xdb, 0x107, struct_size
);
729 if (func_scratch
< struct_size
)
730 func_scratch
= struct_size
;
733 struct_size
= args_size
;
735 for(i
= 0; i
< nb_args
; i
++) {
737 bt
= (vtop
->type
.t
& VT_BTYPE
);
739 size
= gfunc_arg_size(&vtop
->type
);
741 /* align to stack align size */
742 size
= (size
+ 15) & ~15;
745 gen_offs_sp(0x8d, d
, struct_size
);
746 gen_offs_sp(0x89, d
, arg
*8);
748 d
= arg_prepare_reg(arg
);
749 gen_offs_sp(0x8d, d
, struct_size
);
753 if (is_sse_float(vtop
->type
.t
)) {
754 gv(RC_XMM0
); /* only use one float register */
756 /* movq %xmm0, j*8(%rsp) */
757 gen_offs_sp(0xd60f66, 0x100, arg
*8);
759 /* movaps %xmm0, %xmmN */
761 o(0xc0 + (arg
<< 3));
762 d
= arg_prepare_reg(arg
);
763 /* mov %xmm0, %rxx */
766 o(0xc0 + REG_VALUE(d
));
769 if (bt
== VT_STRUCT
) {
770 vtop
->type
.ref
= NULL
;
771 vtop
->type
.t
= size
> 4 ? VT_LLONG
: size
> 2 ? VT_INT
772 : size
> 1 ? VT_SHORT
: VT_BYTE
;
777 gen_offs_sp(0x89, r
, arg
*8);
779 d
= arg_prepare_reg(arg
);
780 orex(1,d
,r
,0x89); /* mov */
781 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
789 /* Copy R10 and R11 into RCX and RDX, respectively */
791 o(0xd1894c); /* mov %r10, %rcx */
793 o(0xda894c); /* mov %r11, %rdx */
802 #define FUNC_PROLOG_SIZE 11
804 /* generate function prolog of type 't' */
805 void gfunc_prolog(CType
*func_type
)
807 int addr
, reg_param_index
, bt
, size
;
816 ind
+= FUNC_PROLOG_SIZE
;
817 func_sub_sp_offset
= ind
;
820 sym
= func_type
->ref
;
822 /* if the function returns a structure, then add an
823 implicit pointer parameter */
825 size
= gfunc_arg_size(&func_vt
);
827 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
833 /* define parameters */
834 while ((sym
= sym
->next
) != NULL
) {
836 bt
= type
->t
& VT_BTYPE
;
837 size
= gfunc_arg_size(type
);
839 if (reg_param_index
< REGN
) {
840 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
842 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
| VT_REF
, addr
);
844 if (reg_param_index
< REGN
) {
845 /* save arguments passed by register */
846 if ((bt
== VT_FLOAT
) || (bt
== VT_DOUBLE
)) {
847 o(0xd60f66); /* movq */
848 gen_modrm(reg_param_index
, VT_LOCAL
, NULL
, addr
);
850 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
853 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
859 while (reg_param_index
< REGN
) {
860 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
861 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
868 /* generate function epilog */
869 void gfunc_epilog(void)
874 if (func_ret_sub
== 0) {
879 g(func_ret_sub
>> 8);
883 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
884 /* align local size to word & save local variables */
885 v
= (func_scratch
+ -loc
+ 15) & -16;
888 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
889 oad(0xb8, v
); /* mov stacksize, %eax */
890 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
891 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
892 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
894 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
895 o(0xec8148); /* sub rsp, stacksize */
899 cur_text_section
->data_offset
= saved_ind
;
900 pe_add_unwind_data(ind
, saved_ind
, v
);
901 ind
= cur_text_section
->data_offset
;
906 static void gadd_sp(int val
)
908 if (val
== (char)val
) {
912 oad(0xc48148, val
); /* add $xxx, %rsp */
916 typedef enum X86_64_Mode
{
924 static X86_64_Mode
classify_x86_64_merge(X86_64_Mode a
, X86_64_Mode b
) {
927 else if (a
== x86_64_mode_none
)
929 else if (b
== x86_64_mode_none
)
931 else if ((a
== x86_64_mode_memory
) || (b
== x86_64_mode_memory
))
932 return x86_64_mode_memory
;
933 else if ((a
== x86_64_mode_integer
) || (b
== x86_64_mode_integer
))
934 return x86_64_mode_integer
;
935 else if ((a
== x86_64_mode_x87
) || (b
== x86_64_mode_x87
))
936 return x86_64_mode_memory
;
938 return x86_64_mode_sse
;
941 static X86_64_Mode
classify_x86_64_inner(CType
*ty
) {
945 switch (ty
->t
& VT_BTYPE
) {
946 case VT_VOID
: return x86_64_mode_none
;
955 case VT_ENUM
: return x86_64_mode_integer
;
958 case VT_DOUBLE
: return x86_64_mode_sse
;
960 case VT_LDOUBLE
: return x86_64_mode_x87
;
966 if (f
->next
&& (f
->c
== f
->next
->c
))
967 return x86_64_mode_memory
;
969 mode
= x86_64_mode_none
;
970 for (; f
; f
= f
->next
)
971 mode
= classify_x86_64_merge(mode
, classify_x86_64_inner(&f
->type
));
979 static X86_64_Mode
classify_x86_64_arg(CType
*ty
, CType
*ret
, int *psize
, int *reg_count
) {
981 int size
, align
, ret_t
;
983 if (ty
->t
& (VT_BITFIELD
|VT_ARRAY
)) {
987 mode
= x86_64_mode_integer
;
989 size
= type_size(ty
, &align
);
990 *psize
= (size
+ 7) & ~7;
993 mode
= x86_64_mode_memory
;
995 mode
= classify_x86_64_inner(ty
);
997 case x86_64_mode_integer
:
1003 ret_t
= (size
> 4) ? VT_LLONG
: VT_INT
;
1007 case x86_64_mode_x87
:
1012 case x86_64_mode_sse
:
1018 ret_t
= (size
> 4) ? VT_DOUBLE
: VT_FLOAT
;
1033 ST_FUNC
int classify_x86_64_va_arg(CType
*ty
) {
1034 /* This definition must be synced with stdarg.h */
1035 enum __va_arg_type
{
1036 __va_gen_reg
, __va_float_reg
, __va_stack
1038 int size
, reg_count
;
1039 X86_64_Mode mode
= classify_x86_64_arg(ty
, NULL
, &size
, &reg_count
);
1041 default: return __va_stack
;
1042 case x86_64_mode_integer
: return __va_gen_reg
;
1043 case x86_64_mode_sse
: return __va_float_reg
;
1047 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
1048 int gfunc_sret(CType
*vt
, CType
*ret
, int *ret_align
) {
1049 int size
, reg_count
;
1050 *ret_align
= 1; // Never have to re-align return values for x86-64
1051 return (classify_x86_64_arg(vt
, ret
, &size
, &reg_count
) == x86_64_mode_memory
);
1055 static const uint8_t arg_regs
[REGN
] = {
1056 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
1059 static int arg_prepare_reg(int idx
) {
1060 if (idx
== 2 || idx
== 3)
1061 /* idx=2: r10, idx=3: r11 */
1064 return arg_regs
[idx
];
1067 /* Generate function call. The function address is pushed first, then
1068 all the parameters in call order. This function pops all the
1069 parameters and the function address. */
1070 void gfunc_call(int nb_args
)
1074 int size
, align
, r
, args_size
, i
, j
, reg_count
;
1075 int nb_reg_args
= 0;
1076 int nb_sse_args
= 0;
1077 int sse_reg
, gen_reg
;
1079 /* calculate the number of integer/float arguments */
1081 for(i
= 0; i
< nb_args
; i
++) {
1082 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, &reg_count
);
1084 case x86_64_mode_memory
:
1085 case x86_64_mode_x87
:
1089 case x86_64_mode_sse
:
1090 nb_sse_args
+= reg_count
;
1091 if (nb_sse_args
> 8) args_size
+= size
;
1094 case x86_64_mode_integer
:
1095 nb_reg_args
+= reg_count
;
1096 if (nb_reg_args
> REGN
) args_size
+= size
;
1101 /* for struct arguments, we need to call memcpy and the function
1102 call breaks register passing arguments we are preparing.
1103 So, we process arguments which will be passed by stack first. */
1104 gen_reg
= nb_reg_args
;
1105 sse_reg
= nb_sse_args
;
1107 /* adjust stack to align SSE boundary */
1108 if (args_size
&= 15) {
1109 /* fetch cpu flag before the following sub will change the value */
1110 if (vtop
>= vstack
&& (vtop
->r
& VT_VALMASK
) == VT_CMP
)
1113 args_size
= 16 - args_size
;
1115 oad(0xec81, args_size
); /* sub $xxx, %rsp */
1118 for(i
= 0; i
< nb_args
;) {
1119 /* Swap argument to top, it will possibly be changed here,
1120 and might use more temps. At the end of the loop we keep
1121 it on the stack and swap it back to its original position
1122 if it is a register. */
1123 SValue tmp
= vtop
[0];
1127 mode
= classify_x86_64_arg(&vtop
->type
, NULL
, &size
, &reg_count
);
1130 switch (vtop
->type
.t
& VT_BTYPE
) {
1132 if (mode
== x86_64_mode_sse
) {
1134 sse_reg
-= reg_count
;
1137 } else if (mode
== x86_64_mode_integer
) {
1139 gen_reg
-= reg_count
;
1145 /* allocate the necessary size on stack */
1147 oad(0xec81, size
); /* sub $xxx, %rsp */
1148 /* generate structure store */
1149 r
= get_reg(RC_INT
);
1150 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1151 o(0xe0 + REG_VALUE(r
));
1152 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1161 size
= LDOUBLE_SIZE
;
1162 oad(0xec8148, size
); /* sub $xxx, %rsp */
1163 o(0x7cdb); /* fstpt 0(%rsp) */
1171 assert(mode
== x86_64_mode_sse
);
1175 o(0x50); /* push $rax */
1176 /* movq %xmm0, (%rsp) */
1178 o(0x04 + REG_VALUE(r
)*8);
1187 assert(mode
== x86_64_mode_integer
);
1189 /* XXX: implicit cast ? */
1190 if (gen_reg
> REGN
) {
1193 orex(0,r
,0,0x50 + REG_VALUE(r
)); /* push r */
1201 /* And swap the argument back to its original position. */
1208 assert(vtop
->type
.t
== tmp
.type
.t
);
1216 /* XXX This should be superfluous. */
1217 save_regs(0); /* save used temporary registers */
1219 /* then, we prepare register passing arguments.
1220 Note that we cannot set RDX and RCX in this loop because gv()
1221 may break these temporary registers. Let's use R10 and R11
1223 assert(gen_reg
<= REGN
);
1224 assert(sse_reg
<= 8);
1225 for(i
= 0; i
< nb_args
; i
++) {
1226 mode
= classify_x86_64_arg(&vtop
->type
, &type
, &size
, &reg_count
);
1227 /* Alter stack entry type so that gv() knows how to treat it */
1229 if (mode
== x86_64_mode_sse
) {
1230 if (reg_count
== 2) {
1232 gv(RC_FRET
); /* Use pair load into xmm0 & xmm1 */
1233 if (sse_reg
) { /* avoid redundant movaps %xmm0, %xmm0 */
1234 /* movaps %xmm0, %xmmN */
1236 o(0xc0 + (sse_reg
<< 3));
1237 /* movaps %xmm1, %xmmN */
1239 o(0xc1 + ((sse_reg
+1) << 3));
1242 assert(reg_count
== 1);
1244 /* Load directly to register */
1245 gv(RC_XMM0
<< sse_reg
);
1247 } else if (mode
== x86_64_mode_integer
) {
1249 /* XXX: implicit cast ? */
1250 gen_reg
-= reg_count
;
1252 int d
= arg_prepare_reg(gen_reg
);
1253 orex(1,d
,r
,0x89); /* mov */
1254 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
1255 if (reg_count
== 2) {
1256 d
= arg_prepare_reg(gen_reg
+1);
1257 orex(1,d
,vtop
->r2
,0x89); /* mov */
1258 o(0xc0 + REG_VALUE(vtop
->r2
) * 8 + REG_VALUE(d
));
1263 assert(gen_reg
== 0);
1264 assert(sse_reg
== 0);
1266 /* We shouldn't have many operands on the stack anymore, but the
1267 call address itself is still there, and it might be in %eax
1268 (or edx/ecx) currently, which the below writes would clobber.
1269 So evict all remaining operands here. */
1272 /* Copy R10 and R11 into RDX and RCX, respectively */
1273 if (nb_reg_args
> 2) {
1274 o(0xd2894c); /* mov %r10, %rdx */
1275 if (nb_reg_args
> 3) {
1276 o(0xd9894c); /* mov %r11, %rcx */
1280 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
1288 #define FUNC_PROLOG_SIZE 11
1290 static void push_arg_reg(int i
) {
1292 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
1295 /* generate function prolog of type 't' */
1296 void gfunc_prolog(CType
*func_type
)
1299 int i
, addr
, align
, size
, reg_count
;
1300 int param_addr
, reg_param_index
, sse_param_index
;
1304 sym
= func_type
->ref
;
1305 addr
= PTR_SIZE
* 2;
1307 ind
+= FUNC_PROLOG_SIZE
;
1308 func_sub_sp_offset
= ind
;
1311 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
1312 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
1313 seen_reg_num
= seen_sse_num
= 0;
1314 /* frame pointer and return address */
1315 seen_stack_size
= PTR_SIZE
* 2;
1316 /* count the number of seen parameters */
1317 sym
= func_type
->ref
;
1318 while ((sym
= sym
->next
) != NULL
) {
1320 mode
= classify_x86_64_arg(type
, NULL
, &size
, &reg_count
);
1323 seen_stack_size
+= size
;
1326 case x86_64_mode_integer
:
1327 if (seen_reg_num
+ reg_count
<= 8) {
1328 seen_reg_num
+= reg_count
;
1331 seen_stack_size
+= size
;
1335 case x86_64_mode_sse
:
1336 if (seen_sse_num
+ reg_count
<= 8) {
1337 seen_sse_num
+= reg_count
;
1340 seen_stack_size
+= size
;
1347 /* movl $0x????????, -0x10(%rbp) */
1349 gen_le32(seen_reg_num
* 8);
1350 /* movl $0x????????, -0xc(%rbp) */
1352 gen_le32(seen_sse_num
* 16 + 48);
1353 /* movl $0x????????, -0x8(%rbp) */
1355 gen_le32(seen_stack_size
);
1357 /* save all register passing arguments */
1358 for (i
= 0; i
< 8; i
++) {
1360 o(0xd60f66); /* movq */
1361 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
1362 /* movq $0, loc+8(%rbp) */
1367 for (i
= 0; i
< REGN
; i
++) {
1368 push_arg_reg(REGN
-1-i
);
1372 sym
= func_type
->ref
;
1373 reg_param_index
= 0;
1374 sse_param_index
= 0;
1376 /* if the function returns a structure, then add an
1377 implicit pointer parameter */
1378 func_vt
= sym
->type
;
1379 mode
= classify_x86_64_arg(&func_vt
, NULL
, &size
, &reg_count
);
1380 if (mode
== x86_64_mode_memory
) {
1381 push_arg_reg(reg_param_index
);
1385 /* define parameters */
1386 while ((sym
= sym
->next
) != NULL
) {
1388 mode
= classify_x86_64_arg(type
, NULL
, &size
, &reg_count
);
1390 case x86_64_mode_sse
:
1391 if (sse_param_index
+ reg_count
<= 8) {
1392 /* save arguments passed by register */
1393 loc
-= reg_count
* 8;
1395 for (i
= 0; i
< reg_count
; ++i
) {
1396 o(0xd60f66); /* movq */
1397 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, param_addr
+ i
*8);
1403 sse_param_index
+= reg_count
;
1407 case x86_64_mode_memory
:
1408 case x86_64_mode_x87
:
1413 case x86_64_mode_integer
: {
1414 if (reg_param_index
+ reg_count
<= REGN
) {
1415 /* save arguments passed by register */
1416 loc
-= reg_count
* 8;
1418 for (i
= 0; i
< reg_count
; ++i
) {
1419 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, param_addr
+ i
*8);
1425 reg_param_index
+= reg_count
;
1430 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1431 VT_LOCAL
| VT_LVAL
, param_addr
);
1435 /* generate function epilog */
1436 void gfunc_epilog(void)
1440 o(0xc9); /* leave */
1441 if (func_ret_sub
== 0) {
1444 o(0xc2); /* ret n */
1446 g(func_ret_sub
>> 8);
1448 /* align local size to word & save local variables */
1449 v
= (-loc
+ 15) & -16;
1451 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1452 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1453 o(0xec8148); /* sub rsp, stacksize */
1460 /* generate a jump to a label */
1463 return psym(0xe9, t
);
1466 /* generate a jump to a fixed address */
1467 void gjmp_addr(int a
)
1475 oad(0xe9, a
- ind
- 5);
1479 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1480 int gtst(int inv
, int t
)
1484 v
= vtop
->r
& VT_VALMASK
;
1486 /* fast case : can jump directly since flags are set */
1487 if (vtop
->c
.i
& 0x100)
1489 /* This was a float compare. If the parity flag is set
1490 the result was unordered. For anything except != this
1491 means false and we don't jump (anding both conditions).
1492 For != this means true (oring both).
1493 Take care about inverting the test. We need to jump
1494 to our target if the result was unordered and test wasn't NE,
1495 otherwise if unordered we don't want to jump. */
1496 vtop
->c
.i
&= ~0x100;
1497 if (!inv
== (vtop
->c
.i
!= TOK_NE
))
1498 o(0x067a); /* jp +6 */
1502 t
= psym(0x8a, t
); /* jp t */
1506 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
1507 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
1508 /* && or || optimization */
1509 if ((v
& 1) == inv
) {
1510 /* insert vtop->c jump list in t */
1513 p
= (int *)(cur_text_section
->data
+ *p
);
1521 if (is_float(vtop
->type
.t
) ||
1522 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1526 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1527 /* constant jmp optimization */
1528 if ((vtop
->c
.i
!= 0) != inv
)
1533 o(0xc0 + REG_VALUE(v
) * 9);
1535 t
= psym(0x85 ^ inv
, t
);
1542 /* generate an integer binary operation */
1543 void gen_opi(int op
)
1548 ll
= is64_type(vtop
[-1].type
.t
);
1549 uu
= (vtop
[-1].type
.t
& VT_UNSIGNED
) != 0;
1550 cc
= (vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
;
1554 case TOK_ADDC1
: /* add with carry generation */
1557 if (cc
&& (!ll
|| (int)vtop
->c
.ll
== vtop
->c
.ll
)) {
1564 /* XXX: generate inc and dec for smaller code ? */
1565 orex(ll
, r
, 0, 0x83);
1566 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1569 orex(ll
, r
, 0, 0x81);
1570 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1573 gv2(RC_INT
, RC_INT
);
1576 orex(ll
, r
, fr
, (opc
<< 3) | 0x01);
1577 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1580 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1586 case TOK_SUBC1
: /* sub with carry generation */
1589 case TOK_ADDC2
: /* add with carry use */
1592 case TOK_SUBC2
: /* sub with carry use */
1605 gv2(RC_INT
, RC_INT
);
1608 orex(ll
, fr
, r
, 0xaf0f); /* imul fr, r */
1609 o(0xc0 + REG_VALUE(fr
) + REG_VALUE(r
) * 8);
1621 opc
= 0xc0 | (opc
<< 3);
1627 orex(ll
, r
, 0, 0xc1); /* shl/shr/sar $xxx, r */
1628 o(opc
| REG_VALUE(r
));
1629 g(vtop
->c
.i
& (ll
? 63 : 31));
1631 /* we generate the shift in ecx */
1632 gv2(RC_INT
, RC_RCX
);
1634 orex(ll
, r
, 0, 0xd3); /* shl/shr/sar %cl, r */
1635 o(opc
| REG_VALUE(r
));
1648 /* first operand must be in eax */
1649 /* XXX: need better constraint for second operand */
1650 gv2(RC_RAX
, RC_RCX
);
1655 orex(ll
, 0, 0, uu
? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1656 orex(ll
, fr
, 0, 0xf7); /* div fr, %eax */
1657 o((uu
? 0xf0 : 0xf8) + REG_VALUE(fr
));
1658 if (op
== '%' || op
== TOK_UMOD
)
1670 void gen_opl(int op
)
1675 /* generate a floating point operation 'v = t1 op t2' instruction. The
1676 two operands are guaranteed to have the same floating point type */
1677 /* XXX: need to use ST1 too */
1678 void gen_opf(int op
)
1680 int a
, ft
, fc
, swapped
, r
;
1682 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1684 /* convert constants to memory references */
1685 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1690 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1693 /* must put at least one value in the floating point register */
1694 if ((vtop
[-1].r
& VT_LVAL
) &&
1695 (vtop
[0].r
& VT_LVAL
)) {
1701 /* swap the stack if needed so that t1 is the register and t2 is
1702 the memory reference */
1703 if (vtop
[-1].r
& VT_LVAL
) {
1707 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1708 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1709 /* load on stack second operand */
1710 load(TREG_ST0
, vtop
);
1711 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1712 if (op
== TOK_GE
|| op
== TOK_GT
)
1714 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1717 o(0xc9d9); /* fxch %st(1) */
1718 o(0xe9da); /* fucompp */
1719 o(0xe0df); /* fnstsw %ax */
1721 o(0x45e480); /* and $0x45, %ah */
1722 o(0x40fC80); /* cmp $0x40, %ah */
1723 } else if (op
== TOK_NE
) {
1724 o(0x45e480); /* and $0x45, %ah */
1725 o(0x40f480); /* xor $0x40, %ah */
1727 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1728 o(0x05c4f6); /* test $0x05, %ah */
1731 o(0x45c4f6); /* test $0x45, %ah */
1738 /* no memory reference possible for long double operations */
1739 load(TREG_ST0
, vtop
);
1763 o(0xde); /* fxxxp %st, %st(1) */
1768 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1769 /* if saved lvalue, then we must reload it */
1772 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1774 r
= get_reg(RC_INT
);
1776 v1
.r
= VT_LOCAL
| VT_LVAL
;
1782 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1785 if (op
== TOK_LE
|| op
== TOK_LT
)
1787 if (op
== TOK_LE
|| op
== TOK_GE
) {
1788 op
= 0x93; /* setae */
1790 op
= 0x97; /* seta */
1798 assert(!(vtop
[-1].r
& VT_LVAL
));
1800 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
)
1802 o(0x2e0f); /* ucomisd */
1804 if (vtop
->r
& VT_LVAL
) {
1805 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
1807 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
1812 vtop
->c
.i
= op
| 0x100;
1814 assert((vtop
->type
.t
& VT_BTYPE
) != VT_LDOUBLE
);
1832 assert((ft
& VT_BTYPE
) != VT_LDOUBLE
);
1835 /* if saved lvalue, then we must reload it */
1836 if ((vtop
->r
& VT_VALMASK
) == VT_LLOCAL
) {
1838 r
= get_reg(RC_INT
);
1840 v1
.r
= VT_LOCAL
| VT_LVAL
;
1846 assert(!(vtop
[-1].r
& VT_LVAL
));
1848 assert(vtop
->r
& VT_LVAL
);
1853 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1861 if (vtop
->r
& VT_LVAL
) {
1862 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
1864 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
1872 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1873 and 'long long' cases. */
1874 void gen_cvt_itof(int t
)
1876 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1879 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1880 /* signed long long to float/double/long double (unsigned case
1881 is handled generically) */
1882 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1883 o(0x242cdf); /* fildll (%rsp) */
1884 o(0x08c48348); /* add $8, %rsp */
1885 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1886 (VT_INT
| VT_UNSIGNED
)) {
1887 /* unsigned int to float/double/long double */
1888 o(0x6a); /* push $0 */
1890 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1891 o(0x242cdf); /* fildll (%rsp) */
1892 o(0x10c48348); /* add $16, %rsp */
1894 /* int to float/double/long double */
1895 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1896 o(0x2404db); /* fildl (%rsp) */
1897 o(0x08c48348); /* add $8, %rsp */
1901 int r
= get_reg(RC_FLOAT
);
1903 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
?1:0));
1904 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1905 (VT_INT
| VT_UNSIGNED
) ||
1906 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1910 o(0xc0 + (vtop
->r
& VT_VALMASK
) + REG_VALUE(r
)*8); /* cvtsi2sd */
1915 /* convert from one floating point type to another */
1916 void gen_cvt_ftof(int t
)
1924 if (bt
== VT_FLOAT
) {
1926 if (tbt
== VT_DOUBLE
) {
1927 o(0x140f); /* unpcklps */
1928 o(0xc0 + REG_VALUE(vtop
->r
)*9);
1929 o(0x5a0f); /* cvtps2pd */
1930 o(0xc0 + REG_VALUE(vtop
->r
)*9);
1931 } else if (tbt
== VT_LDOUBLE
) {
1933 /* movss %xmm0,-0x10(%rsp) */
1935 o(0x44 + REG_VALUE(vtop
->r
)*8);
1937 o(0xf02444d9); /* flds -0x10(%rsp) */
1940 } else if (bt
== VT_DOUBLE
) {
1942 if (tbt
== VT_FLOAT
) {
1943 o(0x140f66); /* unpcklpd */
1944 o(0xc0 + REG_VALUE(vtop
->r
)*9);
1945 o(0x5a0f66); /* cvtpd2ps */
1946 o(0xc0 + REG_VALUE(vtop
->r
)*9);
1947 } else if (tbt
== VT_LDOUBLE
) {
1949 /* movsd %xmm0,-0x10(%rsp) */
1951 o(0x44 + REG_VALUE(vtop
->r
)*8);
1953 o(0xf02444dd); /* fldl -0x10(%rsp) */
1958 int r
= get_reg(RC_FLOAT
);
1959 if (tbt
== VT_DOUBLE
) {
1960 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1961 /* movsd -0x10(%rsp),%xmm0 */
1963 o(0x44 + REG_VALUE(r
)*8);
1966 } else if (tbt
== VT_FLOAT
) {
1967 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1968 /* movss -0x10(%rsp),%xmm0 */
1970 o(0x44 + REG_VALUE(r
)*8);
1977 /* convert fp to int 't' type */
1978 void gen_cvt_ftoi(int t
)
1980 int ft
, bt
, size
, r
;
1983 if (bt
== VT_LDOUBLE
) {
1984 gen_cvt_ftof(VT_DOUBLE
);
1994 r
= get_reg(RC_INT
);
1995 if (bt
== VT_FLOAT
) {
1997 } else if (bt
== VT_DOUBLE
) {
2002 orex(size
== 8, r
, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
2003 o(0xc0 + REG_VALUE(vtop
->r
) + REG_VALUE(r
)*8);
2007 /* computed goto support */
2014 /* end of x86-64 code generator */
2015 /*************************************************************/
2016 #endif /* ! TARGET_DEFS_ONLY */
2017 /******************************************************/