2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which does
31 assumptions on it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
37 #define RC_ST0 0x0080 /* only for long double */
42 #define RC_XMM0 0x1000
43 #define RC_XMM1 0x2000
44 #define RC_XMM2 0x4000
45 #define RC_XMM3 0x8000
46 #define RC_XMM4 0x10000
47 #define RC_XMM5 0x20000
48 #define RC_XMM6 0x40000
49 #define RC_XMM7 0x80000
50 #define RC_IRET RC_RAX /* function return: integer register */
51 #define RC_LRET RC_RDX /* function return: second integer register */
52 #define RC_FRET RC_XMM0 /* function return: float register */
53 #define RC_QRET RC_XMM1 /* function return: second float register */
55 /* pretty names for the registers */
77 TREG_ST0
= 4, // SP slot won't be used
82 #define REX_BASE(reg) (((reg) >> 3) & 1)
83 #define REG_VALUE(reg) ((reg) & 7)
85 /* return registers for function */
86 #define REG_IRET TREG_RAX /* single word int return register */
87 #define REG_LRET TREG_RDX /* second word return register (for long long) */
88 #define REG_FRET TREG_XMM0 /* float return register */
89 #define REG_QRET TREG_XMM1 /* second float return register */
91 /* defined if function parameters must be evaluated in reverse order */
92 #define INVERT_FUNC_PARAMS
94 /* pointer size, in bytes */
97 /* long double size and alignment, in bytes */
98 #define LDOUBLE_SIZE 16
/* NOTE(review): the x86-64 System V psABI specifies 16-byte alignment
   for long double; 8 here looks suspicious — confirm against the ABI
   and the struct-layout code that consumes this macro. */
99 #define LDOUBLE_ALIGN 8
100 /* maximum alignment (for aligned attribute support) */
103 /******************************************************/
/* ELF machine id emitted into generated object files */
106 #define EM_TCC_TARGET EM_X86_64
108 /* relocation type for 32 bit data relocation */
109 #define R_DATA_32 R_X86_64_32
110 #define R_DATA_PTR R_X86_64_64
111 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
112 #define R_COPY R_X86_64_COPY
/* NOTE(review): 0x08048000 is the traditional i386 executable load
   address; x86-64 Linux executables conventionally load at 0x400000 —
   confirm whether this was intentional for this target. */
114 #define ELF_START_ADDR 0x08048000
115 #define ELF_PAGE_SIZE 0x1000
117 /******************************************************/
118 #else /* ! TARGET_DEFS_ONLY */
119 /******************************************************/
123 ST_DATA
const int reg_classes
[NB_REGS
] = {
124 /* eax */ RC_INT
| RC_RAX
,
125 /* ecx */ RC_INT
| RC_RCX
,
126 /* edx */ RC_INT
| RC_RDX
,
140 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
141 /* xmm1 */ RC_FLOAT
| RC_XMM1
,
142 /* xmm2 */ RC_FLOAT
| RC_XMM2
,
143 /* xmm3 */ RC_FLOAT
| RC_XMM3
,
144 /* xmm4 */ RC_FLOAT
| RC_XMM4
,
145 /* xmm5 */ RC_FLOAT
| RC_XMM5
,
146 /* xmm6 */ RC_FLOAT
| RC_XMM6
,
147 /* xmm7 */ RC_FLOAT
| RC_XMM7
,
150 static unsigned long func_sub_sp_offset
;
151 static int func_ret_sub
;
153 /* XXX: make it faster ? */
158 if (ind1
> cur_text_section
->data_allocated
)
159 section_realloc(cur_text_section
, ind1
);
160 cur_text_section
->data
[ind
] = c
;
164 void o(unsigned int c
)
186 void gen_le64(int64_t c
)
/* Emit an optional REX prefix for the next instruction.
   ll != 0 requests a 64-bit operand (REX.W); 'r' supplies the REX.B
   bit, 'r2' the REX.R bit (bit layout 0100WR0B per the expression
   below); 'b' is the opcode byte, emitted by code on original line
   206 which is not visible in this chunk. */
198 void orex(int ll
, int r
, int r2
, int b
)
/* Pseudo values (VT_CONST and above) are not machine registers; the
   clearing assignment (original line 201) is not visible here. */
200 if ((r
& VT_VALMASK
) >= VT_CONST
)
/* same treatment for the second operand (original line 203 missing) */
202 if ((r2
& VT_VALMASK
) >= VT_CONST
)
/* a REX byte is needed for 64-bit ops or any extended (r8-r15/xmm8+)
   register operand */
204 if (ll
|| REX_BASE(r
) || REX_BASE(r2
))
205 o(0x40 | REX_BASE(r
) | (REX_BASE(r2
) << 2) | (ll
<< 3));
209 /* output a symbol and patch all calls to it */
210 void gsym_addr(int t
, int a
)
214 ptr
= (int *)(cur_text_section
->data
+ t
);
215 n
= *ptr
; /* next value */
226 /* psym is used to put an instruction with a data field which is a
227 reference to a symbol. It is in fact the same as oad ! */
230 static int is64_type(int t
)
232 return ((t
& VT_BTYPE
) == VT_PTR
||
233 (t
& VT_BTYPE
) == VT_FUNC
||
234 (t
& VT_BTYPE
) == VT_LLONG
);
/* Return non-zero when 't' is a floating type passed/returned in SSE
   registers (float or double); long double is handled on the x87
   stack elsewhere and is deliberately excluded. */
237 static int is_sse_float(int t
) {
/* NOTE(review): 'bt' is presumably t & VT_BTYPE — its declaration
   (original lines 238-239) is not visible in this chunk; confirm. */
240 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
244 /* instruction + 4 bytes data. Return the address of the data */
245 ST_FUNC
int oad(int c
, int s
)
251 if (ind1
> cur_text_section
->data_allocated
)
252 section_realloc(cur_text_section
, ind1
);
253 *(int *)(cur_text_section
->data
+ ind
) = s
;
259 ST_FUNC
void gen_addr32(int r
, Sym
*sym
, int c
)
262 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
266 /* output constant with relocation if 'r & VT_SYM' is true */
267 ST_FUNC
void gen_addr64(int r
, Sym
*sym
, int64_t c
)
270 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
274 /* output constant with relocation if 'r & VT_SYM' is true */
275 ST_FUNC
void gen_addrpc32(int r
, Sym
*sym
, int c
)
278 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
282 /* output got address with relocation */
283 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
285 #ifndef TCC_TARGET_PE
288 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
289 sr
= cur_text_section
->reloc
;
290 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
293 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
294 cur_text_section
->data
[ind
-3],
295 cur_text_section
->data
[ind
-2],
296 cur_text_section
->data
[ind
-1]
298 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
302 /* we use add c, %xxx for displacement */
304 o(0xc0 + REG_VALUE(r
));
309 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
311 op_reg
= REG_VALUE(op_reg
) << 3;
312 if ((r
& VT_VALMASK
) == VT_CONST
) {
313 /* constant memory reference */
316 gen_gotpcrel(r
, sym
, c
);
318 gen_addrpc32(r
, sym
, c
);
320 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
321 /* currently, we use only ebp as base */
323 /* short reference */
327 oad(0x85 | op_reg
, c
);
329 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
331 g(0x80 | op_reg
| REG_VALUE(r
));
334 g(0x00 | op_reg
| REG_VALUE(r
));
337 g(0x00 | op_reg
| REG_VALUE(r
));
341 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
343 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
345 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
348 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
350 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
353 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
354 orex(1, r
, op_reg
, opcode
);
355 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
359 /* load 'r' from value 'sv' */
360 void load(int r
, SValue
*sv
)
362 int v
, t
, ft
, fc
, fr
;
367 sv
= pe_getimport(sv
, &v2
);
374 #ifndef TCC_TARGET_PE
375 /* we use indirect access via got */
376 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
377 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
378 /* use the result register as a temporal register */
379 int tr
= r
| TREG_MEM
;
381 /* we cannot use float registers as a temporal register */
382 tr
= get_reg(RC_INT
) | TREG_MEM
;
384 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
386 /* load from the temporal register */
394 if (v
== VT_LLOCAL
) {
396 v1
.r
= VT_LOCAL
| VT_LVAL
;
399 if (!(reg_classes
[fr
] & RC_INT
))
400 fr
= get_reg(RC_INT
);
404 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
406 r
= REG_VALUE(r
); /* movd */
407 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
408 b
= 0x7e0ff3; /* movq */
410 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
411 b
= 0xdb, r
= 5; /* fldt */
412 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
413 b
= 0xbe0f; /* movsbl */
414 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
415 b
= 0xb60f; /* movzbl */
416 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
417 b
= 0xbf0f; /* movswl */
418 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
419 b
= 0xb70f; /* movzwl */
421 assert(((ft
& VT_BTYPE
) == VT_INT
) || ((ft
& VT_BTYPE
) == VT_LLONG
)
422 || ((ft
& VT_BTYPE
) == VT_PTR
) || ((ft
& VT_BTYPE
) == VT_ENUM
)
423 || ((ft
& VT_BTYPE
) == VT_FUNC
));
428 gen_modrm64(b
, r
, fr
, sv
->sym
, fc
);
431 gen_modrm(r
, fr
, sv
->sym
, fc
);
438 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
439 gen_addrpc32(fr
, sv
->sym
, fc
);
441 if (sv
->sym
->type
.t
& VT_STATIC
) {
443 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
444 gen_addrpc32(fr
, sv
->sym
, fc
);
447 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
448 gen_gotpcrel(r
, sv
->sym
, fc
);
451 } else if (is64_type(ft
)) {
452 orex(1,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
455 orex(0,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
458 } else if (v
== VT_LOCAL
) {
459 orex(1,0,r
,0x8d); /* lea xxx(%ebp), r */
460 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
461 } else if (v
== VT_CMP
) {
463 if ((fc
& ~0x100) != TOK_NE
)
464 oad(0xb8 + REG_VALUE(r
), 0); /* mov $0, r */
466 oad(0xb8 + REG_VALUE(r
), 1); /* mov $1, r */
469 /* This was a float compare. If the parity bit is
470 set the result was unordered, meaning false for everything
471 except TOK_NE, and true for TOK_NE. */
473 o(0x037a + (REX_BASE(r
) << 8));
475 orex(0,r
,0, 0x0f); /* setxx %br */
477 o(0xc0 + REG_VALUE(r
));
478 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
481 oad(0xb8 + REG_VALUE(r
), t
); /* mov $1, r */
482 o(0x05eb + (REX_BASE(r
) << 8)); /* jmp after */
485 oad(0xb8 + REG_VALUE(r
), t
^ 1); /* mov $0, r */
487 if ((r
== TREG_XMM0
) || (r
== TREG_XMM1
)) {
489 /* gen_cvt_ftof(VT_DOUBLE); */
490 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
491 /* movsd -0x10(%rsp),%xmmN */
493 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
496 assert((v
== TREG_XMM0
) || (v
== TREG_XMM1
));
497 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
500 assert((ft
& VT_BTYPE
) == VT_DOUBLE
);
503 o(0xc0 + REG_VALUE(v
) + REG_VALUE(r
)*8);
505 } else if (r
== TREG_ST0
) {
506 assert((v
== TREG_XMM0
) || (v
== TREG_XMM1
));
507 /* gen_cvt_ftof(VT_LDOUBLE); */
508 /* movsd %xmmN,-0x10(%rsp) */
510 o(0x44 + REG_VALUE(r
)*8); /* %xmmN */
512 o(0xf02444dd); /* fldl -0x10(%rsp) */
515 o(0xc0 + REG_VALUE(r
) + REG_VALUE(v
) * 8); /* mov v, r */
521 /* store register 'r' in lvalue 'v' */
522 void store(int r
, SValue
*v
)
526 /* store the REX prefix in this variable when PIC is enabled */
531 v
= pe_getimport(v
, &v2
);
536 fr
= v
->r
& VT_VALMASK
;
539 #ifndef TCC_TARGET_PE
540 /* we need to access the variable via got */
541 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
542 /* mov xx(%rip), %r11 */
544 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
545 pic
= is64_type(bt
) ? 0x49 : 0x41;
549 /* XXX: incorrect if float reg to reg */
550 if (bt
== VT_FLOAT
) {
553 o(0x7e0f); /* movd */
555 } else if (bt
== VT_DOUBLE
) {
558 o(0xd60f); /* movq */
560 } else if (bt
== VT_LDOUBLE
) {
561 o(0xc0d9); /* fld %st(0) */
569 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
571 else if (is64_type(bt
))
577 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
582 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
583 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
584 } else if (fr
!= r
) {
585 /* XXX: don't we really come here? */
587 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
590 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
591 gen_modrm(r
, v
->r
, v
->sym
, fc
);
592 } else if (fr
!= r
) {
593 /* XXX: don't we really come here? */
595 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
600 /* 'is_jmp' is '1' if it is a jump */
601 static void gcall_or_jmp(int is_jmp
)
604 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
606 if (vtop
->r
& VT_SYM
) {
607 /* relocation case */
608 greloc(cur_text_section
, vtop
->sym
,
609 ind
+ 1, R_X86_64_PC32
);
611 /* put an empty PC32 relocation */
612 put_elf_reloc(symtab_section
, cur_text_section
,
613 ind
+ 1, R_X86_64_PC32
, 0);
615 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
617 /* otherwise, indirect call */
621 o(0xff); /* call/jmp *r */
622 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
629 static const uint8_t arg_regs
[] = {
630 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
633 static int func_scratch
;
635 /* Generate function call. The function address is pushed first, then
636 all the parameters in call order. This functions pops all the
637 parameters and the function address. */
639 void gen_offs_sp(int b
, int r
, int d
)
641 orex(1,0,r
& 0x100 ? 0 : r
, b
);
643 o(0x2444 | (REG_VALUE(r
) << 3));
646 o(0x2484 | (REG_VALUE(r
) << 3));
651 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
652 ST_FUNC
int gfunc_sret(CType
*vt
, CType
*ret
, int *ret_align
) {
653 *ret_align
= 1; // Never have to re-align return values for x86-64
657 void gfunc_call(int nb_args
)
659 int size
, align
, r
, args_size
, i
, d
, j
, bt
, struct_size
;
660 int nb_reg_args
, gen_reg
;
662 nb_reg_args
= nb_args
;
663 args_size
= (nb_reg_args
< REGN
? REGN
: nb_reg_args
) * PTR_SIZE
;
665 /* for struct arguments, we need to call memcpy and the function
666 call breaks register passing arguments we are preparing.
667 So, we process arguments which will be passed by stack first. */
668 struct_size
= args_size
;
669 for(i
= 0; i
< nb_args
; i
++) {
670 SValue
*sv
= &vtop
[-i
];
671 bt
= (sv
->type
.t
& VT_BTYPE
);
672 if (bt
== VT_STRUCT
) {
673 size
= type_size(&sv
->type
, &align
);
674 /* align to stack align size */
675 size
= (size
+ 15) & ~15;
676 /* generate structure store */
678 gen_offs_sp(0x8d, r
, struct_size
);
681 /* generate memcpy call */
682 vset(&sv
->type
, r
| VT_LVAL
, 0);
687 } else if (bt
== VT_LDOUBLE
) {
690 gen_offs_sp(0xdb, 0x107, struct_size
);
696 if (func_scratch
< struct_size
)
697 func_scratch
= struct_size
;
699 for (i
= 0; i
< REGN
; ++i
)
700 save_reg(arg_regs
[i
]);
703 gen_reg
= nb_reg_args
;
704 struct_size
= args_size
;
706 for(i
= 0; i
< nb_args
; i
++) {
707 bt
= (vtop
->type
.t
& VT_BTYPE
);
709 if (bt
== VT_STRUCT
|| bt
== VT_LDOUBLE
) {
710 if (bt
== VT_LDOUBLE
)
713 size
= type_size(&vtop
->type
, &align
);
714 /* align to stack align size */
715 size
= (size
+ 15) & ~15;
719 gen_offs_sp(0x8d, d
, struct_size
);
720 gen_offs_sp(0x89, d
, j
*8);
723 gen_offs_sp(0x8d, d
, struct_size
);
727 } else if (is_sse_float(vtop
->type
.t
)) {
728 gv(RC_XMM0
); /* only one float register */
731 /* movq %xmm0, j*8(%rsp) */
732 gen_offs_sp(0xd60f66, 0x100, j
*8);
734 /* movaps %xmm0, %xmmN */
738 /* mov %xmm0, %rxx */
741 o(0xc0 + REG_VALUE(d
));
747 gen_offs_sp(0x89, r
, j
*8);
751 gv(reg_classes
[d
] & ~RC_INT
);
756 o(0xc0 + REG_VALUE(d
) + REG_VALUE(r
) * 8);
770 #define FUNC_PROLOG_SIZE 11
772 /* generate function prolog of type 't' */
773 void gfunc_prolog(CType
*func_type
)
775 int addr
, reg_param_index
, bt
;
784 ind
+= FUNC_PROLOG_SIZE
;
785 func_sub_sp_offset
= ind
;
788 sym
= func_type
->ref
;
790 /* if the function returns a structure, then add an
791 implicit pointer parameter */
793 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
794 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
799 /* define parameters */
800 while ((sym
= sym
->next
) != NULL
) {
802 bt
= type
->t
& VT_BTYPE
;
803 if (reg_param_index
< REGN
) {
804 /* save arguments passed by register */
805 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
807 if (bt
== VT_STRUCT
|| bt
== VT_LDOUBLE
) {
808 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
| VT_REF
, addr
);
810 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
816 while (reg_param_index
< REGN
) {
817 if (func_type
->ref
->c
== FUNC_ELLIPSIS
)
818 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
824 /* generate function epilog */
825 void gfunc_epilog(void)
830 if (func_ret_sub
== 0) {
835 g(func_ret_sub
>> 8);
839 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
840 /* align local size to word & save local variables */
841 v
= (func_scratch
+ -loc
+ 15) & -16;
844 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
845 oad(0xb8, v
); /* mov stacksize, %eax */
846 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
847 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
848 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
850 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
851 o(0xec8148); /* sub rsp, stacksize */
855 cur_text_section
->data_offset
= saved_ind
;
856 pe_add_unwind_data(ind
, saved_ind
, v
);
857 ind
= cur_text_section
->data_offset
;
862 static void gadd_sp(int val
)
864 if (val
== (char)val
) {
868 oad(0xc48148, val
); /* add $xxx, %rsp */
872 typedef enum X86_64_Mode
{
/* Merge two eightbyte argument classes, following the System V AMD64
   ABI merging rules visible below: MEMORY dominates everything,
   INTEGER dominates the remaining classes, mixing with X87 degrades
   to MEMORY, and only pure SSE combinations stay SSE.
   NOTE(review): the leading 'a == b' case and the returns for the
   two x86_64_mode_none branches (original lines 881-886) are not
   visible in this chunk — presumably 'return a;' / 'return b;';
   confirm before relying on them. */
880 static X86_64_Mode
classify_x86_64_merge(X86_64_Mode a
, X86_64_Mode b
) {
883 else if (a
== x86_64_mode_none
)
885 else if (b
== x86_64_mode_none
)
887 else if ((a
== x86_64_mode_memory
) || (b
== x86_64_mode_memory
))
888 return x86_64_mode_memory
;
889 else if ((a
== x86_64_mode_integer
) || (b
== x86_64_mode_integer
))
890 return x86_64_mode_integer
;
/* x87 mixed with anything else (here: SSE) must go to memory */
891 else if ((a
== x86_64_mode_x87
) || (b
== x86_64_mode_x87
))
892 return x86_64_mode_memory
;
894 return x86_64_mode_sse
;
897 static X86_64_Mode
classify_x86_64_inner(CType
*ty
) {
901 switch (ty
->t
& VT_BTYPE
) {
902 case VT_VOID
: return x86_64_mode_none
;
911 case VT_ENUM
: return x86_64_mode_integer
;
914 case VT_DOUBLE
: return x86_64_mode_sse
;
916 case VT_LDOUBLE
: return x86_64_mode_x87
;
922 if (f
->next
&& (f
->c
== f
->next
->c
))
923 return x86_64_mode_memory
;
925 mode
= x86_64_mode_none
;
926 for (; f
; f
= f
->next
)
927 mode
= classify_x86_64_merge(mode
, classify_x86_64_inner(&f
->type
));
935 static X86_64_Mode
classify_x86_64_arg(CType
*ty
, CType
*ret
, int *psize
, int *reg_count
) {
937 int size
, align
, ret_t
;
939 if (ty
->t
& (VT_BITFIELD
|VT_ARRAY
)) {
943 mode
= x86_64_mode_integer
;
945 size
= type_size(ty
, &align
);
946 *psize
= (size
+ 7) & ~7;
949 mode
= x86_64_mode_memory
;
951 mode
= classify_x86_64_inner(ty
);
953 case x86_64_mode_integer
:
959 ret_t
= (size
> 4) ? VT_LLONG
: VT_INT
;
963 case x86_64_mode_x87
:
968 case x86_64_mode_sse
:
974 ret_t
= (size
> 4) ? VT_DOUBLE
: VT_FLOAT
;
989 ST_FUNC
int classify_x86_64_va_arg(CType
*ty
) {
990 /* This definition must be synced with stdarg.h */
992 __va_gen_reg
, __va_float_reg
, __va_stack
995 X86_64_Mode mode
= classify_x86_64_arg(ty
, NULL
, &size
, ®_count
);
997 default: return __va_stack
;
998 case x86_64_mode_integer
: return __va_gen_reg
;
999 case x86_64_mode_sse
: return __va_float_reg
;
1003 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
1004 int gfunc_sret(CType
*vt
, CType
*ret
, int *ret_align
) {
1005 int size
, reg_count
;
1006 *ret_align
= 1; // Never have to re-align return values for x86-64
1007 return (classify_x86_64_arg(vt
, ret
, &size
, ®_count
) == x86_64_mode_memory
);
1011 static const uint8_t arg_regs
[REGN
] = {
1012 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
/* Map an integer-argument slot index to the register used while
   preparing a call: slots 2 and 3 are staged in r10/r11 (so that
   later gv() calls cannot clobber rdx/rcx; they are copied into
   rdx/rcx just before the call), all other slots use the normal
   System V argument registers from arg_regs[].
   NOTE(review): the return for the r10/r11 case (original lines
   1018-1019) is not visible in this chunk. */
1015 static int arg_prepare_reg(int idx
) {
1016 if (idx
== 2 || idx
== 3)
1017 /* idx=2: r10, idx=3: r11 */
1020 return arg_regs
[idx
];
1023 /* Generate function call. The function address is pushed first, then
1024 all the parameters in call order. This functions pops all the
1025 parameters and the function address. */
1026 void gfunc_call(int nb_args
)
1030 int size
, align
, r
, args_size
, i
, j
, reg_count
;
1031 int nb_reg_args
= 0;
1032 int nb_sse_args
= 0;
1033 int sse_reg
, gen_reg
;
1035 /* calculate the number of integer/float arguments */
1037 for(i
= 0; i
< nb_args
; i
++) {
1038 mode
= classify_x86_64_arg(&vtop
[-i
].type
, NULL
, &size
, ®_count
);
1040 case x86_64_mode_memory
:
1041 case x86_64_mode_x87
:
1045 case x86_64_mode_sse
:
1046 nb_sse_args
+= reg_count
;
1047 if (nb_sse_args
> 8) args_size
+= size
;
1050 case x86_64_mode_integer
:
1051 nb_reg_args
+= reg_count
;
1052 if (nb_reg_args
> REGN
) args_size
+= size
;
1057 /* for struct arguments, we need to call memcpy and the function
1058 call breaks register passing arguments we are preparing.
1059 So, we process arguments which will be passed by stack first. */
1060 gen_reg
= nb_reg_args
;
1061 sse_reg
= nb_sse_args
;
1063 /* adjust stack to align SSE boundary */
1064 if (args_size
&= 15) {
1065 /* fetch cpu flag before the following sub will change the value */
1066 if (vtop
>= vstack
&& (vtop
->r
& VT_VALMASK
) == VT_CMP
)
1069 args_size
= 16 - args_size
;
1071 oad(0xec81, args_size
); /* sub $xxx, %rsp */
1074 for(i
= 0; i
< nb_args
;) {
1075 /* Swap argument to top, it will possibly be changed here,
1076 and might use more temps. At the end of the loop we keep
1077 in on the stack and swap it back to its original position
1078 if it is a register. */
1079 SValue tmp
= vtop
[0];
1083 mode
= classify_x86_64_arg(&vtop
->type
, NULL
, &size
, ®_count
);
1086 switch (vtop
->type
.t
& VT_BTYPE
) {
1088 if (mode
== x86_64_mode_sse
) {
1090 sse_reg
-= reg_count
;
1093 } else if (mode
== x86_64_mode_integer
) {
1095 gen_reg
-= reg_count
;
1101 /* allocate the necessary size on stack */
1103 oad(0xec81, size
); /* sub $xxx, %rsp */
1104 /* generate structure store */
1105 r
= get_reg(RC_INT
);
1106 orex(1, r
, 0, 0x89); /* mov %rsp, r */
1107 o(0xe0 + REG_VALUE(r
));
1108 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1117 size
= LDOUBLE_SIZE
;
1118 oad(0xec8148, size
); /* sub $xxx, %rsp */
1119 o(0x7cdb); /* fstpt 0(%rsp) */
1127 assert(mode
== x86_64_mode_sse
);
1131 o(0x50); /* push $rax */
1132 /* movq %xmm0, (%rsp) */
1134 o(0x04 + REG_VALUE(r
)*8);
1143 assert(mode
== x86_64_mode_integer
);
1145 /* XXX: implicit cast ? */
1146 if (gen_reg
> REGN
) {
1149 orex(0,r
,0,0x50 + REG_VALUE(r
)); /* push r */
1157 /* And swap the argument back to it's original position. */
1164 assert(vtop
->type
.t
== tmp
.type
.t
);
1172 /* XXX This should be superfluous. */
1173 save_regs(0); /* save used temporary registers */
1175 /* then, we prepare register passing arguments.
1176 Note that we cannot set RDX and RCX in this loop because gv()
1177 may break these temporary registers. Let's use R10 and R11
1179 assert(gen_reg
<= REGN
);
1180 assert(sse_reg
<= 8);
1181 for(i
= 0; i
< nb_args
; i
++) {
1182 mode
= classify_x86_64_arg(&vtop
->type
, &type
, &size
, ®_count
);
1183 /* Alter stack entry type so that gv() knows how to treat it */
1185 if (mode
== x86_64_mode_sse
) {
1186 if (reg_count
== 2) {
1188 gv(RC_FRET
); /* Use pair load into xmm0 & xmm1 */
1189 if (sse_reg
) { /* avoid redundant movaps %xmm0, %xmm0 */
1190 /* movaps %xmm0, %xmmN */
1192 o(0xc0 + (sse_reg
<< 3));
1193 /* movaps %xmm1, %xmmN */
1195 o(0xc1 + ((sse_reg
+1) << 3));
1198 assert(reg_count
== 1);
1200 /* Load directly to register */
1201 gv(RC_XMM0
<< sse_reg
);
1203 } else if (mode
== x86_64_mode_integer
) {
1205 /* XXX: implicit cast ? */
1206 gen_reg
-= reg_count
;
1208 int d
= arg_prepare_reg(gen_reg
);
1209 orex(1,d
,r
,0x89); /* mov */
1210 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
1211 if (reg_count
== 2) {
1212 /* Second word of two-word value should always be in rdx
1213 this case is handled via RC_IRET */
1214 d
= arg_prepare_reg(gen_reg
+1);
1215 orex(1,d
,vtop
->r2
,0x89); /* mov */
1216 o(0xc0 + REG_VALUE(vtop
->r2
) * 8 + REG_VALUE(d
));
1221 assert(gen_reg
== 0);
1222 assert(sse_reg
== 0);
1224 /* We shouldn't have many operands on the stack anymore, but the
1225 call address itself is still there, and it might be in %eax
1226 (or edx/ecx) currently, which the below writes would clobber.
1227 So evict all remaining operands here. */
1230 /* Copy R10 and R11 into RDX and RCX, respectively */
1231 if (nb_reg_args
> 2) {
1232 o(0xd2894c); /* mov %r10, %rdx */
1233 if (nb_reg_args
> 3) {
1234 o(0xd9894c); /* mov %r11, %rcx */
1238 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
1246 #define FUNC_PROLOG_SIZE 11
1248 static void push_arg_reg(int i
) {
1250 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
1253 /* generate function prolog of type 't' */
1254 void gfunc_prolog(CType
*func_type
)
1257 int i
, addr
, align
, size
, reg_count
;
1258 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
1262 sym
= func_type
->ref
;
1263 addr
= PTR_SIZE
* 2;
1265 ind
+= FUNC_PROLOG_SIZE
;
1266 func_sub_sp_offset
= ind
;
1269 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
1270 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
1271 seen_reg_num
= seen_sse_num
= 0;
1272 /* frame pointer and return address */
1273 seen_stack_size
= PTR_SIZE
* 2;
1274 /* count the number of seen parameters */
1275 sym
= func_type
->ref
;
1276 while ((sym
= sym
->next
) != NULL
) {
1278 mode
= classify_x86_64_arg(type
, NULL
, &size
, ®_count
);
1281 seen_stack_size
+= size
;
1284 case x86_64_mode_integer
:
1285 if (seen_reg_num
+ reg_count
<= 8) {
1286 seen_reg_num
+= reg_count
;
1289 seen_stack_size
+= size
;
1293 case x86_64_mode_sse
:
1294 if (seen_sse_num
+ reg_count
<= 8) {
1295 seen_sse_num
+= reg_count
;
1298 seen_stack_size
+= size
;
1305 /* movl $0x????????, -0x10(%rbp) */
1307 gen_le32(seen_reg_num
* 8);
1308 /* movl $0x????????, -0xc(%rbp) */
1310 gen_le32(seen_sse_num
* 16 + 48);
1311 /* movl $0x????????, -0x8(%rbp) */
1313 gen_le32(seen_stack_size
);
1315 /* save all register passing arguments */
1316 for (i
= 0; i
< 8; i
++) {
1318 o(0xd60f66); /* movq */
1319 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
1320 /* movq $0, loc+8(%rbp) */
1325 for (i
= 0; i
< REGN
; i
++) {
1326 push_arg_reg(REGN
-1-i
);
1330 sym
= func_type
->ref
;
1332 reg_param_index
= 0;
1333 sse_param_index
= 0;
1335 /* if the function returns a structure, then add an
1336 implicit pointer parameter */
1337 func_vt
= sym
->type
;
1338 mode
= classify_x86_64_arg(&func_vt
, NULL
, &size
, ®_count
);
1339 if (mode
== x86_64_mode_memory
) {
1340 push_arg_reg(reg_param_index
);
1347 /* define parameters */
1348 while ((sym
= sym
->next
) != NULL
) {
1350 mode
= classify_x86_64_arg(type
, NULL
, &size
, ®_count
);
1352 case x86_64_mode_sse
:
1353 if (sse_param_index
+ reg_count
<= 8) {
1354 /* save arguments passed by register */
1355 loc
-= reg_count
* 8;
1357 for (i
= 0; i
< reg_count
; ++i
) {
1358 o(0xd60f66); /* movq */
1359 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, param_addr
+ i
*8);
1365 sse_param_index
+= reg_count
;
1369 case x86_64_mode_memory
:
1370 case x86_64_mode_x87
:
1375 case x86_64_mode_integer
: {
1376 if (reg_param_index
+ reg_count
<= REGN
) {
1377 /* save arguments passed by register */
1378 loc
-= reg_count
* 8;
1380 for (i
= 0; i
< reg_count
; ++i
) {
1381 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, param_addr
+ i
*8);
1387 reg_param_index
+= reg_count
;
1392 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1393 VT_LOCAL
| VT_LVAL
, param_addr
);
1398 /* generate function epilog */
1399 void gfunc_epilog(void)
1403 o(0xc9); /* leave */
1404 if (func_ret_sub
== 0) {
1407 o(0xc2); /* ret n */
1409 g(func_ret_sub
>> 8);
1411 /* align local size to word & save local variables */
1412 v
= (-loc
+ 15) & -16;
1414 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1415 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1416 o(0xec8148); /* sub rsp, stacksize */
1423 /* generate a jump to a label */
1426 return psym(0xe9, t
);
1429 /* generate a jump to a fixed address */
1430 void gjmp_addr(int a
)
1438 oad(0xe9, a
- ind
- 5);
1442 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1443 int gtst(int inv
, int t
)
1447 v
= vtop
->r
& VT_VALMASK
;
1449 /* fast case : can jump directly since flags are set */
1450 if (vtop
->c
.i
& 0x100)
1452 /* This was a float compare. If the parity flag is set
1453 the result was unordered. For anything except != this
1454 means false and we don't jump (anding both conditions).
1455 For != this means true (oring both).
1456 Take care about inverting the test. We need to jump
1457 to our target if the result was unordered and test wasn't NE,
1458 otherwise if unordered we don't want to jump. */
1459 vtop
->c
.i
&= ~0x100;
1460 if (!inv
== (vtop
->c
.i
!= TOK_NE
))
1461 o(0x067a); /* jp +6 */
1465 t
= psym(0x8a, t
); /* jp t */
1469 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
1470 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
1471 /* && or || optimization */
1472 if ((v
& 1) == inv
) {
1473 /* insert vtop->c jump list in t */
1476 p
= (int *)(cur_text_section
->data
+ *p
);
1484 if (is_float(vtop
->type
.t
) ||
1485 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1489 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1490 /* constant jmp optimization */
1491 if ((vtop
->c
.i
!= 0) != inv
)
1496 o(0xc0 + REG_VALUE(v
) * 9);
1498 t
= psym(0x85 ^ inv
, t
);
1505 /* generate an integer binary operation */
1506 void gen_opi(int op
)
1511 ll
= is64_type(vtop
[-1].type
.t
);
1512 uu
= (vtop
[-1].type
.t
& VT_UNSIGNED
) != 0;
1513 cc
= (vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
;
1517 case TOK_ADDC1
: /* add with carry generation */
1520 if (cc
&& (!ll
|| (int)vtop
->c
.ll
== vtop
->c
.ll
)) {
1527 /* XXX: generate inc and dec for smaller code ? */
1528 orex(ll
, r
, 0, 0x83);
1529 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1532 orex(ll
, r
, 0, 0x81);
1533 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1536 gv2(RC_INT
, RC_INT
);
1539 orex(ll
, r
, fr
, (opc
<< 3) | 0x01);
1540 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1543 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1549 case TOK_SUBC1
: /* sub with carry generation */
1552 case TOK_ADDC2
: /* add with carry use */
1555 case TOK_SUBC2
: /* sub with carry use */
1568 gv2(RC_INT
, RC_INT
);
1571 orex(ll
, fr
, r
, 0xaf0f); /* imul fr, r */
1572 o(0xc0 + REG_VALUE(fr
) + REG_VALUE(r
) * 8);
1584 opc
= 0xc0 | (opc
<< 3);
1590 orex(ll
, r
, 0, 0xc1); /* shl/shr/sar $xxx, r */
1591 o(opc
| REG_VALUE(r
));
1592 g(vtop
->c
.i
& (ll
? 63 : 31));
1594 /* we generate the shift in ecx */
1595 gv2(RC_INT
, RC_RCX
);
1597 orex(ll
, r
, 0, 0xd3); /* shl/shr/sar %cl, r */
1598 o(opc
| REG_VALUE(r
));
1611 /* first operand must be in eax */
1612 /* XXX: need better constraint for second operand */
1613 gv2(RC_RAX
, RC_RCX
);
1618 orex(ll
, 0, 0, uu
? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1619 orex(ll
, fr
, 0, 0xf7); /* div fr, %eax */
1620 o((uu
? 0xf0 : 0xf8) + REG_VALUE(fr
));
1621 if (op
== '%' || op
== TOK_UMOD
)
1633 void gen_opl(int op
)
1638 /* generate a floating point operation 'v = t1 op t2' instruction. The
1639 two operands are guaranted to have the same floating point type */
1640 /* XXX: need to use ST1 too */
1641 void gen_opf(int op
)
1643 int a
, ft
, fc
, swapped
, r
;
1645 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1647 /* convert constants to memory references */
1648 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1653 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1656 /* must put at least one value in the floating point register */
1657 if ((vtop
[-1].r
& VT_LVAL
) &&
1658 (vtop
[0].r
& VT_LVAL
)) {
1664 /* swap the stack if needed so that t1 is the register and t2 is
1665 the memory reference */
1666 if (vtop
[-1].r
& VT_LVAL
) {
1670 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1671 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1672 /* load on stack second operand */
1673 load(TREG_ST0
, vtop
);
1674 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1675 if (op
== TOK_GE
|| op
== TOK_GT
)
1677 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1680 o(0xc9d9); /* fxch %st(1) */
1681 o(0xe9da); /* fucompp */
1682 o(0xe0df); /* fnstsw %ax */
1684 o(0x45e480); /* and $0x45, %ah */
1685 o(0x40fC80); /* cmp $0x40, %ah */
1686 } else if (op
== TOK_NE
) {
1687 o(0x45e480); /* and $0x45, %ah */
1688 o(0x40f480); /* xor $0x40, %ah */
1690 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1691 o(0x05c4f6); /* test $0x05, %ah */
1694 o(0x45c4f6); /* test $0x45, %ah */
1701 /* no memory reference possible for long double operations */
1702 load(TREG_ST0
, vtop
);
1726 o(0xde); /* fxxxp %st, %st(1) */
1731 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1732 /* if saved lvalue, then we must reload it */
1735 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1737 r
= get_reg(RC_INT
);
1739 v1
.r
= VT_LOCAL
| VT_LVAL
;
1745 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1748 if (op
== TOK_LE
|| op
== TOK_LT
)
1750 if (op
== TOK_LE
|| op
== TOK_GE
) {
1751 op
= 0x93; /* setae */
1753 op
= 0x97; /* seta */
1761 assert(!(vtop
[-1].r
& VT_LVAL
));
1763 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
)
1765 o(0x2e0f); /* ucomisd */
1767 if (vtop
->r
& VT_LVAL
) {
1768 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
1770 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
1775 vtop
->c
.i
= op
| 0x100;
1777 assert((vtop
->type
.t
& VT_BTYPE
) != VT_LDOUBLE
);
1795 assert((ft
& VT_BTYPE
) != VT_LDOUBLE
);
1798 /* if saved lvalue, then we must reload it */
1799 if ((vtop
->r
& VT_VALMASK
) == VT_LLOCAL
) {
1801 r
= get_reg(RC_INT
);
1803 v1
.r
= VT_LOCAL
| VT_LVAL
;
1809 assert(!(vtop
[-1].r
& VT_LVAL
));
1811 assert(vtop
->r
& VT_LVAL
);
1816 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1824 if (vtop
->r
& VT_LVAL
) {
1825 gen_modrm(vtop
[-1].r
, r
, vtop
->sym
, fc
);
1827 o(0xc0 + REG_VALUE(vtop
[0].r
) + REG_VALUE(vtop
[-1].r
)*8);
1835 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1836 and 'long long' cases. */
1837 void gen_cvt_itof(int t
)
1839 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1842 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1843 /* signed long long to float/double/long double (unsigned case
1844 is handled generically) */
1845 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1846 o(0x242cdf); /* fildll (%rsp) */
1847 o(0x08c48348); /* add $8, %rsp */
1848 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1849 (VT_INT
| VT_UNSIGNED
)) {
1850 /* unsigned int to float/double/long double */
1851 o(0x6a); /* push $0 */
1853 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1854 o(0x242cdf); /* fildll (%rsp) */
1855 o(0x10c48348); /* add $16, %rsp */
1857 /* int to float/double/long double */
1858 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1859 o(0x2404db); /* fildl (%rsp) */
1860 o(0x08c48348); /* add $8, %rsp */
1864 int r
= get_reg(RC_FLOAT
);
1866 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
?1:0));
1867 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1868 (VT_INT
| VT_UNSIGNED
) ||
1869 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1873 o(0xc0 + (vtop
->r
& VT_VALMASK
) + REG_VALUE(r
)*8); /* cvtsi2sd */
1878 /* convert from one floating point type to another */
1879 void gen_cvt_ftof(int t
)
1887 if (bt
== VT_FLOAT
) {
1889 if (tbt
== VT_DOUBLE
) {
1890 o(0x140f); /* unpcklps */
1891 o(0xc0 + REG_VALUE(vtop
->r
)*9);
1892 o(0x5a0f); /* cvtps2pd */
1893 o(0xc0 + REG_VALUE(vtop
->r
)*9);
1894 } else if (tbt
== VT_LDOUBLE
) {
1896 /* movss %xmm0,-0x10(%rsp) */
1898 o(0x44 + REG_VALUE(vtop
->r
)*8);
1900 o(0xf02444d9); /* flds -0x10(%rsp) */
1903 } else if (bt
== VT_DOUBLE
) {
1905 if (tbt
== VT_FLOAT
) {
1906 o(0x140f66); /* unpcklpd */
1907 o(0xc0 + REG_VALUE(vtop
->r
)*9);
1908 o(0x5a0f66); /* cvtpd2ps */
1909 o(0xc0 + REG_VALUE(vtop
->r
)*9);
1910 } else if (tbt
== VT_LDOUBLE
) {
1912 /* movsd %xmm0,-0x10(%rsp) */
1914 o(0x44 + REG_VALUE(vtop
->r
)*8);
1916 o(0xf02444dd); /* fldl -0x10(%rsp) */
1921 int r
= get_reg(RC_FLOAT
);
1922 if (tbt
== VT_DOUBLE
) {
1923 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1924 /* movsd -0x10(%rsp),%xmm0 */
1926 o(0x44 + REG_VALUE(r
)*8);
1929 } else if (tbt
== VT_FLOAT
) {
1930 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1931 /* movss -0x10(%rsp),%xmm0 */
1933 o(0x44 + REG_VALUE(r
)*8);
1940 /* convert fp to int 't' type */
1941 void gen_cvt_ftoi(int t
)
1943 int ft
, bt
, size
, r
;
1946 if (bt
== VT_LDOUBLE
) {
1947 gen_cvt_ftof(VT_DOUBLE
);
1957 r
= get_reg(RC_INT
);
1958 if (bt
== VT_FLOAT
) {
1960 } else if (bt
== VT_DOUBLE
) {
1965 orex(size
== 8, r
, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
1966 o(0xc0 + REG_VALUE(vtop
->r
) + REG_VALUE(r
)*8);
1970 /* computed goto support */
1977 /* end of x86-64 code generator */
1978 /*************************************************************/
1979 #endif /* ! TARGET_DEFS_ONLY */
1980 /******************************************************/