/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code, which
   makes assumptions about this ordering). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_ST0     0x0020 /* only for long double */
#define RC_XMM0    0x0100
#define RC_XMM1    0x0200
#define RC_XMM2    0x0400
#define RC_XMM3    0x0800
#define RC_XMM4    0x1000
#define RC_XMM5    0x2000
#define RC_XMM6    0x4000
#define RC_XMM7    0x8000
#define RC_RSI     0x10000
#define RC_RDI     0x20000
#define RC_INT1    0x40000 /* function pointer */
#define RC_INT2    0x80000
#define RC_RBX     0x100000
#define RC_R10     0x200000
#define RC_R11     0x400000
#define RC_R12     0x800000
#define RC_R13     0x1000000
#define RC_R14     0x2000000
#define RC_R15     0x4000000
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_LRET    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */
#define RC_MASK    (RC_INT|RC_INT1|RC_INT2|RC_FLOAT)
/* pretty names for the registers */

#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)
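/* Note: registers 8-15 (r8-r15, xmm8-xmm15) need a REX prefix bit;
   REX_BASE() extracts that bit 3 of the register number and REG_VALUE()
   keeps the low 3 bits that go into the ModRM/opcode byte (this assumes
   the TREG_* numbering follows the hardware register numbers). */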
/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS
/* pointer size, in bytes */

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */

/******************************************************/
#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x400000
#define ELF_PAGE_SIZE  0x200000
/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
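/* Each entry below ORs a generic class (RC_INT or RC_FLOAT) with the
   register's own dedicated RC_* bit, so callers can request either "any
   integer register" or one particular register through the same mask. */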
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT|RC_RAX|RC_INT2,
    /* ecx */ RC_INT|RC_RCX|RC_INT2,
    /* edx */ RC_INT|RC_RDX,
    RC_INT|RC_INT1|RC_INT2|RC_RBX,
    RC_INT|RC_R8|RC_INT2,
    RC_INT|RC_R9|RC_INT2,
    RC_INT|RC_INT1|RC_INT2|RC_R10,
    RC_INT|RC_INT1|RC_INT2|RC_R11,
    RC_INT|RC_INT1|RC_INT2|RC_R12,
    RC_INT|RC_INT1|RC_INT2|RC_R13,
    RC_INT|RC_INT1|RC_INT2|RC_R14,
    RC_INT|RC_INT1|RC_INT2|RC_R15,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
static unsigned long func_sub_sp_offset;
static int func_ret_sub;
/* XXX: make it faster ? */
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
void o(unsigned int c)

void gen_le64(int64_t c)
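/* orex() below emits a REX prefix when one is needed: 'll' selects a
   64-bit operand size (REX.W), and the high bit of 'r' / 'r2' (registers
   8-15) is moved into the REX.B / REX.R bits of the 0x40|... byte it
   builds, ahead of the opcode passed in 'b'. */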
void orex(int ll, int r, int r2, int b)
    if ((r & VT_VALMASK) >= VT_CONST)
    if ((r2 & VT_VALMASK) >= VT_CONST)
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
/* output a symbol and patch all calls to it */
void gsym_addr(int t, int a)
    ptr = (int *)(cur_text_section->data + t);
    n = *ptr; /* next value */
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
static int is64_type(int t)
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
    greloc(cur_text_section, sym, ind, R_X86_64_32);
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
    greloc(cur_text_section, sym, ind, R_X86_64_64);
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
#ifndef TCC_TARGET_PE
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
    printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
           cur_text_section->data[ind-3],
           cur_text_section->data[ind-2],
           cur_text_section->data[ind-1]
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    /* we use add c, %xxx for displacement */
    o(0xc0 + REG_VALUE(r));
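/* The ModRM bytes built below follow the usual x86-64 encoding: mod=00
   (0x00 | op_reg | rm) means no displacement, mod=01 (0x40 | ...) adds an
   8-bit displacement, mod=10 (0x80 | ..., or the 0x85 form with rm=101
   for %rbp) a 32-bit one, and 0x05 (mod=00, rm=101) selects RIP-relative
   addressing for symbolic references. */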
static void gen_modrm_impl(int op_reg, int fr, Sym *sym, int c, int flag)
    int r = fr & VT_VALMASK;
    op_reg = REG_VALUE(op_reg) << 3;
        /* constant memory reference */
        if (flag & FLAG_GOT) {
            gen_gotpcrel(fr, sym, c);
            gen_addrpc32(fr, sym, c);
    } else if (r == VT_LOCAL) {
        /* currently, we use only ebp as base */
            /* short reference */
            oad(0x85 | op_reg, c);
            /* short reference */
            g(0x40 | op_reg | REG_VALUE(fr));
            g(0x80 | op_reg | REG_VALUE(fr));
        g(0x00 | op_reg | REG_VALUE(fr));
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
    gen_modrm_impl(op_reg, r, sym, c, 0);
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
    if((op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC))
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, flag);
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
    int v, t, ft, fc, fr, ll;
    sv = pe_getimport(sv, &v2);
    ft = sv->type.t & ~VT_DEFSIGN;
#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        /* we cannot use float registers as a temporary register */
        tr = get_reg(RC_INT) | TREG_MEM;
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);
        /* load from the temporary register */
    if((ft & VT_BTYPE) == VT_FUNC)
    size = type_size(&sv->type, &align);
    if (v == VT_LLOCAL) {
        v1.r = VT_LOCAL | VT_LVAL;
        if (!(reg_classes[fr] & RC_INT))
            fr = get_reg(RC_INT);
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x100ff3; /* movss */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x100ff2; /* movsd */
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            gen_modrm(r, fr, sv->sym, fc);
            o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
            gen_addrpc32(fr, sv->sym, fc);
            if (sv->sym->type.t & VT_STATIC) {
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
                o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                gen_gotpcrel(r, sv->sym, fc);
            orex(ll, r, 0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
    } else if (v == VT_LOCAL) {
        orex(1, 0, r, 0x8d); /* lea xxx(%ebp), r */
        gen_modrm(r, VT_LOCAL, sv->sym, fc);
    } else if (v == VT_CMP) {
        orex(0, r, 0, 0xb8 + REG_VALUE(r));
        if ((fc & ~0x100) == TOK_NE){
            gen_le32(1); /* mov $1, r */
            gen_le32(0); /* mov $0, r */
            /* This was a float compare. If the parity bit is
               set the result was unordered, meaning false for everything
               except TOK_NE, and true for TOK_NE. */
            o(0x037a + (REX_BASE(r) << 8)); /* jp 3 */
        orex(0, r, 0, 0x0f); /* setxx %br */
        o(0xc0 + REG_VALUE(r));
    } else if (v == VT_JMP || v == VT_JMPI) {
        oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
        o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
        oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        if (reg_classes[r] & RC_FLOAT) {
            /* gen_cvt_ftof(VT_DOUBLE); */
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0xf02444 + REG_VALUE(r)*8);
        }else if(reg_classes[v] & RC_FLOAT){
            o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
    } else if (r == TREG_ST0) {
        assert(reg_classes[v] & RC_FLOAT);
        /* gen_cvt_ftof(VT_LDOUBLE); */
        /* movsd %xmm0,-0x10(%rsp) */
        o(0xf02444 + REG_VALUE(v)*8);
        o(0xf02444dd); /* fldl -0x10(%rsp) */
        orex(1, fr, r, 0x8d); /* lea xxx(%ebp), r */
        gen_modrm(r, fr, sv->sym, fc);
        o(0xc0 + REG_VALUE(v) + REG_VALUE(r) * 8); /* mov v, r */
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *sv)
    int fr, bt, ft, fc, ll, v;
    sv = pe_getimport(sv, &v2);
    ft = sv->type.t & ~VT_DEFSIGN;
//#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
//  if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
//      gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
//      pic = is64_type(bt) ? 0x49 : 0x41;
    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        orex(0, fr, r, 0x110ff3); /* movss */
    } else if (bt == VT_DOUBLE) {
        orex(0, fr, r, 0x110ff2); /* movsd */
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        orex(0, fr, r, 0xdb); /* fstpt */
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(ll, fr, r, 0x88);
            orex(ll, fr, r, 0x89);
    if (v == VT_CONST || v == VT_LOCAL || (fr & VT_LVAL)) {
        gen_modrm(r, fr, sv->sym, fc);
        /* XXX: do we ever really get here? */
        o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8); /* mov r, fr */
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        if (vtop->r & VT_SYM) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym,
                   ind + 1, R_X86_64_PLT32);
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
        /* otherwise, indirect call */
        r = get_reg(RC_INT1);
        orex(0, r, 0, 0xff); /* REX call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
static int func_scratch;

int reloc_add(int inds)
    return psym(0, inds);
void reloc_use(int t, int data)
    ptr = (int *)(cur_text_section->data + t);
    t = *ptr; /* next value */
void struct_copy(SValue *d, SValue *s, SValue *c)
    o(0xa4f3); // rep movsb

void gen_putz(SValue *d, int size)
    o(0xb8 + REG_VALUE(TREG_RCX)); /* mov $xx, r */
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gen_offs_sp(int b, int r, int off)
        o(0x2404 | (REG_VALUE(r) << 3));
    }else if (off == (char)off) {
        o(0x2444 | (REG_VALUE(r) << 3));
        o(0x2484 | (REG_VALUE(r) << 3));
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
    return arg_regs[idx];
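/* The R10/R11 staging above is undone just before the call: gfunc_call
   below copies %r10 into %rcx and %r11 into %rdx once every argument has
   been evaluated, so gv() cannot clobber the first two parameter
   registers while later arguments are being computed. */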
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align)
    *ret_align = 1; // Never have to re-align return values for x86-64
    size = type_size(vt, &align);
    } else if (size > 4) {
    } else if (size > 2) {
    } else if (size > 1) {
static int is_sse_float(int t) {
    return bt == VT_DOUBLE || bt == VT_FLOAT;

int gfunc_arg_size(CType *type) {
    if (type->t & (VT_ARRAY|VT_BITFIELD))
    return type_size(type, &align);
void gfunc_call(int nb_args)
    int size, r, args_size, i, d, bt, struct_size;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */
        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            gen_offs_sp(0x8d, r, struct_size);
            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
        } else if (bt == VT_LDOUBLE) {
            gen_offs_sp(0xdb, 0x107, struct_size);
    if (func_scratch < struct_size)
        func_scratch = struct_size;

    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        bt = (vtop->type.t & VT_BTYPE);
        size = gfunc_arg_size(&vtop->type);
            /* align to stack align size */
            size = (size + 15) & ~15;
            gen_offs_sp(0x8d, d, struct_size);
            gen_offs_sp(0x89, d, arg*8);
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            if (is_sse_float(vtop->type.t)) {
                gv(RC_XMM0); /* only use one float register */
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                    /* movaps %xmm0, %xmmN */
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0xc0 + REG_VALUE(d));
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                                 : size > 1 ? VT_SHORT : VT_BYTE;
                    gen_offs_sp(0x89, r, arg*8);
                    d = arg_prepare_reg(arg);
                    orex(1, d, r, 0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));

    /* Copy R10 and R11 into RCX and RDX, respectively */
        o(0xd1894c); /* mov %r10, %rcx */
        o(0xda894c); /* mov %r11, %rdx */
#define FUNC_PROLOG_SIZE 11
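/* Note that the prolog bytes themselves are written later by
   gfunc_epilog(): gfunc_prolog() below only reserves FUNC_PROLOG_SIZE
   bytes and records the position in func_sub_sp_offset; the epilog then
   rewinds 'ind' and fills those 11 bytes with the final push/mov/sub (or
   the __chkstk call) once the frame size is known. */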
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
    int addr, reg_param_index, bt, size;

    func_ret_sub = func_scratch = r_loc = 0;

    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_var = (sym->c == FUNC_ELLIPSIS);
    size = gfunc_arg_size(&func_vt);
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
/* generate function epilog */
void gfunc_epilog(void)
    if (func_ret_sub == 0) {
        g(func_ret_sub >> 8);

    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;
    reloc_use(r_loc, func_scratch);
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
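/* System V AMD64 argument classification: each parameter or return value
   is reduced to one of the modes below - integer register, SSE register,
   x87 (long double) or memory - roughly following the psABI rules.
   classify_x86_64_merge() combines the classes of a struct's fields, and
   classify_x86_64_arg() also reports the size, alignment and number of
   registers a value needs. */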
typedef enum X86_64_Mode {
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
    else if (a == x86_64_mode_none)
    else if (b == x86_64_mode_none)
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
        return x86_64_mode_sse;
static X86_64_Mode classify_x86_64_inner(CType *ty)
    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;
    case VT_ENUM: return x86_64_mode_integer;
    case VT_DOUBLE: return x86_64_mode_sse;
    case VT_LDOUBLE: return x86_64_mode_x87;
        if (f->next && (f->c == f->next->c))
            return x86_64_mode_memory;
        mode = x86_64_mode_none;
        for (f = f->next; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        mode = x86_64_mode_integer;
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;
            mode = x86_64_mode_memory;
            mode = classify_x86_64_inner(ty);
        case x86_64_mode_integer:
            ret_t |= (ty->t & VT_UNSIGNED);
        case x86_64_mode_x87:
        case x86_64_mode_sse:
            ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
            break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none */
ST_FUNC int classify_x86_64_va_arg(CType *ty)
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_ld_reg, __va_stack
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    default: return __va_stack;
    case x86_64_mode_x87: return __va_ld_reg;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align)
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
    int size, align, args_size, s, e, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int gen_reg, sse_reg;

    /* fetch cpu flag before the following sub will change the value */
    if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)

    /* calculate the number of integer/float register arguments */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse)
            nb_sse_args += reg_count;
        else if (mode == x86_64_mode_integer)
            nb_reg_args += reg_count;

    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        case x86_64_mode_x87:
            if((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT)
                args_size = (args_size + 15) & ~15;
        case x86_64_mode_memory:
        case x86_64_mode_sse:
            sse_reg -= reg_count;
            if (sse_reg + reg_count > 8)
        case x86_64_mode_integer:
            gen_reg -= reg_count;
            if (gen_reg + reg_count > REGN)
        default: break; /* nothing to be done for x86_64_mode_none */

    args_size = (args_size + 15) & ~15;
    if (func_scratch < args_size)
        func_scratch = args_size;

    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    for(s = e = 0; s < nb_args; s = e){
        int run_gen, run_sse, st_size;
        for(i = s; i < nb_args; i++) {
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            case x86_64_mode_x87:
                if((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT){
            case x86_64_mode_memory:
            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8)
            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN)
            default: break; /* nothing to be done for x86_64_mode_none */

        st_size = -st_size & 15; // 16 - (size & 15)
        args_size -= st_size;
        for(i = s; i < e; i++) {
            /* Swap argument to top, it will possibly be changed here,
               and might use more temps. All arguments must remain on the
               stack, so that get_reg can correctly evict some of them onto
               stack. We could also use a vrott(nb_args) at the end
               of this loop, but this seems faster. */
            mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
            case x86_64_mode_x87:
                /* Must ensure TREG_ST0 only */
                if((vtop->type.t & VT_BTYPE) == VT_STRUCT){
                    vtop[-1].r = VT_CONST;
                    gen_offs_sp(0xdb, 0x107, args_size);
                    vtop--; // Release TREG_ST0
                    gen_offs_sp(0xdb, 0x107, args_size);
                    vtop->r = VT_CONST; // Release TREG_ST0
            case x86_64_mode_memory:
                vset(&char_pointer_type, TREG_RSP, args_size); /* generate memcpy RSP */
                vtop->type = char_pointer_type;
                struct_copy(&vtop[-2], &vtop[-1], &vtop[0]);
            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8){
            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN){
                    vset(&type, TREG_RSP | VT_LVAL, args_size);
            default: break; /* nothing to be done for x86_64_mode_none */

    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        if (mode == x86_64_mode_sse) {
            sse_reg -= reg_count;
            if (sse_reg + reg_count <= 8) {
                if (reg_count == 2) {
                    ex_rc = RC_XMM0 << (sse_reg + 1);
                    gv(RC_XMM0 << sse_reg);
                    assert(reg_count == 1);
                    /* Load directly to register */
                    gv(RC_XMM0 << sse_reg);
        } else if (mode == x86_64_mode_integer) {
            gen_reg -= reg_count;
            if (gen_reg + reg_count <= REGN) {
                if (reg_count == 2) {
                    d = arg_regs[gen_reg+1];
                    ex_rc = reg_classes[d] & ~RC_MASK;
                    d = arg_regs[gen_reg];
                    gv(reg_classes[d] & ~RC_MASK);
                    assert(reg_count == 1);
                    d = arg_regs[gen_reg];
                    gv(reg_classes[d] & ~RC_MASK);

    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
#define FUNC_PROLOG_SIZE 11
static void push_arg_reg(int i) {
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    pop_stack = loc = 0;
    func_scratch = r_loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        while ((sym = sym->next) != NULL) {
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
            case x86_64_mode_integer:
                if (seen_reg_num + reg_count <= REGN) {
                    seen_reg_num += reg_count;
            case x86_64_mode_sse:
                if (seen_sse_num + reg_count <= 8) {
                    seen_sse_num += reg_count;

        /* movl $0x????????, -0x10(%rbp) */
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        gen_le32(seen_stack_size);

        o(0xc084); /* test %al,%al */
        g(4*(8 - seen_sse_num) + 3);
        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            o(0x290f); /* movaps %xmm1-7,-XXX(%rbp) */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
        for (i = 0; i < (REGN - seen_reg_num); i++) {
            push_arg_reg(REGN-1 - i);

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        case x86_64_mode_sse:
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                addr = (addr + align - 1) & -align;
            sse_param_index += reg_count;
        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                addr = (addr + align - 1) & -align;
            reg_param_index += reg_count;
        default: break; /* nothing to be done for x86_64_mode_none */
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
/* generate function epilog */
void gfunc_epilog(void)
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc2); /* ret n */
        g(func_ret_sub >> 8);

    /* align local size to word & save local variables */
    v = (func_scratch - loc + 15) & -16;
    reloc_use(r_loc, func_scratch);
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
/* generate a jump to a label */
    return psym(0xe9, t);

/* generate a jump to a fixed address */
void gjmp_addr(int a)
        oad(0xe9, a - ind - 5);
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
    v = vtop->r & VT_VALMASK;
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100)
            /* This was a float compare. If the parity flag is set
               the result was unordered. For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test. We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump. */
            vtop->c.i &= ~0x100;
            if (!inv == (vtop->c.i != TOK_NE))
                o(0x067a); /* jp +6 */
                t = psym(0x8a, t); /* jp t */
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
                p = (int *)(cur_text_section->data + *p);
        if (is_float(vtop->type.t) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
            /* constant jmp optimization */
            if ((vtop->c.i != 0) != inv)
            o(0xc0 + REG_VALUE(v) * 9);
            t = psym(0x85 ^ inv, t);
/* generate an integer binary operation */
void gen_opi(int op)
    int r, fr, opc, fc, c, ll, uu, cc, tt2;

    ll = is64_type(vtop[-1].type.t);
    cc = (fr & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
    tt2 = (fr & (VT_LVAL | VT_LVAL_TYPE)) == VT_LVAL;
    case TOK_ADDC1: /* add with carry generation */
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 + REG_VALUE(r) + opc*8);
                orex(ll, r, 0, 0x81);
                oad(0xc0 + REG_VALUE(r) + opc*8, c);
            orex(ll, fr, r, 0x03 + opc*8);
                gen_modrm(r, fr, vtop->sym, fc);
                o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
        if (op >= TOK_ULT && op <= TOK_GT) {
    case TOK_SUBC1: /* sub with carry generation */
    case TOK_ADDC2: /* add with carry use */
    case TOK_SUBC2: /* sub with carry use */
            orex(ll, fr, r, 0xf7);
                gen_modrm(opc, fr, vtop->sym, fc);
                o(0xc0 + REG_VALUE(fr) + opc*8);
            orex(ll, fr, r, 0xaf0f); /* imul fr, r */
                gen_modrm(r, fr, vtop->sym, fc);
                o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
                orex(ll, r, 0, 0xd1);
                o(0xc0 + REG_VALUE(r) + opc*8);
                orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
                o(0xc0 + REG_VALUE(r) + opc*8);
                g(c & (ll ? 0x3f : 0x1f));
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(0xc0 + REG_VALUE(r) + opc*8);
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_INT2);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cdq RDX:RAX <- sign-extend of RAX. */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
            gen_modrm(opc, fr, vtop->sym, fc);
            o(0xc0 + REG_VALUE(fr) + opc*8);
        if (op == '%' || op == TOK_UMOD)

void gen_opl(int op)
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
    int a, ft, fc, swapped, fr, r;
    int float_type = (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)

    if ((ft & VT_BTYPE) == VT_LDOUBLE) {
        /* swap the stack if needed so that t1 is the register and t2 is
           the memory reference */
        /* must put at least one value in the floating point register */
        if ((vtop[-1].r & VT_LVAL) && (vtop[0].r & VT_LVAL)) {
        if (vtop[-1].r & VT_LVAL) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
            else if (op == TOK_EQ || op == TOK_NE)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                o(0x45c4f6); /* test $0x45, %ah */
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            o(0xde); /* fxxxp %st, %st(1) */
        if (op >= TOK_ULT && op <= TOK_GT) {
                op = TOK_ULE; /* setae */
                op = TOK_UGT; /* seta */
            assert(!(vtop[-1].r & VT_LVAL));
            if ((ft & VT_BTYPE) == VT_DOUBLE)
            o(0x2e0f); /* ucomisd */
                gen_modrm(r, fr, vtop->sym, fc);
                o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
            vtop->c.i = op | 0x100;
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            /* no memory reference possible for long double operations */
            assert((ft & VT_BTYPE) != VT_LDOUBLE);
            assert(!(vtop[-1].r & VT_LVAL));
            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                gen_modrm(r, fr, vtop->sym, fc);
                o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8);
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
    if (tbt == VT_LDOUBLE) {
        if ((ft & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + REG_VALUE(r)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((ft & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            o(0x50 + REG_VALUE(r)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
            /* int to float/double/long double */
            o(0x50 + REG_VALUE(r)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        r_xmm = get_reg(RC_FLOAT);
        o(0xf2 + (tbt == VT_FLOAT));
        if ((ft & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED) || bt == VT_LLONG) {
        o(0xc0 + REG_VALUE(r) + REG_VALUE(r_xmm)*8); /* cvtsi2sd or cvtsi2ss */
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
    if(bt == VT_LDOUBLE)
        r = get_reg(RC_FLOAT);
    if (bt == VT_FLOAT) {
        if (tbt == VT_DOUBLE) {
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(r) + REG_VALUE(r) * 8);
        } else if (tbt == VT_LDOUBLE) {
            /* movss %xmm0-7,-0x10(%rsp) */
            o(0xf02444 + REG_VALUE(r)*8);
            o(0xf02444d9); /* flds -0x10(%rsp) */
    } else if (bt == VT_DOUBLE) {
        if (tbt == VT_FLOAT) {
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(r) + REG_VALUE(r) * 8);
        } else if (tbt == VT_LDOUBLE) {
            /* movsd %xmm0-7,-0x10(%rsp) */
            o(0xf02444 + REG_VALUE(r)*8);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0-7 */
            o(0xf02444 + REG_VALUE(r)*8);
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0-7 */
            o(0xf02444 + REG_VALUE(r)*8);
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
    int ft, bt, ll, r, r_xmm;

    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
    r_xmm = gv(RC_FLOAT);
    if ((t & VT_BTYPE) == VT_INT)
    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
    } else if (bt == VT_DOUBLE) {
    orex(ll, r, r_xmm, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(r_xmm) + (REG_VALUE(r) << 3));
/* computed goto support */

/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp) */
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);

/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
    r = gv(RC_INT); /* allocation size */
    o(0xe0 | REG_VALUE(r));
    orex(1, 0, r, 0x8d);
    o(0x2484 | (REG_VALUE(r)*8));
    r_loc = reloc_add(r_loc);

/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/