/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which
   makes assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_XMM0    0x0020
#define RC_ST0     0x0040 /* only for long double */
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_LRET    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */

/* pretty names for the registers */

#define REX_BASE(reg)  (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)
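/* Added note (not part of the original source): x86-64 widens the eight
   i386 registers to sixteen by adding one extra bit per register operand,
   carried in a REX prefix byte of the form 0100WRXB.  A register number
   therefore splits into REX_BASE(reg), the extension bit that goes into
   the REX prefix, and REG_VALUE(reg), the low three bits that go into the
   ModRM/opcode byte.  For example, register 9 (%r9) gives REX_BASE = 1 and
   REG_VALUE = 1, i.e. the same ModRM bits as %rcx but with the REX
   extension bit set. */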
/* return registers for function */
#define REG_IRET TREG_RAX  /* single word int return register */
#define REG_LRET TREG_RDX  /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 8
/* maximum alignment (for aligned attribute support) */
ST_FUNC void gen_opl(int op);
ST_FUNC void gen_le64(int64_t c);
/******************************************************/

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32  R_X86_64_32
#define R_DATA_PTR R_X86_64_64
#define R_JMP_SLOT R_X86_64_JUMP_SLOT
#define R_COPY     R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE  0x1000

/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
};

static unsigned long func_sub_sp_offset;
static int func_ret_sub;
/* XXX: make it faster ? */
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
void o(unsigned int c)

void gen_le64(int64_t c)
void orex(int ll, int r, int r2, int b)
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
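/* Added note (not part of the original source, rough sketch of the
   behaviour): with ll = 1 and ordinary low registers, orex() emits 0x48,
   i.e. a REX.W prefix (0x40 | ll << 3), in front of the opcode byte 'b'
   that is passed in; with ll = 0 and no extended registers no prefix is
   emitted at all.  Operands whose VT_VALMASK part is VT_CONST or above are
   not real registers, so they are cleared first and never contribute REX
   bits. */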
/* output a symbol and patch all calls to it */
void gsym_addr(int t, int a)
    ptr = (int *)(cur_text_section->data + t);
    n = *ptr; /* next value */
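/* Added note (not part of the original source): forward jumps whose target
   is not yet known store, in their 4-byte displacement field, the offset of
   the previous unresolved jump to the same label, so the pending jumps form
   a singly linked list threaded through the code buffer.  gsym_addr() walks
   that list starting at 't' and patches every displacement so that each
   recorded jump now lands on address 'a'. */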
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */

static int is64_type(int t)
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
static int is_sse_float(int t) {
    return bt == VT_DOUBLE || bt == VT_FLOAT;
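/* Added note (not part of the original source): on x86-64, float and double
   values are kept in SSE registers (RC_FLOAT / RC_XMM0), while long double
   is the 80-bit x87 format and lives on the x87 stack (RC_ST0); that is why
   long double is deliberately not treated as an SSE float here. */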
/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
    greloc(cur_text_section, sym, ind, R_X86_64_32);
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
    greloc(cur_text_section, sym, ind, R_X86_64_64);
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
#ifndef TCC_TARGET_PE
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));

    printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
           cur_text_section->data[ind-3],
           cur_text_section->data[ind-2],
           cur_text_section->data[ind-1]
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);

    /* we use add c, %xxx for displacement */
    o(0xc0 + REG_VALUE(r));
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
            gen_gotpcrel(r, sym, c);
            gen_addrpc32(r, sym, c);
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
            /* short reference */
            oad(0x85 | op_reg, c);
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
            g(0x80 | op_reg | REG_VALUE(r));
            g(0x00 | op_reg | REG_VALUE(r));
        g(0x00 | op_reg | REG_VALUE(r));
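/* Added note (not part of the original source): a ModRM byte is laid out as
   mod(2 bits) | reg(3 bits) | r/m(3 bits).  The cases above map onto that
   encoding: 0x05 | op_reg is mod = 00 with r/m = 101, i.e. a RIP-relative
   reference with a 32-bit displacement; 0x85 | op_reg via oad() is mod = 10,
   r/m = 101 (%rbp) with a 32-bit displacement; 0x80 | ... is mod = 10 with a
   32-bit displacement off a general base register, and 0x00 | ... is
   mod = 00 with no displacement.  As a worked example, "mov %eax, -8(%rbp)"
   is encoded 89 45 f8: opcode 0x89, ModRM 0x45 (mod = 01, reg = 000 = eax,
   r/m = 101 = rbp), disp8 = 0xf8. */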
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
    gen_modrm_impl(op_reg, r, sym, c, 0);
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
    int v, t, ft, fc, fr;

    sv = pe_getimport(sv, &v2);
#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
    if (v == VT_LLOCAL) {
        v1.r = VT_LOCAL | VT_LVAL;

        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66, r = 0; /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3, r = 0; /* movq */
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE) {
            b = 0xbe0f; /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f; /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f; /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f; /* movzwl */
            gen_modrm64(b, r, fr, sv->sym, fc);
            gen_modrm(r, fr, sv->sym, fc);
            o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
            gen_addrpc32(fr, sv->sym, fc);
            if (sv->sym->type.t & VT_STATIC) {
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
                o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                gen_gotpcrel(fr, sv->sym, fc);
        } else if (is64_type(ft)) {
            orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
            orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
    } else if (v == VT_LOCAL) {
        orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
        gen_modrm(r, VT_LOCAL, sv->sym, fc);
    } else if (v == VT_CMP) {
        oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
        orex(0,r,0, 0x0f); /* setxx %br */
        o(0xc0 + REG_VALUE(r));
    } else if (v == VT_JMP || v == VT_JMPI) {
        oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
        o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
        oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        if (r == TREG_XMM0) {
            assert(v == TREG_ST0);
            /* gen_cvt_ftof(VT_DOUBLE); */
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
        } else if (r == TREG_ST0) {
            assert(v == TREG_XMM0);
            /* gen_cvt_ftof(VT_LDOUBLE); */
            /* movsd %xmm0,-0x10(%rsp) */
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
    /* store the REX prefix in this variable when PIC is enabled */

    v = pe_getimport(v, &v2);

    fr = v->r & VT_VALMASK;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        pic = is64_type(bt) ? 0x49 : 0x41;

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x7e0f); /* movd */
    } else if (bt == VT_DOUBLE) {
        o(0xd60f); /* movq */
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        if (bt == VT_BYTE || bt == VT_BOOL)
        else if (is64_type(bt))

    /* xxx r, (%r11) where xxx is mov, movq, fld, etc. */
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            o(0xc0 + fr + r * 8); /* mov r, fr */
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            o(0xc0 + fr + r * 8); /* mov r, fr */
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        if (vtop->r & VT_SYM) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym,
                   ind + 1, R_X86_64_PC32);
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
        /* otherwise, indirect call */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
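/* Added note (not part of the original source): 0xe8 is "call rel32" and
   0xe9 is "jmp rel32", so adding is_jmp to the opcode selects between them.
   The CPU resolves the rel32 field relative to the end of the 5-byte
   instruction, while the PC32 relocation placed at ind + 1 is resolved
   relative to the start of the 4-byte field, hence the -4 bias on the
   stored addend.  The indirect form uses opcode 0xff with ModRM 0xd0+reg
   (call *r) or 0xe0+reg (jmp *r), which is what is_jmp << 4 selects. */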
static const uint8_t arg_regs[] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

static int func_scratch;
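/* Added note (not part of the original source): this block implements the
   Microsoft x64 calling convention used when TCC_TARGET_PE is defined.  The
   first four integer/pointer arguments are passed in RCX, RDX, R8 and R9
   (the arg_regs table above), and the caller always reserves at least 32
   bytes of register "home" space on the stack.  func_scratch roughly tracks
   the extra per-function scratch area needed for arguments such as structs
   and long doubles that are passed through a stack copy. */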
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

void gen_offs_sp(int b, int r, int d)
    orex(1,0,r & 0x100 ? 0 : r, b);
        o(0x2444 | (REG_VALUE(r) << 3));
        o(0x2484 | (REG_VALUE(r) << 3));
void gfunc_call(int nb_args)
    int size, align, r, args_size, i, d, j, bt, struct_size;
    int nb_reg_args, gen_reg;

    nb_reg_args = nb_args;
    args_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;

    /* for struct arguments, we need to call memcpy, and that call would
       clobber the register arguments we are preparing.  So we process the
       arguments that will be passed on the stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        if (bt == VT_STRUCT) {
            size = type_size(&sv->type, &align);
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            gen_offs_sp(0x8d, r, struct_size);
            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
        } else if (bt == VT_LDOUBLE) {
            gen_offs_sp(0xdb, 0x107, struct_size);

    if (func_scratch < struct_size)
        func_scratch = struct_size;
    for (i = 0; i < REGN; ++i)
        save_reg(arg_regs[i]);

    gen_reg = nb_reg_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        bt = (vtop->type.t & VT_BTYPE);

        if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
            if (bt == VT_LDOUBLE)
            size = type_size(&vtop->type, &align);
            /* align to stack align size */
            size = (size + 15) & ~15;
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, j*8);
                gen_offs_sp(0x8d, d, struct_size);
        } else if (is_sse_float(vtop->type.t)) {
            gv(RC_FLOAT); /* only one float register */
                /* movq %xmm0, j*8(%rsp) */
                gen_offs_sp(0xd60f66, 0x100, j*8);
                /* movaps %xmm0, %xmmN */
                /* mov %xmm0, %rxx */
                o(0xc0 + REG_VALUE(d));
                gen_offs_sp(0x89, r, j*8);
                gv(reg_classes[d] & ~RC_INT);
                o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8);
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
    int addr, reg_param_index, bt;

    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        bt = type->t & VT_BTYPE;
        if (reg_param_index < REGN) {
            /* save arguments passed by register */
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS)
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
/* generate function epilog */
void gfunc_epilog(void)
    if (func_ret_sub == 0) {
        g(func_ret_sub >> 8);

    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
static void gadd_sp(int val)
    if (val == (char)val) {
        oad(0xc48148, val); /* add $xxx, %rsp */

static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};
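/* Added note (not part of the original source): this is the System V AMD64
   calling convention used on non-Windows targets.  Up to six integer or
   pointer arguments go in RDI, RSI, RDX, RCX, R8 and R9 (so REGN is 6
   here), up to eight floating point arguments go in %xmm0-%xmm7, and the
   rest is passed on a stack kept 16-byte aligned at the point of the
   call. */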
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
    int size, align, r, args_size, i;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float arguments */
    for(i = 0; i < nb_args; i++) {
        if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
            args_size += type_size(&vtop[-i].type, &align);
            args_size = (args_size + 7) & ~7;
        } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
        } else if (is_sse_float(vtop[-i].type.t)) {
            if (nb_sse_args > 8) args_size += 8;
            if (nb_reg_args > REGN) args_size += 8;
    /* for struct arguments, we need to call memcpy, and that call would
       clobber the register arguments we are preparing.  So we process the
       arguments that will be passed on the stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;

    /* adjust stack to align SSE boundary */
    if (args_size &= 15) {
        /* fetch cpu flag before the following sub changes its value */
        if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)

        args_size = 16 - args_size;
        oad(0xec81, args_size); /* sub $xxx, %rsp */
    for(i = 0; i < nb_args; i++) {
        if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
            size = type_size(&vtop->type, &align);
            /* align to stack align size */
            size = (size + 7) & ~7;
            /* allocate the necessary size on stack */
            oad(0xec81, size); /* sub $xxx, %rsp */
            /* generate structure store */
            orex(1, r, 0, 0x89); /* mov %rsp, r */
            o(0xe0 + REG_VALUE(r));

            /* following code breaks vtop[1], vtop[2], and vtop[3] */
            SValue tmp1 = vtop[1];
            SValue tmp2 = vtop[2];
            SValue tmp3 = vtop[3];
            vset(&vtop->type, r | VT_LVAL, 0);
        } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
            oad(0xec8148, size); /* sub $xxx, %rsp */
            o(0x7cdb); /* fstpt 0(%rsp) */
        } else if (is_sse_float(vtop->type.t)) {
            o(0x50); /* push $rax */
            /* movq %xmm0, (%rsp) */
            /* XXX: implicit cast ? */
            orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    for(i = 0; i < nb_args; i++) {
        if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
            (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        } else if (is_sse_float(vtop->type.t)) {
            gv(RC_FLOAT); /* only one float register */
            /* movaps %xmm0, %xmmN */
            o(0xc0 + (sse_reg << 3));
            /* XXX: implicit cast ? */
            if (j == 2 || j == 3)
            /* j=2: r10, j=3: r11 */
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */

    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
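/* Added note (not part of the original source): for calls to variadic
   functions the System V ABI requires %al to hold an upper bound on the
   number of vector registers used for the arguments, so a variadic callee
   knows whether its prologue must spill %xmm0-%xmm7 into the register save
   area; the code above simply loads that count before every call. */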
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
    int i, addr, align, size;
    int param_index, param_addr, reg_param_index, sse_param_index;

    sym = func_type->ref;

    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            if (is_sse_float(type->t)) {
                if (seen_sse_num < 8) {
                seen_stack_size += 8;
            } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
                size = type_size(type, &align);
                size = (size + 7) & ~7;
                seen_stack_size += size;
            } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
                seen_stack_size += LDOUBLE_SIZE;
                if (seen_reg_num < REGN) {
                seen_stack_size += 8;
        /* movl $0x????????, -0x10(%rbp) */
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            o(0xd60f66); /* movq */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            /* movq $0, loc+8(%rbp) */
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
    sym = func_type->ref;

    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
        push_arg_reg(reg_param_index);

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        size = type_size(type, &align);
        size = (size + 7) & ~7;
        if (is_sse_float(type->t)) {
            if (sse_param_index < 8) {
                /* save arguments passed by register */
                o(0xd60f66); /* movq */
                gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
        } else if ((type->t & VT_BTYPE) == VT_STRUCT ||
                   (type->t & VT_BTYPE) == VT_LDOUBLE) {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                push_arg_reg(reg_param_index);
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
/* generate function epilog */
void gfunc_epilog(void)
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc2); /* ret n */
        g(func_ret_sub >> 8);

    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;

    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
/* generate a jump to a label */
    return psym(0xe9, t);

/* generate a jump to a fixed address */
void gjmp_addr(int a)
        oad(0xe9, a - ind - 5);
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
    v = vtop->r & VT_VALMASK;
        /* fast case : can jump directly since flags are set */
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
                p = (int *)(cur_text_section->data + *p);
        if (is_float(vtop->type.t) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
            /* constant jmp optimization */
            if ((vtop->c.i != 0) != inv)
            o(0xc0 + REG_VALUE(v) * 9);
            t = psym(0x85 ^ inv, t);
/* generate an integer binary operation */
void gen_opi(int op)
    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    case TOK_ADDC1: /* add with carry generation */
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            gv2(RC_INT, RC_INT);
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        if (op >= TOK_ULT && op <= TOK_GT) {
    case TOK_SUBC1: /* sub with carry generation */
    case TOK_ADDC2: /* add with carry use */
    case TOK_SUBC2: /* sub with carry use */
        gv2(RC_INT, RC_INT);
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        opc = 0xc0 | (opc << 3);
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
void gen_opl(int op)

/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
    int a, ft, fc, swapped, r;
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {

    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {

    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
            else if (op == TOK_EQ || op == TOK_NE)
            o(0xc9d9); /* fxch %st(1) */
            o(0xe9da); /* fucompp */
            o(0xe0df); /* fnstsw %ax */
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                o(0x45c4f6); /* test $0x45, %ah */
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
        o(0xde); /* fxxxp %st, %st(1) */

        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                r = get_reg(RC_INT);
                v1.r = VT_LOCAL | VT_LVAL;

            if (op == TOK_EQ || op == TOK_NE) {
            if (op == TOK_LE || op == TOK_LT)
            if (op == TOK_LE || op == TOK_GE) {
                op = 0x93; /* setae */
                op = 0x97; /* seta */

            o(0x7e0ff3); /* movq */
            gen_modrm(1, r, vtop->sym, fc);
            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
                o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
                o(0x2e0f); /* ucomisd */
                gen_modrm(0, r, vtop->sym, fc);
            /* no memory reference possible for long double operations */
            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                load(TREG_XMM0, vtop);

            if ((ft & VT_BTYPE) == VT_LDOUBLE) {
                o(0xde); /* fxxxp %st, %st(1) */
            /* if saved lvalue, then we must reload it */
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                r = get_reg(RC_INT);
                v1.r = VT_LOCAL | VT_LVAL;

            /* movq %xmm0,%xmm1 */
            load(TREG_XMM0, vtop);
            /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
            if ((ft & VT_BTYPE) == VT_DOUBLE) {
            if ((ft & VT_BTYPE) == VT_DOUBLE) {
            gen_modrm(0, r, vtop->sym, fc);
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        save_reg(TREG_XMM0);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
        o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
        vtop->r = TREG_XMM0;
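/* Added note (not part of the original source): conversions to long double
   go through the x87 unit (the value is pushed and loaded with fild), since
   there is no SSE path to the 80-bit format.  Conversions to float/double
   use cvtsi2ss/cvtsi2sd (prefix 0xf3/0xf2, opcode 0x0f 0x2a) into %xmm0,
   using the 64-bit form for 'long long' and 'unsigned int' sources so the
   value is neither truncated nor wrongly sign-extended. */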
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
    if (bt == VT_FLOAT) {
        if (tbt == VT_DOUBLE) {
            o(0xc0140f); /* unpcklps */
            o(0xc05a0f); /* cvtps2pd */
        } else if (tbt == VT_LDOUBLE) {
            /* movss %xmm0,-0x10(%rsp) */
            o(0xf02444d9); /* flds -0x10(%rsp) */
    } else if (bt == VT_DOUBLE) {
        if (tbt == VT_FLOAT) {
            o(0xc0140f66); /* unpcklpd */
            o(0xc05a0f66); /* cvtpd2ps */
        } else if (tbt == VT_LDOUBLE) {
            /* movsd %xmm0,-0x10(%rsp) */
            o(0xf02444dd); /* fldl -0x10(%rsp) */
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            vtop->r = TREG_XMM0;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            vtop->r = TREG_XMM0;
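/* Added note (not part of the original source): float<->double conversions
   stay in SSE (cvtps2pd/cvtpd2ps on %xmm0), but anything involving long
   double has to round-trip through memory: the value is stored to a scratch
   slot at -0x10(%rsp) with movss/movsd or fstps/fstpl and reloaded with the
   matching flds/fldl or movss/movsd, moving it between %xmm0 and the x87
   register stack. */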
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
    int ft, bt, size, r;

    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
    } else if (bt == VT_DOUBLE) {
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + (REG_VALUE(r) << 3));

/* computed goto support */
/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/