2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which makes
31 assumptions about it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
39 #define RC_XMM0 0x0020
40 #define RC_ST0 0x0040 /* only for long double */
41 #define RC_IRET RC_RAX /* function return: integer register */
42 #define RC_LRET RC_RDX /* function return: second integer register */
43 #define RC_FRET RC_XMM0 /* function return: float register */
45 /* pretty names for the registers */
64 #define REX_BASE(reg) (((reg) >> 3) & 1)
65 #define REG_VALUE(reg) ((reg) & 7)
67 /* return registers for function */
68 #define REG_IRET TREG_RAX /* single word int return register */
69 #define REG_LRET TREG_RDX /* second word return register (for long long) */
70 #define REG_FRET TREG_XMM0 /* float return register */
72 /* defined if function parameters must be evaluated in reverse order */
73 #define INVERT_FUNC_PARAMS
75 /* pointer size, in bytes */
78 /* long double size and alignment, in bytes */
79 #define LDOUBLE_SIZE 16
80 #define LDOUBLE_ALIGN 8
81 /* maximum alignment (for aligned attribute support) */
84 ST_FUNC
void gen_opl(int op
);
85 ST_FUNC
void gen_le64(int64_t c
);
87 /******************************************************/
90 #define EM_TCC_TARGET EM_X86_64
92 /* relocation type for 32 bit data relocation */
93 #define R_DATA_32 R_X86_64_32
94 #define R_DATA_PTR R_X86_64_64
95 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
96 #define R_COPY R_X86_64_COPY
98 #define ELF_START_ADDR 0x08048000
99 #define ELF_PAGE_SIZE 0x1000
101 /******************************************************/
102 #else /* ! TARGET_DEFS_ONLY */
103 /******************************************************/
107 ST_DATA
const int reg_classes
[NB_REGS
] = {
108 /* eax */ RC_INT
| RC_RAX
,
109 /* ecx */ RC_INT
| RC_RCX
,
110 /* edx */ RC_INT
| RC_RDX
,
111 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
122 static unsigned long func_sub_sp_offset
;
123 static int func_ret_sub
;
125 /* XXX: make it faster ? */
130 if (ind1
> cur_text_section
->data_allocated
)
131 section_realloc(cur_text_section
, ind1
);
132 cur_text_section
->data
[ind
] = c
;
136 void o(unsigned int c
)
158 void gen_le64(int64_t c
)
170 void orex(int ll
, int r
, int r2
, int b
)
172 if ((r
& VT_VALMASK
) >= VT_CONST
)
174 if ((r2
& VT_VALMASK
) >= VT_CONST
)
176 if (ll
|| REX_BASE(r
) || REX_BASE(r2
))
177 o(0x40 | REX_BASE(r
) | (REX_BASE(r2
) << 2) | (ll
<< 3));
181 /* output a symbol and patch all calls to it */
182 void gsym_addr(int t
, int a
)
186 ptr
= (int *)(cur_text_section
->data
+ t
);
187 n
= *ptr
; /* next value */
198 /* psym is used to put an instruction with a data field which is a
199 reference to a symbol. It is in fact the same as oad ! */
202 static int is64_type(int t
)
204 return ((t
& VT_BTYPE
) == VT_PTR
||
205 (t
& VT_BTYPE
) == VT_FUNC
||
206 (t
& VT_BTYPE
) == VT_LLONG
);
209 static int is_sse_float(int t
) {
212 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
216 /* instruction + 4 bytes data. Return the address of the data */
217 ST_FUNC
int oad(int c
, int s
)
223 if (ind1
> cur_text_section
->data_allocated
)
224 section_realloc(cur_text_section
, ind1
);
225 *(int *)(cur_text_section
->data
+ ind
) = s
;
231 ST_FUNC
void gen_addr32(int r
, Sym
*sym
, int c
)
234 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
238 /* output constant with relocation if 'r & VT_SYM' is true */
239 ST_FUNC
void gen_addr64(int r
, Sym
*sym
, int64_t c
)
242 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
246 /* output constant with relocation if 'r & VT_SYM' is true */
247 ST_FUNC
void gen_addrpc32(int r
, Sym
*sym
, int c
)
250 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
254 /* output got address with relocation */
255 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
257 #ifndef TCC_TARGET_PE
260 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
261 sr
= cur_text_section
->reloc
;
262 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
265 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
266 cur_text_section
->data
[ind
-3],
267 cur_text_section
->data
[ind
-2],
268 cur_text_section
->data
[ind
-1]
270 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
274 /* we use add c, %xxx for displacement */
276 o(0xc0 + REG_VALUE(r
));
281 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
283 op_reg
= REG_VALUE(op_reg
) << 3;
284 if ((r
& VT_VALMASK
) == VT_CONST
) {
285 /* constant memory reference */
288 gen_gotpcrel(r
, sym
, c
);
290 gen_addrpc32(r
, sym
, c
);
292 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
293 /* currently, we use only ebp as base */
295 /* short reference */
299 oad(0x85 | op_reg
, c
);
301 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
303 g(0x80 | op_reg
| REG_VALUE(r
));
306 g(0x00 | op_reg
| REG_VALUE(r
));
309 g(0x00 | op_reg
| REG_VALUE(r
));
313 /* generate a modrm reference. 'op_reg' contains the additional 3
315 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
317 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
320 /* generate a modrm reference. 'op_reg' contains the additional 3
322 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
325 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
326 orex(1, r
, op_reg
, opcode
);
327 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
331 /* load 'r' from value 'sv' */
332 void load(int r
, SValue
*sv
)
334 int v
, t
, ft
, fc
, fr
;
339 sv
= pe_getimport(sv
, &v2
);
346 #ifndef TCC_TARGET_PE
347 /* we use indirect access via got */
348 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
349 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
350 /* use the result register as a temporary register */
351 int tr
= r
| TREG_MEM
;
353 /* we cannot use float registers as a temporary register */
354 tr
= get_reg(RC_INT
) | TREG_MEM
;
356 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
358 /* load from the temporary register */
366 if (v
== VT_LLOCAL
) {
368 v1
.r
= VT_LOCAL
| VT_LVAL
;
374 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
375 b
= 0x6e0f66, r
= 0; /* movd */
376 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
377 b
= 0x7e0ff3, r
= 0; /* movq */
378 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
379 b
= 0xdb, r
= 5; /* fldt */
380 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
381 b
= 0xbe0f; /* movsbl */
382 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
383 b
= 0xb60f; /* movzbl */
384 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
385 b
= 0xbf0f; /* movswl */
386 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
387 b
= 0xb70f; /* movzwl */
393 gen_modrm64(b
, r
, fr
, sv
->sym
, fc
);
396 gen_modrm(r
, fr
, sv
->sym
, fc
);
403 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
404 gen_addrpc32(fr
, sv
->sym
, fc
);
406 if (sv
->sym
->type
.t
& VT_STATIC
) {
408 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
409 gen_addrpc32(fr
, sv
->sym
, fc
);
412 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
413 gen_gotpcrel(fr
, sv
->sym
, fc
);
416 } else if (is64_type(ft
)) {
417 orex(1,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
420 orex(0,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
423 } else if (v
== VT_LOCAL
) {
424 orex(1,0,r
,0x8d); /* lea xxx(%ebp), r */
425 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
426 } else if (v
== VT_CMP
) {
428 oad(0xb8 + REG_VALUE(r
), 0); /* mov $0, r */
429 orex(0,r
,0, 0x0f); /* setxx %br */
431 o(0xc0 + REG_VALUE(r
));
432 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
435 oad(0xb8 + REG_VALUE(r
), t
); /* mov $1, r */
436 o(0x05eb + (REX_BASE(r
) << 8)); /* jmp after */
439 oad(0xb8 + REG_VALUE(r
), t
^ 1); /* mov $0, r */
441 if (r
== TREG_XMM0
) {
442 assert(v
== TREG_ST0
);
443 /* gen_cvt_ftof(VT_DOUBLE); */
444 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
445 /* movsd -0x10(%rsp),%xmm0 */
448 } else if (r
== TREG_ST0
) {
449 assert(v
== TREG_XMM0
);
450 /* gen_cvt_ftof(VT_LDOUBLE); */
451 /* movsd %xmm0,-0x10(%rsp) */
454 o(0xf02444dd); /* fldl -0x10(%rsp) */
457 o(0xc0 + REG_VALUE(r
) + REG_VALUE(v
) * 8); /* mov v, r */
463 /* store register 'r' in lvalue 'v' */
464 void store(int r
, SValue
*v
)
468 /* store the REX prefix in this variable when PIC is enabled */
473 v
= pe_getimport(v
, &v2
);
478 fr
= v
->r
& VT_VALMASK
;
481 #ifndef TCC_TARGET_PE
482 /* we need to access the variable via got */
483 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
484 /* mov xx(%rip), %r11 */
486 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
487 pic
= is64_type(bt
) ? 0x49 : 0x41;
491 /* XXX: incorrect if float reg to reg */
492 if (bt
== VT_FLOAT
) {
495 o(0x7e0f); /* movd */
497 } else if (bt
== VT_DOUBLE
) {
500 o(0xd60f); /* movq */
502 } else if (bt
== VT_LDOUBLE
) {
503 o(0xc0d9); /* fld %st(0) */
511 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
513 else if (is64_type(bt
))
519 /* xxx r, (%r11) where xxx is mov, movq, fld, etc. */
524 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
525 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
526 } else if (fr
!= r
) {
527 /* XXX: do we ever really get here? */
529 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
532 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
533 gen_modrm(r
, v
->r
, v
->sym
, fc
);
534 } else if (fr
!= r
) {
535 /* XXX: do we ever really get here? */
537 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
542 /* 'is_jmp' is '1' if it is a jump */
543 static void gcall_or_jmp(int is_jmp
)
546 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
548 if (vtop
->r
& VT_SYM
) {
549 /* relocation case */
550 greloc(cur_text_section
, vtop
->sym
,
551 ind
+ 1, R_X86_64_PC32
);
553 /* put an empty PC32 relocation */
554 put_elf_reloc(symtab_section
, cur_text_section
,
555 ind
+ 1, R_X86_64_PC32
, 0);
557 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
559 /* otherwise, indirect call */
563 o(0xff); /* call/jmp *r */
564 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
571 static const uint8_t arg_regs
[] = {
572 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
575 static int func_scratch
;
577 /* Generate function call. The function address is pushed first, then
578 all the parameters in call order. This function pops all the
579 parameters and the function address. */
581 void gen_offs_sp(int b
, int r
, int d
)
583 orex(1,0,r
& 0x100 ? 0 : r
, b
);
585 o(0x2444 | (REG_VALUE(r
) << 3));
588 o(0x2484 | (REG_VALUE(r
) << 3));
593 void gfunc_call(int nb_args
)
595 int size
, align
, r
, args_size
, i
, d
, j
, bt
;
596 int nb_reg_args
, gen_reg
;
598 /* calculate the number of integer/float arguments */
600 for(i
= 0; i
< nb_args
; i
++) {
601 bt
= (vtop
[-i
].type
.t
& VT_BTYPE
);
602 if (bt
!= VT_STRUCT
&& bt
!= VT_LDOUBLE
)
606 args_size
= (nb_reg_args
< REGN
? REGN
: nb_reg_args
) * PTR_SIZE
;
608 /* for struct arguments, we need to call memcpy and the function
609 call breaks register passing arguments we are preparing.
610 So, we process arguments which will be passed by stack first. */
611 for(i
= 0; i
< nb_args
; i
++) {
612 SValue
*sv
= &vtop
[-i
];
613 bt
= (sv
->type
.t
& VT_BTYPE
);
614 if (bt
== VT_STRUCT
) {
615 size
= type_size(&sv
->type
, &align
);
616 /* align to stack align size */
617 size
= (size
+ 15) & ~15;
618 /* generate structure store */
620 gen_offs_sp(0x8d, r
, args_size
);
623 /* generate memcpy call */
624 vset(&sv
->type
, r
| VT_LVAL
, 0);
629 } else if (bt
== VT_LDOUBLE
) {
632 gen_offs_sp(0xdb, 0x107, args_size
);
638 if (func_scratch
< args_size
)
639 func_scratch
= args_size
;
641 for (i
= 0; i
< REGN
; ++i
)
642 save_reg(arg_regs
[i
]);
644 gen_reg
= nb_reg_args
;
645 for(i
= 0; i
< nb_args
; i
++) {
646 bt
= (vtop
->type
.t
& VT_BTYPE
);
647 if (bt
== VT_STRUCT
|| bt
== VT_LDOUBLE
) {
649 } else if (is_sse_float(vtop
->type
.t
)) {
650 gv(RC_FLOAT
); /* only one float register */
653 /* movq %xmm0, j*8(%rsp) */
654 gen_offs_sp(0xd60f66, 0x100, j
*8);
656 /* movaps %xmm0, %xmmN */
660 /* mov %xmm0, %rxx */
663 o(0xc0 + REG_VALUE(d
));
669 gen_offs_sp(0x89, r
, j
*8);
673 gv(reg_classes
[d
] & ~RC_INT
);
678 o(0xc0 + REG_VALUE(d
) + REG_VALUE(r
) * 8);
692 #define FUNC_PROLOG_SIZE 11
694 /* generate function prolog of type 't' */
695 void gfunc_prolog(CType
*func_type
)
697 int addr
, align
, size
, reg_param_index
, bt
;
706 ind
+= FUNC_PROLOG_SIZE
;
707 func_sub_sp_offset
= ind
;
710 sym
= func_type
->ref
;
712 /* if the function returns a structure, then add an
713 implicit pointer parameter */
715 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
716 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
721 /* define parameters */
722 while ((sym
= sym
->next
) != NULL
) {
724 bt
= type
->t
& VT_BTYPE
;
725 if (bt
== VT_STRUCT
|| bt
== VT_LDOUBLE
)
727 if (reg_param_index
< REGN
) {
728 /* save arguments passed by register */
729 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
731 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
736 while (reg_param_index
< REGN
) {
737 if (func_type
->ref
->c
== FUNC_ELLIPSIS
)
738 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
743 sym
= func_type
->ref
;
744 while ((sym
= sym
->next
) != NULL
) {
746 bt
= type
->t
& VT_BTYPE
;
747 if (bt
== VT_STRUCT
|| bt
== VT_LDOUBLE
) {
748 size
= type_size(type
, &align
);
749 size
= (size
+ 15) & -16;
750 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
756 /* generate function epilog */
757 void gfunc_epilog(void)
762 if (func_ret_sub
== 0) {
767 g(func_ret_sub
>> 8);
771 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
772 /* align local size to word & save local variables */
773 v
= (func_scratch
+ -loc
+ 15) & -16;
775 pe_add_unwind_data(ind
, saved_ind
, v
);
778 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
779 oad(0xb8, v
); /* mov stacksize, %eax */
780 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
781 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
782 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
784 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
785 o(0xec8148); /* sub rsp, stacksize */
793 static void gadd_sp(int val
)
795 if (val
== (char)val
) {
799 oad(0xc48148, val
); /* add $xxx, %rsp */
804 static const uint8_t arg_regs
[REGN
] = {
805 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
808 /* Generate function call. The function address is pushed first, then
809 all the parameters in call order. This function pops all the
810 parameters and the function address. */
811 void gfunc_call(int nb_args
)
813 int size
, align
, r
, args_size
, i
;
817 int sse_reg
, gen_reg
;
819 /* calculate the number of integer/float arguments */
821 for(i
= 0; i
< nb_args
; i
++) {
822 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
823 args_size
+= type_size(&vtop
[-i
].type
, &align
);
824 args_size
= (args_size
+ 7) & ~7;
825 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
827 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
829 if (nb_sse_args
> 8) args_size
+= 8;
832 if (nb_reg_args
> REGN
) args_size
+= 8;
836 /* for struct arguments, we need to call memcpy and the function
837 call breaks register passing arguments we are preparing.
838 So, we process arguments which will be passed by stack first. */
840 gen_reg
= nb_reg_args
;
841 sse_reg
= nb_sse_args
;
843 /* adjust stack to align SSE boundary */
844 if (args_size
&= 15) {
845 /* fetch cpu flag before the following sub will change the value */
846 if (vtop
>= vstack
&& (vtop
->r
& VT_VALMASK
) == VT_CMP
)
849 args_size
= 16 - args_size
;
851 oad(0xec81, args_size
); /* sub $xxx, %rsp */
854 for(i
= 0; i
< nb_args
; i
++) {
855 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
856 size
= type_size(&vtop
->type
, &align
);
857 /* align to stack align size */
858 size
= (size
+ 7) & ~7;
859 /* allocate the necessary size on stack */
861 oad(0xec81, size
); /* sub $xxx, %rsp */
862 /* generate structure store */
864 orex(1, r
, 0, 0x89); /* mov %rsp, r */
865 o(0xe0 + REG_VALUE(r
));
867 /* following code breaks vtop[1], vtop[2], and vtop[3] */
868 SValue tmp1
= vtop
[1];
869 SValue tmp2
= vtop
[2];
870 SValue tmp3
= vtop
[3];
871 vset(&vtop
->type
, r
| VT_LVAL
, 0);
879 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
882 oad(0xec8148, size
); /* sub $xxx, %rsp */
883 o(0x7cdb); /* fstpt 0(%rsp) */
887 } else if (is_sse_float(vtop
->type
.t
)) {
891 o(0x50); /* push $rax */
892 /* movq %xmm0, (%rsp) */
900 /* XXX: implicit cast ? */
903 orex(0,r
,0,0x50 + REG_VALUE(r
)); /* push r */
911 save_regs(0); /* save used temporary registers */
913 /* then, we prepare register passing arguments.
914 Note that we cannot set RDX and RCX in this loop because gv()
915 may break these temporary registers. Let's use R10 and R11
917 gen_reg
= nb_reg_args
;
918 sse_reg
= nb_sse_args
;
919 for(i
= 0; i
< nb_args
; i
++) {
920 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
921 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
922 } else if (is_sse_float(vtop
->type
.t
)) {
925 gv(RC_FLOAT
); /* only one float register */
926 /* movaps %xmm0, %xmmN */
928 o(0xc0 + (sse_reg
<< 3));
933 /* XXX: implicit cast ? */
937 if (j
== 2 || j
== 3)
938 /* j=2: r10, j=3: r11 */
940 orex(1,d
,r
,0x89); /* mov */
941 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
947 /* Copy R10 and R11 into RDX and RCX, respectively */
948 if (nb_reg_args
> 2) {
949 o(0xd2894c); /* mov %r10, %rdx */
950 if (nb_reg_args
> 3) {
951 o(0xd9894c); /* mov %r11, %rcx */
955 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
963 #define FUNC_PROLOG_SIZE 11
965 static void push_arg_reg(int i
) {
967 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
970 /* generate function prolog of type 't' */
971 void gfunc_prolog(CType
*func_type
)
973 int i
, addr
, align
, size
;
974 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
978 sym
= func_type
->ref
;
981 ind
+= FUNC_PROLOG_SIZE
;
982 func_sub_sp_offset
= ind
;
985 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
986 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
987 seen_reg_num
= seen_sse_num
= 0;
988 /* frame pointer and return address */
989 seen_stack_size
= PTR_SIZE
* 2;
990 /* count the number of seen parameters */
991 sym
= func_type
->ref
;
992 while ((sym
= sym
->next
) != NULL
) {
994 if (is_sse_float(type
->t
)) {
995 if (seen_sse_num
< 8) {
998 seen_stack_size
+= 8;
1000 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
1001 size
= type_size(type
, &align
);
1002 size
= (size
+ 7) & ~7;
1003 seen_stack_size
+= size
;
1004 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
1005 seen_stack_size
+= LDOUBLE_SIZE
;
1007 if (seen_reg_num
< REGN
) {
1010 seen_stack_size
+= 8;
1016 /* movl $0x????????, -0x10(%rbp) */
1018 gen_le32(seen_reg_num
* 8);
1019 /* movl $0x????????, -0xc(%rbp) */
1021 gen_le32(seen_sse_num
* 16 + 48);
1022 /* movl $0x????????, -0x8(%rbp) */
1024 gen_le32(seen_stack_size
);
1026 /* save all register passing arguments */
1027 for (i
= 0; i
< 8; i
++) {
1029 o(0xd60f66); /* movq */
1030 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
1031 /* movq $0, loc+8(%rbp) */
1036 for (i
= 0; i
< REGN
; i
++) {
1037 push_arg_reg(REGN
-1-i
);
1041 sym
= func_type
->ref
;
1043 reg_param_index
= 0;
1044 sse_param_index
= 0;
1046 /* if the function returns a structure, then add an
1047 implicit pointer parameter */
1048 func_vt
= sym
->type
;
1049 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
1050 push_arg_reg(reg_param_index
);
1057 /* define parameters */
1058 while ((sym
= sym
->next
) != NULL
) {
1060 size
= type_size(type
, &align
);
1061 size
= (size
+ 7) & ~7;
1062 if (is_sse_float(type
->t
)) {
1063 if (sse_param_index
< 8) {
1064 /* save arguments passed by register */
1066 o(0xd60f66); /* movq */
1067 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
1075 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
1076 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
1080 if (reg_param_index
< REGN
) {
1081 /* save arguments passed by register */
1082 push_arg_reg(reg_param_index
);
1090 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1091 VT_LOCAL
| VT_LVAL
, param_addr
);
1096 /* generate function epilog */
1097 void gfunc_epilog(void)
1101 o(0xc9); /* leave */
1102 if (func_ret_sub
== 0) {
1105 o(0xc2); /* ret n */
1107 g(func_ret_sub
>> 8);
1109 /* align local size to word & save local variables */
1110 v
= (-loc
+ 15) & -16;
1112 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1113 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1114 o(0xec8148); /* sub rsp, stacksize */
1121 /* generate a jump to a label */
1124 return psym(0xe9, t
);
1127 /* generate a jump to a fixed address */
1128 void gjmp_addr(int a
)
1136 oad(0xe9, a
- ind
- 5);
1140 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1141 int gtst(int inv
, int t
)
1145 v
= vtop
->r
& VT_VALMASK
;
1147 /* fast case : can jump directly since flags are set */
1149 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
1150 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
1151 /* && or || optimization */
1152 if ((v
& 1) == inv
) {
1153 /* insert vtop->c jump list in t */
1156 p
= (int *)(cur_text_section
->data
+ *p
);
1164 if (is_float(vtop
->type
.t
) ||
1165 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1169 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1170 /* constant jmp optimization */
1171 if ((vtop
->c
.i
!= 0) != inv
)
1176 o(0xc0 + REG_VALUE(v
) * 9);
1178 t
= psym(0x85 ^ inv
, t
);
1185 /* generate an integer binary operation */
1186 void gen_opi(int op
)
1191 ll
= is64_type(vtop
[-1].type
.t
);
1192 uu
= (vtop
[-1].type
.t
& VT_UNSIGNED
) != 0;
1193 cc
= (vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
;
1197 case TOK_ADDC1
: /* add with carry generation */
1200 if (cc
&& (!ll
|| (int)vtop
->c
.ll
== vtop
->c
.ll
)) {
1207 /* XXX: generate inc and dec for smaller code ? */
1208 orex(ll
, r
, 0, 0x83);
1209 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1212 orex(ll
, r
, 0, 0x81);
1213 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1216 gv2(RC_INT
, RC_INT
);
1219 orex(ll
, r
, fr
, (opc
<< 3) | 0x01);
1220 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1223 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1229 case TOK_SUBC1
: /* sub with carry generation */
1232 case TOK_ADDC2
: /* add with carry use */
1235 case TOK_SUBC2
: /* sub with carry use */
1248 gv2(RC_INT
, RC_INT
);
1251 orex(ll
, fr
, r
, 0xaf0f); /* imul fr, r */
1252 o(0xc0 + REG_VALUE(fr
) + REG_VALUE(r
) * 8);
1264 opc
= 0xc0 | (opc
<< 3);
1270 orex(ll
, r
, 0, 0xc1); /* shl/shr/sar $xxx, r */
1271 o(opc
| REG_VALUE(r
));
1272 g(vtop
->c
.i
& (ll
? 63 : 31));
1274 /* we generate the shift in ecx */
1275 gv2(RC_INT
, RC_RCX
);
1277 orex(ll
, r
, 0, 0xd3); /* shl/shr/sar %cl, r */
1278 o(opc
| REG_VALUE(r
));
1291 /* first operand must be in eax */
1292 /* XXX: need better constraint for second operand */
1293 gv2(RC_RAX
, RC_RCX
);
1298 orex(ll
, 0, 0, uu
? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1299 orex(ll
, fr
, 0, 0xf7); /* div fr, %eax */
1300 o((uu
? 0xf0 : 0xf8) + REG_VALUE(fr
));
1301 if (op
== '%' || op
== TOK_UMOD
)
1313 void gen_opl(int op
)
1318 /* generate a floating point operation 'v = t1 op t2' instruction. The
1319 two operands are guaranteed to have the same floating point type */
1320 /* XXX: need to use ST1 too */
1321 void gen_opf(int op
)
1323 int a
, ft
, fc
, swapped
, r
;
1325 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1327 /* convert constants to memory references */
1328 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1333 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1336 /* must put at least one value in the floating point register */
1337 if ((vtop
[-1].r
& VT_LVAL
) &&
1338 (vtop
[0].r
& VT_LVAL
)) {
1344 /* swap the stack if needed so that t1 is the register and t2 is
1345 the memory reference */
1346 if (vtop
[-1].r
& VT_LVAL
) {
1350 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1351 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1352 /* load on stack second operand */
1353 load(TREG_ST0
, vtop
);
1354 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1355 if (op
== TOK_GE
|| op
== TOK_GT
)
1357 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1360 o(0xc9d9); /* fxch %st(1) */
1361 o(0xe9da); /* fucompp */
1362 o(0xe0df); /* fnstsw %ax */
1364 o(0x45e480); /* and $0x45, %ah */
1365 o(0x40fC80); /* cmp $0x40, %ah */
1366 } else if (op
== TOK_NE
) {
1367 o(0x45e480); /* and $0x45, %ah */
1368 o(0x40f480); /* xor $0x40, %ah */
1370 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1371 o(0x05c4f6); /* test $0x05, %ah */
1374 o(0x45c4f6); /* test $0x45, %ah */
1381 /* no memory reference possible for long double operations */
1382 load(TREG_ST0
, vtop
);
1406 o(0xde); /* fxxxp %st, %st(1) */
1411 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1412 /* if saved lvalue, then we must reload it */
1415 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1417 r
= get_reg(RC_INT
);
1419 v1
.r
= VT_LOCAL
| VT_LVAL
;
1425 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1428 if (op
== TOK_LE
|| op
== TOK_LT
)
1430 if (op
== TOK_LE
|| op
== TOK_GE
) {
1431 op
= 0x93; /* setae */
1433 op
= 0x97; /* seta */
1438 o(0x7e0ff3); /* movq */
1439 gen_modrm(1, r
, vtop
->sym
, fc
);
1441 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1444 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1447 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1450 o(0x2e0f); /* ucomisd */
1451 gen_modrm(0, r
, vtop
->sym
, fc
);
1458 /* no memory reference possible for long double operations */
1459 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1460 load(TREG_XMM0
, vtop
);
1480 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1481 o(0xde); /* fxxxp %st, %st(1) */
1484 /* if saved lvalue, then we must reload it */
1486 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1488 r
= get_reg(RC_INT
);
1490 v1
.r
= VT_LOCAL
| VT_LVAL
;
1496 /* movq %xmm0,%xmm1 */
1499 load(TREG_XMM0
, vtop
);
1500 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1501 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1510 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1517 gen_modrm(0, r
, vtop
->sym
, fc
);
1525 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1526 and 'long long' cases. */
1527 void gen_cvt_itof(int t
)
1529 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1532 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1533 /* signed long long to float/double/long double (unsigned case
1534 is handled generically) */
1535 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1536 o(0x242cdf); /* fildll (%rsp) */
1537 o(0x08c48348); /* add $8, %rsp */
1538 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1539 (VT_INT
| VT_UNSIGNED
)) {
1540 /* unsigned int to float/double/long double */
1541 o(0x6a); /* push $0 */
1543 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1544 o(0x242cdf); /* fildll (%rsp) */
1545 o(0x10c48348); /* add $16, %rsp */
1547 /* int to float/double/long double */
1548 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1549 o(0x2404db); /* fildl (%rsp) */
1550 o(0x08c48348); /* add $8, %rsp */
1554 save_reg(TREG_XMM0
);
1556 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1557 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1558 (VT_INT
| VT_UNSIGNED
) ||
1559 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1563 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1564 vtop
->r
= TREG_XMM0
;
1568 /* convert from one floating point type to another */
1569 void gen_cvt_ftof(int t
)
1577 if (bt
== VT_FLOAT
) {
1579 if (tbt
== VT_DOUBLE
) {
1580 o(0xc0140f); /* unpcklps */
1581 o(0xc05a0f); /* cvtps2pd */
1582 } else if (tbt
== VT_LDOUBLE
) {
1583 /* movss %xmm0,-0x10(%rsp) */
1586 o(0xf02444d9); /* flds -0x10(%rsp) */
1589 } else if (bt
== VT_DOUBLE
) {
1591 if (tbt
== VT_FLOAT
) {
1592 o(0xc0140f66); /* unpcklpd */
1593 o(0xc05a0f66); /* cvtpd2ps */
1594 } else if (tbt
== VT_LDOUBLE
) {
1595 /* movsd %xmm0,-0x10(%rsp) */
1598 o(0xf02444dd); /* fldl -0x10(%rsp) */
1603 if (tbt
== VT_DOUBLE
) {
1604 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1605 /* movsd -0x10(%rsp),%xmm0 */
1608 vtop
->r
= TREG_XMM0
;
1609 } else if (tbt
== VT_FLOAT
) {
1610 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1611 /* movss -0x10(%rsp),%xmm0 */
1614 vtop
->r
= TREG_XMM0
;
1619 /* convert fp to int 't' type */
1620 void gen_cvt_ftoi(int t
)
1622 int ft
, bt
, size
, r
;
1625 if (bt
== VT_LDOUBLE
) {
1626 gen_cvt_ftof(VT_DOUBLE
);
1636 r
= get_reg(RC_INT
);
1637 if (bt
== VT_FLOAT
) {
1639 } else if (bt
== VT_DOUBLE
) {
1644 orex(size
== 8, r
, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
1645 o(0xc0 + (REG_VALUE(r
) << 3));
1649 /* computed goto support */
1656 /* end of x86-64 code generator */
1657 /*************************************************************/
1658 #endif /* ! TARGET_DEFS_ONLY */
1659 /******************************************************/