2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which does
31 assumptions on it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
41 #define RC_XMM0 0x0020
42 #define RC_ST0 0x0040 /* only for long double */
43 #define RC_IRET RC_RAX /* function return: integer register */
44 #define RC_LRET RC_RDX /* function return: second integer register */
45 #define RC_FRET RC_XMM0 /* function return: float register */
47 /* pretty names for the registers */
/* x86-64 encodes registers in 4 bits: bit 3 goes into the REX prefix
   (REX_BASE) and the low 3 bits into the ModRM/SIB/opcode register
   field (REG_VALUE). */
66 #define REX_BASE(reg) (((reg) >> 3) & 1)
67 #define REG_VALUE(reg) ((reg) & 7)
69 /* return registers for function */
70 #define REG_IRET TREG_RAX /* single word int return register */
71 #define REG_LRET TREG_RDX /* second word return register (for long long) */
72 #define REG_FRET TREG_XMM0 /* float return register */
74 /* defined if function parameters must be evaluated in reverse order */
75 #define INVERT_FUNC_PARAMS
77 /* pointer size, in bytes */
80 /* long double size and alignment, in bytes */
81 #define LDOUBLE_SIZE 16
82 #define LDOUBLE_ALIGN 8
83 /* maximum alignment (for aligned attribute support) */
86 ST_FUNC
void gen_opl(int op
);
87 ST_FUNC
void gen_le64(int64_t c
);
89 /******************************************************/
92 #define EM_TCC_TARGET EM_X86_64
94 /* relocation type for 32 bit data relocation */
95 #define R_DATA_32 R_X86_64_32
96 #define R_DATA_PTR R_X86_64_64
97 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
98 #define R_COPY R_X86_64_COPY
100 #define ELF_START_ADDR 0x08048000
101 #define ELF_PAGE_SIZE 0x1000
103 /******************************************************/
104 #else /* ! TARGET_DEFS_ONLY */
105 /******************************************************/
109 ST_DATA
const int reg_classes
[] = {
110 /* eax */ RC_INT
| RC_RAX
,
111 /* ecx */ RC_INT
| RC_RCX
,
112 /* edx */ RC_INT
| RC_RDX
,
113 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
124 static unsigned long func_sub_sp_offset
;
125 static int func_ret_sub
;
127 /* XXX: make it faster ? */
132 if (ind1
> cur_text_section
->data_allocated
)
133 section_realloc(cur_text_section
, ind1
);
134 cur_text_section
->data
[ind
] = c
;
138 void o(unsigned int c
)
160 void gen_le64(int64_t c
)
/* Emit a REX prefix byte when required.
   Visible logic: the emitted byte is 0x40 | REX.B | REX.R<<2 | REX.W<<3,
   where REX.W comes from 'll' (64-bit operand size), REX.B from
   REX_BASE(r) and REX.R from REX_BASE(r2).
   NOTE(review): the then-branches of the two VT_CONST checks and the
   emission of the opcode byte 'b' are elided in this fragment --
   presumably r/r2 are cleared for non-register operands so REX_BASE()
   reads 0, and 'b' is emitted after the prefix; confirm against the
   complete source. */
172 void orex(int ll
, int r
, int r2
, int b
)
174 if ((r
& VT_VALMASK
) >= VT_CONST
)
176 if ((r2
& VT_VALMASK
) >= VT_CONST
)
178 if (ll
|| REX_BASE(r
) || REX_BASE(r2
))
179 o(0x40 | REX_BASE(r
) | (REX_BASE(r2
) << 2) | (ll
<< 3));
183 /* output a symbol and patch all calls to it */
184 void gsym_addr(int t
, int a
)
188 ptr
= (int *)(cur_text_section
->data
+ t
);
189 n
= *ptr
; /* next value */
200 /* psym is used to put an instruction with a data field which is a
201 reference to a symbol. It is in fact the same as oad ! */
204 static int is64_type(int t
)
206 return ((t
& VT_BTYPE
) == VT_PTR
||
207 (t
& VT_BTYPE
) == VT_FUNC
||
208 (t
& VT_BTYPE
) == VT_LLONG
);
211 static int is_sse_float(int t
) {
214 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
218 /* instruction + 4 bytes data. Return the address of the data */
219 ST_FUNC
int oad(int c
, int s
)
225 if (ind1
> cur_text_section
->data_allocated
)
226 section_realloc(cur_text_section
, ind1
);
227 *(int *)(cur_text_section
->data
+ ind
) = s
;
233 ST_FUNC
void gen_addr32(int r
, Sym
*sym
, int c
)
236 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
240 /* output constant with relocation if 'r & VT_SYM' is true */
241 ST_FUNC
void gen_addr64(int r
, Sym
*sym
, int64_t c
)
244 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
248 /* output constant with relocation if 'r & VT_SYM' is true */
249 ST_FUNC
void gen_addrpc32(int r
, Sym
*sym
, int c
)
252 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
256 /* output got address with relocation */
257 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
259 #ifndef TCC_TARGET_PE
262 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
263 sr
= cur_text_section
->reloc
;
264 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
267 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
268 cur_text_section
->data
[ind
-3],
269 cur_text_section
->data
[ind
-2],
270 cur_text_section
->data
[ind
-1]
272 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
276 /* we use add c, %xxx for displacement */
278 o(0xc0 + REG_VALUE(r
));
283 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
285 op_reg
= REG_VALUE(op_reg
) << 3;
286 if ((r
& VT_VALMASK
) == VT_CONST
) {
287 /* constant memory reference */
290 gen_gotpcrel(r
, sym
, c
);
292 gen_addrpc32(r
, sym
, c
);
294 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
295 /* currently, we use only ebp as base */
297 /* short reference */
301 oad(0x85 | op_reg
, c
);
303 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
305 g(0x80 | op_reg
| REG_VALUE(r
));
308 g(0x00 | op_reg
| REG_VALUE(r
));
311 g(0x00 | op_reg
| REG_VALUE(r
));
315 /* generate a modrm reference. 'op_reg' contains the additional 3
317 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
319 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
322 /* generate a modrm reference. 'op_reg' contains the additional 3
324 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
327 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
328 orex(1, r
, op_reg
, opcode
);
329 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
333 /* load 'r' from value 'sv' */
334 void load(int r
, SValue
*sv
)
336 int v
, t
, ft
, fc
, fr
;
341 sv
= pe_getimport(sv
, &v2
);
348 #ifndef TCC_TARGET_PE
349 /* we use indirect access via got */
350 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
351 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
352 /* use the result register as a temporary register */
353 int tr
= r
| TREG_MEM
;
355 /* we cannot use float registers as a temporary register */
356 tr
= get_reg(RC_INT
) | TREG_MEM
;
358 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
360 /* load from the temporary register */
368 if (v
== VT_LLOCAL
) {
370 v1
.r
= VT_LOCAL
| VT_LVAL
;
373 if (!(reg_classes
[fr
] & RC_INT
))
374 fr
= get_reg(RC_INT
);
378 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
379 b
= 0x6e0f66, r
= 0; /* movd */
380 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
381 b
= 0x7e0ff3, r
= 0; /* movq */
382 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
383 b
= 0xdb, r
= 5; /* fldt */
384 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
385 b
= 0xbe0f; /* movsbl */
386 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
387 b
= 0xb60f; /* movzbl */
388 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
389 b
= 0xbf0f; /* movswl */
390 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
391 b
= 0xb70f; /* movzwl */
397 gen_modrm64(b
, r
, fr
, sv
->sym
, fc
);
400 gen_modrm(r
, fr
, sv
->sym
, fc
);
407 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
408 gen_addrpc32(fr
, sv
->sym
, fc
);
410 if (sv
->sym
->type
.t
& VT_STATIC
) {
412 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
413 gen_addrpc32(fr
, sv
->sym
, fc
);
416 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
417 gen_gotpcrel(fr
, sv
->sym
, fc
);
420 } else if (is64_type(ft
)) {
421 orex(1,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
424 orex(0,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
427 } else if (v
== VT_LOCAL
) {
428 orex(1,0,r
,0x8d); /* lea xxx(%ebp), r */
429 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
430 } else if (v
== VT_CMP
) {
432 oad(0xb8 + REG_VALUE(r
), 0); /* mov $0, r */
433 orex(0,r
,0, 0x0f); /* setxx %br */
435 o(0xc0 + REG_VALUE(r
));
436 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
439 oad(0xb8 + REG_VALUE(r
), t
); /* mov $1, r */
440 o(0x05eb + (REX_BASE(r
) << 8)); /* jmp after */
443 oad(0xb8 + REG_VALUE(r
), t
^ 1); /* mov $0, r */
445 if (r
== TREG_XMM0
) {
446 assert(v
== TREG_ST0
);
447 /* gen_cvt_ftof(VT_DOUBLE); */
448 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
449 /* movsd -0x10(%rsp),%xmm0 */
452 } else if (r
== TREG_ST0
) {
453 assert(v
== TREG_XMM0
);
454 /* gen_cvt_ftof(VT_LDOUBLE); */
455 /* movsd %xmm0,-0x10(%rsp) */
458 o(0xf02444dd); /* fldl -0x10(%rsp) */
461 o(0xc0 + REG_VALUE(r
) + REG_VALUE(v
) * 8); /* mov v, r */
467 /* store register 'r' in lvalue 'v' */
468 void store(int r
, SValue
*v
)
472 /* store the REX prefix in this variable when PIC is enabled */
477 v
= pe_getimport(v
, &v2
);
482 fr
= v
->r
& VT_VALMASK
;
485 #ifndef TCC_TARGET_PE
486 /* we need to access the variable via got */
487 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
488 /* mov xx(%rip), %r11 */
490 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
491 pic
= is64_type(bt
) ? 0x49 : 0x41;
495 /* XXX: incorrect if float reg to reg */
496 if (bt
== VT_FLOAT
) {
499 o(0x7e0f); /* movd */
501 } else if (bt
== VT_DOUBLE
) {
504 o(0xd60f); /* movq */
506 } else if (bt
== VT_LDOUBLE
) {
507 o(0xc0d9); /* fld %st(0) */
515 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
517 else if (is64_type(bt
))
523 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
528 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
529 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
530 } else if (fr
!= r
) {
531 /* XXX: don't we really come here? */
533 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
536 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
537 gen_modrm(r
, v
->r
, v
->sym
, fc
);
538 } else if (fr
!= r
) {
539 /* XXX: don't we really come here? */
541 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
546 /* 'is_jmp' is '1' if it is a jump */
547 static void gcall_or_jmp(int is_jmp
)
550 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
552 if (vtop
->r
& VT_SYM
) {
553 /* relocation case */
554 greloc(cur_text_section
, vtop
->sym
,
555 ind
+ 1, R_X86_64_PC32
);
557 /* put an empty PC32 relocation */
558 put_elf_reloc(symtab_section
, cur_text_section
,
559 ind
+ 1, R_X86_64_PC32
, 0);
561 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
563 /* otherwise, indirect call */
567 o(0xff); /* call/jmp *r */
568 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
575 static const uint8_t arg_regs
[] = {
576 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
579 static int func_scratch
;
581 /* Generate function call. The function address is pushed first, then
582 all the parameters in call order. This function pops all the
583 parameters and the function address. */
585 void gen_offs_sp(int b
, int r
, int d
)
587 orex(1,0,r
& 0x100 ? 0 : r
, b
);
589 o(0x2444 | (REG_VALUE(r
) << 3));
592 o(0x2484 | (REG_VALUE(r
) << 3));
597 void gfunc_call(int nb_args
)
599 int size
, align
, r
, args_size
, i
, d
, j
, bt
, struct_size
;
600 int nb_reg_args
, gen_reg
;
602 nb_reg_args
= nb_args
;
603 args_size
= (nb_reg_args
< REGN
? REGN
: nb_reg_args
) * PTR_SIZE
;
605 /* for struct arguments, we need to call memcpy and the function
606 call breaks register passing arguments we are preparing.
607 So, we process arguments which will be passed by stack first. */
608 struct_size
= args_size
;
609 for(i
= 0; i
< nb_args
; i
++) {
610 SValue
*sv
= &vtop
[-i
];
611 bt
= (sv
->type
.t
& VT_BTYPE
);
612 if (bt
== VT_STRUCT
) {
613 size
= type_size(&sv
->type
, &align
);
614 /* align to stack align size */
615 size
= (size
+ 15) & ~15;
616 /* generate structure store */
618 gen_offs_sp(0x8d, r
, struct_size
);
621 /* generate memcpy call */
622 vset(&sv
->type
, r
| VT_LVAL
, 0);
627 } else if (bt
== VT_LDOUBLE
) {
630 gen_offs_sp(0xdb, 0x107, struct_size
);
636 if (func_scratch
< struct_size
)
637 func_scratch
= struct_size
;
639 for (i
= 0; i
< REGN
; ++i
)
640 save_reg(arg_regs
[i
]);
643 gen_reg
= nb_reg_args
;
644 struct_size
= args_size
;
646 for(i
= 0; i
< nb_args
; i
++) {
647 bt
= (vtop
->type
.t
& VT_BTYPE
);
649 if (bt
== VT_STRUCT
|| bt
== VT_LDOUBLE
) {
650 if (bt
== VT_LDOUBLE
)
653 size
= type_size(&vtop
->type
, &align
);
654 /* align to stack align size */
655 size
= (size
+ 15) & ~15;
659 gen_offs_sp(0x8d, d
, struct_size
);
660 gen_offs_sp(0x89, d
, j
*8);
663 gen_offs_sp(0x8d, d
, struct_size
);
667 } else if (is_sse_float(vtop
->type
.t
)) {
668 gv(RC_FLOAT
); /* only one float register */
671 /* movq %xmm0, j*8(%rsp) */
672 gen_offs_sp(0xd60f66, 0x100, j
*8);
674 /* movaps %xmm0, %xmmN */
678 /* mov %xmm0, %rxx */
681 o(0xc0 + REG_VALUE(d
));
687 gen_offs_sp(0x89, r
, j
*8);
691 gv(reg_classes
[d
] & ~RC_INT
);
696 o(0xc0 + REG_VALUE(d
) + REG_VALUE(r
) * 8);
710 #define FUNC_PROLOG_SIZE 11
712 /* generate function prolog of type 't' */
713 void gfunc_prolog(CType
*func_type
)
715 int addr
, reg_param_index
, bt
;
724 ind
+= FUNC_PROLOG_SIZE
;
725 func_sub_sp_offset
= ind
;
728 sym
= func_type
->ref
;
730 /* if the function returns a structure, then add an
731 implicit pointer parameter */
733 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
734 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
739 /* define parameters */
740 while ((sym
= sym
->next
) != NULL
) {
742 bt
= type
->t
& VT_BTYPE
;
743 if (reg_param_index
< REGN
) {
744 /* save arguments passed by register */
745 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
747 if (bt
== VT_STRUCT
|| bt
== VT_LDOUBLE
) {
748 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
| VT_REF
, addr
);
750 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
756 while (reg_param_index
< REGN
) {
757 if (func_type
->ref
->c
== FUNC_ELLIPSIS
)
758 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
764 /* generate function epilog */
765 void gfunc_epilog(void)
770 if (func_ret_sub
== 0) {
775 g(func_ret_sub
>> 8);
779 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
780 /* align local size to word & save local variables */
781 v
= (func_scratch
+ -loc
+ 15) & -16;
784 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
785 oad(0xb8, v
); /* mov stacksize, %eax */
786 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
787 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
788 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
790 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
791 o(0xec8148); /* sub rsp, stacksize */
795 cur_text_section
->data_offset
= saved_ind
;
796 pe_add_unwind_data(ind
, saved_ind
, v
);
797 ind
= cur_text_section
->data_offset
;
/* Add 'val' to %rsp (pops call arguments after a function call).
   The (val == (char)val) test selects the sign-extended 8-bit
   immediate encoding; its emitting statement is elided in this
   fragment.  The 32-bit immediate form 'add $val, %rsp' (opcode
   bytes 48 81 c4) is visible below. */
802 static void gadd_sp(int val
)
804 if (val
== (char)val
) {
808 oad(0xc48148, val
); /* add $xxx, %rsp */
813 static const uint8_t arg_regs
[REGN
] = {
814 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
817 /* Generate function call. The function address is pushed first, then
818 all the parameters in call order. This function pops all the
819 parameters and the function address. */
820 void gfunc_call(int nb_args
)
822 int size
, align
, r
, args_size
, i
;
826 int sse_reg
, gen_reg
;
828 /* calculate the number of integer/float arguments */
830 for(i
= 0; i
< nb_args
; i
++) {
831 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
832 args_size
+= type_size(&vtop
[-i
].type
, &align
);
833 args_size
= (args_size
+ 7) & ~7;
834 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
836 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
838 if (nb_sse_args
> 8) args_size
+= 8;
841 if (nb_reg_args
> REGN
) args_size
+= 8;
845 /* for struct arguments, we need to call memcpy and the function
846 call breaks register passing arguments we are preparing.
847 So, we process arguments which will be passed by stack first. */
849 gen_reg
= nb_reg_args
;
850 sse_reg
= nb_sse_args
;
852 /* adjust stack to align SSE boundary */
853 if (args_size
&= 15) {
854 /* fetch cpu flag before the following sub will change the value */
855 if (vtop
>= vstack
&& (vtop
->r
& VT_VALMASK
) == VT_CMP
)
858 args_size
= 16 - args_size
;
860 oad(0xec81, args_size
); /* sub $xxx, %rsp */
863 for(i
= 0; i
< nb_args
; i
++) {
864 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
865 size
= type_size(&vtop
->type
, &align
);
866 /* align to stack align size */
867 size
= (size
+ 7) & ~7;
868 /* allocate the necessary size on stack */
870 oad(0xec81, size
); /* sub $xxx, %rsp */
871 /* generate structure store */
873 orex(1, r
, 0, 0x89); /* mov %rsp, r */
874 o(0xe0 + REG_VALUE(r
));
876 /* following code breaks vtop[1], vtop[2], and vtop[3] */
877 SValue tmp1
= vtop
[1];
878 SValue tmp2
= vtop
[2];
879 SValue tmp3
= vtop
[3];
880 vset(&vtop
->type
, r
| VT_LVAL
, 0);
888 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
891 oad(0xec8148, size
); /* sub $xxx, %rsp */
892 o(0x7cdb); /* fstpt 0(%rsp) */
896 } else if (is_sse_float(vtop
->type
.t
)) {
900 o(0x50); /* push $rax */
901 /* movq %xmm0, (%rsp) */
909 /* XXX: implicit cast ? */
912 orex(0,r
,0,0x50 + REG_VALUE(r
)); /* push r */
920 save_regs(0); /* save used temporary registers */
922 /* then, we prepare register passing arguments.
923 Note that we cannot set RDX and RCX in this loop because gv()
924 may break these temporary registers. Let's use R10 and R11
926 gen_reg
= nb_reg_args
;
927 sse_reg
= nb_sse_args
;
928 for(i
= 0; i
< nb_args
; i
++) {
929 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
930 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
931 } else if (is_sse_float(vtop
->type
.t
)) {
934 gv(RC_FLOAT
); /* only one float register */
935 /* movaps %xmm0, %xmmN */
937 o(0xc0 + (sse_reg
<< 3));
942 /* XXX: implicit cast ? */
946 if (j
== 2 || j
== 3)
947 /* j=2: r10, j=3: r11 */
949 orex(1,d
,r
,0x89); /* mov */
950 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
956 /* Copy R10 and R11 into RDX and RCX, respectively */
957 if (nb_reg_args
> 2) {
958 o(0xd2894c); /* mov %r10, %rdx */
959 if (nb_reg_args
> 3) {
960 o(0xd9894c); /* mov %r11, %rcx */
964 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
972 #define FUNC_PROLOG_SIZE 11
974 static void push_arg_reg(int i
) {
976 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
979 /* generate function prolog of type 't' */
980 void gfunc_prolog(CType
*func_type
)
982 int i
, addr
, align
, size
;
983 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
987 sym
= func_type
->ref
;
990 ind
+= FUNC_PROLOG_SIZE
;
991 func_sub_sp_offset
= ind
;
994 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
995 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
996 seen_reg_num
= seen_sse_num
= 0;
997 /* frame pointer and return address */
998 seen_stack_size
= PTR_SIZE
* 2;
999 /* count the number of seen parameters */
1000 sym
= func_type
->ref
;
1001 while ((sym
= sym
->next
) != NULL
) {
1003 if (is_sse_float(type
->t
)) {
1004 if (seen_sse_num
< 8) {
1007 seen_stack_size
+= 8;
1009 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
1010 size
= type_size(type
, &align
);
1011 size
= (size
+ 7) & ~7;
1012 seen_stack_size
+= size
;
1013 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
1014 seen_stack_size
+= LDOUBLE_SIZE
;
1016 if (seen_reg_num
< REGN
) {
1019 seen_stack_size
+= 8;
1025 /* movl $0x????????, -0x10(%rbp) */
1027 gen_le32(seen_reg_num
* 8);
1028 /* movl $0x????????, -0xc(%rbp) */
1030 gen_le32(seen_sse_num
* 16 + 48);
1031 /* movl $0x????????, -0x8(%rbp) */
1033 gen_le32(seen_stack_size
);
1035 /* save all register passing arguments */
1036 for (i
= 0; i
< 8; i
++) {
1038 o(0xd60f66); /* movq */
1039 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
1040 /* movq $0, loc+8(%rbp) */
1045 for (i
= 0; i
< REGN
; i
++) {
1046 push_arg_reg(REGN
-1-i
);
1050 sym
= func_type
->ref
;
1052 reg_param_index
= 0;
1053 sse_param_index
= 0;
1055 /* if the function returns a structure, then add an
1056 implicit pointer parameter */
1057 func_vt
= sym
->type
;
1058 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
1059 push_arg_reg(reg_param_index
);
1066 /* define parameters */
1067 while ((sym
= sym
->next
) != NULL
) {
1069 size
= type_size(type
, &align
);
1070 size
= (size
+ 7) & ~7;
1071 if (is_sse_float(type
->t
)) {
1072 if (sse_param_index
< 8) {
1073 /* save arguments passed by register */
1075 o(0xd60f66); /* movq */
1076 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
1084 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
1085 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
1089 if (reg_param_index
< REGN
) {
1090 /* save arguments passed by register */
1091 push_arg_reg(reg_param_index
);
1099 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1100 VT_LOCAL
| VT_LVAL
, param_addr
);
1105 /* generate function epilog */
1106 void gfunc_epilog(void)
1110 o(0xc9); /* leave */
1111 if (func_ret_sub
== 0) {
1114 o(0xc2); /* ret n */
1116 g(func_ret_sub
>> 8);
1118 /* align local size to word & save local variables */
1119 v
= (-loc
+ 15) & -16;
1121 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1122 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1123 o(0xec8148); /* sub rsp, stacksize */
1130 /* generate a jump to a label */
1133 return psym(0xe9, t
);
1136 /* generate a jump to a fixed address */
1137 void gjmp_addr(int a
)
1145 oad(0xe9, a
- ind
- 5);
1149 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1150 int gtst(int inv
, int t
)
1154 v
= vtop
->r
& VT_VALMASK
;
1156 /* fast case : can jump directly since flags are set */
1158 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
1159 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
1160 /* && or || optimization */
1161 if ((v
& 1) == inv
) {
1162 /* insert vtop->c jump list in t */
1165 p
= (int *)(cur_text_section
->data
+ *p
);
1173 if (is_float(vtop
->type
.t
) ||
1174 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1178 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1179 /* constant jmp optimization */
1180 if ((vtop
->c
.i
!= 0) != inv
)
1185 o(0xc0 + REG_VALUE(v
) * 9);
1187 t
= psym(0x85 ^ inv
, t
);
1194 /* generate an integer binary operation */
1195 void gen_opi(int op
)
1200 ll
= is64_type(vtop
[-1].type
.t
);
1201 uu
= (vtop
[-1].type
.t
& VT_UNSIGNED
) != 0;
1202 cc
= (vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
;
1206 case TOK_ADDC1
: /* add with carry generation */
1209 if (cc
&& (!ll
|| (int)vtop
->c
.ll
== vtop
->c
.ll
)) {
1216 /* XXX: generate inc and dec for smaller code ? */
1217 orex(ll
, r
, 0, 0x83);
1218 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1221 orex(ll
, r
, 0, 0x81);
1222 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1225 gv2(RC_INT
, RC_INT
);
1228 orex(ll
, r
, fr
, (opc
<< 3) | 0x01);
1229 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1232 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1238 case TOK_SUBC1
: /* sub with carry generation */
1241 case TOK_ADDC2
: /* add with carry use */
1244 case TOK_SUBC2
: /* sub with carry use */
1257 gv2(RC_INT
, RC_INT
);
1260 orex(ll
, fr
, r
, 0xaf0f); /* imul fr, r */
1261 o(0xc0 + REG_VALUE(fr
) + REG_VALUE(r
) * 8);
1273 opc
= 0xc0 | (opc
<< 3);
1279 orex(ll
, r
, 0, 0xc1); /* shl/shr/sar $xxx, r */
1280 o(opc
| REG_VALUE(r
));
1281 g(vtop
->c
.i
& (ll
? 63 : 31));
1283 /* we generate the shift in ecx */
1284 gv2(RC_INT
, RC_RCX
);
1286 orex(ll
, r
, 0, 0xd3); /* shl/shr/sar %cl, r */
1287 o(opc
| REG_VALUE(r
));
1300 /* first operand must be in eax */
1301 /* XXX: need better constraint for second operand */
1302 gv2(RC_RAX
, RC_RCX
);
1307 orex(ll
, 0, 0, uu
? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1308 orex(ll
, fr
, 0, 0xf7); /* div fr, %eax */
1309 o((uu
? 0xf0 : 0xf8) + REG_VALUE(fr
));
1310 if (op
== '%' || op
== TOK_UMOD
)
1322 void gen_opl(int op
)
1327 /* generate a floating point operation 'v = t1 op t2' instruction. The
1328 two operands are guaranteed to have the same floating point type */
1329 /* XXX: need to use ST1 too */
1330 void gen_opf(int op
)
1332 int a
, ft
, fc
, swapped
, r
;
1334 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1336 /* convert constants to memory references */
1337 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1342 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1345 /* must put at least one value in the floating point register */
1346 if ((vtop
[-1].r
& VT_LVAL
) &&
1347 (vtop
[0].r
& VT_LVAL
)) {
1353 /* swap the stack if needed so that t1 is the register and t2 is
1354 the memory reference */
1355 if (vtop
[-1].r
& VT_LVAL
) {
1359 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1360 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1361 /* load on stack second operand */
1362 load(TREG_ST0
, vtop
);
1363 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1364 if (op
== TOK_GE
|| op
== TOK_GT
)
1366 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1369 o(0xc9d9); /* fxch %st(1) */
1370 o(0xe9da); /* fucompp */
1371 o(0xe0df); /* fnstsw %ax */
1373 o(0x45e480); /* and $0x45, %ah */
1374 o(0x40fC80); /* cmp $0x40, %ah */
1375 } else if (op
== TOK_NE
) {
1376 o(0x45e480); /* and $0x45, %ah */
1377 o(0x40f480); /* xor $0x40, %ah */
1379 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1380 o(0x05c4f6); /* test $0x05, %ah */
1383 o(0x45c4f6); /* test $0x45, %ah */
1390 /* no memory reference possible for long double operations */
1391 load(TREG_ST0
, vtop
);
1415 o(0xde); /* fxxxp %st, %st(1) */
1420 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1421 /* if saved lvalue, then we must reload it */
1424 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1426 r
= get_reg(RC_INT
);
1428 v1
.r
= VT_LOCAL
| VT_LVAL
;
1434 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1437 if (op
== TOK_LE
|| op
== TOK_LT
)
1439 if (op
== TOK_LE
|| op
== TOK_GE
) {
1440 op
= 0x93; /* setae */
1442 op
= 0x97; /* seta */
1447 o(0x7e0ff3); /* movq */
1448 gen_modrm(1, r
, vtop
->sym
, fc
);
1450 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1453 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1456 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1459 o(0x2e0f); /* ucomisd */
1460 gen_modrm(0, r
, vtop
->sym
, fc
);
1467 /* no memory reference possible for long double operations */
1468 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1469 load(TREG_XMM0
, vtop
);
1489 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1490 o(0xde); /* fxxxp %st, %st(1) */
1493 /* if saved lvalue, then we must reload it */
1495 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1497 r
= get_reg(RC_INT
);
1499 v1
.r
= VT_LOCAL
| VT_LVAL
;
1505 /* movq %xmm0,%xmm1 */
1508 load(TREG_XMM0
, vtop
);
1509 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1510 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1519 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1526 gen_modrm(0, r
, vtop
->sym
, fc
);
1534 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1535 and 'long long' cases. */
1536 void gen_cvt_itof(int t
)
1538 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1541 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1542 /* signed long long to float/double/long double (unsigned case
1543 is handled generically) */
1544 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1545 o(0x242cdf); /* fildll (%rsp) */
1546 o(0x08c48348); /* add $8, %rsp */
1547 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1548 (VT_INT
| VT_UNSIGNED
)) {
1549 /* unsigned int to float/double/long double */
1550 o(0x6a); /* push $0 */
1552 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1553 o(0x242cdf); /* fildll (%rsp) */
1554 o(0x10c48348); /* add $16, %rsp */
1556 /* int to float/double/long double */
1557 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1558 o(0x2404db); /* fildl (%rsp) */
1559 o(0x08c48348); /* add $8, %rsp */
1563 save_reg(TREG_XMM0
);
1565 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1566 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1567 (VT_INT
| VT_UNSIGNED
) ||
1568 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1572 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1573 vtop
->r
= TREG_XMM0
;
1577 /* convert from one floating point type to another */
1578 void gen_cvt_ftof(int t
)
1586 if (bt
== VT_FLOAT
) {
1588 if (tbt
== VT_DOUBLE
) {
1589 o(0xc0140f); /* unpcklps */
1590 o(0xc05a0f); /* cvtps2pd */
1591 } else if (tbt
== VT_LDOUBLE
) {
1592 /* movss %xmm0,-0x10(%rsp) */
1595 o(0xf02444d9); /* flds -0x10(%rsp) */
1598 } else if (bt
== VT_DOUBLE
) {
1600 if (tbt
== VT_FLOAT
) {
1601 o(0xc0140f66); /* unpcklpd */
1602 o(0xc05a0f66); /* cvtpd2ps */
1603 } else if (tbt
== VT_LDOUBLE
) {
1604 /* movsd %xmm0,-0x10(%rsp) */
1607 o(0xf02444dd); /* fldl -0x10(%rsp) */
1612 if (tbt
== VT_DOUBLE
) {
1613 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1614 /* movsd -0x10(%rsp),%xmm0 */
1617 vtop
->r
= TREG_XMM0
;
1618 } else if (tbt
== VT_FLOAT
) {
1619 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1620 /* movss -0x10(%rsp),%xmm0 */
1623 vtop
->r
= TREG_XMM0
;
1628 /* convert fp to int 't' type */
1629 void gen_cvt_ftoi(int t
)
1631 int ft
, bt
, size
, r
;
1634 if (bt
== VT_LDOUBLE
) {
1635 gen_cvt_ftof(VT_DOUBLE
);
1645 r
= get_reg(RC_INT
);
1646 if (bt
== VT_FLOAT
) {
1648 } else if (bt
== VT_DOUBLE
) {
1653 orex(size
== 8, r
, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
1654 o(0xc0 + (REG_VALUE(r
) << 3));
1658 /* computed goto support */
1665 /* end of x86-64 code generator */
1666 /*************************************************************/
1667 #endif /* ! TARGET_DEFS_ONLY */
1668 /******************************************************/