2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which does
31 assumptions on it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
41 #define RC_XMM0 0x0020
42 #define RC_ST0 0x0040 /* only for long double */
43 #define RC_IRET RC_RAX /* function return: integer register */
44 #define RC_LRET RC_RDX /* function return: second integer register */
45 #define RC_FRET RC_XMM0 /* function return: float register */
47 /* pretty names for the registers */
66 #define REX_BASE(reg) (((reg) >> 3) & 1)
67 #define REG_VALUE(reg) ((reg) & 7)
69 /* return registers for function */
70 #define REG_IRET TREG_RAX /* single word int return register */
71 #define REG_LRET TREG_RDX /* second word return register (for long long) */
72 #define REG_FRET TREG_XMM0 /* float return register */
74 /* defined if function parameters must be evaluated in reverse order */
75 #define INVERT_FUNC_PARAMS
77 /* pointer size, in bytes */
80 /* long double size and alignment, in bytes */
81 #define LDOUBLE_SIZE 16
82 #define LDOUBLE_ALIGN 8
83 /* maximum alignment (for aligned attribute support) */
86 ST_FUNC
void gen_opl(int op
);
87 ST_FUNC
void gen_le64(int64_t c
);
89 /******************************************************/
92 #define EM_TCC_TARGET EM_X86_64
94 /* relocation type for 32 bit data relocation */
95 #define R_DATA_32 R_X86_64_32
96 #define R_DATA_PTR R_X86_64_64
97 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
98 #define R_COPY R_X86_64_COPY
100 #define ELF_START_ADDR 0x08048000
101 #define ELF_PAGE_SIZE 0x1000
103 /******************************************************/
104 #else /* ! TARGET_DEFS_ONLY */
105 /******************************************************/
109 ST_DATA
const int reg_classes
[] = {
110 /* eax */ RC_INT
| RC_RAX
,
111 /* ecx */ RC_INT
| RC_RCX
,
112 /* edx */ RC_INT
| RC_RDX
,
113 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
124 static unsigned long func_sub_sp_offset
;
125 static int func_ret_sub
;
127 /* XXX: make it faster ? */
132 if (ind1
> cur_text_section
->data_allocated
)
133 section_realloc(cur_text_section
, ind1
);
134 cur_text_section
->data
[ind
] = c
;
138 void o(unsigned int c
)
160 void gen_le64(int64_t c
)
172 void orex(int ll
, int r
, int r2
, int b
)
174 if ((r
& VT_VALMASK
) >= VT_CONST
)
176 if ((r2
& VT_VALMASK
) >= VT_CONST
)
178 if (ll
|| REX_BASE(r
) || REX_BASE(r2
))
179 o(0x40 | REX_BASE(r
) | (REX_BASE(r2
) << 2) | (ll
<< 3));
183 /* output a symbol and patch all calls to it */
184 void gsym_addr(int t
, int a
)
188 ptr
= (int *)(cur_text_section
->data
+ t
);
189 n
= *ptr
; /* next value */
200 /* psym is used to put an instruction with a data field which is a
201 reference to a symbol. It is in fact the same as oad ! */
204 static int is64_type(int t
)
206 return ((t
& VT_BTYPE
) == VT_PTR
||
207 (t
& VT_BTYPE
) == VT_FUNC
||
208 (t
& VT_BTYPE
) == VT_LLONG
);
211 static int is_sse_float(int t
) {
214 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
218 /* instruction + 4 bytes data. Return the address of the data */
219 ST_FUNC
int oad(int c
, int s
)
225 if (ind1
> cur_text_section
->data_allocated
)
226 section_realloc(cur_text_section
, ind1
);
227 *(int *)(cur_text_section
->data
+ ind
) = s
;
233 ST_FUNC
void gen_addr32(int r
, Sym
*sym
, int c
)
236 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
240 /* output constant with relocation if 'r & VT_SYM' is true */
241 ST_FUNC
void gen_addr64(int r
, Sym
*sym
, int64_t c
)
244 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
248 /* output constant with relocation if 'r & VT_SYM' is true */
249 ST_FUNC
void gen_addrpc32(int r
, Sym
*sym
, int c
)
252 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
256 /* output got address with relocation */
257 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
259 #ifndef TCC_TARGET_PE
262 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
263 sr
= cur_text_section
->reloc
;
264 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
267 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
268 cur_text_section
->data
[ind
-3],
269 cur_text_section
->data
[ind
-2],
270 cur_text_section
->data
[ind
-1]
272 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
276 /* we use add c, %xxx for displacement */
278 o(0xc0 + REG_VALUE(r
));
283 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
285 op_reg
= REG_VALUE(op_reg
) << 3;
286 if ((r
& VT_VALMASK
) == VT_CONST
) {
287 /* constant memory reference */
290 gen_gotpcrel(r
, sym
, c
);
292 gen_addrpc32(r
, sym
, c
);
294 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
295 /* currently, we use only ebp as base */
297 /* short reference */
301 oad(0x85 | op_reg
, c
);
303 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
305 g(0x80 | op_reg
| REG_VALUE(r
));
308 g(0x00 | op_reg
| REG_VALUE(r
));
311 g(0x00 | op_reg
| REG_VALUE(r
));
315 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
317 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
319 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
322 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
324 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
327 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
328 orex(1, r
, op_reg
, opcode
);
329 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
333 /* load 'r' from value 'sv' */
334 void load(int r
, SValue
*sv
)
336 int v
, t
, ft
, fc
, fr
;
341 sv
= pe_getimport(sv
, &v2
);
348 #ifndef TCC_TARGET_PE
349 /* we use indirect access via got */
350 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
351 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
352 /* use the result register as a temporal register */
353 int tr
= r
| TREG_MEM
;
355 /* we cannot use float registers as a temporal register */
356 tr
= get_reg(RC_INT
) | TREG_MEM
;
358 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
360 /* load from the temporal register */
368 if (v
== VT_LLOCAL
) {
370 v1
.r
= VT_LOCAL
| VT_LVAL
;
373 if (!(reg_classes
[fr
] & RC_INT
))
374 fr
= get_reg(RC_INT
);
378 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
379 b
= 0x6e0f66, r
= 0; /* movd */
380 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
381 b
= 0x7e0ff3, r
= 0; /* movq */
382 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
383 b
= 0xdb, r
= 5; /* fldt */
384 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
385 b
= 0xbe0f; /* movsbl */
386 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
387 b
= 0xb60f; /* movzbl */
388 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
389 b
= 0xbf0f; /* movswl */
390 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
391 b
= 0xb70f; /* movzwl */
397 gen_modrm64(b
, r
, fr
, sv
->sym
, fc
);
400 gen_modrm(r
, fr
, sv
->sym
, fc
);
407 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
408 gen_addrpc32(fr
, sv
->sym
, fc
);
410 if (sv
->sym
->type
.t
& VT_STATIC
) {
412 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
413 gen_addrpc32(fr
, sv
->sym
, fc
);
416 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
417 gen_gotpcrel(fr
, sv
->sym
, fc
);
420 } else if (is64_type(ft
)) {
421 orex(1,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
424 orex(0,r
,0, 0xb8 + REG_VALUE(r
)); /* mov $xx, r */
427 } else if (v
== VT_LOCAL
) {
428 orex(1,0,r
,0x8d); /* lea xxx(%ebp), r */
429 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
430 } else if (v
== VT_CMP
) {
432 oad(0xb8 + REG_VALUE(r
), 0); /* mov $0, r */
433 orex(0,r
,0, 0x0f); /* setxx %br */
435 o(0xc0 + REG_VALUE(r
));
436 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
439 oad(0xb8 + REG_VALUE(r
), t
); /* mov $1, r */
440 o(0x05eb + (REX_BASE(r
) << 8)); /* jmp after */
443 oad(0xb8 + REG_VALUE(r
), t
^ 1); /* mov $0, r */
445 if (r
== TREG_XMM0
) {
446 assert(v
== TREG_ST0
);
447 /* gen_cvt_ftof(VT_DOUBLE); */
448 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
449 /* movsd -0x10(%rsp),%xmm0 */
452 } else if (r
== TREG_ST0
) {
453 assert(v
== TREG_XMM0
);
454 /* gen_cvt_ftof(VT_LDOUBLE); */
455 /* movsd %xmm0,-0x10(%rsp) */
458 o(0xf02444dd); /* fldl -0x10(%rsp) */
461 o(0xc0 + REG_VALUE(r
) + REG_VALUE(v
) * 8); /* mov v, r */
467 /* store register 'r' in lvalue 'v' */
468 void store(int r
, SValue
*v
)
472 /* store the REX prefix in this variable when PIC is enabled */
477 v
= pe_getimport(v
, &v2
);
482 fr
= v
->r
& VT_VALMASK
;
485 #ifndef TCC_TARGET_PE
486 /* we need to access the variable via got */
487 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
488 /* mov xx(%rip), %r11 */
490 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
491 pic
= is64_type(bt
) ? 0x49 : 0x41;
495 /* XXX: incorrect if float reg to reg */
496 if (bt
== VT_FLOAT
) {
499 o(0x7e0f); /* movd */
501 } else if (bt
== VT_DOUBLE
) {
504 o(0xd60f); /* movq */
506 } else if (bt
== VT_LDOUBLE
) {
507 o(0xc0d9); /* fld %st(0) */
515 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
517 else if (is64_type(bt
))
523 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
528 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
529 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
530 } else if (fr
!= r
) {
531 /* XXX: don't we really come here? */
533 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
536 if (fr
== VT_CONST
|| fr
== VT_LOCAL
|| (v
->r
& VT_LVAL
)) {
537 gen_modrm(r
, v
->r
, v
->sym
, fc
);
538 } else if (fr
!= r
) {
539 /* XXX: don't we really come here? */
541 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
546 /* 'is_jmp' is '1' if it is a jump */
547 static void gcall_or_jmp(int is_jmp
)
550 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
552 if (vtop
->r
& VT_SYM
) {
553 /* relocation case */
554 greloc(cur_text_section
, vtop
->sym
,
555 ind
+ 1, R_X86_64_PC32
);
557 /* put an empty PC32 relocation */
558 put_elf_reloc(symtab_section
, cur_text_section
,
559 ind
+ 1, R_X86_64_PC32
, 0);
561 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
563 /* otherwise, indirect call */
567 o(0xff); /* call/jmp *r */
568 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
575 static const uint8_t arg_regs
[] = {
576 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
579 static int func_scratch
;
581 /* Generate function call. The function address is pushed first, then
582 all the parameters in call order. This functions pops all the
583 parameters and the function address. */
/* Emit opcode 'b' addressing d(%rsp).  Bit 0x100 in 'r' appears to
   mark a non-register operand (reg field comes from the low bits
   only) -- NOTE(review): confirm against callers. */
void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        /* disp8 form: modrm 0x44, SIB 0x24 (%rsp base) */
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        /* disp32 form: modrm 0x84, SIB 0x24 */
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
597 void gfunc_call(int nb_args
)
599 int size
, align
, r
, args_size
, i
, d
, j
, bt
, struct_size
;
600 int nb_reg_args
, gen_reg
;
602 nb_reg_args
= nb_args
;
603 args_size
= (nb_reg_args
< REGN
? REGN
: nb_reg_args
) * PTR_SIZE
;
605 /* for struct arguments, we need to call memcpy and the function
606 call breaks register passing arguments we are preparing.
607 So, we process arguments which will be passed by stack first. */
608 struct_size
= args_size
;
609 for(i
= 0; i
< nb_args
; i
++) {
610 SValue
*sv
= &vtop
[-i
];
611 bt
= (sv
->type
.t
& VT_BTYPE
);
612 if (bt
== VT_STRUCT
) {
613 size
= type_size(&sv
->type
, &align
);
614 /* align to stack align size */
615 size
= (size
+ 15) & ~15;
616 /* generate structure store */
618 gen_offs_sp(0x8d, r
, struct_size
);
621 /* generate memcpy call */
622 vset(&sv
->type
, r
| VT_LVAL
, 0);
627 } else if (bt
== VT_LDOUBLE
) {
630 gen_offs_sp(0xdb, 0x107, struct_size
);
636 if (func_scratch
< struct_size
)
637 func_scratch
= struct_size
;
639 for (i
= 0; i
< REGN
; ++i
)
640 save_reg(arg_regs
[i
]);
643 gen_reg
= nb_reg_args
;
644 struct_size
= args_size
;
646 for(i
= 0; i
< nb_args
; i
++) {
647 bt
= (vtop
->type
.t
& VT_BTYPE
);
649 if (bt
== VT_STRUCT
|| bt
== VT_LDOUBLE
) {
650 if (bt
== VT_LDOUBLE
)
653 size
= type_size(&vtop
->type
, &align
);
654 /* align to stack align size */
655 size
= (size
+ 15) & ~15;
659 gen_offs_sp(0x8d, d
, struct_size
);
660 gen_offs_sp(0x89, d
, j
*8);
663 gen_offs_sp(0x8d, d
, struct_size
);
667 } else if (is_sse_float(vtop
->type
.t
)) {
668 gv(RC_FLOAT
); /* only one float register */
671 /* movq %xmm0, j*8(%rsp) */
672 gen_offs_sp(0xd60f66, 0x100, j
*8);
674 /* movaps %xmm0, %xmmN */
678 /* mov %xmm0, %rxx */
681 o(0xc0 + REG_VALUE(d
));
687 gen_offs_sp(0x89, r
, j
*8);
691 gv(reg_classes
[d
] & ~RC_INT
);
696 o(0xc0 + REG_VALUE(d
) + REG_VALUE(r
) * 8);
710 #define FUNC_PROLOG_SIZE 11
712 /* generate function prolog of type 't' */
713 void gfunc_prolog(CType
*func_type
)
715 int addr
, reg_param_index
, bt
;
724 ind
+= FUNC_PROLOG_SIZE
;
725 func_sub_sp_offset
= ind
;
728 sym
= func_type
->ref
;
730 /* if the function returns a structure, then add an
731 implicit pointer parameter */
733 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
734 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
739 /* define parameters */
740 while ((sym
= sym
->next
) != NULL
) {
742 bt
= type
->t
& VT_BTYPE
;
743 if (reg_param_index
< REGN
) {
744 /* save arguments passed by register */
745 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
747 if (bt
== VT_STRUCT
|| bt
== VT_LDOUBLE
) {
748 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
| VT_REF
, addr
);
750 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
756 while (reg_param_index
< REGN
) {
757 if (func_type
->ref
->c
== FUNC_ELLIPSIS
)
758 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
764 /* generate function epilog */
765 void gfunc_epilog(void)
770 if (func_ret_sub
== 0) {
775 g(func_ret_sub
>> 8);
779 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
780 /* align local size to word & save local variables */
781 v
= (func_scratch
+ -loc
+ 15) & -16;
784 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
785 oad(0xb8, v
); /* mov stacksize, %eax */
786 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
787 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
788 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
790 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
791 o(0xec8148); /* sub rsp, stacksize */
795 cur_text_section
->data_offset
= saved_ind
;
796 pe_add_unwind_data(ind
, saved_ind
, v
);
797 ind
= cur_text_section
->data_offset
;
/* add 'val' to %rsp, using the short imm8 encoding when it fits */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);            /* add $imm8, %rsp */
        g(val);
    } else {
        oad(0xc48148, val);     /* add $xxx, %rsp */
    }
}
813 static const uint8_t arg_regs
[REGN
] = {
814 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
817 /* Generate function call. The function address is pushed first, then
818 all the parameters in call order. This functions pops all the
819 parameters and the function address. */
820 void gfunc_call(int nb_args
)
822 int size
, align
, r
, args_size
, i
;
825 int sse_reg
, gen_reg
;
827 /* calculate the number of integer/float arguments */
829 for(i
= 0; i
< nb_args
; i
++) {
830 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
831 args_size
+= type_size(&vtop
[-i
].type
, &align
);
832 args_size
= (args_size
+ 7) & ~7;
833 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
835 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
837 if (nb_sse_args
> 8) args_size
+= 8;
840 if (nb_reg_args
> REGN
) args_size
+= 8;
844 /* for struct arguments, we need to call memcpy and the function
845 call breaks register passing arguments we are preparing.
846 So, we process arguments which will be passed by stack first. */
847 gen_reg
= nb_reg_args
;
848 sse_reg
= nb_sse_args
;
850 /* adjust stack to align SSE boundary */
851 if (args_size
&= 15) {
852 /* fetch cpu flag before the following sub will change the value */
853 if (vtop
>= vstack
&& (vtop
->r
& VT_VALMASK
) == VT_CMP
)
856 args_size
= 16 - args_size
;
858 oad(0xec81, args_size
); /* sub $xxx, %rsp */
861 for(i
= 0; i
< nb_args
; i
++) {
862 /* Swap argument to top, it will possibly be changed here,
863 and might use more temps. All arguments must remain on the
864 stack, so that get_reg can correctly evict some of them onto
865 stack. We could use also use a vrott(nb_args) at the end
866 of this loop, but this seems faster. */
867 SValue tmp
= vtop
[0];
870 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
871 size
= type_size(&vtop
->type
, &align
);
872 /* align to stack align size */
873 size
= (size
+ 7) & ~7;
874 /* allocate the necessary size on stack */
876 oad(0xec81, size
); /* sub $xxx, %rsp */
877 /* generate structure store */
879 orex(1, r
, 0, 0x89); /* mov %rsp, r */
880 o(0xe0 + REG_VALUE(r
));
881 vset(&vtop
->type
, r
| VT_LVAL
, 0);
885 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
888 oad(0xec8148, size
); /* sub $xxx, %rsp */
889 o(0x7cdb); /* fstpt 0(%rsp) */
893 } else if (is_sse_float(vtop
->type
.t
)) {
897 o(0x50); /* push $rax */
898 /* movq %xmm0, (%rsp) */
906 /* XXX: implicit cast ? */
909 orex(0,r
,0,0x50 + REG_VALUE(r
)); /* push r */
914 /* And swap the argument back to it's original position. */
920 /* XXX This should be superfluous. */
921 save_regs(0); /* save used temporary registers */
923 /* then, we prepare register passing arguments.
924 Note that we cannot set RDX and RCX in this loop because gv()
925 may break these temporary registers. Let's use R10 and R11
927 gen_reg
= nb_reg_args
;
928 sse_reg
= nb_sse_args
;
929 for(i
= 0; i
< nb_args
; i
++) {
930 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
931 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
932 } else if (is_sse_float(vtop
->type
.t
)) {
935 gv(RC_FLOAT
); /* only one float register */
936 /* movaps %xmm0, %xmmN */
938 o(0xc0 + (sse_reg
<< 3));
943 /* XXX: implicit cast ? */
947 if (j
== 2 || j
== 3)
948 /* j=2: r10, j=3: r11 */
950 orex(1,d
,r
,0x89); /* mov */
951 o(0xc0 + REG_VALUE(r
) * 8 + REG_VALUE(d
));
957 /* We shouldn't have many operands on the stack anymore, but the
958 call address itself is still there, and it might be in %eax
959 (or edx/ecx) currently, which the below writes would clobber.
960 So evict all remaining operands here. */
963 /* Copy R10 and R11 into RDX and RCX, respectively */
964 if (nb_reg_args
> 2) {
965 o(0xd2894c); /* mov %r10, %rdx */
966 if (nb_reg_args
> 3) {
967 o(0xd9894c); /* mov %r11, %rcx */
971 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
979 #define FUNC_PROLOG_SIZE 11
981 static void push_arg_reg(int i
) {
983 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
986 /* generate function prolog of type 't' */
987 void gfunc_prolog(CType
*func_type
)
989 int i
, addr
, align
, size
;
990 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
994 sym
= func_type
->ref
;
997 ind
+= FUNC_PROLOG_SIZE
;
998 func_sub_sp_offset
= ind
;
1001 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
1002 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
1003 seen_reg_num
= seen_sse_num
= 0;
1004 /* frame pointer and return address */
1005 seen_stack_size
= PTR_SIZE
* 2;
1006 /* count the number of seen parameters */
1007 sym
= func_type
->ref
;
1008 while ((sym
= sym
->next
) != NULL
) {
1010 if (is_sse_float(type
->t
)) {
1011 if (seen_sse_num
< 8) {
1014 seen_stack_size
+= 8;
1016 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
1017 size
= type_size(type
, &align
);
1018 size
= (size
+ 7) & ~7;
1019 seen_stack_size
+= size
;
1020 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
1021 seen_stack_size
+= LDOUBLE_SIZE
;
1023 if (seen_reg_num
< REGN
) {
1026 seen_stack_size
+= 8;
1032 /* movl $0x????????, -0x10(%rbp) */
1034 gen_le32(seen_reg_num
* 8);
1035 /* movl $0x????????, -0xc(%rbp) */
1037 gen_le32(seen_sse_num
* 16 + 48);
1038 /* movl $0x????????, -0x8(%rbp) */
1040 gen_le32(seen_stack_size
);
1042 /* save all register passing arguments */
1043 for (i
= 0; i
< 8; i
++) {
1045 o(0xd60f66); /* movq */
1046 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
1047 /* movq $0, loc+8(%rbp) */
1052 for (i
= 0; i
< REGN
; i
++) {
1053 push_arg_reg(REGN
-1-i
);
1057 sym
= func_type
->ref
;
1059 reg_param_index
= 0;
1060 sse_param_index
= 0;
1062 /* if the function returns a structure, then add an
1063 implicit pointer parameter */
1064 func_vt
= sym
->type
;
1065 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
1066 push_arg_reg(reg_param_index
);
1073 /* define parameters */
1074 while ((sym
= sym
->next
) != NULL
) {
1076 size
= type_size(type
, &align
);
1077 size
= (size
+ 7) & ~7;
1078 if (is_sse_float(type
->t
)) {
1079 if (sse_param_index
< 8) {
1080 /* save arguments passed by register */
1082 o(0xd60f66); /* movq */
1083 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
1091 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
1092 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
1096 if (reg_param_index
< REGN
) {
1097 /* save arguments passed by register */
1098 push_arg_reg(reg_param_index
);
1106 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1107 VT_LOCAL
| VT_LVAL
, param_addr
);
1112 /* generate function epilog */
1113 void gfunc_epilog(void)
1117 o(0xc9); /* leave */
1118 if (func_ret_sub
== 0) {
1121 o(0xc2); /* ret n */
1123 g(func_ret_sub
>> 8);
1125 /* align local size to word & save local variables */
1126 v
= (-loc
+ 15) & -16;
1128 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1129 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1130 o(0xec8148); /* sub rsp, stacksize */
1137 /* generate a jump to a label */
1140 return psym(0xe9, t
);
1143 /* generate a jump to a fixed address */
1144 void gjmp_addr(int a
)
1152 oad(0xe9, a
- ind
- 5);
1156 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1157 int gtst(int inv
, int t
)
1161 v
= vtop
->r
& VT_VALMASK
;
1163 /* fast case : can jump directly since flags are set */
1165 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
1166 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
1167 /* && or || optimization */
1168 if ((v
& 1) == inv
) {
1169 /* insert vtop->c jump list in t */
1172 p
= (int *)(cur_text_section
->data
+ *p
);
1180 if (is_float(vtop
->type
.t
) ||
1181 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1185 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1186 /* constant jmp optimization */
1187 if ((vtop
->c
.i
!= 0) != inv
)
1192 o(0xc0 + REG_VALUE(v
) * 9);
1194 t
= psym(0x85 ^ inv
, t
);
1201 /* generate an integer binary operation */
1202 void gen_opi(int op
)
1207 ll
= is64_type(vtop
[-1].type
.t
);
1208 uu
= (vtop
[-1].type
.t
& VT_UNSIGNED
) != 0;
1209 cc
= (vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
;
1213 case TOK_ADDC1
: /* add with carry generation */
1216 if (cc
&& (!ll
|| (int)vtop
->c
.ll
== vtop
->c
.ll
)) {
1223 /* XXX: generate inc and dec for smaller code ? */
1224 orex(ll
, r
, 0, 0x83);
1225 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1228 orex(ll
, r
, 0, 0x81);
1229 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1232 gv2(RC_INT
, RC_INT
);
1235 orex(ll
, r
, fr
, (opc
<< 3) | 0x01);
1236 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1239 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1245 case TOK_SUBC1
: /* sub with carry generation */
1248 case TOK_ADDC2
: /* add with carry use */
1251 case TOK_SUBC2
: /* sub with carry use */
1264 gv2(RC_INT
, RC_INT
);
1267 orex(ll
, fr
, r
, 0xaf0f); /* imul fr, r */
1268 o(0xc0 + REG_VALUE(fr
) + REG_VALUE(r
) * 8);
1280 opc
= 0xc0 | (opc
<< 3);
1286 orex(ll
, r
, 0, 0xc1); /* shl/shr/sar $xxx, r */
1287 o(opc
| REG_VALUE(r
));
1288 g(vtop
->c
.i
& (ll
? 63 : 31));
1290 /* we generate the shift in ecx */
1291 gv2(RC_INT
, RC_RCX
);
1293 orex(ll
, r
, 0, 0xd3); /* shl/shr/sar %cl, r */
1294 o(opc
| REG_VALUE(r
));
1307 /* first operand must be in eax */
1308 /* XXX: need better constraint for second operand */
1309 gv2(RC_RAX
, RC_RCX
);
1314 orex(ll
, 0, 0, uu
? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1315 orex(ll
, fr
, 0, 0xf7); /* div fr, %eax */
1316 o((uu
? 0xf0 : 0xf8) + REG_VALUE(fr
));
1317 if (op
== '%' || op
== TOK_UMOD
)
1329 void gen_opl(int op
)
1334 /* generate a floating point operation 'v = t1 op t2' instruction. The
1335 two operands are guaranted to have the same floating point type */
1336 /* XXX: need to use ST1 too */
1337 void gen_opf(int op
)
1339 int a
, ft
, fc
, swapped
, r
;
1341 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1343 /* convert constants to memory references */
1344 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1349 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1352 /* must put at least one value in the floating point register */
1353 if ((vtop
[-1].r
& VT_LVAL
) &&
1354 (vtop
[0].r
& VT_LVAL
)) {
1360 /* swap the stack if needed so that t1 is the register and t2 is
1361 the memory reference */
1362 if (vtop
[-1].r
& VT_LVAL
) {
1366 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1367 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1368 /* load on stack second operand */
1369 load(TREG_ST0
, vtop
);
1370 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1371 if (op
== TOK_GE
|| op
== TOK_GT
)
1373 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1376 o(0xc9d9); /* fxch %st(1) */
1377 o(0xe9da); /* fucompp */
1378 o(0xe0df); /* fnstsw %ax */
1380 o(0x45e480); /* and $0x45, %ah */
1381 o(0x40fC80); /* cmp $0x40, %ah */
1382 } else if (op
== TOK_NE
) {
1383 o(0x45e480); /* and $0x45, %ah */
1384 o(0x40f480); /* xor $0x40, %ah */
1386 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1387 o(0x05c4f6); /* test $0x05, %ah */
1390 o(0x45c4f6); /* test $0x45, %ah */
1397 /* no memory reference possible for long double operations */
1398 load(TREG_ST0
, vtop
);
1422 o(0xde); /* fxxxp %st, %st(1) */
1427 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1428 /* if saved lvalue, then we must reload it */
1431 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1433 r
= get_reg(RC_INT
);
1435 v1
.r
= VT_LOCAL
| VT_LVAL
;
1441 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1444 if (op
== TOK_LE
|| op
== TOK_LT
)
1446 if (op
== TOK_LE
|| op
== TOK_GE
) {
1447 op
= 0x93; /* setae */
1449 op
= 0x97; /* seta */
1454 o(0x7e0ff3); /* movq */
1455 gen_modrm(1, r
, vtop
->sym
, fc
);
1457 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1460 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1463 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1466 o(0x2e0f); /* ucomisd */
1467 gen_modrm(0, r
, vtop
->sym
, fc
);
1474 /* no memory reference possible for long double operations */
1475 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1476 load(TREG_XMM0
, vtop
);
1496 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1497 o(0xde); /* fxxxp %st, %st(1) */
1500 /* if saved lvalue, then we must reload it */
1502 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1504 r
= get_reg(RC_INT
);
1506 v1
.r
= VT_LOCAL
| VT_LVAL
;
1512 /* movq %xmm0,%xmm1 */
1515 load(TREG_XMM0
, vtop
);
1516 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1517 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1526 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1533 gen_modrm(0, r
, vtop
->sym
, fc
);
1541 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1542 and 'long long' cases. */
1543 void gen_cvt_itof(int t
)
1545 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1548 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1549 /* signed long long to float/double/long double (unsigned case
1550 is handled generically) */
1551 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1552 o(0x242cdf); /* fildll (%rsp) */
1553 o(0x08c48348); /* add $8, %rsp */
1554 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1555 (VT_INT
| VT_UNSIGNED
)) {
1556 /* unsigned int to float/double/long double */
1557 o(0x6a); /* push $0 */
1559 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1560 o(0x242cdf); /* fildll (%rsp) */
1561 o(0x10c48348); /* add $16, %rsp */
1563 /* int to float/double/long double */
1564 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1565 o(0x2404db); /* fildl (%rsp) */
1566 o(0x08c48348); /* add $8, %rsp */
1570 save_reg(TREG_XMM0
);
1572 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1573 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1574 (VT_INT
| VT_UNSIGNED
) ||
1575 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1579 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1580 vtop
->r
= TREG_XMM0
;
1584 /* convert from one floating point type to another */
1585 void gen_cvt_ftof(int t
)
1593 if (bt
== VT_FLOAT
) {
1595 if (tbt
== VT_DOUBLE
) {
1596 o(0xc0140f); /* unpcklps */
1597 o(0xc05a0f); /* cvtps2pd */
1598 } else if (tbt
== VT_LDOUBLE
) {
1599 /* movss %xmm0,-0x10(%rsp) */
1602 o(0xf02444d9); /* flds -0x10(%rsp) */
1605 } else if (bt
== VT_DOUBLE
) {
1607 if (tbt
== VT_FLOAT
) {
1608 o(0xc0140f66); /* unpcklpd */
1609 o(0xc05a0f66); /* cvtpd2ps */
1610 } else if (tbt
== VT_LDOUBLE
) {
1611 /* movsd %xmm0,-0x10(%rsp) */
1614 o(0xf02444dd); /* fldl -0x10(%rsp) */
1619 if (tbt
== VT_DOUBLE
) {
1620 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1621 /* movsd -0x10(%rsp),%xmm0 */
1624 vtop
->r
= TREG_XMM0
;
1625 } else if (tbt
== VT_FLOAT
) {
1626 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1627 /* movss -0x10(%rsp),%xmm0 */
1630 vtop
->r
= TREG_XMM0
;
1635 /* convert fp to int 't' type */
1636 void gen_cvt_ftoi(int t
)
1638 int ft
, bt
, size
, r
;
1641 if (bt
== VT_LDOUBLE
) {
1642 gen_cvt_ftof(VT_DOUBLE
);
1652 r
= get_reg(RC_INT
);
1653 if (bt
== VT_FLOAT
) {
1655 } else if (bt
== VT_DOUBLE
) {
1660 orex(size
== 8, r
, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
1661 o(0xc0 + (REG_VALUE(r
) << 3));
1665 /* computed goto support */
1672 /* end of x86-64 code generator */
1673 /*************************************************************/
1674 #endif /* ! TARGET_DEFS_ONLY */
1675 /******************************************************/