2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see the gv2() code, which
31 makes assumptions about this ordering). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
37 #define RC_XMM0 0x0020
38 #define RC_ST0 0x0040 /* only for long double */
39 #define RC_IRET RC_RAX /* function return: integer register */
40 #define RC_LRET RC_RDX /* function return: second integer register */
41 #define RC_FRET RC_XMM0 /* function return: float register */
43 /* pretty names for the registers */
61 #define REX_BASE(reg) (((reg) >> 3) & 1)
62 #define REG_VALUE(reg) ((reg) & 7)
64 /* return registers for function */
65 #define REG_IRET TREG_RAX /* single word int return register */
66 #define REG_LRET TREG_RDX /* second word return register (for long long) */
67 #define REG_FRET TREG_XMM0 /* float return register */
69 /* defined if function parameters must be evaluated in reverse order */
70 #define INVERT_FUNC_PARAMS
72 /* pointer size, in bytes */
75 /* long double size and alignment, in bytes */
76 #define LDOUBLE_SIZE 16
77 #define LDOUBLE_ALIGN 8
78 /* maximum alignment (for aligned attribute support) */
81 ST_FUNC
void gen_opl(int op
);
82 ST_FUNC
void gen_le64(int64_t c
);
84 /******************************************************/
87 #define EM_TCC_TARGET EM_X86_64
89 /* relocation type for 32 bit data relocation */
90 #define R_DATA_32 R_X86_64_32
91 #define R_DATA_PTR R_X86_64_64
92 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
93 #define R_COPY R_X86_64_COPY
95 #define ELF_START_ADDR 0x08048000
96 #define ELF_PAGE_SIZE 0x1000
98 /******************************************************/
99 #else /* ! TARGET_DEFS_ONLY */
100 /******************************************************/
104 ST_DATA
const int reg_classes
[NB_REGS
] = {
105 /* eax */ RC_INT
| RC_RAX
,
106 /* ecx */ RC_INT
| RC_RCX
,
107 /* edx */ RC_INT
| RC_RDX
,
108 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
112 static unsigned long func_sub_sp_offset
;
113 static int func_ret_sub
;
115 /* XXX: make it faster ? */
120 if (ind1
> cur_text_section
->data_allocated
)
121 section_realloc(cur_text_section
, ind1
);
122 cur_text_section
->data
[ind
] = c
;
126 void o(unsigned int c
)
148 void gen_le64(int64_t c
)
160 /* output a symbol and patch all calls to it */
161 void gsym_addr(int t
, int a
)
165 ptr
= (int *)(cur_text_section
->data
+ t
);
166 n
= *ptr
; /* next value */
177 /* psym is used to put an instruction with a data field which is a
178 reference to a symbol. It is in fact the same as oad ! */
181 static int is64_type(int t
)
183 return ((t
& VT_BTYPE
) == VT_PTR
||
184 (t
& VT_BTYPE
) == VT_FUNC
||
185 (t
& VT_BTYPE
) == VT_LLONG
);
188 static int is_sse_float(int t
) {
191 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
194 /* instruction + 4 bytes data. Return the address of the data */
195 ST_FUNC
int oad(int c
, int s
)
201 if (ind1
> cur_text_section
->data_allocated
)
202 section_realloc(cur_text_section
, ind1
);
203 *(int *)(cur_text_section
->data
+ ind
) = s
;
209 ST_FUNC
void gen_addr32(int r
, Sym
*sym
, int c
)
212 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
216 /* output constant with relocation if 'r & VT_SYM' is true */
217 ST_FUNC
void gen_addr64(int r
, Sym
*sym
, int64_t c
)
220 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
224 /* output constant with relocation if 'r & VT_SYM' is true */
225 ST_FUNC
void gen_addrpc32(int r
, Sym
*sym
, int c
)
228 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
232 /* output got address with relocation */
233 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
235 #ifndef TCC_TARGET_PE
238 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
239 sr
= cur_text_section
->reloc
;
240 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
243 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
244 cur_text_section
->data
[ind
-3],
245 cur_text_section
->data
[ind
-2],
246 cur_text_section
->data
[ind
-1]
248 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
253 /* we use add c, %xxx for displacement */
254 o(0x48 + REX_BASE(r
));
256 o(0xc0 + REG_VALUE(r
));
261 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
263 op_reg
= REG_VALUE(op_reg
) << 3;
264 if ((r
& VT_VALMASK
) == VT_CONST
) {
265 /* constant memory reference */
268 gen_gotpcrel(r
, sym
, c
);
270 gen_addrpc32(r
, sym
, c
);
272 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
273 /* currently, we use only ebp as base */
275 /* short reference */
279 oad(0x85 | op_reg
, c
);
281 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
283 g(0x80 | op_reg
| REG_VALUE(r
));
286 g(0x00 | op_reg
| REG_VALUE(r
));
289 g(0x00 | op_reg
| (r
& VT_VALMASK
));
293 /* generate a modrm reference. 'op_reg' contains the additional 3
295 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
297 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
300 /* generate a modrm reference. 'op_reg' contains the additional 3
302 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
305 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
306 if ((r
& VT_VALMASK
) != VT_CONST
&&
307 (r
& VT_VALMASK
) != VT_LOCAL
) {
308 rex
|= REX_BASE(VT_VALMASK
& r
);
312 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
313 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
317 /* load 'r' from value 'sv' */
318 void load(int r
, SValue
*sv
)
320 int v
, t
, ft
, fc
, fr
;
324 if (pe_dllimport(r
, sv
, load
))
332 #ifndef TCC_TARGET_PE
333 /* we use indirect access via got */
334 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
335 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
336 /* use the result register as a temporary register */
337 int tr
= r
| TREG_MEM
;
339 /* we cannot use float registers as a temporary register */
340 tr
= get_reg(RC_INT
) | TREG_MEM
;
342 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
344 /* load from the temporary register */
351 if (v
== VT_LLOCAL
) {
353 v1
.r
= VT_LOCAL
| VT_LVAL
;
358 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
359 o(0x6e0f66); /* movd */
361 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
362 o(0x7e0ff3); /* movq */
364 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
367 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
368 o(0xbe0f); /* movsbl */
369 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
370 o(0xb60f); /* movzbl */
371 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
372 o(0xbf0f); /* movswl */
373 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
374 o(0xb70f); /* movzwl */
375 } else if (is64_type(ft
)) {
376 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
381 gen_modrm(r
, fr
, sv
->sym
, fc
);
387 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
388 gen_addrpc32(fr
, sv
->sym
, fc
);
390 if (sv
->sym
->type
.t
& VT_STATIC
) {
392 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
393 gen_addrpc32(fr
, sv
->sym
, fc
);
396 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
397 gen_gotpcrel(r
, sv
->sym
, fc
);
400 } else if (is64_type(ft
)) {
402 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
405 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
408 } else if (v
== VT_LOCAL
) {
409 o(0x48 | REX_BASE(r
));
410 o(0x8d); /* lea xxx(%ebp), r */
411 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
412 } else if (v
== VT_CMP
) {
413 oad(0xb8 + r
, 0); /* mov $0, r */
414 o(0x0f); /* setxx %br */
417 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
419 oad(0xb8 + r
, t
); /* mov $1, r */
420 o(0x05eb); /* jmp after */
422 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
424 if (r
== TREG_XMM0
) {
425 assert(v
== TREG_ST0
);
426 /* gen_cvt_ftof(VT_DOUBLE); */
427 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
428 /* movsd -0x10(%rsp),%xmm0 */
431 } else if (r
== TREG_ST0
) {
432 assert(v
== TREG_XMM0
);
433 /* gen_cvt_ftof(VT_LDOUBLE); */
434 /* movsd %xmm0,-0x10(%rsp) */
437 o(0xf02444dd); /* fldl -0x10(%rsp) */
439 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
441 o(0xc0 + r
+ v
* 8); /* mov v, r */
447 /* store register 'r' in lvalue 'v' */
448 void store(int r
, SValue
*v
)
452 /* store the REX prefix in this variable when PIC is enabled */
456 if (pe_dllimport(r
, v
, store
))
462 fr
= v
->r
& VT_VALMASK
;
465 #ifndef TCC_TARGET_PE
466 /* we need to access the variable via got */
467 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
468 /* mov xx(%rip), %r11 */
470 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
471 pic
= is64_type(bt
) ? 0x49 : 0x41;
475 /* XXX: incorrect if float reg to reg */
476 if (bt
== VT_FLOAT
) {
479 o(0x7e0f); /* movd */
481 } else if (bt
== VT_DOUBLE
) {
484 o(0xd60f); /* movq */
486 } else if (bt
== VT_LDOUBLE
) {
487 o(0xc0d9); /* fld %st(0) */
495 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
497 else if (is64_type(bt
))
503 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
508 if (fr
== VT_CONST
||
511 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
512 } else if (fr
!= r
) {
513 /* XXX: is this branch ever actually reached? */
515 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
518 if (fr
== VT_CONST
||
521 gen_modrm(r
, v
->r
, v
->sym
, fc
);
522 } else if (fr
!= r
) {
523 /* XXX: is this branch ever actually reached? */
525 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
530 /* 'is_jmp' is '1' if it is a jump */
531 static void gcall_or_jmp(int is_jmp
)
534 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
536 if (vtop
->r
& VT_SYM
) {
537 /* relocation case */
538 greloc(cur_text_section
, vtop
->sym
,
539 ind
+ 1, R_X86_64_PC32
);
541 /* put an empty PC32 relocation */
542 put_elf_reloc(symtab_section
, cur_text_section
,
543 ind
+ 1, R_X86_64_PC32
, 0);
545 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
547 /* otherwise, indirect call */
551 o(0xff); /* call/jmp *r */
552 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
559 static const uint8_t arg_regs
[] = {
560 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
563 static int func_scratch
;
565 /* Generate function call. The function address is pushed first, then
566 all the parameters in call order. This function pops all the
567 parameters and the function address. */
569 void gen_offs_sp(int b
, int r
, int d
)
572 o(b
| 0x4000 | (r
<< 11));
575 o(b
| 0x8000 | (r
<< 11));
580 void gfunc_call(int nb_args
)
582 int size
, align
, r
, args_size
, i
, d
, j
, bt
;
583 int nb_reg_args
, gen_reg
;
585 /* calculate the number of integer/float arguments */
587 for(i
= 0; i
< nb_args
; i
++) {
588 bt
= (vtop
[-i
].type
.t
& VT_BTYPE
);
589 if (bt
!= VT_STRUCT
&& bt
!= VT_LDOUBLE
)
593 args_size
= (nb_reg_args
< REGN
? REGN
: nb_reg_args
) * PTR_SIZE
;
594 save_regs(0); /* save used temporary registers */
596 /* for struct arguments, we need to call memcpy and the function
597 call breaks register passing arguments we are preparing.
598 So, we process arguments which will be passed by stack first. */
599 for(i
= 0; i
< nb_args
; i
++) {
600 SValue
*sv
= &vtop
[-i
];
601 bt
= (sv
->type
.t
& VT_BTYPE
);
602 if (bt
== VT_STRUCT
) {
603 size
= type_size(&sv
->type
, &align
);
604 /* align to stack align size */
605 size
= (size
+ 15) & ~16;
606 /* generate structure store */
609 gen_offs_sp(0x24048d, r
, args_size
);
612 /* generate memcpy call */
613 vset(&sv
->type
, r
| VT_LVAL
, 0);
618 } else if (bt
== VT_LDOUBLE
) {
621 gen_offs_sp(0x243cdb, 0, args_size
);
627 if (func_scratch
< args_size
)
628 func_scratch
= args_size
;
630 gen_reg
= nb_reg_args
;
631 for(i
= 0; i
< nb_args
; i
++) {
632 bt
= (vtop
->type
.t
& VT_BTYPE
);
633 if (bt
== VT_STRUCT
|| bt
== VT_LDOUBLE
) {
636 } else if (is_sse_float(vtop
->type
.t
)) {
637 gv(RC_FLOAT
); /* only one float register */
641 /* movq %xmm0, j*8(%rsp) */
642 gen_offs_sp(0x2444d6, 0, j
*8);
645 /* movaps %xmm0, %xmmN */
648 /* mov %xmm0, %rxx */
650 o(0x7e0f48 + (d
>= 8));
658 gen_offs_sp(0x244489, r
, j
*8);
662 o(0x8948 + (d
>= 8));
663 o(0xc0 + r
*8 + (d
& 7));
674 #define FUNC_PROLOG_SIZE 11
676 /* generate function prolog for a function of type 'func_type' */
677 void gfunc_prolog(CType
*func_type
)
679 int addr
, align
, size
, reg_param_index
, bt
;
688 ind
+= FUNC_PROLOG_SIZE
;
689 func_sub_sp_offset
= ind
;
692 sym
= func_type
->ref
;
694 /* if the function returns a structure, then add an
695 implicit pointer parameter */
697 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
698 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
703 /* define parameters */
704 while ((sym
= sym
->next
) != NULL
) {
706 bt
= type
->t
& VT_BTYPE
;
707 if (bt
== VT_STRUCT
|| bt
== VT_LDOUBLE
)
709 if (reg_param_index
< REGN
) {
710 /* save arguments passed by register */
711 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
713 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
718 while (reg_param_index
< REGN
) {
719 if (func_type
->ref
->c
== FUNC_ELLIPSIS
)
720 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
725 sym
= func_type
->ref
;
726 while ((sym
= sym
->next
) != NULL
) {
728 bt
= type
->t
& VT_BTYPE
;
729 if (bt
== VT_STRUCT
|| bt
== VT_LDOUBLE
) {
730 size
= type_size(type
, &align
);
731 size
= (size
+ 15) & -16;
732 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
, addr
);
738 /* generate function epilog */
739 void gfunc_epilog(void)
744 if (func_ret_sub
== 0) {
749 g(func_ret_sub
>> 8);
753 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
754 /* align local size to word & save local variables */
755 v
= (func_scratch
+ -loc
+ 15) & -16;
757 pe_add_unwind_data(ind
, saved_ind
, v
);
760 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
761 oad(0xb8, v
); /* mov stacksize, %eax */
762 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
763 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
764 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
766 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
767 o(0xec8148); /* sub rsp, stacksize */
775 static void gadd_sp(int val
)
777 if (val
== (char)val
) {
781 oad(0xc48148, val
); /* add $xxx, %rsp */
786 static const uint8_t arg_regs
[REGN
] = {
787 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
790 /* Generate function call. The function address is pushed first, then
791 all the parameters in call order. This function pops all the
792 parameters and the function address. */
793 void gfunc_call(int nb_args
)
795 int size
, align
, r
, args_size
, i
;
799 int sse_reg
, gen_reg
;
801 /* calculate the number of integer/float arguments */
803 for(i
= 0; i
< nb_args
; i
++) {
804 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
805 args_size
+= type_size(&vtop
->type
, &align
);
806 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
808 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
810 if (nb_sse_args
> 8) args_size
+= 8;
813 if (nb_reg_args
> REGN
) args_size
+= 8;
817 /* for struct arguments, we need to call memcpy and the function
818 call breaks register passing arguments we are preparing.
819 So, we process arguments which will be passed by stack first. */
821 gen_reg
= nb_reg_args
;
822 sse_reg
= nb_sse_args
;
824 /* adjust stack to align SSE boundary */
825 if (args_size
&= 8) {
826 o(0x50); /* push $rax */
828 for(i
= 0; i
< nb_args
; i
++) {
829 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
830 size
= type_size(&vtop
->type
, &align
);
831 /* align to stack align size */
832 size
= (size
+ 3) & ~3;
833 /* allocate the necessary size on stack */
835 oad(0xec81, size
); /* sub $xxx, %rsp */
836 /* generate structure store */
838 o(0x48 + REX_BASE(r
));
839 o(0x89); /* mov %rsp, r */
842 /* following code breaks vtop[1] */
843 SValue tmp
= vtop
[1];
844 vset(&vtop
->type
, r
| VT_LVAL
, 0);
850 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
853 oad(0xec8148, size
); /* sub $xxx, %rsp */
854 o(0x7cdb); /* fstpt 0(%rsp) */
858 } else if (is_sse_float(vtop
->type
.t
)) {
862 o(0x50); /* push $rax */
863 /* movq %xmm0, (%rsp) */
871 /* XXX: implicit cast ? */
874 o(0x50 + r
); /* push r */
882 /* then, we prepare register passing arguments.
883 Note that we cannot set RDX and RCX in this loop because gv()
884 may break these temporary registers. Let's use R10 and R11
886 gen_reg
= nb_reg_args
;
887 sse_reg
= nb_sse_args
;
888 for(i
= 0; i
< nb_args
; i
++) {
889 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
890 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
891 } else if (is_sse_float(vtop
->type
.t
)) {
894 gv(RC_FLOAT
); /* only one float register */
895 /* movaps %xmm0, %xmmN */
897 o(0xc0 + (sse_reg
<< 3));
902 /* XXX: implicit cast ? */
907 o(0xc0 + r
* 8 + arg_regs
[j
]);
910 /* j=2: r10, j=3: r11 */
914 /* j=4: r8, j=5: r9 */
915 o(0xc0 + r
* 8 + j
- 4);
922 save_regs(0); /* save used temporary registers */
924 /* Copy R10 and R11 into RDX and RCX, respectively */
925 if (nb_reg_args
> 2) {
926 o(0xd2894c); /* mov %r10, %rdx */
927 if (nb_reg_args
> 3) {
928 o(0xd9894c); /* mov %r11, %rcx */
932 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
940 #define FUNC_PROLOG_SIZE 11
942 static void push_arg_reg(int i
) {
944 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
947 /* generate function prolog for a function of type 'func_type' */
948 void gfunc_prolog(CType
*func_type
)
950 int i
, addr
, align
, size
;
951 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
955 sym
= func_type
->ref
;
958 ind
+= FUNC_PROLOG_SIZE
;
959 func_sub_sp_offset
= ind
;
962 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
963 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
964 seen_reg_num
= seen_sse_num
= 0;
965 /* frame pointer and return address */
966 seen_stack_size
= PTR_SIZE
* 2;
967 /* count the number of seen parameters */
968 sym
= func_type
->ref
;
969 while ((sym
= sym
->next
) != NULL
) {
971 if (is_sse_float(type
->t
)) {
972 if (seen_sse_num
< 8) {
975 seen_stack_size
+= 8;
977 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
978 size
= type_size(type
, &align
);
979 size
= (size
+ 3) & ~3;
980 seen_stack_size
+= size
;
981 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
982 seen_stack_size
+= LDOUBLE_SIZE
;
984 if (seen_reg_num
< REGN
) {
987 seen_stack_size
+= 8;
993 /* movl $0x????????, -0x10(%rbp) */
995 gen_le32(seen_reg_num
* 8);
996 /* movl $0x????????, -0xc(%rbp) */
998 gen_le32(seen_sse_num
* 16 + 48);
999 /* movl $0x????????, -0x8(%rbp) */
1001 gen_le32(seen_stack_size
);
1003 /* save all register passing arguments */
1004 for (i
= 0; i
< 8; i
++) {
1006 o(0xd60f66); /* movq */
1007 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
1008 /* movq $0, loc+8(%rbp) */
1013 for (i
= 0; i
< REGN
; i
++) {
1014 push_arg_reg(REGN
-1-i
);
1018 sym
= func_type
->ref
;
1020 reg_param_index
= 0;
1021 sse_param_index
= 0;
1023 /* if the function returns a structure, then add an
1024 implicit pointer parameter */
1025 func_vt
= sym
->type
;
1026 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
1027 push_arg_reg(reg_param_index
);
1034 /* define parameters */
1035 while ((sym
= sym
->next
) != NULL
) {
1037 size
= type_size(type
, &align
);
1038 size
= (size
+ 3) & ~3;
1039 if (is_sse_float(type
->t
)) {
1040 if (sse_param_index
< 8) {
1041 /* save arguments passed by register */
1043 o(0xd60f66); /* movq */
1044 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
1052 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
1053 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
1057 if (reg_param_index
< REGN
) {
1058 /* save arguments passed by register */
1059 push_arg_reg(reg_param_index
);
1067 sym_push(sym
->v
& ~SYM_FIELD
, type
,
1068 VT_LOCAL
| VT_LVAL
, param_addr
);
1073 /* generate function epilog */
1074 void gfunc_epilog(void)
1078 o(0xc9); /* leave */
1079 if (func_ret_sub
== 0) {
1082 o(0xc2); /* ret n */
1084 g(func_ret_sub
>> 8);
1086 /* align local size to word & save local variables */
1087 v
= (-loc
+ 15) & -16;
1089 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
1090 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1091 o(0xec8148); /* sub rsp, stacksize */
1098 /* generate a jump to a label */
1101 return psym(0xe9, t
);
1104 /* generate a jump to a fixed address */
1105 void gjmp_addr(int a
)
1113 oad(0xe9, a
- ind
- 5);
1117 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1118 int gtst(int inv
, int t
)
1122 v
= vtop
->r
& VT_VALMASK
;
1124 /* fast case : can jump directly since flags are set */
1126 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
1127 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
1128 /* && or || optimization */
1129 if ((v
& 1) == inv
) {
1130 /* insert vtop->c jump list in t */
1133 p
= (int *)(cur_text_section
->data
+ *p
);
1141 if (is_float(vtop
->type
.t
) ||
1142 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1146 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1147 /* constant jmp optimization */
1148 if ((vtop
->c
.i
!= 0) != inv
)
1155 t
= psym(0x85 ^ inv
, t
);
1162 /* generate an integer binary operation */
1163 void gen_opi(int op
)
1169 case TOK_ADDC1
: /* add with carry generation */
1172 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
1173 !is64_type(vtop
->type
.t
)) {
1177 if (is64_type(vtop
->type
.t
)) {
1178 o(0x48 | REX_BASE(r
));
1183 /* XXX: generate inc and dec for smaller code ? */
1185 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1189 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1192 gv2(RC_INT
, RC_INT
);
1196 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1197 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1198 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
1200 o((opc
<< 3) | 0x01);
1201 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1204 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1210 case TOK_SUBC1
: /* sub with carry generation */
1213 case TOK_ADDC2
: /* add with carry use */
1216 case TOK_SUBC2
: /* sub with carry use */
1229 gv2(RC_INT
, RC_INT
);
1232 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1233 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1234 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
1237 o(0xaf0f); /* imul fr, r */
1238 o(0xc0 + fr
+ r
* 8);
1249 opc
= 0xc0 | (opc
<< 3);
1250 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1254 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1255 o(0x48 | REX_BASE(r
));
1262 o(0xc1); /* shl/shr/sar $xxx, r */
1266 /* we generate the shift in ecx */
1267 gv2(RC_INT
, RC_RCX
);
1269 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
1270 o(0x48 | REX_BASE(r
));
1272 o(0xd3); /* shl/shr/sar %cl, r */
1283 /* first operand must be in eax */
1284 /* XXX: need better constraint for second operand */
1285 gv2(RC_RAX
, RC_RCX
);
1290 if (op
== TOK_UMULL
) {
1291 o(0xf7); /* mul fr */
1293 vtop
->r2
= TREG_RDX
;
1296 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
1297 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1298 o(0xd23148); /* xor %rdx, %rdx */
1299 o(0x48 + REX_BASE(fr
));
1301 o(0xd231); /* xor %edx, %edx */
1303 o(0xf7); /* div fr, %eax */
1306 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1307 o(0x9948); /* cqto */
1308 o(0x48 + REX_BASE(fr
));
1312 o(0xf7); /* idiv fr, %eax */
1315 if (op
== '%' || op
== TOK_UMOD
)
1328 void gen_opl(int op
)
1333 /* generate a floating point operation 'v = t1 op t2' instruction. The
1334 two operands are guaranteed to have the same floating point type */
1335 /* XXX: need to use ST1 too */
1336 void gen_opf(int op
)
1338 int a
, ft
, fc
, swapped
, r
;
1340 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1342 /* convert constants to memory references */
1343 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1348 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1351 /* must put at least one value in the floating point register */
1352 if ((vtop
[-1].r
& VT_LVAL
) &&
1353 (vtop
[0].r
& VT_LVAL
)) {
1359 /* swap the stack if needed so that t1 is the register and t2 is
1360 the memory reference */
1361 if (vtop
[-1].r
& VT_LVAL
) {
1365 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1366 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1367 /* load on stack second operand */
1368 load(TREG_ST0
, vtop
);
1369 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1370 if (op
== TOK_GE
|| op
== TOK_GT
)
1372 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1375 o(0xc9d9); /* fxch %st(1) */
1376 o(0xe9da); /* fucompp */
1377 o(0xe0df); /* fnstsw %ax */
1379 o(0x45e480); /* and $0x45, %ah */
1380 o(0x40fC80); /* cmp $0x40, %ah */
1381 } else if (op
== TOK_NE
) {
1382 o(0x45e480); /* and $0x45, %ah */
1383 o(0x40f480); /* xor $0x40, %ah */
1385 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1386 o(0x05c4f6); /* test $0x05, %ah */
1389 o(0x45c4f6); /* test $0x45, %ah */
1396 /* no memory reference possible for long double operations */
1397 load(TREG_ST0
, vtop
);
1421 o(0xde); /* fxxxp %st, %st(1) */
1426 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1427 /* if saved lvalue, then we must reload it */
1430 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1432 r
= get_reg(RC_INT
);
1434 v1
.r
= VT_LOCAL
| VT_LVAL
;
1440 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1443 if (op
== TOK_LE
|| op
== TOK_LT
)
1445 if (op
== TOK_LE
|| op
== TOK_GE
) {
1446 op
= 0x93; /* setae */
1448 op
= 0x97; /* seta */
1453 o(0x7e0ff3); /* movq */
1454 gen_modrm(1, r
, vtop
->sym
, fc
);
1456 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1459 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1462 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1465 o(0x2e0f); /* ucomisd */
1466 gen_modrm(0, r
, vtop
->sym
, fc
);
1473 /* no memory reference possible for long double operations */
1474 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1475 load(TREG_XMM0
, vtop
);
1495 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1496 o(0xde); /* fxxxp %st, %st(1) */
1499 /* if saved lvalue, then we must reload it */
1501 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1503 r
= get_reg(RC_INT
);
1505 v1
.r
= VT_LOCAL
| VT_LVAL
;
1511 /* movq %xmm0,%xmm1 */
1514 load(TREG_XMM0
, vtop
);
1515 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1516 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1525 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1532 gen_modrm(0, r
, vtop
->sym
, fc
);
1540 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1541 and 'long long' cases. */
1542 void gen_cvt_itof(int t
)
1544 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1547 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1548 /* signed long long to float/double/long double (unsigned case
1549 is handled generically) */
1550 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1551 o(0x242cdf); /* fildll (%rsp) */
1552 o(0x08c48348); /* add $8, %rsp */
1553 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1554 (VT_INT
| VT_UNSIGNED
)) {
1555 /* unsigned int to float/double/long double */
1556 o(0x6a); /* push $0 */
1558 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1559 o(0x242cdf); /* fildll (%rsp) */
1560 o(0x10c48348); /* add $16, %rsp */
1562 /* int to float/double/long double */
1563 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1564 o(0x2404db); /* fildl (%rsp) */
1565 o(0x08c48348); /* add $8, %rsp */
1569 save_reg(TREG_XMM0
);
1571 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1572 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1573 (VT_INT
| VT_UNSIGNED
) ||
1574 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1578 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1579 vtop
->r
= TREG_XMM0
;
1583 /* convert from one floating point type to another */
1584 void gen_cvt_ftof(int t
)
1592 if (bt
== VT_FLOAT
) {
1594 if (tbt
== VT_DOUBLE
) {
1595 o(0xc0140f); /* unpcklps */
1596 o(0xc05a0f); /* cvtps2pd */
1597 } else if (tbt
== VT_LDOUBLE
) {
1598 /* movss %xmm0,-0x10(%rsp) */
1601 o(0xf02444d9); /* flds -0x10(%rsp) */
1604 } else if (bt
== VT_DOUBLE
) {
1606 if (tbt
== VT_FLOAT
) {
1607 o(0xc0140f66); /* unpcklpd */
1608 o(0xc05a0f66); /* cvtpd2ps */
1609 } else if (tbt
== VT_LDOUBLE
) {
1610 /* movsd %xmm0,-0x10(%rsp) */
1613 o(0xf02444dd); /* fldl -0x10(%rsp) */
1618 if (tbt
== VT_DOUBLE
) {
1619 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1620 /* movsd -0x10(%rsp),%xmm0 */
1623 vtop
->r
= TREG_XMM0
;
1624 } else if (tbt
== VT_FLOAT
) {
1625 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1626 /* movss -0x10(%rsp),%xmm0 */
1629 vtop
->r
= TREG_XMM0
;
1634 /* convert fp to int 't' type */
1635 void gen_cvt_ftoi(int t
)
1637 int ft
, bt
, size
, r
;
1640 if (bt
== VT_LDOUBLE
) {
1641 gen_cvt_ftof(VT_DOUBLE
);
1651 r
= get_reg(RC_INT
);
1652 if (bt
== VT_FLOAT
) {
1654 } else if (bt
== VT_DOUBLE
) {
1660 o(0x48 + REX_BASE(r
));
1662 o(0x2c0f); /* cvttss2si or cvttsd2si */
1663 o(0xc0 + (REG_VALUE(r
) << 3));
1667 /* computed goto support */
1674 /* end of x86-64 code generator */
1675 /*************************************************************/
1676 #endif /* ! TARGET_DEFS_ONLY */
1677 /******************************************************/