/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
/* number of available registers */

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT   0x0001 /* generic integer register */
#define RC_FLOAT 0x0002 /* generic float register */
#define RC_XMM0  0x0020
#define RC_ST0   0x0040 /* only for long double */
#define RC_IRET  RC_RAX  /* function return: integer register */
#define RC_LRET  RC_RDX  /* function return: second integer register */
#define RC_FRET  RC_XMM0 /* function return: float register */

/* pretty names for the registers */

/* REX_BASE: 1 when 'reg' is one of the high registers and therefore
   needs a REX prefix bit; REG_VALUE: the low 3 bits that go into the
   ModRM/opcode byte */
#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)
63 const int reg_classes
[NB_REGS
] = {
64 /* eax */ RC_INT
| RC_RAX
,
65 /* ecx */ RC_INT
| RC_RCX
,
66 /* edx */ RC_INT
| RC_RDX
,
67 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 8
/* maximum alignment (for aligned attribute support) */

/******************************************************/

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32  R_X86_64_32
#define R_DATA_PTR R_X86_64_64
#define R_JMP_SLOT R_X86_64_JUMP_SLOT
#define R_COPY     R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE  0x1000
102 /******************************************************/
/* code offset of the prolog's "sub $xx,%rsp" patch site; gfunc_epilog
   rewinds 'ind' to this point to emit the final prolog (see
   FUNC_PROLOG_SIZE usage) */
static unsigned long func_sub_sp_offset;
/* extra bytes popped on function return; 0 emits a plain ret, otherwise
   a ret $imm16 is generated in gfunc_epilog — TODO confirm against the
   (not visible here) epilog code that emits it */
static int func_ret_sub;
107 /* XXX: make it faster ? */
112 if (ind1
> cur_text_section
->data_allocated
)
113 section_realloc(cur_text_section
, ind1
);
114 cur_text_section
->data
[ind
] = c
;
118 void o(unsigned int c
)
140 void gen_le64(int64_t c
)
152 /* output a symbol and patch all calls to it */
153 void gsym_addr(int t
, int a
)
157 ptr
= (int *)(cur_text_section
->data
+ t
);
158 n
= *ptr
; /* next value */
169 /* psym is used to put an instruction with a data field which is a
170 reference to a symbol. It is in fact the same as oad ! */
173 static int is64_type(int t
)
175 return ((t
& VT_BTYPE
) == VT_PTR
||
176 (t
& VT_BTYPE
) == VT_FUNC
||
177 (t
& VT_BTYPE
) == VT_LLONG
);
180 static int is_sse_float(int t
) {
183 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
186 /* instruction + 4 bytes data. Return the address of the data */
187 static int oad(int c
, int s
)
193 if (ind1
> cur_text_section
->data_allocated
)
194 section_realloc(cur_text_section
, ind1
);
195 *(int *)(cur_text_section
->data
+ ind
) = s
;
201 static void gen_addr32(int r
, Sym
*sym
, int c
)
204 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
208 /* output constant with relocation if 'r & VT_SYM' is true */
209 static void gen_addr64(int r
, Sym
*sym
, int64_t c
)
212 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
216 /* output constant with relocation if 'r & VT_SYM' is true */
217 static void gen_addrpc32(int r
, Sym
*sym
, int c
)
220 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
224 /* output got address with relocation */
225 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
227 #ifndef TCC_TARGET_PE
230 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
231 sr
= cur_text_section
->reloc
;
232 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
235 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
236 cur_text_section
->data
[ind
-3],
237 cur_text_section
->data
[ind
-2],
238 cur_text_section
->data
[ind
-1]
240 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
245 /* we use add c, %xxx for displacement */
246 o(0x48 + REX_BASE(r
));
248 o(0xc0 + REG_VALUE(r
));
253 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
255 op_reg
= REG_VALUE(op_reg
) << 3;
256 if ((r
& VT_VALMASK
) == VT_CONST
) {
257 /* constant memory reference */
260 gen_gotpcrel(r
, sym
, c
);
262 gen_addrpc32(r
, sym
, c
);
264 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
265 /* currently, we use only ebp as base */
267 /* short reference */
271 oad(0x85 | op_reg
, c
);
273 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
275 g(0x80 | op_reg
| REG_VALUE(r
));
278 g(0x00 | op_reg
| REG_VALUE(r
));
281 g(0x00 | op_reg
| (r
& VT_VALMASK
));
285 /* generate a modrm reference. 'op_reg' contains the additional 3
287 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
289 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
292 /* generate a modrm reference. 'op_reg' contains the additional 3
294 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
297 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
298 if ((r
& VT_VALMASK
) != VT_CONST
&&
299 (r
& VT_VALMASK
) != VT_LOCAL
) {
300 rex
|= REX_BASE(VT_VALMASK
& r
);
304 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
305 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
309 /* load 'r' from value 'sv' */
310 void load(int r
, SValue
*sv
)
312 int v
, t
, ft
, fc
, fr
;
316 if (pe_dllimport(r
, sv
, load
))
324 #ifndef TCC_TARGET_PE
325 /* we use indirect access via got */
326 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
327 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
328 /* use the result register as a temporary register */
329 int tr
= r
| TREG_MEM
;
331 /* we cannot use float registers as a temporary register */
332 tr
= get_reg(RC_INT
) | TREG_MEM
;
334 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
336 /* load from the temporary register */
343 if (v
== VT_LLOCAL
) {
345 v1
.r
= VT_LOCAL
| VT_LVAL
;
350 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
351 o(0x6e0f66); /* movd */
353 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
354 o(0x7e0ff3); /* movq */
356 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
359 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
360 o(0xbe0f); /* movsbl */
361 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
362 o(0xb60f); /* movzbl */
363 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
364 o(0xbf0f); /* movswl */
365 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
366 o(0xb70f); /* movzwl */
367 } else if (is64_type(ft
)) {
368 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
373 gen_modrm(r
, fr
, sv
->sym
, fc
);
379 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
380 gen_addrpc32(fr
, sv
->sym
, fc
);
382 if (sv
->sym
->type
.t
& VT_STATIC
) {
384 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
385 gen_addrpc32(fr
, sv
->sym
, fc
);
388 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
389 gen_gotpcrel(r
, sv
->sym
, fc
);
392 } else if (is64_type(ft
)) {
394 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
397 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
400 } else if (v
== VT_LOCAL
) {
401 o(0x48 | REX_BASE(r
));
402 o(0x8d); /* lea xxx(%ebp), r */
403 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
404 } else if (v
== VT_CMP
) {
405 oad(0xb8 + r
, 0); /* mov $0, r */
406 o(0x0f); /* setxx %br */
409 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
411 oad(0xb8 + r
, t
); /* mov $1, r */
412 o(0x05eb); /* jmp after */
414 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
416 if (r
== TREG_XMM0
) {
417 assert(v
== TREG_ST0
);
418 /* gen_cvt_ftof(VT_DOUBLE); */
419 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
420 /* movsd -0x10(%rsp),%xmm0 */
423 } else if (r
== TREG_ST0
) {
424 assert(v
== TREG_XMM0
);
425 /* gen_cvt_ftof(VT_LDOUBLE); */
426 /* movsd %xmm0,-0x10(%rsp) */
429 o(0xf02444dd); /* fldl -0x10(%rsp) */
431 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
433 o(0xc0 + r
+ v
* 8); /* mov v, r */
439 /* store register 'r' in lvalue 'v' */
440 void store(int r
, SValue
*v
)
444 /* store the REX prefix in this variable when PIC is enabled */
448 if (pe_dllimport(r
, v
, store
))
454 fr
= v
->r
& VT_VALMASK
;
457 #ifndef TCC_TARGET_PE
458 /* we need to access the variable via got */
459 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
460 /* mov xx(%rip), %r11 */
462 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
463 pic
= is64_type(bt
) ? 0x49 : 0x41;
467 /* XXX: incorrect if float reg to reg */
468 if (bt
== VT_FLOAT
) {
471 o(0x7e0f); /* movd */
473 } else if (bt
== VT_DOUBLE
) {
476 o(0xd60f); /* movq */
478 } else if (bt
== VT_LDOUBLE
) {
479 o(0xc0d9); /* fld %st(0) */
487 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
489 else if (is64_type(bt
))
495 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
500 if (fr
== VT_CONST
||
503 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
504 } else if (fr
!= r
) {
505 /* XXX: don't we really come here? */
507 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
510 if (fr
== VT_CONST
||
513 gen_modrm(r
, v
->r
, v
->sym
, fc
);
514 } else if (fr
!= r
) {
515 /* XXX: don't we really come here? */
517 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
522 static void gadd_sp(int val
)
524 if (val
== (char)val
) {
528 oad(0xc48148, val
); /* add $xxx, %rsp */
532 /* 'is_jmp' is '1' if it is a jump */
533 static void gcall_or_jmp(int is_jmp
)
536 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
538 if (vtop
->r
& VT_SYM
) {
539 /* relocation case */
540 greloc(cur_text_section
, vtop
->sym
,
541 ind
+ 1, R_X86_64_PC32
);
543 /* put an empty PC32 relocation */
544 put_elf_reloc(symtab_section
, cur_text_section
,
545 ind
+ 1, R_X86_64_PC32
, 0);
547 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
549 /* otherwise, indirect call */
553 o(0xff); /* call/jmp *r */
554 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
560 static const uint8_t arg_regs
[] = {
561 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
565 static const uint8_t arg_regs
[REGN
] = {
566 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
570 /* Generate function call. The function address is pushed first, then
571 all the parameters in call order. This functions pops all the
572 parameters and the function address. */
573 void gfunc_call(int nb_args
)
575 int size
, align
, r
, args_size
, i
;
579 int sse_reg
, gen_reg
;
581 /* calculate the number of integer/float arguments */
583 for(i
= 0; i
< nb_args
; i
++) {
584 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
585 args_size
+= type_size(&vtop
->type
, &align
);
586 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
588 #ifndef TCC_TARGET_PE
589 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
591 if (nb_sse_args
> 8) args_size
+= 8;
595 if (nb_reg_args
> REGN
) args_size
+= 8;
599 /* for struct arguments, we need to call memcpy and the function
600 call breaks register passing arguments we are preparing.
601 So, we process arguments which will be passed by stack first. */
603 gen_reg
= nb_reg_args
;
604 sse_reg
= nb_sse_args
;
607 save_regs(0); /* save used temporary registers */
610 /* adjust stack to align SSE boundary */
611 if (args_size
&= 8) {
612 o(0x50); /* push $rax */
614 for(i
= 0; i
< nb_args
; i
++) {
615 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
616 size
= type_size(&vtop
->type
, &align
);
617 /* align to stack align size */
618 size
= (size
+ 3) & ~3;
619 /* allocate the necessary size on stack */
621 oad(0xec81, size
); /* sub $xxx, %rsp */
622 /* generate structure store */
624 o(0x48 + REX_BASE(r
));
625 o(0x89); /* mov %rsp, r */
628 /* following code breaks vtop[1] */
629 SValue tmp
= vtop
[1];
630 vset(&vtop
->type
, r
| VT_LVAL
, 0);
636 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
639 oad(0xec8148, size
); /* sub $xxx, %rsp */
640 o(0x7cdb); /* fstpt 0(%rsp) */
644 } else if (is_sse_float(vtop
->type
.t
)) {
653 o(0x50); /* push $rax */
654 /* movq %xmm0, (%rsp) */
662 /* XXX: implicit cast ? */
665 o(0x50 + r
); /* push r */
673 /* then, we prepare register passing arguments.
674 Note that we cannot set RDX and RCX in this loop because gv()
675 may break these temporary registers. Let's use R10 and R11
677 gen_reg
= nb_reg_args
;
678 sse_reg
= nb_sse_args
;
679 for(i
= 0; i
< nb_args
; i
++) {
680 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
681 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
682 } else if (is_sse_float(vtop
->type
.t
)) {
687 gv(RC_FLOAT
); /* only one float register */
688 /* movaps %xmm0, %xmmN */
692 o(0xd60f66); /* movq %xmm0, (%rsp) */
693 o(0x2404 + (j
<< 3));
695 o(0x58 + d
); /* pop d */
704 /* XXX: implicit cast ? */
714 o(0xc0 + r
* 8 + d
- 8);
721 gv(RC_FLOAT
); /* only one float register */
722 /* movaps %xmm0, %xmmN */
724 o(0xc0 + (sse_reg
<< 3));
729 /* XXX: implicit cast ? */
734 o(0xc0 + r
* 8 + arg_regs
[j
]);
737 /* j=2: r10, j=3: r11 */
741 /* j=4: r8, j=5: r9 */
742 o(0xc0 + r
* 8 + j
- 4);
751 /* allocate scratch space */
755 save_regs(0); /* save used temporary registers */
757 /* Copy R10 and R11 into RDX and RCX, respectively */
758 if (nb_reg_args
> 2) {
759 o(0xd2894c); /* mov %r10, %rdx */
760 if (nb_reg_args
> 3) {
761 o(0xd9894c); /* mov %r11, %rcx */
765 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
/* byte size reserved for the function prolog; gfunc_epilog backpatches
   exactly this many bytes at func_sub_sp_offset, nop-padding if the
   emitted prolog is shorter */
#define FUNC_PROLOG_SIZE 11
775 static void push_arg_reg(int i
) {
777 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
780 /* generate function prolog of type 't' */
781 void gfunc_prolog(CType
*func_type
)
783 int i
, addr
, align
, size
;
784 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
790 sym
= func_type
->ref
;
793 ind
+= FUNC_PROLOG_SIZE
;
794 func_sub_sp_offset
= ind
;
796 #ifndef TCC_TARGET_PE
797 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
798 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
799 seen_reg_num
= seen_sse_num
= 0;
800 /* frame pointer and return address */
801 seen_stack_size
= PTR_SIZE
* 2;
802 /* count the number of seen parameters */
803 sym
= func_type
->ref
;
804 while ((sym
= sym
->next
) != NULL
) {
806 if (is_sse_float(type
->t
)) {
807 if (seen_sse_num
< 8) {
810 seen_stack_size
+= 8;
812 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
813 size
= type_size(type
, &align
);
814 size
= (size
+ 3) & ~3;
815 seen_stack_size
+= size
;
816 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
817 seen_stack_size
+= LDOUBLE_SIZE
;
819 if (seen_reg_num
< REGN
) {
822 seen_stack_size
+= 8;
828 /* movl $0x????????, -0x10(%rbp) */
830 gen_le32(seen_reg_num
* 8);
831 /* movl $0x????????, -0xc(%rbp) */
833 gen_le32(seen_sse_num
* 16 + 48);
834 /* movl $0x????????, -0x8(%rbp) */
836 gen_le32(seen_stack_size
);
838 /* save all register passing arguments */
839 for (i
= 0; i
< 8; i
++) {
841 o(0xd60f66); /* movq */
842 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
843 /* movq $0, loc+8(%rbp) */
848 for (i
= 0; i
< REGN
; i
++) {
849 push_arg_reg(REGN
-1-i
);
854 sym
= func_type
->ref
;
859 /* if the function returns a structure, then add an
860 implicit pointer parameter */
862 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
863 push_arg_reg(reg_param_index
);
870 /* define parameters */
871 while ((sym
= sym
->next
) != NULL
) {
873 size
= type_size(type
, &align
);
874 size
= (size
+ 3) & ~3;
875 #ifndef TCC_TARGET_PE
876 if (is_sse_float(type
->t
)) {
877 if (sse_param_index
< 8) {
878 /* save arguments passed by register */
880 o(0xd60f66); /* movq */
881 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
890 if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
891 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
896 if (reg_param_index
< REGN
) {
897 /* save arguments passed by register */
898 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
903 if (reg_param_index
< REGN
) {
904 /* save arguments passed by register */
905 push_arg_reg(reg_param_index
);
914 sym_push(sym
->v
& ~SYM_FIELD
, type
,
915 VT_LOCAL
| VT_LVAL
, param_addr
);
919 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
920 for (i
= reg_param_index
; i
< REGN
; ++i
) {
921 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, addr
);
928 /* generate function epilog */
929 void gfunc_epilog(void)
934 if (func_ret_sub
== 0) {
939 g(func_ret_sub
>> 8);
941 /* align local size to word & save local variables */
942 v
= (-loc
+ 15) & -16;
944 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
947 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
948 oad(0xb8, v
); /* mov stacksize, %eax */
949 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
950 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
951 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
955 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
956 o(0xec8148); /* sub rsp, stacksize */
962 /* generate a jump to a label */
965 return psym(0xe9, t
);
968 /* generate a jump to a fixed address */
969 void gjmp_addr(int a
)
977 oad(0xe9, a
- ind
- 5);
981 /* generate a test. set 'inv' to invert test. Stack entry is popped */
982 int gtst(int inv
, int t
)
986 v
= vtop
->r
& VT_VALMASK
;
988 /* fast case : can jump directly since flags are set */
990 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
991 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
992 /* && or || optimization */
993 if ((v
& 1) == inv
) {
994 /* insert vtop->c jump list in t */
997 p
= (int *)(cur_text_section
->data
+ *p
);
1005 if (is_float(vtop
->type
.t
) ||
1006 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1010 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1011 /* constant jmp optimization */
1012 if ((vtop
->c
.i
!= 0) != inv
)
1019 t
= psym(0x85 ^ inv
, t
);
1026 /* generate an integer binary operation */
1027 void gen_opi(int op
)
1033 case TOK_ADDC1
: /* add with carry generation */
1036 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
1037 !is64_type(vtop
->type
.t
)) {
1041 if (is64_type(vtop
->type
.t
)) {
1042 o(0x48 | REX_BASE(r
));
1047 /* XXX: generate inc and dec for smaller code ? */
1049 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1053 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1056 gv2(RC_INT
, RC_INT
);
1060 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1061 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1062 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
1064 o((opc
<< 3) | 0x01);
1065 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1068 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1074 case TOK_SUBC1
: /* sub with carry generation */
1077 case TOK_ADDC2
: /* add with carry use */
1080 case TOK_SUBC2
: /* sub with carry use */
1093 gv2(RC_INT
, RC_INT
);
1096 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1097 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1098 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
1101 o(0xaf0f); /* imul fr, r */
1102 o(0xc0 + fr
+ r
* 8);
1113 opc
= 0xc0 | (opc
<< 3);
1114 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1118 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1119 o(0x48 | REX_BASE(r
));
1126 o(0xc1); /* shl/shr/sar $xxx, r */
1130 /* we generate the shift in ecx */
1131 gv2(RC_INT
, RC_RCX
);
1133 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
1134 o(0x48 | REX_BASE(r
));
1136 o(0xd3); /* shl/shr/sar %cl, r */
1147 /* first operand must be in eax */
1148 /* XXX: need better constraint for second operand */
1149 gv2(RC_RAX
, RC_RCX
);
1154 if (op
== TOK_UMULL
) {
1155 o(0xf7); /* mul fr */
1157 vtop
->r2
= TREG_RDX
;
1160 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
1161 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1162 o(0xd23148); /* xor %rdx, %rdx */
1163 o(0x48 + REX_BASE(fr
));
1165 o(0xd231); /* xor %edx, %edx */
1167 o(0xf7); /* div fr, %eax */
1170 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1171 o(0x9948); /* cqto */
1172 o(0x48 + REX_BASE(fr
));
1176 o(0xf7); /* idiv fr, %eax */
1179 if (op
== '%' || op
== TOK_UMOD
)
1192 void gen_opl(int op
)
1197 /* generate a floating point operation 'v = t1 op t2' instruction. The
1198 two operands are guaranted to have the same floating point type */
1199 /* XXX: need to use ST1 too */
1200 void gen_opf(int op
)
1202 int a
, ft
, fc
, swapped
, r
;
1204 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1206 /* convert constants to memory references */
1207 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1212 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1215 /* must put at least one value in the floating point register */
1216 if ((vtop
[-1].r
& VT_LVAL
) &&
1217 (vtop
[0].r
& VT_LVAL
)) {
1223 /* swap the stack if needed so that t1 is the register and t2 is
1224 the memory reference */
1225 if (vtop
[-1].r
& VT_LVAL
) {
1229 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1230 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1231 /* load on stack second operand */
1232 load(TREG_ST0
, vtop
);
1233 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1234 if (op
== TOK_GE
|| op
== TOK_GT
)
1236 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1239 o(0xc9d9); /* fxch %st(1) */
1240 o(0xe9da); /* fucompp */
1241 o(0xe0df); /* fnstsw %ax */
1243 o(0x45e480); /* and $0x45, %ah */
1244 o(0x40fC80); /* cmp $0x40, %ah */
1245 } else if (op
== TOK_NE
) {
1246 o(0x45e480); /* and $0x45, %ah */
1247 o(0x40f480); /* xor $0x40, %ah */
1249 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1250 o(0x05c4f6); /* test $0x05, %ah */
1253 o(0x45c4f6); /* test $0x45, %ah */
1260 /* no memory reference possible for long double operations */
1261 load(TREG_ST0
, vtop
);
1285 o(0xde); /* fxxxp %st, %st(1) */
1290 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1291 /* if saved lvalue, then we must reload it */
1294 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1296 r
= get_reg(RC_INT
);
1298 v1
.r
= VT_LOCAL
| VT_LVAL
;
1304 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1307 if (op
== TOK_LE
|| op
== TOK_LT
)
1309 if (op
== TOK_LE
|| op
== TOK_GE
) {
1310 op
= 0x93; /* setae */
1312 op
= 0x97; /* seta */
1317 o(0x7e0ff3); /* movq */
1318 gen_modrm(1, r
, vtop
->sym
, fc
);
1320 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1323 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1326 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1329 o(0x2e0f); /* ucomisd */
1330 gen_modrm(0, r
, vtop
->sym
, fc
);
1337 /* no memory reference possible for long double operations */
1338 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1339 load(TREG_XMM0
, vtop
);
1359 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1360 o(0xde); /* fxxxp %st, %st(1) */
1363 /* if saved lvalue, then we must reload it */
1365 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1367 r
= get_reg(RC_INT
);
1369 v1
.r
= VT_LOCAL
| VT_LVAL
;
1375 /* movq %xmm0,%xmm1 */
1378 load(TREG_XMM0
, vtop
);
1379 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1380 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1389 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1396 gen_modrm(0, r
, vtop
->sym
, fc
);
1404 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1405 and 'long long' cases. */
1406 void gen_cvt_itof(int t
)
1408 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1411 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1412 /* signed long long to float/double/long double (unsigned case
1413 is handled generically) */
1414 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1415 o(0x242cdf); /* fildll (%rsp) */
1416 o(0x08c48348); /* add $8, %rsp */
1417 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1418 (VT_INT
| VT_UNSIGNED
)) {
1419 /* unsigned int to float/double/long double */
1420 o(0x6a); /* push $0 */
1422 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1423 o(0x242cdf); /* fildll (%rsp) */
1424 o(0x10c48348); /* add $16, %rsp */
1426 /* int to float/double/long double */
1427 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1428 o(0x2404db); /* fildl (%rsp) */
1429 o(0x08c48348); /* add $8, %rsp */
1433 save_reg(TREG_XMM0
);
1435 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1436 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1437 (VT_INT
| VT_UNSIGNED
) ||
1438 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1442 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1443 vtop
->r
= TREG_XMM0
;
1447 /* convert from one floating point type to another */
1448 void gen_cvt_ftof(int t
)
1456 if (bt
== VT_FLOAT
) {
1458 if (tbt
== VT_DOUBLE
) {
1459 o(0xc0140f); /* unpcklps */
1460 o(0xc05a0f); /* cvtps2pd */
1461 } else if (tbt
== VT_LDOUBLE
) {
1462 /* movss %xmm0,-0x10(%rsp) */
1465 o(0xf02444d9); /* flds -0x10(%rsp) */
1468 } else if (bt
== VT_DOUBLE
) {
1470 if (tbt
== VT_FLOAT
) {
1471 o(0xc0140f66); /* unpcklpd */
1472 o(0xc05a0f66); /* cvtpd2ps */
1473 } else if (tbt
== VT_LDOUBLE
) {
1474 /* movsd %xmm0,-0x10(%rsp) */
1477 o(0xf02444dd); /* fldl -0x10(%rsp) */
1482 if (tbt
== VT_DOUBLE
) {
1483 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1484 /* movsd -0x10(%rsp),%xmm0 */
1487 vtop
->r
= TREG_XMM0
;
1488 } else if (tbt
== VT_FLOAT
) {
1489 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1490 /* movss -0x10(%rsp),%xmm0 */
1493 vtop
->r
= TREG_XMM0
;
1498 /* convert fp to int 't' type */
1499 void gen_cvt_ftoi(int t
)
1501 int ft
, bt
, size
, r
;
1504 if (bt
== VT_LDOUBLE
) {
1505 gen_cvt_ftof(VT_DOUBLE
);
1515 r
= get_reg(RC_INT
);
1516 if (bt
== VT_FLOAT
) {
1518 } else if (bt
== VT_DOUBLE
) {
1524 o(0x48 + REX_BASE(r
));
1526 o(0x2c0f); /* cvttss2si or cvttsd2si */
1527 o(0xc0 + (REG_VALUE(r
) << 3));
1531 /* computed goto support */
1538 /* end of x86-64 code generator */
1539 /*************************************************************/