2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 /* number of available registers */
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see the gv2() code, which
30 makes assumptions about this ordering). */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
36 #define RC_XMM0 0x0020
37 #define RC_ST0 0x0040 /* only for long double */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
60 #define REX_BASE(reg) (((reg) >> 3) & 1)
61 #define REG_VALUE(reg) ((reg) & 7)
63 int reg_classes
[NB_REGS
] = {
64 /* eax */ RC_INT
| RC_RAX
,
65 /* ecx */ RC_INT
| RC_RCX
,
66 /* edx */ RC_INT
| RC_RDX
,
67 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
71 /* return registers for function */
72 #define REG_IRET TREG_RAX /* single word int return register */
73 #define REG_LRET TREG_RDX /* second word return register (for long long) */
74 #define REG_FRET TREG_XMM0 /* float return register */
76 /* defined if function parameters must be evaluated in reverse order */
77 #define INVERT_FUNC_PARAMS
79 /* pointer size, in bytes */
82 /* long double size and alignment, in bytes */
83 #define LDOUBLE_SIZE 16
84 #define LDOUBLE_ALIGN 8
85 /* maximum alignment (for aligned attribute support) */
88 /******************************************************/
91 #define EM_TCC_TARGET EM_X86_64
93 /* relocation type for 32 bit data relocation */
94 #define R_DATA_32 R_X86_64_64
95 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
96 #define R_COPY R_X86_64_COPY
98 #define ELF_START_ADDR 0x08048000
99 #define ELF_PAGE_SIZE 0x1000
101 /******************************************************/
103 static unsigned long func_sub_sp_offset
;
104 static int func_ret_sub
;
106 /* XXX: make it faster ? */
111 if (ind1
> cur_text_section
->data_allocated
)
112 section_realloc(cur_text_section
, ind1
);
113 cur_text_section
->data
[ind
] = c
;
117 void o(unsigned int c
)
133 void gen_le64(int64_t c
)
145 /* output a symbol and patch all calls to it */
146 void gsym_addr(int t
, int a
)
150 ptr
= (int *)(cur_text_section
->data
+ t
);
151 n
= *ptr
; /* next value */
162 /* psym is used to put an instruction with a data field which is a
163 reference to a symbol. It is in fact the same as oad ! */
166 static int is64_type(int t
)
168 return ((t
& VT_BTYPE
) == VT_PTR
||
169 (t
& VT_BTYPE
) == VT_FUNC
||
170 (t
& VT_BTYPE
) == VT_LLONG
);
173 static int is_sse_float(int t
) {
176 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
179 /* instruction + 4 bytes data. Return the address of the data */
180 static int oad(int c
, int s
)
186 if (ind1
> cur_text_section
->data_allocated
)
187 section_realloc(cur_text_section
, ind1
);
188 *(int *)(cur_text_section
->data
+ ind
) = s
;
194 /* output constant with relocation if 'r & VT_SYM' is true */
195 static void gen_addr64(int r
, Sym
*sym
, int64_t c
)
198 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
202 /* output constant with relocation if 'r & VT_SYM' is true */
203 static void gen_addrpc32(int r
, Sym
*sym
, int c
)
206 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
210 /* output got address with relocation */
211 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
215 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
216 sr
= cur_text_section
->reloc
;
217 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
222 /* we use add c, %xxx for displacement */
223 o(0x48 + REX_BASE(r
));
225 o(0xc0 + REG_VALUE(r
));
230 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
232 op_reg
= REG_VALUE(op_reg
) << 3;
233 if ((r
& VT_VALMASK
) == VT_CONST
) {
234 /* constant memory reference */
237 gen_gotpcrel(r
, sym
, c
);
239 gen_addrpc32(r
, sym
, c
);
241 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
242 /* currently, we use only ebp as base */
244 /* short reference */
248 oad(0x85 | op_reg
, c
);
250 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
252 g(0x80 | op_reg
| REG_VALUE(r
));
255 g(0x00 | op_reg
| REG_VALUE(r
));
258 g(0x00 | op_reg
| (r
& VT_VALMASK
));
262 /* generate a modrm reference. 'op_reg' contains the additional 3
264 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
266 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
269 /* generate a modrm reference. 'op_reg' contains the additional 3
271 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
274 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
275 if ((r
& VT_VALMASK
) != VT_CONST
&&
276 (r
& VT_VALMASK
) != VT_LOCAL
) {
277 rex
|= REX_BASE(VT_VALMASK
& r
);
281 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
282 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
286 /* load 'r' from value 'sv' */
287 void load(int r
, SValue
*sv
)
289 int v
, t
, ft
, fc
, fr
;
296 /* we use indirect access via got */
297 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
298 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
299 /* use the result register as a temporary register */
300 int tr
= r
| TREG_MEM
;
302 /* we cannot use float registers as a temporary register */
303 tr
= get_reg(RC_INT
) | TREG_MEM
;
305 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
307 /* load from the temporary register */
313 if (v
== VT_LLOCAL
) {
315 v1
.r
= VT_LOCAL
| VT_LVAL
;
320 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
321 o(0x6e0f66); /* movd */
323 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
324 o(0x7e0ff3); /* movq */
326 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
329 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
330 o(0xbe0f); /* movsbl */
331 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
332 o(0xb60f); /* movzbl */
333 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
334 o(0xbf0f); /* movswl */
335 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
336 o(0xb70f); /* movzwl */
337 } else if (is64_type(ft
)) {
338 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
343 gen_modrm(r
, fr
, sv
->sym
, fc
);
346 if ((ft
& VT_BTYPE
) == VT_LLONG
) {
347 assert(!(fr
& VT_SYM
));
349 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
350 gen_addr64(fr
, sv
->sym
, sv
->c
.ull
);
353 if (sv
->sym
->type
.t
& VT_STATIC
) {
355 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
356 gen_addrpc32(fr
, sv
->sym
, fc
);
359 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
360 gen_gotpcrel(r
, sv
->sym
, fc
);
363 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
367 } else if (v
== VT_LOCAL
) {
368 o(0x48 | REX_BASE(r
));
369 o(0x8d); /* lea xxx(%ebp), r */
370 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
371 } else if (v
== VT_CMP
) {
372 oad(0xb8 + r
, 0); /* mov $0, r */
373 o(0x0f); /* setxx %br */
376 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
378 oad(0xb8 + r
, t
); /* mov $1, r */
379 o(0x05eb); /* jmp after */
381 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
383 if (r
== TREG_XMM0
) {
384 assert(v
== TREG_ST0
);
385 /* gen_cvt_ftof(VT_DOUBLE); */
386 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
387 /* movsd -0x10(%rsp),%xmm0 */
390 } else if (r
== TREG_ST0
) {
391 assert(v
== TREG_XMM0
);
392 /* gen_cvt_ftof(VT_LDOUBLE); */
393 /* movsd %xmm0,-0x10(%rsp) */
396 o(0xf02444dd); /* fldl -0x10(%rsp) */
398 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
400 o(0xc0 + r
+ v
* 8); /* mov v, r */
406 /* store register 'r' in lvalue 'v' */
407 void store(int r
, SValue
*v
)
411 /* store the REX prefix in this variable when PIC is enabled */
416 fr
= v
->r
& VT_VALMASK
;
419 /* we need to access the variable via got */
420 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
421 /* mov xx(%rip), %r11 */
423 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
424 pic
= is64_type(bt
) ? 0x49 : 0x41;
427 /* XXX: incorrect if float reg to reg */
428 if (bt
== VT_FLOAT
) {
431 o(0x7e0f); /* movd */
433 } else if (bt
== VT_DOUBLE
) {
436 o(0xd60f); /* movq */
438 } else if (bt
== VT_LDOUBLE
) {
439 o(0xc0d9); /* fld %st(0) */
447 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
449 else if (is64_type(bt
))
455 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
460 if (fr
== VT_CONST
||
463 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
464 } else if (fr
!= r
) {
465 /* XXX: can we really get here? */
467 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
470 if (fr
== VT_CONST
||
473 gen_modrm(r
, v
->r
, v
->sym
, fc
);
474 } else if (fr
!= r
) {
475 /* XXX: can we really get here? */
477 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
482 static void gadd_sp(int val
)
484 if (val
== (char)val
) {
488 oad(0xc48148, val
); /* add $xxx, %rsp */
492 /* 'is_jmp' is '1' if it is a jump */
493 static void gcall_or_jmp(int is_jmp
)
496 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
498 if (vtop
->r
& VT_SYM
) {
499 /* relocation case */
500 greloc(cur_text_section
, vtop
->sym
,
501 ind
+ 1, R_X86_64_PC32
);
503 /* put an empty PC32 relocation */
504 put_elf_reloc(symtab_section
, cur_text_section
,
505 ind
+ 1, R_X86_64_PC32
, 0);
507 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
509 /* otherwise, indirect call */
513 o(0xff); /* call/jmp *r */
514 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
518 static uint8_t arg_regs
[6] = {
519 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
521 /* Generate function call. The function address is pushed first, then
522 all the parameters in call order. This function pops all the
523 parameters and the function address. */
524 void gfunc_call(int nb_args
)
526 int size
, align
, r
, args_size
, i
, func_call
;
531 int sse_reg
, gen_reg
;
533 /* calculate the number of integer/float arguments */
535 for(i
= 0; i
< nb_args
; i
++) {
536 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
537 args_size
+= type_size(&vtop
->type
, &align
);
538 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
540 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
542 if (nb_sse_args
> 8) args_size
+= 8;
545 if (nb_reg_args
> 6) args_size
+= 8;
549 /* for struct arguments, we need to call memcpy and the function
550 call breaks register passing arguments we are preparing.
551 So, we process arguments which will be passed by stack first. */
553 gen_reg
= nb_reg_args
;
554 sse_reg
= nb_sse_args
;
555 /* adjust stack to align SSE boundary */
556 if (args_size
&= 8) {
557 o(0x50); /* push $rax */
559 for(i
= 0; i
< nb_args
; i
++) {
560 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
561 size
= type_size(&vtop
->type
, &align
);
562 /* align to stack align size */
563 size
= (size
+ 3) & ~3;
564 /* allocate the necessary size on stack */
566 oad(0xec81, size
); /* sub $xxx, %rsp */
567 /* generate structure store */
569 o(0x48 + REX_BASE(r
));
570 o(0x89); /* mov %rsp, r */
573 /* following code breaks vtop[1] */
574 SValue tmp
= vtop
[1];
575 vset(&vtop
->type
, r
| VT_LVAL
, 0);
581 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
584 oad(0xec8148, size
); /* sub $xxx, %rsp */
585 o(0x7cdb); /* fstpt 0(%rsp) */
589 } else if (is_sse_float(vtop
->type
.t
)) {
593 o(0x50); /* push $rax */
594 /* movq %xmm0, (%rsp) */
602 /* XXX: implicit cast ? */
605 o(0x50 + r
); /* push r */
613 /* then, we prepare register passing arguments.
614 Note that we cannot set RDX and RCX in this loop because gv()
615 may break these temporary registers. Let's use R10 and R11
617 gen_reg
= nb_reg_args
;
618 sse_reg
= nb_sse_args
;
619 for(i
= 0; i
< nb_args
; i
++) {
620 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
621 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
622 } else if (is_sse_float(vtop
->type
.t
)) {
625 gv(RC_FLOAT
); /* only one float register */
626 /* movaps %xmm0, %xmmN */
628 o(0xc0 + (sse_reg
<< 3));
633 /* XXX: implicit cast ? */
638 o(0xc0 + r
* 8 + arg_regs
[j
]);
641 /* j=2: r10, j=3: r11 */
645 /* j=4: r8, j=5: r9 */
646 o(0xc0 + r
* 8 + j
- 4);
653 save_regs(0); /* save used temporary registers */
655 /* Copy R10 and R11 into RDX and RCX, respectively */
656 if (nb_reg_args
> 2) {
657 o(0xd2894c); /* mov %r10, %rdx */
658 if (nb_reg_args
> 3) {
659 o(0xd9894c); /* mov %r11, %rcx */
663 func_sym
= vtop
->type
.ref
;
664 func_call
= FUNC_CALL(func_sym
->r
);
665 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
673 /* XXX: support PE? */
674 #warning "PE isn't tested at all"
675 #define FUNC_PROLOG_SIZE 12
677 #define FUNC_PROLOG_SIZE 11
680 static void push_arg_reg(int i
) {
682 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
685 /* generate function prolog of type 't' */
686 void gfunc_prolog(CType
*func_type
)
688 int i
, addr
, align
, size
, func_call
;
689 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
695 sym
= func_type
->ref
;
696 func_call
= FUNC_CALL(sym
->r
);
699 ind
+= FUNC_PROLOG_SIZE
;
700 func_sub_sp_offset
= ind
;
702 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
703 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
704 seen_reg_num
= seen_sse_num
= 0;
705 /* frame pointer and return address */
706 seen_stack_size
= PTR_SIZE
* 2;
707 /* count the number of seen parameters */
708 sym
= func_type
->ref
;
709 while ((sym
= sym
->next
) != NULL
) {
711 if (is_sse_float(type
->t
)) {
712 if (seen_sse_num
< 8) {
715 seen_stack_size
+= 8;
717 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
718 size
= type_size(type
, &align
);
719 size
= (size
+ 3) & ~3;
720 seen_stack_size
+= size
;
721 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
722 seen_stack_size
+= LDOUBLE_SIZE
;
724 if (seen_reg_num
< 6) {
727 seen_stack_size
+= 8;
733 /* movl $0x????????, -0x10(%rbp) */
735 gen_le32(seen_reg_num
* 8);
736 /* movl $0x????????, -0xc(%rbp) */
738 gen_le32(seen_sse_num
* 16 + 48);
739 /* movl $0x????????, -0x8(%rbp) */
741 gen_le32(seen_stack_size
);
743 /* save all register passing arguments */
744 for (i
= 0; i
< 8; i
++) {
746 o(0xd60f66); /* movq */
747 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
748 /* movq $0, loc+8(%rbp) */
753 for (i
= 0; i
< 6; i
++) {
758 sym
= func_type
->ref
;
763 /* if the function returns a structure, then add an
764 implicit pointer parameter */
766 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
767 push_arg_reg(reg_param_index
);
774 /* define parameters */
775 while ((sym
= sym
->next
) != NULL
) {
777 size
= type_size(type
, &align
);
778 size
= (size
+ 3) & ~3;
779 if (is_sse_float(type
->t
)) {
780 if (sse_param_index
< 8) {
781 /* save arguments passed by register */
783 o(0xd60f66); /* movq */
784 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
791 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
792 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
796 if (reg_param_index
< 6) {
797 /* save arguments passed by register */
798 push_arg_reg(reg_param_index
);
806 sym_push(sym
->v
& ~SYM_FIELD
, type
,
807 VT_LOCAL
| VT_LVAL
, param_addr
);
812 /* generate function epilog */
813 void gfunc_epilog(void)
818 if (func_ret_sub
== 0) {
823 g(func_ret_sub
>> 8);
825 /* align local size to word & save local variables */
826 v
= (-loc
+ 15) & -16;
828 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
831 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
832 oad(0xb8, v
); /* mov stacksize, %eax */
833 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
834 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
838 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
839 o(0xec8148); /* sub rsp, stacksize */
841 #if FUNC_PROLOG_SIZE == 12
842 o(0x90); /* adjust to FUNC_PROLOG_SIZE */
848 /* generate a jump to a label */
851 return psym(0xe9, t
);
854 /* generate a jump to a fixed address */
855 void gjmp_addr(int a
)
863 oad(0xe9, a
- ind
- 5);
867 /* generate a test. set 'inv' to invert test. Stack entry is popped */
868 int gtst(int inv
, int t
)
872 v
= vtop
->r
& VT_VALMASK
;
874 /* fast case : can jump directly since flags are set */
876 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
877 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
878 /* && or || optimization */
879 if ((v
& 1) == inv
) {
880 /* insert vtop->c jump list in t */
883 p
= (int *)(cur_text_section
->data
+ *p
);
891 if (is_float(vtop
->type
.t
) ||
892 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
896 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
897 /* constant jmp optimization */
898 if ((vtop
->c
.i
!= 0) != inv
)
905 t
= psym(0x85 ^ inv
, t
);
912 /* generate an integer binary operation */
919 case TOK_ADDC1
: /* add with carry generation */
922 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
923 !is64_type(vtop
->type
.t
)) {
927 if (is64_type(vtop
->type
.t
)) {
928 o(0x48 | REX_BASE(r
));
933 /* XXX: generate inc and dec for smaller code ? */
935 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
939 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
946 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
947 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
948 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
950 o((opc
<< 3) | 0x01);
951 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
954 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
960 case TOK_SUBC1
: /* sub with carry generation */
963 case TOK_ADDC2
: /* add with carry use */
966 case TOK_SUBC2
: /* sub with carry use */
982 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
983 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
984 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
987 o(0xaf0f); /* imul fr, r */
988 o(0xc0 + fr
+ r
* 8);
999 opc
= 0xc0 | (opc
<< 3);
1000 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1004 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1005 o(0x48 | REX_BASE(r
));
1012 o(0xc1); /* shl/shr/sar $xxx, r */
1016 /* we generate the shift in ecx */
1017 gv2(RC_INT
, RC_RCX
);
1019 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
1020 o(0x48 | REX_BASE(r
));
1022 o(0xd3); /* shl/shr/sar %cl, r */
1033 /* first operand must be in eax */
1034 /* XXX: need better constraint for second operand */
1035 gv2(RC_RAX
, RC_RCX
);
1040 if (op
== TOK_UMULL
) {
1041 o(0xf7); /* mul fr */
1043 vtop
->r2
= TREG_RDX
;
1046 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
1047 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
1050 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1051 o(0x9948); /* cqto */
1052 o(0x48 + REX_BASE(fr
));
1056 o(0xf7); /* idiv fr, %eax */
1059 if (op
== '%' || op
== TOK_UMOD
)
1072 void gen_opl(int op
)
1077 /* generate a floating point operation 'v = t1 op t2' instruction. The
1078 two operands are guaranteed to have the same floating point type */
1079 /* XXX: need to use ST1 too */
1080 void gen_opf(int op
)
1082 int a
, ft
, fc
, swapped
, r
;
1084 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1086 /* convert constants to memory references */
1087 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1092 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1095 /* must put at least one value in the floating point register */
1096 if ((vtop
[-1].r
& VT_LVAL
) &&
1097 (vtop
[0].r
& VT_LVAL
)) {
1103 /* swap the stack if needed so that t1 is the register and t2 is
1104 the memory reference */
1105 if (vtop
[-1].r
& VT_LVAL
) {
1109 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1110 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1111 /* load on stack second operand */
1112 load(TREG_ST0
, vtop
);
1113 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1114 if (op
== TOK_GE
|| op
== TOK_GT
)
1116 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1119 o(0xc9d9); /* fxch %st(1) */
1120 o(0xe9da); /* fucompp */
1121 o(0xe0df); /* fnstsw %ax */
1123 o(0x45e480); /* and $0x45, %ah */
1124 o(0x40fC80); /* cmp $0x40, %ah */
1125 } else if (op
== TOK_NE
) {
1126 o(0x45e480); /* and $0x45, %ah */
1127 o(0x40f480); /* xor $0x40, %ah */
1129 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1130 o(0x05c4f6); /* test $0x05, %ah */
1133 o(0x45c4f6); /* test $0x45, %ah */
1140 /* no memory reference possible for long double operations */
1141 load(TREG_ST0
, vtop
);
1165 o(0xde); /* fxxxp %st, %st(1) */
1170 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1171 /* if saved lvalue, then we must reload it */
1174 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1176 r
= get_reg(RC_INT
);
1178 v1
.r
= VT_LOCAL
| VT_LVAL
;
1184 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1187 if (op
== TOK_LE
|| op
== TOK_LT
)
1189 if (op
== TOK_LE
|| op
== TOK_GE
) {
1190 op
= 0x93; /* setae */
1192 op
= 0x97; /* seta */
1197 o(0x7e0ff3); /* movq */
1198 gen_modrm(1, r
, vtop
->sym
, fc
);
1200 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1203 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1206 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1209 o(0x2e0f); /* ucomisd */
1210 gen_modrm(0, r
, vtop
->sym
, fc
);
1217 /* no memory reference possible for long double operations */
1218 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1219 load(TREG_XMM0
, vtop
);
1239 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1240 o(0xde); /* fxxxp %st, %st(1) */
1243 /* if saved lvalue, then we must reload it */
1245 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1247 r
= get_reg(RC_INT
);
1249 v1
.r
= VT_LOCAL
| VT_LVAL
;
1255 /* movq %xmm0,%xmm1 */
1258 load(TREG_XMM0
, vtop
);
1259 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1260 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1269 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1276 gen_modrm(0, r
, vtop
->sym
, fc
);
1284 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1285 and 'long long' cases. */
1286 void gen_cvt_itof(int t
)
1288 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1291 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1292 /* signed long long to float/double/long double (unsigned case
1293 is handled generically) */
1294 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1295 o(0x242cdf); /* fildll (%rsp) */
1296 o(0x08c48348); /* add $8, %rsp */
1297 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1298 (VT_INT
| VT_UNSIGNED
)) {
1299 /* unsigned int to float/double/long double */
1300 o(0x6a); /* push $0 */
1302 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1303 o(0x242cdf); /* fildll (%rsp) */
1304 o(0x10c48348); /* add $16, %rsp */
1306 /* int to float/double/long double */
1307 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1308 o(0x2404db); /* fildl (%rsp) */
1309 o(0x08c48348); /* add $8, %rsp */
1313 save_reg(TREG_XMM0
);
1315 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1316 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1317 (VT_INT
| VT_UNSIGNED
) ||
1318 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1322 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1323 vtop
->r
= TREG_XMM0
;
1327 /* convert from one floating point type to another */
1328 void gen_cvt_ftof(int t
)
1336 if (bt
== VT_FLOAT
) {
1338 if (tbt
== VT_DOUBLE
) {
1339 o(0xc0140f); /* unpcklps */
1340 o(0xc05a0f); /* cvtps2pd */
1341 } else if (tbt
== VT_LDOUBLE
) {
1342 /* movss %xmm0,-0x10(%rsp) */
1345 o(0xf02444d9); /* flds -0x10(%rsp) */
1348 } else if (bt
== VT_DOUBLE
) {
1350 if (tbt
== VT_FLOAT
) {
1351 o(0xc0140f66); /* unpcklpd */
1352 o(0xc05a0f66); /* cvtpd2ps */
1353 } else if (tbt
== VT_LDOUBLE
) {
1354 /* movsd %xmm0,-0x10(%rsp) */
1357 o(0xf02444dd); /* fldl -0x10(%rsp) */
1362 if (tbt
== VT_DOUBLE
) {
1363 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1364 /* movsd -0x10(%rsp),%xmm0 */
1367 vtop
->r
= TREG_XMM0
;
1368 } else if (tbt
== VT_FLOAT
) {
1369 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1370 /* movss -0x10(%rsp),%xmm0 */
1373 vtop
->r
= TREG_XMM0
;
1378 /* convert fp to int 't' type */
1379 void gen_cvt_ftoi(int t
)
1381 int ft
, bt
, size
, r
;
1384 if (bt
== VT_LDOUBLE
) {
1385 gen_cvt_ftof(VT_DOUBLE
);
1395 r
= get_reg(RC_INT
);
1396 if (bt
== VT_FLOAT
) {
1398 } else if (bt
== VT_DOUBLE
) {
1404 o(0x48 + REX_BASE(r
));
1406 o(0x2c0f); /* cvttss2si or cvttsd2si */
1407 o(0xc0 + (REG_VALUE(r
) << 3));
1411 /* computed goto support */
1418 /* end of x86-64 code generator */
1419 /*************************************************************/