2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 /* number of available registers */
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see gv2() code which does
30 assumptions on it). */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
36 #define RC_XMM0 0x0020
37 #define RC_ST0 0x0040 /* only for long double */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
58 #define REX_BASE(reg) ((reg) >> 3)
59 #define REG_VALUE(reg) ((reg) & 7)
/* Register-class table, indexed by TREG_* register number: maps each
   machine register to the bitmask of RC_* allocation classes it belongs
   to (see gv2() ordering requirement above).
   NOTE(review): this extraction is truncated — entries after xmm0 (e.g.
   st0) and the closing "};" are not visible here. */
61 int reg_classes
[NB_REGS
] = {
62 /* eax */ RC_INT
| RC_RAX
,
63 /* ecx */ RC_INT
| RC_RCX
,
64 /* edx */ RC_INT
| RC_RDX
,
65 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
69 /* return registers for function */
70 #define REG_IRET TREG_RAX /* single word int return register */
71 #define REG_LRET TREG_RDX /* second word return register (for long long) */
72 #define REG_FRET TREG_XMM0 /* float return register */
74 /* defined if function parameters must be evaluated in reverse order */
75 #define INVERT_FUNC_PARAMS
77 /* pointer size, in bytes */
80 /* long double size and alignment, in bytes */
81 #define LDOUBLE_SIZE 16
82 #define LDOUBLE_ALIGN 8
83 /* maximum alignment (for aligned attribute support) */
86 /******************************************************/
89 #define EM_TCC_TARGET EM_X86_64
91 /* relocation type for 32 bit data relocation */
92 #define R_DATA_32 R_X86_64_32
93 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
94 #define R_COPY R_X86_64_COPY
96 #define ELF_START_ADDR 0x08048000
97 #define ELF_PAGE_SIZE 0x1000
99 /******************************************************/
/* Byte offset (in cur_text_section) of the current function's
   "sub $..., %rsp" prolog; patched later by gfunc_epilog() once the
   final frame size is known. */
101 static unsigned long func_sub_sp_offset
;
/* Byte count popped by "ret $n" in the epilog (e.g. for stdcall-style
   callee-cleanup); 0 means a plain "ret". */
102 static int func_ret_sub
;
104 /* XXX: make it faster ? */
/* NOTE(review): fragment of the byte-emitter g() — the signature and
   the lines updating 'ind' (orig. 105-108, 112-114) are missing from
   this extraction. Grows the text section if needed, then stores one
   byte 'c' at the current output index 'ind'. */
109 if (ind1
> cur_text_section
->data_allocated
)
110 section_realloc(cur_text_section
, ind1
);
111 cur_text_section
->data
[ind
] = c
;
/* o(): emit an instruction encoded little-endian in 'c', one byte at a
   time until the remaining value is zero (body missing here). */
115 void o(unsigned int c
)
/* gen_le64(): emit a 64-bit little-endian constant (body missing). */
131 void gen_le64(int64_t c
)
143 /* output a symbol and patch all calls to it */
/* Walks the chained list of 32-bit forward-reference slots starting at
   text offset 't' and patches each to point at address 'a'.
   NOTE(review): fragment — the loop structure and the patch/store lines
   (orig. 145-147, 150-158) are missing from this extraction. */
144 void gsym_addr(int t
, int a
)
148 ptr
= (int *)(cur_text_section
->data
+ t
);
149 n
= *ptr
; /* next value */
160 /* psym is used to put an instruction with a data field which is a
161 reference to a symbol. It is in fact the same as oad ! */
164 static int is64_type(int t
)
166 return ((t
& VT_BTYPE
) == VT_PTR
||
167 (t
& VT_BTYPE
) == VT_FUNC
||
168 (t
& VT_BTYPE
) == VT_LLONG
);
171 static int is_sse_float(int t
) {
174 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
177 /* instruction + 4 bytes data. Return the address of the data */
/* NOTE(review): fragment — the declaration of 'ind1', the opcode
   emission and the return statement (orig. 179-183, 187-190) are
   missing from this extraction. Visible part: grow the text section if
   the 4 data bytes would overflow it, then store 's' at offset 'ind'. */
178 static int oad(int c
, int s
)
184 if (ind1
> cur_text_section
->data_allocated
)
185 section_realloc(cur_text_section
, ind1
);
186 *(int *)(cur_text_section
->data
+ ind
) = s
;
192 /* output constant with relocation if 'r & VT_SYM' is true */
/* Emits a 64-bit absolute constant; when the value is symbolic, first
   records an R_X86_64_64 relocation at the current output position.
   NOTE(review): fragment — the VT_SYM test and the gen_le64() call
   (orig. 194-195, 197-198) are missing from this extraction. */
193 static void gen_addr64(int r
, Sym
*sym
, int64_t c
)
196 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
200 /* output constant with relocation if 'r & VT_SYM' is true */
/* Emits a 32-bit PC-relative constant; when symbolic, records an
   R_X86_64_PC32 relocation at the current output position.
   NOTE(review): fragment — the VT_SYM test and the gen_le32() call
   (orig. 202-203, 205-206) are missing from this extraction. */
201 static void gen_addrpc32(int r
, Sym
*sym
, int c
)
204 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
208 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
/* ... opcode bits (reg field of the ModRM byte). Chooses between a
   RIP-relative constant reference, an %ebp/%rbp-based local, or a plain
   register-indirect reference.
   NOTE(review): fragment — several emission lines (orig. 215, 219-226,
   228-229) are missing from this extraction. */
210 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
/* shift op_reg into the ModRM 'reg' field (bits 3-5) */
212 op_reg
= op_reg
<< 3;
213 if ((r
& VT_VALMASK
) == VT_CONST
) {
214 /* constant memory reference */
216 gen_addrpc32(r
, sym
, c
);
217 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
218 /* currently, we use only ebp as base */
220 /* short reference */
/* long displacement form: mod=10, rm=101 (disp32(%ebp)) */
224 oad(0x85 | op_reg
, c
);
/* register-indirect: mod=00, rm = register number */
227 g(0x00 | op_reg
| (r
& VT_VALMASK
));
231 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
/* ... opcode bits. 64-bit variant: emits a REX.W prefix (0x48), with
   REX.R set from op_reg's high bit and REX.B from the base register,
   then the opcode and the same ModRM forms as gen_modrm().
   NOTE(review): fragment — lines emitting the rex byte/opcode and the
   short-reference path (orig. 239-241, 245, 249-256, 258-259) are
   missing from this extraction. */
233 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
235 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
236 if ((r
& VT_VALMASK
) != VT_CONST
&&
237 (r
& VT_VALMASK
) != VT_LOCAL
) {
/* base register is an extended reg (r8-r15): set REX.B */
238 rex
|= REX_BASE(VT_VALMASK
& r
);
242 op_reg
= REG_VALUE(op_reg
) << 3;
243 if ((r
& VT_VALMASK
) == VT_CONST
) {
244 /* constant memory reference */
246 gen_addrpc32(r
, sym
, c
);
247 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
248 /* currently, we use only ebp as base */
250 /* short reference */
254 oad(0x85 | op_reg
, c
);
257 g(0x00 | op_reg
| (r
& VT_VALMASK
));
262 /* load 'r' from value 'sv' */
/* Materializes stack value 'sv' into machine register 'r': dispatches
   on the value kind (lvalue in memory, constant, local address, CPU
   flags, jump chain, or another register) and on the value's type to
   pick the right mov/movss/movsd/fld/movsx/movzx encoding.
   NOTE(review): fragment — many interleaved lines (variable setup,
   several emission lines, brace closers) are missing from this
   extraction; comments below describe only what is visible. */
263 void load(int r
, SValue
*sv
)
265 int v
, t
, ft
, fc
, fr
;
/* lvalue whose address itself lives on the stack: reload the pointer
   through a temporary local first */
274 if (v
== VT_LLOCAL
) {
276 v1
.r
= VT_LOCAL
| VT_LVAL
;
/* memory load: choose encoding by base type */
281 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
282 o(0x6e0f66); /* movd */
284 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
285 o(0x7e0ff3); /* movq */
287 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
290 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
291 o(0xbe0f); /* movsbl */
292 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
293 o(0xb60f); /* movzbl */
294 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
295 o(0xbf0f); /* movswl */
296 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
297 o(0xb70f); /* movzwl */
298 } else if (is64_type(ft
)) {
/* full 64-bit load needs the REX.W-prefixed form */
299 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
304 gen_modrm(r
, fr
, sv
->sym
, fc
);
/* constant: 64-bit immediates need the movabs/relocated form */
307 if ((ft
& VT_BTYPE
) == VT_LLONG
) {
309 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
310 gen_addr64(fr
, sv
->sym
, sv
->c
.ull
);
314 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
315 gen_addrpc32(fr
, sv
->sym
, fc
);
317 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
/* address of a local: lea off(%rbp), r */
321 } else if (v
== VT_LOCAL
) {
322 o(0x48 | REX_BASE(r
));
323 o(0x8d); /* lea xxx(%ebp), r */
324 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
/* comparison result held in the flags: zero r then setcc */
325 } else if (v
== VT_CMP
) {
326 oad(0xb8 + r
, 0); /* mov $0, r */
327 o(0x0f); /* setxx %br */
/* pending jump chain: resolve it to 0/1 in r */
330 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
332 oad(0xb8 + r
, t
); /* mov $1, r */
333 o(0x05eb); /* jmp after */
335 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
/* register-to-register moves, including x87 <-> SSE transfers that
   bounce through -0x10(%rsp) */
337 if (r
== TREG_XMM0
) {
338 assert(v
== TREG_ST0
);
339 /* gen_cvt_ftof(VT_DOUBLE); */
340 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
341 /* movsd -0x10(%rsp),%xmm0 */
344 } else if (r
== TREG_ST0
) {
345 assert(v
== TREG_XMM0
);
346 /* gen_cvt_ftof(VT_LDOUBLE); */
347 /* movsd %xmm0,-0x10(%rsp) */
350 o(0xf02444dd); /* fldl -0x10(%rsp) */
352 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
354 o(0xc0 + r
+ v
* 8); /* mov v, r */
360 /* store register 'r' in lvalue 'v' */
/* Spills register 'r' into the location described by stack value 'v':
   picks the store opcode by base type (movd/movq for SSE, fld+fstp for
   long double, byte/word/qword forms for integers), then either a
   memory ModRM store or a register-to-register mov.
   NOTE(review): fragment — variable declarations, the op/op64 opcode
   assignments and several condition continuations (orig. 362-367, 369,
   373, 376, 379-391, 393-394, 398, 400-404, 408) are missing from this
   extraction. */
361 void store(int r
, SValue
*v
)
368 fr
= v
->r
& VT_VALMASK
;
370 /* XXX: incorrect if float reg to reg */
371 if (bt
== VT_FLOAT
) {
372 o(0x7e0f66); /* movd */
374 } else if (bt
== VT_DOUBLE
) {
375 o(0xd60f66); /* movq */
377 } else if (bt
== VT_LDOUBLE
) {
378 o(0xc0d9); /* fld %st(0) */
384 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
386 else if (is64_type(bt
))
/* 64-bit store path (REX.W) */
392 if (fr
== VT_CONST
||
395 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
396 } else if (fr
!= r
) {
397 /* XXX: don't we really come here? */
399 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
/* 32-bit-or-smaller store path */
402 if (fr
== VT_CONST
||
405 gen_modrm(r
, v
->r
, v
->sym
, fc
);
406 } else if (fr
!= r
) {
407 /* XXX: don't we really come here? */
409 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
/* Adjust %rsp by 'val', using the short sign-extended-imm8 encoding
   when the value fits in a signed byte.
   NOTE(review): fragment — the imm8 branch body and the else/braces
   (orig. 415, 417-419, 421-422) are missing from this extraction. */
414 static void gadd_sp(int val
)
416 if (val
== (char)val
) {
420 oad(0xc48148, val
); /* add $xxx, %rsp */
424 /* 'is_jmp' is '1' if it is a jump */
/* Emits either a direct rel32 call/jmp (with a PC32 relocation when the
   target is a symbol or unresolved) or an indirect call/jmp through a
   register holding the target.
   NOTE(review): fragment — local declarations, the gv() of the indirect
   target and brace closers (orig. 426-427, 429, 434, 438, 440, 442-444,
   447-448) are missing from this extraction. */
425 static void gcall_or_jmp(int is_jmp
)
428 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
430 if (vtop
->r
& VT_SYM
) {
431 /* relocation case */
432 greloc(cur_text_section
, vtop
->sym
,
433 ind
+ 1, R_X86_64_PC32
);
435 /* put an empty PC32 relocation */
436 put_elf_reloc(symtab_section
, cur_text_section
,
437 ind
+ 1, R_X86_64_PC32
, 0);
/* 0xe8 = call rel32, 0xe9 (= 0xe8+1) = jmp rel32; -4 accounts for the
   PC pointing past the displacement field */
439 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
441 /* otherwise, indirect call */
445 o(0xff); /* call/jmp *r */
/* ModRM: /2 (0xd0) for call, /4 (0xe0 = 0xd0 + (1<<4)) for jmp */
446 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
/* System V AMD64 integer argument registers, in parameter order.
   NOTE(review): the closing "};" is not visible in this extraction. */
450 static uint8_t arg_regs
[6] = {
451 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
453 /* Generate function call. The function address is pushed first, then
454 all the parameters in call order. This functions pops all the
455 parameters and the function address. */
/* Three phases: (1) count integer/SSE register args and the stack
   bytes needed; (2) evaluate stack-passed args first (struct copies can
   clobber registers being prepared); (3) load register args — using
   r10/r11 as stand-ins for rdx/rcx, which gv() may clobber — then set
   %eax to the SSE arg count (varargs convention) and emit the call.
   NOTE(review): large fragment — counters' initialization, the struct
   copy, int-arg gv() calls, stack cleanup and the call itself (many
   original lines) are missing from this extraction. */
456 void gfunc_call(int nb_args
)
458 int size
, align
, r
, args_size
, i
, func_call
;
463 int sse_reg
, gen_reg
;
465 /* calculate the number of integer/float arguments */
467 for(i
= 0; i
< nb_args
; i
++) {
468 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
/* NOTE(review): uses vtop->type here while the condition tested
   vtop[-i].type — looks inconsistent; verify against upstream. */
469 args_size
+= type_size(&vtop
->type
, &align
);
470 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
472 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
474 if (nb_sse_args
> 8) args_size
+= 8;
477 if (nb_reg_args
> 6) args_size
+= 8;
481 /* for struct arguments, we need to call memcpy and the function
482 call breaks register passing arguments we are preparing.
483 So, we process arguments which will be passed by stack first. */
485 gen_reg
= nb_reg_args
;
486 sse_reg
= nb_sse_args
;
487 /* adjust stack to align SSE boundary */
/* NOTE(review): '&=' (keep only bit 3) is unusual here; presumably
   intentional 16-byte alignment logic — confirm against upstream. */
488 if (args_size
&= 8) {
489 o(0x50); /* push $rax */
491 for(i
= 0; i
< nb_args
; i
++) {
492 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
493 size
= type_size(&vtop
->type
, &align
);
494 /* align to stack align size */
495 size
= (size
+ 3) & ~3;
496 /* allocate the necessary size on stack */
498 oad(0xec81, size
); /* sub $xxx, %rsp */
499 /* generate structure store */
501 o(0x48 + REX_BASE(r
));
502 o(0x89); /* mov %rsp, r */
505 /* following code breaks vtop[1] */
506 SValue tmp
= vtop
[1];
507 vset(&vtop
->type
, r
| VT_LVAL
, 0);
513 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
516 oad(0xec8148, size
); /* sub $xxx, %rsp */
517 o(0x7cdb); /* fstpt 0(%rsp) */
521 } else if (is_sse_float(vtop
->type
.t
)) {
525 o(0x50); /* push $rax */
526 /* movq %xmm0, (%rsp) */
534 /* XXX: implicit cast ? */
537 o(0x50 + r
); /* push r */
545 /* then, we prepare register passing arguments.
546 Note that we cannot set RDX and RCX in this loop because gv()
547 may break these temporary registers. Let's use R10 and R11
549 gen_reg
= nb_reg_args
;
550 sse_reg
= nb_sse_args
;
551 for(i
= 0; i
< nb_args
; i
++) {
552 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
553 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
554 } else if (is_sse_float(vtop
->type
.t
)) {
557 gv(RC_FLOAT
); /* only one float register */
558 /* movaps %xmm0, %xmmN */
560 o(0xc0 + (sse_reg
<< 3));
565 /* XXX: implicit cast ? */
570 o(0xc0 + r
* 8 + arg_regs
[j
]);
573 /* j=2: r10, j=3: r11 */
577 /* j=4: r8, j=5: r9 */
578 o(0xc0 + r
* 8 + j
- 4);
585 save_regs(0); /* save used temporary registers */
587 /* Copy R10 and R11 into RDX and RCX, respectively */
588 if (nb_reg_args
> 2) {
589 o(0xd2894c); /* mov %r10, %rdx */
590 if (nb_reg_args
> 3) {
591 o(0xd9894c); /* mov %r11, %rcx */
595 func_sym
= vtop
->type
.ref
;
596 func_call
= FUNC_CALL(func_sym
->r
);
/* varargs ABI: %al must hold the number of vector registers used */
597 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
605 /* XXX: support PE? */
606 #warning "PE isn't tested at all"
607 #define FUNC_PROLOG_SIZE 12
609 #define FUNC_PROLOG_SIZE 11
/* Spill integer argument register i (rdi/rsi/rdx/rcx/r8/r9) into a
   freshly reserved 8-byte slot at loc(%rbp).
   NOTE(review): fragment — the line adjusting 'loc' (orig. 613) and the
   closing brace are missing from this extraction. */
612 static void push_arg_reg(int i
) {
614 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
617 /* generate function prolog of type 't' */
/* Reserves FUNC_PROLOG_SIZE bytes (patched later by gfunc_epilog), and:
   for variadic functions, builds a register-save area plus the gp/fp
   offset words used by va_arg; spills register-passed parameters to the
   stack; then pushes each named parameter as a VT_LOCAL symbol.
   NOTE(review): large fragment — index initialization, the va-area
   layout stores, stack-parameter addressing and several loop bodies
   (many original lines) are missing from this extraction. */
618 void gfunc_prolog(CType
*func_type
)
620 int i
, addr
, align
, size
, func_call
;
621 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
627 sym
= func_type
->ref
;
628 func_call
= FUNC_CALL(sym
->r
);
/* leave room for the prolog, filled in by gfunc_epilog() */
631 ind
+= FUNC_PROLOG_SIZE
;
632 func_sub_sp_offset
= ind
;
634 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
635 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
636 seen_reg_num
= seen_sse_num
= 0;
637 /* frame pointer and return address */
638 seen_stack_size
= PTR_SIZE
* 2;
639 /* count the number of seen parameters */
640 sym
= func_type
->ref
;
641 while ((sym
= sym
->next
) != NULL
) {
643 if (is_sse_float(type
->t
)) {
644 if (seen_sse_num
< 8) {
647 seen_stack_size
+= 8;
649 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
650 size
= type_size(type
, &align
);
651 size
= (size
+ 3) & ~3;
652 seen_stack_size
+= size
;
653 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
654 seen_stack_size
+= LDOUBLE_SIZE
;
656 if (seen_reg_num
< 6) {
659 seen_stack_size
+= 8;
/* record gp_offset / fp_offset / overflow size for va_start */
665 /* movl $0x????????, -0x10(%rbp) */
667 gen_le32(seen_reg_num
* 8);
668 /* movl $0x????????, -0xc(%rbp) */
670 gen_le32(seen_sse_num
* 16 + 48);
671 /* movl $0x????????, -0x8(%rbp) */
673 gen_le32(seen_stack_size
);
675 /* save all register passing arguments */
676 for (i
= 0; i
< 8; i
++) {
678 o(0xd60f66); /* movq */
679 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
680 /* movq $0, loc+8(%rbp) */
685 for (i
= 0; i
< 6; i
++) {
690 sym
= func_type
->ref
;
695 /* if the function returns a structure, then add an
696 implicit pointer parameter */
698 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
699 push_arg_reg(reg_param_index
);
706 /* define parameters */
707 while ((sym
= sym
->next
) != NULL
) {
709 size
= type_size(type
, &align
);
710 size
= (size
+ 3) & ~3;
711 if (is_sse_float(type
->t
)) {
712 if (sse_param_index
< 8) {
713 /* save arguments passed by register */
715 o(0xd60f66); /* movq */
716 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
723 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
724 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
728 if (reg_param_index
< 6) {
729 /* save arguments passed by register */
730 push_arg_reg(reg_param_index
);
/* make the parameter visible to the compiler as a local lvalue */
738 sym_push(sym
->v
& ~SYM_FIELD
, type
,
739 VT_LOCAL
| VT_LVAL
, param_addr
);
744 /* generate function epilog */
/* Emits leave/ret (or ret $n for callee-cleanup), then rewinds 'ind' to
   the reserved prolog slot and writes the real prolog now that the
   frame size (-loc, 16-byte aligned) is known; on PE targets the frame
   is set up via __chkstk instead.
   NOTE(review): fragment — the leave/ret emission, the #ifdef structure
   and the restore of 'ind' (several original lines) are missing from
   this extraction. */
745 void gfunc_epilog(void)
750 if (func_ret_sub
== 0) {
755 g(func_ret_sub
>> 8);
757 /* align local size to word & save local variables */
758 v
= (-loc
+ 15) & -16;
760 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
763 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
764 oad(0xb8, v
); /* mov stacksize, %eax */
765 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
766 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
770 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
771 o(0xec8148); /* sub rsp, stacksize */
773 #if FUNC_PROLOG_SIZE == 12
774 o(0x90); /* adjust to FUNC_PROLOG_SIZE */
780 /* generate a jump to a label */
/* gjmp(): emit "jmp rel32" with a patchable displacement; returns the
   data offset for later gsym() patching (signature missing here). */
783 return psym(0xe9, t
);
786 /* generate a jump to a fixed address */
/* NOTE(review): fragment — the short "jmp rel8" fast path for nearby
   targets (orig. 788-794) is missing from this extraction. */
787 void gjmp_addr(int a
)
/* rel32 form: displacement is relative to the end of the 5-byte insn */
795 oad(0xe9, a
- ind
- 5);
799 /* generate a test. set 'inv' to invert test. Stack entry is popped */
/* Turns the value on top of the value stack into a conditional jump
   chained onto 't': uses the flags directly for VT_CMP, merges or
   resolves pending jump chains for VT_JMP/VT_JMPI, folds constant
   conditions, and otherwise emits a compare + jnz/jz.
   NOTE(review): fragment — declarations, the chain-walk loop body, the
   float/llong generic comparison path and the final vtop pop (several
   original lines) are missing from this extraction. */
800 int gtst(int inv
, int t
)
804 v
= vtop
->r
& VT_VALMASK
;
806 /* fast case : can jump directly since flags are set */
/* vtop->c.i holds the TOK_xx comparison; -16 maps it to the 0x8x
   jcc opcode family, ^inv flips the condition */
808 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
809 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
810 /* && or || optimization */
811 if ((v
& 1) == inv
) {
812 /* insert vtop->c jump list in t */
815 p
= (int *)(cur_text_section
->data
+ *p
);
823 /* XXX: not tested */
824 if (is_float(vtop
->type
.t
) ||
825 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
829 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
830 /* constant jmp optimization */
831 if ((vtop
->c
.i
!= 0) != inv
)
838 t
= psym(0x85 ^ inv
, t
);
845 /* generate an integer binary operation */
/* Dispatches on 'op': add/adc/sub/sbb/and/or/xor use the common ALU
   encoding (imm8/imm32 forms for constants, reg-reg otherwise, with
   REX.W when either operand is 64-bit or unsigned); '*' uses imul;
   shifts use the imm8 or %cl forms; div/mod/umull put the dividend in
   %eax and use mul/div/idiv with cqto sign extension.
   NOTE(review): large fragment — the switch skeleton, operand gv2()
   setup, several case bodies and the result bookkeeping (many original
   lines) are missing from this extraction. */
852 case TOK_ADDC1
: /* add with carry generation */
855 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
856 !is64_type(vtop
->type
.t
)) {
860 if (is64_type(vtop
->type
.t
)) {
861 o(0x48 | REX_BASE(r
));
866 /* XXX: generate inc and dec for smaller code ? */
/* imm8 form: 0x83 /opc, sign-extended */
868 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
/* imm32 form: 0x81 /opc */
872 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
879 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
880 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
881 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
883 o((opc
<< 3) | 0x01);
884 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
887 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
893 case TOK_SUBC1
: /* sub with carry generation */
896 case TOK_ADDC2
: /* add with carry use */
899 case TOK_SUBC2
: /* sub with carry use */
915 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
916 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
917 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
920 o(0xaf0f); /* imul fr, r */
921 o(0xc0 + fr
+ r
* 8);
932 opc
= 0xc0 | (opc
<< 3);
933 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
937 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
938 o(0x48 | REX_BASE(r
));
945 o(0xc1); /* shl/shr/sar $xxx, r */
949 /* we generate the shift in ecx */
952 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
953 o(0x48 | REX_BASE(r
));
955 o(0xd3); /* shl/shr/sar %cl, r */
966 /* first operand must be in eax */
967 /* XXX: need better constraint for second operand */
973 if (op
== TOK_UMULL
) {
974 o(0xf7); /* mul fr */
979 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
980 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
/* NOTE(review): '& VT_LLONG' (bit test) rather than '== VT_LLONG'
   looks suspicious — it also matches other btypes with that bit set;
   verify against upstream before changing. */
983 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
984 o(0x9948); /* cqto */
985 o(0x48 + REX_BASE(fr
));
989 o(0xf7); /* idiv fr, %eax */
992 if (op
== '%' || op
== TOK_UMOD
)
/* 64-bit ("long") integer operation — on x86-64 this presumably just
   defers to gen_opi (body missing from this extraction; TODO confirm). */
1005 void gen_opl(int op
)
1010 /* generate a floating point operation 'v = t1 op t2' instruction. The
1011 two operands are guaranted to have the same floating point type */
1012 /* XXX: need to use ST1 too */
/* Long doubles go through the x87 stack (fucompp + fnstsw %ax for
   comparisons, fxxxp for arithmetic); float/double go through SSE
   (ucomiss/ucomisd + setcc for comparisons, addss..divsd with a ModRM
   memory or register operand for arithmetic).
   NOTE(review): large fragment — constant spilling, the arithmetic
   opcode selection, setcc emission and vtop bookkeeping (many original
   lines) are missing from this extraction. */
1013 void gen_opf(int op
)
1015 int a
, ft
, fc
, swapped
, r
;
1017 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1019 /* convert constants to memory references */
1020 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1025 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1028 /* must put at least one value in the floating point register */
1029 if ((vtop
[-1].r
& VT_LVAL
) &&
1030 (vtop
[0].r
& VT_LVAL
)) {
1036 /* swap the stack if needed so that t1 is the register and t2 is
1037 the memory reference */
1038 if (vtop
[-1].r
& VT_LVAL
) {
1042 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1043 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1044 /* load on stack second operand */
1045 load(TREG_ST0
, vtop
);
1046 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1047 if (op
== TOK_GE
|| op
== TOK_GT
)
1049 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1052 o(0xc9d9); /* fxch %st(1) */
1053 o(0xe9da); /* fucompp */
1054 o(0xe0df); /* fnstsw %ax */
1056 o(0x45e480); /* and $0x45, %ah */
1057 o(0x40fC80); /* cmp $0x40, %ah */
1058 } else if (op
== TOK_NE
) {
1059 o(0x45e480); /* and $0x45, %ah */
1060 o(0x40f480); /* xor $0x40, %ah */
1062 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1063 o(0x05c4f6); /* test $0x05, %ah */
1066 o(0x45c4f6); /* test $0x45, %ah */
1073 /* no memory reference possible for long double operations */
1074 load(TREG_ST0
, vtop
);
1098 o(0xde); /* fxxxp %st, %st(1) */
/* SSE comparison path */
1103 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1104 /* if saved lvalue, then we must reload it */
1107 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1109 r
= get_reg(RC_INT
);
1111 v1
.r
= VT_LOCAL
| VT_LVAL
;
1117 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1120 if (op
== TOK_LE
|| op
== TOK_LT
)
1122 if (op
== TOK_LE
|| op
== TOK_GE
) {
1123 op
= 0x93; /* setae */
1125 op
= 0x97; /* seta */
1130 o(0x7e0ff3); /* movq */
1131 gen_modrm(1, r
, vtop
->sym
, fc
);
1133 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1136 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1139 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1142 o(0x2e0f); /* ucomisd */
1143 gen_modrm(0, r
, vtop
->sym
, fc
);
/* SSE arithmetic path */
1150 /* no memory reference possible for long double operations */
1151 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1152 load(TREG_XMM0
, vtop
);
1172 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1173 o(0xde); /* fxxxp %st, %st(1) */
1176 /* if saved lvalue, then we must reload it */
1178 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1180 r
= get_reg(RC_INT
);
1182 v1
.r
= VT_LOCAL
| VT_LVAL
;
1188 /* movq %xmm0,%xmm1 */
1191 load(TREG_XMM0
, vtop
);
1192 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1193 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1202 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1209 gen_modrm(0, r
, vtop
->sym
, fc
);
1217 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1218 and 'long long' cases. */
/* Target long double: push the integer and fild it on the x87 stack
   (an extra zero qword is pushed for unsigned int so fildll sees a
   non-negative 64-bit value). Target float/double: use cvtsi2ss /
   cvtsi2sd into %xmm0 (prefix 0xf3 for float, 0xf2 for double).
   NOTE(review): fragment — the gv() of the source value, REX handling
   for the SSE path and several closers (orig. 1220, 1222-1223, 1234,
   1238, 1243-1245, 1247, 1252-1254, 1257-1258) are missing from this
   extraction. */
1219 void gen_cvt_itof(int t
)
1221 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1224 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1225 /* signed long long to float/double/long double (unsigned case
1226 is handled generically) */
1227 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1228 o(0x242cdf); /* fildll (%rsp) */
1229 o(0x08c48348); /* add $8, %rsp */
1230 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1231 (VT_INT
| VT_UNSIGNED
)) {
1232 /* unsigned int to float/double/long double */
1233 o(0x6a); /* push $0 */
1235 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1236 o(0x242cdf); /* fildll (%rsp) */
1237 o(0x10c48348); /* add $16, %rsp */
1239 /* int to float/double/long double */
1240 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1241 o(0x2404db); /* fildl (%rsp) */
1242 o(0x08c48348); /* add $8, %rsp */
1246 save_reg(TREG_XMM0
);
1248 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1249 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1250 (VT_INT
| VT_UNSIGNED
) ||
1251 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1255 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1256 vtop
->r
= TREG_XMM0
;
1260 /* convert from one floating point type to another */
/* float<->double use SSE conversions (cvtps2pd / cvtpd2ps after an
   unpck to fill the low lanes); any conversion to or from long double
   bounces through a -0x10(%rsp) scratch slot between SSE and the x87
   stack.
   NOTE(review): fragment — bt/tbt setup, the gv() calls and several
   movss/movsd scratch stores (orig. 1262-1268, 1270, 1276-1280, 1282,
   1288-1294, 1298-1299, 1304-1305, 1307-1309) are missing from this
   extraction. */
1261 void gen_cvt_ftof(int t
)
1269 if (bt
== VT_FLOAT
) {
1271 if (tbt
== VT_DOUBLE
) {
1272 o(0xc0140f); /* unpcklps */
1273 o(0xc05a0f); /* cvtps2pd */
1274 } else if (tbt
== VT_LDOUBLE
) {
1275 /* movss %xmm0,-0x10(%rsp) */
1278 o(0xf02444d9); /* flds -0x10(%rsp) */
1281 } else if (bt
== VT_DOUBLE
) {
1283 if (tbt
== VT_FLOAT
) {
1284 o(0xc0140f66); /* unpcklpd */
1285 o(0xc05a0f66); /* cvtpd2ps */
1286 } else if (tbt
== VT_LDOUBLE
) {
1287 /* movsd %xmm0,-0x10(%rsp) */
1290 o(0xf02444dd); /* fldl -0x10(%rsp) */
1295 if (tbt
== VT_DOUBLE
) {
1296 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1297 /* movsd -0x10(%rsp),%xmm0 */
1300 vtop
->r
= TREG_XMM0
;
1301 } else if (tbt
== VT_FLOAT
) {
1302 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1303 /* movss -0x10(%rsp),%xmm0 */
1306 vtop
->r
= TREG_XMM0
;
1311 /* convert fp to int 't' type */
/* Long double is first narrowed to double (gen_cvt_ftof), then the SSE
   value is truncated into an integer register with cvttss2si /
   cvttsd2si; REX.W (0x48) selects the 64-bit destination form.
   NOTE(review): fragment — ft/bt/size setup, the gv() of the source,
   the 0xf3/0xf2 prefix emission and the result bookkeeping (orig.
   1313, 1315-1316, 1319-1327, 1330, 1332-1336, 1338, 1341-1342) are
   missing from this extraction. */
1312 void gen_cvt_ftoi(int t
)
1314 int ft
, bt
, size
, r
;
1317 if (bt
== VT_LDOUBLE
) {
1318 gen_cvt_ftof(VT_DOUBLE
);
1328 r
= get_reg(RC_INT
);
1329 if (bt
== VT_FLOAT
) {
1331 } else if (bt
== VT_DOUBLE
) {
1337 o(0x48 + REX_BASE(r
));
1339 o(0x2c0f); /* cvttss2si or cvttsd2si */
1340 o(0xc0 + (REG_VALUE(r
) << 3));
1344 /* computed goto support */
1351 /* end of x86-64 code generator */
1352 /*************************************************************/