2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 /* number of available registers */
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see gv2() code which does
30 assumptions on it). */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
36 #define RC_XMM0 0x0020
37 #define RC_ST0 0x0040 /* only for long double */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
58 #define REX_BASE(reg) ((reg) >> 3)
59 #define REG_VALUE(reg) ((reg) & 7)
61 int reg_classes
[NB_REGS
] = {
62 /* eax */ RC_INT
| RC_RAX
,
63 /* ecx */ RC_INT
| RC_RCX
,
64 /* edx */ RC_INT
| RC_RDX
,
65 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
69 /* return registers for function */
70 #define REG_IRET TREG_RAX /* single word int return register */
71 #define REG_LRET TREG_RDX /* second word return register (for long long) */
72 #define REG_FRET TREG_XMM0 /* float return register */
74 /* defined if function parameters must be evaluated in reverse order */
75 #define INVERT_FUNC_PARAMS
77 /* pointer size, in bytes */
80 /* long double size and alignment, in bytes */
81 #define LDOUBLE_SIZE 16
82 #define LDOUBLE_ALIGN 8
83 /* maximum alignment (for aligned attribute support) */
86 /******************************************************/
89 #define EM_TCC_TARGET EM_X86_64
91 /* relocation type for 32 bit data relocation */
92 #define R_DATA_32 R_X86_64_32
93 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
94 #define R_COPY R_X86_64_COPY
96 #define ELF_START_ADDR 0x08048000
97 #define ELF_PAGE_SIZE 0x1000
99 /******************************************************/
101 static unsigned long func_sub_sp_offset
;
102 static int func_ret_sub
;
104 /* XXX: make it faster ? */
109 if (ind1
> cur_text_section
->data_allocated
)
110 section_realloc(cur_text_section
, ind1
);
111 cur_text_section
->data
[ind
] = c
;
115 void o(unsigned int c
)
131 void gen_le64(int64_t c
)
143 /* output a symbol and patch all calls to it */
144 void gsym_addr(int t
, int a
)
148 ptr
= (int *)(cur_text_section
->data
+ t
);
149 n
= *ptr
; /* next value */
160 /* psym is used to put an instruction with a data field which is a
161 reference to a symbol. It is in fact the same as oad ! */
164 static int is64_type(int t
)
166 return ((t
& VT_BTYPE
) == VT_PTR
||
167 (t
& VT_BTYPE
) == VT_FUNC
||
168 (t
& VT_BTYPE
) == VT_LLONG
);
171 static int is_sse_float(int t
) {
174 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
177 /* instruction + 4 bytes data. Return the address of the data */
178 static int oad(int c
, int s
)
184 if (ind1
> cur_text_section
->data_allocated
)
185 section_realloc(cur_text_section
, ind1
);
186 *(int *)(cur_text_section
->data
+ ind
) = s
;
192 /* output constant with relocation if 'r & VT_SYM' is true */
193 static void gen_addr64(int r
, Sym
*sym
, int64_t c
)
196 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
200 /* output constant with relocation if 'r & VT_SYM' is true */
201 static void gen_addr32(int r
, Sym
*sym
, int c
)
204 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
208 /* output constant with relocation if 'r & VT_SYM' is true */
209 static void gen_addrpc32(int r
, Sym
*sym
, int c
)
212 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
216 /* generate a modrm reference. 'op_reg' contains the additional 3
218 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
220 op_reg
= op_reg
<< 3;
221 if ((r
& VT_VALMASK
) == VT_CONST
) {
222 /* constant memory reference */
224 gen_addrpc32(r
, sym
, c
);
225 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
226 /* currently, we use only ebp as base */
228 /* short reference */
232 oad(0x85 | op_reg
, c
);
235 g(0x00 | op_reg
| (r
& VT_VALMASK
));
239 /* generate a modrm reference. 'op_reg' contains the additional 3
241 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
243 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
244 if ((r
& VT_VALMASK
) != VT_CONST
&&
245 (r
& VT_VALMASK
) != VT_LOCAL
) {
246 rex
|= REX_BASE(VT_VALMASK
& r
);
250 op_reg
= REG_VALUE(op_reg
) << 3;
251 if ((r
& VT_VALMASK
) == VT_CONST
) {
252 /* constant memory reference */
254 gen_addrpc32(r
, sym
, c
);
255 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
256 /* currently, we use only ebp as base */
258 /* short reference */
262 oad(0x85 | op_reg
, c
);
265 g(0x00 | op_reg
| (r
& VT_VALMASK
));
270 /* load 'r' from value 'sv' */
271 void load(int r
, SValue
*sv
)
273 int v
, t
, ft
, fc
, fr
;
282 if (v
== VT_LLOCAL
) {
284 v1
.r
= VT_LOCAL
| VT_LVAL
;
289 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
290 o(0x6e0f66); /* movd */
292 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
293 o(0x7e0ff3); /* movq */
295 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
298 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
299 o(0xbe0f); /* movsbl */
300 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
301 o(0xb60f); /* movzbl */
302 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
303 o(0xbf0f); /* movswl */
304 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
305 o(0xb70f); /* movzwl */
306 } else if (is64_type(ft
)) {
307 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
312 gen_modrm(r
, fr
, sv
->sym
, fc
);
315 if ((ft
& VT_BTYPE
) == VT_LLONG
) {
317 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
318 gen_addr64(fr
, sv
->sym
, sv
->c
.ull
);
321 o(0xc0 + REG_VALUE(r
)); /* mov $xx, r */
322 gen_addr32(fr
, sv
->sym
, fc
);
324 } else if (v
== VT_LOCAL
) {
325 o(0x48 | REX_BASE(r
));
326 o(0x8d); /* lea xxx(%ebp), r */
327 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
328 } else if (v
== VT_CMP
) {
329 oad(0xb8 + r
, 0); /* mov $0, r */
330 o(0x0f); /* setxx %br */
333 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
335 oad(0xb8 + r
, t
); /* mov $1, r */
336 o(0x05eb); /* jmp after */
338 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
340 if (r
== TREG_XMM0
) {
341 assert(v
== TREG_ST0
);
342 /* gen_cvt_ftof(VT_DOUBLE); */
343 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
344 /* movsd -0x10(%rsp),%xmm0 */
347 } else if (r
== TREG_ST0
) {
348 assert(v
== TREG_XMM0
);
349 /* gen_cvt_ftof(VT_LDOUBLE); */
350 /* movsd %xmm0,-0x10(%rsp) */
353 o(0xf02444dd); /* fldl -0x10(%rsp) */
355 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
357 o(0xc0 + r
+ v
* 8); /* mov v, r */
363 /* store register 'r' in lvalue 'v' */
364 void store(int r
, SValue
*v
)
371 fr
= v
->r
& VT_VALMASK
;
373 /* XXX: incorrect if float reg to reg */
374 if (bt
== VT_FLOAT
) {
375 o(0x7e0f66); /* movd */
377 } else if (bt
== VT_DOUBLE
) {
378 o(0xd60f66); /* movq */
380 } else if (bt
== VT_LDOUBLE
) {
381 o(0xc0d9); /* fld %st(0) */
387 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
389 else if (is64_type(bt
))
395 if (fr
== VT_CONST
||
398 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
399 } else if (fr
!= r
) {
400 /* XXX: don't we really come here? */
402 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
405 if (fr
== VT_CONST
||
408 gen_modrm(r
, v
->r
, v
->sym
, fc
);
409 } else if (fr
!= r
) {
410 /* XXX: don't we really come here? */
412 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
417 static void gadd_sp(int val
)
419 if (val
== (char)val
) {
423 oad(0xc48148, val
); /* add $xxx, %rsp */
427 /* 'is_jmp' is '1' if it is a jump */
428 static void gcall_or_jmp(int is_jmp
)
431 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
433 if (vtop
->r
& VT_SYM
) {
434 /* relocation case */
435 greloc(cur_text_section
, vtop
->sym
,
436 ind
+ 1, R_X86_64_PC32
);
438 /* put an empty PC32 relocation */
439 put_elf_reloc(symtab_section
, cur_text_section
,
440 ind
+ 1, R_X86_64_PC32
, 0);
442 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
444 /* otherwise, indirect call */
448 o(0xff); /* call/jmp *r */
449 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
453 static uint8_t arg_regs
[6] = {
454 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
456 /* Generate function call. The function address is pushed first, then
457 all the parameters in call order. This functions pops all the
458 parameters and the function address. */
459 void gfunc_call(int nb_args
)
461 int size
, align
, r
, args_size
, i
, func_call
;
466 int sse_reg
, gen_reg
;
468 /* calculate the number of integer/float arguments */
470 for(i
= 0; i
< nb_args
; i
++) {
471 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
472 args_size
+= type_size(&vtop
->type
, &align
);
473 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
475 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
477 if (nb_sse_args
> 8) args_size
+= 8;
480 if (nb_reg_args
> 6) args_size
+= 8;
484 /* for struct arguments, we need to call memcpy and the function
485 call breaks register passing arguments we are preparing.
486 So, we process arguments which will be passed by stack first. */
488 gen_reg
= nb_reg_args
;
489 sse_reg
= nb_sse_args
;
490 /* adjust stack to align SSE boundary */
491 if (args_size
&= 8) {
492 o(0x50); /* push $rax */
494 for(i
= 0; i
< nb_args
; i
++) {
495 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
496 size
= type_size(&vtop
->type
, &align
);
497 /* align to stack align size */
498 size
= (size
+ 3) & ~3;
499 /* allocate the necessary size on stack */
501 oad(0xec81, size
); /* sub $xxx, %rsp */
502 /* generate structure store */
504 o(0x48 + REX_BASE(r
));
505 o(0x89); /* mov %rsp, r */
508 /* following code breaks vtop[1] */
509 SValue tmp
= vtop
[1];
510 vset(&vtop
->type
, r
| VT_LVAL
, 0);
516 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
519 oad(0xec8148, size
); /* sub $xxx, %rsp */
520 o(0x7cdb); /* fstpt 0(%rsp) */
524 } else if (is_sse_float(vtop
->type
.t
)) {
528 o(0x50); /* push $rax */
529 /* movq %xmm0, (%rsp) */
537 /* XXX: implicit cast ? */
540 o(0x50 + r
); /* push r */
548 /* then, we prepare register passing arguments.
549 Note that we cannot set RDX and RCX in this loop because gv()
550 may break these temporary registers. Let's use R10 and R11
552 gen_reg
= nb_reg_args
;
553 sse_reg
= nb_sse_args
;
554 for(i
= 0; i
< nb_args
; i
++) {
555 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
556 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
557 } else if (is_sse_float(vtop
->type
.t
)) {
560 gv(RC_FLOAT
); /* only one float register */
561 /* movaps %xmm0, %xmmN */
563 o(0xc0 + (sse_reg
<< 3));
568 /* XXX: implicit cast ? */
573 o(0xc0 + r
* 8 + arg_regs
[j
]);
576 /* j=2: r10, j=3: r11 */
580 /* j=4: r8, j=5: r9 */
581 o(0xc0 + r
* 8 + j
- 4);
588 /* Copy R10 and R11 into RDX and RCX, respectively */
589 if (nb_reg_args
> 2) {
590 o(0xd2894c); /* mov %r10, %rdx */
591 if (nb_reg_args
> 3) {
592 o(0xd9894c); /* mov %r11, %rcx */
596 save_regs(0); /* save used temporary registers */
598 func_sym
= vtop
->type
.ref
;
599 func_call
= FUNC_CALL(func_sym
->r
);
600 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
608 /* XXX: support PE? */
609 #warning "PE isn't tested at all"
610 #define FUNC_PROLOG_SIZE 12
612 #define FUNC_PROLOG_SIZE 11
615 static void push_arg_reg(int i
) {
617 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
620 /* generate function prolog of type 't' */
621 void gfunc_prolog(CType
*func_type
)
623 int i
, addr
, align
, size
, func_call
;
624 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
630 sym
= func_type
->ref
;
631 func_call
= FUNC_CALL(sym
->r
);
634 ind
+= FUNC_PROLOG_SIZE
;
635 func_sub_sp_offset
= ind
;
637 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
638 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
639 seen_reg_num
= seen_sse_num
= 0;
640 /* frame pointer and return address */
641 seen_stack_size
= PTR_SIZE
* 2;
642 /* count the number of seen parameters */
643 sym
= func_type
->ref
;
644 while ((sym
= sym
->next
) != NULL
) {
646 if (is_sse_float(type
->t
)) {
647 if (seen_sse_num
< 8) {
650 seen_stack_size
+= 8;
652 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
653 size
= type_size(type
, &align
);
654 size
= (size
+ 3) & ~3;
655 seen_stack_size
+= size
;
656 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
657 seen_stack_size
+= LDOUBLE_SIZE
;
659 if (seen_reg_num
< 6) {
662 seen_stack_size
+= 8;
668 /* movl $0x????????, -0x10(%rbp) */
670 gen_le32(seen_reg_num
* 8);
671 /* movl $0x????????, -0xc(%rbp) */
673 gen_le32(seen_sse_num
* 16 + 48);
674 /* movl $0x????????, -0x8(%rbp) */
676 gen_le32(seen_stack_size
);
678 /* save all register passing arguments */
679 for (i
= 0; i
< 8; i
++) {
681 o(0xd60f66); /* movq */
682 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
683 /* movq $0, loc+8(%rbp) */
688 for (i
= 0; i
< 6; i
++) {
693 sym
= func_type
->ref
;
698 /* if the function returns a structure, then add an
699 implicit pointer parameter */
701 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
702 push_arg_reg(reg_param_index
);
709 /* define parameters */
710 while ((sym
= sym
->next
) != NULL
) {
712 size
= type_size(type
, &align
);
713 size
= (size
+ 3) & ~3;
714 if (is_sse_float(type
->t
)) {
715 if (sse_param_index
< 8) {
716 /* save arguments passed by register */
718 o(0xd60f66); /* movq */
719 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
726 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
727 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
731 if (reg_param_index
< 6) {
732 /* save arguments passed by register */
733 push_arg_reg(reg_param_index
);
741 sym_push(sym
->v
& ~SYM_FIELD
, type
,
742 VT_LOCAL
| VT_LVAL
, param_addr
);
747 /* generate function epilog */
748 void gfunc_epilog(void)
753 if (func_ret_sub
== 0) {
758 g(func_ret_sub
>> 8);
760 /* align local size to word & save local variables */
761 v
= (-loc
+ 15) & -16;
763 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
766 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
767 oad(0xb8, v
); /* mov stacksize, %eax */
768 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
769 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
773 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
774 o(0xec8148); /* sub rsp, stacksize */
776 #if FUNC_PROLOG_SIZE == 12
777 o(0x90); /* adjust to FUNC_PROLOG_SIZE */
783 /* generate a jump to a label */
786 return psym(0xe9, t
);
789 /* generate a jump to a fixed address */
790 void gjmp_addr(int a
)
798 oad(0xe9, a
- ind
- 5);
802 /* generate a test. set 'inv' to invert test. Stack entry is popped */
803 int gtst(int inv
, int t
)
807 v
= vtop
->r
& VT_VALMASK
;
809 /* fast case : can jump directly since flags are set */
811 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
812 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
813 /* && or || optimization */
814 if ((v
& 1) == inv
) {
815 /* insert vtop->c jump list in t */
818 p
= (int *)(cur_text_section
->data
+ *p
);
826 /* XXX: not tested */
827 if (is_float(vtop
->type
.t
) ||
828 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
832 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
833 /* constant jmp optimization */
834 if ((vtop
->c
.i
!= 0) != inv
)
841 t
= psym(0x85 ^ inv
, t
);
848 /* generate an integer binary operation */
855 case TOK_ADDC1
: /* add with carry generation */
858 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
859 !is64_type(vtop
->type
.t
)) {
863 if (is64_type(vtop
->type
.t
)) {
864 o(0x48 | REX_BASE(r
));
869 /* XXX: generate inc and dec for smaller code ? */
871 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
875 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
882 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
883 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
884 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
886 o((opc
<< 3) | 0x01);
887 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
890 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
896 case TOK_SUBC1
: /* sub with carry generation */
899 case TOK_ADDC2
: /* add with carry use */
902 case TOK_SUBC2
: /* sub with carry use */
918 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
919 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
920 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
923 o(0xaf0f); /* imul fr, r */
924 o(0xc0 + fr
+ r
* 8);
935 opc
= 0xc0 | (opc
<< 3);
936 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
940 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
941 o(0x48 | REX_BASE(r
));
948 o(0xc1); /* shl/shr/sar $xxx, r */
952 /* we generate the shift in ecx */
955 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
956 o(0x48 | REX_BASE(r
));
958 o(0xd3); /* shl/shr/sar %cl, r */
969 /* first operand must be in eax */
970 /* XXX: need better constraint for second operand */
976 if (op
== TOK_UMULL
) {
977 o(0xf7); /* mul fr */
982 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
983 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
986 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
987 o(0x9948); /* cqto */
988 o(0x48 + REX_BASE(fr
));
992 o(0xf7); /* idiv fr, %eax */
995 if (op
== '%' || op
== TOK_UMOD
)
1008 void gen_opl(int op
)
1013 /* generate a floating point operation 'v = t1 op t2' instruction. The
1014 two operands are guaranteed to have the same floating point type */
1015 /* XXX: need to use ST1 too */
1016 void gen_opf(int op
)
1018 int a
, ft
, fc
, swapped
, r
;
1020 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1022 /* convert constants to memory references */
1023 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1028 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1031 /* must put at least one value in the floating point register */
1032 if ((vtop
[-1].r
& VT_LVAL
) &&
1033 (vtop
[0].r
& VT_LVAL
)) {
1039 /* swap the stack if needed so that t1 is the register and t2 is
1040 the memory reference */
1041 if (vtop
[-1].r
& VT_LVAL
) {
1045 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1046 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1047 /* load on stack second operand */
1048 load(TREG_ST0
, vtop
);
1049 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1050 if (op
== TOK_GE
|| op
== TOK_GT
)
1052 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1055 o(0xc9d9); /* fxch %st(1) */
1056 o(0xe9da); /* fucompp */
1057 o(0xe0df); /* fnstsw %ax */
1059 o(0x45e480); /* and $0x45, %ah */
1060 o(0x40fC80); /* cmp $0x40, %ah */
1061 } else if (op
== TOK_NE
) {
1062 o(0x45e480); /* and $0x45, %ah */
1063 o(0x40f480); /* xor $0x40, %ah */
1065 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1066 o(0x05c4f6); /* test $0x05, %ah */
1069 o(0x45c4f6); /* test $0x45, %ah */
1076 /* no memory reference possible for long double operations */
1077 load(TREG_ST0
, vtop
);
1101 o(0xde); /* fxxxp %st, %st(1) */
1106 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1107 /* if saved lvalue, then we must reload it */
1110 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1112 r
= get_reg(RC_INT
);
1114 v1
.r
= VT_LOCAL
| VT_LVAL
;
1120 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1123 if (op
== TOK_LE
|| op
== TOK_LT
)
1125 if (op
== TOK_LE
|| op
== TOK_GE
) {
1126 op
= 0x93; /* setae */
1128 op
= 0x97; /* seta */
1133 o(0x7e0ff3); /* movq */
1134 gen_modrm(1, r
, vtop
->sym
, fc
);
1136 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1139 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1142 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1145 o(0x2e0f); /* ucomisd */
1146 gen_modrm(0, r
, vtop
->sym
, fc
);
1153 /* no memory reference possible for long double operations */
1154 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1155 load(TREG_XMM0
, vtop
);
1175 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1176 o(0xde); /* fxxxp %st, %st(1) */
1179 /* if saved lvalue, then we must reload it */
1181 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1183 r
= get_reg(RC_INT
);
1185 v1
.r
= VT_LOCAL
| VT_LVAL
;
1191 /* movq %xmm0,%xmm1 */
1194 load(TREG_XMM0
, vtop
);
1195 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1196 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1205 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1212 gen_modrm(0, r
, vtop
->sym
, fc
);
1220 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1221 and 'long long' cases. */
1222 void gen_cvt_itof(int t
)
1224 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1227 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1228 /* signed long long to float/double/long double (unsigned case
1229 is handled generically) */
1230 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1231 o(0x242cdf); /* fildll (%rsp) */
1232 o(0x08c48348); /* add $8, %rsp */
1233 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1234 (VT_INT
| VT_UNSIGNED
)) {
1235 /* unsigned int to float/double/long double */
1236 o(0x6a); /* push $0 */
1238 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1239 o(0x242cdf); /* fildll (%rsp) */
1240 o(0x10c48348); /* add $16, %rsp */
1242 /* int to float/double/long double */
1243 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1244 o(0x2404db); /* fildl (%rsp) */
1245 o(0x08c48348); /* add $8, %rsp */
1249 save_reg(TREG_XMM0
);
1251 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1252 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1253 (VT_INT
| VT_UNSIGNED
) ||
1254 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1258 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1259 vtop
->r
= TREG_XMM0
;
1263 /* convert from one floating point type to another */
1264 void gen_cvt_ftof(int t
)
1272 if (bt
== VT_FLOAT
) {
1274 if (tbt
== VT_DOUBLE
) {
1275 o(0xc0140f); /* unpcklps */
1276 o(0xc05a0f); /* cvtps2pd */
1277 } else if (tbt
== VT_LDOUBLE
) {
1278 /* movss %xmm0,-0x10(%rsp) */
1281 o(0xf02444d9); /* flds -0x10(%rsp) */
1284 } else if (bt
== VT_DOUBLE
) {
1286 if (tbt
== VT_FLOAT
) {
1287 o(0xc0140f66); /* unpcklpd */
1288 o(0xc05a0f66); /* cvtpd2ps */
1289 } else if (tbt
== VT_LDOUBLE
) {
1290 /* movsd %xmm0,-0x10(%rsp) */
1293 o(0xf02444dd); /* fldl -0x10(%rsp) */
1298 if (tbt
== VT_DOUBLE
) {
1299 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1300 /* movsd -0x10(%rsp),%xmm0 */
1303 vtop
->r
= TREG_XMM0
;
1304 } else if (tbt
== VT_FLOAT
) {
1305 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1306 /* movss -0x10(%rsp),%xmm0 */
1309 vtop
->r
= TREG_XMM0
;
1314 /* convert fp to int 't' type */
1315 void gen_cvt_ftoi(int t
)
1317 int ft
, bt
, size
, r
;
1320 if (bt
== VT_LDOUBLE
) {
1321 gen_cvt_ftof(VT_DOUBLE
);
1331 r
= get_reg(RC_INT
);
1332 if (bt
== VT_FLOAT
) {
1334 } else if (bt
== VT_DOUBLE
) {
1340 o(0x48 + REX_BASE(r
));
1342 o(0x2c0f); /* cvttss2si or cvttsd2si */
1343 o(0xc0 + (REG_VALUE(r
) << 3));
1347 /* computed goto support */
1354 /* end of x86-64 code generator */
1355 /*************************************************************/