2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 /* number of available registers */
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see gv2() code which does
30 assumptions on it). */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
36 #define RC_XMM0 0x0020
37 #define RC_ST0 0x0040 /* only for long double */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
60 #define REX_BASE(reg) (((reg) >> 3) & 1)
61 #define REG_VALUE(reg) ((reg) & 7)
63 const int reg_classes
[NB_REGS
] = {
64 /* eax */ RC_INT
| RC_RAX
,
65 /* ecx */ RC_INT
| RC_RCX
,
66 /* edx */ RC_INT
| RC_RDX
,
67 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
71 /* return registers for function */
72 #define REG_IRET TREG_RAX /* single word int return register */
73 #define REG_LRET TREG_RDX /* second word return register (for long long) */
74 #define REG_FRET TREG_XMM0 /* float return register */
76 /* defined if function parameters must be evaluated in reverse order */
77 #define INVERT_FUNC_PARAMS
79 /* pointer size, in bytes */
82 /* long double size and alignment, in bytes */
83 #define LDOUBLE_SIZE 16
84 #define LDOUBLE_ALIGN 8
85 /* maximum alignment (for aligned attribute support) */
88 /******************************************************/
91 #define EM_TCC_TARGET EM_X86_64
93 /* relocation type for 32 bit data relocation */
94 #define R_DATA_32 R_X86_64_32
95 #define R_DATA_PTR R_X86_64_64
96 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
97 #define R_COPY R_X86_64_COPY
99 #define ELF_START_ADDR 0x08048000
100 #define ELF_PAGE_SIZE 0x1000
102 /******************************************************/
/* Byte offset (in the text section) of the "sub $xx, %rsp" emitted by the
   function prolog; gfunc_epilog() rewinds 'ind' here to patch in the final
   frame size once all locals are known. */
104 static unsigned long func_sub_sp_offset
;
/* Extra immediate for 'ret $n' at function return; 0 means a plain 'ret'.
   NOTE(review): presumably used for callee-pops conventions — confirm. */
105 static int func_ret_sub
;
107 /* XXX: make it faster ? */
112 if (ind1
> cur_text_section
->data_allocated
)
113 section_realloc(cur_text_section
, ind1
);
114 cur_text_section
->data
[ind
] = c
;
118 void o(unsigned int c
)
140 void gen_le64(int64_t c
)
152 /* output a symbol and patch all calls to it */
/* 't' is the head of a chain of 32-bit patch slots threaded through the
   code section (each slot stores the offset of the next); 'a' is the now
   resolved target.  NOTE(review): the loop that walks the chain and writes
   the final displacements is on lines missing from this extract. */
153 void gsym_addr(int t
, int a
)
/* locate the current patch slot inside the text section */
157 ptr
= (int *)(cur_text_section
->data
+ t
);
158 n
= *ptr
; /* next value */
169 /* psym is used to put an instruction with a data field which is a
170 reference to a symbol. It is in fact the same as oad ! */
/* Return non-zero if the basic type of 't' occupies a full 64-bit word on
   x86-64: pointers, function designators and long long all do.  Used to
   decide when a REX.W prefix / 64-bit move is required. */
173 static int is64_type(int t
)
175 return ((t
& VT_BTYPE
) == VT_PTR
||
176 (t
& VT_BTYPE
) == VT_FUNC
||
177 (t
& VT_BTYPE
) == VT_LLONG
);
/* Return non-zero if 't' is passed/returned in SSE registers (float or
   double); long double is instead handled on the x87 stack (RC_ST0). */
180 static int is_sse_float(int t
) {
/* NOTE(review): 'bt' is assigned on a line missing from this extract —
   presumably bt = t & VT_BTYPE; confirm against the full source. */
183 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
186 /* instruction + 4 bytes data. Return the address of the data */
/* Emits opcode 'c' followed by the 32-bit immediate 's'; the returned
   offset lets callers chain jump patches (see gsym_addr/psym). */
187 static int oad(int c
, int s
)
/* grow the code buffer if the 4 data bytes would overflow it */
193 if (ind1
> cur_text_section
->data_allocated
)
194 section_realloc(cur_text_section
, ind1
);
/* store the 32-bit immediate at the current output position */
195 *(int *)(cur_text_section
->data
+ ind
) = s
;
/* Output a 32-bit constant 'c'; when 'r & VT_SYM' is set, a R_X86_64_32
   relocation against 'sym' is emitted at the current position first.
   NOTE(review): the VT_SYM test and the gen_le32(c) call are on lines
   missing from this extract. */
201 static void gen_addr32(int r
, Sym
*sym
, int c
)
204 greloc(cur_text_section
, sym
, ind
, R_X86_64_32
);
208 /* output constant with relocation if 'r & VT_SYM' is true */
/* 64-bit variant of gen_addr32: emits a R_X86_64_64 relocation, then (on
   a line not visible here) the 8 constant bytes of 'c' themselves. */
209 static void gen_addr64(int r
, Sym
*sym
, int64_t c
)
212 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
216 /* output constant with relocation if 'r & VT_SYM' is true */
/* PC-relative flavor: emits a R_X86_64_PC32 relocation, used for
   rip-relative addressing and call/jump displacements.  The constant
   bytes themselves are emitted on a line missing from this extract. */
217 static void gen_addrpc32(int r
, Sym
*sym
, int c
)
220 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
224 /* output got address with relocation */
/* Emits a rip-relative reference to sym's GOT slot (non-PE targets use a
   real R_X86_64_GOTPCREL; the #else path falls back to plain PC32), then
   an "add $c, %reg" to apply the displacement 'c'. */
225 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
227 #ifndef TCC_TARGET_PE
230 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
/* grab the relocation just appended so it can be inspected/adjusted */
231 sr
= cur_text_section
->reloc
;
232 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
/* debug dump of the sym, displacement and the last emitted code bytes;
   NOTE(review): presumably inside an #ifdef debug guard not visible here */
235 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
236 cur_text_section
->data
[ind
-3],
237 cur_text_section
->data
[ind
-2],
238 cur_text_section
->data
[ind
-1]
/* non-GOT fallback path: plain PC-relative relocation */
240 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
245 /* we use add c, %xxx for displacement */
246 o(0x48 + REX_BASE(r
));
/* ModRM for "add imm32, r" (register-direct form) */
248 o(0xc0 + REG_VALUE(r
));
/* Core ModRM emitter.  'op_reg' is the /reg field (opcode extension or
   source/dest register), 'r' describes the addressing mode via its
   VT_VALMASK bits, 'sym'/'c' give a symbolic or constant displacement,
   and 'is_got' selects GOT-indirect access for PIC. */
253 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
/* move the register number into ModRM bits 3..5 */
255 op_reg
= REG_VALUE(op_reg
) << 3;
256 if ((r
& VT_VALMASK
) == VT_CONST
) {
257 /* constant memory reference */
/* PIC: go through the GOT; otherwise rip-relative direct reference */
260 gen_gotpcrel(r
, sym
, c
);
262 gen_addrpc32(r
, sym
, c
);
264 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
265 /* currently, we use only ebp as base */
267 /* short reference */
/* disp32(%rbp) form; the disp8 short form is on lines missing here */
271 oad(0x85 | op_reg
, c
);
273 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
/* register-indirect with displacement (TREG_MEM temporaries) */
275 g(0x80 | op_reg
| REG_VALUE(r
));
/* plain (reg) indirect, no displacement */
278 g(0x00 | op_reg
| REG_VALUE(r
));
/* fallback: register-indirect using the raw register number */
281 g(0x00 | op_reg
| (r
& VT_VALMASK
));
285 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
/* 32-bit convenience wrapper: never uses GOT-indirect access. */
287 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
289 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
292 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
/* 64-bit variant: emits the REX.W prefix (extending the /reg field via
   REX.R and, for register bases, the base via REX.B) before delegating
   to gen_modrm_impl.  'opcode' is the instruction byte (e.g. 0x8b/0x89).
   NOTE(review): the lines emitting 'rex' and 'opcode' themselves are
   missing from this extract. */
294 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
297 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
/* extend the base register field only for true register bases */
298 if ((r
& VT_VALMASK
) != VT_CONST
&&
299 (r
& VT_VALMASK
) != VT_LOCAL
) {
300 rex
|= REX_BASE(VT_VALMASK
& r
);
/* TREG_MEM temporaries addressing a non-static symbol go via the GOT */
304 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
305 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
309 /* load 'r' from value 'sv' */
310 void load(int r
, SValue
*sv
)
312 int v
, t
, ft
, fc
, fr
;
319 #ifndef TCC_TARGET_PE
320 /* we use indirect access via got */
321 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
322 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
323 /* use the result register as a temporal register */
324 int tr
= r
| TREG_MEM
;
326 /* we cannot use float registers as a temporal register */
327 tr
= get_reg(RC_INT
) | TREG_MEM
;
329 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
331 /* load from the temporal register */
338 if (v
== VT_LLOCAL
) {
340 v1
.r
= VT_LOCAL
| VT_LVAL
;
345 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
346 o(0x6e0f66); /* movd */
348 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
349 o(0x7e0ff3); /* movq */
351 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
354 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
355 o(0xbe0f); /* movsbl */
356 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
357 o(0xb60f); /* movzbl */
358 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
359 o(0xbf0f); /* movswl */
360 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
361 o(0xb70f); /* movzwl */
362 } else if (is64_type(ft
)) {
363 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
368 gen_modrm(r
, fr
, sv
->sym
, fc
);
374 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
375 gen_addrpc32(fr
, sv
->sym
, fc
);
377 if (sv
->sym
->type
.t
& VT_STATIC
) {
379 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
380 gen_addrpc32(fr
, sv
->sym
, fc
);
383 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
384 gen_gotpcrel(r
, sv
->sym
, fc
);
387 } else if (is64_type(ft
)) {
389 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
392 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
395 } else if (v
== VT_LOCAL
) {
396 o(0x48 | REX_BASE(r
));
397 o(0x8d); /* lea xxx(%ebp), r */
398 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
399 } else if (v
== VT_CMP
) {
400 oad(0xb8 + r
, 0); /* mov $0, r */
401 o(0x0f); /* setxx %br */
404 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
406 oad(0xb8 + r
, t
); /* mov $1, r */
407 o(0x05eb); /* jmp after */
409 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
411 if (r
== TREG_XMM0
) {
412 assert(v
== TREG_ST0
);
413 /* gen_cvt_ftof(VT_DOUBLE); */
414 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
415 /* movsd -0x10(%rsp),%xmm0 */
418 } else if (r
== TREG_ST0
) {
419 assert(v
== TREG_XMM0
);
420 /* gen_cvt_ftof(VT_LDOUBLE); */
421 /* movsd %xmm0,-0x10(%rsp) */
424 o(0xf02444dd); /* fldl -0x10(%rsp) */
426 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
428 o(0xc0 + r
+ v
* 8); /* mov v, r */
434 /* store register 'r' in lvalue 'v' */
435 void store(int r
, SValue
*v
)
439 /* store the REX prefix in this variable when PIC is enabled */
444 fr
= v
->r
& VT_VALMASK
;
447 #ifndef TCC_TARGET_PE
448 /* we need to access the variable via got */
449 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
450 /* mov xx(%rip), %r11 */
452 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
453 pic
= is64_type(bt
) ? 0x49 : 0x41;
457 /* XXX: incorrect if float reg to reg */
458 if (bt
== VT_FLOAT
) {
461 o(0x7e0f); /* movd */
463 } else if (bt
== VT_DOUBLE
) {
466 o(0xd60f); /* movq */
468 } else if (bt
== VT_LDOUBLE
) {
469 o(0xc0d9); /* fld %st(0) */
477 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
479 else if (is64_type(bt
))
485 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
490 if (fr
== VT_CONST
||
493 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
494 } else if (fr
!= r
) {
495 /* XXX: don't we really come here? */
497 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
500 if (fr
== VT_CONST
||
503 gen_modrm(r
, v
->r
, v
->sym
, fc
);
504 } else if (fr
!= r
) {
505 /* XXX: don't we really come here? */
507 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
/* Adjust %rsp by 'val' (used to pop call arguments).  A short encoding
   is used when 'val' fits in a signed byte; NOTE(review): the bytes of
   that short form are on lines missing from this extract. */
512 static void gadd_sp(int val
)
514 if (val
== (char)val
) {
518 oad(0xc48148, val
); /* add $xxx, %rsp */
522 /* 'is_jmp' is '1' if it is a jump */
/* Emit a call (0xe8) or jump (0xe9) to the value on top of the value
   stack: direct with a PC32 relocation when it is a constant/symbol,
   otherwise indirect through a register. */
523 static void gcall_or_jmp(int is_jmp
)
526 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
528 if (vtop
->r
& VT_SYM
) {
529 /* relocation case */
/* +1 skips the opcode byte so the reloc covers the rel32 field */
530 greloc(cur_text_section
, vtop
->sym
,
531 ind
+ 1, R_X86_64_PC32
);
533 /* put an empty PC32 relocation */
534 put_elf_reloc(symtab_section
, cur_text_section
,
535 ind
+ 1, R_X86_64_PC32
, 0);
/* -4 accounts for rel32 being relative to the next instruction */
537 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
539 /* otherwise, indirect call */
543 o(0xff); /* call/jmp *r */
/* ModRM: /2 (0xd0) for call, /4 (0xe0 via is_jmp<<4) for jmp */
544 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
/* Integer argument register order.  First table: RCX,RDX,R8,R9 —
   presumably the Win64 (TCC_TARGET_PE) convention; second table of REGN
   entries: RDI,RSI,RDX,RCX,R8,R9 — the System V AMD64 convention.
   NOTE(review): the surrounding #ifdef/#else and closing braces are on
   lines missing from this extract. */
550 static const uint8_t arg_regs
[] = {
551 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
555 static const uint8_t arg_regs
[REGN
] = {
556 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
560 /* Generate function call. The function address is pushed first, then
561 all the parameters in call order. This functions pops all the
562 parameters and the function address. */
563 void gfunc_call(int nb_args
)
565 int size
, align
, r
, args_size
, i
;
569 int sse_reg
, gen_reg
;
571 /* calculate the number of integer/float arguments */
573 for(i
= 0; i
< nb_args
; i
++) {
574 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
575 args_size
+= type_size(&vtop
->type
, &align
);
576 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
578 #ifndef TCC_TARGET_PE
579 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
581 if (nb_sse_args
> 8) args_size
+= 8;
585 if (nb_reg_args
> REGN
) args_size
+= 8;
589 /* for struct arguments, we need to call memcpy and the function
590 call breaks register passing arguments we are preparing.
591 So, we process arguments which will be passed by stack first. */
593 gen_reg
= nb_reg_args
;
594 sse_reg
= nb_sse_args
;
597 save_regs(0); /* save used temporary registers */
600 /* adjust stack to align SSE boundary */
601 if (args_size
&= 8) {
602 o(0x50); /* push $rax */
604 for(i
= 0; i
< nb_args
; i
++) {
605 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
606 size
= type_size(&vtop
->type
, &align
);
607 /* align to stack align size */
608 size
= (size
+ 3) & ~3;
609 /* allocate the necessary size on stack */
611 oad(0xec81, size
); /* sub $xxx, %rsp */
612 /* generate structure store */
614 o(0x48 + REX_BASE(r
));
615 o(0x89); /* mov %rsp, r */
618 /* following code breaks vtop[1] */
619 SValue tmp
= vtop
[1];
620 vset(&vtop
->type
, r
| VT_LVAL
, 0);
626 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
629 oad(0xec8148, size
); /* sub $xxx, %rsp */
630 o(0x7cdb); /* fstpt 0(%rsp) */
634 } else if (is_sse_float(vtop
->type
.t
)) {
643 o(0x50); /* push $rax */
644 /* movq %xmm0, (%rsp) */
652 /* XXX: implicit cast ? */
655 o(0x50 + r
); /* push r */
663 /* then, we prepare register passing arguments.
664 Note that we cannot set RDX and RCX in this loop because gv()
665 may break these temporary registers. Let's use R10 and R11
667 gen_reg
= nb_reg_args
;
668 sse_reg
= nb_sse_args
;
669 for(i
= 0; i
< nb_args
; i
++) {
670 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
671 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
672 } else if (is_sse_float(vtop
->type
.t
)) {
677 gv(RC_FLOAT
); /* only one float register */
678 /* movaps %xmm0, %xmmN */
682 o(0xd60f66); /* movq %xmm0, (%rsp) */
683 o(0x2404 + (j
<< 3));
685 o(0x58 + d
); /* pop d */
694 /* XXX: implicit cast ? */
704 o(0xc0 + r
* 8 + d
- 8);
711 gv(RC_FLOAT
); /* only one float register */
712 /* movaps %xmm0, %xmmN */
714 o(0xc0 + (sse_reg
<< 3));
719 /* XXX: implicit cast ? */
724 o(0xc0 + r
* 8 + arg_regs
[j
]);
727 /* j=2: r10, j=3: r11 */
731 /* j=4: r8, j=5: r9 */
732 o(0xc0 + r
* 8 + j
- 4);
741 /* allocate scratch space */
745 save_regs(0); /* save used temporary registers */
747 /* Copy R10 and R11 into RDX and RCX, respectively */
748 if (nb_reg_args
> 2) {
749 o(0xd2894c); /* mov %r10, %rdx */
750 if (nb_reg_args
> 3) {
751 o(0xd9894c); /* mov %r11, %rcx */
755 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
763 #define FUNC_PROLOG_SIZE 11
/* Spill the i-th integer argument register to a fresh stack slot at
   'loc' during the prolog (mov %arg_regs[i], loc(%rbp)).
   NOTE(review): the line adjusting 'loc' is missing from this extract. */
765 static void push_arg_reg(int i
) {
767 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
770 /* generate function prolog of type 't' */
771 void gfunc_prolog(CType
*func_type
)
773 int i
, addr
, align
, size
;
774 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
780 sym
= func_type
->ref
;
783 ind
+= FUNC_PROLOG_SIZE
;
784 func_sub_sp_offset
= ind
;
786 #ifndef TCC_TARGET_PE
787 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
788 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
789 seen_reg_num
= seen_sse_num
= 0;
790 /* frame pointer and return address */
791 seen_stack_size
= PTR_SIZE
* 2;
792 /* count the number of seen parameters */
793 sym
= func_type
->ref
;
794 while ((sym
= sym
->next
) != NULL
) {
796 if (is_sse_float(type
->t
)) {
797 if (seen_sse_num
< 8) {
800 seen_stack_size
+= 8;
802 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
803 size
= type_size(type
, &align
);
804 size
= (size
+ 3) & ~3;
805 seen_stack_size
+= size
;
806 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
807 seen_stack_size
+= LDOUBLE_SIZE
;
809 if (seen_reg_num
< REGN
) {
812 seen_stack_size
+= 8;
818 /* movl $0x????????, -0x10(%rbp) */
820 gen_le32(seen_reg_num
* 8);
821 /* movl $0x????????, -0xc(%rbp) */
823 gen_le32(seen_sse_num
* 16 + 48);
824 /* movl $0x????????, -0x8(%rbp) */
826 gen_le32(seen_stack_size
);
828 /* save all register passing arguments */
829 for (i
= 0; i
< 8; i
++) {
831 o(0xd60f66); /* movq */
832 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
833 /* movq $0, loc+8(%rbp) */
838 for (i
= 0; i
< REGN
; i
++) {
839 push_arg_reg(REGN
-1-i
);
844 sym
= func_type
->ref
;
849 /* if the function returns a structure, then add an
850 implicit pointer parameter */
852 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
853 push_arg_reg(reg_param_index
);
860 /* define parameters */
861 while ((sym
= sym
->next
) != NULL
) {
863 size
= type_size(type
, &align
);
864 size
= (size
+ 3) & ~3;
865 #ifndef TCC_TARGET_PE
866 if (is_sse_float(type
->t
)) {
867 if (sse_param_index
< 8) {
868 /* save arguments passed by register */
870 o(0xd60f66); /* movq */
871 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
880 if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
881 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
886 if (reg_param_index
< REGN
) {
887 /* save arguments passed by register */
888 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
893 if (reg_param_index
< REGN
) {
894 /* save arguments passed by register */
895 push_arg_reg(reg_param_index
);
904 sym_push(sym
->v
& ~SYM_FIELD
, type
,
905 VT_LOCAL
| VT_LVAL
, param_addr
);
909 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
910 for (i
= reg_param_index
; i
< REGN
; ++i
) {
911 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, addr
);
918 /* generate function epilog */
/* Emits the return sequence ('ret' or 'ret $n' when func_ret_sub != 0),
   then rewinds 'ind' to the prolog placeholder reserved by gfunc_prolog
   and fills in the real frame setup now that the frame size is known. */
919 void gfunc_epilog(void)
924 if (func_ret_sub
== 0) {
/* second byte of the 16-bit 'ret $n' immediate */
929 g(func_ret_sub
>> 8);
931 /* align local size to word & save local variables */
/* round the (negative-growing) local area up to 16 bytes for the ABI */
932 v
= (-loc
+ 15) & -16;
/* go back and patch the prolog placeholder */
934 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
/* PE path: large frames must be probed page by page via __chkstk */
937 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
938 oad(0xb8, v
); /* mov stacksize, %eax */
939 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
940 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
941 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
/* non-PE path: classic frame setup */
945 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
946 o(0xec8148); /* sub rsp, stacksize */
952 /* generate a jump to a label */
/* NOTE(review): the 'int gjmp(int t)' header is on lines missing from
   this extract; emits 'jmp rel32' and returns the patch-chain offset. */
955 return psym(0xe9, t
);
958 /* generate a jump to a fixed address */
/* Long form: jmp rel32, displacement relative to the end of the 5-byte
   instruction.  NOTE(review): the short 'jmp rel8' fast path for nearby
   targets is on lines missing from this extract. */
959 void gjmp_addr(int a
)
967 oad(0xe9, a
- ind
- 5);
971 /* generate a test. set 'inv' to invert test. Stack entry is popped */
972 int gtst(int inv
, int t
)
976 v
= vtop
->r
& VT_VALMASK
;
978 /* fast case : can jump directly since flags are set */
980 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
981 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
982 /* && or || optimization */
983 if ((v
& 1) == inv
) {
984 /* insert vtop->c jump list in t */
987 p
= (int *)(cur_text_section
->data
+ *p
);
995 if (is_float(vtop
->type
.t
) ||
996 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1000 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1001 /* constant jmp optimization */
1002 if ((vtop
->c
.i
!= 0) != inv
)
1009 t
= psym(0x85 ^ inv
, t
);
1016 /* generate an integer binary operation */
1017 void gen_opi(int op
)
1023 case TOK_ADDC1
: /* add with carry generation */
1026 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
1027 !is64_type(vtop
->type
.t
)) {
1031 if (is64_type(vtop
->type
.t
)) {
1032 o(0x48 | REX_BASE(r
));
1037 /* XXX: generate inc and dec for smaller code ? */
1039 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1043 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1046 gv2(RC_INT
, RC_INT
);
1050 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1051 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1052 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
1054 o((opc
<< 3) | 0x01);
1055 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1058 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1064 case TOK_SUBC1
: /* sub with carry generation */
1067 case TOK_ADDC2
: /* add with carry use */
1070 case TOK_SUBC2
: /* sub with carry use */
1083 gv2(RC_INT
, RC_INT
);
1086 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1087 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1088 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
1091 o(0xaf0f); /* imul fr, r */
1092 o(0xc0 + fr
+ r
* 8);
1103 opc
= 0xc0 | (opc
<< 3);
1104 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1108 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1109 o(0x48 | REX_BASE(r
));
1116 o(0xc1); /* shl/shr/sar $xxx, r */
1120 /* we generate the shift in ecx */
1121 gv2(RC_INT
, RC_RCX
);
1123 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
1124 o(0x48 | REX_BASE(r
));
1126 o(0xd3); /* shl/shr/sar %cl, r */
1137 /* first operand must be in eax */
1138 /* XXX: need better constraint for second operand */
1139 gv2(RC_RAX
, RC_RCX
);
1144 if (op
== TOK_UMULL
) {
1145 o(0xf7); /* mul fr */
1147 vtop
->r2
= TREG_RDX
;
1150 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
1151 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1152 o(0xd23148); /* xor %rdx, %rdx */
1153 o(0x48 + REX_BASE(fr
));
1155 o(0xd231); /* xor %edx, %edx */
1157 o(0xf7); /* div fr, %eax */
1160 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1161 o(0x9948); /* cqto */
1162 o(0x48 + REX_BASE(fr
));
1166 o(0xf7); /* idiv fr, %eax */
1169 if (op
== '%' || op
== TOK_UMOD
)
1182 void gen_opl(int op
)
1187 /* generate a floating point operation 'v = t1 op t2' instruction. The
1188 two operands are guaranted to have the same floating point type */
1189 /* XXX: need to use ST1 too */
1190 void gen_opf(int op
)
1192 int a
, ft
, fc
, swapped
, r
;
1194 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1196 /* convert constants to memory references */
1197 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1202 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1205 /* must put at least one value in the floating point register */
1206 if ((vtop
[-1].r
& VT_LVAL
) &&
1207 (vtop
[0].r
& VT_LVAL
)) {
1213 /* swap the stack if needed so that t1 is the register and t2 is
1214 the memory reference */
1215 if (vtop
[-1].r
& VT_LVAL
) {
1219 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1220 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1221 /* load on stack second operand */
1222 load(TREG_ST0
, vtop
);
1223 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1224 if (op
== TOK_GE
|| op
== TOK_GT
)
1226 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1229 o(0xc9d9); /* fxch %st(1) */
1230 o(0xe9da); /* fucompp */
1231 o(0xe0df); /* fnstsw %ax */
1233 o(0x45e480); /* and $0x45, %ah */
1234 o(0x40fC80); /* cmp $0x40, %ah */
1235 } else if (op
== TOK_NE
) {
1236 o(0x45e480); /* and $0x45, %ah */
1237 o(0x40f480); /* xor $0x40, %ah */
1239 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1240 o(0x05c4f6); /* test $0x05, %ah */
1243 o(0x45c4f6); /* test $0x45, %ah */
1250 /* no memory reference possible for long double operations */
1251 load(TREG_ST0
, vtop
);
1275 o(0xde); /* fxxxp %st, %st(1) */
1280 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1281 /* if saved lvalue, then we must reload it */
1284 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1286 r
= get_reg(RC_INT
);
1288 v1
.r
= VT_LOCAL
| VT_LVAL
;
1294 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1297 if (op
== TOK_LE
|| op
== TOK_LT
)
1299 if (op
== TOK_LE
|| op
== TOK_GE
) {
1300 op
= 0x93; /* setae */
1302 op
= 0x97; /* seta */
1307 o(0x7e0ff3); /* movq */
1308 gen_modrm(1, r
, vtop
->sym
, fc
);
1310 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1313 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1316 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1319 o(0x2e0f); /* ucomisd */
1320 gen_modrm(0, r
, vtop
->sym
, fc
);
1327 /* no memory reference possible for long double operations */
1328 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1329 load(TREG_XMM0
, vtop
);
1349 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1350 o(0xde); /* fxxxp %st, %st(1) */
1353 /* if saved lvalue, then we must reload it */
1355 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1357 r
= get_reg(RC_INT
);
1359 v1
.r
= VT_LOCAL
| VT_LVAL
;
1365 /* movq %xmm0,%xmm1 */
1368 load(TREG_XMM0
, vtop
);
1369 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1370 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1379 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1386 gen_modrm(0, r
, vtop
->sym
, fc
);
1394 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1395 and 'long long' cases. */
1396 void gen_cvt_itof(int t
)
1398 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1401 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1402 /* signed long long to float/double/long double (unsigned case
1403 is handled generically) */
1404 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1405 o(0x242cdf); /* fildll (%rsp) */
1406 o(0x08c48348); /* add $8, %rsp */
1407 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1408 (VT_INT
| VT_UNSIGNED
)) {
1409 /* unsigned int to float/double/long double */
1410 o(0x6a); /* push $0 */
1412 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1413 o(0x242cdf); /* fildll (%rsp) */
1414 o(0x10c48348); /* add $16, %rsp */
1416 /* int to float/double/long double */
1417 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1418 o(0x2404db); /* fildl (%rsp) */
1419 o(0x08c48348); /* add $8, %rsp */
1423 save_reg(TREG_XMM0
);
1425 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1426 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1427 (VT_INT
| VT_UNSIGNED
) ||
1428 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1432 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1433 vtop
->r
= TREG_XMM0
;
1437 /* convert from one floating point type to another */
1438 void gen_cvt_ftof(int t
)
1446 if (bt
== VT_FLOAT
) {
1448 if (tbt
== VT_DOUBLE
) {
1449 o(0xc0140f); /* unpcklps */
1450 o(0xc05a0f); /* cvtps2pd */
1451 } else if (tbt
== VT_LDOUBLE
) {
1452 /* movss %xmm0,-0x10(%rsp) */
1455 o(0xf02444d9); /* flds -0x10(%rsp) */
1458 } else if (bt
== VT_DOUBLE
) {
1460 if (tbt
== VT_FLOAT
) {
1461 o(0xc0140f66); /* unpcklpd */
1462 o(0xc05a0f66); /* cvtpd2ps */
1463 } else if (tbt
== VT_LDOUBLE
) {
1464 /* movsd %xmm0,-0x10(%rsp) */
1467 o(0xf02444dd); /* fldl -0x10(%rsp) */
1472 if (tbt
== VT_DOUBLE
) {
1473 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1474 /* movsd -0x10(%rsp),%xmm0 */
1477 vtop
->r
= TREG_XMM0
;
1478 } else if (tbt
== VT_FLOAT
) {
1479 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1480 /* movss -0x10(%rsp),%xmm0 */
1483 vtop
->r
= TREG_XMM0
;
1488 /* convert fp to int 't' type */
1489 void gen_cvt_ftoi(int t
)
1491 int ft
, bt
, size
, r
;
1494 if (bt
== VT_LDOUBLE
) {
1495 gen_cvt_ftof(VT_DOUBLE
);
1505 r
= get_reg(RC_INT
);
1506 if (bt
== VT_FLOAT
) {
1508 } else if (bt
== VT_DOUBLE
) {
1514 o(0x48 + REX_BASE(r
));
1516 o(0x2c0f); /* cvttss2si or cvttsd2si */
1517 o(0xc0 + (REG_VALUE(r
) << 3));
1521 /* computed goto support */
1528 /* end of x86-64 code generator */
1529 /*************************************************************/