/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/* number of available registers */
/* NOTE(review): the NB_REGS definition, the RC_RAX/RC_RCX/RC_RDX class
   macros referenced below, and the TREG_* register name enum were lost
   in this extraction — restore them from upstream before building. */

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT 0x0001 /* generic integer register */
#define RC_FLOAT 0x0002 /* generic float register */
#define RC_XMM0 0x0020
#define RC_ST0 0x0040 /* only for long double */
#define RC_IRET RC_RAX /* function return: integer register */
#define RC_LRET RC_RDX /* function return: second integer register */
#define RC_FRET RC_XMM0 /* function return: float register */

/* pretty names for the registers */

/* REX.B/REX.R bit for a register number (set for r8..r15) */
#define REX_BASE(reg) (((reg) >> 3) & 1)
/* low 3 bits of a register number, as used in ModRM encoding */
#define REG_VALUE(reg) ((reg) & 7)
63 const int reg_classes
[NB_REGS
] = {
64 /* eax */ RC_INT
| RC_RAX
,
65 /* ecx */ RC_INT
| RC_RCX
,
66 /* edx */ RC_INT
| RC_RDX
,
67 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8 /* NOTE(review): definition line lost in extraction,
                      reconstructed (used below as PTR_SIZE * 2) */

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE 16
#define LDOUBLE_ALIGN 8
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN 8 /* NOTE(review): reconstructed — definition line missing
                       from extraction */

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32 R_X86_64_32
#define R_DATA_PTR R_X86_64_64
#define R_JMP_SLOT R_X86_64_JUMP_SLOT
#define R_COPY R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE 0x1000
/******************************************************/

/* offset in the text section of the prolog's 'sub $xx,%rsp' site;
   patched by gfunc_epilog() once the frame size is known */
static unsigned long func_sub_sp_offset;
/* byte count for 'ret n' (callee-pops calling conventions); 0 means
   a plain 'ret' */
static int func_ret_sub;
107 /* XXX: make it faster ? */
112 if (ind1
> cur_text_section
->data_allocated
)
113 section_realloc(cur_text_section
, ind1
);
114 cur_text_section
->data
[ind
] = c
;
/* emit the non-zero low bytes of 'c', least significant first; multi-byte
   opcode constants in this file are therefore written byte-reversed
   (e.g. o(0x2c0f) emits 0f 2c).
   NOTE(review): body reconstructed — only the signature survived the
   extraction; verify against upstream. */
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
/* emit a 64 bit little endian constant.
   NOTE(review): body reconstructed — only the signature survived the
   extraction; verify against upstream. */
void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
146 /* output a symbol and patch all calls to it */
147 void gsym_addr(int t
, int a
)
151 ptr
= (int *)(cur_text_section
->data
+ t
);
152 n
= *ptr
; /* next value */
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad !
   NOTE(review): the define itself was lost in extraction; reconstructed
   from the comment — verify against upstream. */
#define psym oad
167 static int is64_type(int t
)
169 return ((t
& VT_BTYPE
) == VT_PTR
||
170 (t
& VT_BTYPE
) == VT_FUNC
||
171 (t
& VT_BTYPE
) == VT_LLONG
);
174 static int is_sse_float(int t
) {
177 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
180 /* instruction + 4 bytes data. Return the address of the data */
181 static int oad(int c
, int s
)
187 if (ind1
> cur_text_section
->data_allocated
)
188 section_realloc(cur_text_section
, ind1
);
189 *(int *)(cur_text_section
->data
+ ind
) = s
;
196 /* output constant with relocation if 'r & VT_SYM' is true */
197 static void gen_addr64(int r
, Sym
*sym
, int64_t c
)
200 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
205 /* output constant with relocation if 'r & VT_SYM' is true */
206 static void gen_addrpc32(int r
, Sym
*sym
, int c
)
209 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
213 /* output got address with relocation */
214 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
216 #ifndef TCC_TARGET_PE
219 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
220 sr
= cur_text_section
->reloc
;
221 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
224 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
225 cur_text_section
->data
[ind
-3],
226 cur_text_section
->data
[ind
-2],
227 cur_text_section
->data
[ind
-1]
229 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
234 /* we use add c, %xxx for displacement */
235 o(0x48 + REX_BASE(r
));
237 o(0xc0 + REG_VALUE(r
));
242 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
244 op_reg
= REG_VALUE(op_reg
) << 3;
245 if ((r
& VT_VALMASK
) == VT_CONST
) {
246 /* constant memory reference */
249 gen_gotpcrel(r
, sym
, c
);
251 gen_addrpc32(r
, sym
, c
);
253 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
254 /* currently, we use only ebp as base */
256 /* short reference */
260 oad(0x85 | op_reg
, c
);
262 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
264 g(0x80 | op_reg
| REG_VALUE(r
));
267 g(0x00 | op_reg
| REG_VALUE(r
));
270 g(0x00 | op_reg
| (r
& VT_VALMASK
));
274 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
276 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
278 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
281 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
283 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
286 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
287 if ((r
& VT_VALMASK
) != VT_CONST
&&
288 (r
& VT_VALMASK
) != VT_LOCAL
) {
289 rex
|= REX_BASE(VT_VALMASK
& r
);
293 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
294 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
298 /* load 'r' from value 'sv' */
299 void load(int r
, SValue
*sv
)
301 int v
, t
, ft
, fc
, fr
;
308 #ifndef TCC_TARGET_PE
309 /* we use indirect access via got */
310 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
311 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
312 /* use the result register as a temporal register */
313 int tr
= r
| TREG_MEM
;
315 /* we cannot use float registers as a temporal register */
316 tr
= get_reg(RC_INT
) | TREG_MEM
;
318 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
320 /* load from the temporal register */
327 if (v
== VT_LLOCAL
) {
329 v1
.r
= VT_LOCAL
| VT_LVAL
;
334 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
335 o(0x6e0f66); /* movd */
337 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
338 o(0x7e0ff3); /* movq */
340 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
343 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
344 o(0xbe0f); /* movsbl */
345 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
346 o(0xb60f); /* movzbl */
347 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
348 o(0xbf0f); /* movswl */
349 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
350 o(0xb70f); /* movzwl */
351 } else if (is64_type(ft
)) {
352 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
357 gen_modrm(r
, fr
, sv
->sym
, fc
);
363 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
364 gen_addrpc32(fr
, sv
->sym
, fc
);
366 if (sv
->sym
->type
.t
& VT_STATIC
) {
368 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
369 gen_addrpc32(fr
, sv
->sym
, fc
);
372 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
373 gen_gotpcrel(r
, sv
->sym
, fc
);
376 } else if (is64_type(ft
)) {
378 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
381 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
384 } else if (v
== VT_LOCAL
) {
385 o(0x48 | REX_BASE(r
));
386 o(0x8d); /* lea xxx(%ebp), r */
387 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
388 } else if (v
== VT_CMP
) {
389 oad(0xb8 + r
, 0); /* mov $0, r */
390 o(0x0f); /* setxx %br */
393 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
395 oad(0xb8 + r
, t
); /* mov $1, r */
396 o(0x05eb); /* jmp after */
398 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
400 if (r
== TREG_XMM0
) {
401 assert(v
== TREG_ST0
);
402 /* gen_cvt_ftof(VT_DOUBLE); */
403 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
404 /* movsd -0x10(%rsp),%xmm0 */
407 } else if (r
== TREG_ST0
) {
408 assert(v
== TREG_XMM0
);
409 /* gen_cvt_ftof(VT_LDOUBLE); */
410 /* movsd %xmm0,-0x10(%rsp) */
413 o(0xf02444dd); /* fldl -0x10(%rsp) */
415 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
417 o(0xc0 + r
+ v
* 8); /* mov v, r */
423 /* store register 'r' in lvalue 'v' */
424 void store(int r
, SValue
*v
)
428 /* store the REX prefix in this variable when PIC is enabled */
433 fr
= v
->r
& VT_VALMASK
;
436 #ifndef TCC_TARGET_PE
437 /* we need to access the variable via got */
438 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
439 /* mov xx(%rip), %r11 */
441 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
442 pic
= is64_type(bt
) ? 0x49 : 0x41;
446 /* XXX: incorrect if float reg to reg */
447 if (bt
== VT_FLOAT
) {
450 o(0x7e0f); /* movd */
452 } else if (bt
== VT_DOUBLE
) {
455 o(0xd60f); /* movq */
457 } else if (bt
== VT_LDOUBLE
) {
458 o(0xc0d9); /* fld %st(0) */
466 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
468 else if (is64_type(bt
))
474 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
479 if (fr
== VT_CONST
||
482 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
483 } else if (fr
!= r
) {
484 /* XXX: don't we really come here? */
486 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
489 if (fr
== VT_CONST
||
492 gen_modrm(r
, v
->r
, v
->sym
, fc
);
493 } else if (fr
!= r
) {
494 /* XXX: don't we really come here? */
496 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
/* add 'val' to %rsp, using the short imm8 encoding when it fits.
   NOTE(review): short-form branch reconstructed — lines lost in
   extraction. */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348); /* add $imm8, %rsp */
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
511 /* 'is_jmp' is '1' if it is a jump */
512 static void gcall_or_jmp(int is_jmp
)
515 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
517 if (vtop
->r
& VT_SYM
) {
518 /* relocation case */
519 greloc(cur_text_section
, vtop
->sym
,
520 ind
+ 1, R_X86_64_PC32
);
522 /* put an empty PC32 relocation */
523 put_elf_reloc(symtab_section
, cur_text_section
,
524 ind
+ 1, R_X86_64_PC32
, 0);
526 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
528 /* otherwise, indirect call */
532 o(0xff); /* call/jmp *r */
533 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
539 static const uint8_t arg_regs
[] = {
540 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
544 static const uint8_t arg_regs
[REGN
] = {
545 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
549 /* Generate function call. The function address is pushed first, then
550 all the parameters in call order. This functions pops all the
551 parameters and the function address. */
552 void gfunc_call(int nb_args
)
554 int size
, align
, r
, args_size
, i
;
558 int sse_reg
, gen_reg
;
560 /* calculate the number of integer/float arguments */
562 for(i
= 0; i
< nb_args
; i
++) {
563 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
564 args_size
+= type_size(&vtop
->type
, &align
);
565 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
567 #ifndef TCC_TARGET_PE
568 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
570 if (nb_sse_args
> 8) args_size
+= 8;
574 if (nb_reg_args
> REGN
) args_size
+= 8;
578 /* for struct arguments, we need to call memcpy and the function
579 call breaks register passing arguments we are preparing.
580 So, we process arguments which will be passed by stack first. */
582 gen_reg
= nb_reg_args
;
583 sse_reg
= nb_sse_args
;
586 save_regs(0); /* save used temporary registers */
589 /* adjust stack to align SSE boundary */
590 if (args_size
&= 8) {
591 o(0x50); /* push $rax */
593 for(i
= 0; i
< nb_args
; i
++) {
594 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
595 size
= type_size(&vtop
->type
, &align
);
596 /* align to stack align size */
597 size
= (size
+ 3) & ~3;
598 /* allocate the necessary size on stack */
600 oad(0xec81, size
); /* sub $xxx, %rsp */
601 /* generate structure store */
603 o(0x48 + REX_BASE(r
));
604 o(0x89); /* mov %rsp, r */
607 /* following code breaks vtop[1] */
608 SValue tmp
= vtop
[1];
609 vset(&vtop
->type
, r
| VT_LVAL
, 0);
615 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
618 oad(0xec8148, size
); /* sub $xxx, %rsp */
619 o(0x7cdb); /* fstpt 0(%rsp) */
623 } else if (is_sse_float(vtop
->type
.t
)) {
632 o(0x50); /* push $rax */
633 /* movq %xmm0, (%rsp) */
641 /* XXX: implicit cast ? */
644 o(0x50 + r
); /* push r */
652 /* then, we prepare register passing arguments.
653 Note that we cannot set RDX and RCX in this loop because gv()
654 may break these temporary registers. Let's use R10 and R11
656 gen_reg
= nb_reg_args
;
657 sse_reg
= nb_sse_args
;
658 for(i
= 0; i
< nb_args
; i
++) {
659 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
660 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
661 } else if (is_sse_float(vtop
->type
.t
)) {
666 gv(RC_FLOAT
); /* only one float register */
667 /* movaps %xmm0, %xmmN */
671 o(0xd60f66); /* movq %xmm0, (%rsp) */
672 o(0x2404 + (j
<< 3));
674 o(0x58 + d
); /* pop d */
683 /* XXX: implicit cast ? */
693 o(0xc0 + r
* 8 + d
- 8);
700 gv(RC_FLOAT
); /* only one float register */
701 /* movaps %xmm0, %xmmN */
703 o(0xc0 + (sse_reg
<< 3));
708 /* XXX: implicit cast ? */
713 o(0xc0 + r
* 8 + arg_regs
[j
]);
716 /* j=2: r10, j=3: r11 */
720 /* j=4: r8, j=5: r9 */
721 o(0xc0 + r
* 8 + j
- 4);
730 /* allocate scratch space */
734 save_regs(0); /* save used temporary registers */
736 /* Copy R10 and R11 into RDX and RCX, respectively */
737 if (nb_reg_args
> 2) {
738 o(0xd2894c); /* mov %r10, %rdx */
739 if (nb_reg_args
> 3) {
740 o(0xd9894c); /* mov %r11, %rcx */
744 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
752 #define FUNC_PROLOG_SIZE 11
754 static void push_arg_reg(int i
) {
756 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
759 /* generate function prolog of type 't' */
760 void gfunc_prolog(CType
*func_type
)
762 int i
, addr
, align
, size
;
763 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
769 sym
= func_type
->ref
;
772 ind
+= FUNC_PROLOG_SIZE
;
773 func_sub_sp_offset
= ind
;
775 #ifndef TCC_TARGET_PE
776 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
777 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
778 seen_reg_num
= seen_sse_num
= 0;
779 /* frame pointer and return address */
780 seen_stack_size
= PTR_SIZE
* 2;
781 /* count the number of seen parameters */
782 sym
= func_type
->ref
;
783 while ((sym
= sym
->next
) != NULL
) {
785 if (is_sse_float(type
->t
)) {
786 if (seen_sse_num
< 8) {
789 seen_stack_size
+= 8;
791 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
792 size
= type_size(type
, &align
);
793 size
= (size
+ 3) & ~3;
794 seen_stack_size
+= size
;
795 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
796 seen_stack_size
+= LDOUBLE_SIZE
;
798 if (seen_reg_num
< REGN
) {
801 seen_stack_size
+= 8;
807 /* movl $0x????????, -0x10(%rbp) */
809 gen_le32(seen_reg_num
* 8);
810 /* movl $0x????????, -0xc(%rbp) */
812 gen_le32(seen_sse_num
* 16 + 48);
813 /* movl $0x????????, -0x8(%rbp) */
815 gen_le32(seen_stack_size
);
817 /* save all register passing arguments */
818 for (i
= 0; i
< 8; i
++) {
820 o(0xd60f66); /* movq */
821 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
822 /* movq $0, loc+8(%rbp) */
827 for (i
= 0; i
< REGN
; i
++) {
828 push_arg_reg(REGN
-1-i
);
833 sym
= func_type
->ref
;
838 /* if the function returns a structure, then add an
839 implicit pointer parameter */
841 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
842 push_arg_reg(reg_param_index
);
849 /* define parameters */
850 while ((sym
= sym
->next
) != NULL
) {
852 size
= type_size(type
, &align
);
853 size
= (size
+ 3) & ~3;
854 #ifndef TCC_TARGET_PE
855 if (is_sse_float(type
->t
)) {
856 if (sse_param_index
< 8) {
857 /* save arguments passed by register */
859 o(0xd60f66); /* movq */
860 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
869 if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
870 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
875 if (reg_param_index
< REGN
) {
876 /* save arguments passed by register */
877 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
882 if (reg_param_index
< REGN
) {
883 /* save arguments passed by register */
884 push_arg_reg(reg_param_index
);
893 sym_push(sym
->v
& ~SYM_FIELD
, type
,
894 VT_LOCAL
| VT_LVAL
, param_addr
);
898 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
899 for (i
= reg_param_index
; i
< REGN
; ++i
) {
900 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, addr
);
907 /* generate function epilog */
908 void gfunc_epilog(void)
913 if (func_ret_sub
== 0) {
918 g(func_ret_sub
>> 8);
920 /* align local size to word & save local variables */
921 v
= (-loc
+ 15) & -16;
923 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
926 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
927 oad(0xb8, v
); /* mov stacksize, %eax */
928 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
929 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
930 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
934 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
935 o(0xec8148); /* sub rsp, stacksize */
/* generate a jump to a label */
/* NOTE(review): signature and braces reconstructed — lines lost in
   extraction. */
int gjmp(int t)
{
    return psym(0xe9, t); /* jmp rel32; displacement chained through 't' */
}
947 /* generate a jump to a fixed address */
948 void gjmp_addr(int a
)
956 oad(0xe9, a
- ind
- 5);
960 /* generate a test. set 'inv' to invert test. Stack entry is popped */
961 int gtst(int inv
, int t
)
965 v
= vtop
->r
& VT_VALMASK
;
967 /* fast case : can jump directly since flags are set */
969 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
970 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
971 /* && or || optimization */
972 if ((v
& 1) == inv
) {
973 /* insert vtop->c jump list in t */
976 p
= (int *)(cur_text_section
->data
+ *p
);
984 if (is_float(vtop
->type
.t
) ||
985 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
989 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
990 /* constant jmp optimization */
991 if ((vtop
->c
.i
!= 0) != inv
)
998 t
= psym(0x85 ^ inv
, t
);
1005 /* generate an integer binary operation */
1006 void gen_opi(int op
)
1012 case TOK_ADDC1
: /* add with carry generation */
1015 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
1016 !is64_type(vtop
->type
.t
)) {
1020 if (is64_type(vtop
->type
.t
)) {
1021 o(0x48 | REX_BASE(r
));
1026 /* XXX: generate inc and dec for smaller code ? */
1028 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1032 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1035 gv2(RC_INT
, RC_INT
);
1039 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1040 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1041 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
1043 o((opc
<< 3) | 0x01);
1044 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1047 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1053 case TOK_SUBC1
: /* sub with carry generation */
1056 case TOK_ADDC2
: /* add with carry use */
1059 case TOK_SUBC2
: /* sub with carry use */
1072 gv2(RC_INT
, RC_INT
);
1075 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1076 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1077 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
1080 o(0xaf0f); /* imul fr, r */
1081 o(0xc0 + fr
+ r
* 8);
1092 opc
= 0xc0 | (opc
<< 3);
1093 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1097 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1098 o(0x48 | REX_BASE(r
));
1105 o(0xc1); /* shl/shr/sar $xxx, r */
1109 /* we generate the shift in ecx */
1110 gv2(RC_INT
, RC_RCX
);
1112 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
1113 o(0x48 | REX_BASE(r
));
1115 o(0xd3); /* shl/shr/sar %cl, r */
1126 /* first operand must be in eax */
1127 /* XXX: need better constraint for second operand */
1128 gv2(RC_RAX
, RC_RCX
);
1133 if (op
== TOK_UMULL
) {
1134 o(0xf7); /* mul fr */
1136 vtop
->r2
= TREG_RDX
;
1139 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
1140 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
1143 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1144 o(0x9948); /* cqto */
1145 o(0x48 + REX_BASE(fr
));
1149 o(0xf7); /* idiv fr, %eax */
1152 if (op
== '%' || op
== TOK_UMOD
)
/* 64-bit integer operations share the gen_opi() path, which widens
   based on the operand types.
   NOTE(review): body reconstructed — only the signature survived the
   extraction; verify against upstream. */
void gen_opl(int op)
{
    gen_opi(op);
}
1170 /* generate a floating point operation 'v = t1 op t2' instruction. The
1171 two operands are guaranted to have the same floating point type */
1172 /* XXX: need to use ST1 too */
1173 void gen_opf(int op
)
1175 int a
, ft
, fc
, swapped
, r
;
1177 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1179 /* convert constants to memory references */
1180 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1185 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1188 /* must put at least one value in the floating point register */
1189 if ((vtop
[-1].r
& VT_LVAL
) &&
1190 (vtop
[0].r
& VT_LVAL
)) {
1196 /* swap the stack if needed so that t1 is the register and t2 is
1197 the memory reference */
1198 if (vtop
[-1].r
& VT_LVAL
) {
1202 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1203 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1204 /* load on stack second operand */
1205 load(TREG_ST0
, vtop
);
1206 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1207 if (op
== TOK_GE
|| op
== TOK_GT
)
1209 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1212 o(0xc9d9); /* fxch %st(1) */
1213 o(0xe9da); /* fucompp */
1214 o(0xe0df); /* fnstsw %ax */
1216 o(0x45e480); /* and $0x45, %ah */
1217 o(0x40fC80); /* cmp $0x40, %ah */
1218 } else if (op
== TOK_NE
) {
1219 o(0x45e480); /* and $0x45, %ah */
1220 o(0x40f480); /* xor $0x40, %ah */
1222 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1223 o(0x05c4f6); /* test $0x05, %ah */
1226 o(0x45c4f6); /* test $0x45, %ah */
1233 /* no memory reference possible for long double operations */
1234 load(TREG_ST0
, vtop
);
1258 o(0xde); /* fxxxp %st, %st(1) */
1263 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1264 /* if saved lvalue, then we must reload it */
1267 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1269 r
= get_reg(RC_INT
);
1271 v1
.r
= VT_LOCAL
| VT_LVAL
;
1277 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1280 if (op
== TOK_LE
|| op
== TOK_LT
)
1282 if (op
== TOK_LE
|| op
== TOK_GE
) {
1283 op
= 0x93; /* setae */
1285 op
= 0x97; /* seta */
1290 o(0x7e0ff3); /* movq */
1291 gen_modrm(1, r
, vtop
->sym
, fc
);
1293 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1296 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1299 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1302 o(0x2e0f); /* ucomisd */
1303 gen_modrm(0, r
, vtop
->sym
, fc
);
1310 /* no memory reference possible for long double operations */
1311 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1312 load(TREG_XMM0
, vtop
);
1332 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1333 o(0xde); /* fxxxp %st, %st(1) */
1336 /* if saved lvalue, then we must reload it */
1338 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1340 r
= get_reg(RC_INT
);
1342 v1
.r
= VT_LOCAL
| VT_LVAL
;
1348 /* movq %xmm0,%xmm1 */
1351 load(TREG_XMM0
, vtop
);
1352 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1353 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1362 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1369 gen_modrm(0, r
, vtop
->sym
, fc
);
1377 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1378 and 'long long' cases. */
1379 void gen_cvt_itof(int t
)
1381 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1384 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1385 /* signed long long to float/double/long double (unsigned case
1386 is handled generically) */
1387 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1388 o(0x242cdf); /* fildll (%rsp) */
1389 o(0x08c48348); /* add $8, %rsp */
1390 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1391 (VT_INT
| VT_UNSIGNED
)) {
1392 /* unsigned int to float/double/long double */
1393 o(0x6a); /* push $0 */
1395 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1396 o(0x242cdf); /* fildll (%rsp) */
1397 o(0x10c48348); /* add $16, %rsp */
1399 /* int to float/double/long double */
1400 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1401 o(0x2404db); /* fildl (%rsp) */
1402 o(0x08c48348); /* add $8, %rsp */
1406 save_reg(TREG_XMM0
);
1408 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1409 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1410 (VT_INT
| VT_UNSIGNED
) ||
1411 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1415 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1416 vtop
->r
= TREG_XMM0
;
1420 /* convert from one floating point type to another */
1421 void gen_cvt_ftof(int t
)
1429 if (bt
== VT_FLOAT
) {
1431 if (tbt
== VT_DOUBLE
) {
1432 o(0xc0140f); /* unpcklps */
1433 o(0xc05a0f); /* cvtps2pd */
1434 } else if (tbt
== VT_LDOUBLE
) {
1435 /* movss %xmm0,-0x10(%rsp) */
1438 o(0xf02444d9); /* flds -0x10(%rsp) */
1441 } else if (bt
== VT_DOUBLE
) {
1443 if (tbt
== VT_FLOAT
) {
1444 o(0xc0140f66); /* unpcklpd */
1445 o(0xc05a0f66); /* cvtpd2ps */
1446 } else if (tbt
== VT_LDOUBLE
) {
1447 /* movsd %xmm0,-0x10(%rsp) */
1450 o(0xf02444dd); /* fldl -0x10(%rsp) */
1455 if (tbt
== VT_DOUBLE
) {
1456 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1457 /* movsd -0x10(%rsp),%xmm0 */
1460 vtop
->r
= TREG_XMM0
;
1461 } else if (tbt
== VT_FLOAT
) {
1462 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1463 /* movss -0x10(%rsp),%xmm0 */
1466 vtop
->r
= TREG_XMM0
;
1471 /* convert fp to int 't' type */
1472 void gen_cvt_ftoi(int t
)
1474 int ft
, bt
, size
, r
;
1477 if (bt
== VT_LDOUBLE
) {
1478 gen_cvt_ftof(VT_DOUBLE
);
1488 r
= get_reg(RC_INT
);
1489 if (bt
== VT_FLOAT
) {
1491 } else if (bt
== VT_DOUBLE
) {
1497 o(0x48 + REX_BASE(r
));
1499 o(0x2c0f); /* cvttss2si or cvttsd2si */
1500 o(0xc0 + (REG_VALUE(r
) << 3));
/* computed goto support */
/* NOTE(review): the ggoto() body that belongs here is missing from this
   extraction — restore from upstream. */

/* end of x86-64 code generator */
/*************************************************************/