2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 /* number of available registers */
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see gv2() code which does
30 assumptions on it). */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
36 #define RC_XMM0 0x0020
37 #define RC_ST0 0x0040 /* only for long double */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
60 #define REX_BASE(reg) (((reg) >> 3) & 1)
61 #define REG_VALUE(reg) ((reg) & 7)
63 int reg_classes
[NB_REGS
] = {
64 /* eax */ RC_INT
| RC_RAX
,
65 /* ecx */ RC_INT
| RC_RCX
,
66 /* edx */ RC_INT
| RC_RDX
,
67 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
71 /* return registers for function */
72 #define REG_IRET TREG_RAX /* single word int return register */
73 #define REG_LRET TREG_RDX /* second word return register (for long long) */
74 #define REG_FRET TREG_XMM0 /* float return register */
76 /* defined if function parameters must be evaluated in reverse order */
77 #define INVERT_FUNC_PARAMS
79 /* pointer size, in bytes */
82 /* long double size and alignment, in bytes */
83 #define LDOUBLE_SIZE 16
84 #define LDOUBLE_ALIGN 8
85 /* maximum alignment (for aligned attribute support) */
88 /******************************************************/
91 #define EM_TCC_TARGET EM_X86_64
93 /* relocation type for 32 bit data relocation */
94 #define R_DATA_32 R_X86_64_64
95 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
96 #define R_COPY R_X86_64_COPY
98 #define ELF_START_ADDR 0x08048000
99 #define ELF_PAGE_SIZE 0x1000
101 /******************************************************/
103 static unsigned long func_sub_sp_offset
;
104 static int func_ret_sub
;
106 /* XXX: make it faster ? */
111 if (ind1
> cur_text_section
->data_allocated
)
112 section_realloc(cur_text_section
, ind1
);
113 cur_text_section
->data
[ind
] = c
;
/* Emit an opcode given as a little-endian packed integer: bytes are
   written low byte first until the remaining value is zero (so a
   value of 0 emits nothing). */
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
/* Emit a 64-bit immediate in little-endian byte order. */
void gen_le64(int64_t c)
{
    int i;
    /* low byte first, eight bytes total */
    for (i = 0; i < 8; i++)
        g(c >> (i * 8));
}
145 /* output a symbol and patch all calls to it */
146 void gsym_addr(int t
, int a
)
150 ptr
= (int *)(cur_text_section
->data
+ t
);
151 n
= *ptr
; /* next value */
162 /* psym is used to put an instruction with a data field which is a
163 reference to a symbol. It is in fact the same as oad ! */
166 static int is64_type(int t
)
168 return ((t
& VT_BTYPE
) == VT_PTR
||
169 (t
& VT_BTYPE
) == VT_FUNC
||
170 (t
& VT_BTYPE
) == VT_LLONG
);
173 static int is_sse_float(int t
) {
176 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
179 /* instruction + 4 bytes data. Return the address of the data */
180 static int oad(int c
, int s
)
186 if (ind1
> cur_text_section
->data_allocated
)
187 section_realloc(cur_text_section
, ind1
);
188 *(int *)(cur_text_section
->data
+ ind
) = s
;
194 /* output constant with relocation if 'r & VT_SYM' is true */
195 static void gen_addr64(int r
, Sym
*sym
, int64_t c
)
198 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
202 /* output constant with relocation if 'r & VT_SYM' is true */
203 static void gen_addrpc32(int r
, Sym
*sym
, int c
)
206 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
210 /* output got address with relocation */
211 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
213 #ifndef TCC_TARGET_PE
216 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
217 sr
= cur_text_section
->reloc
;
218 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
221 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
222 cur_text_section
->data
[ind
-3],
223 cur_text_section
->data
[ind
-2],
224 cur_text_section
->data
[ind
-1]
226 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
231 /* we use add c, %xxx for displacement */
232 o(0x48 + REX_BASE(r
));
234 o(0xc0 + REG_VALUE(r
));
239 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
241 op_reg
= REG_VALUE(op_reg
) << 3;
242 if ((r
& VT_VALMASK
) == VT_CONST
) {
243 /* constant memory reference */
246 gen_gotpcrel(r
, sym
, c
);
248 gen_addrpc32(r
, sym
, c
);
250 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
251 /* currently, we use only ebp as base */
253 /* short reference */
257 oad(0x85 | op_reg
, c
);
259 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
261 g(0x80 | op_reg
| REG_VALUE(r
));
264 g(0x00 | op_reg
| REG_VALUE(r
));
267 g(0x00 | op_reg
| (r
& VT_VALMASK
));
271 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
273 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
275 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
278 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
280 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
283 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
284 if ((r
& VT_VALMASK
) != VT_CONST
&&
285 (r
& VT_VALMASK
) != VT_LOCAL
) {
286 rex
|= REX_BASE(VT_VALMASK
& r
);
290 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
291 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
295 /* load 'r' from value 'sv' */
296 void load(int r
, SValue
*sv
)
298 int v
, t
, ft
, fc
, fr
;
305 #ifndef TCC_TARGET_PE
306 /* we use indirect access via got */
307 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
308 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
309 /* use the result register as a temporal register */
310 int tr
= r
| TREG_MEM
;
312 /* we cannot use float registers as a temporal register */
313 tr
= get_reg(RC_INT
) | TREG_MEM
;
315 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
317 /* load from the temporal register */
324 if (v
== VT_LLOCAL
) {
326 v1
.r
= VT_LOCAL
| VT_LVAL
;
331 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
332 o(0x6e0f66); /* movd */
334 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
335 o(0x7e0ff3); /* movq */
337 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
340 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
341 o(0xbe0f); /* movsbl */
342 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
343 o(0xb60f); /* movzbl */
344 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
345 o(0xbf0f); /* movswl */
346 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
347 o(0xb70f); /* movzwl */
348 } else if (is64_type(ft
)) {
349 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
354 gen_modrm(r
, fr
, sv
->sym
, fc
);
357 if ((ft
& VT_BTYPE
) == VT_LLONG
) {
358 assert(!(fr
& VT_SYM
));
360 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
361 gen_addr64(fr
, sv
->sym
, sv
->c
.ull
);
364 #ifndef TCC_TARGET_PE
365 if (sv
->sym
->type
.t
& VT_STATIC
) {
368 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
369 gen_addrpc32(fr
, sv
->sym
, fc
);
370 #ifndef TCC_TARGET_PE
373 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
374 gen_gotpcrel(r
, sv
->sym
, fc
);
378 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
382 } else if (v
== VT_LOCAL
) {
383 o(0x48 | REX_BASE(r
));
384 o(0x8d); /* lea xxx(%ebp), r */
385 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
386 } else if (v
== VT_CMP
) {
387 oad(0xb8 + r
, 0); /* mov $0, r */
388 o(0x0f); /* setxx %br */
391 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
393 oad(0xb8 + r
, t
); /* mov $1, r */
394 o(0x05eb); /* jmp after */
396 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
398 if (r
== TREG_XMM0
) {
399 assert(v
== TREG_ST0
);
400 /* gen_cvt_ftof(VT_DOUBLE); */
401 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
402 /* movsd -0x10(%rsp),%xmm0 */
405 } else if (r
== TREG_ST0
) {
406 assert(v
== TREG_XMM0
);
407 /* gen_cvt_ftof(VT_LDOUBLE); */
408 /* movsd %xmm0,-0x10(%rsp) */
411 o(0xf02444dd); /* fldl -0x10(%rsp) */
413 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
415 o(0xc0 + r
+ v
* 8); /* mov v, r */
421 /* store register 'r' in lvalue 'v' */
422 void store(int r
, SValue
*v
)
426 /* store the REX prefix in this variable when PIC is enabled */
431 fr
= v
->r
& VT_VALMASK
;
434 #ifndef TCC_TARGET_PE
435 /* we need to access the variable via got */
436 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
437 /* mov xx(%rip), %r11 */
439 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
440 pic
= is64_type(bt
) ? 0x49 : 0x41;
444 /* XXX: incorrect if float reg to reg */
445 if (bt
== VT_FLOAT
) {
448 o(0x7e0f); /* movd */
450 } else if (bt
== VT_DOUBLE
) {
453 o(0xd60f); /* movq */
455 } else if (bt
== VT_LDOUBLE
) {
456 o(0xc0d9); /* fld %st(0) */
464 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
466 else if (is64_type(bt
))
472 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
477 if (fr
== VT_CONST
||
480 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
481 } else if (fr
!= r
) {
482 /* XXX: don't we really come here? */
484 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
487 if (fr
== VT_CONST
||
490 gen_modrm(r
, v
->r
, v
->sym
, fc
);
491 } else if (fr
!= r
) {
492 /* XXX: don't we really come here? */
494 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
/* add 'val' to %rsp, using the imm8 encoding when it fits in a byte */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);            /* add $imm8, %rsp */
        g(val);
    } else {
        oad(0xc48148, val);     /* add $xxx, %rsp */
    }
}
509 /* 'is_jmp' is '1' if it is a jump */
510 static void gcall_or_jmp(int is_jmp
)
513 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
515 if (vtop
->r
& VT_SYM
) {
516 /* relocation case */
517 greloc(cur_text_section
, vtop
->sym
,
518 ind
+ 1, R_X86_64_PC32
);
520 /* put an empty PC32 relocation */
521 put_elf_reloc(symtab_section
, cur_text_section
,
522 ind
+ 1, R_X86_64_PC32
, 0);
524 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
526 /* otherwise, indirect call */
530 o(0xff); /* call/jmp *r */
531 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
537 static uint8_t arg_regs
[] = {
538 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
542 static uint8_t arg_regs
[REGN
] = {
543 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
547 /* Generate function call. The function address is pushed first, then
548 all the parameters in call order. This functions pops all the
549 parameters and the function address. */
550 void gfunc_call(int nb_args
)
552 int size
, align
, r
, args_size
, i
;
556 int sse_reg
, gen_reg
;
558 /* calculate the number of integer/float arguments */
560 for(i
= 0; i
< nb_args
; i
++) {
561 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
562 args_size
+= type_size(&vtop
->type
, &align
);
563 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
565 #ifndef TCC_TARGET_PE
566 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
568 if (nb_sse_args
> 8) args_size
+= 8;
572 if (nb_reg_args
> REGN
) args_size
+= 8;
576 /* for struct arguments, we need to call memcpy and the function
577 call breaks register passing arguments we are preparing.
578 So, we process arguments which will be passed by stack first. */
580 gen_reg
= nb_reg_args
;
581 sse_reg
= nb_sse_args
;
584 save_regs(0); /* save used temporary registers */
587 /* adjust stack to align SSE boundary */
588 if (args_size
&= 8) {
589 o(0x50); /* push $rax */
591 for(i
= 0; i
< nb_args
; i
++) {
592 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
593 size
= type_size(&vtop
->type
, &align
);
594 /* align to stack align size */
595 size
= (size
+ 3) & ~3;
596 /* allocate the necessary size on stack */
598 oad(0xec81, size
); /* sub $xxx, %rsp */
599 /* generate structure store */
601 o(0x48 + REX_BASE(r
));
602 o(0x89); /* mov %rsp, r */
605 /* following code breaks vtop[1] */
606 SValue tmp
= vtop
[1];
607 vset(&vtop
->type
, r
| VT_LVAL
, 0);
613 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
616 oad(0xec8148, size
); /* sub $xxx, %rsp */
617 o(0x7cdb); /* fstpt 0(%rsp) */
621 } else if (is_sse_float(vtop
->type
.t
)) {
630 o(0x50); /* push $rax */
631 /* movq %xmm0, (%rsp) */
639 /* XXX: implicit cast ? */
642 o(0x50 + r
); /* push r */
650 /* then, we prepare register passing arguments.
651 Note that we cannot set RDX and RCX in this loop because gv()
652 may break these temporary registers. Let's use R10 and R11
654 gen_reg
= nb_reg_args
;
655 sse_reg
= nb_sse_args
;
656 for(i
= 0; i
< nb_args
; i
++) {
657 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
658 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
659 } else if (is_sse_float(vtop
->type
.t
)) {
664 gv(RC_FLOAT
); /* only one float register */
665 /* movaps %xmm0, %xmmN */
669 o(0xd60f66); /* movq %xmm0, (%rsp) */
670 o(0x2404 + (j
<< 3));
672 o(0x58 + d
); /* pop d */
681 /* XXX: implicit cast ? */
691 o(0xc0 + r
* 8 + d
- 8);
698 gv(RC_FLOAT
); /* only one float register */
699 /* movaps %xmm0, %xmmN */
701 o(0xc0 + (sse_reg
<< 3));
706 /* XXX: implicit cast ? */
711 o(0xc0 + r
* 8 + arg_regs
[j
]);
714 /* j=2: r10, j=3: r11 */
718 /* j=4: r8, j=5: r9 */
719 o(0xc0 + r
* 8 + j
- 4);
728 /* allocate scratch space */
732 save_regs(0); /* save used temporary registers */
734 /* Copy R10 and R11 into RDX and RCX, respectively */
735 if (nb_reg_args
> 2) {
736 o(0xd2894c); /* mov %r10, %rdx */
737 if (nb_reg_args
> 3) {
738 o(0xd9894c); /* mov %r11, %rcx */
742 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
750 #define FUNC_PROLOG_SIZE 11
752 static void push_arg_reg(int i
) {
754 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
757 /* generate function prolog of type 't' */
758 void gfunc_prolog(CType
*func_type
)
760 int i
, addr
, align
, size
;
761 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
767 sym
= func_type
->ref
;
770 ind
+= FUNC_PROLOG_SIZE
;
771 func_sub_sp_offset
= ind
;
773 #ifndef TCC_TARGET_PE
774 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
775 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
776 seen_reg_num
= seen_sse_num
= 0;
777 /* frame pointer and return address */
778 seen_stack_size
= PTR_SIZE
* 2;
779 /* count the number of seen parameters */
780 sym
= func_type
->ref
;
781 while ((sym
= sym
->next
) != NULL
) {
783 if (is_sse_float(type
->t
)) {
784 if (seen_sse_num
< 8) {
787 seen_stack_size
+= 8;
789 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
790 size
= type_size(type
, &align
);
791 size
= (size
+ 3) & ~3;
792 seen_stack_size
+= size
;
793 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
794 seen_stack_size
+= LDOUBLE_SIZE
;
796 if (seen_reg_num
< REGN
) {
799 seen_stack_size
+= 8;
805 /* movl $0x????????, -0x10(%rbp) */
807 gen_le32(seen_reg_num
* 8);
808 /* movl $0x????????, -0xc(%rbp) */
810 gen_le32(seen_sse_num
* 16 + 48);
811 /* movl $0x????????, -0x8(%rbp) */
813 gen_le32(seen_stack_size
);
815 /* save all register passing arguments */
816 for (i
= 0; i
< 8; i
++) {
818 o(0xd60f66); /* movq */
819 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
820 /* movq $0, loc+8(%rbp) */
825 for (i
= 0; i
< REGN
; i
++) {
826 push_arg_reg(REGN
-1-i
);
831 sym
= func_type
->ref
;
836 /* if the function returns a structure, then add an
837 implicit pointer parameter */
839 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
840 push_arg_reg(reg_param_index
);
847 /* define parameters */
848 while ((sym
= sym
->next
) != NULL
) {
850 size
= type_size(type
, &align
);
851 size
= (size
+ 3) & ~3;
852 #ifndef TCC_TARGET_PE
853 if (is_sse_float(type
->t
)) {
854 if (sse_param_index
< 8) {
855 /* save arguments passed by register */
857 o(0xd60f66); /* movq */
858 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
867 if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
868 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
873 if (reg_param_index
< REGN
) {
874 /* save arguments passed by register */
875 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
880 if (reg_param_index
< REGN
) {
881 /* save arguments passed by register */
882 push_arg_reg(reg_param_index
);
891 sym_push(sym
->v
& ~SYM_FIELD
, type
,
892 VT_LOCAL
| VT_LVAL
, param_addr
);
896 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
897 for (i
= reg_param_index
; i
< REGN
; ++i
) {
898 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, addr
);
905 /* generate function epilog */
906 void gfunc_epilog(void)
911 if (func_ret_sub
== 0) {
916 g(func_ret_sub
>> 8);
918 /* align local size to word & save local variables */
919 v
= (-loc
+ 15) & -16;
921 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
922 #if 0 // def TCC_TARGET_PE - don't have __chkstk yet, because assembler does not work
924 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
925 oad(0xb8, v
); /* mov stacksize, %eax */
926 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
927 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
931 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
932 o(0xec8148); /* sub rsp, stacksize */
/* generate a jump to a label */
int gjmp(int t)
{
    /* 0xe9: jmp rel32; psym threads the patch site into chain 't' */
    return psym(0xe9, t);
}
944 /* generate a jump to a fixed address */
945 void gjmp_addr(int a
)
953 oad(0xe9, a
- ind
- 5);
957 /* generate a test. set 'inv' to invert test. Stack entry is popped */
958 int gtst(int inv
, int t
)
962 v
= vtop
->r
& VT_VALMASK
;
964 /* fast case : can jump directly since flags are set */
966 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
967 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
968 /* && or || optimization */
969 if ((v
& 1) == inv
) {
970 /* insert vtop->c jump list in t */
973 p
= (int *)(cur_text_section
->data
+ *p
);
981 if (is_float(vtop
->type
.t
) ||
982 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
986 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
987 /* constant jmp optimization */
988 if ((vtop
->c
.i
!= 0) != inv
)
995 t
= psym(0x85 ^ inv
, t
);
1002 /* generate an integer binary operation */
1003 void gen_opi(int op
)
1009 case TOK_ADDC1
: /* add with carry generation */
1012 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
1013 !is64_type(vtop
->type
.t
)) {
1017 if (is64_type(vtop
->type
.t
)) {
1018 o(0x48 | REX_BASE(r
));
1023 /* XXX: generate inc and dec for smaller code ? */
1025 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1029 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1032 gv2(RC_INT
, RC_INT
);
1036 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1037 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1038 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
1040 o((opc
<< 3) | 0x01);
1041 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1044 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1050 case TOK_SUBC1
: /* sub with carry generation */
1053 case TOK_ADDC2
: /* add with carry use */
1056 case TOK_SUBC2
: /* sub with carry use */
1069 gv2(RC_INT
, RC_INT
);
1072 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1073 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1074 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
1077 o(0xaf0f); /* imul fr, r */
1078 o(0xc0 + fr
+ r
* 8);
1089 opc
= 0xc0 | (opc
<< 3);
1090 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1094 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1095 o(0x48 | REX_BASE(r
));
1102 o(0xc1); /* shl/shr/sar $xxx, r */
1106 /* we generate the shift in ecx */
1107 gv2(RC_INT
, RC_RCX
);
1109 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
1110 o(0x48 | REX_BASE(r
));
1112 o(0xd3); /* shl/shr/sar %cl, r */
1123 /* first operand must be in eax */
1124 /* XXX: need better constraint for second operand */
1125 gv2(RC_RAX
, RC_RCX
);
1130 if (op
== TOK_UMULL
) {
1131 o(0xf7); /* mul fr */
1133 vtop
->r2
= TREG_RDX
;
1136 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
1137 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
1140 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1141 o(0x9948); /* cqto */
1142 o(0x48 + REX_BASE(fr
));
1146 o(0xf7); /* idiv fr, %eax */
1149 if (op
== '%' || op
== TOK_UMOD
)
1162 void gen_opl(int op
)
1167 /* generate a floating point operation 'v = t1 op t2' instruction. The
1168 two operands are guaranted to have the same floating point type */
1169 /* XXX: need to use ST1 too */
1170 void gen_opf(int op
)
1172 int a
, ft
, fc
, swapped
, r
;
1174 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1176 /* convert constants to memory references */
1177 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1182 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1185 /* must put at least one value in the floating point register */
1186 if ((vtop
[-1].r
& VT_LVAL
) &&
1187 (vtop
[0].r
& VT_LVAL
)) {
1193 /* swap the stack if needed so that t1 is the register and t2 is
1194 the memory reference */
1195 if (vtop
[-1].r
& VT_LVAL
) {
1199 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1200 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1201 /* load on stack second operand */
1202 load(TREG_ST0
, vtop
);
1203 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1204 if (op
== TOK_GE
|| op
== TOK_GT
)
1206 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1209 o(0xc9d9); /* fxch %st(1) */
1210 o(0xe9da); /* fucompp */
1211 o(0xe0df); /* fnstsw %ax */
1213 o(0x45e480); /* and $0x45, %ah */
1214 o(0x40fC80); /* cmp $0x40, %ah */
1215 } else if (op
== TOK_NE
) {
1216 o(0x45e480); /* and $0x45, %ah */
1217 o(0x40f480); /* xor $0x40, %ah */
1219 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1220 o(0x05c4f6); /* test $0x05, %ah */
1223 o(0x45c4f6); /* test $0x45, %ah */
1230 /* no memory reference possible for long double operations */
1231 load(TREG_ST0
, vtop
);
1255 o(0xde); /* fxxxp %st, %st(1) */
1260 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1261 /* if saved lvalue, then we must reload it */
1264 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1266 r
= get_reg(RC_INT
);
1268 v1
.r
= VT_LOCAL
| VT_LVAL
;
1274 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1277 if (op
== TOK_LE
|| op
== TOK_LT
)
1279 if (op
== TOK_LE
|| op
== TOK_GE
) {
1280 op
= 0x93; /* setae */
1282 op
= 0x97; /* seta */
1287 o(0x7e0ff3); /* movq */
1288 gen_modrm(1, r
, vtop
->sym
, fc
);
1290 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1293 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1296 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1299 o(0x2e0f); /* ucomisd */
1300 gen_modrm(0, r
, vtop
->sym
, fc
);
1307 /* no memory reference possible for long double operations */
1308 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1309 load(TREG_XMM0
, vtop
);
1329 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1330 o(0xde); /* fxxxp %st, %st(1) */
1333 /* if saved lvalue, then we must reload it */
1335 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1337 r
= get_reg(RC_INT
);
1339 v1
.r
= VT_LOCAL
| VT_LVAL
;
1345 /* movq %xmm0,%xmm1 */
1348 load(TREG_XMM0
, vtop
);
1349 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1350 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1359 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1366 gen_modrm(0, r
, vtop
->sym
, fc
);
1374 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1375 and 'long long' cases. */
1376 void gen_cvt_itof(int t
)
1378 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1381 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1382 /* signed long long to float/double/long double (unsigned case
1383 is handled generically) */
1384 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1385 o(0x242cdf); /* fildll (%rsp) */
1386 o(0x08c48348); /* add $8, %rsp */
1387 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1388 (VT_INT
| VT_UNSIGNED
)) {
1389 /* unsigned int to float/double/long double */
1390 o(0x6a); /* push $0 */
1392 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1393 o(0x242cdf); /* fildll (%rsp) */
1394 o(0x10c48348); /* add $16, %rsp */
1396 /* int to float/double/long double */
1397 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1398 o(0x2404db); /* fildl (%rsp) */
1399 o(0x08c48348); /* add $8, %rsp */
1403 save_reg(TREG_XMM0
);
1405 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1406 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1407 (VT_INT
| VT_UNSIGNED
) ||
1408 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1412 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1413 vtop
->r
= TREG_XMM0
;
1417 /* convert from one floating point type to another */
1418 void gen_cvt_ftof(int t
)
1426 if (bt
== VT_FLOAT
) {
1428 if (tbt
== VT_DOUBLE
) {
1429 o(0xc0140f); /* unpcklps */
1430 o(0xc05a0f); /* cvtps2pd */
1431 } else if (tbt
== VT_LDOUBLE
) {
1432 /* movss %xmm0,-0x10(%rsp) */
1435 o(0xf02444d9); /* flds -0x10(%rsp) */
1438 } else if (bt
== VT_DOUBLE
) {
1440 if (tbt
== VT_FLOAT
) {
1441 o(0xc0140f66); /* unpcklpd */
1442 o(0xc05a0f66); /* cvtpd2ps */
1443 } else if (tbt
== VT_LDOUBLE
) {
1444 /* movsd %xmm0,-0x10(%rsp) */
1447 o(0xf02444dd); /* fldl -0x10(%rsp) */
1452 if (tbt
== VT_DOUBLE
) {
1453 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1454 /* movsd -0x10(%rsp),%xmm0 */
1457 vtop
->r
= TREG_XMM0
;
1458 } else if (tbt
== VT_FLOAT
) {
1459 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1460 /* movss -0x10(%rsp),%xmm0 */
1463 vtop
->r
= TREG_XMM0
;
1468 /* convert fp to int 't' type */
1469 void gen_cvt_ftoi(int t
)
1471 int ft
, bt
, size
, r
;
1474 if (bt
== VT_LDOUBLE
) {
1475 gen_cvt_ftof(VT_DOUBLE
);
1485 r
= get_reg(RC_INT
);
1486 if (bt
== VT_FLOAT
) {
1488 } else if (bt
== VT_DOUBLE
) {
1494 o(0x48 + REX_BASE(r
));
1496 o(0x2c0f); /* cvttss2si or cvttsd2si */
1497 o(0xc0 + (REG_VALUE(r
) << 3));
1501 /* computed goto support */
1508 /* end of x86-64 code generator */
1509 /*************************************************************/