/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
25 /* number of available registers */
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see gv2() code which does
30 assumptions on it). */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
36 #define RC_XMM0 0x0020
37 #define RC_ST0 0x0040 /* only for long double */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
60 #define REX_BASE(reg) (((reg) >> 3) & 1)
61 #define REG_VALUE(reg) ((reg) & 7)
/* Register class table, indexed by register number: each entry ORs the
   generic class (RC_INT / RC_FLOAT) with the register's own single-reg
   class so gv() can request either a general class or a specific
   register.
   NOTE(review): entries after xmm0 (e.g. st0) and the closing "};" are
   on lines missing from this chunk. */
63 int reg_classes
[NB_REGS
] = {
64 /* eax */ RC_INT
| RC_RAX
,
65 /* ecx */ RC_INT
| RC_RCX
,
66 /* edx */ RC_INT
| RC_RDX
,
67 /* xmm0 */ RC_FLOAT
| RC_XMM0
,
71 /* return registers for function */
72 #define REG_IRET TREG_RAX /* single word int return register */
73 #define REG_LRET TREG_RDX /* second word return register (for long long) */
74 #define REG_FRET TREG_XMM0 /* float return register */
76 /* defined if function parameters must be evaluated in reverse order */
77 #define INVERT_FUNC_PARAMS
79 /* pointer size, in bytes */
82 /* long double size and alignment, in bytes */
83 #define LDOUBLE_SIZE 16
84 #define LDOUBLE_ALIGN 8
85 /* maximum alignment (for aligned attribute support) */
88 /******************************************************/
91 #define EM_TCC_TARGET EM_X86_64
93 /* relocation type for 32 bit data relocation */
94 #define R_DATA_32 R_X86_64_64
95 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
96 #define R_COPY R_X86_64_COPY
98 #define ELF_START_ADDR 0x08048000
99 #define ELF_PAGE_SIZE 0x1000
101 /******************************************************/
/* code offset recorded by gfunc_prolog just past the reserved prolog
   bytes; gfunc_epilog rewinds 'ind' to it (minus FUNC_PROLOG_SIZE) to
   patch the stack-frame setup */
103 static unsigned long func_sub_sp_offset
;
/* byte count popped by the function's return sequence; 0 apparently
   means a plain ret (see the func_ret_sub uses in gfunc_epilog) */
104 static int func_ret_sub
;
/* Byte-output primitive of the code generator: appends one byte 'c'
   to cur_text_section, growing the buffer first when needed.
   NOTE(review): the signature line is missing from this chunk -- this
   is presumably the body of g(int c); confirm against the full file. */
106 /* XXX: make it faster ? */
/* grow the section buffer when ind1 (computed on a missing line,
   presumably ind + 1) would run past the allocated size */
111 if (ind1
> cur_text_section
->data_allocated
)
112 section_realloc(cur_text_section
, ind1
);
/* store the byte at the current output index */
113 cur_text_section
->data
[ind
] = c
;
117 void o(unsigned int c
)
133 void gen_le64(int64_t c
)
145 /* output a symbol and patch all calls to it */
/* Resolve label 't' to address 'a': walk the chain of 32-bit forward
   references threaded through the code buffer and patch each one.
   NOTE(review): the loop header, the patch store and the declarations
   of 'ptr'/'n' are on lines missing from this chunk. */
146 void gsym_addr(int t
, int a
)
/* read the next link of the forward-reference chain stored at
   offset t in the code buffer */
150 ptr
= (int *)(cur_text_section
->data
+ t
);
151 n
= *ptr
; /* next value */
162 /* psym is used to put an instruction with a data field which is a
163 reference to a symbol. It is in fact the same as oad ! */
166 static int is64_type(int t
)
168 return ((t
& VT_BTYPE
) == VT_PTR
||
169 (t
& VT_BTYPE
) == VT_FUNC
||
170 (t
& VT_BTYPE
) == VT_LLONG
);
/* Return nonzero when type 't' is a floating type passed in SSE
   registers (VT_FLOAT or VT_DOUBLE, but not long double).
   NOTE(review): 'bt' is assigned on a line missing from this chunk --
   presumably bt = t & VT_BTYPE; confirm against the full source. */
173 static int is_sse_float(int t
) {
176 return bt
== VT_DOUBLE
|| bt
== VT_FLOAT
;
179 /* instruction + 4 bytes data. Return the address of the data */
180 static int oad(int c
, int s
)
/* NOTE(review): the opening brace, the emission of opcode 'c' and the
   computation of ind1 (presumably ind + 4) are on lines missing from
   this chunk; only the buffer-growth check and the 32-bit store are
   visible below. */
/* grow the code buffer if the 4 data bytes would overflow it */
186 if (ind1
> cur_text_section
->data_allocated
)
187 section_realloc(cur_text_section
, ind1
);
/* store the 32-bit operand 's' at the current output position */
188 *(int *)(cur_text_section
->data
+ ind
) = s
;
195 /* output constant with relocation if 'r & VT_SYM' is true */
/* Emit a 64-bit absolute address: for a symbolic value, record an
   R_X86_64_64 relocation at the current output position.
   NOTE(review): the '(r & VT_SYM)' guard and the emission of the
   64-bit constant itself are on lines missing from this chunk. */
196 static void gen_addr64(int r
, Sym
*sym
, int64_t c
)
199 greloc(cur_text_section
, sym
, ind
, R_X86_64_64
);
204 /* output constant with relocation if 'r & VT_SYM' is true */
/* Emit a 32-bit PC-relative value: for a symbolic value, record an
   R_X86_64_PC32 relocation at the current output position.
   NOTE(review): the '(r & VT_SYM)' guard and the emission of the
   32-bit displacement itself are on lines missing from this chunk. */
205 static void gen_addrpc32(int r
, Sym
*sym
, int c
)
208 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
212 /* output got address with relocation */
/* Load a symbol's address through its GOT entry (PIC code): on ELF
   targets an R_X86_64_GOTPCREL relocation is recorded; under
   TCC_TARGET_PE a plain PC32 relocation is used instead.
   NOTE(review): several interior lines (local declarations, opcode
   bytes, #else/#endif, the close of the debug printf and the 'if (c)'
   around the displacement tail) are missing from this chunk. */
213 static void gen_gotpcrel(int r
, Sym
*sym
, int c
)
215 #ifndef TCC_TARGET_PE
/* record the GOT-relative relocation at the current position */
218 greloc(cur_text_section
, sym
, ind
, R_X86_64_GOTPCREL
);
/* peek at the relocation record just appended to the reloc section */
219 sr
= cur_text_section
->reloc
;
220 rel
= (ElfW(Rela
) *)(sr
->data
+ sr
->data_offset
- sizeof(ElfW(Rela
)));
/* debug trace of the bytes just emitted around 'ind' */
223 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym
->v
, NULL
), c
, r
,
224 cur_text_section
->data
[ind
-3],
225 cur_text_section
->data
[ind
-2],
226 cur_text_section
->data
[ind
-1]
/* PE path: no GOT, use a direct PC-relative relocation */
228 greloc(cur_text_section
, sym
, ind
, R_X86_64_PC32
);
/* apply any extra displacement 'c' with an add-immediate to the
   loaded register */
233 /* we use add c, %xxx for displacement */
234 o(0x48 + REX_BASE(r
));
236 o(0xc0 + REG_VALUE(r
));
/* Emit the ModRM byte (plus displacement) addressing 'r' with reg
   field 'op_reg'; 'is_got' selects GOT-indirect addressing for
   symbolic constants.
   NOTE(review): the opening brace, the is_got/else split, the short
   8-bit-displacement local case and the surrounding if/else opcode
   lines are missing from this chunk. */
241 static void gen_modrm_impl(int op_reg
, int r
, Sym
*sym
, int c
, int is_got
)
/* position the reg field in bits 5..3 of the ModRM byte */
243 op_reg
= REG_VALUE(op_reg
) << 3;
244 if ((r
& VT_VALMASK
) == VT_CONST
) {
245 /* constant memory reference */
248 gen_gotpcrel(r
, sym
, c
);
250 gen_addrpc32(r
, sym
, c
);
252 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
253 /* currently, we use only ebp as base */
255 /* short reference */
/* mod=10: 32-bit displacement off the frame pointer */
259 oad(0x85 | op_reg
, c
);
261 } else if ((r
& VT_VALMASK
) >= TREG_MEM
) {
/* mod=10 with 32-bit displacement off a general register */
263 g(0x80 | op_reg
| REG_VALUE(r
));
/* mod=00: register-indirect, no displacement */
266 g(0x00 | op_reg
| REG_VALUE(r
));
269 g(0x00 | op_reg
| (r
& VT_VALMASK
));
273 /* generate a modrm reference. 'op_reg' contains the additional 3
/* Non-GOT wrapper around gen_modrm_impl (is_got = 0).
   NOTE(review): the tail of the comment above (original line 274) and
   the function braces are on lines missing from this chunk. */
275 static void gen_modrm(int op_reg
, int r
, Sym
*sym
, int c
)
277 gen_modrm_impl(op_reg
, r
, sym
, c
, 0);
280 /* generate a modrm reference. 'op_reg' contains the additional 3
/* 64-bit variant: emits a REX.W prefix (REX.R when op_reg is an
   extended register, REX.B when the base register is), then the
   opcode and ModRM.
   NOTE(review): the function braces, the emission of rex/opcode and
   any NULL check around the 'sym->type.t' access are on lines missing
   from this chunk. */
282 static void gen_modrm64(int opcode
, int op_reg
, int r
, Sym
*sym
, int c
)
/* REX.W plus REX.R when op_reg is an extended register */
285 int rex
= 0x48 | (REX_BASE(op_reg
) << 2);
286 if ((r
& VT_VALMASK
) != VT_CONST
&&
287 (r
& VT_VALMASK
) != VT_LOCAL
) {
/* base register may also need REX.B */
288 rex
|= REX_BASE(VT_VALMASK
& r
);
/* use GOT-indirect addressing for non-static symbols accessed
   through a memory register (PIC) */
292 is_got
= (op_reg
& TREG_MEM
) && !(sym
->type
.t
& VT_STATIC
);
293 gen_modrm_impl(op_reg
, r
, sym
, c
, is_got
);
297 /* load 'r' from value 'sv' */
298 void load(int r
, SValue
*sv
)
300 int v
, t
, ft
, fc
, fr
;
307 #ifndef TCC_TARGET_PE
308 /* we use indirect access via got */
309 if ((fr
& VT_VALMASK
) == VT_CONST
&& (fr
& VT_SYM
) &&
310 (fr
& VT_LVAL
) && !(sv
->sym
->type
.t
& VT_STATIC
)) {
311 /* use the result register as a temporary register */
312 int tr
= r
| TREG_MEM
;
314 /* we cannot use float registers as a temporary register */
315 tr
= get_reg(RC_INT
) | TREG_MEM
;
317 gen_modrm64(0x8b, tr
, fr
, sv
->sym
, 0);
319 /* load from the temporal register */
326 if (v
== VT_LLOCAL
) {
328 v1
.r
= VT_LOCAL
| VT_LVAL
;
333 if ((ft
& VT_BTYPE
) == VT_FLOAT
) {
334 o(0x6e0f66); /* movd */
336 } else if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
337 o(0x7e0ff3); /* movq */
339 } else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
342 } else if ((ft
& VT_TYPE
) == VT_BYTE
) {
343 o(0xbe0f); /* movsbl */
344 } else if ((ft
& VT_TYPE
) == (VT_BYTE
| VT_UNSIGNED
)) {
345 o(0xb60f); /* movzbl */
346 } else if ((ft
& VT_TYPE
) == VT_SHORT
) {
347 o(0xbf0f); /* movswl */
348 } else if ((ft
& VT_TYPE
) == (VT_SHORT
| VT_UNSIGNED
)) {
349 o(0xb70f); /* movzwl */
350 } else if (is64_type(ft
)) {
351 gen_modrm64(0x8b, r
, fr
, sv
->sym
, fc
);
356 gen_modrm(r
, fr
, sv
->sym
, fc
);
362 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
363 gen_addrpc32(fr
, sv
->sym
, fc
);
365 if (sv
->sym
->type
.t
& VT_STATIC
) {
367 o(0x05 + REG_VALUE(r
) * 8); /* lea xx(%rip), r */
368 gen_addrpc32(fr
, sv
->sym
, fc
);
371 o(0x05 + REG_VALUE(r
) * 8); /* mov xx(%rip), r */
372 gen_gotpcrel(r
, sv
->sym
, fc
);
375 } else if (is64_type(ft
)) {
377 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
380 o(0xb8 + REG_VALUE(r
)); /* mov $xx, r */
383 } else if (v
== VT_LOCAL
) {
384 o(0x48 | REX_BASE(r
));
385 o(0x8d); /* lea xxx(%ebp), r */
386 gen_modrm(r
, VT_LOCAL
, sv
->sym
, fc
);
387 } else if (v
== VT_CMP
) {
388 oad(0xb8 + r
, 0); /* mov $0, r */
389 o(0x0f); /* setxx %br */
392 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
394 oad(0xb8 + r
, t
); /* mov $1, r */
395 o(0x05eb); /* jmp after */
397 oad(0xb8 + r
, t
^ 1); /* mov $0, r */
399 if (r
== TREG_XMM0
) {
400 assert(v
== TREG_ST0
);
401 /* gen_cvt_ftof(VT_DOUBLE); */
402 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
403 /* movsd -0x10(%rsp),%xmm0 */
406 } else if (r
== TREG_ST0
) {
407 assert(v
== TREG_XMM0
);
408 /* gen_cvt_ftof(VT_LDOUBLE); */
409 /* movsd %xmm0,-0x10(%rsp) */
412 o(0xf02444dd); /* fldl -0x10(%rsp) */
414 o(0x48 | REX_BASE(r
) | (REX_BASE(v
) << 2));
416 o(0xc0 + r
+ v
* 8); /* mov v, r */
422 /* store register 'r' in lvalue 'v' */
423 void store(int r
, SValue
*v
)
427 /* store the REX prefix in this variable when PIC is enabled */
432 fr
= v
->r
& VT_VALMASK
;
435 #ifndef TCC_TARGET_PE
436 /* we need to access the variable via got */
437 if (fr
== VT_CONST
&& (v
->r
& VT_SYM
)) {
438 /* mov xx(%rip), %r11 */
440 gen_gotpcrel(TREG_R11
, v
->sym
, v
->c
.ul
);
441 pic
= is64_type(bt
) ? 0x49 : 0x41;
445 /* XXX: incorrect if float reg to reg */
446 if (bt
== VT_FLOAT
) {
449 o(0x7e0f); /* movd */
451 } else if (bt
== VT_DOUBLE
) {
454 o(0xd60f); /* movq */
456 } else if (bt
== VT_LDOUBLE
) {
457 o(0xc0d9); /* fld %st(0) */
465 if (bt
== VT_BYTE
|| bt
== VT_BOOL
)
467 else if (is64_type(bt
))
473 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
478 if (fr
== VT_CONST
||
481 gen_modrm64(op64
, r
, v
->r
, v
->sym
, fc
);
482 } else if (fr
!= r
) {
483 /* XXX: is this case ever actually reached? */
485 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
488 if (fr
== VT_CONST
||
491 gen_modrm(r
, v
->r
, v
->sym
, fc
);
492 } else if (fr
!= r
) {
493 /* XXX: is this case ever actually reached? */
495 o(0xc0 + fr
+ r
* 8); /* mov r, fr */
/* Adjust %rsp by 'val' bytes (used after calls to pop arguments).
   NOTE(review): the short-immediate branch body (add with an 8-bit
   immediate) and the closing braces are on lines missing from this
   chunk. */
500 static void gadd_sp(int val
)
/* value fits in a signed byte: the short encoding (missing here) */
502 if (val
== (char)val
) {
/* otherwise use the full 32-bit immediate form */
506 oad(0xc48148, val
); /* add $xxx, %rsp */
510 /* 'is_jmp' is '1' if it is a jump */
511 static void gcall_or_jmp(int is_jmp
)
514 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
516 if (vtop
->r
& VT_SYM
) {
517 /* relocation case */
518 greloc(cur_text_section
, vtop
->sym
,
519 ind
+ 1, R_X86_64_PC32
);
521 /* put an empty PC32 relocation */
522 put_elf_reloc(symtab_section
, cur_text_section
,
523 ind
+ 1, R_X86_64_PC32
, 0);
525 oad(0xe8 + is_jmp
, vtop
->c
.ul
- 4); /* call/jmp im */
527 /* otherwise, indirect call */
531 o(0xff); /* call/jmp *r */
532 o(0xd0 + REG_VALUE(r
) + (is_jmp
<< 4));
/* Argument-register tables: registers used to pass integer/pointer
   function arguments, in call order.
   NOTE(review): the surrounding #ifdef TCC_TARGET_PE / #else / #endif
   lines and the closing "};" of each array are missing from this
   chunk; the first table matches the Win64 convention, the second the
   System V AMD64 convention -- confirm against the full file. */
538 static uint8_t arg_regs
[] = {
539 TREG_RCX
, TREG_RDX
, TREG_R8
, TREG_R9
543 static uint8_t arg_regs
[REGN
] = {
544 TREG_RDI
, TREG_RSI
, TREG_RDX
, TREG_RCX
, TREG_R8
, TREG_R9
548 /* Generate function call. The function address is pushed first, then
549 all the parameters in call order. This functions pops all the
550 parameters and the function address. */
551 void gfunc_call(int nb_args
)
553 int size
, align
, r
, args_size
, i
;
557 int sse_reg
, gen_reg
;
559 /* calculate the number of integer/float arguments */
561 for(i
= 0; i
< nb_args
; i
++) {
562 if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_STRUCT
) {
563 args_size
+= type_size(&vtop
->type
, &align
);
564 } else if ((vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
566 #ifndef TCC_TARGET_PE
567 } else if (is_sse_float(vtop
[-i
].type
.t
)) {
569 if (nb_sse_args
> 8) args_size
+= 8;
573 if (nb_reg_args
> REGN
) args_size
+= 8;
577 /* for struct arguments, we need to call memcpy and the function
578 call breaks register passing arguments we are preparing.
579 So, we process arguments which will be passed by stack first. */
581 gen_reg
= nb_reg_args
;
582 sse_reg
= nb_sse_args
;
585 save_regs(0); /* save used temporary registers */
588 /* adjust stack to align SSE boundary */
589 if (args_size
&= 8) {
590 o(0x50); /* push $rax */
592 for(i
= 0; i
< nb_args
; i
++) {
593 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
594 size
= type_size(&vtop
->type
, &align
);
595 /* align to stack align size */
596 size
= (size
+ 3) & ~3;
597 /* allocate the necessary size on stack */
599 oad(0xec81, size
); /* sub $xxx, %rsp */
600 /* generate structure store */
602 o(0x48 + REX_BASE(r
));
603 o(0x89); /* mov %rsp, r */
606 /* following code breaks vtop[1] */
607 SValue tmp
= vtop
[1];
608 vset(&vtop
->type
, r
| VT_LVAL
, 0);
614 } else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
617 oad(0xec8148, size
); /* sub $xxx, %rsp */
618 o(0x7cdb); /* fstpt 0(%rsp) */
622 } else if (is_sse_float(vtop
->type
.t
)) {
631 o(0x50); /* push $rax */
632 /* movq %xmm0, (%rsp) */
640 /* XXX: implicit cast ? */
643 o(0x50 + r
); /* push r */
651 /* then, we prepare register passing arguments.
652 Note that we cannot set RDX and RCX in this loop because gv()
653 may break these temporary registers. Let's use R10 and R11
655 gen_reg
= nb_reg_args
;
656 sse_reg
= nb_sse_args
;
657 for(i
= 0; i
< nb_args
; i
++) {
658 if ((vtop
->type
.t
& VT_BTYPE
) == VT_STRUCT
||
659 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
660 } else if (is_sse_float(vtop
->type
.t
)) {
665 gv(RC_FLOAT
); /* only one float register */
666 /* movaps %xmm0, %xmmN */
670 o(0xd60f66); /* movq %xmm0, (%rsp) */
671 o(0x2404 + (j
<< 3));
673 o(0x58 + d
); /* pop d */
682 /* XXX: implicit cast ? */
692 o(0xc0 + r
* 8 + d
- 8);
699 gv(RC_FLOAT
); /* only one float register */
700 /* movaps %xmm0, %xmmN */
702 o(0xc0 + (sse_reg
<< 3));
707 /* XXX: implicit cast ? */
712 o(0xc0 + r
* 8 + arg_regs
[j
]);
715 /* j=2: r10, j=3: r11 */
719 /* j=4: r8, j=5: r9 */
720 o(0xc0 + r
* 8 + j
- 4);
729 /* allocate scratch space */
733 save_regs(0); /* save used temporary registers */
735 /* Copy R10 and R11 into RDX and RCX, respectively */
736 if (nb_reg_args
> 2) {
737 o(0xd2894c); /* mov %r10, %rdx */
738 if (nb_reg_args
> 3) {
739 o(0xd9894c); /* mov %r11, %rcx */
743 oad(0xb8, nb_sse_args
< 8 ? nb_sse_args
: 8); /* mov nb_sse_args, %eax */
751 #define FUNC_PROLOG_SIZE 11
/* Spill integer argument register number 'i' into the current local
   stack slot at loc(%rbp).
   NOTE(review): the adjustment of 'loc' (presumably loc -= 8) on the
   preceding original line is missing from this chunk. */
753 static void push_arg_reg(int i
) {
/* mov %arg_regs[i], loc(%rbp) */
755 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, loc
);
758 /* generate function prolog of type 't' */
759 void gfunc_prolog(CType
*func_type
)
761 int i
, addr
, align
, size
;
762 int param_index
, param_addr
, reg_param_index
, sse_param_index
;
768 sym
= func_type
->ref
;
771 ind
+= FUNC_PROLOG_SIZE
;
772 func_sub_sp_offset
= ind
;
774 #ifndef TCC_TARGET_PE
775 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
776 int seen_reg_num
, seen_sse_num
, seen_stack_size
;
777 seen_reg_num
= seen_sse_num
= 0;
778 /* frame pointer and return address */
779 seen_stack_size
= PTR_SIZE
* 2;
780 /* count the number of seen parameters */
781 sym
= func_type
->ref
;
782 while ((sym
= sym
->next
) != NULL
) {
784 if (is_sse_float(type
->t
)) {
785 if (seen_sse_num
< 8) {
788 seen_stack_size
+= 8;
790 } else if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
791 size
= type_size(type
, &align
);
792 size
= (size
+ 3) & ~3;
793 seen_stack_size
+= size
;
794 } else if ((type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
795 seen_stack_size
+= LDOUBLE_SIZE
;
797 if (seen_reg_num
< REGN
) {
800 seen_stack_size
+= 8;
806 /* movl $0x????????, -0x10(%rbp) */
808 gen_le32(seen_reg_num
* 8);
809 /* movl $0x????????, -0xc(%rbp) */
811 gen_le32(seen_sse_num
* 16 + 48);
812 /* movl $0x????????, -0x8(%rbp) */
814 gen_le32(seen_stack_size
);
816 /* save all register passing arguments */
817 for (i
= 0; i
< 8; i
++) {
819 o(0xd60f66); /* movq */
820 gen_modrm(7 - i
, VT_LOCAL
, NULL
, loc
);
821 /* movq $0, loc+8(%rbp) */
826 for (i
= 0; i
< REGN
; i
++) {
827 push_arg_reg(REGN
-1-i
);
832 sym
= func_type
->ref
;
837 /* if the function returns a structure, then add an
838 implicit pointer parameter */
840 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
) {
841 push_arg_reg(reg_param_index
);
848 /* define parameters */
849 while ((sym
= sym
->next
) != NULL
) {
851 size
= type_size(type
, &align
);
852 size
= (size
+ 3) & ~3;
853 #ifndef TCC_TARGET_PE
854 if (is_sse_float(type
->t
)) {
855 if (sse_param_index
< 8) {
856 /* save arguments passed by register */
858 o(0xd60f66); /* movq */
859 gen_modrm(sse_param_index
, VT_LOCAL
, NULL
, loc
);
868 if ((type
->t
& VT_BTYPE
) == VT_STRUCT
||
869 (type
->t
& VT_BTYPE
) == VT_LDOUBLE
) {
874 if (reg_param_index
< REGN
) {
875 /* save arguments passed by register */
876 gen_modrm64(0x89, arg_regs
[reg_param_index
], VT_LOCAL
, NULL
, addr
);
881 if (reg_param_index
< REGN
) {
882 /* save arguments passed by register */
883 push_arg_reg(reg_param_index
);
892 sym_push(sym
->v
& ~SYM_FIELD
, type
,
893 VT_LOCAL
| VT_LVAL
, param_addr
);
897 if (func_type
->ref
->c
== FUNC_ELLIPSIS
) {
898 for (i
= reg_param_index
; i
< REGN
; ++i
) {
899 gen_modrm64(0x89, arg_regs
[i
], VT_LOCAL
, NULL
, addr
);
906 /* generate function epilog */
907 void gfunc_epilog(void)
912 if (func_ret_sub
== 0) {
917 g(func_ret_sub
>> 8);
919 /* align local size to word & save local variables */
920 v
= (-loc
+ 15) & -16;
922 ind
= func_sub_sp_offset
- FUNC_PROLOG_SIZE
;
925 Sym
*sym
= external_global_sym(TOK___chkstk
, &func_old_type
, 0);
926 oad(0xb8, v
); /* mov stacksize, %eax */
927 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
928 greloc(cur_text_section
, sym
, ind
-4, R_X86_64_PC32
);
929 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
933 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
934 o(0xec8148); /* sub rsp, stacksize */
940 /* generate a jump to a label */
/* NOTE(review): the signature (presumably int gjmp(int t)) and braces
   are on lines missing from this chunk; emits jmp rel32 via psym and
   returns the forward-reference chain address. */
943 return psym(0xe9, t
);
946 /* generate a jump to a fixed address */
/* NOTE(review): the short-jump (rel8) fast path of the original is on
   lines missing from this chunk; only the jmp rel32 form is visible
   (opcode 0xe9, displacement relative to the end of the 5-byte
   instruction, hence the -5). */
947 void gjmp_addr(int a
)
955 oad(0xe9, a
- ind
- 5);
959 /* generate a test. set 'inv' to invert test. Stack entry is popped */
960 int gtst(int inv
, int t
)
964 v
= vtop
->r
& VT_VALMASK
;
966 /* fast case : can jump directly since flags are set */
968 t
= psym((vtop
->c
.i
- 16) ^ inv
, t
);
969 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
970 /* && or || optimization */
971 if ((v
& 1) == inv
) {
972 /* insert vtop->c jump list in t */
975 p
= (int *)(cur_text_section
->data
+ *p
);
983 if (is_float(vtop
->type
.t
) ||
984 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
988 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
989 /* constant jmp optimization */
990 if ((vtop
->c
.i
!= 0) != inv
)
997 t
= psym(0x85 ^ inv
, t
);
1004 /* generate an integer binary operation */
1005 void gen_opi(int op
)
1011 case TOK_ADDC1
: /* add with carry generation */
1014 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
&&
1015 !is64_type(vtop
->type
.t
)) {
1019 if (is64_type(vtop
->type
.t
)) {
1020 o(0x48 | REX_BASE(r
));
1025 /* XXX: generate inc and dec for smaller code ? */
1027 o(0xc0 | (opc
<< 3) | REG_VALUE(r
));
1031 oad(0xc0 | (opc
<< 3) | REG_VALUE(r
), c
);
1034 gv2(RC_INT
, RC_INT
);
1038 is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1039 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1040 o(0x48 | REX_BASE(r
) | (REX_BASE(fr
) << 2));
1042 o((opc
<< 3) | 0x01);
1043 o(0xc0 + REG_VALUE(r
) + REG_VALUE(fr
) * 8);
1046 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1052 case TOK_SUBC1
: /* sub with carry generation */
1055 case TOK_ADDC2
: /* add with carry use */
1058 case TOK_SUBC2
: /* sub with carry use */
1071 gv2(RC_INT
, RC_INT
);
1074 if (is64_type(vtop
[0].type
.t
) || (vtop
[0].type
.t
& VT_UNSIGNED
) ||
1075 is64_type(vtop
[-1].type
.t
) || (vtop
[-1].type
.t
& VT_UNSIGNED
)) {
1076 o(0x48 | REX_BASE(fr
) | (REX_BASE(r
) << 2));
1079 o(0xaf0f); /* imul fr, r */
1080 o(0xc0 + fr
+ r
* 8);
1091 opc
= 0xc0 | (opc
<< 3);
1092 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1096 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1097 o(0x48 | REX_BASE(r
));
1104 o(0xc1); /* shl/shr/sar $xxx, r */
1108 /* we generate the shift in ecx */
1109 gv2(RC_INT
, RC_RCX
);
1111 if ((vtop
[-1].type
.t
& VT_BTYPE
) == VT_LLONG
) {
1112 o(0x48 | REX_BASE(r
));
1114 o(0xd3); /* shl/shr/sar %cl, r */
1125 /* first operand must be in eax */
1126 /* XXX: need better constraint for second operand */
1127 gv2(RC_RAX
, RC_RCX
);
1132 if (op
== TOK_UMULL
) {
1133 o(0xf7); /* mul fr */
1135 vtop
->r2
= TREG_RDX
;
1138 if (op
== TOK_UDIV
|| op
== TOK_UMOD
) {
1139 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
1142 if ((vtop
->type
.t
& VT_BTYPE
) & VT_LLONG
) {
1143 o(0x9948); /* cqto */
1144 o(0x48 + REX_BASE(fr
));
1148 o(0xf7); /* idiv fr, %eax */
1151 if (op
== '%' || op
== TOK_UMOD
)
1164 void gen_opl(int op
)
1169 /* generate a floating point operation 'v = t1 op t2' instruction. The
1170 two operands are guaranteed to have the same floating point type */
1171 /* XXX: need to use ST1 too */
1172 void gen_opf(int op
)
1174 int a
, ft
, fc
, swapped
, r
;
1176 (vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
? RC_ST0
: RC_FLOAT
;
1178 /* convert constants to memory references */
1179 if ((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
1184 if ((vtop
[0].r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
)
1187 /* must put at least one value in the floating point register */
1188 if ((vtop
[-1].r
& VT_LVAL
) &&
1189 (vtop
[0].r
& VT_LVAL
)) {
1195 /* swap the stack if needed so that t1 is the register and t2 is
1196 the memory reference */
1197 if (vtop
[-1].r
& VT_LVAL
) {
1201 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1202 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1203 /* load on stack second operand */
1204 load(TREG_ST0
, vtop
);
1205 save_reg(TREG_RAX
); /* eax is used by FP comparison code */
1206 if (op
== TOK_GE
|| op
== TOK_GT
)
1208 else if (op
== TOK_EQ
|| op
== TOK_NE
)
1211 o(0xc9d9); /* fxch %st(1) */
1212 o(0xe9da); /* fucompp */
1213 o(0xe0df); /* fnstsw %ax */
1215 o(0x45e480); /* and $0x45, %ah */
1216 o(0x40fC80); /* cmp $0x40, %ah */
1217 } else if (op
== TOK_NE
) {
1218 o(0x45e480); /* and $0x45, %ah */
1219 o(0x40f480); /* xor $0x40, %ah */
1221 } else if (op
== TOK_GE
|| op
== TOK_LE
) {
1222 o(0x05c4f6); /* test $0x05, %ah */
1225 o(0x45c4f6); /* test $0x45, %ah */
1232 /* no memory reference possible for long double operations */
1233 load(TREG_ST0
, vtop
);
1257 o(0xde); /* fxxxp %st, %st(1) */
1262 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1263 /* if saved lvalue, then we must reload it */
1266 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1268 r
= get_reg(RC_INT
);
1270 v1
.r
= VT_LOCAL
| VT_LVAL
;
1276 if (op
== TOK_EQ
|| op
== TOK_NE
) {
1279 if (op
== TOK_LE
|| op
== TOK_LT
)
1281 if (op
== TOK_LE
|| op
== TOK_GE
) {
1282 op
= 0x93; /* setae */
1284 op
= 0x97; /* seta */
1289 o(0x7e0ff3); /* movq */
1290 gen_modrm(1, r
, vtop
->sym
, fc
);
1292 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1295 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1298 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
) {
1301 o(0x2e0f); /* ucomisd */
1302 gen_modrm(0, r
, vtop
->sym
, fc
);
1309 /* no memory reference possible for long double operations */
1310 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
) {
1311 load(TREG_XMM0
, vtop
);
1331 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
) {
1332 o(0xde); /* fxxxp %st, %st(1) */
1335 /* if saved lvalue, then we must reload it */
1337 if ((r
& VT_VALMASK
) == VT_LLOCAL
) {
1339 r
= get_reg(RC_INT
);
1341 v1
.r
= VT_LOCAL
| VT_LVAL
;
1347 /* movq %xmm0,%xmm1 */
1350 load(TREG_XMM0
, vtop
);
1351 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1352 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1361 if ((ft
& VT_BTYPE
) == VT_DOUBLE
) {
1368 gen_modrm(0, r
, vtop
->sym
, fc
);
1376 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1377 and 'long long' cases. */
1378 void gen_cvt_itof(int t
)
1380 if ((t
& VT_BTYPE
) == VT_LDOUBLE
) {
1383 if ((vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1384 /* signed long long to float/double/long double (unsigned case
1385 is handled generically) */
1386 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1387 o(0x242cdf); /* fildll (%rsp) */
1388 o(0x08c48348); /* add $8, %rsp */
1389 } else if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1390 (VT_INT
| VT_UNSIGNED
)) {
1391 /* unsigned int to float/double/long double */
1392 o(0x6a); /* push $0 */
1394 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1395 o(0x242cdf); /* fildll (%rsp) */
1396 o(0x10c48348); /* add $16, %rsp */
1398 /* int to float/double/long double */
1399 o(0x50 + (vtop
->r
& VT_VALMASK
)); /* push r */
1400 o(0x2404db); /* fildl (%rsp) */
1401 o(0x08c48348); /* add $8, %rsp */
1405 save_reg(TREG_XMM0
);
1407 o(0xf2 + ((t
& VT_BTYPE
) == VT_FLOAT
));
1408 if ((vtop
->type
.t
& (VT_BTYPE
| VT_UNSIGNED
)) ==
1409 (VT_INT
| VT_UNSIGNED
) ||
1410 (vtop
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1414 o(0xc0 + (vtop
->r
& VT_VALMASK
)); /* cvtsi2sd */
1415 vtop
->r
= TREG_XMM0
;
1419 /* convert from one floating point type to another */
1420 void gen_cvt_ftof(int t
)
1428 if (bt
== VT_FLOAT
) {
1430 if (tbt
== VT_DOUBLE
) {
1431 o(0xc0140f); /* unpcklps */
1432 o(0xc05a0f); /* cvtps2pd */
1433 } else if (tbt
== VT_LDOUBLE
) {
1434 /* movss %xmm0,-0x10(%rsp) */
1437 o(0xf02444d9); /* flds -0x10(%rsp) */
1440 } else if (bt
== VT_DOUBLE
) {
1442 if (tbt
== VT_FLOAT
) {
1443 o(0xc0140f66); /* unpcklpd */
1444 o(0xc05a0f66); /* cvtpd2ps */
1445 } else if (tbt
== VT_LDOUBLE
) {
1446 /* movsd %xmm0,-0x10(%rsp) */
1449 o(0xf02444dd); /* fldl -0x10(%rsp) */
1454 if (tbt
== VT_DOUBLE
) {
1455 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1456 /* movsd -0x10(%rsp),%xmm0 */
1459 vtop
->r
= TREG_XMM0
;
1460 } else if (tbt
== VT_FLOAT
) {
1461 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1462 /* movss -0x10(%rsp),%xmm0 */
1465 vtop
->r
= TREG_XMM0
;
1470 /* convert fp to int 't' type */
1471 void gen_cvt_ftoi(int t
)
1473 int ft
, bt
, size
, r
;
1476 if (bt
== VT_LDOUBLE
) {
1477 gen_cvt_ftof(VT_DOUBLE
);
1487 r
= get_reg(RC_INT
);
1488 if (bt
== VT_FLOAT
) {
1490 } else if (bt
== VT_DOUBLE
) {
1496 o(0x48 + REX_BASE(r
));
1498 o(0x2c0f); /* cvttss2si or cvttsd2si */
1499 o(0xc0 + (REG_VALUE(r
) << 3));
1503 /* computed goto support */
1510 /* end of x86-64 code generator */
1511 /*************************************************************/