integrate x86_64-asm.c into i386-asm.c
[tinycc.git] / x86_64-gen.c
blob9f05889f24cf2e8d3392bf7dfb179e14c37dd57b
/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
23 #include <assert.h>
/* number of available registers */
#define NB_REGS             5

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_XMM0    0x0020
#define RC_ST0     0x0040 /* only for long double */
#define RC_IRET    RC_RAX /* function return: integer register */
#define RC_LRET    RC_RDX /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */

/* pretty names for the registers.  Values 0..4 index reg_classes[];
   the remaining names are used directly when emitting instructions. */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSI = 6,
    TREG_RDI = 7,
    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 3,
    TREG_ST0 = 4,

    /* flag OR-ed into a register number by load()/gen_modrm64() */
    TREG_MEM = 0x10,
};

/* bit 3 of a register number (goes into the REX prefix) */
#define REX_BASE(reg) (((reg) >> 3) & 1)
/* low three bits of a register number (go into opcode/ModRM) */
#define REG_VALUE(reg) ((reg) & 7)

/* register classes of the NB_REGS allocatable registers, indexed by
   register number */
const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* st0 */ RC_ST0,
};
/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 8
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE  0x1000

/******************************************************/

/* value of 'ind' just after the space reserved for the prolog; set by
   gfunc_prolog() and read by gfunc_epilog() */
static unsigned long func_sub_sp_offset;
/* operand of the final 'ret n' (0 means plain 'ret') */
static int func_ret_sub;
107 /* XXX: make it faster ? */
108 void g(int c)
110 int ind1;
111 ind1 = ind + 1;
112 if (ind1 > cur_text_section->data_allocated)
113 section_realloc(cur_text_section, ind1);
114 cur_text_section->data[ind] = c;
115 ind = ind1;
/* Emit the bytes of 'c' from least to most significant, stopping as
   soon as the remaining value is zero.  Consequently o(0) emits
   nothing and a zero high byte is never written; callers that need a
   literal 0x00 byte use g() instead. */
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
/* emit the low 16 bits of 'v' in little-endian byte order */
void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}
/* emit the 32 bit value 'c' in little-endian byte order */
void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}
/* emit the 64 bit value 'c' in little-endian byte order */
void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
152 /* output a symbol and patch all calls to it */
153 void gsym_addr(int t, int a)
155 int n, *ptr;
156 while (t) {
157 ptr = (int *)(cur_text_section->data + t);
158 n = *ptr; /* next value */
159 *ptr = a - t - 4;
160 t = n;
164 void gsym(int t)
166 gsym_addr(t, ind);
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad
173 static int is64_type(int t)
175 return ((t & VT_BTYPE) == VT_PTR ||
176 (t & VT_BTYPE) == VT_FUNC ||
177 (t & VT_BTYPE) == VT_LLONG);
180 static int is_sse_float(int t) {
181 int bt;
182 bt = t & VT_BTYPE;
183 return bt == VT_DOUBLE || bt == VT_FLOAT;
186 /* instruction + 4 bytes data. Return the address of the data */
187 static int oad(int c, int s)
189 int ind1;
191 o(c);
192 ind1 = ind + 4;
193 if (ind1 > cur_text_section->data_allocated)
194 section_realloc(cur_text_section, ind1);
195 *(int *)(cur_text_section->data + ind) = s;
196 s = ind;
197 ind = ind1;
198 return s;
201 static void gen_addr32(int r, Sym *sym, int c)
203 if (r & VT_SYM)
204 greloc(cur_text_section, sym, ind, R_X86_64_32);
205 gen_le32(c);
208 /* output constant with relocation if 'r & VT_SYM' is true */
209 static void gen_addr64(int r, Sym *sym, int64_t c)
211 if (r & VT_SYM)
212 greloc(cur_text_section, sym, ind, R_X86_64_64);
213 gen_le64(c);
216 /* output constant with relocation if 'r & VT_SYM' is true */
217 static void gen_addrpc32(int r, Sym *sym, int c)
219 if (r & VT_SYM)
220 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
221 gen_le32(c-4);
224 /* output got address with relocation */
225 static void gen_gotpcrel(int r, Sym *sym, int c)
227 #ifndef TCC_TARGET_PE
228 Section *sr;
229 ElfW(Rela) *rel;
230 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
231 sr = cur_text_section->reloc;
232 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
233 rel->r_addend = -4;
234 #else
235 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
236 cur_text_section->data[ind-3],
237 cur_text_section->data[ind-2],
238 cur_text_section->data[ind-1]
240 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
241 #endif
242 gen_le32(0);
244 if (c) {
245 /* we use add c, %xxx for displacement */
246 o(0x48 + REX_BASE(r));
247 o(0x81);
248 o(0xc0 + REG_VALUE(r));
249 gen_le32(c);
253 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
255 op_reg = REG_VALUE(op_reg) << 3;
256 if ((r & VT_VALMASK) == VT_CONST) {
257 /* constant memory reference */
258 o(0x05 | op_reg);
259 if (is_got) {
260 gen_gotpcrel(r, sym, c);
261 } else {
262 gen_addrpc32(r, sym, c);
264 } else if ((r & VT_VALMASK) == VT_LOCAL) {
265 /* currently, we use only ebp as base */
266 if (c == (char)c) {
267 /* short reference */
268 o(0x45 | op_reg);
269 g(c);
270 } else {
271 oad(0x85 | op_reg, c);
273 } else if ((r & VT_VALMASK) >= TREG_MEM) {
274 if (c) {
275 g(0x80 | op_reg | REG_VALUE(r));
276 gen_le32(c);
277 } else {
278 g(0x00 | op_reg | REG_VALUE(r));
280 } else {
281 g(0x00 | op_reg | (r & VT_VALMASK));
285 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
286 opcode bits */
287 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
289 gen_modrm_impl(op_reg, r, sym, c, 0);
292 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
293 opcode bits */
294 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
296 int is_got;
297 int rex = 0x48 | (REX_BASE(op_reg) << 2);
298 if ((r & VT_VALMASK) != VT_CONST &&
299 (r & VT_VALMASK) != VT_LOCAL) {
300 rex |= REX_BASE(VT_VALMASK & r);
302 o(rex);
303 o(opcode);
304 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
305 gen_modrm_impl(op_reg, r, sym, c, is_got);
309 /* load 'r' from value 'sv' */
310 void load(int r, SValue *sv)
312 int v, t, ft, fc, fr;
313 SValue v1;
315 fr = sv->r;
316 ft = sv->type.t;
317 fc = sv->c.ul;
319 #ifndef TCC_TARGET_PE
320 /* we use indirect access via got */
321 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
322 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
323 /* use the result register as a temporal register */
324 int tr = r | TREG_MEM;
325 if (is_float(ft)) {
326 /* we cannot use float registers as a temporal register */
327 tr = get_reg(RC_INT) | TREG_MEM;
329 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
331 /* load from the temporal register */
332 fr = tr | VT_LVAL;
334 #endif
336 v = fr & VT_VALMASK;
337 if (fr & VT_LVAL) {
338 if (v == VT_LLOCAL) {
339 v1.type.t = VT_PTR;
340 v1.r = VT_LOCAL | VT_LVAL;
341 v1.c.ul = fc;
342 load(r, &v1);
343 fr = r;
345 if ((ft & VT_BTYPE) == VT_FLOAT) {
346 o(0x6e0f66); /* movd */
347 r = 0;
348 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
349 o(0x7e0ff3); /* movq */
350 r = 0;
351 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
352 o(0xdb); /* fldt */
353 r = 5;
354 } else if ((ft & VT_TYPE) == VT_BYTE) {
355 o(0xbe0f); /* movsbl */
356 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
357 o(0xb60f); /* movzbl */
358 } else if ((ft & VT_TYPE) == VT_SHORT) {
359 o(0xbf0f); /* movswl */
360 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
361 o(0xb70f); /* movzwl */
362 } else if (is64_type(ft)) {
363 gen_modrm64(0x8b, r, fr, sv->sym, fc);
364 return;
365 } else {
366 o(0x8b); /* movl */
368 gen_modrm(r, fr, sv->sym, fc);
369 } else {
370 if (v == VT_CONST) {
371 if (fr & VT_SYM) {
372 #ifdef TCC_TARGET_PE
373 o(0x8d48);
374 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
375 gen_addrpc32(fr, sv->sym, fc);
376 #else
377 if (sv->sym->type.t & VT_STATIC) {
378 o(0x8d48);
379 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
380 gen_addrpc32(fr, sv->sym, fc);
381 } else {
382 o(0x8b48);
383 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
384 gen_gotpcrel(r, sv->sym, fc);
386 #endif
387 } else if (is64_type(ft)) {
388 o(0x48);
389 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
390 gen_le64(sv->c.ull);
391 } else {
392 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
393 gen_le32(fc);
395 } else if (v == VT_LOCAL) {
396 o(0x48 | REX_BASE(r));
397 o(0x8d); /* lea xxx(%ebp), r */
398 gen_modrm(r, VT_LOCAL, sv->sym, fc);
399 } else if (v == VT_CMP) {
400 oad(0xb8 + r, 0); /* mov $0, r */
401 o(0x0f); /* setxx %br */
402 o(fc);
403 o(0xc0 + r);
404 } else if (v == VT_JMP || v == VT_JMPI) {
405 t = v & 1;
406 oad(0xb8 + r, t); /* mov $1, r */
407 o(0x05eb); /* jmp after */
408 gsym(fc);
409 oad(0xb8 + r, t ^ 1); /* mov $0, r */
410 } else if (v != r) {
411 if (r == TREG_XMM0) {
412 assert(v == TREG_ST0);
413 /* gen_cvt_ftof(VT_DOUBLE); */
414 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
415 /* movsd -0x10(%rsp),%xmm0 */
416 o(0x44100ff2);
417 o(0xf024);
418 } else if (r == TREG_ST0) {
419 assert(v == TREG_XMM0);
420 /* gen_cvt_ftof(VT_LDOUBLE); */
421 /* movsd %xmm0,-0x10(%rsp) */
422 o(0x44110ff2);
423 o(0xf024);
424 o(0xf02444dd); /* fldl -0x10(%rsp) */
425 } else {
426 o(0x48 | REX_BASE(r) | (REX_BASE(v) << 2));
427 o(0x89);
428 o(0xc0 + r + v * 8); /* mov v, r */
434 /* store register 'r' in lvalue 'v' */
435 void store(int r, SValue *v)
437 int fr, bt, ft, fc;
438 int op64 = 0;
439 /* store the REX prefix in this variable when PIC is enabled */
440 int pic = 0;
442 ft = v->type.t;
443 fc = v->c.ul;
444 fr = v->r & VT_VALMASK;
445 bt = ft & VT_BTYPE;
447 #ifndef TCC_TARGET_PE
448 /* we need to access the variable via got */
449 if (fr == VT_CONST && (v->r & VT_SYM)) {
450 /* mov xx(%rip), %r11 */
451 o(0x1d8b4c);
452 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
453 pic = is64_type(bt) ? 0x49 : 0x41;
455 #endif
457 /* XXX: incorrect if float reg to reg */
458 if (bt == VT_FLOAT) {
459 o(0x66);
460 o(pic);
461 o(0x7e0f); /* movd */
462 r = 0;
463 } else if (bt == VT_DOUBLE) {
464 o(0x66);
465 o(pic);
466 o(0xd60f); /* movq */
467 r = 0;
468 } else if (bt == VT_LDOUBLE) {
469 o(0xc0d9); /* fld %st(0) */
470 o(pic);
471 o(0xdb); /* fstpt */
472 r = 7;
473 } else {
474 if (bt == VT_SHORT)
475 o(0x66);
476 o(pic);
477 if (bt == VT_BYTE || bt == VT_BOOL)
478 o(0x88);
479 else if (is64_type(bt))
480 op64 = 0x89;
481 else
482 o(0x89);
484 if (pic) {
485 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
486 if (op64)
487 o(op64);
488 o(3 + (r << 3));
489 } else if (op64) {
490 if (fr == VT_CONST ||
491 fr == VT_LOCAL ||
492 (v->r & VT_LVAL)) {
493 gen_modrm64(op64, r, v->r, v->sym, fc);
494 } else if (fr != r) {
495 /* XXX: don't we really come here? */
496 abort();
497 o(0xc0 + fr + r * 8); /* mov r, fr */
499 } else {
500 if (fr == VT_CONST ||
501 fr == VT_LOCAL ||
502 (v->r & VT_LVAL)) {
503 gen_modrm(r, v->r, v->sym, fc);
504 } else if (fr != r) {
505 /* XXX: don't we really come here? */
506 abort();
507 o(0xc0 + fr + r * 8); /* mov r, fr */
/* emit 'add $val, %rsp', using the 8 bit immediate form when 'val'
   fits in a signed byte */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
522 /* 'is_jmp' is '1' if it is a jump */
523 static void gcall_or_jmp(int is_jmp)
525 int r;
526 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
527 /* constant case */
528 if (vtop->r & VT_SYM) {
529 /* relocation case */
530 greloc(cur_text_section, vtop->sym,
531 ind + 1, R_X86_64_PC32);
532 } else {
533 /* put an empty PC32 relocation */
534 put_elf_reloc(symtab_section, cur_text_section,
535 ind + 1, R_X86_64_PC32, 0);
537 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
538 } else {
539 /* otherwise, indirect call */
540 r = TREG_R11;
541 load(r, vtop);
542 o(0x41); /* REX */
543 o(0xff); /* call/jmp *r */
544 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
548 #ifdef TCC_TARGET_PE
549 #define REGN 4
550 static const uint8_t arg_regs[] = {
551 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
553 #else
554 #define REGN 6
555 static const uint8_t arg_regs[REGN] = {
556 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
558 #endif
560 /* Generate function call. The function address is pushed first, then
561 all the parameters in call order. This functions pops all the
562 parameters and the function address. */
563 void gfunc_call(int nb_args)
565 int size, align, r, args_size, i;
566 SValue *orig_vtop;
567 int nb_reg_args = 0;
568 int nb_sse_args = 0;
569 int sse_reg, gen_reg;
571 /* calculate the number of integer/float arguments */
572 args_size = 0;
573 for(i = 0; i < nb_args; i++) {
574 if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
575 args_size += type_size(&vtop->type, &align);
576 } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
577 args_size += 16;
578 #ifndef TCC_TARGET_PE
579 } else if (is_sse_float(vtop[-i].type.t)) {
580 nb_sse_args++;
581 if (nb_sse_args > 8) args_size += 8;
582 #endif
583 } else {
584 nb_reg_args++;
585 if (nb_reg_args > REGN) args_size += 8;
589 /* for struct arguments, we need to call memcpy and the function
590 call breaks register passing arguments we are preparing.
591 So, we process arguments which will be passed by stack first. */
592 orig_vtop = vtop;
593 gen_reg = nb_reg_args;
594 sse_reg = nb_sse_args;
596 #ifdef TCC_TARGET_PE
597 save_regs(0); /* save used temporary registers */
598 #endif
600 /* adjust stack to align SSE boundary */
601 if (args_size &= 8) {
602 o(0x50); /* push $rax */
604 for(i = 0; i < nb_args; i++) {
605 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
606 size = type_size(&vtop->type, &align);
607 /* align to stack align size */
608 size = (size + 3) & ~3;
609 /* allocate the necessary size on stack */
610 o(0x48);
611 oad(0xec81, size); /* sub $xxx, %rsp */
612 /* generate structure store */
613 r = get_reg(RC_INT);
614 o(0x48 + REX_BASE(r));
615 o(0x89); /* mov %rsp, r */
616 o(0xe0 + r);
618 /* following code breaks vtop[1] */
619 SValue tmp = vtop[1];
620 vset(&vtop->type, r | VT_LVAL, 0);
621 vswap();
622 vstore();
623 vtop[1] = tmp;
625 args_size += size;
626 } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
627 gv(RC_ST0);
628 size = LDOUBLE_SIZE;
629 oad(0xec8148, size); /* sub $xxx, %rsp */
630 o(0x7cdb); /* fstpt 0(%rsp) */
631 g(0x24);
632 g(0x00);
633 args_size += size;
634 } else if (is_sse_float(vtop->type.t)) {
635 #ifdef TCC_TARGET_PE
636 int j = --gen_reg;
637 if (j >= REGN) {
638 #else
639 int j = --sse_reg;
640 if (j >= 8) {
641 #endif
642 gv(RC_FLOAT);
643 o(0x50); /* push $rax */
644 /* movq %xmm0, (%rsp) */
645 o(0x04d60f66);
646 o(0x24);
647 args_size += 8;
649 } else {
650 int j = --gen_reg;
651 /* simple type */
652 /* XXX: implicit cast ? */
653 if (j >= REGN) {
654 r = gv(RC_INT);
655 o(0x50 + r); /* push r */
656 args_size += 8;
659 vtop--;
661 vtop = orig_vtop;
663 /* then, we prepare register passing arguments.
664 Note that we cannot set RDX and RCX in this loop because gv()
665 may break these temporary registers. Let's use R10 and R11
666 instead of them */
667 gen_reg = nb_reg_args;
668 sse_reg = nb_sse_args;
669 for(i = 0; i < nb_args; i++) {
670 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
671 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
672 } else if (is_sse_float(vtop->type.t)) {
673 #ifdef TCC_TARGET_PE
674 int j = --gen_reg;
675 if (j < REGN) {
676 int d = arg_regs[j];
677 gv(RC_FLOAT); /* only one float register */
678 /* movaps %xmm0, %xmmN */
679 o(0x280f);
680 o(0xc0 + (j << 3));
681 o(0x50);
682 o(0xd60f66); /* movq %xmm0, (%rsp) */
683 o(0x2404 + (j << 3));
684 if (d < 8) {
685 o(0x58 + d); /* pop d */
686 } else {
687 o(0x58);
688 o(0xc08949 + d - 8);
691 } else {
692 int j = --gen_reg;
693 /* simple type */
694 /* XXX: implicit cast ? */
695 if (j < REGN) {
696 int d = arg_regs[j];
697 r = gv(RC_INT);
698 if (d != r) {
699 if (d < 8) {
700 o(0x8948); /* mov */
701 o(0xc0 + r * 8 + d);
702 } else {
703 o(0x8949); /* mov */
704 o(0xc0 + r * 8 + d - 8);
708 #else
709 int j = --sse_reg;
710 if (j < 8) {
711 gv(RC_FLOAT); /* only one float register */
712 /* movaps %xmm0, %xmmN */
713 o(0x280f);
714 o(0xc0 + (sse_reg << 3));
716 } else {
717 int j = --gen_reg;
718 /* simple type */
719 /* XXX: implicit cast ? */
720 if (j < REGN) {
721 r = gv(RC_INT);
722 if (j < 2) {
723 o(0x8948); /* mov */
724 o(0xc0 + r * 8 + arg_regs[j]);
725 } else if (j < 4) {
726 o(0x8949); /* mov */
727 /* j=2: r10, j=3: r11 */
728 o(0xc0 + r * 8 + j);
729 } else {
730 o(0x8949); /* mov */
731 /* j=4: r8, j=5: r9 */
732 o(0xc0 + r * 8 + j - 4);
735 #endif
737 vtop--;
740 #ifdef TCC_TARGET_PE
741 /* allocate scratch space */
742 gadd_sp(-8*REGN);
743 args_size += 8*REGN;
744 #else
745 save_regs(0); /* save used temporary registers */
747 /* Copy R10 and R11 into RDX and RCX, respectively */
748 if (nb_reg_args > 2) {
749 o(0xd2894c); /* mov %r10, %rdx */
750 if (nb_reg_args > 3) {
751 o(0xd9894c); /* mov %r11, %rcx */
755 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
756 #endif
757 gcall_or_jmp(0);
758 if (args_size)
759 gadd_sp(args_size);
760 vtop--;
763 #define FUNC_PROLOG_SIZE 11
765 static void push_arg_reg(int i) {
766 loc -= 8;
767 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
770 /* generate function prolog of type 't' */
771 void gfunc_prolog(CType *func_type)
773 int i, addr, align, size;
774 int param_index, param_addr, reg_param_index, sse_param_index;
775 Sym *sym;
776 CType *type;
778 func_ret_sub = 0;
780 sym = func_type->ref;
781 addr = PTR_SIZE * 2;
782 loc = 0;
783 ind += FUNC_PROLOG_SIZE;
784 func_sub_sp_offset = ind;
786 #ifndef TCC_TARGET_PE
787 if (func_type->ref->c == FUNC_ELLIPSIS) {
788 int seen_reg_num, seen_sse_num, seen_stack_size;
789 seen_reg_num = seen_sse_num = 0;
790 /* frame pointer and return address */
791 seen_stack_size = PTR_SIZE * 2;
792 /* count the number of seen parameters */
793 sym = func_type->ref;
794 while ((sym = sym->next) != NULL) {
795 type = &sym->type;
796 if (is_sse_float(type->t)) {
797 if (seen_sse_num < 8) {
798 seen_sse_num++;
799 } else {
800 seen_stack_size += 8;
802 } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
803 size = type_size(type, &align);
804 size = (size + 3) & ~3;
805 seen_stack_size += size;
806 } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
807 seen_stack_size += LDOUBLE_SIZE;
808 } else {
809 if (seen_reg_num < REGN) {
810 seen_reg_num++;
811 } else {
812 seen_stack_size += 8;
817 loc -= 16;
818 /* movl $0x????????, -0x10(%rbp) */
819 o(0xf045c7);
820 gen_le32(seen_reg_num * 8);
821 /* movl $0x????????, -0xc(%rbp) */
822 o(0xf445c7);
823 gen_le32(seen_sse_num * 16 + 48);
824 /* movl $0x????????, -0x8(%rbp) */
825 o(0xf845c7);
826 gen_le32(seen_stack_size);
828 /* save all register passing arguments */
829 for (i = 0; i < 8; i++) {
830 loc -= 16;
831 o(0xd60f66); /* movq */
832 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
833 /* movq $0, loc+8(%rbp) */
834 o(0x85c748);
835 gen_le32(loc + 8);
836 gen_le32(0);
838 for (i = 0; i < REGN; i++) {
839 push_arg_reg(REGN-1-i);
842 #endif
844 sym = func_type->ref;
845 param_index = 0;
846 reg_param_index = 0;
847 sse_param_index = 0;
849 /* if the function returns a structure, then add an
850 implicit pointer parameter */
851 func_vt = sym->type;
852 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
853 push_arg_reg(reg_param_index);
854 param_addr = loc;
856 func_vc = loc;
857 param_index++;
858 reg_param_index++;
860 /* define parameters */
861 while ((sym = sym->next) != NULL) {
862 type = &sym->type;
863 size = type_size(type, &align);
864 size = (size + 3) & ~3;
865 #ifndef TCC_TARGET_PE
866 if (is_sse_float(type->t)) {
867 if (sse_param_index < 8) {
868 /* save arguments passed by register */
869 loc -= 8;
870 o(0xd60f66); /* movq */
871 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
872 param_addr = loc;
873 } else {
874 param_addr = addr;
875 addr += size;
877 sse_param_index++;
878 } else
879 #endif
880 if ((type->t & VT_BTYPE) == VT_STRUCT ||
881 (type->t & VT_BTYPE) == VT_LDOUBLE) {
882 param_addr = addr;
883 addr += size;
884 } else {
885 #ifdef TCC_TARGET_PE
886 if (reg_param_index < REGN) {
887 /* save arguments passed by register */
888 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
890 param_addr = addr;
891 addr += 8;
892 #else
893 if (reg_param_index < REGN) {
894 /* save arguments passed by register */
895 push_arg_reg(reg_param_index);
896 param_addr = loc;
897 } else {
898 param_addr = addr;
899 addr += 8;
901 #endif
902 reg_param_index++;
904 sym_push(sym->v & ~SYM_FIELD, type,
905 VT_LOCAL | VT_LVAL, param_addr);
906 param_index++;
908 #ifdef TCC_TARGET_PE
909 if (func_type->ref->c == FUNC_ELLIPSIS) {
910 for (i = reg_param_index; i < REGN; ++i) {
911 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, addr);
912 addr += 8;
915 #endif
918 /* generate function epilog */
919 void gfunc_epilog(void)
921 int v, saved_ind;
923 o(0xc9); /* leave */
924 if (func_ret_sub == 0) {
925 o(0xc3); /* ret */
926 } else {
927 o(0xc2); /* ret n */
928 g(func_ret_sub);
929 g(func_ret_sub >> 8);
931 /* align local size to word & save local variables */
932 v = (-loc + 15) & -16;
933 saved_ind = ind;
934 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
935 #ifdef TCC_TARGET_PE
936 if (v >= 4096) {
937 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
938 oad(0xb8, v); /* mov stacksize, %eax */
939 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
940 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
941 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
942 } else
943 #endif
945 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
946 o(0xec8148); /* sub rsp, stacksize */
947 gen_le32(v);
949 ind = saved_ind;
/* generate a jump to a label.  't' is stored in the 32 bit operand
   (chaining it for later resolution by gsym()); the text offset of
   that operand is returned. */
int gjmp(int t)
{
    return psym(0xe9, t);
}
958 /* generate a jump to a fixed address */
959 void gjmp_addr(int a)
961 int r;
962 r = a - ind - 2;
963 if (r == (char)r) {
964 g(0xeb);
965 g(r);
966 } else {
967 oad(0xe9, a - ind - 5);
971 /* generate a test. set 'inv' to invert test. Stack entry is popped */
972 int gtst(int inv, int t)
974 int v, *p;
976 v = vtop->r & VT_VALMASK;
977 if (v == VT_CMP) {
978 /* fast case : can jump directly since flags are set */
979 g(0x0f);
980 t = psym((vtop->c.i - 16) ^ inv, t);
981 } else if (v == VT_JMP || v == VT_JMPI) {
982 /* && or || optimization */
983 if ((v & 1) == inv) {
984 /* insert vtop->c jump list in t */
985 p = &vtop->c.i;
986 while (*p != 0)
987 p = (int *)(cur_text_section->data + *p);
988 *p = t;
989 t = vtop->c.i;
990 } else {
991 t = gjmp(t);
992 gsym(vtop->c.i);
994 } else {
995 if (is_float(vtop->type.t) ||
996 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
997 vpushi(0);
998 gen_op(TOK_NE);
1000 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1001 /* constant jmp optimization */
1002 if ((vtop->c.i != 0) != inv)
1003 t = gjmp(t);
1004 } else {
1005 v = gv(RC_INT);
1006 o(0x85);
1007 o(0xc0 + v * 9);
1008 g(0x0f);
1009 t = psym(0x85 ^ inv, t);
1012 vtop--;
1013 return t;
1016 /* generate an integer binary operation */
1017 void gen_opi(int op)
1019 int r, fr, opc, c;
1021 switch(op) {
1022 case '+':
1023 case TOK_ADDC1: /* add with carry generation */
1024 opc = 0;
1025 gen_op8:
1026 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST &&
1027 !is64_type(vtop->type.t)) {
1028 /* constant case */
1029 vswap();
1030 r = gv(RC_INT);
1031 if (is64_type(vtop->type.t)) {
1032 o(0x48 | REX_BASE(r));
1034 vswap();
1035 c = vtop->c.i;
1036 if (c == (char)c) {
1037 /* XXX: generate inc and dec for smaller code ? */
1038 o(0x83);
1039 o(0xc0 | (opc << 3) | REG_VALUE(r));
1040 g(c);
1041 } else {
1042 o(0x81);
1043 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1045 } else {
1046 gv2(RC_INT, RC_INT);
1047 r = vtop[-1].r;
1048 fr = vtop[0].r;
1049 if (opc != 7 ||
1050 is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
1051 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
1052 o(0x48 | REX_BASE(r) | (REX_BASE(fr) << 2));
1054 o((opc << 3) | 0x01);
1055 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1057 vtop--;
1058 if (op >= TOK_ULT && op <= TOK_GT) {
1059 vtop->r = VT_CMP;
1060 vtop->c.i = op;
1062 break;
1063 case '-':
1064 case TOK_SUBC1: /* sub with carry generation */
1065 opc = 5;
1066 goto gen_op8;
1067 case TOK_ADDC2: /* add with carry use */
1068 opc = 2;
1069 goto gen_op8;
1070 case TOK_SUBC2: /* sub with carry use */
1071 opc = 3;
1072 goto gen_op8;
1073 case '&':
1074 opc = 4;
1075 goto gen_op8;
1076 case '^':
1077 opc = 6;
1078 goto gen_op8;
1079 case '|':
1080 opc = 1;
1081 goto gen_op8;
1082 case '*':
1083 gv2(RC_INT, RC_INT);
1084 r = vtop[-1].r;
1085 fr = vtop[0].r;
1086 if (is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
1087 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
1088 o(0x48 | REX_BASE(fr) | (REX_BASE(r) << 2));
1090 vtop--;
1091 o(0xaf0f); /* imul fr, r */
1092 o(0xc0 + fr + r * 8);
1093 break;
1094 case TOK_SHL:
1095 opc = 4;
1096 goto gen_shift;
1097 case TOK_SHR:
1098 opc = 5;
1099 goto gen_shift;
1100 case TOK_SAR:
1101 opc = 7;
1102 gen_shift:
1103 opc = 0xc0 | (opc << 3);
1104 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1105 /* constant case */
1106 vswap();
1107 r = gv(RC_INT);
1108 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1109 o(0x48 | REX_BASE(r));
1110 c = 0x3f;
1111 } else {
1112 c = 0x1f;
1114 vswap();
1115 c &= vtop->c.i;
1116 o(0xc1); /* shl/shr/sar $xxx, r */
1117 o(opc | r);
1118 g(c);
1119 } else {
1120 /* we generate the shift in ecx */
1121 gv2(RC_INT, RC_RCX);
1122 r = vtop[-1].r;
1123 if ((vtop[-1].type.t & VT_BTYPE) == VT_LLONG) {
1124 o(0x48 | REX_BASE(r));
1126 o(0xd3); /* shl/shr/sar %cl, r */
1127 o(opc | r);
1129 vtop--;
1130 break;
1131 case '/':
1132 case TOK_UDIV:
1133 case TOK_PDIV:
1134 case '%':
1135 case TOK_UMOD:
1136 case TOK_UMULL:
1137 /* first operand must be in eax */
1138 /* XXX: need better constraint for second operand */
1139 gv2(RC_RAX, RC_RCX);
1140 r = vtop[-1].r;
1141 fr = vtop[0].r;
1142 vtop--;
1143 save_reg(TREG_RDX);
1144 if (op == TOK_UMULL) {
1145 o(0xf7); /* mul fr */
1146 o(0xe0 + fr);
1147 vtop->r2 = TREG_RDX;
1148 r = TREG_RAX;
1149 } else {
1150 if (op == TOK_UDIV || op == TOK_UMOD) {
1151 if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
1152 o(0xd23148); /* xor %rdx, %rdx */
1153 o(0x48 + REX_BASE(fr));
1154 } else {
1155 o(0xd231); /* xor %edx, %edx */
1157 o(0xf7); /* div fr, %eax */
1158 o(0xf0 + fr);
1159 } else {
1160 if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
1161 o(0x9948); /* cqto */
1162 o(0x48 + REX_BASE(fr));
1163 } else {
1164 o(0x99); /* cltd */
1166 o(0xf7); /* idiv fr, %eax */
1167 o(0xf8 + fr);
1169 if (op == '%' || op == TOK_UMOD)
1170 r = TREG_RDX;
1171 else
1172 r = TREG_RAX;
1174 vtop->r = r;
1175 break;
1176 default:
1177 opc = 7;
1178 goto gen_op8;
/* generate a 'long long' binary operation: on this target it is
   handled by the same code as the other integer operations */
void gen_opl(int op)
{
    gen_opi(op);
}
1187 /* generate a floating point operation 'v = t1 op t2' instruction. The
1188 two operands are guaranted to have the same floating point type */
1189 /* XXX: need to use ST1 too */
1190 void gen_opf(int op)
1192 int a, ft, fc, swapped, r;
1193 int float_type =
1194 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1196 /* convert constants to memory references */
1197 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1198 vswap();
1199 gv(float_type);
1200 vswap();
1202 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1203 gv(float_type);
1205 /* must put at least one value in the floating point register */
1206 if ((vtop[-1].r & VT_LVAL) &&
1207 (vtop[0].r & VT_LVAL)) {
1208 vswap();
1209 gv(float_type);
1210 vswap();
1212 swapped = 0;
1213 /* swap the stack if needed so that t1 is the register and t2 is
1214 the memory reference */
1215 if (vtop[-1].r & VT_LVAL) {
1216 vswap();
1217 swapped = 1;
1219 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1220 if (op >= TOK_ULT && op <= TOK_GT) {
1221 /* load on stack second operand */
1222 load(TREG_ST0, vtop);
1223 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1224 if (op == TOK_GE || op == TOK_GT)
1225 swapped = !swapped;
1226 else if (op == TOK_EQ || op == TOK_NE)
1227 swapped = 0;
1228 if (swapped)
1229 o(0xc9d9); /* fxch %st(1) */
1230 o(0xe9da); /* fucompp */
1231 o(0xe0df); /* fnstsw %ax */
1232 if (op == TOK_EQ) {
1233 o(0x45e480); /* and $0x45, %ah */
1234 o(0x40fC80); /* cmp $0x40, %ah */
1235 } else if (op == TOK_NE) {
1236 o(0x45e480); /* and $0x45, %ah */
1237 o(0x40f480); /* xor $0x40, %ah */
1238 op = TOK_NE;
1239 } else if (op == TOK_GE || op == TOK_LE) {
1240 o(0x05c4f6); /* test $0x05, %ah */
1241 op = TOK_EQ;
1242 } else {
1243 o(0x45c4f6); /* test $0x45, %ah */
1244 op = TOK_EQ;
1246 vtop--;
1247 vtop->r = VT_CMP;
1248 vtop->c.i = op;
1249 } else {
1250 /* no memory reference possible for long double operations */
1251 load(TREG_ST0, vtop);
1252 swapped = !swapped;
1254 switch(op) {
1255 default:
1256 case '+':
1257 a = 0;
1258 break;
1259 case '-':
1260 a = 4;
1261 if (swapped)
1262 a++;
1263 break;
1264 case '*':
1265 a = 1;
1266 break;
1267 case '/':
1268 a = 6;
1269 if (swapped)
1270 a++;
1271 break;
1273 ft = vtop->type.t;
1274 fc = vtop->c.ul;
1275 o(0xde); /* fxxxp %st, %st(1) */
1276 o(0xc1 + (a << 3));
1277 vtop--;
1279 } else {
1280 if (op >= TOK_ULT && op <= TOK_GT) {
1281 /* if saved lvalue, then we must reload it */
1282 r = vtop->r;
1283 fc = vtop->c.ul;
1284 if ((r & VT_VALMASK) == VT_LLOCAL) {
1285 SValue v1;
1286 r = get_reg(RC_INT);
1287 v1.type.t = VT_INT;
1288 v1.r = VT_LOCAL | VT_LVAL;
1289 v1.c.ul = fc;
1290 load(r, &v1);
1291 fc = 0;
1294 if (op == TOK_EQ || op == TOK_NE) {
1295 swapped = 0;
1296 } else {
1297 if (op == TOK_LE || op == TOK_LT)
1298 swapped = !swapped;
1299 if (op == TOK_LE || op == TOK_GE) {
1300 op = 0x93; /* setae */
1301 } else {
1302 op = 0x97; /* seta */
1306 if (swapped) {
1307 o(0x7e0ff3); /* movq */
1308 gen_modrm(1, r, vtop->sym, fc);
1310 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1311 o(0x66);
1313 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1314 o(0xc8);
1315 } else {
1316 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1317 o(0x66);
1319 o(0x2e0f); /* ucomisd */
1320 gen_modrm(0, r, vtop->sym, fc);
1323 vtop--;
1324 vtop->r = VT_CMP;
1325 vtop->c.i = op;
1326 } else {
1327 /* no memory reference possible for long double operations */
1328 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1329 load(TREG_XMM0, vtop);
1330 swapped = !swapped;
1332 switch(op) {
1333 default:
1334 case '+':
1335 a = 0;
1336 break;
1337 case '-':
1338 a = 4;
1339 break;
1340 case '*':
1341 a = 1;
1342 break;
1343 case '/':
1344 a = 6;
1345 break;
1347 ft = vtop->type.t;
1348 fc = vtop->c.ul;
1349 if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1350 o(0xde); /* fxxxp %st, %st(1) */
1351 o(0xc1 + (a << 3));
1352 } else {
1353 /* if saved lvalue, then we must reload it */
1354 r = vtop->r;
1355 if ((r & VT_VALMASK) == VT_LLOCAL) {
1356 SValue v1;
1357 r = get_reg(RC_INT);
1358 v1.type.t = VT_INT;
1359 v1.r = VT_LOCAL | VT_LVAL;
1360 v1.c.ul = fc;
1361 load(r, &v1);
1362 fc = 0;
1364 if (swapped) {
1365 /* movq %xmm0,%xmm1 */
1366 o(0x7e0ff3);
1367 o(0xc8);
1368 load(TREG_XMM0, vtop);
1369 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1370 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1371 o(0xf2);
1372 } else {
1373 o(0xf3);
1375 o(0x0f);
1376 o(0x58 + a);
1377 o(0xc1);
1378 } else {
1379 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1380 o(0xf2);
1381 } else {
1382 o(0xf3);
1384 o(0x0f);
1385 o(0x58 + a);
1386 gen_modrm(0, r, vtop->sym, fc);
1389 vtop--;
1394 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1395 and 'long long' cases. */
1396 void gen_cvt_itof(int t)
1398 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1399 save_reg(TREG_ST0);
1400 gv(RC_INT);
1401 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1402 /* signed long long to float/double/long double (unsigned case
1403 is handled generically) */
1404 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1405 o(0x242cdf); /* fildll (%rsp) */
1406 o(0x08c48348); /* add $8, %rsp */
1407 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1408 (VT_INT | VT_UNSIGNED)) {
1409 /* unsigned int to float/double/long double */
1410 o(0x6a); /* push $0 */
1411 g(0x00);
1412 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1413 o(0x242cdf); /* fildll (%rsp) */
1414 o(0x10c48348); /* add $16, %rsp */
1415 } else {
1416 /* int to float/double/long double */
1417 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1418 o(0x2404db); /* fildl (%rsp) */
1419 o(0x08c48348); /* add $8, %rsp */
1421 vtop->r = TREG_ST0;
1422 } else {
1423 save_reg(TREG_XMM0);
1424 gv(RC_INT);
1425 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
1426 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1427 (VT_INT | VT_UNSIGNED) ||
1428 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1429 o(0x48); /* REX */
1431 o(0x2a0f);
1432 o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1433 vtop->r = TREG_XMM0;
1437 /* convert from one floating point type to another */
1438 void gen_cvt_ftof(int t)
1440 int ft, bt, tbt;
1442 ft = vtop->type.t;
1443 bt = ft & VT_BTYPE;
1444 tbt = t & VT_BTYPE;
1446 if (bt == VT_FLOAT) {
1447 gv(RC_FLOAT);
1448 if (tbt == VT_DOUBLE) {
1449 o(0xc0140f); /* unpcklps */
1450 o(0xc05a0f); /* cvtps2pd */
1451 } else if (tbt == VT_LDOUBLE) {
1452 /* movss %xmm0,-0x10(%rsp) */
1453 o(0x44110ff3);
1454 o(0xf024);
1455 o(0xf02444d9); /* flds -0x10(%rsp) */
1456 vtop->r = TREG_ST0;
1458 } else if (bt == VT_DOUBLE) {
1459 gv(RC_FLOAT);
1460 if (tbt == VT_FLOAT) {
1461 o(0xc0140f66); /* unpcklpd */
1462 o(0xc05a0f66); /* cvtpd2ps */
1463 } else if (tbt == VT_LDOUBLE) {
1464 /* movsd %xmm0,-0x10(%rsp) */
1465 o(0x44110ff2);
1466 o(0xf024);
1467 o(0xf02444dd); /* fldl -0x10(%rsp) */
1468 vtop->r = TREG_ST0;
1470 } else {
1471 gv(RC_ST0);
1472 if (tbt == VT_DOUBLE) {
1473 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1474 /* movsd -0x10(%rsp),%xmm0 */
1475 o(0x44100ff2);
1476 o(0xf024);
1477 vtop->r = TREG_XMM0;
1478 } else if (tbt == VT_FLOAT) {
1479 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1480 /* movss -0x10(%rsp),%xmm0 */
1481 o(0x44100ff3);
1482 o(0xf024);
1483 vtop->r = TREG_XMM0;
1488 /* convert fp to int 't' type */
1489 void gen_cvt_ftoi(int t)
1491 int ft, bt, size, r;
1492 ft = vtop->type.t;
1493 bt = ft & VT_BTYPE;
1494 if (bt == VT_LDOUBLE) {
1495 gen_cvt_ftof(VT_DOUBLE);
1496 bt = VT_DOUBLE;
1499 gv(RC_FLOAT);
1500 if (t != VT_INT)
1501 size = 8;
1502 else
1503 size = 4;
1505 r = get_reg(RC_INT);
1506 if (bt == VT_FLOAT) {
1507 o(0xf3);
1508 } else if (bt == VT_DOUBLE) {
1509 o(0xf2);
1510 } else {
1511 assert(0);
1513 if (size == 8) {
1514 o(0x48 + REX_BASE(r));
1516 o(0x2c0f); /* cvttss2si or cvttsd2si */
1517 o(0xc0 + (REG_VALUE(r) << 3));
1518 vtop->r = r;
1521 /* computed goto support */
1522 void ggoto(void)
1524 gcall_or_jmp(1);
1525 vtop--;
1528 /* end of x86-64 code generator */
1529 /*************************************************************/