tccpe: improve dllimport
[tinycc.git] / x86_64-gen.c
blob86ec4ea84ef46c00fe7aac137f2c9239e1babb49
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include <assert.h>
25 /* number of available registers */
/* NB_REGS is the length of reg_classes[]: one entry each for
   rax, rcx, rdx, xmm0 and st0. */
26 #define NB_REGS 5
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see gv2() code which does
30 assumptions on it). */
/* Each class is a distinct bit so that an entry of reg_classes[] can
   OR several of them together. */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
33 #define RC_RAX 0x0004 /* class carried only by the rax entry */
34 #define RC_RCX 0x0008 /* class carried only by the rcx entry */
35 #define RC_RDX 0x0010 /* class carried only by the rdx entry */
36 #define RC_XMM0 0x0020 /* class carried only by the xmm0 entry */
37 #define RC_ST0 0x0040 /* only for long double */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
/* Values 0..4 (RAX, RCX, RDX, XMM0, ST0) are indices into reg_classes[].
   RSI..R11 are not in reg_classes[]; they are only named explicitly
   (argument registers, scratch register for indirect calls).  For the
   integer registers the value is split by REX_BASE()/REG_VALUE() into
   bit 3 and the low three bits when instructions are encoded. */
43 enum {
44 TREG_RAX = 0,
45 TREG_RCX = 1,
46 TREG_RDX = 2,
47 TREG_RSI = 6,
48 TREG_RDI = 7,
49 TREG_R8 = 8,
50 TREG_R9 = 9,
51 TREG_R10 = 10,
52 TREG_R11 = 11,
54 TREG_XMM0 = 3,
55 TREG_ST0 = 4,
/* Flag OR-ed onto a register number (see load()) to mean "memory
   addressed through this register"; gen_modrm_impl() recognises it
   with a '>= TREG_MEM' test. */
57 TREG_MEM = 0x10,
/* REX_BASE(reg): bit 3 of the register number, i.e. the bit that has to
   travel in a REX prefix.  REG_VALUE(reg): the low three bits, i.e. the
   part that fits in a ModRM / opcode register field. */
60 #define REX_BASE(reg) (((reg) >> 3) & 1)
61 #define REG_VALUE(reg) ((reg) & 7)
/* Class mask of every allocatable register, indexed by TREG_* value
   (TREG_RAX .. TREG_ST0).  st0 carries only RC_ST0, so it is never
   returned for a generic RC_FLOAT request. */
63 const int reg_classes[NB_REGS] = {
64 /* eax */ RC_INT | RC_RAX,
65 /* ecx */ RC_INT | RC_RCX,
66 /* edx */ RC_INT | RC_RDX,
67 /* xmm0 */ RC_FLOAT | RC_XMM0,
68 /* st0 */ RC_ST0,
71 /* return registers for function */
/* These are the TREG_* counterparts of the RC_IRET / RC_LRET / RC_FRET
   register classes. */
72 #define REG_IRET TREG_RAX /* single word int return register */
73 #define REG_LRET TREG_RDX /* second word return register (for long long) */
74 #define REG_FRET TREG_XMM0 /* float return register */
76 /* defined if function parameters must be evaluated in reverse order */
77 #define INVERT_FUNC_PARAMS
79 /* pointer size, in bytes */
80 #define PTR_SIZE 8
82 /* long double size and alignment, in bytes */
83 #define LDOUBLE_SIZE 16
84 #define LDOUBLE_ALIGN 8
85 /* maximum alignment (for aligned attribute support) */
86 #define MAX_ALIGN 8
88 /******************************************************/
89 /* ELF defines */
91 #define EM_TCC_TARGET EM_X86_64
93 /* relocation type for 32 bit data relocation */
94 #define R_DATA_32 R_X86_64_32
/* pointer-sized (64 bit) data relocation, matching PTR_SIZE */
95 #define R_DATA_PTR R_X86_64_64
96 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
97 #define R_COPY R_X86_64_COPY
/* NOTE(review): presumably the default load address and page size used
   by the ELF output code - not used in this file, confirm in the linker. */
99 #define ELF_START_ADDR 0x08048000
100 #define ELF_PAGE_SIZE 0x1000
102 /******************************************************/
/* Value of 'ind' just after the FUNC_PROLOG_SIZE bytes reserved by
   gfunc_prolog(); gfunc_epilog() rewinds to it to write the real prolog. */
104 static unsigned long func_sub_sp_offset;
/* When non-zero, gfunc_epilog() emits 'ret n' with this value as its
   16-bit operand instead of a plain 'ret'.  gfunc_prolog() resets it. */
105 static int func_ret_sub;
107 /* XXX: make it faster ? */
108 void g(int c)
110 int ind1;
111 ind1 = ind + 1;
112 if (ind1 > cur_text_section->data_allocated)
113 section_realloc(cur_text_section, ind1);
114 cur_text_section->data[ind] = c;
115 ind = ind1;
/* Emit the bytes packed in 'c', least significant byte first.
   Emission stops as soon as the remaining value is zero, so o(0) emits
   nothing and high-order zero bytes are never written (use g() when a
   zero byte must be output). */
void o(unsigned int c)
{
    for (; c != 0; c >>= 8)
        g(c);
}
/* Emit the low 16 bits of 'v' as two bytes, little-endian. */
void gen_le16(int v)
{
    int shift;

    for (shift = 0; shift < 16; shift += 8)
        g(v >> shift);
}
/* Emit 'c' as four bytes, little-endian.  Unlike o(), zero bytes are
   always written. */
void gen_le32(int c)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8)
        g(c >> shift);
}
/* Emit 'c' as eight bytes, little-endian. */
void gen_le64(int64_t c)
{
    int shift;

    for (shift = 0; shift < 64; shift += 8)
        g(c >> shift);
}
152 /* output a symbol and patch all calls to it */
153 void gsym_addr(int t, int a)
155 int n, *ptr;
156 while (t) {
157 ptr = (int *)(cur_text_section->data + t);
158 n = *ptr; /* next value */
159 *ptr = a - t - 4;
160 t = n;
164 void gsym(int t)
166 gsym_addr(t, ind);
169 /* psym is used to put an instruction with a data field which is a
170 reference to a symbol. It is in fact the same as oad ! */
/* The returned value (offset of the 4-byte field) is what callers keep
   as the head of a reference chain later resolved by gsym(). */
171 #define psym oad
173 static int is64_type(int t)
175 return ((t & VT_BTYPE) == VT_PTR ||
176 (t & VT_BTYPE) == VT_FUNC ||
177 (t & VT_BTYPE) == VT_LLONG);
180 static int is_sse_float(int t) {
181 int bt;
182 bt = t & VT_BTYPE;
183 return bt == VT_DOUBLE || bt == VT_FLOAT;
186 /* instruction + 4 bytes data. Return the address of the data */
187 static int oad(int c, int s)
189 int ind1;
191 o(c);
192 ind1 = ind + 4;
193 if (ind1 > cur_text_section->data_allocated)
194 section_realloc(cur_text_section, ind1);
195 *(int *)(cur_text_section->data + ind) = s;
196 s = ind;
197 ind = ind1;
198 return s;
201 static void gen_addr32(int r, Sym *sym, int c)
203 if (r & VT_SYM)
204 greloc(cur_text_section, sym, ind, R_X86_64_32);
205 gen_le32(c);
208 /* output constant with relocation if 'r & VT_SYM' is true */
209 static void gen_addr64(int r, Sym *sym, int64_t c)
211 if (r & VT_SYM)
212 greloc(cur_text_section, sym, ind, R_X86_64_64);
213 gen_le64(c);
216 /* output constant with relocation if 'r & VT_SYM' is true */
217 static void gen_addrpc32(int r, Sym *sym, int c)
219 if (r & VT_SYM)
220 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
221 gen_le32(c-4);
224 /* output got address with relocation */
225 static void gen_gotpcrel(int r, Sym *sym, int c)
227 #ifndef TCC_TARGET_PE
228 Section *sr;
229 ElfW(Rela) *rel;
230 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
231 sr = cur_text_section->reloc;
232 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
233 rel->r_addend = -4;
234 #else
235 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
236 cur_text_section->data[ind-3],
237 cur_text_section->data[ind-2],
238 cur_text_section->data[ind-1]
240 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
241 #endif
242 gen_le32(0);
244 if (c) {
245 /* we use add c, %xxx for displacement */
246 o(0x48 + REX_BASE(r));
247 o(0x81);
248 o(0xc0 + REG_VALUE(r));
249 gen_le32(c);
253 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
255 op_reg = REG_VALUE(op_reg) << 3;
256 if ((r & VT_VALMASK) == VT_CONST) {
257 /* constant memory reference */
258 o(0x05 | op_reg);
259 if (is_got) {
260 gen_gotpcrel(r, sym, c);
261 } else {
262 gen_addrpc32(r, sym, c);
264 } else if ((r & VT_VALMASK) == VT_LOCAL) {
265 /* currently, we use only ebp as base */
266 if (c == (char)c) {
267 /* short reference */
268 o(0x45 | op_reg);
269 g(c);
270 } else {
271 oad(0x85 | op_reg, c);
273 } else if ((r & VT_VALMASK) >= TREG_MEM) {
274 if (c) {
275 g(0x80 | op_reg | REG_VALUE(r));
276 gen_le32(c);
277 } else {
278 g(0x00 | op_reg | REG_VALUE(r));
280 } else {
281 g(0x00 | op_reg | (r & VT_VALMASK));
285 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
286 opcode bits */
287 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
289 gen_modrm_impl(op_reg, r, sym, c, 0);
292 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
293 opcode bits */
294 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
296 int is_got;
297 int rex = 0x48 | (REX_BASE(op_reg) << 2);
298 if ((r & VT_VALMASK) != VT_CONST &&
299 (r & VT_VALMASK) != VT_LOCAL) {
300 rex |= REX_BASE(VT_VALMASK & r);
302 o(rex);
303 o(opcode);
304 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
305 gen_modrm_impl(op_reg, r, sym, c, is_got);
309 /* load 'r' from value 'sv' */
310 void load(int r, SValue *sv)
312 int v, t, ft, fc, fr;
313 SValue v1;
315 #ifdef TCC_TARGET_PE
316 if (pe_dllimport(r, sv, load))
317 return;
318 #endif
320 fr = sv->r;
321 ft = sv->type.t;
322 fc = sv->c.ul;
324 #ifndef TCC_TARGET_PE
325 /* we use indirect access via got */
326 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
327 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
328 /* use the result register as a temporal register */
329 int tr = r | TREG_MEM;
330 if (is_float(ft)) {
331 /* we cannot use float registers as a temporal register */
332 tr = get_reg(RC_INT) | TREG_MEM;
334 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
336 /* load from the temporal register */
337 fr = tr | VT_LVAL;
339 #endif
341 v = fr & VT_VALMASK;
342 if (fr & VT_LVAL) {
343 if (v == VT_LLOCAL) {
344 v1.type.t = VT_PTR;
345 v1.r = VT_LOCAL | VT_LVAL;
346 v1.c.ul = fc;
347 load(r, &v1);
348 fr = r;
350 if ((ft & VT_BTYPE) == VT_FLOAT) {
351 o(0x6e0f66); /* movd */
352 r = 0;
353 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
354 o(0x7e0ff3); /* movq */
355 r = 0;
356 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
357 o(0xdb); /* fldt */
358 r = 5;
359 } else if ((ft & VT_TYPE) == VT_BYTE) {
360 o(0xbe0f); /* movsbl */
361 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
362 o(0xb60f); /* movzbl */
363 } else if ((ft & VT_TYPE) == VT_SHORT) {
364 o(0xbf0f); /* movswl */
365 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
366 o(0xb70f); /* movzwl */
367 } else if (is64_type(ft)) {
368 gen_modrm64(0x8b, r, fr, sv->sym, fc);
369 return;
370 } else {
371 o(0x8b); /* movl */
373 gen_modrm(r, fr, sv->sym, fc);
374 } else {
375 if (v == VT_CONST) {
376 if (fr & VT_SYM) {
377 #ifdef TCC_TARGET_PE
378 o(0x8d48);
379 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
380 gen_addrpc32(fr, sv->sym, fc);
381 #else
382 if (sv->sym->type.t & VT_STATIC) {
383 o(0x8d48);
384 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
385 gen_addrpc32(fr, sv->sym, fc);
386 } else {
387 o(0x8b48);
388 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
389 gen_gotpcrel(r, sv->sym, fc);
391 #endif
392 } else if (is64_type(ft)) {
393 o(0x48);
394 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
395 gen_le64(sv->c.ull);
396 } else {
397 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
398 gen_le32(fc);
400 } else if (v == VT_LOCAL) {
401 o(0x48 | REX_BASE(r));
402 o(0x8d); /* lea xxx(%ebp), r */
403 gen_modrm(r, VT_LOCAL, sv->sym, fc);
404 } else if (v == VT_CMP) {
405 oad(0xb8 + r, 0); /* mov $0, r */
406 o(0x0f); /* setxx %br */
407 o(fc);
408 o(0xc0 + r);
409 } else if (v == VT_JMP || v == VT_JMPI) {
410 t = v & 1;
411 oad(0xb8 + r, t); /* mov $1, r */
412 o(0x05eb); /* jmp after */
413 gsym(fc);
414 oad(0xb8 + r, t ^ 1); /* mov $0, r */
415 } else if (v != r) {
416 if (r == TREG_XMM0) {
417 assert(v == TREG_ST0);
418 /* gen_cvt_ftof(VT_DOUBLE); */
419 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
420 /* movsd -0x10(%rsp),%xmm0 */
421 o(0x44100ff2);
422 o(0xf024);
423 } else if (r == TREG_ST0) {
424 assert(v == TREG_XMM0);
425 /* gen_cvt_ftof(VT_LDOUBLE); */
426 /* movsd %xmm0,-0x10(%rsp) */
427 o(0x44110ff2);
428 o(0xf024);
429 o(0xf02444dd); /* fldl -0x10(%rsp) */
430 } else {
431 o(0x48 | REX_BASE(r) | (REX_BASE(v) << 2));
432 o(0x89);
433 o(0xc0 + r + v * 8); /* mov v, r */
439 /* store register 'r' in lvalue 'v' */
/* The move instruction is selected from the basic type of 'v'.  On
   non-PE targets a symbol destination is reached through the GOT: its
   address is loaded into %r11 and the store goes to (%r11).  On PE
   targets pe_dllimport() may handle the whole store. */
440 void store(int r, SValue *v)
442 int fr, bt, ft, fc;
443 int op64 = 0;
444 /* store the REX prefix in this variable when PIC is enabled */
445 int pic = 0;
447 #ifdef TCC_TARGET_PE
448 if (pe_dllimport(r, v, store))
449 return;
450 #endif
452 ft = v->type.t;
453 fc = v->c.ul;
454 fr = v->r & VT_VALMASK;
455 bt = ft & VT_BTYPE;
457 #ifndef TCC_TARGET_PE
458 /* we need to access the variable via got */
/* NOTE(review): unlike load(), this test does not exclude VT_STATIC
   symbols - confirm that is intended. */
459 if (fr == VT_CONST && (v->r & VT_SYM)) {
460 /* mov xx(%rip), %r11 */
461 o(0x1d8b4c);
462 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
/* REX prefix for a (%r11) operand: 0x49 for 64-bit types, 0x41 otherwise */
463 pic = is64_type(bt) ? 0x49 : 0x41;
465 #endif
467 /* XXX: incorrect if float reg to reg */
/* In every branch below 'o(pic)' emits the REX prefix only when pic is
   non-zero: o() outputs nothing for a zero argument. */
468 if (bt == VT_FLOAT) {
469 o(0x66);
470 o(pic);
471 o(0x7e0f); /* movd */
472 r = 0;
473 } else if (bt == VT_DOUBLE) {
474 o(0x66);
475 o(pic);
476 o(0xd60f); /* movq */
477 r = 0;
478 } else if (bt == VT_LDOUBLE) {
479 o(0xc0d9); /* fld %st(0) */
480 o(pic);
481 o(0xdb); /* fstpt */
/* 7 is the opcode extension that goes in the modrm reg field */
482 r = 7;
483 } else {
484 if (bt == VT_SHORT)
485 o(0x66);
486 o(pic);
487 if (bt == VT_BYTE || bt == VT_BOOL)
488 o(0x88);
489 else if (is64_type(bt))
/* 64-bit stores: the opcode is deferred so that gen_modrm64() can emit
   the REX prefix in front of it */
490 op64 = 0x89;
491 else
492 o(0x89);
494 if (pic) {
495 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
496 if (op64)
497 o(op64);
498 o(3 + (r << 3));
499 } else if (op64) {
500 if (fr == VT_CONST ||
501 fr == VT_LOCAL ||
502 (v->r & VT_LVAL)) {
503 gen_modrm64(op64, r, v->r, v->sym, fc);
504 } else if (fr != r) {
505 /* XXX: don't we really come here? */
506 abort();
/* not reached: abort() does not return */
507 o(0xc0 + fr + r * 8); /* mov r, fr */
509 } else {
510 if (fr == VT_CONST ||
511 fr == VT_LOCAL ||
512 (v->r & VT_LVAL)) {
513 gen_modrm(r, v->r, v->sym, fc);
514 } else if (fr != r) {
515 /* XXX: don't we really come here? */
516 abort();
/* not reached: abort() does not return */
517 o(0xc0 + fr + r * 8); /* mov r, fr */
/* Emit 'add $val, %rsp'.  The short form with an 8-bit immediate is
   used when 'val' fits in a signed byte, the 32-bit immediate form
   otherwise.  A negative 'val' reserves stack space. */
static void gadd_sp(int val)
{
    /* 'signed char' rather than plain 'char': the CPU sign-extends the
       8-bit immediate, so the range test must be signed even on hosts
       where plain char is unsigned */
    if (val == (signed char)val) {
        o(0xc48348); /* add $xx, %rsp (imm8) */
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
532 /* 'is_jmp' is '1' if it is a jump */
533 static void gcall_or_jmp(int is_jmp)
535 int r;
536 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
537 /* constant case */
538 if (vtop->r & VT_SYM) {
539 /* relocation case */
540 greloc(cur_text_section, vtop->sym,
541 ind + 1, R_X86_64_PC32);
542 } else {
543 /* put an empty PC32 relocation */
544 put_elf_reloc(symtab_section, cur_text_section,
545 ind + 1, R_X86_64_PC32, 0);
547 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
548 } else {
549 /* otherwise, indirect call */
550 r = TREG_R11;
551 load(r, vtop);
552 o(0x41); /* REX */
553 o(0xff); /* call/jmp *r */
554 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
/* Integer argument registers in argument order; REGN is how many
   arguments travel in them.  The PE target uses four registers, every
   other target six. */
558 #ifdef TCC_TARGET_PE
559 #define REGN 4
560 static const uint8_t arg_regs[] = {
561 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
563 #else
564 #define REGN 6
565 static const uint8_t arg_regs[REGN] = {
566 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
568 #endif
570 /* Generate function call. The function address is pushed first, then
571 all the parameters in call order. This functions pops all the
572 parameters and the function address. */
573 void gfunc_call(int nb_args)
575 int size, align, r, args_size, i;
576 SValue *orig_vtop;
577 int nb_reg_args = 0;
578 int nb_sse_args = 0;
579 int sse_reg, gen_reg;
581 /* calculate the number of integer/float arguments */
582 args_size = 0;
583 for(i = 0; i < nb_args; i++) {
584 if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
585 args_size += type_size(&vtop->type, &align);
586 } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
587 args_size += 16;
588 #ifndef TCC_TARGET_PE
589 } else if (is_sse_float(vtop[-i].type.t)) {
590 nb_sse_args++;
591 if (nb_sse_args > 8) args_size += 8;
592 #endif
593 } else {
594 nb_reg_args++;
595 if (nb_reg_args > REGN) args_size += 8;
599 /* for struct arguments, we need to call memcpy and the function
600 call breaks register passing arguments we are preparing.
601 So, we process arguments which will be passed by stack first. */
602 orig_vtop = vtop;
603 gen_reg = nb_reg_args;
604 sse_reg = nb_sse_args;
606 #ifdef TCC_TARGET_PE
607 save_regs(0); /* save used temporary registers */
608 #endif
610 /* adjust stack to align SSE boundary */
611 if (args_size &= 8) {
612 o(0x50); /* push $rax */
614 for(i = 0; i < nb_args; i++) {
615 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
616 size = type_size(&vtop->type, &align);
617 /* align to stack align size */
618 size = (size + 3) & ~3;
619 /* allocate the necessary size on stack */
620 o(0x48);
621 oad(0xec81, size); /* sub $xxx, %rsp */
622 /* generate structure store */
623 r = get_reg(RC_INT);
624 o(0x48 + REX_BASE(r));
625 o(0x89); /* mov %rsp, r */
626 o(0xe0 + r);
628 /* following code breaks vtop[1] */
629 SValue tmp = vtop[1];
630 vset(&vtop->type, r | VT_LVAL, 0);
631 vswap();
632 vstore();
633 vtop[1] = tmp;
635 args_size += size;
636 } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
637 gv(RC_ST0);
638 size = LDOUBLE_SIZE;
639 oad(0xec8148, size); /* sub $xxx, %rsp */
640 o(0x7cdb); /* fstpt 0(%rsp) */
641 g(0x24);
642 g(0x00);
643 args_size += size;
644 } else if (is_sse_float(vtop->type.t)) {
645 #ifdef TCC_TARGET_PE
646 int j = --gen_reg;
647 if (j >= REGN) {
648 #else
649 int j = --sse_reg;
650 if (j >= 8) {
651 #endif
652 gv(RC_FLOAT);
653 o(0x50); /* push $rax */
654 /* movq %xmm0, (%rsp) */
655 o(0x04d60f66);
656 o(0x24);
657 args_size += 8;
659 } else {
660 int j = --gen_reg;
661 /* simple type */
662 /* XXX: implicit cast ? */
663 if (j >= REGN) {
664 r = gv(RC_INT);
665 o(0x50 + r); /* push r */
666 args_size += 8;
669 vtop--;
671 vtop = orig_vtop;
673 /* then, we prepare register passing arguments.
674 Note that we cannot set RDX and RCX in this loop because gv()
675 may break these temporary registers. Let's use R10 and R11
676 instead of them */
677 gen_reg = nb_reg_args;
678 sse_reg = nb_sse_args;
679 for(i = 0; i < nb_args; i++) {
680 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
681 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
682 } else if (is_sse_float(vtop->type.t)) {
683 #ifdef TCC_TARGET_PE
684 int j = --gen_reg;
685 if (j < REGN) {
686 int d = arg_regs[j];
687 gv(RC_FLOAT); /* only one float register */
688 /* movaps %xmm0, %xmmN */
689 o(0x280f);
690 o(0xc0 + (j << 3));
691 o(0x50);
692 o(0xd60f66); /* movq %xmm0, (%rsp) */
693 o(0x2404 + (j << 3));
694 if (d < 8) {
695 o(0x58 + d); /* pop d */
696 } else {
697 o(0x58);
698 o(0xc08949 + d - 8);
701 } else {
702 int j = --gen_reg;
703 /* simple type */
704 /* XXX: implicit cast ? */
705 if (j < REGN) {
706 int d = arg_regs[j];
707 r = gv(RC_INT);
708 if (d != r) {
709 if (d < 8) {
710 o(0x8948); /* mov */
711 o(0xc0 + r * 8 + d);
712 } else {
713 o(0x8949); /* mov */
714 o(0xc0 + r * 8 + d - 8);
718 #else
719 int j = --sse_reg;
720 if (j < 8) {
721 gv(RC_FLOAT); /* only one float register */
722 /* movaps %xmm0, %xmmN */
723 o(0x280f);
724 o(0xc0 + (sse_reg << 3));
726 } else {
727 int j = --gen_reg;
728 /* simple type */
729 /* XXX: implicit cast ? */
730 if (j < REGN) {
731 r = gv(RC_INT);
732 if (j < 2) {
733 o(0x8948); /* mov */
734 o(0xc0 + r * 8 + arg_regs[j]);
735 } else if (j < 4) {
736 o(0x8949); /* mov */
737 /* j=2: r10, j=3: r11 */
738 o(0xc0 + r * 8 + j);
739 } else {
740 o(0x8949); /* mov */
741 /* j=4: r8, j=5: r9 */
742 o(0xc0 + r * 8 + j - 4);
745 #endif
747 vtop--;
750 #ifdef TCC_TARGET_PE
751 /* allocate scratch space */
752 gadd_sp(-8*REGN);
753 args_size += 8*REGN;
754 #else
755 save_regs(0); /* save used temporary registers */
757 /* Copy R10 and R11 into RDX and RCX, respectively */
758 if (nb_reg_args > 2) {
759 o(0xd2894c); /* mov %r10, %rdx */
760 if (nb_reg_args > 3) {
761 o(0xd9894c); /* mov %r11, %rcx */
765 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
766 #endif
767 gcall_or_jmp(0);
768 if (args_size)
769 gadd_sp(args_size);
770 vtop--;
773 #define FUNC_PROLOG_SIZE 11
775 static void push_arg_reg(int i) {
776 loc -= 8;
777 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
780 /* generate function prolog of type 't' */
/* Reserves FUNC_PROLOG_SIZE bytes for the frame set-up code (written
   later by gfunc_epilog()), spills the register arguments to the frame
   and declares every parameter as a local lvalue with sym_push().
   'func_type' is the type of the function being compiled; its parameter
   list is walked through func_type->ref->next. */
781 void gfunc_prolog(CType *func_type)
783 int i, addr, align, size;
784 int param_index, param_addr, reg_param_index, sse_param_index;
785 Sym *sym;
786 CType *type;
788 func_ret_sub = 0;
790 sym = func_type->ref;
/* 'addr' is the rbp-relative offset of the next stack-passed parameter;
   NOTE(review): presumably PTR_SIZE * 2 skips the saved frame pointer
   and the return address, as the comment further down suggests. */
791 addr = PTR_SIZE * 2;
792 loc = 0;
/* leave room for the prolog; gfunc_epilog() rewinds to this place */
793 ind += FUNC_PROLOG_SIZE;
794 func_sub_sp_offset = ind;
796 #ifndef TCC_TARGET_PE
797 if (func_type->ref->c == FUNC_ELLIPSIS) {
798 int seen_reg_num, seen_sse_num, seen_stack_size;
799 seen_reg_num = seen_sse_num = 0;
800 /* frame pointer and return address */
801 seen_stack_size = PTR_SIZE * 2;
802 /* count the number of seen parameters */
803 sym = func_type->ref;
804 while ((sym = sym->next) != NULL) {
805 type = &sym->type;
806 if (is_sse_float(type->t)) {
807 if (seen_sse_num < 8) {
808 seen_sse_num++;
809 } else {
810 seen_stack_size += 8;
812 } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
813 size = type_size(type, &align);
814 size = (size + 3) & ~3;
815 seen_stack_size += size;
816 } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
817 seen_stack_size += LDOUBLE_SIZE;
818 } else {
819 if (seen_reg_num < REGN) {
820 seen_reg_num++;
821 } else {
822 seen_stack_size += 8;
/* Three 32-bit values are stored in the first 16 bytes of the frame:
   bytes of integer registers consumed by named parameters, the matching
   figure for the SSE registers (offset by 48) and the bytes of stack
   consumed.  NOTE(review): presumably the initial state read by the
   va_start implementation - confirm against it. */
827 loc -= 16;
828 /* movl $0x????????, -0x10(%rbp) */
829 o(0xf045c7);
830 gen_le32(seen_reg_num * 8);
831 /* movl $0x????????, -0xc(%rbp) */
832 o(0xf445c7);
833 gen_le32(seen_sse_num * 16 + 48);
834 /* movl $0x????????, -0x8(%rbp) */
835 o(0xf845c7);
836 gen_le32(seen_stack_size);
838 /* save all register passing arguments */
/* each of the 8 SSE registers gets a 16-byte slot: the low 8 bytes
   hold the register, the high 8 bytes are zeroed; the registers are
   stored from number 7 down to 0 */
839 for (i = 0; i < 8; i++) {
840 loc -= 16;
841 o(0xd60f66); /* movq */
842 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
843 /* movq $0, loc+8(%rbp) */
844 o(0x85c748);
845 gen_le32(loc + 8);
846 gen_le32(0);
/* then the integer argument registers, last one first */
848 for (i = 0; i < REGN; i++) {
849 push_arg_reg(REGN-1-i);
852 #endif
854 sym = func_type->ref;
855 param_index = 0;
856 reg_param_index = 0;
857 sse_param_index = 0;
859 /* if the function returns a structure, then add an
860 implicit pointer parameter */
861 func_vt = sym->type;
862 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
863 push_arg_reg(reg_param_index);
864 param_addr = loc;
866 func_vc = loc;
867 param_index++;
868 reg_param_index++;
870 /* define parameters */
871 while ((sym = sym->next) != NULL) {
872 type = &sym->type;
873 size = type_size(type, &align);
/* NOTE(review): sizes are rounded to 4 bytes here while the other
   stack slots advance by 8 - confirm this is intended. */
874 size = (size + 3) & ~3;
875 #ifndef TCC_TARGET_PE
876 if (is_sse_float(type->t)) {
877 if (sse_param_index < 8) {
878 /* save arguments passed by register */
879 loc -= 8;
880 o(0xd60f66); /* movq */
881 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
882 param_addr = loc;
883 } else {
884 param_addr = addr;
885 addr += size;
887 sse_param_index++;
888 } else
889 #endif
890 if ((type->t & VT_BTYPE) == VT_STRUCT ||
891 (type->t & VT_BTYPE) == VT_LDOUBLE) {
892 param_addr = addr;
893 addr += size;
894 } else {
895 #ifdef TCC_TARGET_PE
/* PE: a register argument is stored at the positive rbp offset 'addr',
   i.e. in the slot it would occupy if passed on the stack, instead of
   in a newly allocated local */
896 if (reg_param_index < REGN) {
897 /* save arguments passed by register */
898 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
900 param_addr = addr;
901 addr += 8;
902 #else
903 if (reg_param_index < REGN) {
904 /* save arguments passed by register */
905 push_arg_reg(reg_param_index);
906 param_addr = loc;
907 } else {
908 param_addr = addr;
909 addr += 8;
911 #endif
912 reg_param_index++;
914 sym_push(sym->v & ~SYM_FIELD, type,
915 VT_LOCAL | VT_LVAL, param_addr);
916 param_index++;
918 #ifdef TCC_TARGET_PE
/* PE variadic functions: the argument registers not taken by named
   parameters are stored in the following 8-byte slots as well */
919 if (func_type->ref->c == FUNC_ELLIPSIS) {
920 for (i = reg_param_index; i < REGN; ++i) {
921 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, addr);
922 addr += 8;
925 #endif
928 /* generate function epilog */
929 void gfunc_epilog(void)
931 int v, saved_ind;
933 o(0xc9); /* leave */
934 if (func_ret_sub == 0) {
935 o(0xc3); /* ret */
936 } else {
937 o(0xc2); /* ret n */
938 g(func_ret_sub);
939 g(func_ret_sub >> 8);
941 /* align local size to word & save local variables */
942 v = (-loc + 15) & -16;
943 saved_ind = ind;
944 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
945 #ifdef TCC_TARGET_PE
946 if (v >= 4096) {
947 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
948 oad(0xb8, v); /* mov stacksize, %eax */
949 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
950 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
951 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
952 } else
953 #endif
955 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
956 o(0xec8148); /* sub rsp, stacksize */
957 gen_le32(v);
959 ind = saved_ind;
/* generate a jump to a label.
   Emits 'jmp rel32' whose operand field is linked into the pending
   reference chain 't'; the new head of the chain is returned, to be
   resolved later with gsym(). */
int gjmp(int t)
{
    int head = psym(0xe9, t);

    return head;
}
968 /* generate a jump to a fixed address */
969 void gjmp_addr(int a)
971 int r;
972 r = a - ind - 2;
973 if (r == (char)r) {
974 g(0xeb);
975 g(r);
976 } else {
977 oad(0xe9, a - ind - 5);
981 /* generate a test. set 'inv' to invert test. Stack entry is popped */
982 int gtst(int inv, int t)
984 int v, *p;
986 v = vtop->r & VT_VALMASK;
987 if (v == VT_CMP) {
988 /* fast case : can jump directly since flags are set */
989 g(0x0f);
990 t = psym((vtop->c.i - 16) ^ inv, t);
991 } else if (v == VT_JMP || v == VT_JMPI) {
992 /* && or || optimization */
993 if ((v & 1) == inv) {
994 /* insert vtop->c jump list in t */
995 p = &vtop->c.i;
996 while (*p != 0)
997 p = (int *)(cur_text_section->data + *p);
998 *p = t;
999 t = vtop->c.i;
1000 } else {
1001 t = gjmp(t);
1002 gsym(vtop->c.i);
1004 } else {
1005 if (is_float(vtop->type.t) ||
1006 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1007 vpushi(0);
1008 gen_op(TOK_NE);
1010 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1011 /* constant jmp optimization */
1012 if ((vtop->c.i != 0) != inv)
1013 t = gjmp(t);
1014 } else {
1015 v = gv(RC_INT);
1016 o(0x85);
1017 o(0xc0 + v * 9);
1018 g(0x0f);
1019 t = psym(0x85 ^ inv, t);
1022 vtop--;
1023 return t;
1026 /* generate an integer binary operation */
/* The two operands are the two topmost values of the value stack; they
   are replaced by the result.  'op' is an operator character or a TOK_*
   code.  Any 'op' not listed in the switch is handled as a comparison:
   a 'cmp' is emitted and the result becomes VT_CMP with 'op' kept in
   vtop->c.i. */
1027 void gen_opi(int op)
1029 int r, fr, opc, c;
1031 switch(op) {
1032 case '+':
1033 case TOK_ADDC1: /* add with carry generation */
1034 opc = 0;
/* common code for the two-operand ALU group; 'opc' is the 3-bit
   operation selector placed in the opcode / modrm byte */
1035 gen_op8:
1036 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST &&
1037 !is64_type(vtop->type.t)) {
1038 /* constant case */
1039 vswap();
1040 r = gv(RC_INT);
1041 if (is64_type(vtop->type.t)) {
1042 o(0x48 | REX_BASE(r));
1044 vswap();
1045 c = vtop->c.i;
/* NOTE(review): the (char) range test assumes plain char is signed on
   the host compiling tcc. */
1046 if (c == (char)c) {
1047 /* XXX: generate inc and dec for smaller code ? */
1048 o(0x83);
1049 o(0xc0 | (opc << 3) | REG_VALUE(r));
1050 g(c);
1051 } else {
1052 o(0x81);
1053 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1055 } else {
1056 gv2(RC_INT, RC_INT);
1057 r = vtop[-1].r;
1058 fr = vtop[0].r;
/* a REX.W prefix is emitted for everything except a comparison
   (opc == 7) of two signed operands that are not 64-bit types */
1059 if (opc != 7 ||
1060 is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
1061 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
1062 o(0x48 | REX_BASE(r) | (REX_BASE(fr) << 2));
1064 o((opc << 3) | 0x01);
1065 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1067 vtop--;
1068 if (op >= TOK_ULT && op <= TOK_GT) {
1069 vtop->r = VT_CMP;
1070 vtop->c.i = op;
1072 break;
1073 case '-':
1074 case TOK_SUBC1: /* sub with carry generation */
1075 opc = 5;
1076 goto gen_op8;
1077 case TOK_ADDC2: /* add with carry use */
1078 opc = 2;
1079 goto gen_op8;
1080 case TOK_SUBC2: /* sub with carry use */
1081 opc = 3;
1082 goto gen_op8;
1083 case '&':
1084 opc = 4;
1085 goto gen_op8;
1086 case '^':
1087 opc = 6;
1088 goto gen_op8;
1089 case '|':
1090 opc = 1;
1091 goto gen_op8;
1092 case '*':
1093 gv2(RC_INT, RC_INT);
1094 r = vtop[-1].r;
1095 fr = vtop[0].r;
1096 if (is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
1097 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
1098 o(0x48 | REX_BASE(fr) | (REX_BASE(r) << 2));
1100 vtop--;
1101 o(0xaf0f); /* imul fr, r */
/* NOTE(review): 'fr' and 'r' are not masked with REG_VALUE() here,
   unlike in the gen_op8 path. */
1102 o(0xc0 + fr + r * 8);
1103 break;
1104 case TOK_SHL:
1105 opc = 4;
1106 goto gen_shift;
1107 case TOK_SHR:
1108 opc = 5;
1109 goto gen_shift;
1110 case TOK_SAR:
1111 opc = 7;
1112 gen_shift:
/* from here on 'opc' is the complete modrm byte minus the register */
1113 opc = 0xc0 | (opc << 3);
1114 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1115 /* constant case */
1116 vswap();
1117 r = gv(RC_INT);
/* the shift count is masked to 6 bits for long long, 5 bits otherwise */
1118 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1119 o(0x48 | REX_BASE(r));
1120 c = 0x3f;
1121 } else {
1122 c = 0x1f;
1124 vswap();
1125 c &= vtop->c.i;
1126 o(0xc1); /* shl/shr/sar $xxx, r */
1127 o(opc | r);
1128 g(c);
1129 } else {
1130 /* we generate the shift in ecx */
1131 gv2(RC_INT, RC_RCX);
1132 r = vtop[-1].r;
1133 if ((vtop[-1].type.t & VT_BTYPE) == VT_LLONG) {
1134 o(0x48 | REX_BASE(r));
1136 o(0xd3); /* shl/shr/sar %cl, r */
1137 o(opc | r);
1139 vtop--;
1140 break;
1141 case '/':
1142 case TOK_UDIV:
1143 case TOK_PDIV:
1144 case '%':
1145 case TOK_UMOD:
1146 case TOK_UMULL:
1147 /* first operand must be in eax */
1148 /* XXX: need better constraint for second operand */
1149 gv2(RC_RAX, RC_RCX);
1150 r = vtop[-1].r;
1151 fr = vtop[0].r;
1152 vtop--;
/* rdx is overwritten by the instructions below */
1153 save_reg(TREG_RDX);
1154 if (op == TOK_UMULL) {
1155 o(0xf7); /* mul fr */
1156 o(0xe0 + fr);
1157 vtop->r2 = TREG_RDX;
1158 r = TREG_RAX;
1159 } else {
1160 if (op == TOK_UDIV || op == TOK_UMOD) {
/* NOTE(review): this is a bitwise AND with VT_LLONG, not the equality
   test used in the shift code above; basic types sharing bits with
   VT_LLONG also take the 64-bit path - confirm this is intended. */
1161 if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
1162 o(0xd23148); /* xor %rdx, %rdx */
1163 o(0x48 + REX_BASE(fr));
1164 } else {
1165 o(0xd231); /* xor %edx, %edx */
1167 o(0xf7); /* div fr, %eax */
1168 o(0xf0 + fr);
1169 } else {
/* NOTE(review): same bitwise test as in the unsigned branch above */
1170 if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
1171 o(0x9948); /* cqto */
1172 o(0x48 + REX_BASE(fr));
1173 } else {
1174 o(0x99); /* cltd */
1176 o(0xf7); /* idiv fr, %eax */
1177 o(0xf8 + fr);
/* the remainder is taken from rdx, the quotient from rax */
1179 if (op == '%' || op == TOK_UMOD)
1180 r = TREG_RDX;
1181 else
1182 r = TREG_RAX;
1184 vtop->r = r;
1185 break;
1186 default:
/* comparisons: emit 'cmp' through the common ALU code */
1187 opc = 7;
1188 goto gen_op8;
/* generate a long long binary operation: on this target 64-bit
   integers are handled by the same code as the other integers. */
void gen_opl(int op)
{
    gen_opi(op);
}
/* The operands are the two topmost values of the value stack and are
   replaced by the result.  long double operands go through the x87
   stack (ST0), float and double through xmm0/xmm1.  For comparisons
   ('op' between TOK_ULT and TOK_GT) the result becomes VT_CMP and
   vtop->c.i holds the condition to test. */
1197 /* generate a floating point operation 'v = t1 op t2' instruction. The
1198 two operands are guaranteed to have the same floating point type */
1199 /* XXX: need to use ST1 too */
1200 void gen_opf(int op)
1202 int a, ft, fc, swapped, r;
1203 int float_type =
1204 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1206 /* convert constants to memory references */
1207 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1208 vswap();
1209 gv(float_type);
1210 vswap();
1212 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1213 gv(float_type);
1215 /* must put at least one value in the floating point register */
1216 if ((vtop[-1].r & VT_LVAL) &&
1217 (vtop[0].r & VT_LVAL)) {
1218 vswap();
1219 gv(float_type);
1220 vswap();
1222 swapped = 0;
1223 /* swap the stack if needed so that t1 is the register and t2 is
1224 the memory reference */
1225 if (vtop[-1].r & VT_LVAL) {
1226 vswap();
1227 swapped = 1;
1229 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1230 if (op >= TOK_ULT && op <= TOK_GT) {
1231 /* load on stack second operand */
1232 load(TREG_ST0, vtop);
1233 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1234 if (op == TOK_GE || op == TOK_GT)
1235 swapped = !swapped;
1236 else if (op == TOK_EQ || op == TOK_NE)
1237 swapped = 0;
1238 if (swapped)
1239 o(0xc9d9); /* fxch %st(1) */
1240 o(0xe9da); /* fucompp */
1241 o(0xe0df); /* fnstsw %ax */
/* the x87 status bits now in %ah are reduced to a flags test; 'op'
   is replaced by the condition that must be checked on the result */
1242 if (op == TOK_EQ) {
1243 o(0x45e480); /* and $0x45, %ah */
1244 o(0x40fC80); /* cmp $0x40, %ah */
1245 } else if (op == TOK_NE) {
1246 o(0x45e480); /* and $0x45, %ah */
1247 o(0x40f480); /* xor $0x40, %ah */
1248 op = TOK_NE;
1249 } else if (op == TOK_GE || op == TOK_LE) {
1250 o(0x05c4f6); /* test $0x05, %ah */
1251 op = TOK_EQ;
1252 } else {
1253 o(0x45c4f6); /* test $0x45, %ah */
1254 op = TOK_EQ;
1256 vtop--;
1257 vtop->r = VT_CMP;
1258 vtop->c.i = op;
1259 } else {
1260 /* no memory reference possible for long double operations */
1261 load(TREG_ST0, vtop);
1262 swapped = !swapped;
/* 'a' selects the x87 operation; for '-' and '/' the next value picks
   the variant with the operands in the other order */
1264 switch(op) {
1265 default:
1266 case '+':
1267 a = 0;
1268 break;
1269 case '-':
1270 a = 4;
1271 if (swapped)
1272 a++;
1273 break;
1274 case '*':
1275 a = 1;
1276 break;
1277 case '/':
1278 a = 6;
1279 if (swapped)
1280 a++;
1281 break;
/* NOTE(review): 'ft' and 'fc' are assigned but not used afterwards in
   this branch. */
1283 ft = vtop->type.t;
1284 fc = vtop->c.ul;
1285 o(0xde); /* fxxxp %st, %st(1) */
1286 o(0xc1 + (a << 3));
1287 vtop--;
1289 } else {
1290 if (op >= TOK_ULT && op <= TOK_GT) {
1291 /* if saved lvalue, then we must reload it */
1292 r = vtop->r;
1293 fc = vtop->c.ul;
1294 if ((r & VT_VALMASK) == VT_LLOCAL) {
1295 SValue v1;
1296 r = get_reg(RC_INT);
/* NOTE(review): the saved lvalue address is reloaded with type VT_INT,
   which takes the 32-bit 'movl' path of load(); load() itself uses
   VT_PTR for the same VT_LLOCAL case - confirm. */
1297 v1.type.t = VT_INT;
1298 v1.r = VT_LOCAL | VT_LVAL;
1299 v1.c.ul = fc;
1300 load(r, &v1);
1301 fc = 0;
1304 if (op == TOK_EQ || op == TOK_NE) {
1305 swapped = 0;
1306 } else {
1307 if (op == TOK_LE || op == TOK_LT)
1308 swapped = !swapped;
/* from here on 'op' holds the second opcode byte of the setcc
   instruction to use on the ucomis result */
1309 if (op == TOK_LE || op == TOK_GE) {
1310 op = 0x93; /* setae */
1311 } else {
1312 op = 0x97; /* seta */
1316 if (swapped) {
1317 o(0x7e0ff3); /* movq */
1318 gen_modrm(1, r, vtop->sym, fc);
1320 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1321 o(0x66);
1323 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1324 o(0xc8);
1325 } else {
1326 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1327 o(0x66);
1329 o(0x2e0f); /* ucomisd */
1330 gen_modrm(0, r, vtop->sym, fc);
1333 vtop--;
1334 vtop->r = VT_CMP;
1335 vtop->c.i = op;
1336 } else {
1337 /* no memory reference possible for long double operations */
/* NOTE(review): this branch is only entered when the basic type is not
   VT_LDOUBLE, so the test below looks like it can never be true. */
1338 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1339 load(TREG_XMM0, vtop);
1340 swapped = !swapped;
/* 'a' is added to opcode byte 0x58 below to select the SSE operation */
1342 switch(op) {
1343 default:
1344 case '+':
1345 a = 0;
1346 break;
1347 case '-':
1348 a = 4;
1349 break;
1350 case '*':
1351 a = 1;
1352 break;
1353 case '/':
1354 a = 6;
1355 break;
1357 ft = vtop->type.t;
1358 fc = vtop->c.ul;
/* NOTE(review): as above, VT_LDOUBLE looks impossible here */
1359 if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1360 o(0xde); /* fxxxp %st, %st(1) */
1361 o(0xc1 + (a << 3));
1362 } else {
1363 /* if saved lvalue, then we must reload it */
1364 r = vtop->r;
1365 if ((r & VT_VALMASK) == VT_LLOCAL) {
1366 SValue v1;
1367 r = get_reg(RC_INT);
/* NOTE(review): VT_INT used to reload a saved address, see the note in
   the comparison branch above */
1368 v1.type.t = VT_INT;
1369 v1.r = VT_LOCAL | VT_LVAL;
1370 v1.c.ul = fc;
1371 load(r, &v1);
1372 fc = 0;
1374 if (swapped) {
/* operands are in the wrong order: move the register operand to xmm1,
   load the memory operand into xmm0, then operate xmm1 into xmm0 */
1375 /* movq %xmm0,%xmm1 */
1376 o(0x7e0ff3);
1377 o(0xc8);
1378 load(TREG_XMM0, vtop);
1379 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1380 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1381 o(0xf2);
1382 } else {
1383 o(0xf3);
1385 o(0x0f);
1386 o(0x58 + a);
1387 o(0xc1);
1388 } else {
1389 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1390 o(0xf2);
1391 } else {
1392 o(0xf3);
1394 o(0x0f);
1395 o(0x58 + a);
1396 gen_modrm(0, r, vtop->sym, fc);
1399 vtop--;
1404 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1405 and 'long long' cases. */
1406 void gen_cvt_itof(int t)
1408 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1409 save_reg(TREG_ST0);
1410 gv(RC_INT);
1411 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1412 /* signed long long to float/double/long double (unsigned case
1413 is handled generically) */
1414 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1415 o(0x242cdf); /* fildll (%rsp) */
1416 o(0x08c48348); /* add $8, %rsp */
1417 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1418 (VT_INT | VT_UNSIGNED)) {
1419 /* unsigned int to float/double/long double */
1420 o(0x6a); /* push $0 */
1421 g(0x00);
1422 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1423 o(0x242cdf); /* fildll (%rsp) */
1424 o(0x10c48348); /* add $16, %rsp */
1425 } else {
1426 /* int to float/double/long double */
1427 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1428 o(0x2404db); /* fildl (%rsp) */
1429 o(0x08c48348); /* add $8, %rsp */
1431 vtop->r = TREG_ST0;
1432 } else {
1433 save_reg(TREG_XMM0);
1434 gv(RC_INT);
1435 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
1436 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1437 (VT_INT | VT_UNSIGNED) ||
1438 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1439 o(0x48); /* REX */
1441 o(0x2a0f);
1442 o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1443 vtop->r = TREG_XMM0;
1447 /* convert from one floating point type to another */
1448 void gen_cvt_ftof(int t)
1450 int ft, bt, tbt;
1452 ft = vtop->type.t;
1453 bt = ft & VT_BTYPE;
1454 tbt = t & VT_BTYPE;
1456 if (bt == VT_FLOAT) {
1457 gv(RC_FLOAT);
1458 if (tbt == VT_DOUBLE) {
1459 o(0xc0140f); /* unpcklps */
1460 o(0xc05a0f); /* cvtps2pd */
1461 } else if (tbt == VT_LDOUBLE) {
1462 /* movss %xmm0,-0x10(%rsp) */
1463 o(0x44110ff3);
1464 o(0xf024);
1465 o(0xf02444d9); /* flds -0x10(%rsp) */
1466 vtop->r = TREG_ST0;
1468 } else if (bt == VT_DOUBLE) {
1469 gv(RC_FLOAT);
1470 if (tbt == VT_FLOAT) {
1471 o(0xc0140f66); /* unpcklpd */
1472 o(0xc05a0f66); /* cvtpd2ps */
1473 } else if (tbt == VT_LDOUBLE) {
1474 /* movsd %xmm0,-0x10(%rsp) */
1475 o(0x44110ff2);
1476 o(0xf024);
1477 o(0xf02444dd); /* fldl -0x10(%rsp) */
1478 vtop->r = TREG_ST0;
1480 } else {
1481 gv(RC_ST0);
1482 if (tbt == VT_DOUBLE) {
1483 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1484 /* movsd -0x10(%rsp),%xmm0 */
1485 o(0x44100ff2);
1486 o(0xf024);
1487 vtop->r = TREG_XMM0;
1488 } else if (tbt == VT_FLOAT) {
1489 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1490 /* movss -0x10(%rsp),%xmm0 */
1491 o(0x44100ff3);
1492 o(0xf024);
1493 vtop->r = TREG_XMM0;
1498 /* convert fp to int 't' type */
1499 void gen_cvt_ftoi(int t)
1501 int ft, bt, size, r;
1502 ft = vtop->type.t;
1503 bt = ft & VT_BTYPE;
1504 if (bt == VT_LDOUBLE) {
1505 gen_cvt_ftof(VT_DOUBLE);
1506 bt = VT_DOUBLE;
1509 gv(RC_FLOAT);
1510 if (t != VT_INT)
1511 size = 8;
1512 else
1513 size = 4;
1515 r = get_reg(RC_INT);
1516 if (bt == VT_FLOAT) {
1517 o(0xf3);
1518 } else if (bt == VT_DOUBLE) {
1519 o(0xf2);
1520 } else {
1521 assert(0);
1523 if (size == 8) {
1524 o(0x48 + REX_BASE(r));
1526 o(0x2c0f); /* cvttss2si or cvttsd2si */
1527 o(0xc0 + (REG_VALUE(r) << 3));
1528 vtop->r = r;
1531 /* computed goto support */
1532 void ggoto(void)
1534 gcall_or_jmp(1);
1535 vtop--;
1538 /* end of x86-64 code generator */
1539 /*************************************************************/