x86-64: fix udiv, add cqto instruction
[tinycc.git] / x86_64-gen.c
blob052bf874a2b353b8fa05199f015b247bcbd888bb
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include <assert.h>
25 /* number of available registers */
/* NB_REGS is also the length of the reg_classes[] table. */
26 #define NB_REGS 5
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see gv2() code which does
30 assumptions on it). */
/* Every class is a separate bit, so a reg_classes[] entry can OR
   several classes together. */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
33 #define RC_RAX 0x0004
34 #define RC_RCX 0x0008
35 #define RC_RDX 0x0010
36 #define RC_XMM0 0x0020
37 #define RC_ST0 0x0040 /* only for long double */
/* Aliases naming the class of each function-return register. */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
/* Values 0..4 are also the indices of the reg_classes[] table
   (rax, rcx, rdx, xmm0, st0). The remaining integer registers are
   named here but have no reg_classes[] entry. */
43 enum {
44 TREG_RAX = 0,
45 TREG_RCX = 1,
46 TREG_RDX = 2,
47 TREG_RSI = 6,
48 TREG_RDI = 7,
49 TREG_R8 = 8,
50 TREG_R9 = 9,
51 TREG_R10 = 10,
52 TREG_R11 = 11,
54 TREG_XMM0 = 3,
55 TREG_ST0 = 4,
/* Flag ORed into a register number (see load()) to mark the register as
   a memory base; gen_modrm_impl() treats values >= TREG_MEM that way. */
57 TREG_MEM = 0x10,
/* REX_BASE: bit 3 of a register number, i.e. the bit carried in a REX prefix. */
60 #define REX_BASE(reg) (((reg) >> 3) & 1)
/* REG_VALUE: low three bits of a register number, as placed in ModRM/opcode. */
61 #define REG_VALUE(reg) ((reg) & 7)
/* Class mask of each allocatable register, indexed by register number
   (TREG_RAX, TREG_RCX, TREG_RDX, TREG_XMM0, TREG_ST0). */
63 const int reg_classes[NB_REGS] = {
64 /* rax */ RC_INT | RC_RAX,
65 /* rcx */ RC_INT | RC_RCX,
66 /* rdx */ RC_INT | RC_RDX,
67 /* xmm0 */ RC_FLOAT | RC_XMM0,
68 /* st0 */ RC_ST0,
71 /* return registers for function */
72 #define REG_IRET TREG_RAX /* single word int return register */
73 #define REG_LRET TREG_RDX /* second word return register (for long long) */
74 #define REG_FRET TREG_XMM0 /* float return register */
76 /* defined if function parameters must be evaluated in reverse order */
77 #define INVERT_FUNC_PARAMS
79 /* pointer size, in bytes */
80 #define PTR_SIZE 8
82 /* long double size and alignment, in bytes */
83 #define LDOUBLE_SIZE 16
84 #define LDOUBLE_ALIGN 8
85 /* maximum alignment (for aligned attribute support) */
86 #define MAX_ALIGN 8
88 /******************************************************/
89 /* ELF defines */
91 #define EM_TCC_TARGET EM_X86_64
93 /* relocation type for 32 bit data relocation */
94 #define R_DATA_32 R_X86_64_32
/* NOTE(review): presumably the relocation used for pointer-sized data. */
95 #define R_DATA_PTR R_X86_64_64
96 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
97 #define R_COPY R_X86_64_COPY
/* NOTE(review): 0x08048000 is the value traditionally used for 32-bit
   x86 ELF images -- confirm it is intended for this target. */
99 #define ELF_START_ADDR 0x08048000
100 #define ELF_PAGE_SIZE 0x1000
102 /******************************************************/
/* Text offset just after the space reserved for the prolog; set by
   gfunc_prolog() and used by gfunc_epilog() to find where to write it. */
104 static unsigned long func_sub_sp_offset;
/* Operand of the 'ret n' emitted by gfunc_epilog(); 0 selects a plain 'ret'. */
105 static int func_ret_sub;
107 /* XXX: make it faster ? */
108 void g(int c)
110 int ind1;
111 ind1 = ind + 1;
112 if (ind1 > cur_text_section->data_allocated)
113 section_realloc(cur_text_section, ind1);
114 cur_text_section->data[ind] = c;
115 ind = ind1;
/* Emit the bytes of 'c' lowest byte first, stopping once the remaining
   value is zero. Consequently o(0) emits nothing and high-order zero
   bytes are never written. */
void o(unsigned int c)
{
    for (; c != 0; c >>= 8)
        g(c);
}
/* Emit 'c' as four bytes, least significant byte first. */
void gen_le32(int c)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8)
        g(c >> shift);
}
/* Emit 'c' as eight bytes, least significant byte first. */
void gen_le64(int64_t c)
{
    int shift;

    for (shift = 0; shift < 64; shift += 8)
        g(c >> shift);
}
146 /* output a symbol and patch all calls to it */
147 void gsym_addr(int t, int a)
149 int n, *ptr;
150 while (t) {
151 ptr = (int *)(cur_text_section->data + t);
152 n = *ptr; /* next value */
153 *ptr = a - t - 4;
154 t = n;
158 void gsym(int t)
160 gsym_addr(t, ind);
163 /* psym emits an instruction followed by a 4-byte data field that
   refers to a symbol. It is simply another name for oad(). */
165 #define psym oad
167 static int is64_type(int t)
169 return ((t & VT_BTYPE) == VT_PTR ||
170 (t & VT_BTYPE) == VT_FUNC ||
171 (t & VT_BTYPE) == VT_LLONG);
174 static int is_sse_float(int t) {
175 int bt;
176 bt = t & VT_BTYPE;
177 return bt == VT_DOUBLE || bt == VT_FLOAT;
180 /* instruction + 4 bytes data. Return the address of the data */
181 static int oad(int c, int s)
183 int ind1;
185 o(c);
186 ind1 = ind + 4;
187 if (ind1 > cur_text_section->data_allocated)
188 section_realloc(cur_text_section, ind1);
189 *(int *)(cur_text_section->data + ind) = s;
190 s = ind;
191 ind = ind1;
192 return s;
#if 0
/* Emit the 64-bit constant 'c'; when 'r' has VT_SYM set, first record an
   R_X86_64_64 relocation against 'sym' at the current position.
   (Currently compiled out.) */
static void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM) {
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    }
    gen_le64(c);
}
#endif
205 /* output constant with relocation if 'r & VT_SYM' is true */
206 static void gen_addrpc32(int r, Sym *sym, int c)
208 if (r & VT_SYM)
209 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
210 gen_le32(c-4);
213 /* output got address with relocation */
214 static void gen_gotpcrel(int r, Sym *sym, int c)
216 #ifndef TCC_TARGET_PE
217 Section *sr;
218 ElfW(Rela) *rel;
219 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
220 sr = cur_text_section->reloc;
221 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
222 rel->r_addend = -4;
223 #else
224 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
225 cur_text_section->data[ind-3],
226 cur_text_section->data[ind-2],
227 cur_text_section->data[ind-1]
229 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
230 #endif
231 gen_le32(0);
233 if (c) {
234 /* we use add c, %xxx for displacement */
235 o(0x48 + REX_BASE(r));
236 o(0x81);
237 o(0xc0 + REG_VALUE(r));
238 gen_le32(c);
242 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
244 op_reg = REG_VALUE(op_reg) << 3;
245 if ((r & VT_VALMASK) == VT_CONST) {
246 /* constant memory reference */
247 o(0x05 | op_reg);
248 if (is_got) {
249 gen_gotpcrel(r, sym, c);
250 } else {
251 gen_addrpc32(r, sym, c);
253 } else if ((r & VT_VALMASK) == VT_LOCAL) {
254 /* currently, we use only ebp as base */
255 if (c == (char)c) {
256 /* short reference */
257 o(0x45 | op_reg);
258 g(c);
259 } else {
260 oad(0x85 | op_reg, c);
262 } else if ((r & VT_VALMASK) >= TREG_MEM) {
263 if (c) {
264 g(0x80 | op_reg | REG_VALUE(r));
265 gen_le32(c);
266 } else {
267 g(0x00 | op_reg | REG_VALUE(r));
269 } else {
270 g(0x00 | op_reg | (r & VT_VALMASK));
274 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
275 opcode bits */
276 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
278 gen_modrm_impl(op_reg, r, sym, c, 0);
281 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
282 opcode bits */
283 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
285 int is_got;
286 int rex = 0x48 | (REX_BASE(op_reg) << 2);
287 if ((r & VT_VALMASK) != VT_CONST &&
288 (r & VT_VALMASK) != VT_LOCAL) {
289 rex |= REX_BASE(VT_VALMASK & r);
291 o(rex);
292 o(opcode);
293 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
294 gen_modrm_impl(op_reg, r, sym, c, is_got);
/* Emit code that places the value described by 'sv' into register 'r'.
   - lvalues are read from memory with an opcode chosen from the type
     (movd/movq for float/double, fldt for long double, sign- or
     zero-extending moves for byte/short, a REX.W mov for 64-bit types,
     movl otherwise);
   - non-lvalues are materialised: constants and symbol addresses,
     %rbp-relative addresses (lea), comparison flags (setcc), pending
     jump chains (VT_JMP/VT_JMPI) and register-to-register moves.
   On non-PE targets an lvalue symbol that is not VT_STATIC is first
   fetched through the GOT into a temporary register. */
298 /* load 'r' from value 'sv' */
299 void load(int r, SValue *sv)
301 int v, t, ft, fc, fr;
302 SValue v1;
304 fr = sv->r;
305 ft = sv->type.t;
306 fc = sv->c.ul;
308 #ifndef TCC_TARGET_PE
309 /* we use indirect access via got */
310 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
311 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
312 /* use the result register as a temporary register */
313 int tr = r | TREG_MEM;
314 if (is_float(ft)) {
315 /* we cannot use float registers as a temporary register */
316 tr = get_reg(RC_INT) | TREG_MEM;
318 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
320 /* load from the temporary register */
321 fr = tr | VT_LVAL;
323 #endif
325 v = fr & VT_VALMASK;
326 if (fr & VT_LVAL) {
327 if (v == VT_LLOCAL) {
/* the address itself lives in a stack slot: load it into 'r' first and
   then use 'r' as the base of the real access */
328 v1.type.t = VT_PTR;
329 v1.r = VT_LOCAL | VT_LVAL;
330 v1.c.ul = fc;
331 load(r, &v1);
332 fr = r;
/* from here on 'r' is what gets passed as the modrm reg field */
334 if ((ft & VT_BTYPE) == VT_FLOAT) {
335 o(0x6e0f66); /* movd */
336 r = 0;
337 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
338 o(0x7e0ff3); /* movq */
339 r = 0;
340 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
341 o(0xdb); /* fldt */
342 r = 5;
343 } else if ((ft & VT_TYPE) == VT_BYTE) {
344 o(0xbe0f); /* movsbl */
345 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
346 o(0xb60f); /* movzbl */
347 } else if ((ft & VT_TYPE) == VT_SHORT) {
348 o(0xbf0f); /* movswl */
349 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
350 o(0xb70f); /* movzwl */
351 } else if (is64_type(ft)) {
/* gen_modrm64() emits the REX prefix, opcode and modrm itself */
352 gen_modrm64(0x8b, r, fr, sv->sym, fc);
353 return;
354 } else {
355 o(0x8b); /* movl */
357 gen_modrm(r, fr, sv->sym, fc);
358 } else {
359 if (v == VT_CONST) {
360 if (fr & VT_SYM) {
361 #ifdef TCC_TARGET_PE
362 o(0x8d48);
363 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
364 gen_addrpc32(fr, sv->sym, fc);
365 #else
366 if (sv->sym->type.t & VT_STATIC) {
367 o(0x8d48);
368 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
369 gen_addrpc32(fr, sv->sym, fc);
370 } else {
371 o(0x8b48);
372 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
373 gen_gotpcrel(r, sv->sym, fc);
375 #endif
376 } else if (is64_type(ft)) {
377 o(0x48);
378 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
379 gen_le64(sv->c.ull);
380 } else {
381 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
382 gen_le32(fc);
384 } else if (v == VT_LOCAL) {
385 o(0x48 | REX_BASE(r));
386 o(0x8d); /* lea xxx(%ebp), r */
387 gen_modrm(r, VT_LOCAL, sv->sym, fc);
388 } else if (v == VT_CMP) {
/* fc holds the second opcode byte of the setcc instruction */
389 oad(0xb8 + r, 0); /* mov $0, r */
390 o(0x0f); /* setxx %br */
391 o(fc);
392 o(0xc0 + r);
393 } else if (v == VT_JMP || v == VT_JMPI) {
/* the low bit of v is the value produced on the fall-through path; the
   jump chain in fc is resolved to the second mov, which loads the
   opposite value. The 'jmp +5' skips that 5-byte mov. */
394 t = v & 1;
395 oad(0xb8 + r, t); /* mov $1, r */
396 o(0x05eb); /* jmp after */
397 gsym(fc);
398 oad(0xb8 + r, t ^ 1); /* mov $0, r */
399 } else if (v != r) {
/* register to register: xmm0 <-> st0 go through memory below %rsp */
400 if (r == TREG_XMM0) {
401 assert(v == TREG_ST0);
402 /* gen_cvt_ftof(VT_DOUBLE); */
403 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
404 /* movsd -0x10(%rsp),%xmm0 */
405 o(0x44100ff2);
406 o(0xf024);
407 } else if (r == TREG_ST0) {
408 assert(v == TREG_XMM0);
409 /* gen_cvt_ftof(VT_LDOUBLE); */
410 /* movsd %xmm0,-0x10(%rsp) */
411 o(0x44110ff2);
412 o(0xf024);
413 o(0xf02444dd); /* fldl -0x10(%rsp) */
414 } else {
415 o(0x48 | REX_BASE(r) | (REX_BASE(v) << 2));
416 o(0x89);
417 o(0xc0 + r + v * 8); /* mov v, r */
/* Emit code that stores register 'r' into the lvalue described by 'v'.
   The opcode is chosen from the basic type of 'v'. On non-PE targets a
   symbol destination is addressed through %r11, which is first loaded
   via gen_gotpcrel(); 'pic' then holds the REX prefix needed for the
   (%r11) operand. While 'pic' is 0 the o(pic) calls emit nothing,
   because o() stops at a zero value. */
423 /* store register 'r' in lvalue 'v' */
424 void store(int r, SValue *v)
426 int fr, bt, ft, fc;
427 int op64 = 0;
428 /* store the REX prefix in this variable when PIC is enabled */
429 int pic = 0;
431 ft = v->type.t;
432 fc = v->c.ul;
433 fr = v->r & VT_VALMASK;
434 bt = ft & VT_BTYPE;
436 #ifndef TCC_TARGET_PE
437 /* we need to access the variable via got */
438 if (fr == VT_CONST && (v->r & VT_SYM)) {
439 /* mov xx(%rip), %r11 */
440 o(0x1d8b4c);
441 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
/* 0x49 = REX.W+B for 64-bit types, 0x41 = REX.B otherwise */
442 pic = is64_type(bt) ? 0x49 : 0x41;
444 #endif
446 /* XXX: incorrect if float reg to reg */
447 if (bt == VT_FLOAT) {
448 o(0x66);
449 o(pic);
450 o(0x7e0f); /* movd */
451 r = 0;
452 } else if (bt == VT_DOUBLE) {
453 o(0x66);
454 o(pic);
455 o(0xd60f); /* movq */
456 r = 0;
457 } else if (bt == VT_LDOUBLE) {
458 o(0xc0d9); /* fld %st(0) */
459 o(pic);
460 o(0xdb); /* fstpt */
461 r = 7;
462 } else {
463 if (bt == VT_SHORT)
464 o(0x66);
465 o(pic);
466 if (bt == VT_BYTE || bt == VT_BOOL)
467 o(0x88);
468 else if (is64_type(bt))
/* the 64-bit opcode is deferred: it is emitted either right below (pic)
   or by gen_modrm64() together with its REX prefix */
469 op64 = 0x89;
470 else
471 o(0x89);
473 if (pic) {
474 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
475 if (op64)
476 o(op64);
477 o(3 + (r << 3));
478 } else if (op64) {
479 if (fr == VT_CONST ||
480 fr == VT_LOCAL ||
481 (v->r & VT_LVAL)) {
482 gen_modrm64(op64, r, v->r, v->sym, fc);
483 } else if (fr != r) {
484 /* XXX: don't we really come here? */
/* the o() after abort() is never reached */
485 abort();
486 o(0xc0 + fr + r * 8); /* mov r, fr */
488 } else {
489 if (fr == VT_CONST ||
490 fr == VT_LOCAL ||
491 (v->r & VT_LVAL)) {
492 gen_modrm(r, v->r, v->sym, fc);
493 } else if (fr != r) {
494 /* XXX: don't we really come here? */
/* the o() after abort() is never reached */
495 abort();
496 o(0xc0 + fr + r * 8); /* mov r, fr */
/* Emit 'add $val, %rsp', using the 8-bit immediate form when 'val' fits
   in a signed char and the 32-bit form otherwise. */
static void gadd_sp(int val)
{
    if (val != (char)val) {
        oad(0xc48148, val); /* add $xxx, %rsp */
        return;
    }
    o(0xc48348);
    g(val);
}
511 /* 'is_jmp' is '1' if it is a jump */
512 static void gcall_or_jmp(int is_jmp)
514 int r;
515 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
516 /* constant case */
517 if (vtop->r & VT_SYM) {
518 /* relocation case */
519 greloc(cur_text_section, vtop->sym,
520 ind + 1, R_X86_64_PC32);
521 } else {
522 /* put an empty PC32 relocation */
523 put_elf_reloc(symtab_section, cur_text_section,
524 ind + 1, R_X86_64_PC32, 0);
526 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
527 } else {
528 /* otherwise, indirect call */
529 r = TREG_R11;
530 load(r, vtop);
531 o(0x41); /* REX */
532 o(0xff); /* call/jmp *r */
533 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
/* REGN is the number of integer arguments passed in registers and
   arg_regs[] lists those registers in argument order: four on PE
   targets, six otherwise. */
537 #ifdef TCC_TARGET_PE
538 #define REGN 4
539 static const uint8_t arg_regs[] = {
540 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
542 #else
543 #define REGN 6
544 static const uint8_t arg_regs[REGN] = {
545 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
547 #endif
549 /* Generate function call. The function address is pushed first, then
550 all the parameters in call order. This functions pops all the
551 parameters and the function address. */
552 void gfunc_call(int nb_args)
554 int size, align, r, args_size, i;
555 SValue *orig_vtop;
556 int nb_reg_args = 0;
557 int nb_sse_args = 0;
558 int sse_reg, gen_reg;
560 /* calculate the number of integer/float arguments */
561 args_size = 0;
562 for(i = 0; i < nb_args; i++) {
563 if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
564 args_size += type_size(&vtop->type, &align);
565 } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
566 args_size += 16;
567 #ifndef TCC_TARGET_PE
568 } else if (is_sse_float(vtop[-i].type.t)) {
569 nb_sse_args++;
570 if (nb_sse_args > 8) args_size += 8;
571 #endif
572 } else {
573 nb_reg_args++;
574 if (nb_reg_args > REGN) args_size += 8;
578 /* for struct arguments, we need to call memcpy and the function
579 call breaks register passing arguments we are preparing.
580 So, we process arguments which will be passed by stack first. */
581 orig_vtop = vtop;
582 gen_reg = nb_reg_args;
583 sse_reg = nb_sse_args;
585 #ifdef TCC_TARGET_PE
586 save_regs(0); /* save used temporary registers */
587 #endif
589 /* adjust stack to align SSE boundary */
590 if (args_size &= 8) {
591 o(0x50); /* push $rax */
593 for(i = 0; i < nb_args; i++) {
594 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
595 size = type_size(&vtop->type, &align);
596 /* align to stack align size */
597 size = (size + 3) & ~3;
598 /* allocate the necessary size on stack */
599 o(0x48);
600 oad(0xec81, size); /* sub $xxx, %rsp */
601 /* generate structure store */
602 r = get_reg(RC_INT);
603 o(0x48 + REX_BASE(r));
604 o(0x89); /* mov %rsp, r */
605 o(0xe0 + r);
607 /* following code breaks vtop[1] */
608 SValue tmp = vtop[1];
609 vset(&vtop->type, r | VT_LVAL, 0);
610 vswap();
611 vstore();
612 vtop[1] = tmp;
614 args_size += size;
615 } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
616 gv(RC_ST0);
617 size = LDOUBLE_SIZE;
618 oad(0xec8148, size); /* sub $xxx, %rsp */
619 o(0x7cdb); /* fstpt 0(%rsp) */
620 g(0x24);
621 g(0x00);
622 args_size += size;
623 } else if (is_sse_float(vtop->type.t)) {
624 #ifdef TCC_TARGET_PE
625 int j = --gen_reg;
626 if (j >= REGN) {
627 #else
628 int j = --sse_reg;
629 if (j >= 8) {
630 #endif
631 gv(RC_FLOAT);
632 o(0x50); /* push $rax */
633 /* movq %xmm0, (%rsp) */
634 o(0x04d60f66);
635 o(0x24);
636 args_size += 8;
638 } else {
639 int j = --gen_reg;
640 /* simple type */
641 /* XXX: implicit cast ? */
642 if (j >= REGN) {
643 r = gv(RC_INT);
644 o(0x50 + r); /* push r */
645 args_size += 8;
648 vtop--;
650 vtop = orig_vtop;
652 /* then, we prepare register passing arguments.
653 Note that we cannot set RDX and RCX in this loop because gv()
654 may break these temporary registers. Let's use R10 and R11
655 instead of them */
656 gen_reg = nb_reg_args;
657 sse_reg = nb_sse_args;
658 for(i = 0; i < nb_args; i++) {
659 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
660 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
661 } else if (is_sse_float(vtop->type.t)) {
662 #ifdef TCC_TARGET_PE
663 int j = --gen_reg;
664 if (j < REGN) {
665 int d = arg_regs[j];
666 gv(RC_FLOAT); /* only one float register */
667 /* movaps %xmm0, %xmmN */
668 o(0x280f);
669 o(0xc0 + (j << 3));
670 o(0x50);
671 o(0xd60f66); /* movq %xmm0, (%rsp) */
672 o(0x2404 + (j << 3));
673 if (d < 8) {
674 o(0x58 + d); /* pop d */
675 } else {
676 o(0x58);
677 o(0xc08949 + d - 8);
680 } else {
681 int j = --gen_reg;
682 /* simple type */
683 /* XXX: implicit cast ? */
684 if (j < REGN) {
685 int d = arg_regs[j];
686 r = gv(RC_INT);
687 if (d != r) {
688 if (d < 8) {
689 o(0x8948); /* mov */
690 o(0xc0 + r * 8 + d);
691 } else {
692 o(0x8949); /* mov */
693 o(0xc0 + r * 8 + d - 8);
697 #else
698 int j = --sse_reg;
699 if (j < 8) {
700 gv(RC_FLOAT); /* only one float register */
701 /* movaps %xmm0, %xmmN */
702 o(0x280f);
703 o(0xc0 + (sse_reg << 3));
705 } else {
706 int j = --gen_reg;
707 /* simple type */
708 /* XXX: implicit cast ? */
709 if (j < REGN) {
710 r = gv(RC_INT);
711 if (j < 2) {
712 o(0x8948); /* mov */
713 o(0xc0 + r * 8 + arg_regs[j]);
714 } else if (j < 4) {
715 o(0x8949); /* mov */
716 /* j=2: r10, j=3: r11 */
717 o(0xc0 + r * 8 + j);
718 } else {
719 o(0x8949); /* mov */
720 /* j=4: r8, j=5: r9 */
721 o(0xc0 + r * 8 + j - 4);
724 #endif
726 vtop--;
729 #ifdef TCC_TARGET_PE
730 /* allocate scratch space */
731 gadd_sp(-8*REGN);
732 args_size += 8*REGN;
733 #else
734 save_regs(0); /* save used temporary registers */
736 /* Copy R10 and R11 into RDX and RCX, respectively */
737 if (nb_reg_args > 2) {
738 o(0xd2894c); /* mov %r10, %rdx */
739 if (nb_reg_args > 3) {
740 o(0xd9894c); /* mov %r11, %rcx */
744 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
745 #endif
746 gcall_or_jmp(0);
747 if (args_size)
748 gadd_sp(args_size);
749 vtop--;
752 #define FUNC_PROLOG_SIZE 11
754 static void push_arg_reg(int i) {
755 loc -= 8;
756 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
/* Start code generation for a function of type 'func_type'.
   Reserves FUNC_PROLOG_SIZE bytes for the prolog (filled in later by
   gfunc_epilog()), spills register-passed parameters into local slots
   and declares every parameter as a local lvalue with sym_push().
   For variadic functions on non-PE targets it additionally stores three
   32-bit counters and saves eight SSE and all REGN integer argument
   registers. */
759 /* generate function prolog of type 't' */
760 void gfunc_prolog(CType *func_type)
762 int i, addr, align, size;
763 int param_index, param_addr, reg_param_index, sse_param_index;
764 Sym *sym;
765 CType *type;
767 func_ret_sub = 0;
769 sym = func_type->ref;
/* stack-passed parameters start above the saved frame pointer and the
   return address */
770 addr = PTR_SIZE * 2;
771 loc = 0;
/* skip the prolog area; gfunc_epilog() rewinds to it via func_sub_sp_offset */
772 ind += FUNC_PROLOG_SIZE;
773 func_sub_sp_offset = ind;
775 #ifndef TCC_TARGET_PE
776 if (func_type->ref->c == FUNC_ELLIPSIS) {
777 int seen_reg_num, seen_sse_num, seen_stack_size;
778 seen_reg_num = seen_sse_num = 0;
779 /* frame pointer and return address */
780 seen_stack_size = PTR_SIZE * 2;
781 /* count the number of seen parameters */
782 sym = func_type->ref;
783 while ((sym = sym->next) != NULL) {
784 type = &sym->type;
785 if (is_sse_float(type->t)) {
786 if (seen_sse_num < 8) {
787 seen_sse_num++;
788 } else {
789 seen_stack_size += 8;
791 } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
792 size = type_size(type, &align);
793 size = (size + 3) & ~3;
794 seen_stack_size += size;
795 } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
796 seen_stack_size += LDOUBLE_SIZE;
797 } else {
798 if (seen_reg_num < REGN) {
799 seen_reg_num++;
800 } else {
801 seen_stack_size += 8;
/* NOTE(review): the three words written below presumably back the
   va_list bookkeeping (integer offset, SSE offset, stack offset) --
   confirm against the va_start implementation. */
806 loc -= 16;
807 /* movl $0x????????, -0x10(%rbp) */
808 o(0xf045c7);
809 gen_le32(seen_reg_num * 8);
810 /* movl $0x????????, -0xc(%rbp) */
811 o(0xf445c7);
812 gen_le32(seen_sse_num * 16 + 48);
813 /* movl $0x????????, -0x8(%rbp) */
814 o(0xf845c7);
815 gen_le32(seen_stack_size);
817 /* save all register passing arguments */
/* SSE registers 7 down to 0, one 16-byte slot each: the low 8 bytes get
   the register, the high 8 bytes are zeroed */
818 for (i = 0; i < 8; i++) {
819 loc -= 16;
820 o(0xd60f66); /* movq */
821 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
822 /* movq $0, loc+8(%rbp) */
823 o(0x85c748);
824 gen_le32(loc + 8);
825 gen_le32(0);
/* integer argument registers, last one first */
827 for (i = 0; i < REGN; i++) {
828 push_arg_reg(REGN-1-i);
831 #endif
833 sym = func_type->ref;
834 param_index = 0;
835 reg_param_index = 0;
836 sse_param_index = 0;
838 /* if the function returns a structure, then add an
839 implicit pointer parameter */
840 func_vt = sym->type;
841 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
842 push_arg_reg(reg_param_index);
843 param_addr = loc;
845 func_vc = loc;
846 param_index++;
847 reg_param_index++;
849 /* define parameters */
850 while ((sym = sym->next) != NULL) {
851 type = &sym->type;
852 size = type_size(type, &align);
853 size = (size + 3) & ~3;
854 #ifndef TCC_TARGET_PE
855 if (is_sse_float(type->t)) {
856 if (sse_param_index < 8) {
857 /* save arguments passed by register */
858 loc -= 8;
859 o(0xd60f66); /* movq */
860 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
861 param_addr = loc;
862 } else {
863 param_addr = addr;
864 addr += size;
866 sse_param_index++;
867 } else
868 #endif
/* structs and long doubles are always taken from the caller's stack */
869 if ((type->t & VT_BTYPE) == VT_STRUCT ||
870 (type->t & VT_BTYPE) == VT_LDOUBLE) {
871 param_addr = addr;
872 addr += size;
873 } else {
874 #ifdef TCC_TARGET_PE
/* PE: register arguments are stored at their positive %rbp offset,
   which then serves as the parameter address */
875 if (reg_param_index < REGN) {
876 /* save arguments passed by register */
877 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
879 param_addr = addr;
880 addr += 8;
881 #else
882 if (reg_param_index < REGN) {
883 /* save arguments passed by register */
884 push_arg_reg(reg_param_index);
885 param_addr = loc;
886 } else {
887 param_addr = addr;
888 addr += 8;
890 #endif
891 reg_param_index++;
893 sym_push(sym->v & ~SYM_FIELD, type,
894 VT_LOCAL | VT_LVAL, param_addr);
895 param_index++;
897 #ifdef TCC_TARGET_PE
/* PE variadic functions: also store the remaining argument registers */
898 if (func_type->ref->c == FUNC_ELLIPSIS) {
899 for (i = reg_param_index; i < REGN; ++i) {
900 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, addr);
901 addr += 8;
904 #endif
907 /* generate function epilog */
908 void gfunc_epilog(void)
910 int v, saved_ind;
912 o(0xc9); /* leave */
913 if (func_ret_sub == 0) {
914 o(0xc3); /* ret */
915 } else {
916 o(0xc2); /* ret n */
917 g(func_ret_sub);
918 g(func_ret_sub >> 8);
920 /* align local size to word & save local variables */
921 v = (-loc + 15) & -16;
922 saved_ind = ind;
923 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
924 #ifdef TCC_TARGET_PE
925 if (v >= 4096) {
926 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
927 oad(0xb8, v); /* mov stacksize, %eax */
928 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
929 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
930 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
931 } else
932 #endif
934 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
935 o(0xec8148); /* sub rsp, stacksize */
936 gen_le32(v);
938 ind = saved_ind;
/* Generate a 32-bit jump whose displacement field is initialised to 't'
   (the previous head of a pending-reference chain). Returns the offset
   of that field, i.e. the new head of the chain. */
int gjmp(int t)
{
    int field = psym(0xe9, t);

    return field;
}
947 /* generate a jump to a fixed address */
948 void gjmp_addr(int a)
950 int r;
951 r = a - ind - 2;
952 if (r == (char)r) {
953 g(0xeb);
954 g(r);
955 } else {
956 oad(0xe9, a - ind - 5);
/* Emit a conditional jump on the value at vtop and pop it.
   't' is the head of an existing chain of pending jumps; the returned
   value is the new head after this test's jump has been added. When
   'inv' is 1 the sense of the test is inverted. */
960 /* generate a test. set 'inv' to invert test. Stack entry is popped */
961 int gtst(int inv, int t)
963 int v, *p;
965 v = vtop->r & VT_VALMASK;
966 if (v == VT_CMP) {
967 /* fast case : can jump directly since flags are set */
/* vtop->c.i is the condition byte that load() would emit after 0x0f for
   setcc; subtracting 16 gives the matching 0x0f-prefixed jcc opcode,
   and xor with 'inv' flips its low bit */
968 g(0x0f);
969 t = psym((vtop->c.i - 16) ^ inv, t);
970 } else if (v == VT_JMP || v == VT_JMPI) {
971 /* && or || optimization */
972 if ((v & 1) == inv) {
973 /* insert vtop->c jump list in t */
/* walk to the zero link that ends vtop's chain and make it point at t */
974 p = &vtop->c.i;
975 while (*p != 0)
976 p = (int *)(cur_text_section->data + *p);
977 *p = t;
978 t = vtop->c.i;
979 } else {
/* opposite sense: jump over, and resolve vtop's chain to this point */
980 t = gjmp(t);
981 gsym(vtop->c.i);
983 } else {
984 if (is_float(vtop->type.t) ||
985 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
/* turn the value into the result of 'value != 0' first */
986 vpushi(0);
987 gen_op(TOK_NE);
989 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
990 /* constant jmp optimization */
991 if ((vtop->c.i != 0) != inv)
992 t = gjmp(t);
993 } else {
/* test v, v followed by jne/je (0x0f 0x85, low bit flipped by inv) */
994 v = gv(RC_INT);
995 o(0x85);
996 o(0xc0 + v * 9);
997 g(0x0f);
998 t = psym(0x85 ^ inv, t);
1001 vtop--;
1002 return t;
1005 /* generate an integer binary operation */
1006 void gen_opi(int op)
1008 int r, fr, opc, c;
1010 switch(op) {
1011 case '+':
1012 case TOK_ADDC1: /* add with carry generation */
1013 opc = 0;
1014 gen_op8:
1015 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST &&
1016 !is64_type(vtop->type.t)) {
1017 /* constant case */
1018 vswap();
1019 r = gv(RC_INT);
1020 if (is64_type(vtop->type.t)) {
1021 o(0x48 | REX_BASE(r));
1023 vswap();
1024 c = vtop->c.i;
1025 if (c == (char)c) {
1026 /* XXX: generate inc and dec for smaller code ? */
1027 o(0x83);
1028 o(0xc0 | (opc << 3) | REG_VALUE(r));
1029 g(c);
1030 } else {
1031 o(0x81);
1032 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1034 } else {
1035 gv2(RC_INT, RC_INT);
1036 r = vtop[-1].r;
1037 fr = vtop[0].r;
1038 if (opc != 7 ||
1039 is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
1040 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
1041 o(0x48 | REX_BASE(r) | (REX_BASE(fr) << 2));
1043 o((opc << 3) | 0x01);
1044 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1046 vtop--;
1047 if (op >= TOK_ULT && op <= TOK_GT) {
1048 vtop->r = VT_CMP;
1049 vtop->c.i = op;
1051 break;
1052 case '-':
1053 case TOK_SUBC1: /* sub with carry generation */
1054 opc = 5;
1055 goto gen_op8;
1056 case TOK_ADDC2: /* add with carry use */
1057 opc = 2;
1058 goto gen_op8;
1059 case TOK_SUBC2: /* sub with carry use */
1060 opc = 3;
1061 goto gen_op8;
1062 case '&':
1063 opc = 4;
1064 goto gen_op8;
1065 case '^':
1066 opc = 6;
1067 goto gen_op8;
1068 case '|':
1069 opc = 1;
1070 goto gen_op8;
1071 case '*':
1072 gv2(RC_INT, RC_INT);
1073 r = vtop[-1].r;
1074 fr = vtop[0].r;
1075 if (is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
1076 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
1077 o(0x48 | REX_BASE(fr) | (REX_BASE(r) << 2));
1079 vtop--;
1080 o(0xaf0f); /* imul fr, r */
1081 o(0xc0 + fr + r * 8);
1082 break;
1083 case TOK_SHL:
1084 opc = 4;
1085 goto gen_shift;
1086 case TOK_SHR:
1087 opc = 5;
1088 goto gen_shift;
1089 case TOK_SAR:
1090 opc = 7;
1091 gen_shift:
1092 opc = 0xc0 | (opc << 3);
1093 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1094 /* constant case */
1095 vswap();
1096 r = gv(RC_INT);
1097 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1098 o(0x48 | REX_BASE(r));
1099 c = 0x3f;
1100 } else {
1101 c = 0x1f;
1103 vswap();
1104 c &= vtop->c.i;
1105 o(0xc1); /* shl/shr/sar $xxx, r */
1106 o(opc | r);
1107 g(c);
1108 } else {
1109 /* we generate the shift in ecx */
1110 gv2(RC_INT, RC_RCX);
1111 r = vtop[-1].r;
1112 if ((vtop[-1].type.t & VT_BTYPE) == VT_LLONG) {
1113 o(0x48 | REX_BASE(r));
1115 o(0xd3); /* shl/shr/sar %cl, r */
1116 o(opc | r);
1118 vtop--;
1119 break;
1120 case '/':
1121 case TOK_UDIV:
1122 case TOK_PDIV:
1123 case '%':
1124 case TOK_UMOD:
1125 case TOK_UMULL:
1126 /* first operand must be in eax */
1127 /* XXX: need better constraint for second operand */
1128 gv2(RC_RAX, RC_RCX);
1129 r = vtop[-1].r;
1130 fr = vtop[0].r;
1131 vtop--;
1132 save_reg(TREG_RDX);
1133 if (op == TOK_UMULL) {
1134 o(0xf7); /* mul fr */
1135 o(0xe0 + fr);
1136 vtop->r2 = TREG_RDX;
1137 r = TREG_RAX;
1138 } else {
1139 if (op == TOK_UDIV || op == TOK_UMOD) {
1140 if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
1141 o(0xd23148); /* xor %rdx, %rdx */
1142 o(0x48 + REX_BASE(fr));
1143 } else {
1144 o(0xd231); /* xor %edx, %edx */
1146 o(0xf7); /* div fr, %eax */
1147 o(0xf0 + fr);
1148 } else {
1149 if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
1150 o(0x9948); /* cqto */
1151 o(0x48 + REX_BASE(fr));
1152 } else {
1153 o(0x99); /* cltd */
1155 o(0xf7); /* idiv fr, %eax */
1156 o(0xf8 + fr);
1158 if (op == '%' || op == TOK_UMOD)
1159 r = TREG_RDX;
1160 else
1161 r = TREG_RAX;
1163 vtop->r = r;
1164 break;
1165 default:
1166 opc = 7;
1167 goto gen_op8;
/* Long long operations need no separate handling on this target:
   forward to gen_opi(). */
void gen_opl(int op)
{
    gen_opi(op);
}
1176 /* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
1178 /* XXX: need to use ST1 too */
/* Long double operands use the x87 stack (RC_ST0); float and double use
   xmm0 (RC_FLOAT). Comparisons leave a VT_CMP value on the value stack
   whose constant is the condition to test; arithmetic leaves the result
   in the first operand's register. */
1179 void gen_opf(int op)
1181 int a, ft, fc, swapped, r;
1182 int float_type =
1183 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1185 /* convert constants to memory references */
1186 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1187 vswap();
1188 gv(float_type);
1189 vswap();
1191 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1192 gv(float_type);
1194 /* must put at least one value in the floating point register */
1195 if ((vtop[-1].r & VT_LVAL) &&
1196 (vtop[0].r & VT_LVAL)) {
1197 vswap();
1198 gv(float_type);
1199 vswap();
1201 swapped = 0;
1202 /* swap the stack if needed so that t1 is the register and t2 is
1203 the memory reference */
1204 if (vtop[-1].r & VT_LVAL) {
1205 vswap();
1206 swapped = 1;
/* ---- long double: x87 ---- */
1208 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1209 if (op >= TOK_ULT && op <= TOK_GT) {
1210 /* load on stack second operand */
1211 load(TREG_ST0, vtop);
1212 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1213 if (op == TOK_GE || op == TOK_GT)
1214 swapped = !swapped;
1215 else if (op == TOK_EQ || op == TOK_NE)
1216 swapped = 0;
1217 if (swapped)
1218 o(0xc9d9); /* fxch %st(1) */
1219 o(0xe9da); /* fucompp */
1220 o(0xe0df); /* fnstsw %ax */
/* the status bits now in %ah are reduced to a flag test; 'op' is
   replaced by the condition to apply to that test */
1221 if (op == TOK_EQ) {
1222 o(0x45e480); /* and $0x45, %ah */
1223 o(0x40fC80); /* cmp $0x40, %ah */
1224 } else if (op == TOK_NE) {
1225 o(0x45e480); /* and $0x45, %ah */
1226 o(0x40f480); /* xor $0x40, %ah */
1227 op = TOK_NE;
1228 } else if (op == TOK_GE || op == TOK_LE) {
1229 o(0x05c4f6); /* test $0x05, %ah */
1230 op = TOK_EQ;
1231 } else {
1232 o(0x45c4f6); /* test $0x45, %ah */
1233 op = TOK_EQ;
1235 vtop--;
1236 vtop->r = VT_CMP;
1237 vtop->c.i = op;
1238 } else {
1239 /* no memory reference possible for long double operations */
1240 load(TREG_ST0, vtop);
1241 swapped = !swapped;
/* 'a' selects the x87 operation; for '-' and '/' the +1 variant is used
   when the operands are in swapped order */
1243 switch(op) {
1244 default:
1245 case '+':
1246 a = 0;
1247 break;
1248 case '-':
1249 a = 4;
1250 if (swapped)
1251 a++;
1252 break;
1253 case '*':
1254 a = 1;
1255 break;
1256 case '/':
1257 a = 6;
1258 if (swapped)
1259 a++;
1260 break;
1262 ft = vtop->type.t;
1263 fc = vtop->c.ul;
1264 o(0xde); /* fxxxp %st, %st(1) */
1265 o(0xc1 + (a << 3));
1266 vtop--;
/* ---- float / double: SSE ---- */
1268 } else {
1269 if (op >= TOK_ULT && op <= TOK_GT) {
1270 /* if saved lvalue, then we must reload it */
1271 r = vtop->r;
1272 fc = vtop->c.ul;
1273 if ((r & VT_VALMASK) == VT_LLOCAL) {
1274 SValue v1;
1275 r = get_reg(RC_INT);
1276 v1.type.t = VT_INT;
1277 v1.r = VT_LOCAL | VT_LVAL;
1278 v1.c.ul = fc;
1279 load(r, &v1);
1280 fc = 0;
/* ordered comparisons are expressed with the unsigned-style conditions
   0x93 (setae) / 0x97 (seta), swapping operands for LE and LT */
1283 if (op == TOK_EQ || op == TOK_NE) {
1284 swapped = 0;
1285 } else {
1286 if (op == TOK_LE || op == TOK_LT)
1287 swapped = !swapped;
1288 if (op == TOK_LE || op == TOK_GE) {
1289 op = 0x93; /* setae */
1290 } else {
1291 op = 0x97; /* seta */
1295 if (swapped) {
/* bring the memory operand into xmm1 and compare it against xmm0 */
1296 o(0x7e0ff3); /* movq */
1297 gen_modrm(1, r, vtop->sym, fc);
1299 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1300 o(0x66);
1302 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1303 o(0xc8);
1304 } else {
/* the 0x66 prefix selects the double-precision form */
1305 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1306 o(0x66);
1308 o(0x2e0f); /* ucomisd */
1309 gen_modrm(0, r, vtop->sym, fc);
1312 vtop--;
1313 vtop->r = VT_CMP;
1314 vtop->c.i = op;
1315 } else {
1316 /* no memory reference possible for long double operations */
/* NOTE(review): this test looks unreachable, since the long double case
   is handled by the outer 'if' above -- confirm. */
1317 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1318 load(TREG_XMM0, vtop);
1319 swapped = !swapped;
/* 'a' is added to 0x58 to form the SSE opcode: add, sub, mul or div */
1321 switch(op) {
1322 default:
1323 case '+':
1324 a = 0;
1325 break;
1326 case '-':
1327 a = 4;
1328 break;
1329 case '*':
1330 a = 1;
1331 break;
1332 case '/':
1333 a = 6;
1334 break;
1336 ft = vtop->type.t;
1337 fc = vtop->c.ul;
1338 if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1339 o(0xde); /* fxxxp %st, %st(1) */
1340 o(0xc1 + (a << 3));
1341 } else {
1342 /* if saved lvalue, then we must reload it */
1343 r = vtop->r;
1344 if ((r & VT_VALMASK) == VT_LLOCAL) {
1345 SValue v1;
1346 r = get_reg(RC_INT);
1347 v1.type.t = VT_INT;
1348 v1.r = VT_LOCAL | VT_LVAL;
1349 v1.c.ul = fc;
1350 load(r, &v1);
1351 fc = 0;
1353 if (swapped) {
/* operands are reversed: move xmm0 aside to xmm1, load the memory
   operand into xmm0, then compute xmm0 = xmm0 op xmm1 */
1354 /* movq %xmm0,%xmm1 */
1355 o(0x7e0ff3);
1356 o(0xc8);
1357 load(TREG_XMM0, vtop);
1358 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1359 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1360 o(0xf2);
1361 } else {
1362 o(0xf3);
1364 o(0x0f);
1365 o(0x58 + a);
1366 o(0xc1);
1367 } else {
/* xmm0 = xmm0 op memory operand; prefix 0xf2 = double, 0xf3 = float */
1368 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1369 o(0xf2);
1370 } else {
1371 o(0xf3);
1373 o(0x0f);
1374 o(0x58 + a);
1375 gen_modrm(0, r, vtop->sym, fc);
1378 vtop--;
1383 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1384 and 'long long' cases. */
1385 void gen_cvt_itof(int t)
1387 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1388 save_reg(TREG_ST0);
1389 gv(RC_INT);
1390 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1391 /* signed long long to float/double/long double (unsigned case
1392 is handled generically) */
1393 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1394 o(0x242cdf); /* fildll (%rsp) */
1395 o(0x08c48348); /* add $8, %rsp */
1396 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1397 (VT_INT | VT_UNSIGNED)) {
1398 /* unsigned int to float/double/long double */
1399 o(0x6a); /* push $0 */
1400 g(0x00);
1401 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1402 o(0x242cdf); /* fildll (%rsp) */
1403 o(0x10c48348); /* add $16, %rsp */
1404 } else {
1405 /* int to float/double/long double */
1406 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1407 o(0x2404db); /* fildl (%rsp) */
1408 o(0x08c48348); /* add $8, %rsp */
1410 vtop->r = TREG_ST0;
1411 } else {
1412 save_reg(TREG_XMM0);
1413 gv(RC_INT);
1414 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
1415 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1416 (VT_INT | VT_UNSIGNED) ||
1417 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1418 o(0x48); /* REX */
1420 o(0x2a0f);
1421 o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1422 vtop->r = TREG_XMM0;
1426 /* convert from one floating point type to another */
1427 void gen_cvt_ftof(int t)
1429 int ft, bt, tbt;
1431 ft = vtop->type.t;
1432 bt = ft & VT_BTYPE;
1433 tbt = t & VT_BTYPE;
1435 if (bt == VT_FLOAT) {
1436 gv(RC_FLOAT);
1437 if (tbt == VT_DOUBLE) {
1438 o(0xc0140f); /* unpcklps */
1439 o(0xc05a0f); /* cvtps2pd */
1440 } else if (tbt == VT_LDOUBLE) {
1441 /* movss %xmm0,-0x10(%rsp) */
1442 o(0x44110ff3);
1443 o(0xf024);
1444 o(0xf02444d9); /* flds -0x10(%rsp) */
1445 vtop->r = TREG_ST0;
1447 } else if (bt == VT_DOUBLE) {
1448 gv(RC_FLOAT);
1449 if (tbt == VT_FLOAT) {
1450 o(0xc0140f66); /* unpcklpd */
1451 o(0xc05a0f66); /* cvtpd2ps */
1452 } else if (tbt == VT_LDOUBLE) {
1453 /* movsd %xmm0,-0x10(%rsp) */
1454 o(0x44110ff2);
1455 o(0xf024);
1456 o(0xf02444dd); /* fldl -0x10(%rsp) */
1457 vtop->r = TREG_ST0;
1459 } else {
1460 gv(RC_ST0);
1461 if (tbt == VT_DOUBLE) {
1462 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1463 /* movsd -0x10(%rsp),%xmm0 */
1464 o(0x44100ff2);
1465 o(0xf024);
1466 vtop->r = TREG_XMM0;
1467 } else if (tbt == VT_FLOAT) {
1468 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1469 /* movss -0x10(%rsp),%xmm0 */
1470 o(0x44100ff3);
1471 o(0xf024);
1472 vtop->r = TREG_XMM0;
1477 /* convert fp to int 't' type */
1478 void gen_cvt_ftoi(int t)
1480 int ft, bt, size, r;
1481 ft = vtop->type.t;
1482 bt = ft & VT_BTYPE;
1483 if (bt == VT_LDOUBLE) {
1484 gen_cvt_ftof(VT_DOUBLE);
1485 bt = VT_DOUBLE;
1488 gv(RC_FLOAT);
1489 if (t != VT_INT)
1490 size = 8;
1491 else
1492 size = 4;
1494 r = get_reg(RC_INT);
1495 if (bt == VT_FLOAT) {
1496 o(0xf3);
1497 } else if (bt == VT_DOUBLE) {
1498 o(0xf2);
1499 } else {
1500 assert(0);
1502 if (size == 8) {
1503 o(0x48 + REX_BASE(r));
1505 o(0x2c0f); /* cvttss2si or cvttsd2si */
1506 o(0xc0 + (REG_VALUE(r) << 3));
1507 vtop->r = r;
1510 /* computed goto support */
1511 void ggoto(void)
1513 gcall_or_jmp(1);
1514 vtop--;
1517 /* end of x86-64 code generator */
1518 /*************************************************************/