x86-64: fix flags and zero-pad long doubles
[tinycc.git] / x86_64-gen.c
blob4d2521d1357653118a0c7dc5f116c509c801f737
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
/* NB_REGS is also the length of the reg_classes[] table defined in the
   code-generator half of this file. */
26 #define NB_REGS 5
27 #define NB_ASM_REGS 8
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which makes
31 assumptions about it). */
/* Every class is a distinct bit, so classes can be OR-ed together the way
   reg_classes[] does (e.g. RC_INT | RC_RAX). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
34 #define RC_RAX 0x0004
35 #define RC_RCX 0x0008
36 #define RC_RDX 0x0010
/* reg_classes[] uses RC_R8 and RC_R9 in its NB_REGS == 10 variant */
37 #define RC_R8 0x0100
38 #define RC_R9 0x0200
39 #define RC_XMM0 0x0020
40 #define RC_ST0 0x0040 /* only for long double */
/* classes of the registers that carry function return values */
41 #define RC_IRET RC_RAX /* function return: integer register */
42 #define RC_LRET RC_RDX /* function return: second integer register */
43 #define RC_FRET RC_XMM0 /* function return: float register */
45 /* pretty names for the registers */
/* Values 0..4 are indices into reg_classes[] (rax, rcx, rdx, xmm0, st0 in
   that order).  REG_VALUE() keeps the low three bits of a number and
   REX_BASE() extracts bit 3, which is why r8..r11 are numbered 8..11. */
46 enum {
47 TREG_RAX = 0,
48 TREG_RCX = 1,
49 TREG_RDX = 2,
50 TREG_XMM0 = 3,
51 TREG_ST0 = 4,
53 TREG_RSI = 6,
54 TREG_RDI = 7,
55 TREG_R8 = 8,
56 TREG_R9 = 9,
58 TREG_R10 = 10,
59 TREG_R11 = 11,
/* flag bit OR-ed onto a register number (see load()); gen_modrm_impl()
   treats a value >= TREG_MEM as a memory reference through that register */
61 TREG_MEM = 0x10,
/* REX_BASE(reg): bit 3 of the register number, i.e. 1 for registers 8..15.
   orex() places this bit into the REX prefix. */
64 #define REX_BASE(reg) (((reg) >> 3) & 1)
/* REG_VALUE(reg): low three bits of the register number, the part that is
   placed in opcode and ModRM fields. */
65 #define REG_VALUE(reg) ((reg) & 7)
67 /* return registers for function */
68 #define REG_IRET TREG_RAX /* single word int return register */
69 #define REG_LRET TREG_RDX /* second word return register (for long long) */
70 #define REG_FRET TREG_XMM0 /* float return register */
72 /* defined if function parameters must be evaluated in reverse order */
73 #define INVERT_FUNC_PARAMS
75 /* pointer size, in bytes */
76 #define PTR_SIZE 8
78 /* long double size and alignment, in bytes */
79 #define LDOUBLE_SIZE 16
80 #define LDOUBLE_ALIGN 8
81 /* maximum alignment (for aligned attribute support) */
82 #define MAX_ALIGN 8
/* code-generator entry points declared for users of the target definitions;
   both are defined further down in this file */
84 ST_FUNC void gen_opl(int op);
85 ST_FUNC void gen_le64(int64_t c);
87 /******************************************************/
88 /* ELF defines */
/* Target-independent names mapped onto their x86-64 ELF constants. */
90 #define EM_TCC_TARGET EM_X86_64
92 /* relocation type for 32 bit data relocation */
93 #define R_DATA_32 R_X86_64_32
94 #define R_DATA_PTR R_X86_64_64
95 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
96 #define R_COPY R_X86_64_COPY
/* NOTE(review): presumably the base address and page size used when laying
   out generated executables -- confirm against the ELF writer. */
98 #define ELF_START_ADDR 0x08048000
99 #define ELF_PAGE_SIZE 0x1000
101 /******************************************************/
102 #else /* ! TARGET_DEFS_ONLY */
103 /******************************************************/
104 #include "tcc.h"
105 #include <assert.h>
/* Register classes of each allocatable register, indexed by TREG_* number.
   Entry order must match the TREG_* enumeration (rax, rcx, rdx, xmm0, st0).
   st0 belongs to RC_ST0 only, not to RC_FLOAT. */
107 ST_DATA const int reg_classes[NB_REGS] = {
108 /* eax */ RC_INT | RC_RAX,
109 /* ecx */ RC_INT | RC_RCX,
110 /* edx */ RC_INT | RC_RDX,
111 /* xmm0 */ RC_FLOAT | RC_XMM0,
112 /* st0 */ RC_ST0,
/* optional entries for r8/r9; not compiled while NB_REGS is 5 */
113 #if NB_REGS == 10
117 RC_INT | RC_R8,
118 RC_INT | RC_R9,
119 #endif
/* Text-section offset just past the space reserved for the prolog;
   set by gfunc_prolog() and used by gfunc_epilog() to go back and write
   the real prolog. */
122 static unsigned long func_sub_sp_offset;
/* When non-zero, gfunc_epilog() emits "ret n" with this value instead of a
   plain "ret". */
123 static int func_ret_sub;
125 /* XXX: make it faster ? */
126 void g(int c)
128 int ind1;
129 ind1 = ind + 1;
130 if (ind1 > cur_text_section->data_allocated)
131 section_realloc(cur_text_section, ind1);
132 cur_text_section->data[ind] = c;
133 ind = ind1;
/* Emit the bytes of 'c', least significant first, stopping as soon as the
   remaining value is zero.  Consequently o(0) emits nothing and trailing
   zero bytes of an opcode must be written with g(). */
void o(unsigned int c)
{
    for (; c != 0; c >>= 8)
        g(c);
}
/* Emit the low 16 bits of 'v' as two bytes, least significant first. */
void gen_le16(int v)
{
    int shift;

    for (shift = 0; shift < 16; shift += 8)
        g(v >> shift);
}
/* Emit 'c' as four bytes, least significant first. */
void gen_le32(int c)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8)
        g(c >> shift);
}
/* Emit 'c' as eight bytes, least significant first. */
void gen_le64(int64_t c)
{
    int shift;

    for (shift = 0; shift < 64; shift += 8)
        g(c >> shift);
}
170 void orex(int ll, int r, int r2, int b)
172 if ((r & VT_VALMASK) >= VT_CONST)
173 r = 0;
174 if ((r2 & VT_VALMASK) >= VT_CONST)
175 r2 = 0;
176 if (ll || REX_BASE(r) || REX_BASE(r2))
177 o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
178 o(b);
181 /* output a symbol and patch all calls to it */
182 void gsym_addr(int t, int a)
184 int n, *ptr;
185 while (t) {
186 ptr = (int *)(cur_text_section->data + t);
187 n = *ptr; /* next value */
188 *ptr = a - t - 4;
189 t = n;
193 void gsym(int t)
195 gsym_addr(t, ind);
198 /* psym is used to emit an instruction with a data field which is a
199 reference to a symbol. It is in fact the same as oad ! */
200 #define psym oad
202 static int is64_type(int t)
204 return ((t & VT_BTYPE) == VT_PTR ||
205 (t & VT_BTYPE) == VT_FUNC ||
206 (t & VT_BTYPE) == VT_LLONG);
209 static int is_sse_float(int t) {
210 int bt;
211 bt = t & VT_BTYPE;
212 return bt == VT_DOUBLE || bt == VT_FLOAT;
216 /* instruction + 4 bytes data. Return the address of the data */
217 ST_FUNC int oad(int c, int s)
219 int ind1;
221 o(c);
222 ind1 = ind + 4;
223 if (ind1 > cur_text_section->data_allocated)
224 section_realloc(cur_text_section, ind1);
225 *(int *)(cur_text_section->data + ind) = s;
226 s = ind;
227 ind = ind1;
228 return s;
231 ST_FUNC void gen_addr32(int r, Sym *sym, int c)
233 if (r & VT_SYM)
234 greloc(cur_text_section, sym, ind, R_X86_64_32);
235 gen_le32(c);
238 /* output constant with relocation if 'r & VT_SYM' is true */
239 ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
241 if (r & VT_SYM)
242 greloc(cur_text_section, sym, ind, R_X86_64_64);
243 gen_le64(c);
246 /* output constant with relocation if 'r & VT_SYM' is true */
247 ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
249 if (r & VT_SYM)
250 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
251 gen_le32(c-4);
254 /* output got address with relocation */
/* Emits a 4-byte zero placeholder at 'ind' covered by a relocation against
   'sym'.  In the non-PE build the relocation is R_X86_64_GOTPCREL and the
   addend of the last relocation entry is set to -4; in the PE build a
   diagnostic line is printed and an R_X86_64_PC32 relocation is used.
   A non-zero 'c' is then applied with a separate "add $c, r" instruction. */
255 static void gen_gotpcrel(int r, Sym *sym, int c)
257 #ifndef TCC_TARGET_PE
258 Section *sr;
259 ElfW(Rela) *rel;
260 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
/* address the last entry of the relocation section -- assumed to be the
   one greloc() just appended -- and patch its addend */
261 sr = cur_text_section->reloc;
262 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
263 rel->r_addend = -4;
264 #else
/* prints the symbol name, 'c', 'r' and the three bytes emitted just before
   the current position */
265 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
266 cur_text_section->data[ind-3],
267 cur_text_section->data[ind-2],
268 cur_text_section->data[ind-1]
270 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
271 #endif
/* placeholder that the relocation will fill in */
272 gen_le32(0);
273 if (c) {
274 /* we use add c, %xxx for displacement */
275 orex(1, r, 0, 0x81);
276 o(0xc0 + REG_VALUE(r));
277 gen_le32(c);
/* Emit the ModRM byte (plus displacement) for the operand described by
   'r'/'sym'/'c'.  'op_reg' supplies the three bits of the ModRM reg field.
   The opcode and any REX prefix must already have been emitted.
     VT_CONST        : RIP-relative reference, through the GOT if 'is_got'
     VT_LOCAL        : rbp-relative with an 8- or 32-bit displacement
     r >= TREG_MEM   : memory at the register in the low bits of 'r',
                       with a 32-bit displacement when 'c' is non-zero
     anything else   : memory at register 'r', no displacement */
281 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
/* pre-shift the reg field into bits 3..5 of the ModRM byte */
283 op_reg = REG_VALUE(op_reg) << 3;
284 if ((r & VT_VALMASK) == VT_CONST) {
285 /* constant memory reference */
286 o(0x05 | op_reg);
287 if (is_got) {
288 gen_gotpcrel(r, sym, c);
289 } else {
290 gen_addrpc32(r, sym, c);
292 } else if ((r & VT_VALMASK) == VT_LOCAL) {
293 /* currently, we use only ebp as base */
/* the displacement fits in a signed byte when the cast round-trips */
294 if (c == (char)c) {
295 /* short reference */
296 o(0x45 | op_reg);
297 g(c);
298 } else {
299 oad(0x85 | op_reg, c);
301 } else if ((r & VT_VALMASK) >= TREG_MEM) {
302 if (c) {
303 g(0x80 | op_reg | REG_VALUE(r));
304 gen_le32(c);
305 } else {
306 g(0x00 | op_reg | REG_VALUE(r));
308 } else {
309 g(0x00 | op_reg | REG_VALUE(r));
313 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
314 opcode bits */
315 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
317 gen_modrm_impl(op_reg, r, sym, c, 0);
320 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
321 opcode bits */
322 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
324 int is_got;
325 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
326 orex(1, r, op_reg, opcode);
327 gen_modrm_impl(op_reg, r, sym, c, is_got);
331 /* load 'r' from value 'sv' */
/* Emits the code that brings the value described by 'sv' into register 'r'.
   The cases handled, in order:
     - lvalues: a memory load whose opcode is chosen from the value type
       (movd/movq for float/double, fldt for long double, sign/zero
       extending loads for byte/short, plain mov otherwise);
     - constants, with or without a symbol;
     - VT_LOCAL without VT_LVAL: the address of a local (lea);
     - VT_CMP: a comparison result materialised as 0/1 with setxx;
     - VT_JMP / VT_JMPI: a pending jump chain materialised as 0/1;
     - another register: a move, or an xmm0 <-> st0 transfer through the
       stack slot at -0x10(%rsp). */
332 void load(int r, SValue *sv)
334 int v, t, ft, fc, fr;
335 SValue v1;
337 #ifdef TCC_TARGET_PE
338 SValue v2;
339 sv = pe_getimport(sv, &v2);
340 #endif
/* fr: storage flags/register, ft: type, fc: constant or offset */
342 fr = sv->r;
343 ft = sv->type.t;
344 fc = sv->c.ul;
346 #ifndef TCC_TARGET_PE
347 /* we use indirect access via got */
348 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
349 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
350 /* use the result register as a temporary register */
351 int tr = r | TREG_MEM;
352 if (is_float(ft)) {
353 /* we cannot use float registers as a temporary register */
354 tr = get_reg(RC_INT) | TREG_MEM;
356 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
358 /* load from the temporary register */
359 fr = tr | VT_LVAL;
361 #endif
363 v = fr & VT_VALMASK;
364 if (fr & VT_LVAL) {
365 int b, ll;
366 if (v == VT_LLOCAL) {
/* the local holds a pointer to the value: load that pointer into 'r'
   first, then dereference through 'r' below */
367 v1.type.t = VT_PTR;
368 v1.r = VT_LOCAL | VT_LVAL;
369 v1.c.ul = fc;
370 load(r, &v1);
371 fr = r;
373 ll = 0;
/* for the floating point cases 'r' is replaced by the value that must go
   into the ModRM reg field (0 for the xmm moves, 5 for fldt) */
374 if ((ft & VT_BTYPE) == VT_FLOAT) {
375 b = 0x6e0f66, r = 0; /* movd */
376 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
377 b = 0x7e0ff3, r = 0; /* movq */
378 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
379 b = 0xdb, r = 5; /* fldt */
380 } else if ((ft & VT_TYPE) == VT_BYTE) {
381 b = 0xbe0f; /* movsbl */
382 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
383 b = 0xb60f; /* movzbl */
384 } else if ((ft & VT_TYPE) == VT_SHORT) {
385 b = 0xbf0f; /* movswl */
386 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
387 b = 0xb70f; /* movzwl */
388 } else {
389 ll = is64_type(ft);
390 b = 0x8b;
392 if (ll) {
393 gen_modrm64(b, r, fr, sv->sym, fc);
394 } else {
395 orex(ll, fr, r, b);
396 gen_modrm(r, fr, sv->sym, fc);
398 } else {
399 if (v == VT_CONST) {
400 if (fr & VT_SYM) {
/* address of a symbol: PE always uses a RIP-relative lea; otherwise static
   symbols use lea and the others are fetched from the GOT */
401 #ifdef TCC_TARGET_PE
402 orex(1,0,r,0x8d);
403 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
404 gen_addrpc32(fr, sv->sym, fc);
405 #else
406 if (sv->sym->type.t & VT_STATIC) {
407 orex(1,0,r,0x8d);
408 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
409 gen_addrpc32(fr, sv->sym, fc);
410 } else {
411 orex(1,0,r,0x8b);
412 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
413 gen_gotpcrel(fr, sv->sym, fc);
415 #endif
416 } else if (is64_type(ft)) {
417 orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
418 gen_le64(sv->c.ull);
419 } else {
420 orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
421 gen_le32(fc);
423 } else if (v == VT_LOCAL) {
424 orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
425 gen_modrm(r, VT_LOCAL, sv->sym, fc);
426 } else if (v == VT_CMP) {
/* orex(...,0) emits only the REX prefix, if one is needed: o(0) writes
   nothing.  'fc' is emitted as the second opcode byte of setxx. */
427 orex(0,r,0,0);
428 oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
429 orex(0,r,0, 0x0f); /* setxx %br */
430 o(fc);
431 o(0xc0 + REG_VALUE(r));
432 } else if (v == VT_JMP || v == VT_JMPI) {
/* t is the value produced on the fall-through path, t ^ 1 the value
   produced at the jump target */
433 t = v & 1;
434 orex(0,r,0,0);
435 oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
/* the short jump skips the second mov: 5 bytes, or 6 when it carries a REX
   prefix */
436 o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
437 gsym(fc);
438 orex(0,r,0,0);
439 oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
440 } else if (v != r) {
441 if (r == TREG_XMM0) {
442 assert(v == TREG_ST0);
443 /* gen_cvt_ftof(VT_DOUBLE); */
444 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
445 /* movsd -0x10(%rsp),%xmm0 */
446 o(0x44100ff2);
447 o(0xf024);
448 } else if (r == TREG_ST0) {
449 assert(v == TREG_XMM0);
450 /* gen_cvt_ftof(VT_LDOUBLE); */
451 /* movsd %xmm0,-0x10(%rsp) */
452 o(0x44110ff2);
453 o(0xf024);
454 o(0xf02444dd); /* fldl -0x10(%rsp) */
455 } else {
456 orex(1,r,v, 0x89);
457 o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
463 /* store register 'r' in lvalue 'v' */
/* Emits the store instruction matching the type of 'v' (movd/movq for
   float/double, fld+fstpt for long double, 8/16/32/64-bit mov otherwise).
   In the non-PE build, a destination that is a symbol is first resolved
   through the GOT into %r11 and the store then goes to (%r11). */
464 void store(int r, SValue *v)
466 int fr, bt, ft, fc;
/* op64 holds the opcode of a 64-bit store whose emission is deferred until
   the addressing form is known; 0 means the opcode was already emitted */
467 int op64 = 0;
468 /* store the REX prefix in this variable when PIC is enabled */
469 int pic = 0;
471 #ifdef TCC_TARGET_PE
472 SValue v2;
473 v = pe_getimport(v, &v2);
474 #endif
476 ft = v->type.t;
477 fc = v->c.ul;
478 fr = v->r & VT_VALMASK;
479 bt = ft & VT_BTYPE;
481 #ifndef TCC_TARGET_PE
482 /* we need to access the variable via got */
483 if (fr == VT_CONST && (v->r & VT_SYM)) {
484 /* mov xx(%rip), %r11 */
485 o(0x1d8b4c);
486 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
/* 0x49 additionally requests 64-bit operand size; both select %r11 as the
   base register of the store below */
487 pic = is64_type(bt) ? 0x49 : 0x41;
489 #endif
491 /* XXX: incorrect if float reg to reg */
/* o(pic) writes nothing when pic is 0.  In the floating point cases 'r' is
   replaced by the value that goes into the ModRM reg field. */
492 if (bt == VT_FLOAT) {
493 o(0x66);
494 o(pic);
495 o(0x7e0f); /* movd */
496 r = 0;
497 } else if (bt == VT_DOUBLE) {
498 o(0x66);
499 o(pic);
500 o(0xd60f); /* movq */
501 r = 0;
502 } else if (bt == VT_LDOUBLE) {
/* fstpt pops st0, so the value is duplicated first */
503 o(0xc0d9); /* fld %st(0) */
504 o(pic);
505 o(0xdb); /* fstpt */
506 r = 7;
507 } else {
508 if (bt == VT_SHORT)
509 o(0x66);
510 o(pic);
511 if (bt == VT_BYTE || bt == VT_BOOL)
512 orex(0, 0, r, 0x88);
513 else if (is64_type(bt))
514 op64 = 0x89;
515 else
516 orex(0, 0, r, 0x89);
518 if (pic) {
519 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
520 if (op64)
521 o(op64);
522 o(3 + (r << 3));
523 } else if (op64) {
524 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
525 gen_modrm64(op64, r, v->r, v->sym, fc);
526 } else if (fr != r) {
527 /* XXX: don't we really come here? */
/* the o() after abort() is never reached */
528 abort();
529 o(0xc0 + fr + r * 8); /* mov r, fr */
531 } else {
532 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
533 gen_modrm(r, v->r, v->sym, fc);
534 } else if (fr != r) {
535 /* XXX: don't we really come here? */
536 abort();
537 o(0xc0 + fr + r * 8); /* mov r, fr */
542 /* 'is_jmp' is '1' if it is a jump */
543 static void gcall_or_jmp(int is_jmp)
545 int r;
546 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
547 /* constant case */
548 if (vtop->r & VT_SYM) {
549 /* relocation case */
550 greloc(cur_text_section, vtop->sym,
551 ind + 1, R_X86_64_PC32);
552 } else {
553 /* put an empty PC32 relocation */
554 put_elf_reloc(symtab_section, cur_text_section,
555 ind + 1, R_X86_64_PC32, 0);
557 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
558 } else {
559 /* otherwise, indirect call */
560 r = TREG_R11;
561 load(r, vtop);
562 o(0x41); /* REX */
563 o(0xff); /* call/jmp *r */
564 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
568 #ifdef TCC_TARGET_PE
/* PE build: number of argument registers and the register assigned to each
   of the first REGN argument positions. */
570 #define REGN 4
571 static const uint8_t arg_regs[] = {
572 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
/* Largest outgoing-argument area needed by any call of the current
   function: reset in gfunc_prolog(), raised in gfunc_call() and added to
   the frame size in gfunc_epilog(). */
575 static int func_scratch;
577 /* Generate function call. The function address is pushed first, then
578 all the parameters in call order. This functions pops all the
579 parameters and the function address. */
/* Emit opcode 'b' with a 64-bit REX prefix and an operand of the form
   d(%rsp), using an 8-bit displacement when 'd' fits in a signed byte and
   a 32-bit one otherwise.  The low three bits of 'r' go into the ModRM reg
   field; when 'r' has bit 0x100 set it is not a register number and does
   not contribute to the REX prefix. */
void gen_offs_sp(int b, int r, int d)
{
    const int reg_field = REG_VALUE(r) << 3;
    const int rex_reg = (r & 0x100) ? 0 : r;

    orex(1, 0, rex_reg, b);
    if (d != (char)d) {
        o(0x2484 | reg_field);
        gen_le32(d);
    } else {
        o(0x2444 | reg_field);
        g(d);
    }
}
/* PE variant.  Every argument occupies one 8-byte position: the first REGN
   positions travel in arg_regs[] (floats additionally in the matching xmm
   register), the others are stored at j*8(%rsp).  Structs and long doubles
   are copied into a scratch area above the argument positions and passed
   by address.  Pops the arguments and the function from the value stack. */
593 void gfunc_call(int nb_args)
595 int size, align, r, args_size, i, d, j, bt, struct_size;
596 int nb_reg_args, gen_reg;
598 nb_reg_args = nb_args;
/* at least REGN positions are always reserved */
599 args_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;
601 /* for struct arguments, we need to call memcpy and the function
602 call breaks register passing arguments we are preparing.
603 So, we process arguments which will be passed by stack first. */
/* first pass: copy by-address arguments into the scratch area, which starts
   right after the argument positions */
604 struct_size = args_size;
605 for(i = 0; i < nb_args; i++) {
606 SValue *sv = &vtop[-i];
607 bt = (sv->type.t & VT_BTYPE);
608 if (bt == VT_STRUCT) {
609 size = type_size(&sv->type, &align);
610 /* align to stack align size */
611 size = (size + 15) & ~15;
612 /* generate structure store */
613 r = get_reg(RC_INT);
614 gen_offs_sp(0x8d, r, struct_size);
615 struct_size += size;
617 /* generate memcpy call */
618 vset(&sv->type, r | VT_LVAL, 0);
619 vpushv(sv);
620 vstore();
621 --vtop;
623 } else if (bt == VT_LDOUBLE) {
/* NOTE(review): gv() acts on vtop while this loop inspects vtop[-i] --
   confirm this is intended when the long double is not the top entry */
625 gv(RC_ST0);
626 gen_offs_sp(0xdb, 0x107, struct_size);
627 struct_size += 16;
/* remember the largest scratch requirement for gfunc_epilog() */
632 if (func_scratch < struct_size)
633 func_scratch = struct_size;
634 #if 1
635 for (i = 0; i < REGN; ++i)
636 save_reg(arg_regs[i]);
637 save_reg(TREG_RAX);
638 #endif
/* second pass: walk the arguments from the top of the value stack,
   assigning positions from the highest (gen_reg - 1) down to 0 */
639 gen_reg = nb_reg_args;
640 struct_size = args_size;
642 for(i = 0; i < nb_args; i++) {
643 bt = (vtop->type.t & VT_BTYPE);
645 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
646 if (bt == VT_LDOUBLE)
647 size = 16;
648 else
649 size = type_size(&vtop->type, &align);
650 /* align to stack align size */
651 size = (size + 15) & ~15;
652 j = --gen_reg;
/* pass the address of the copy made in the first pass */
653 if (j >= REGN) {
654 d = TREG_RAX;
655 gen_offs_sp(0x8d, d, struct_size);
656 gen_offs_sp(0x89, d, j*8);
657 } else {
658 d = arg_regs[j];
659 gen_offs_sp(0x8d, d, struct_size);
661 struct_size += size;
663 } else if (is_sse_float(vtop->type.t)) {
664 gv(RC_FLOAT); /* only one float register */
665 j = --gen_reg;
666 if (j >= REGN) {
667 /* movq %xmm0, j*8(%rsp) */
668 gen_offs_sp(0xd60f66, 0x100, j*8);
669 } else {
670 /* movaps %xmm0, %xmmN */
671 o(0x280f);
672 o(0xc0 + (j << 3));
673 d = arg_regs[j];
674 /* mov %xmm0, %rxx */
675 o(0x66);
676 orex(1,d,0, 0x7e0f);
677 o(0xc0 + REG_VALUE(d));
679 } else {
680 j = --gen_reg;
681 if (j >= REGN) {
682 r = gv(RC_INT);
683 gen_offs_sp(0x89, r, j*8);
684 } else {
685 d = arg_regs[j];
/* registers known to the allocator are requested directly by class; the
   others (r8, r9) are loaded into any integer register and then moved */
686 if (d < NB_REGS) {
687 gv(reg_classes[d] & ~RC_INT);
688 } else {
689 r = gv(RC_INT);
690 if (d != r) {
691 orex(1,d,r, 0x89);
692 o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8);
698 vtop--;
700 save_regs(0);
701 gcall_or_jmp(0);
/* pop the function itself */
702 vtop--;
/* number of bytes reserved at function entry; gfunc_epilog() writes the
   real prolog into them once the frame size is known */
706 #define FUNC_PROLOG_SIZE 11
708 /* generate function prolog of type 't' */
/* PE variant.  Reserves the prolog bytes, then stores each register-passed
   parameter at its position above the frame pointer (starting at
   PTR_SIZE * 2, one pointer-sized position per parameter) and declares the
   parameter symbols. */
709 void gfunc_prolog(CType *func_type)
711 int addr, reg_param_index, bt;
712 Sym *sym;
713 CType *type;
715 func_ret_sub = 0;
716 func_scratch = 0;
717 loc = 0;
719 addr = PTR_SIZE * 2;
/* skip the space for the prolog and remember where the body starts */
720 ind += FUNC_PROLOG_SIZE;
721 func_sub_sp_offset = ind;
722 reg_param_index = 0;
724 sym = func_type->ref;
726 /* if the function returns a structure, then add an
727 implicit pointer parameter */
728 func_vt = sym->type;
729 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
730 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
731 reg_param_index++;
732 addr += PTR_SIZE;
735 /* define parameters */
736 while ((sym = sym->next) != NULL) {
737 type = &sym->type;
738 bt = type->t & VT_BTYPE;
739 if (reg_param_index < REGN) {
740 /* save arguments passed by register */
741 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
/* structs and long doubles are declared with VT_REF in addition */
743 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
744 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
745 } else {
746 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
748 reg_param_index++;
749 addr += PTR_SIZE;
/* for variadic functions, also store the remaining argument registers */
752 while (reg_param_index < REGN) {
753 if (func_type->ref->c == FUNC_ELLIPSIS)
754 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
755 reg_param_index++;
756 addr += PTR_SIZE;
760 /* generate function epilog */
/* PE variant.  Emits "leave" and the return, then rewinds 'ind' to the
   bytes reserved by gfunc_prolog() and writes the frame setup there.  For
   frames of 4096 bytes or more the setup is delegated to __chkstk. */
761 void gfunc_epilog(void)
763 int v, saved_ind;
765 o(0xc9); /* leave */
766 if (func_ret_sub == 0) {
767 o(0xc3); /* ret */
768 } else {
769 o(0xc2); /* ret n */
770 g(func_ret_sub);
771 g(func_ret_sub >> 8);
774 saved_ind = ind;
775 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
776 /* align local size to word & save local variables */
/* frame = locals plus outgoing-argument scratch, rounded up to 16 bytes */
777 v = (func_scratch + -loc + 15) & -16;
779 if (v >= 4096) {
780 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
781 oad(0xb8, v); /* mov stacksize, %eax */
782 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
783 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
784 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
785 } else {
786 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
787 o(0xec8148); /* sub rsp, stacksize */
788 gen_le32(v);
/* restore the section end before registering unwind data, then continue
   emitting from wherever the section ends afterwards */
791 cur_text_section->data_offset = saved_ind;
792 pe_add_unwind_data(ind, saved_ind, v);
793 ind = cur_text_section->data_offset;
796 #else
/* Emit "add $val, %rsp", using the 8-bit immediate form when 'val' fits in
   a signed byte and the 32-bit immediate form otherwise. */
static void gadd_sp(int val)
{
    if (val != (char)val) {
        oad(0xc48148, val); /* add $xxx, %rsp */
        return;
    }
    o(0xc48348);
    g(val);
}
/* Non-PE build: number of integer argument registers and the register
   assigned to each of the first REGN integer arguments. */
808 #define REGN 6
809 static const uint8_t arg_regs[REGN] = {
810 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
813 /* Generate function call. The function address is pushed first, then
814 all the parameters in call order. This functions pops all the
815 parameters and the function address. */
/* Non-PE variant, in three phases:
     1. count integer and SSE arguments and the bytes that go on the stack;
     2. push the stack-passed arguments (structs, long doubles, and the
        integer/SSE arguments beyond the available registers);
     3. walk the arguments again and load the register-passed ones.
   %eax receives the number of SSE registers used (at most 8) before the
   call, and the stack is restored afterwards. */
816 void gfunc_call(int nb_args)
818 int size, align, r, args_size, i;
819 SValue *orig_vtop;
820 int nb_reg_args = 0;
821 int nb_sse_args = 0;
822 int sse_reg, gen_reg;
824 /* calculate the number of integer/float arguments */
825 args_size = 0;
826 for(i = 0; i < nb_args; i++) {
827 if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
828 args_size += type_size(&vtop[-i].type, &align);
829 args_size = (args_size + 7) & ~7;
830 } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
831 args_size += 16;
832 } else if (is_sse_float(vtop[-i].type.t)) {
833 nb_sse_args++;
834 if (nb_sse_args > 8) args_size += 8;
835 } else {
836 nb_reg_args++;
837 if (nb_reg_args > REGN) args_size += 8;
841 /* for struct arguments, we need to call memcpy and the function
842 call breaks register passing arguments we are preparing.
843 So, we process arguments which will be passed by stack first. */
/* the value stack is walked twice, so its top is saved and restored */
844 orig_vtop = vtop;
845 gen_reg = nb_reg_args;
846 sse_reg = nb_sse_args;
848 /* adjust stack to align SSE boundary */
/* from here on args_size counts the bytes actually subtracted from %rsp,
   starting with the padding (the assignment in the condition is intended) */
849 if (args_size &= 15) {
850 /* fetch cpu flag before the following sub will change the value */
851 if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
852 gv(RC_INT);
854 args_size = 16 - args_size;
855 o(0x48);
856 oad(0xec81, args_size); /* sub $xxx, %rsp */
859 for(i = 0; i < nb_args; i++) {
860 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
861 size = type_size(&vtop->type, &align);
862 /* align to stack align size */
863 size = (size + 7) & ~7;
864 /* allocate the necessary size on stack */
865 o(0x48);
866 oad(0xec81, size); /* sub $xxx, %rsp */
867 /* generate structure store */
868 r = get_reg(RC_INT);
869 orex(1, r, 0, 0x89); /* mov %rsp, r */
870 o(0xe0 + REG_VALUE(r));
872 /* following code breaks vtop[1], vtop[2], and vtop[3] */
/* so those entries are saved here and put back after the store */
873 SValue tmp1 = vtop[1];
874 SValue tmp2 = vtop[2];
875 SValue tmp3 = vtop[3];
876 vset(&vtop->type, r | VT_LVAL, 0);
877 vswap();
878 vstore();
879 vtop[1] = tmp1;
880 vtop[2] = tmp2;
881 vtop[3] = tmp3;
883 args_size += size;
884 } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
885 gv(RC_ST0);
886 size = LDOUBLE_SIZE;
887 oad(0xec8148, size); /* sub $xxx, %rsp */
888 o(0x7cdb); /* fstpt 0(%rsp) */
889 g(0x24);
890 g(0x00);
891 args_size += size;
892 } else if (is_sse_float(vtop->type.t)) {
893 int j = --sse_reg;
894 if (j >= 8) {
895 gv(RC_FLOAT);
/* the push only reserves 8 bytes; the slot is overwritten just below */
896 o(0x50); /* push %rax */
897 /* movq %xmm0, (%rsp) */
898 o(0x04d60f66);
899 o(0x24);
900 args_size += 8;
902 } else {
903 int j = --gen_reg;
904 /* simple type */
905 /* XXX: implicit cast ? */
906 if (j >= REGN) {
907 r = gv(RC_INT);
908 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
909 args_size += 8;
912 vtop--;
914 vtop = orig_vtop;
916 save_regs(0); /* save used temporary registers */
918 /* then, we prepare register passing arguments.
919 Note that we cannot set RDX and RCX in this loop because gv()
920 may break these temporary registers. Let's use R10 and R11
921 instead of them */
922 gen_reg = nb_reg_args;
923 sse_reg = nb_sse_args;
924 for(i = 0; i < nb_args; i++) {
925 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
926 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
/* already on the stack: nothing to do in this pass */
927 } else if (is_sse_float(vtop->type.t)) {
928 int j = --sse_reg;
929 if (j < 8) {
930 gv(RC_FLOAT); /* only one float register */
931 /* movaps %xmm0, %xmmN */
/* sse_reg equals j here */
932 o(0x280f);
933 o(0xc0 + (sse_reg << 3));
935 } else {
936 int j = --gen_reg;
937 /* simple type */
938 /* XXX: implicit cast ? */
939 if (j < REGN) {
940 int d = arg_regs[j];
941 r = gv(RC_INT);
942 if (j == 2 || j == 3)
943 /* j=2: r10, j=3: r11 */
944 d = j + 8;
945 orex(1,d,r,0x89); /* mov */
946 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
949 vtop--;
952 /* Copy R10 and R11 into RDX and RCX, respectively */
953 if (nb_reg_args > 2) {
954 o(0xd2894c); /* mov %r10, %rdx */
955 if (nb_reg_args > 3) {
956 o(0xd9894c); /* mov %r11, %rcx */
960 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
961 gcall_or_jmp(0);
/* release the padding and the pushed arguments */
962 if (args_size)
963 gadd_sp(args_size);
/* pop the function itself */
964 vtop--;
968 #define FUNC_PROLOG_SIZE 11
970 static void push_arg_reg(int i) {
971 loc -= 8;
972 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
975 /* generate function prolog of type 't' */
/* Non-PE variant.  Reserves FUNC_PROLOG_SIZE bytes for the frame setup
   (written later by gfunc_epilog()), spills register-passed parameters to
   local slots and declares every parameter symbol at its location.
   For variadic functions it first stores three 32-bit values at -0x10,
   -0xc and -0x8(%rbp) and saves all eight xmm and all REGN integer
   argument registers.
   NOTE(review): those three values look like the initial state for the
   va_arg support code -- confirm against the va_list implementation. */
976 void gfunc_prolog(CType *func_type)
978 int i, addr, align, size;
979 int param_index, param_addr, reg_param_index, sse_param_index;
980 Sym *sym;
981 CType *type;
983 sym = func_type->ref;
/* stack-passed parameters start above the saved frame pointer and the
   return address */
984 addr = PTR_SIZE * 2;
985 loc = 0;
986 ind += FUNC_PROLOG_SIZE;
987 func_sub_sp_offset = ind;
988 func_ret_sub = 0;
990 if (func_type->ref->c == FUNC_ELLIPSIS) {
991 int seen_reg_num, seen_sse_num, seen_stack_size;
992 seen_reg_num = seen_sse_num = 0;
993 /* frame pointer and return address */
994 seen_stack_size = PTR_SIZE * 2;
995 /* count the number of seen parameters */
996 sym = func_type->ref;
997 while ((sym = sym->next) != NULL) {
998 type = &sym->type;
999 if (is_sse_float(type->t)) {
1000 if (seen_sse_num < 8) {
1001 seen_sse_num++;
1002 } else {
1003 seen_stack_size += 8;
1005 } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
1006 size = type_size(type, &align);
1007 size = (size + 7) & ~7;
1008 seen_stack_size += size;
1009 } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
1010 seen_stack_size += LDOUBLE_SIZE;
1011 } else {
1012 if (seen_reg_num < REGN) {
1013 seen_reg_num++;
1014 } else {
1015 seen_stack_size += 8;
/* 16 bytes of locals hold the three values stored below */
1020 loc -= 16;
1021 /* movl $0x????????, -0x10(%rbp) */
1022 o(0xf045c7);
1023 gen_le32(seen_reg_num * 8);
1024 /* movl $0x????????, -0xc(%rbp) */
1025 o(0xf445c7);
1026 gen_le32(seen_sse_num * 16 + 48);
1027 /* movl $0x????????, -0x8(%rbp) */
1028 o(0xf845c7);
1029 gen_le32(seen_stack_size);
1031 /* save all register passing arguments */
/* xmm7 down to xmm0, each in a 16-byte slot whose upper 8 bytes are
   cleared */
1032 for (i = 0; i < 8; i++) {
1033 loc -= 16;
1034 o(0xd60f66); /* movq */
1035 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1036 /* movq $0, loc+8(%rbp) */
1037 o(0x85c748);
1038 gen_le32(loc + 8);
1039 gen_le32(0);
/* integer argument registers, last one first */
1041 for (i = 0; i < REGN; i++) {
1042 push_arg_reg(REGN-1-i);
1046 sym = func_type->ref;
1047 param_index = 0;
1048 reg_param_index = 0;
1049 sse_param_index = 0;
1051 /* if the function returns a structure, then add an
1052 implicit pointer parameter */
1053 func_vt = sym->type;
1054 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
1055 push_arg_reg(reg_param_index);
1056 param_addr = loc;
/* func_vc records where the implicit pointer was stored */
1058 func_vc = loc;
1059 param_index++;
1060 reg_param_index++;
1062 /* define parameters */
1063 while ((sym = sym->next) != NULL) {
1064 type = &sym->type;
1065 size = type_size(type, &align);
1066 size = (size + 7) & ~7;
1067 if (is_sse_float(type->t)) {
1068 if (sse_param_index < 8) {
1069 /* save arguments passed by register */
1070 loc -= 8;
1071 o(0xd60f66); /* movq */
1072 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
1073 param_addr = loc;
1074 } else {
1075 param_addr = addr;
1076 addr += size;
1078 sse_param_index++;
1080 } else if ((type->t & VT_BTYPE) == VT_STRUCT ||
1081 (type->t & VT_BTYPE) == VT_LDOUBLE) {
/* always found on the caller's stack */
1082 param_addr = addr;
1083 addr += size;
1084 } else {
1085 if (reg_param_index < REGN) {
1086 /* save arguments passed by register */
1087 push_arg_reg(reg_param_index);
1088 param_addr = loc;
1089 } else {
1090 param_addr = addr;
1091 addr += 8;
1093 reg_param_index++;
1095 sym_push(sym->v & ~SYM_FIELD, type,
1096 VT_LOCAL | VT_LVAL, param_addr);
1097 param_index++;
1101 /* generate function epilog */
1102 void gfunc_epilog(void)
1104 int v, saved_ind;
1106 o(0xc9); /* leave */
1107 if (func_ret_sub == 0) {
1108 o(0xc3); /* ret */
1109 } else {
1110 o(0xc2); /* ret n */
1111 g(func_ret_sub);
1112 g(func_ret_sub >> 8);
1114 /* align local size to word & save local variables */
1115 v = (-loc + 15) & -16;
1116 saved_ind = ind;
1117 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1118 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1119 o(0xec8148); /* sub rsp, stacksize */
1120 gen_le32(v);
1121 ind = saved_ind;
1124 #endif /* not PE */
/* Generate a jump whose target is not known yet.  't' is stored in the
   32-bit operand as the link to the previous pending jump; the returned
   offset of that operand is the new head of the chain (see gsym_addr()). */
int gjmp(int t)
{
    int chain_head;

    chain_head = psym(0xe9, t);
    return chain_head;
}
1132 /* generate a jump to a fixed address */
1133 void gjmp_addr(int a)
1135 int r;
1136 r = a - ind - 2;
1137 if (r == (char)r) {
1138 g(0xeb);
1139 g(r);
1140 } else {
1141 oad(0xe9, a - ind - 5);
1145 /* generate a test. set 'inv' to invert test. Stack entry is popped */
/* 't' is the head of a chain of pending jumps (0 for none); the return
   value is the head of the chain after adding any jump generated here.
   Three forms of the tested value are handled:
     - VT_CMP: flags are already set, emit a conditional jump;
     - VT_JMP / VT_JMPI: merge or resolve the value's own jump chain;
     - anything else: compare against zero (constants are folded into an
       unconditional jump or nothing at all). */
1146 int gtst(int inv, int t)
1148 int v, *p;
1150 v = vtop->r & VT_VALMASK;
1151 if (v == VT_CMP) {
1152 /* fast case : can jump directly since flags are set */
/* vtop->c.i is the byte load() emits after 0x0f for setxx; subtracting 16
   gives the byte used here for the conditional jump, and xor-ing with
   'inv' flips its lowest bit */
1153 g(0x0f);
1154 t = psym((vtop->c.i - 16) ^ inv, t);
1155 } else if (v == VT_JMP || v == VT_JMPI) {
1156 /* && or || optimization */
1157 if ((v & 1) == inv) {
1158 /* insert vtop->c jump list in t */
/* walk to the last link of the value's chain and append 't' to it */
1159 p = &vtop->c.i;
1160 while (*p != 0)
1161 p = (int *)(cur_text_section->data + *p);
1162 *p = t;
1163 t = vtop->c.i;
1164 } else {
1165 t = gjmp(t);
1166 gsym(vtop->c.i);
1168 } else {
1169 if (is_float(vtop->type.t) ||
1170 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
/* replace the value by the result of "value != 0" */
1171 vpushi(0);
1172 gen_op(TOK_NE);
1174 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1175 /* constant jmp optimization */
1176 if ((vtop->c.i != 0) != inv)
1177 t = gjmp(t);
1178 } else {
/* test v, v followed by a conditional near jump */
1179 v = gv(RC_INT);
1180 orex(0,v,v,0x85);
1181 o(0xc0 + REG_VALUE(v) * 9);
1182 g(0x0f);
1183 t = psym(0x85 ^ inv, t);
1186 vtop--;
1187 return t;
1190 /* generate an integer binary operation */
/* Operates on the two top value-stack entries (vtop[-1] op vtop[0]) and
   leaves a single entry describing the result.
     ll: the left operand has a 64-bit type, so REX.W forms are used
     uu: unsigned operation (only used by the division code)
     cc: the right operand is a plain constant
   Comparison operators fall into the default case, which emits a compare;
   the result is then left in the flags as a VT_CMP value. */
1191 void gen_opi(int op)
1193 int r, fr, opc, c;
1194 int ll, uu, cc;
1196 ll = is64_type(vtop[-1].type.t);
1197 uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
1198 cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
1200 switch(op) {
1201 case '+':
1202 case TOK_ADDC1: /* add with carry generation */
1203 opc = 0;
/* gen_op8: shared tail of the two-operand arithmetic/logic instructions;
   'opc' selects the operation */
1204 gen_op8:
/* the immediate form is only usable when a 64-bit constant fits in 32
   bits */
1205 if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
1206 /* constant case */
1207 vswap();
1208 r = gv(RC_INT);
1209 vswap();
1210 c = vtop->c.i;
1211 if (c == (char)c) {
1212 /* XXX: generate inc and dec for smaller code ? */
1213 orex(ll, r, 0, 0x83);
1214 o(0xc0 | (opc << 3) | REG_VALUE(r));
1215 g(c);
1216 } else {
1217 orex(ll, r, 0, 0x81);
1218 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1220 } else {
1221 gv2(RC_INT, RC_INT);
1222 r = vtop[-1].r;
1223 fr = vtop[0].r;
1224 orex(ll, r, fr, (opc << 3) | 0x01);
1225 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1227 vtop--;
/* for comparisons the result lives in the flags; the operator is kept in
   c.i for gtst() and load() */
1228 if (op >= TOK_ULT && op <= TOK_GT) {
1229 vtop->r = VT_CMP;
1230 vtop->c.i = op;
1232 break;
1233 case '-':
1234 case TOK_SUBC1: /* sub with carry generation */
1235 opc = 5;
1236 goto gen_op8;
1237 case TOK_ADDC2: /* add with carry use */
1238 opc = 2;
1239 goto gen_op8;
1240 case TOK_SUBC2: /* sub with carry use */
1241 opc = 3;
1242 goto gen_op8;
1243 case '&':
1244 opc = 4;
1245 goto gen_op8;
1246 case '^':
1247 opc = 6;
1248 goto gen_op8;
1249 case '|':
1250 opc = 1;
1251 goto gen_op8;
1252 case '*':
1253 gv2(RC_INT, RC_INT);
1254 r = vtop[-1].r;
1255 fr = vtop[0].r;
1256 orex(ll, fr, r, 0xaf0f); /* imul fr, r */
1257 o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
1258 vtop--;
1259 break;
1260 case TOK_SHL:
1261 opc = 4;
1262 goto gen_shift;
1263 case TOK_SHR:
1264 opc = 5;
1265 goto gen_shift;
1266 case TOK_SAR:
1267 opc = 7;
1268 gen_shift:
/* turn the shift selector into a register-direct ModRM byte */
1269 opc = 0xc0 | (opc << 3);
1270 if (cc) {
1271 /* constant case */
1272 vswap();
1273 r = gv(RC_INT);
1274 vswap();
1275 orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
1276 o(opc | REG_VALUE(r));
/* the count is reduced modulo the operand width */
1277 g(vtop->c.i & (ll ? 63 : 31));
1278 } else {
1279 /* we generate the shift in ecx */
1280 gv2(RC_INT, RC_RCX);
1281 r = vtop[-1].r;
1282 orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
1283 o(opc | REG_VALUE(r));
1285 vtop--;
1286 break;
1287 case TOK_UDIV:
1288 case TOK_UMOD:
1289 uu = 1;
1290 goto divmod;
1291 case '/':
1292 case '%':
1293 case TOK_PDIV:
1294 uu = 0;
1295 divmod:
1296 /* first operand must be in eax */
1297 /* XXX: need better constraint for second operand */
1298 gv2(RC_RAX, RC_RCX);
1299 r = vtop[-1].r;
1300 fr = vtop[0].r;
1301 vtop--;
/* rdx is overwritten by the division, so any value living there is saved
   first */
1302 save_reg(TREG_RDX);
1303 orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1304 orex(ll, fr, 0, 0xf7); /* div fr, %eax */
1305 o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
/* the remainder is taken from rdx, the quotient from rax */
1306 if (op == '%' || op == TOK_UMOD)
1307 r = TREG_RDX;
1308 else
1309 r = TREG_RAX;
1310 vtop->r = r;
1311 break;
1312 default:
/* comparison operators: emit cmp through the shared tail */
1313 opc = 7;
1314 goto gen_op8;
/* Generate the operation 'op' on the two topmost value stack entries.
   This entry point simply forwards to gen_opi(), which receives 'op'
   unchanged. */
void gen_opl(int op)
{
    gen_opi(op);
}
1323 /* generate a floating point operation 'v = t1 op t2' instruction. The
1324 two operands are guaranted to have the same floating point type */
1325 /* XXX: need to use ST1 too */
1326 void gen_opf(int op)
1328 int a, ft, fc, swapped, r;
1329 int float_type =
1330 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1332 /* convert constants to memory references */
1333 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1334 vswap();
1335 gv(float_type);
1336 vswap();
1338 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1339 gv(float_type);
1341 /* must put at least one value in the floating point register */
1342 if ((vtop[-1].r & VT_LVAL) &&
1343 (vtop[0].r & VT_LVAL)) {
1344 vswap();
1345 gv(float_type);
1346 vswap();
1348 swapped = 0;
1349 /* swap the stack if needed so that t1 is the register and t2 is
1350 the memory reference */
1351 if (vtop[-1].r & VT_LVAL) {
1352 vswap();
1353 swapped = 1;
1355 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1356 if (op >= TOK_ULT && op <= TOK_GT) {
1357 /* load on stack second operand */
1358 load(TREG_ST0, vtop);
1359 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1360 if (op == TOK_GE || op == TOK_GT)
1361 swapped = !swapped;
1362 else if (op == TOK_EQ || op == TOK_NE)
1363 swapped = 0;
1364 if (swapped)
1365 o(0xc9d9); /* fxch %st(1) */
1366 o(0xe9da); /* fucompp */
1367 o(0xe0df); /* fnstsw %ax */
1368 if (op == TOK_EQ) {
1369 o(0x45e480); /* and $0x45, %ah */
1370 o(0x40fC80); /* cmp $0x40, %ah */
1371 } else if (op == TOK_NE) {
1372 o(0x45e480); /* and $0x45, %ah */
1373 o(0x40f480); /* xor $0x40, %ah */
1374 op = TOK_NE;
1375 } else if (op == TOK_GE || op == TOK_LE) {
1376 o(0x05c4f6); /* test $0x05, %ah */
1377 op = TOK_EQ;
1378 } else {
1379 o(0x45c4f6); /* test $0x45, %ah */
1380 op = TOK_EQ;
1382 vtop--;
1383 vtop->r = VT_CMP;
1384 vtop->c.i = op;
1385 } else {
1386 /* no memory reference possible for long double operations */
1387 load(TREG_ST0, vtop);
1388 swapped = !swapped;
1390 switch(op) {
1391 default:
1392 case '+':
1393 a = 0;
1394 break;
1395 case '-':
1396 a = 4;
1397 if (swapped)
1398 a++;
1399 break;
1400 case '*':
1401 a = 1;
1402 break;
1403 case '/':
1404 a = 6;
1405 if (swapped)
1406 a++;
1407 break;
1409 ft = vtop->type.t;
1410 fc = vtop->c.ul;
1411 o(0xde); /* fxxxp %st, %st(1) */
1412 o(0xc1 + (a << 3));
1413 vtop--;
1415 } else {
1416 if (op >= TOK_ULT && op <= TOK_GT) {
1417 /* if saved lvalue, then we must reload it */
1418 r = vtop->r;
1419 fc = vtop->c.ul;
1420 if ((r & VT_VALMASK) == VT_LLOCAL) {
1421 SValue v1;
1422 r = get_reg(RC_INT);
1423 v1.type.t = VT_INT;
1424 v1.r = VT_LOCAL | VT_LVAL;
1425 v1.c.ul = fc;
1426 load(r, &v1);
1427 fc = 0;
1430 if (op == TOK_EQ || op == TOK_NE) {
1431 swapped = 0;
1432 } else {
1433 if (op == TOK_LE || op == TOK_LT)
1434 swapped = !swapped;
1435 if (op == TOK_LE || op == TOK_GE) {
1436 op = 0x93; /* setae */
1437 } else {
1438 op = 0x97; /* seta */
1442 if (swapped) {
1443 o(0x7e0ff3); /* movq */
1444 gen_modrm(1, r, vtop->sym, fc);
1446 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1447 o(0x66);
1449 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1450 o(0xc8);
1451 } else {
1452 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1453 o(0x66);
1455 o(0x2e0f); /* ucomisd */
1456 gen_modrm(0, r, vtop->sym, fc);
1459 vtop--;
1460 vtop->r = VT_CMP;
1461 vtop->c.i = op;
1462 } else {
1463 /* no memory reference possible for long double operations */
1464 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1465 load(TREG_XMM0, vtop);
1466 swapped = !swapped;
1468 switch(op) {
1469 default:
1470 case '+':
1471 a = 0;
1472 break;
1473 case '-':
1474 a = 4;
1475 break;
1476 case '*':
1477 a = 1;
1478 break;
1479 case '/':
1480 a = 6;
1481 break;
1483 ft = vtop->type.t;
1484 fc = vtop->c.ul;
1485 if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1486 o(0xde); /* fxxxp %st, %st(1) */
1487 o(0xc1 + (a << 3));
1488 } else {
1489 /* if saved lvalue, then we must reload it */
1490 r = vtop->r;
1491 if ((r & VT_VALMASK) == VT_LLOCAL) {
1492 SValue v1;
1493 r = get_reg(RC_INT);
1494 v1.type.t = VT_INT;
1495 v1.r = VT_LOCAL | VT_LVAL;
1496 v1.c.ul = fc;
1497 load(r, &v1);
1498 fc = 0;
1500 if (swapped) {
1501 /* movq %xmm0,%xmm1 */
1502 o(0x7e0ff3);
1503 o(0xc8);
1504 load(TREG_XMM0, vtop);
1505 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1506 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1507 o(0xf2);
1508 } else {
1509 o(0xf3);
1511 o(0x0f);
1512 o(0x58 + a);
1513 o(0xc1);
1514 } else {
1515 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1516 o(0xf2);
1517 } else {
1518 o(0xf3);
1520 o(0x0f);
1521 o(0x58 + a);
1522 gen_modrm(0, r, vtop->sym, fc);
1525 vtop--;
1530 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1531 and 'long long' cases. */
1532 void gen_cvt_itof(int t)
1534 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1535 save_reg(TREG_ST0);
1536 gv(RC_INT);
1537 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1538 /* signed long long to float/double/long double (unsigned case
1539 is handled generically) */
1540 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1541 o(0x242cdf); /* fildll (%rsp) */
1542 o(0x08c48348); /* add $8, %rsp */
1543 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1544 (VT_INT | VT_UNSIGNED)) {
1545 /* unsigned int to float/double/long double */
1546 o(0x6a); /* push $0 */
1547 g(0x00);
1548 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1549 o(0x242cdf); /* fildll (%rsp) */
1550 o(0x10c48348); /* add $16, %rsp */
1551 } else {
1552 /* int to float/double/long double */
1553 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1554 o(0x2404db); /* fildl (%rsp) */
1555 o(0x08c48348); /* add $8, %rsp */
1557 vtop->r = TREG_ST0;
1558 } else {
1559 save_reg(TREG_XMM0);
1560 gv(RC_INT);
1561 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
1562 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1563 (VT_INT | VT_UNSIGNED) ||
1564 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1565 o(0x48); /* REX */
1567 o(0x2a0f);
1568 o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1569 vtop->r = TREG_XMM0;
1573 /* convert from one floating point type to another */
1574 void gen_cvt_ftof(int t)
1576 int ft, bt, tbt;
1578 ft = vtop->type.t;
1579 bt = ft & VT_BTYPE;
1580 tbt = t & VT_BTYPE;
1582 if (bt == VT_FLOAT) {
1583 gv(RC_FLOAT);
1584 if (tbt == VT_DOUBLE) {
1585 o(0xc0140f); /* unpcklps */
1586 o(0xc05a0f); /* cvtps2pd */
1587 } else if (tbt == VT_LDOUBLE) {
1588 /* movss %xmm0,-0x10(%rsp) */
1589 o(0x44110ff3);
1590 o(0xf024);
1591 o(0xf02444d9); /* flds -0x10(%rsp) */
1592 vtop->r = TREG_ST0;
1594 } else if (bt == VT_DOUBLE) {
1595 gv(RC_FLOAT);
1596 if (tbt == VT_FLOAT) {
1597 o(0xc0140f66); /* unpcklpd */
1598 o(0xc05a0f66); /* cvtpd2ps */
1599 } else if (tbt == VT_LDOUBLE) {
1600 /* movsd %xmm0,-0x10(%rsp) */
1601 o(0x44110ff2);
1602 o(0xf024);
1603 o(0xf02444dd); /* fldl -0x10(%rsp) */
1604 vtop->r = TREG_ST0;
1606 } else {
1607 gv(RC_ST0);
1608 if (tbt == VT_DOUBLE) {
1609 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1610 /* movsd -0x10(%rsp),%xmm0 */
1611 o(0x44100ff2);
1612 o(0xf024);
1613 vtop->r = TREG_XMM0;
1614 } else if (tbt == VT_FLOAT) {
1615 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1616 /* movss -0x10(%rsp),%xmm0 */
1617 o(0x44100ff3);
1618 o(0xf024);
1619 vtop->r = TREG_XMM0;
1624 /* convert fp to int 't' type */
1625 void gen_cvt_ftoi(int t)
1627 int ft, bt, size, r;
1628 ft = vtop->type.t;
1629 bt = ft & VT_BTYPE;
1630 if (bt == VT_LDOUBLE) {
1631 gen_cvt_ftof(VT_DOUBLE);
1632 bt = VT_DOUBLE;
1635 gv(RC_FLOAT);
1636 if (t != VT_INT)
1637 size = 8;
1638 else
1639 size = 4;
1641 r = get_reg(RC_INT);
1642 if (bt == VT_FLOAT) {
1643 o(0xf3);
1644 } else if (bt == VT_DOUBLE) {
1645 o(0xf2);
1646 } else {
1647 assert(0);
1649 orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
1650 o(0xc0 + (REG_VALUE(r) << 3));
1651 vtop->r = r;
1654 /* computed goto support */
1655 void ggoto(void)
1657 gcall_or_jmp(1);
1658 vtop--;
1661 /* end of x86-64 code generator */
1662 /*************************************************************/
1663 #endif /* ! TARGET_DEFS_ONLY */
1664 /******************************************************/