tccrun: win32: improve rt_get_caller_pc
[tinycc/miki.git] / x86_64-gen.c
blob247027a508296836f4ac9bf8348f9d4ce6265a12
/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
23 #ifdef TARGET_DEFS_ONLY
/* number of available registers */
#define NB_REGS         5
#define NB_ASM_REGS     8

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_XMM0    0x0020
#define RC_ST0     0x0040 /* only for long double */
#define RC_IRET    RC_RAX /* function return: integer register */
#define RC_LRET    RC_RDX /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
/* pretty names for the registers.  The values are the hardware register
   numbers for the integer registers (see REG_VALUE/REX_BASE); TREG_MEM
   is a flag bit OR'ed onto a register number by the code generator. */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_XMM0 = 3,
    TREG_ST0 = 4,

    TREG_RSI = 6,
    TREG_RDI = 7,
    TREG_R8 = 8,
    TREG_R9 = 9,

    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_MEM = 0x10,
};
/* bit 3 of a register number (goes into the REX prefix) and its low
   three bits (go into the modrm/opcode byte) */
#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 8
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8
84 ST_FUNC void gen_opl(int op);
85 ST_FUNC void gen_le64(int64_t c);
/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE  0x1000
101 /******************************************************/
102 #else /* ! TARGET_DEFS_ONLY */
103 /******************************************************/
104 #include "tcc.h"
105 #include <assert.h>
107 ST_DATA const int reg_classes[NB_REGS] = {
108 /* eax */ RC_INT | RC_RAX,
109 /* ecx */ RC_INT | RC_RCX,
110 /* edx */ RC_INT | RC_RDX,
111 /* xmm0 */ RC_FLOAT | RC_XMM0,
112 /* st0 */ RC_ST0,
113 #if NB_REGS == 10
117 RC_INT | RC_R8,
118 RC_INT | RC_R9,
119 #endif
/* text offset just past the bytes reserved for the current function's
   prolog; set by gfunc_prolog() and used by gfunc_epilog() to go back
   and emit the prolog once the frame size is known */
static unsigned long func_sub_sp_offset;
/* operand of the 'ret n' emitted by gfunc_epilog(); 0 selects plain 'ret' */
static int func_ret_sub;
125 /* XXX: make it faster ? */
126 void g(int c)
128 int ind1;
129 ind1 = ind + 1;
130 if (ind1 > cur_text_section->data_allocated)
131 section_realloc(cur_text_section, ind1);
132 cur_text_section->data[ind] = c;
133 ind = ind1;
/* Emit the bytes of 'c', least significant first, until the remaining
   value is zero.  Consequently nothing is emitted for c == 0 and high
   zero bytes are dropped; use g() to emit a literal zero byte. */
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
/* Emit the low 16 bits of 'v' as two bytes, least significant first. */
void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}
/* Emit 'c' as four bytes, least significant first. */
void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}
/* Emit 'c' as eight bytes, least significant first. */
void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
170 void orex(int ll, int r, int r2, int b)
172 if ((r & VT_VALMASK) >= VT_CONST)
173 r = 0;
174 if ((r2 & VT_VALMASK) >= VT_CONST)
175 r2 = 0;
176 if (ll || REX_BASE(r) || REX_BASE(r2))
177 o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
178 o(b);
181 /* output a symbol and patch all calls to it */
182 void gsym_addr(int t, int a)
184 int n, *ptr;
185 while (t) {
186 ptr = (int *)(cur_text_section->data + t);
187 n = *ptr; /* next value */
188 *ptr = a - t - 4;
189 t = n;
193 void gsym(int t)
195 gsym_addr(t, ind);
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad
202 static int is64_type(int t)
204 return ((t & VT_BTYPE) == VT_PTR ||
205 (t & VT_BTYPE) == VT_FUNC ||
206 (t & VT_BTYPE) == VT_LLONG);
209 static int is_sse_float(int t) {
210 int bt;
211 bt = t & VT_BTYPE;
212 return bt == VT_DOUBLE || bt == VT_FLOAT;
216 /* instruction + 4 bytes data. Return the address of the data */
217 ST_FUNC int oad(int c, int s)
219 int ind1;
221 o(c);
222 ind1 = ind + 4;
223 if (ind1 > cur_text_section->data_allocated)
224 section_realloc(cur_text_section, ind1);
225 *(int *)(cur_text_section->data + ind) = s;
226 s = ind;
227 ind = ind1;
228 return s;
231 ST_FUNC void gen_addr32(int r, Sym *sym, int c)
233 if (r & VT_SYM)
234 greloc(cur_text_section, sym, ind, R_X86_64_32);
235 gen_le32(c);
238 /* output constant with relocation if 'r & VT_SYM' is true */
239 ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
241 if (r & VT_SYM)
242 greloc(cur_text_section, sym, ind, R_X86_64_64);
243 gen_le64(c);
246 /* output constant with relocation if 'r & VT_SYM' is true */
247 ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
249 if (r & VT_SYM)
250 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
251 gen_le32(c-4);
254 /* output got address with relocation */
255 static void gen_gotpcrel(int r, Sym *sym, int c)
257 #ifndef TCC_TARGET_PE
258 Section *sr;
259 ElfW(Rela) *rel;
260 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
261 sr = cur_text_section->reloc;
262 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
263 rel->r_addend = -4;
264 #else
265 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
266 cur_text_section->data[ind-3],
267 cur_text_section->data[ind-2],
268 cur_text_section->data[ind-1]
270 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
271 #endif
272 gen_le32(0);
273 if (c) {
274 /* we use add c, %xxx for displacement */
275 orex(1, r, 0, 0x81);
276 o(0xc0 + REG_VALUE(r));
277 gen_le32(c);
281 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
283 op_reg = REG_VALUE(op_reg) << 3;
284 if ((r & VT_VALMASK) == VT_CONST) {
285 /* constant memory reference */
286 o(0x05 | op_reg);
287 if (is_got) {
288 gen_gotpcrel(r, sym, c);
289 } else {
290 gen_addrpc32(r, sym, c);
292 } else if ((r & VT_VALMASK) == VT_LOCAL) {
293 /* currently, we use only ebp as base */
294 if (c == (char)c) {
295 /* short reference */
296 o(0x45 | op_reg);
297 g(c);
298 } else {
299 oad(0x85 | op_reg, c);
301 } else if ((r & VT_VALMASK) >= TREG_MEM) {
302 if (c) {
303 g(0x80 | op_reg | REG_VALUE(r));
304 gen_le32(c);
305 } else {
306 g(0x00 | op_reg | REG_VALUE(r));
308 } else {
309 g(0x00 | op_reg | REG_VALUE(r));
313 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
314 opcode bits */
315 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
317 gen_modrm_impl(op_reg, r, sym, c, 0);
320 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
321 opcode bits */
322 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
324 int is_got;
325 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
326 orex(1, r, op_reg, opcode);
327 gen_modrm_impl(op_reg, r, sym, c, is_got);
331 /* load 'r' from value 'sv' */
332 void load(int r, SValue *sv)
334 int v, t, ft, fc, fr;
335 SValue v1;
337 #ifdef TCC_TARGET_PE
338 SValue v2;
339 sv = pe_getimport(sv, &v2);
340 #endif
342 fr = sv->r;
343 ft = sv->type.t;
344 fc = sv->c.ul;
346 #ifndef TCC_TARGET_PE
347 /* we use indirect access via got */
348 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
349 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
350 /* use the result register as a temporal register */
351 int tr = r | TREG_MEM;
352 if (is_float(ft)) {
353 /* we cannot use float registers as a temporal register */
354 tr = get_reg(RC_INT) | TREG_MEM;
356 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
358 /* load from the temporal register */
359 fr = tr | VT_LVAL;
361 #endif
363 v = fr & VT_VALMASK;
364 if (fr & VT_LVAL) {
365 int b, ll;
366 if (v == VT_LLOCAL) {
367 v1.type.t = VT_PTR;
368 v1.r = VT_LOCAL | VT_LVAL;
369 v1.c.ul = fc;
370 load(r, &v1);
371 fr = r;
373 ll = 0;
374 if ((ft & VT_BTYPE) == VT_FLOAT) {
375 b = 0x6e0f66, r = 0; /* movd */
376 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
377 b = 0x7e0ff3, r = 0; /* movq */
378 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
379 b = 0xdb, r = 5; /* fldt */
380 } else if ((ft & VT_TYPE) == VT_BYTE) {
381 b = 0xbe0f; /* movsbl */
382 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
383 b = 0xb60f; /* movzbl */
384 } else if ((ft & VT_TYPE) == VT_SHORT) {
385 b = 0xbf0f; /* movswl */
386 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
387 b = 0xb70f; /* movzwl */
388 } else {
389 ll = is64_type(ft);
390 b = 0x8b;
392 if (ll) {
393 gen_modrm64(b, r, fr, sv->sym, fc);
394 } else {
395 orex(ll, fr, r, b);
396 gen_modrm(r, fr, sv->sym, fc);
398 } else {
399 if (v == VT_CONST) {
400 if (fr & VT_SYM) {
401 #ifdef TCC_TARGET_PE
402 orex(1,0,r,0x8d);
403 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
404 gen_addrpc32(fr, sv->sym, fc);
405 #else
406 if (sv->sym->type.t & VT_STATIC) {
407 orex(1,0,r,0x8d);
408 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
409 gen_addrpc32(fr, sv->sym, fc);
410 } else {
411 orex(1,0,r,0x8b);
412 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
413 gen_gotpcrel(fr, sv->sym, fc);
415 #endif
416 } else if (is64_type(ft)) {
417 orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
418 gen_le64(sv->c.ull);
419 } else {
420 orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
421 gen_le32(fc);
423 } else if (v == VT_LOCAL) {
424 orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
425 gen_modrm(r, VT_LOCAL, sv->sym, fc);
426 } else if (v == VT_CMP) {
427 orex(0,r,0,0);
428 oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
429 orex(0,r,0, 0x0f); /* setxx %br */
430 o(fc);
431 o(0xc0 + REG_VALUE(r));
432 } else if (v == VT_JMP || v == VT_JMPI) {
433 t = v & 1;
434 orex(0,r,0,0);
435 oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
436 o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
437 gsym(fc);
438 orex(0,r,0,0);
439 oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
440 } else if (v != r) {
441 if (r == TREG_XMM0) {
442 assert(v == TREG_ST0);
443 /* gen_cvt_ftof(VT_DOUBLE); */
444 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
445 /* movsd -0x10(%rsp),%xmm0 */
446 o(0x44100ff2);
447 o(0xf024);
448 } else if (r == TREG_ST0) {
449 assert(v == TREG_XMM0);
450 /* gen_cvt_ftof(VT_LDOUBLE); */
451 /* movsd %xmm0,-0x10(%rsp) */
452 o(0x44110ff2);
453 o(0xf024);
454 o(0xf02444dd); /* fldl -0x10(%rsp) */
455 } else {
456 orex(1,r,v, 0x89);
457 o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
463 /* store register 'r' in lvalue 'v' */
464 void store(int r, SValue *v)
466 int fr, bt, ft, fc;
467 int op64 = 0;
468 /* store the REX prefix in this variable when PIC is enabled */
469 int pic = 0;
471 #ifdef TCC_TARGET_PE
472 SValue v2;
473 v = pe_getimport(v, &v2);
474 #endif
476 ft = v->type.t;
477 fc = v->c.ul;
478 fr = v->r & VT_VALMASK;
479 bt = ft & VT_BTYPE;
481 #ifndef TCC_TARGET_PE
482 /* we need to access the variable via got */
483 if (fr == VT_CONST && (v->r & VT_SYM)) {
484 /* mov xx(%rip), %r11 */
485 o(0x1d8b4c);
486 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
487 pic = is64_type(bt) ? 0x49 : 0x41;
489 #endif
491 /* XXX: incorrect if float reg to reg */
492 if (bt == VT_FLOAT) {
493 o(0x66);
494 o(pic);
495 o(0x7e0f); /* movd */
496 r = 0;
497 } else if (bt == VT_DOUBLE) {
498 o(0x66);
499 o(pic);
500 o(0xd60f); /* movq */
501 r = 0;
502 } else if (bt == VT_LDOUBLE) {
503 o(0xc0d9); /* fld %st(0) */
504 o(pic);
505 o(0xdb); /* fstpt */
506 r = 7;
507 } else {
508 if (bt == VT_SHORT)
509 o(0x66);
510 o(pic);
511 if (bt == VT_BYTE || bt == VT_BOOL)
512 orex(0, 0, r, 0x88);
513 else if (is64_type(bt))
514 op64 = 0x89;
515 else
516 orex(0, 0, r, 0x89);
518 if (pic) {
519 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
520 if (op64)
521 o(op64);
522 o(3 + (r << 3));
523 } else if (op64) {
524 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
525 gen_modrm64(op64, r, v->r, v->sym, fc);
526 } else if (fr != r) {
527 /* XXX: don't we really come here? */
528 abort();
529 o(0xc0 + fr + r * 8); /* mov r, fr */
531 } else {
532 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
533 gen_modrm(r, v->r, v->sym, fc);
534 } else if (fr != r) {
535 /* XXX: don't we really come here? */
536 abort();
537 o(0xc0 + fr + r * 8); /* mov r, fr */
542 /* 'is_jmp' is '1' if it is a jump */
543 static void gcall_or_jmp(int is_jmp)
545 int r;
546 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
547 /* constant case */
548 if (vtop->r & VT_SYM) {
549 /* relocation case */
550 greloc(cur_text_section, vtop->sym,
551 ind + 1, R_X86_64_PC32);
552 } else {
553 /* put an empty PC32 relocation */
554 put_elf_reloc(symtab_section, cur_text_section,
555 ind + 1, R_X86_64_PC32, 0);
557 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
558 } else {
559 /* otherwise, indirect call */
560 r = TREG_R11;
561 load(r, vtop);
562 o(0x41); /* REX */
563 o(0xff); /* call/jmp *r */
564 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
568 #ifdef TCC_TARGET_PE
570 #define REGN 4
571 static const uint8_t arg_regs[] = {
572 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
575 static int func_scratch;
577 /* Generate function call. The function address is pushed first, then
578 all the parameters in call order. This functions pops all the
579 parameters and the function address. */
/* Emit instruction 'b' with the memory operand d(%rsp).
     b : opcode bytes, emitted behind a prefix with the 64-bit
         operand-size bit set
     r : value for the modrm 'reg' field; when bit 0x100 is set the low
         bits are used verbatim and 'r' does not contribute to the
         prefix (callers pass 0x107 for fstpt and 0x100 for xmm0)
     d : displacement; the one-byte form is used when it fits */
void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
593 void gfunc_call(int nb_args)
595 int size, align, r, args_size, i, d, j, bt;
596 int nb_reg_args, gen_reg;
598 /* calculate the number of integer/float arguments */
599 nb_reg_args = 0;
600 for(i = 0; i < nb_args; i++) {
601 bt = (vtop[-i].type.t & VT_BTYPE);
602 if (bt != VT_STRUCT && bt != VT_LDOUBLE)
603 nb_reg_args++;
606 args_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;
608 /* for struct arguments, we need to call memcpy and the function
609 call breaks register passing arguments we are preparing.
610 So, we process arguments which will be passed by stack first. */
611 for(i = 0; i < nb_args; i++) {
612 SValue *sv = &vtop[-i];
613 bt = (sv->type.t & VT_BTYPE);
614 if (bt == VT_STRUCT) {
615 size = type_size(&sv->type, &align);
616 /* align to stack align size */
617 size = (size + 15) & ~15;
618 /* generate structure store */
619 r = get_reg(RC_INT);
620 gen_offs_sp(0x8d, r, args_size);
621 args_size += size;
623 /* generate memcpy call */
624 vset(&sv->type, r | VT_LVAL, 0);
625 vpushv(sv);
626 vstore();
627 --vtop;
629 } else if (bt == VT_LDOUBLE) {
631 gv(RC_ST0);
632 gen_offs_sp(0xdb, 0x107, args_size);
633 args_size += 16;
638 if (func_scratch < args_size)
639 func_scratch = args_size;
641 for (i = 0; i < REGN; ++i)
642 save_reg(arg_regs[i]);
644 gen_reg = nb_reg_args;
645 for(i = 0; i < nb_args; i++) {
646 bt = (vtop->type.t & VT_BTYPE);
647 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
648 ; /* done */
649 } else if (is_sse_float(vtop->type.t)) {
650 gv(RC_FLOAT); /* only one float register */
651 j = --gen_reg;
652 if (j >= REGN) {
653 /* movq %xmm0, j*8(%rsp) */
654 gen_offs_sp(0xd60f66, 0x100, j*8);
655 } else {
656 /* movaps %xmm0, %xmmN */
657 o(0x280f);
658 o(0xc0 + (j << 3));
659 d = arg_regs[j];
660 /* mov %xmm0, %rxx */
661 o(0x66);
662 orex(1,d,0, 0x7e0f);
663 o(0xc0 + REG_VALUE(d));
665 } else {
666 j = --gen_reg;
667 if (j >= REGN) {
668 r = gv(RC_INT);
669 gen_offs_sp(0x89, r, j*8);
670 } else {
671 d = arg_regs[j];
672 if (d < NB_REGS) {
673 gv(reg_classes[d] & ~RC_INT);
674 } else {
675 r = gv(RC_INT);
676 if (d != r) {
677 orex(1,d,r, 0x89);
678 o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8);
684 vtop--;
686 save_regs(0);
687 gcall_or_jmp(0);
688 vtop--;
692 #define FUNC_PROLOG_SIZE 11
694 /* generate function prolog of type 't' */
695 void gfunc_prolog(CType *func_type)
697 int addr, align, size, reg_param_index, bt;
698 Sym *sym;
699 CType *type;
701 func_ret_sub = 0;
702 func_scratch = 0;
703 loc = 0;
705 addr = PTR_SIZE * 2;
706 ind += FUNC_PROLOG_SIZE;
707 func_sub_sp_offset = ind;
708 reg_param_index = 0;
710 sym = func_type->ref;
712 /* if the function returns a structure, then add an
713 implicit pointer parameter */
714 func_vt = sym->type;
715 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
716 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
717 reg_param_index++;
718 addr += PTR_SIZE;
721 /* define parameters */
722 while ((sym = sym->next) != NULL) {
723 type = &sym->type;
724 bt = type->t & VT_BTYPE;
725 if (bt == VT_STRUCT || bt == VT_LDOUBLE)
726 continue;
727 if (reg_param_index < REGN) {
728 /* save arguments passed by register */
729 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
731 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
732 reg_param_index++;
733 addr += PTR_SIZE;
736 while (reg_param_index < REGN) {
737 if (func_type->ref->c == FUNC_ELLIPSIS)
738 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
739 reg_param_index++;
740 addr += PTR_SIZE;
743 sym = func_type->ref;
744 while ((sym = sym->next) != NULL) {
745 type = &sym->type;
746 bt = type->t & VT_BTYPE;
747 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
748 size = type_size(type, &align);
749 size = (size + 15) & -16;
750 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
751 addr += size;
756 /* generate function epilog */
757 void gfunc_epilog(void)
759 int v, saved_ind;
761 o(0xc9); /* leave */
762 if (func_ret_sub == 0) {
763 o(0xc3); /* ret */
764 } else {
765 o(0xc2); /* ret n */
766 g(func_ret_sub);
767 g(func_ret_sub >> 8);
770 saved_ind = ind;
771 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
772 /* align local size to word & save local variables */
773 v = (func_scratch + -loc + 15) & -16;
775 if (v >= 4096) {
776 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
777 oad(0xb8, v); /* mov stacksize, %eax */
778 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
779 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
780 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
781 } else {
782 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
783 o(0xec8148); /* sub rsp, stacksize */
784 gen_le32(v);
787 cur_text_section->data_offset = saved_ind;
788 pe_add_unwind_data(ind, saved_ind, v);
789 ind = cur_text_section->data_offset;
792 #else
/* Emit 'add $val, %rsp', using the one-byte immediate form when 'val'
   fits in a signed char. */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
804 #define REGN 6
805 static const uint8_t arg_regs[REGN] = {
806 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
809 /* Generate function call. The function address is pushed first, then
810 all the parameters in call order. This functions pops all the
811 parameters and the function address. */
812 void gfunc_call(int nb_args)
814 int size, align, r, args_size, i;
815 SValue *orig_vtop;
816 int nb_reg_args = 0;
817 int nb_sse_args = 0;
818 int sse_reg, gen_reg;
820 /* calculate the number of integer/float arguments */
821 args_size = 0;
822 for(i = 0; i < nb_args; i++) {
823 if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
824 args_size += type_size(&vtop[-i].type, &align);
825 args_size = (args_size + 7) & ~7;
826 } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
827 args_size += 16;
828 } else if (is_sse_float(vtop[-i].type.t)) {
829 nb_sse_args++;
830 if (nb_sse_args > 8) args_size += 8;
831 } else {
832 nb_reg_args++;
833 if (nb_reg_args > REGN) args_size += 8;
837 /* for struct arguments, we need to call memcpy and the function
838 call breaks register passing arguments we are preparing.
839 So, we process arguments which will be passed by stack first. */
840 orig_vtop = vtop;
841 gen_reg = nb_reg_args;
842 sse_reg = nb_sse_args;
844 /* adjust stack to align SSE boundary */
845 if (args_size &= 15) {
846 /* fetch cpu flag before the following sub will change the value */
847 if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
848 gv(RC_INT);
850 args_size = 16 - args_size;
851 o(0x48);
852 oad(0xec81, args_size); /* sub $xxx, %rsp */
855 for(i = 0; i < nb_args; i++) {
856 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
857 size = type_size(&vtop->type, &align);
858 /* align to stack align size */
859 size = (size + 7) & ~7;
860 /* allocate the necessary size on stack */
861 o(0x48);
862 oad(0xec81, size); /* sub $xxx, %rsp */
863 /* generate structure store */
864 r = get_reg(RC_INT);
865 orex(1, r, 0, 0x89); /* mov %rsp, r */
866 o(0xe0 + REG_VALUE(r));
868 /* following code breaks vtop[1], vtop[2], and vtop[3] */
869 SValue tmp1 = vtop[1];
870 SValue tmp2 = vtop[2];
871 SValue tmp3 = vtop[3];
872 vset(&vtop->type, r | VT_LVAL, 0);
873 vswap();
874 vstore();
875 vtop[1] = tmp1;
876 vtop[2] = tmp2;
877 vtop[3] = tmp3;
879 args_size += size;
880 } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
881 gv(RC_ST0);
882 size = LDOUBLE_SIZE;
883 oad(0xec8148, size); /* sub $xxx, %rsp */
884 o(0x7cdb); /* fstpt 0(%rsp) */
885 g(0x24);
886 g(0x00);
887 args_size += size;
888 } else if (is_sse_float(vtop->type.t)) {
889 int j = --sse_reg;
890 if (j >= 8) {
891 gv(RC_FLOAT);
892 o(0x50); /* push $rax */
893 /* movq %xmm0, (%rsp) */
894 o(0x04d60f66);
895 o(0x24);
896 args_size += 8;
898 } else {
899 int j = --gen_reg;
900 /* simple type */
901 /* XXX: implicit cast ? */
902 if (j >= REGN) {
903 r = gv(RC_INT);
904 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
905 args_size += 8;
908 vtop--;
910 vtop = orig_vtop;
912 save_regs(0); /* save used temporary registers */
914 /* then, we prepare register passing arguments.
915 Note that we cannot set RDX and RCX in this loop because gv()
916 may break these temporary registers. Let's use R10 and R11
917 instead of them */
918 gen_reg = nb_reg_args;
919 sse_reg = nb_sse_args;
920 for(i = 0; i < nb_args; i++) {
921 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
922 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
923 } else if (is_sse_float(vtop->type.t)) {
924 int j = --sse_reg;
925 if (j < 8) {
926 gv(RC_FLOAT); /* only one float register */
927 /* movaps %xmm0, %xmmN */
928 o(0x280f);
929 o(0xc0 + (sse_reg << 3));
931 } else {
932 int j = --gen_reg;
933 /* simple type */
934 /* XXX: implicit cast ? */
935 if (j < REGN) {
936 int d = arg_regs[j];
937 r = gv(RC_INT);
938 if (j == 2 || j == 3)
939 /* j=2: r10, j=3: r11 */
940 d = j + 8;
941 orex(1,d,r,0x89); /* mov */
942 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
945 vtop--;
948 /* Copy R10 and R11 into RDX and RCX, respectively */
949 if (nb_reg_args > 2) {
950 o(0xd2894c); /* mov %r10, %rdx */
951 if (nb_reg_args > 3) {
952 o(0xd9894c); /* mov %r11, %rcx */
956 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
957 gcall_or_jmp(0);
958 if (args_size)
959 gadd_sp(args_size);
960 vtop--;
964 #define FUNC_PROLOG_SIZE 11
966 static void push_arg_reg(int i) {
967 loc -= 8;
968 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
971 /* generate function prolog of type 't' */
972 void gfunc_prolog(CType *func_type)
974 int i, addr, align, size;
975 int param_index, param_addr, reg_param_index, sse_param_index;
976 Sym *sym;
977 CType *type;
979 sym = func_type->ref;
980 addr = PTR_SIZE * 2;
981 loc = 0;
982 ind += FUNC_PROLOG_SIZE;
983 func_sub_sp_offset = ind;
984 func_ret_sub = 0;
986 if (func_type->ref->c == FUNC_ELLIPSIS) {
987 int seen_reg_num, seen_sse_num, seen_stack_size;
988 seen_reg_num = seen_sse_num = 0;
989 /* frame pointer and return address */
990 seen_stack_size = PTR_SIZE * 2;
991 /* count the number of seen parameters */
992 sym = func_type->ref;
993 while ((sym = sym->next) != NULL) {
994 type = &sym->type;
995 if (is_sse_float(type->t)) {
996 if (seen_sse_num < 8) {
997 seen_sse_num++;
998 } else {
999 seen_stack_size += 8;
1001 } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
1002 size = type_size(type, &align);
1003 size = (size + 7) & ~7;
1004 seen_stack_size += size;
1005 } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
1006 seen_stack_size += LDOUBLE_SIZE;
1007 } else {
1008 if (seen_reg_num < REGN) {
1009 seen_reg_num++;
1010 } else {
1011 seen_stack_size += 8;
1016 loc -= 16;
1017 /* movl $0x????????, -0x10(%rbp) */
1018 o(0xf045c7);
1019 gen_le32(seen_reg_num * 8);
1020 /* movl $0x????????, -0xc(%rbp) */
1021 o(0xf445c7);
1022 gen_le32(seen_sse_num * 16 + 48);
1023 /* movl $0x????????, -0x8(%rbp) */
1024 o(0xf845c7);
1025 gen_le32(seen_stack_size);
1027 /* save all register passing arguments */
1028 for (i = 0; i < 8; i++) {
1029 loc -= 16;
1030 o(0xd60f66); /* movq */
1031 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1032 /* movq $0, loc+8(%rbp) */
1033 o(0x85c748);
1034 gen_le32(loc + 8);
1035 gen_le32(0);
1037 for (i = 0; i < REGN; i++) {
1038 push_arg_reg(REGN-1-i);
1042 sym = func_type->ref;
1043 param_index = 0;
1044 reg_param_index = 0;
1045 sse_param_index = 0;
1047 /* if the function returns a structure, then add an
1048 implicit pointer parameter */
1049 func_vt = sym->type;
1050 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
1051 push_arg_reg(reg_param_index);
1052 param_addr = loc;
1054 func_vc = loc;
1055 param_index++;
1056 reg_param_index++;
1058 /* define parameters */
1059 while ((sym = sym->next) != NULL) {
1060 type = &sym->type;
1061 size = type_size(type, &align);
1062 size = (size + 7) & ~7;
1063 if (is_sse_float(type->t)) {
1064 if (sse_param_index < 8) {
1065 /* save arguments passed by register */
1066 loc -= 8;
1067 o(0xd60f66); /* movq */
1068 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
1069 param_addr = loc;
1070 } else {
1071 param_addr = addr;
1072 addr += size;
1074 sse_param_index++;
1076 } else if ((type->t & VT_BTYPE) == VT_STRUCT ||
1077 (type->t & VT_BTYPE) == VT_LDOUBLE) {
1078 param_addr = addr;
1079 addr += size;
1080 } else {
1081 if (reg_param_index < REGN) {
1082 /* save arguments passed by register */
1083 push_arg_reg(reg_param_index);
1084 param_addr = loc;
1085 } else {
1086 param_addr = addr;
1087 addr += 8;
1089 reg_param_index++;
1091 sym_push(sym->v & ~SYM_FIELD, type,
1092 VT_LOCAL | VT_LVAL, param_addr);
1093 param_index++;
1097 /* generate function epilog */
1098 void gfunc_epilog(void)
1100 int v, saved_ind;
1102 o(0xc9); /* leave */
1103 if (func_ret_sub == 0) {
1104 o(0xc3); /* ret */
1105 } else {
1106 o(0xc2); /* ret n */
1107 g(func_ret_sub);
1108 g(func_ret_sub >> 8);
1110 /* align local size to word & save local variables */
1111 v = (-loc + 15) & -16;
1112 saved_ind = ind;
1113 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1114 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1115 o(0xec8148); /* sub rsp, stacksize */
1116 gen_le32(v);
1117 ind = saved_ind;
1120 #endif /* not PE */
/* generate a jump to a label.  't' is the current head of the label's
   jump chain; it is stored in the new jump's 32-bit field and the
   offset of that field is returned as the new head. */
int gjmp(int t)
{
    return psym(0xe9, t);
}
1128 /* generate a jump to a fixed address */
1129 void gjmp_addr(int a)
1131 int r;
1132 r = a - ind - 2;
1133 if (r == (char)r) {
1134 g(0xeb);
1135 g(r);
1136 } else {
1137 oad(0xe9, a - ind - 5);
1141 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1142 int gtst(int inv, int t)
1144 int v, *p;
1146 v = vtop->r & VT_VALMASK;
1147 if (v == VT_CMP) {
1148 /* fast case : can jump directly since flags are set */
1149 g(0x0f);
1150 t = psym((vtop->c.i - 16) ^ inv, t);
1151 } else if (v == VT_JMP || v == VT_JMPI) {
1152 /* && or || optimization */
1153 if ((v & 1) == inv) {
1154 /* insert vtop->c jump list in t */
1155 p = &vtop->c.i;
1156 while (*p != 0)
1157 p = (int *)(cur_text_section->data + *p);
1158 *p = t;
1159 t = vtop->c.i;
1160 } else {
1161 t = gjmp(t);
1162 gsym(vtop->c.i);
1164 } else {
1165 if (is_float(vtop->type.t) ||
1166 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1167 vpushi(0);
1168 gen_op(TOK_NE);
1170 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1171 /* constant jmp optimization */
1172 if ((vtop->c.i != 0) != inv)
1173 t = gjmp(t);
1174 } else {
1175 v = gv(RC_INT);
1176 orex(0,v,v,0x85);
1177 o(0xc0 + REG_VALUE(v) * 9);
1178 g(0x0f);
1179 t = psym(0x85 ^ inv, t);
1182 vtop--;
1183 return t;
1186 /* generate an integer binary operation */
1187 void gen_opi(int op)
1189 int r, fr, opc, c;
1190 int ll, uu, cc;
1192 ll = is64_type(vtop[-1].type.t);
1193 uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
1194 cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
1196 switch(op) {
1197 case '+':
1198 case TOK_ADDC1: /* add with carry generation */
1199 opc = 0;
1200 gen_op8:
1201 if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
1202 /* constant case */
1203 vswap();
1204 r = gv(RC_INT);
1205 vswap();
1206 c = vtop->c.i;
1207 if (c == (char)c) {
1208 /* XXX: generate inc and dec for smaller code ? */
1209 orex(ll, r, 0, 0x83);
1210 o(0xc0 | (opc << 3) | REG_VALUE(r));
1211 g(c);
1212 } else {
1213 orex(ll, r, 0, 0x81);
1214 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1216 } else {
1217 gv2(RC_INT, RC_INT);
1218 r = vtop[-1].r;
1219 fr = vtop[0].r;
1220 orex(ll, r, fr, (opc << 3) | 0x01);
1221 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1223 vtop--;
1224 if (op >= TOK_ULT && op <= TOK_GT) {
1225 vtop->r = VT_CMP;
1226 vtop->c.i = op;
1228 break;
1229 case '-':
1230 case TOK_SUBC1: /* sub with carry generation */
1231 opc = 5;
1232 goto gen_op8;
1233 case TOK_ADDC2: /* add with carry use */
1234 opc = 2;
1235 goto gen_op8;
1236 case TOK_SUBC2: /* sub with carry use */
1237 opc = 3;
1238 goto gen_op8;
1239 case '&':
1240 opc = 4;
1241 goto gen_op8;
1242 case '^':
1243 opc = 6;
1244 goto gen_op8;
1245 case '|':
1246 opc = 1;
1247 goto gen_op8;
1248 case '*':
1249 gv2(RC_INT, RC_INT);
1250 r = vtop[-1].r;
1251 fr = vtop[0].r;
1252 orex(ll, fr, r, 0xaf0f); /* imul fr, r */
1253 o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
1254 vtop--;
1255 break;
1256 case TOK_SHL:
1257 opc = 4;
1258 goto gen_shift;
1259 case TOK_SHR:
1260 opc = 5;
1261 goto gen_shift;
1262 case TOK_SAR:
1263 opc = 7;
1264 gen_shift:
1265 opc = 0xc0 | (opc << 3);
1266 if (cc) {
1267 /* constant case */
1268 vswap();
1269 r = gv(RC_INT);
1270 vswap();
1271 orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
1272 o(opc | REG_VALUE(r));
1273 g(vtop->c.i & (ll ? 63 : 31));
1274 } else {
1275 /* we generate the shift in ecx */
1276 gv2(RC_INT, RC_RCX);
1277 r = vtop[-1].r;
1278 orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
1279 o(opc | REG_VALUE(r));
1281 vtop--;
1282 break;
1283 case TOK_UDIV:
1284 case TOK_UMOD:
1285 uu = 1;
1286 goto divmod;
1287 case '/':
1288 case '%':
1289 case TOK_PDIV:
1290 uu = 0;
1291 divmod:
1292 /* first operand must be in eax */
1293 /* XXX: need better constraint for second operand */
1294 gv2(RC_RAX, RC_RCX);
1295 r = vtop[-1].r;
1296 fr = vtop[0].r;
1297 vtop--;
1298 save_reg(TREG_RDX);
1299 orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1300 orex(ll, fr, 0, 0xf7); /* div fr, %eax */
1301 o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
1302 if (op == '%' || op == TOK_UMOD)
1303 r = TREG_RDX;
1304 else
1305 r = TREG_RAX;
1306 vtop->r = r;
1307 break;
1308 default:
1309 opc = 7;
1310 goto gen_op8;
/* Sibling entry point of gen_opi(): the operation 'op' is forwarded to
   gen_opi() unchanged, so both entry points share a single code path.
   (Presumably this is the 'long' flavour of the integer operation
   generator -- confirm against the callers.) */
void gen_opl(int op)
{
    gen_opi(op);
}
1319 /* generate a floating point operation 'v = t1 op t2' instruction. The
1320 two operands are guaranted to have the same floating point type */
1321 /* XXX: need to use ST1 too */
1322 void gen_opf(int op)
1324 int a, ft, fc, swapped, r;
1325 int float_type =
1326 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1328 /* convert constants to memory references */
1329 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1330 vswap();
1331 gv(float_type);
1332 vswap();
1334 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1335 gv(float_type);
1337 /* must put at least one value in the floating point register */
1338 if ((vtop[-1].r & VT_LVAL) &&
1339 (vtop[0].r & VT_LVAL)) {
1340 vswap();
1341 gv(float_type);
1342 vswap();
1344 swapped = 0;
1345 /* swap the stack if needed so that t1 is the register and t2 is
1346 the memory reference */
1347 if (vtop[-1].r & VT_LVAL) {
1348 vswap();
1349 swapped = 1;
1351 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1352 if (op >= TOK_ULT && op <= TOK_GT) {
1353 /* load on stack second operand */
1354 load(TREG_ST0, vtop);
1355 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1356 if (op == TOK_GE || op == TOK_GT)
1357 swapped = !swapped;
1358 else if (op == TOK_EQ || op == TOK_NE)
1359 swapped = 0;
1360 if (swapped)
1361 o(0xc9d9); /* fxch %st(1) */
1362 o(0xe9da); /* fucompp */
1363 o(0xe0df); /* fnstsw %ax */
1364 if (op == TOK_EQ) {
1365 o(0x45e480); /* and $0x45, %ah */
1366 o(0x40fC80); /* cmp $0x40, %ah */
1367 } else if (op == TOK_NE) {
1368 o(0x45e480); /* and $0x45, %ah */
1369 o(0x40f480); /* xor $0x40, %ah */
1370 op = TOK_NE;
1371 } else if (op == TOK_GE || op == TOK_LE) {
1372 o(0x05c4f6); /* test $0x05, %ah */
1373 op = TOK_EQ;
1374 } else {
1375 o(0x45c4f6); /* test $0x45, %ah */
1376 op = TOK_EQ;
1378 vtop--;
1379 vtop->r = VT_CMP;
1380 vtop->c.i = op;
1381 } else {
1382 /* no memory reference possible for long double operations */
1383 load(TREG_ST0, vtop);
1384 swapped = !swapped;
1386 switch(op) {
1387 default:
1388 case '+':
1389 a = 0;
1390 break;
1391 case '-':
1392 a = 4;
1393 if (swapped)
1394 a++;
1395 break;
1396 case '*':
1397 a = 1;
1398 break;
1399 case '/':
1400 a = 6;
1401 if (swapped)
1402 a++;
1403 break;
1405 ft = vtop->type.t;
1406 fc = vtop->c.ul;
1407 o(0xde); /* fxxxp %st, %st(1) */
1408 o(0xc1 + (a << 3));
1409 vtop--;
1411 } else {
1412 if (op >= TOK_ULT && op <= TOK_GT) {
1413 /* if saved lvalue, then we must reload it */
1414 r = vtop->r;
1415 fc = vtop->c.ul;
1416 if ((r & VT_VALMASK) == VT_LLOCAL) {
1417 SValue v1;
1418 r = get_reg(RC_INT);
1419 v1.type.t = VT_INT;
1420 v1.r = VT_LOCAL | VT_LVAL;
1421 v1.c.ul = fc;
1422 load(r, &v1);
1423 fc = 0;
1426 if (op == TOK_EQ || op == TOK_NE) {
1427 swapped = 0;
1428 } else {
1429 if (op == TOK_LE || op == TOK_LT)
1430 swapped = !swapped;
1431 if (op == TOK_LE || op == TOK_GE) {
1432 op = 0x93; /* setae */
1433 } else {
1434 op = 0x97; /* seta */
1438 if (swapped) {
1439 o(0x7e0ff3); /* movq */
1440 gen_modrm(1, r, vtop->sym, fc);
1442 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1443 o(0x66);
1445 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1446 o(0xc8);
1447 } else {
1448 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1449 o(0x66);
1451 o(0x2e0f); /* ucomisd */
1452 gen_modrm(0, r, vtop->sym, fc);
1455 vtop--;
1456 vtop->r = VT_CMP;
1457 vtop->c.i = op;
1458 } else {
1459 /* no memory reference possible for long double operations */
1460 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1461 load(TREG_XMM0, vtop);
1462 swapped = !swapped;
1464 switch(op) {
1465 default:
1466 case '+':
1467 a = 0;
1468 break;
1469 case '-':
1470 a = 4;
1471 break;
1472 case '*':
1473 a = 1;
1474 break;
1475 case '/':
1476 a = 6;
1477 break;
1479 ft = vtop->type.t;
1480 fc = vtop->c.ul;
1481 if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1482 o(0xde); /* fxxxp %st, %st(1) */
1483 o(0xc1 + (a << 3));
1484 } else {
1485 /* if saved lvalue, then we must reload it */
1486 r = vtop->r;
1487 if ((r & VT_VALMASK) == VT_LLOCAL) {
1488 SValue v1;
1489 r = get_reg(RC_INT);
1490 v1.type.t = VT_INT;
1491 v1.r = VT_LOCAL | VT_LVAL;
1492 v1.c.ul = fc;
1493 load(r, &v1);
1494 fc = 0;
1496 if (swapped) {
1497 /* movq %xmm0,%xmm1 */
1498 o(0x7e0ff3);
1499 o(0xc8);
1500 load(TREG_XMM0, vtop);
1501 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1502 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1503 o(0xf2);
1504 } else {
1505 o(0xf3);
1507 o(0x0f);
1508 o(0x58 + a);
1509 o(0xc1);
1510 } else {
1511 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1512 o(0xf2);
1513 } else {
1514 o(0xf3);
1516 o(0x0f);
1517 o(0x58 + a);
1518 gen_modrm(0, r, vtop->sym, fc);
1521 vtop--;
1526 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1527 and 'long long' cases. */
1528 void gen_cvt_itof(int t)
1530 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1531 save_reg(TREG_ST0);
1532 gv(RC_INT);
1533 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1534 /* signed long long to float/double/long double (unsigned case
1535 is handled generically) */
1536 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1537 o(0x242cdf); /* fildll (%rsp) */
1538 o(0x08c48348); /* add $8, %rsp */
1539 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1540 (VT_INT | VT_UNSIGNED)) {
1541 /* unsigned int to float/double/long double */
1542 o(0x6a); /* push $0 */
1543 g(0x00);
1544 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1545 o(0x242cdf); /* fildll (%rsp) */
1546 o(0x10c48348); /* add $16, %rsp */
1547 } else {
1548 /* int to float/double/long double */
1549 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1550 o(0x2404db); /* fildl (%rsp) */
1551 o(0x08c48348); /* add $8, %rsp */
1553 vtop->r = TREG_ST0;
1554 } else {
1555 save_reg(TREG_XMM0);
1556 gv(RC_INT);
1557 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
1558 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1559 (VT_INT | VT_UNSIGNED) ||
1560 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1561 o(0x48); /* REX */
1563 o(0x2a0f);
1564 o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1565 vtop->r = TREG_XMM0;
1569 /* convert from one floating point type to another */
1570 void gen_cvt_ftof(int t)
1572 int ft, bt, tbt;
1574 ft = vtop->type.t;
1575 bt = ft & VT_BTYPE;
1576 tbt = t & VT_BTYPE;
1578 if (bt == VT_FLOAT) {
1579 gv(RC_FLOAT);
1580 if (tbt == VT_DOUBLE) {
1581 o(0xc0140f); /* unpcklps */
1582 o(0xc05a0f); /* cvtps2pd */
1583 } else if (tbt == VT_LDOUBLE) {
1584 /* movss %xmm0,-0x10(%rsp) */
1585 o(0x44110ff3);
1586 o(0xf024);
1587 o(0xf02444d9); /* flds -0x10(%rsp) */
1588 vtop->r = TREG_ST0;
1590 } else if (bt == VT_DOUBLE) {
1591 gv(RC_FLOAT);
1592 if (tbt == VT_FLOAT) {
1593 o(0xc0140f66); /* unpcklpd */
1594 o(0xc05a0f66); /* cvtpd2ps */
1595 } else if (tbt == VT_LDOUBLE) {
1596 /* movsd %xmm0,-0x10(%rsp) */
1597 o(0x44110ff2);
1598 o(0xf024);
1599 o(0xf02444dd); /* fldl -0x10(%rsp) */
1600 vtop->r = TREG_ST0;
1602 } else {
1603 gv(RC_ST0);
1604 if (tbt == VT_DOUBLE) {
1605 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1606 /* movsd -0x10(%rsp),%xmm0 */
1607 o(0x44100ff2);
1608 o(0xf024);
1609 vtop->r = TREG_XMM0;
1610 } else if (tbt == VT_FLOAT) {
1611 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1612 /* movss -0x10(%rsp),%xmm0 */
1613 o(0x44100ff3);
1614 o(0xf024);
1615 vtop->r = TREG_XMM0;
1620 /* convert fp to int 't' type */
1621 void gen_cvt_ftoi(int t)
1623 int ft, bt, size, r;
1624 ft = vtop->type.t;
1625 bt = ft & VT_BTYPE;
1626 if (bt == VT_LDOUBLE) {
1627 gen_cvt_ftof(VT_DOUBLE);
1628 bt = VT_DOUBLE;
1631 gv(RC_FLOAT);
1632 if (t != VT_INT)
1633 size = 8;
1634 else
1635 size = 4;
1637 r = get_reg(RC_INT);
1638 if (bt == VT_FLOAT) {
1639 o(0xf3);
1640 } else if (bt == VT_DOUBLE) {
1641 o(0xf2);
1642 } else {
1643 assert(0);
1645 orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
1646 o(0xc0 + (REG_VALUE(r) << 3));
1647 vtop->r = r;
1650 /* computed goto support */
1651 void ggoto(void)
1653 gcall_or_jmp(1);
1654 vtop--;
1657 /* end of x86-64 code generator */
1658 /*************************************************************/
1659 #endif /* ! TARGET_DEFS_ONLY */
1660 /******************************************************/