/* Provenance: tinycc.git, file x86_64-gen.c,
   blob 2e7c8bebbe8a438504c2df11272066425cafcc81,
   commit "win64: add tiny unwind data for setjmp/longjmp". */
/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
23 #ifdef TARGET_DEFS_ONLY
/* number of available registers */
#define NB_REGS             5
#define NB_ASM_REGS         8

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_XMM0    0x0020
#define RC_ST0     0x0040 /* only for long double */
#define RC_IRET    RC_RAX /* function return: integer register */
#define RC_LRET    RC_RDX /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */

/* pretty names for the registers.  Values 0..4 index reg_classes[];
   the remaining integer registers use their hardware numbers. */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSI = 6,
    TREG_RDI = 7,
    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 3,
    TREG_ST0 = 4,

    /* flag bit OR-ed into a register number */
    TREG_MEM = 0x10,
};

/* bit 3 of a register number (the part that goes into a REX prefix) */
#define REX_BASE(reg) (((reg) >> 3) & 1)
/* low three bits of a register number (the part that goes into ModRM) */
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 8
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8
81 ST_FUNC void gen_opl(int op);
82 ST_FUNC void gen_le64(int64_t c);
84 /******************************************************/
85 /* ELF defines */
87 #define EM_TCC_TARGET EM_X86_64
89 /* relocation type for 32 bit data relocation */
90 #define R_DATA_32 R_X86_64_32
91 #define R_DATA_PTR R_X86_64_64
92 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
93 #define R_COPY R_X86_64_COPY
95 #define ELF_START_ADDR 0x08048000
96 #define ELF_PAGE_SIZE 0x1000
98 /******************************************************/
99 #else /* ! TARGET_DEFS_ONLY */
100 /******************************************************/
101 #include "tcc.h"
102 #include <assert.h>
104 ST_DATA const int reg_classes[NB_REGS] = {
105 /* eax */ RC_INT | RC_RAX,
106 /* ecx */ RC_INT | RC_RCX,
107 /* edx */ RC_INT | RC_RDX,
108 /* xmm0 */ RC_FLOAT | RC_XMM0,
109 /* st0 */ RC_ST0,
112 static unsigned long func_sub_sp_offset;
113 static int func_ret_sub;
115 /* XXX: make it faster ? */
116 void g(int c)
118 int ind1;
119 ind1 = ind + 1;
120 if (ind1 > cur_text_section->data_allocated)
121 section_realloc(cur_text_section, ind1);
122 cur_text_section->data[ind] = c;
123 ind = ind1;
/* Emit the bytes packed in 'c', least significant byte first, until
   the remaining value is zero.  Consequently o(0) emits nothing and
   zero high-order bytes are never emitted. */
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
/* Emit the low 16 bits of 'v' in little-endian byte order. */
void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}
/* Emit the 32-bit value 'c' in little-endian byte order. */
void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}
/* Emit the 64-bit value 'c' in little-endian byte order. */
void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
160 /* output a symbol and patch all calls to it */
161 void gsym_addr(int t, int a)
163 int n, *ptr;
164 while (t) {
165 ptr = (int *)(cur_text_section->data + t);
166 n = *ptr; /* next value */
167 *ptr = a - t - 4;
168 t = n;
172 void gsym(int t)
174 gsym_addr(t, ind);
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad
181 static int is64_type(int t)
183 return ((t & VT_BTYPE) == VT_PTR ||
184 (t & VT_BTYPE) == VT_FUNC ||
185 (t & VT_BTYPE) == VT_LLONG);
188 static int is_sse_float(int t) {
189 int bt;
190 bt = t & VT_BTYPE;
191 return bt == VT_DOUBLE || bt == VT_FLOAT;
194 /* instruction + 4 bytes data. Return the address of the data */
195 ST_FUNC int oad(int c, int s)
197 int ind1;
199 o(c);
200 ind1 = ind + 4;
201 if (ind1 > cur_text_section->data_allocated)
202 section_realloc(cur_text_section, ind1);
203 *(int *)(cur_text_section->data + ind) = s;
204 s = ind;
205 ind = ind1;
206 return s;
209 ST_FUNC void gen_addr32(int r, Sym *sym, int c)
211 if (r & VT_SYM)
212 greloc(cur_text_section, sym, ind, R_X86_64_32);
213 gen_le32(c);
216 /* output constant with relocation if 'r & VT_SYM' is true */
217 ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
219 if (r & VT_SYM)
220 greloc(cur_text_section, sym, ind, R_X86_64_64);
221 gen_le64(c);
224 /* output constant with relocation if 'r & VT_SYM' is true */
225 ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
227 if (r & VT_SYM)
228 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
229 gen_le32(c-4);
232 /* output got address with relocation */
233 static void gen_gotpcrel(int r, Sym *sym, int c)
235 #ifndef TCC_TARGET_PE
236 Section *sr;
237 ElfW(Rela) *rel;
238 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
239 sr = cur_text_section->reloc;
240 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
241 rel->r_addend = -4;
242 #else
243 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
244 cur_text_section->data[ind-3],
245 cur_text_section->data[ind-2],
246 cur_text_section->data[ind-1]
248 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
249 #endif
250 gen_le32(0);
252 if (c) {
253 /* we use add c, %xxx for displacement */
254 o(0x48 + REX_BASE(r));
255 o(0x81);
256 o(0xc0 + REG_VALUE(r));
257 gen_le32(c);
261 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
263 op_reg = REG_VALUE(op_reg) << 3;
264 if ((r & VT_VALMASK) == VT_CONST) {
265 /* constant memory reference */
266 o(0x05 | op_reg);
267 if (is_got) {
268 gen_gotpcrel(r, sym, c);
269 } else {
270 gen_addrpc32(r, sym, c);
272 } else if ((r & VT_VALMASK) == VT_LOCAL) {
273 /* currently, we use only ebp as base */
274 if (c == (char)c) {
275 /* short reference */
276 o(0x45 | op_reg);
277 g(c);
278 } else {
279 oad(0x85 | op_reg, c);
281 } else if ((r & VT_VALMASK) >= TREG_MEM) {
282 if (c) {
283 g(0x80 | op_reg | REG_VALUE(r));
284 gen_le32(c);
285 } else {
286 g(0x00 | op_reg | REG_VALUE(r));
288 } else {
289 g(0x00 | op_reg | (r & VT_VALMASK));
293 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
294 opcode bits */
295 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
297 gen_modrm_impl(op_reg, r, sym, c, 0);
300 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
301 opcode bits */
302 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
304 int is_got;
305 int rex = 0x48 | (REX_BASE(op_reg) << 2);
306 if ((r & VT_VALMASK) != VT_CONST &&
307 (r & VT_VALMASK) != VT_LOCAL) {
308 rex |= REX_BASE(VT_VALMASK & r);
310 o(rex);
311 o(opcode);
312 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
313 gen_modrm_impl(op_reg, r, sym, c, is_got);
317 /* load 'r' from value 'sv' */
318 void load(int r, SValue *sv)
320 int v, t, ft, fc, fr;
321 SValue v1;
323 #ifdef TCC_TARGET_PE
324 if (pe_dllimport(r, sv, load))
325 return;
326 #endif
328 fr = sv->r;
329 ft = sv->type.t;
330 fc = sv->c.ul;
332 #ifndef TCC_TARGET_PE
333 /* we use indirect access via got */
334 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
335 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
336 /* use the result register as a temporal register */
337 int tr = r | TREG_MEM;
338 if (is_float(ft)) {
339 /* we cannot use float registers as a temporal register */
340 tr = get_reg(RC_INT) | TREG_MEM;
342 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
344 /* load from the temporal register */
345 fr = tr | VT_LVAL;
347 #endif
349 v = fr & VT_VALMASK;
350 if (fr & VT_LVAL) {
351 if (v == VT_LLOCAL) {
352 v1.type.t = VT_PTR;
353 v1.r = VT_LOCAL | VT_LVAL;
354 v1.c.ul = fc;
355 load(r, &v1);
356 fr = r;
358 if ((ft & VT_BTYPE) == VT_FLOAT) {
359 o(0x6e0f66); /* movd */
360 r = 0;
361 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
362 o(0x7e0ff3); /* movq */
363 r = 0;
364 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
365 o(0xdb); /* fldt */
366 r = 5;
367 } else if ((ft & VT_TYPE) == VT_BYTE) {
368 o(0xbe0f); /* movsbl */
369 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
370 o(0xb60f); /* movzbl */
371 } else if ((ft & VT_TYPE) == VT_SHORT) {
372 o(0xbf0f); /* movswl */
373 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
374 o(0xb70f); /* movzwl */
375 } else if (is64_type(ft)) {
376 gen_modrm64(0x8b, r, fr, sv->sym, fc);
377 return;
378 } else {
379 o(0x8b); /* movl */
381 gen_modrm(r, fr, sv->sym, fc);
382 } else {
383 if (v == VT_CONST) {
384 if (fr & VT_SYM) {
385 #ifdef TCC_TARGET_PE
386 o(0x8d48);
387 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
388 gen_addrpc32(fr, sv->sym, fc);
389 #else
390 if (sv->sym->type.t & VT_STATIC) {
391 o(0x8d48);
392 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
393 gen_addrpc32(fr, sv->sym, fc);
394 } else {
395 o(0x8b48);
396 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
397 gen_gotpcrel(r, sv->sym, fc);
399 #endif
400 } else if (is64_type(ft)) {
401 o(0x48);
402 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
403 gen_le64(sv->c.ull);
404 } else {
405 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
406 gen_le32(fc);
408 } else if (v == VT_LOCAL) {
409 o(0x48 | REX_BASE(r));
410 o(0x8d); /* lea xxx(%ebp), r */
411 gen_modrm(r, VT_LOCAL, sv->sym, fc);
412 } else if (v == VT_CMP) {
413 oad(0xb8 + r, 0); /* mov $0, r */
414 o(0x0f); /* setxx %br */
415 o(fc);
416 o(0xc0 + r);
417 } else if (v == VT_JMP || v == VT_JMPI) {
418 t = v & 1;
419 oad(0xb8 + r, t); /* mov $1, r */
420 o(0x05eb); /* jmp after */
421 gsym(fc);
422 oad(0xb8 + r, t ^ 1); /* mov $0, r */
423 } else if (v != r) {
424 if (r == TREG_XMM0) {
425 assert(v == TREG_ST0);
426 /* gen_cvt_ftof(VT_DOUBLE); */
427 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
428 /* movsd -0x10(%rsp),%xmm0 */
429 o(0x44100ff2);
430 o(0xf024);
431 } else if (r == TREG_ST0) {
432 assert(v == TREG_XMM0);
433 /* gen_cvt_ftof(VT_LDOUBLE); */
434 /* movsd %xmm0,-0x10(%rsp) */
435 o(0x44110ff2);
436 o(0xf024);
437 o(0xf02444dd); /* fldl -0x10(%rsp) */
438 } else {
439 o(0x48 | REX_BASE(r) | (REX_BASE(v) << 2));
440 o(0x89);
441 o(0xc0 + r + v * 8); /* mov v, r */
447 /* store register 'r' in lvalue 'v' */
448 void store(int r, SValue *v)
450 int fr, bt, ft, fc;
451 int op64 = 0;
452 /* store the REX prefix in this variable when PIC is enabled */
453 int pic = 0;
455 #ifdef TCC_TARGET_PE
456 if (pe_dllimport(r, v, store))
457 return;
458 #endif
460 ft = v->type.t;
461 fc = v->c.ul;
462 fr = v->r & VT_VALMASK;
463 bt = ft & VT_BTYPE;
465 #ifndef TCC_TARGET_PE
466 /* we need to access the variable via got */
467 if (fr == VT_CONST && (v->r & VT_SYM)) {
468 /* mov xx(%rip), %r11 */
469 o(0x1d8b4c);
470 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
471 pic = is64_type(bt) ? 0x49 : 0x41;
473 #endif
475 /* XXX: incorrect if float reg to reg */
476 if (bt == VT_FLOAT) {
477 o(0x66);
478 o(pic);
479 o(0x7e0f); /* movd */
480 r = 0;
481 } else if (bt == VT_DOUBLE) {
482 o(0x66);
483 o(pic);
484 o(0xd60f); /* movq */
485 r = 0;
486 } else if (bt == VT_LDOUBLE) {
487 o(0xc0d9); /* fld %st(0) */
488 o(pic);
489 o(0xdb); /* fstpt */
490 r = 7;
491 } else {
492 if (bt == VT_SHORT)
493 o(0x66);
494 o(pic);
495 if (bt == VT_BYTE || bt == VT_BOOL)
496 o(0x88);
497 else if (is64_type(bt))
498 op64 = 0x89;
499 else
500 o(0x89);
502 if (pic) {
503 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
504 if (op64)
505 o(op64);
506 o(3 + (r << 3));
507 } else if (op64) {
508 if (fr == VT_CONST ||
509 fr == VT_LOCAL ||
510 (v->r & VT_LVAL)) {
511 gen_modrm64(op64, r, v->r, v->sym, fc);
512 } else if (fr != r) {
513 /* XXX: don't we really come here? */
514 abort();
515 o(0xc0 + fr + r * 8); /* mov r, fr */
517 } else {
518 if (fr == VT_CONST ||
519 fr == VT_LOCAL ||
520 (v->r & VT_LVAL)) {
521 gen_modrm(r, v->r, v->sym, fc);
522 } else if (fr != r) {
523 /* XXX: don't we really come here? */
524 abort();
525 o(0xc0 + fr + r * 8); /* mov r, fr */
530 /* 'is_jmp' is '1' if it is a jump */
531 static void gcall_or_jmp(int is_jmp)
533 int r;
534 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
535 /* constant case */
536 if (vtop->r & VT_SYM) {
537 /* relocation case */
538 greloc(cur_text_section, vtop->sym,
539 ind + 1, R_X86_64_PC32);
540 } else {
541 /* put an empty PC32 relocation */
542 put_elf_reloc(symtab_section, cur_text_section,
543 ind + 1, R_X86_64_PC32, 0);
545 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
546 } else {
547 /* otherwise, indirect call */
548 r = TREG_R11;
549 load(r, vtop);
550 o(0x41); /* REX */
551 o(0xff); /* call/jmp *r */
552 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
556 #ifdef TCC_TARGET_PE
558 #define REGN 4
559 static const uint8_t arg_regs[] = {
560 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
563 static int func_scratch;
/* NOTE: the following comment describes gfunc_call(), defined below:
   Generate function call. The function address is pushed first, then
   all the parameters in call order. This functions pops all the
   parameters and the function address. */

/* Emit the instruction bytes packed in 'b' followed by displacement
   'd'.  Register number 'r' is OR-ed into bits 3..5 of the second
   byte of 'b'; bit 6 of that byte is set and an 8-bit displacement
   emitted when 'd' fits in a signed char, otherwise bit 7 is set and a
   32-bit displacement emitted.  Callers use it for (%rsp)-relative
   operands.
   NOTE(review): some callers pass a 'b' whose second byte already has
   bit 6 set (e.g. 0x2444d6); in the 32-bit displacement case both bits
   end up set - confirm that displacements above 127 cannot occur
   there. */
void gen_offs_sp(int b, int r, int d)
{
    if (d == (char)d) {
        o(b | 0x4000 | (r << 11));
        g(d);
    } else {
        o(b | 0x8000 | (r << 11));
        gen_le32(d);
    }
}
580 void gfunc_call(int nb_args)
582 int size, align, r, args_size, i, d, j, bt;
583 int nb_reg_args, gen_reg;
585 /* calculate the number of integer/float arguments */
586 nb_reg_args = 0;
587 for(i = 0; i < nb_args; i++) {
588 bt = (vtop[-i].type.t & VT_BTYPE);
589 if (bt != VT_STRUCT && bt != VT_LDOUBLE)
590 nb_reg_args++;
593 args_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;
594 save_regs(0); /* save used temporary registers */
596 /* for struct arguments, we need to call memcpy and the function
597 call breaks register passing arguments we are preparing.
598 So, we process arguments which will be passed by stack first. */
599 for(i = 0; i < nb_args; i++) {
600 SValue *sv = &vtop[-i];
601 bt = (sv->type.t & VT_BTYPE);
602 if (bt == VT_STRUCT) {
603 size = type_size(&sv->type, &align);
604 /* align to stack align size */
605 size = (size + 15) & ~16;
606 /* generate structure store */
607 r = get_reg(RC_INT);
608 o(0x48);
609 gen_offs_sp(0x24048d, r, args_size);
610 args_size += size;
612 /* generate memcpy call */
613 vset(&sv->type, r | VT_LVAL, 0);
614 *++vtop = *sv;
615 vstore();
616 --vtop;
618 } else if (bt == VT_LDOUBLE) {
620 gv(RC_ST0);
621 gen_offs_sp(0x243cdb, 0, args_size);
622 args_size += 16;
627 if (func_scratch < args_size)
628 func_scratch = args_size;
630 gen_reg = nb_reg_args;
631 for(i = 0; i < nb_args; i++) {
632 bt = (vtop->type.t & VT_BTYPE);
633 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
634 ; /* done */
636 } else if (is_sse_float(vtop->type.t)) {
637 gv(RC_FLOAT); /* only one float register */
638 j = --gen_reg;
639 if (j >= REGN) {
640 o(0x0f66),
641 /* movq %xmm0, j*8(%rsp) */
642 gen_offs_sp(0x2444d6, 0, j*8);
643 } else {
644 d = arg_regs[j];
645 /* movaps %xmm0, %xmmN */
646 o(0x280f);
647 o(0xc0 + (j << 3));
648 /* mov %xmm0, %rxx */
649 o(0x66);
650 o(0x7e0f48 + (d >= 8));
651 o(0xc0 + (d & 7));
653 } else {
654 r = gv(RC_INT);
655 j = --gen_reg;
656 if (j >= REGN) {
657 o(0x48);
658 gen_offs_sp(0x244489, r, j*8);
659 } else {
660 d = arg_regs[j];
661 if (d != r) {
662 o(0x8948 + (d >= 8));
663 o(0xc0 + r*8 + (d & 7));
667 vtop--;
669 gcall_or_jmp(0);
670 vtop--;
674 #define FUNC_PROLOG_SIZE 11
676 /* generate function prolog of type 't' */
677 void gfunc_prolog(CType *func_type)
679 int addr, align, size, reg_param_index, bt;
680 Sym *sym;
681 CType *type;
683 func_ret_sub = 0;
684 func_scratch = 0;
685 loc = 0;
687 addr = PTR_SIZE * 2;
688 ind += FUNC_PROLOG_SIZE;
689 func_sub_sp_offset = ind;
690 reg_param_index = 0;
692 sym = func_type->ref;
694 /* if the function returns a structure, then add an
695 implicit pointer parameter */
696 func_vt = sym->type;
697 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
698 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
699 reg_param_index++;
700 addr += PTR_SIZE;
703 /* define parameters */
704 while ((sym = sym->next) != NULL) {
705 type = &sym->type;
706 bt = type->t & VT_BTYPE;
707 if (bt == VT_STRUCT || bt == VT_LDOUBLE)
708 continue;
709 if (reg_param_index < REGN) {
710 /* save arguments passed by register */
711 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
713 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
714 reg_param_index++;
715 addr += PTR_SIZE;
718 while (reg_param_index < REGN) {
719 if (func_type->ref->c == FUNC_ELLIPSIS)
720 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
721 reg_param_index++;
722 addr += PTR_SIZE;
725 sym = func_type->ref;
726 while ((sym = sym->next) != NULL) {
727 type = &sym->type;
728 bt = type->t & VT_BTYPE;
729 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
730 size = type_size(type, &align);
731 size = (size + 15) & -16;
732 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
733 addr += size;
738 /* generate function epilog */
739 void gfunc_epilog(void)
741 int v, saved_ind;
743 o(0xc9); /* leave */
744 if (func_ret_sub == 0) {
745 o(0xc3); /* ret */
746 } else {
747 o(0xc2); /* ret n */
748 g(func_ret_sub);
749 g(func_ret_sub >> 8);
752 saved_ind = ind;
753 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
754 /* align local size to word & save local variables */
755 v = (func_scratch + -loc + 15) & -16;
757 pe_add_unwind_data(ind, saved_ind, v);
759 if (v >= 4096) {
760 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
761 oad(0xb8, v); /* mov stacksize, %eax */
762 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
763 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
764 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
765 } else {
766 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
767 o(0xec8148); /* sub rsp, stacksize */
768 gen_le32(v);
770 ind = saved_ind;
773 #else
775 static void gadd_sp(int val)
777 if (val == (char)val) {
778 o(0xc48348);
779 g(val);
780 } else {
781 oad(0xc48148, val); /* add $xxx, %rsp */
785 #define REGN 6
786 static const uint8_t arg_regs[REGN] = {
787 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
790 /* Generate function call. The function address is pushed first, then
791 all the parameters in call order. This functions pops all the
792 parameters and the function address. */
793 void gfunc_call(int nb_args)
795 int size, align, r, args_size, i;
796 SValue *orig_vtop;
797 int nb_reg_args = 0;
798 int nb_sse_args = 0;
799 int sse_reg, gen_reg;
801 /* calculate the number of integer/float arguments */
802 args_size = 0;
803 for(i = 0; i < nb_args; i++) {
804 if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
805 args_size += type_size(&vtop->type, &align);
806 } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
807 args_size += 16;
808 } else if (is_sse_float(vtop[-i].type.t)) {
809 nb_sse_args++;
810 if (nb_sse_args > 8) args_size += 8;
811 } else {
812 nb_reg_args++;
813 if (nb_reg_args > REGN) args_size += 8;
817 /* for struct arguments, we need to call memcpy and the function
818 call breaks register passing arguments we are preparing.
819 So, we process arguments which will be passed by stack first. */
820 orig_vtop = vtop;
821 gen_reg = nb_reg_args;
822 sse_reg = nb_sse_args;
824 /* adjust stack to align SSE boundary */
825 if (args_size &= 8) {
826 o(0x50); /* push $rax */
828 for(i = 0; i < nb_args; i++) {
829 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
830 size = type_size(&vtop->type, &align);
831 /* align to stack align size */
832 size = (size + 3) & ~3;
833 /* allocate the necessary size on stack */
834 o(0x48);
835 oad(0xec81, size); /* sub $xxx, %rsp */
836 /* generate structure store */
837 r = get_reg(RC_INT);
838 o(0x48 + REX_BASE(r));
839 o(0x89); /* mov %rsp, r */
840 o(0xe0 + r);
842 /* following code breaks vtop[1] */
843 SValue tmp = vtop[1];
844 vset(&vtop->type, r | VT_LVAL, 0);
845 vswap();
846 vstore();
847 vtop[1] = tmp;
849 args_size += size;
850 } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
851 gv(RC_ST0);
852 size = LDOUBLE_SIZE;
853 oad(0xec8148, size); /* sub $xxx, %rsp */
854 o(0x7cdb); /* fstpt 0(%rsp) */
855 g(0x24);
856 g(0x00);
857 args_size += size;
858 } else if (is_sse_float(vtop->type.t)) {
859 int j = --sse_reg;
860 if (j >= 8) {
861 gv(RC_FLOAT);
862 o(0x50); /* push $rax */
863 /* movq %xmm0, (%rsp) */
864 o(0x04d60f66);
865 o(0x24);
866 args_size += 8;
868 } else {
869 int j = --gen_reg;
870 /* simple type */
871 /* XXX: implicit cast ? */
872 if (j >= REGN) {
873 r = gv(RC_INT);
874 o(0x50 + r); /* push r */
875 args_size += 8;
878 vtop--;
880 vtop = orig_vtop;
882 /* then, we prepare register passing arguments.
883 Note that we cannot set RDX and RCX in this loop because gv()
884 may break these temporary registers. Let's use R10 and R11
885 instead of them */
886 gen_reg = nb_reg_args;
887 sse_reg = nb_sse_args;
888 for(i = 0; i < nb_args; i++) {
889 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
890 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
891 } else if (is_sse_float(vtop->type.t)) {
892 int j = --sse_reg;
893 if (j < 8) {
894 gv(RC_FLOAT); /* only one float register */
895 /* movaps %xmm0, %xmmN */
896 o(0x280f);
897 o(0xc0 + (sse_reg << 3));
899 } else {
900 int j = --gen_reg;
901 /* simple type */
902 /* XXX: implicit cast ? */
903 if (j < REGN) {
904 r = gv(RC_INT);
905 if (j < 2) {
906 o(0x8948); /* mov */
907 o(0xc0 + r * 8 + arg_regs[j]);
908 } else if (j < 4) {
909 o(0x8949); /* mov */
910 /* j=2: r10, j=3: r11 */
911 o(0xc0 + r * 8 + j);
912 } else {
913 o(0x8949); /* mov */
914 /* j=4: r8, j=5: r9 */
915 o(0xc0 + r * 8 + j - 4);
919 vtop--;
922 save_regs(0); /* save used temporary registers */
924 /* Copy R10 and R11 into RDX and RCX, respectively */
925 if (nb_reg_args > 2) {
926 o(0xd2894c); /* mov %r10, %rdx */
927 if (nb_reg_args > 3) {
928 o(0xd9894c); /* mov %r11, %rcx */
932 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
933 gcall_or_jmp(0);
934 if (args_size)
935 gadd_sp(args_size);
936 vtop--;
940 #define FUNC_PROLOG_SIZE 11
942 static void push_arg_reg(int i) {
943 loc -= 8;
944 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
947 /* generate function prolog of type 't' */
948 void gfunc_prolog(CType *func_type)
950 int i, addr, align, size;
951 int param_index, param_addr, reg_param_index, sse_param_index;
952 Sym *sym;
953 CType *type;
955 sym = func_type->ref;
956 addr = PTR_SIZE * 2;
957 loc = 0;
958 ind += FUNC_PROLOG_SIZE;
959 func_sub_sp_offset = ind;
960 func_ret_sub = 0;
962 if (func_type->ref->c == FUNC_ELLIPSIS) {
963 int seen_reg_num, seen_sse_num, seen_stack_size;
964 seen_reg_num = seen_sse_num = 0;
965 /* frame pointer and return address */
966 seen_stack_size = PTR_SIZE * 2;
967 /* count the number of seen parameters */
968 sym = func_type->ref;
969 while ((sym = sym->next) != NULL) {
970 type = &sym->type;
971 if (is_sse_float(type->t)) {
972 if (seen_sse_num < 8) {
973 seen_sse_num++;
974 } else {
975 seen_stack_size += 8;
977 } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
978 size = type_size(type, &align);
979 size = (size + 3) & ~3;
980 seen_stack_size += size;
981 } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
982 seen_stack_size += LDOUBLE_SIZE;
983 } else {
984 if (seen_reg_num < REGN) {
985 seen_reg_num++;
986 } else {
987 seen_stack_size += 8;
992 loc -= 16;
993 /* movl $0x????????, -0x10(%rbp) */
994 o(0xf045c7);
995 gen_le32(seen_reg_num * 8);
996 /* movl $0x????????, -0xc(%rbp) */
997 o(0xf445c7);
998 gen_le32(seen_sse_num * 16 + 48);
999 /* movl $0x????????, -0x8(%rbp) */
1000 o(0xf845c7);
1001 gen_le32(seen_stack_size);
1003 /* save all register passing arguments */
1004 for (i = 0; i < 8; i++) {
1005 loc -= 16;
1006 o(0xd60f66); /* movq */
1007 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1008 /* movq $0, loc+8(%rbp) */
1009 o(0x85c748);
1010 gen_le32(loc + 8);
1011 gen_le32(0);
1013 for (i = 0; i < REGN; i++) {
1014 push_arg_reg(REGN-1-i);
1018 sym = func_type->ref;
1019 param_index = 0;
1020 reg_param_index = 0;
1021 sse_param_index = 0;
1023 /* if the function returns a structure, then add an
1024 implicit pointer parameter */
1025 func_vt = sym->type;
1026 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
1027 push_arg_reg(reg_param_index);
1028 param_addr = loc;
1030 func_vc = loc;
1031 param_index++;
1032 reg_param_index++;
1034 /* define parameters */
1035 while ((sym = sym->next) != NULL) {
1036 type = &sym->type;
1037 size = type_size(type, &align);
1038 size = (size + 3) & ~3;
1039 if (is_sse_float(type->t)) {
1040 if (sse_param_index < 8) {
1041 /* save arguments passed by register */
1042 loc -= 8;
1043 o(0xd60f66); /* movq */
1044 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
1045 param_addr = loc;
1046 } else {
1047 param_addr = addr;
1048 addr += size;
1050 sse_param_index++;
1052 } else if ((type->t & VT_BTYPE) == VT_STRUCT ||
1053 (type->t & VT_BTYPE) == VT_LDOUBLE) {
1054 param_addr = addr;
1055 addr += size;
1056 } else {
1057 if (reg_param_index < REGN) {
1058 /* save arguments passed by register */
1059 push_arg_reg(reg_param_index);
1060 param_addr = loc;
1061 } else {
1062 param_addr = addr;
1063 addr += 8;
1065 reg_param_index++;
1067 sym_push(sym->v & ~SYM_FIELD, type,
1068 VT_LOCAL | VT_LVAL, param_addr);
1069 param_index++;
1073 /* generate function epilog */
1074 void gfunc_epilog(void)
1076 int v, saved_ind;
1078 o(0xc9); /* leave */
1079 if (func_ret_sub == 0) {
1080 o(0xc3); /* ret */
1081 } else {
1082 o(0xc2); /* ret n */
1083 g(func_ret_sub);
1084 g(func_ret_sub >> 8);
1086 /* align local size to word & save local variables */
1087 v = (-loc + 15) & -16;
1088 saved_ind = ind;
1089 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1090 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1091 o(0xec8148); /* sub rsp, stacksize */
1092 gen_le32(v);
1093 ind = saved_ind;
1096 #endif /* not PE */
/* generate a jump to a label: emits 'jmp rel32' whose operand field
   temporarily holds 't' (the previous head of the jump chain) and
   returns the offset of that field, i.e. the new chain head */
int gjmp(int t)
{
    return psym(0xe9, t);
}
1104 /* generate a jump to a fixed address */
1105 void gjmp_addr(int a)
1107 int r;
1108 r = a - ind - 2;
1109 if (r == (char)r) {
1110 g(0xeb);
1111 g(r);
1112 } else {
1113 oad(0xe9, a - ind - 5);
1117 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1118 int gtst(int inv, int t)
1120 int v, *p;
1122 v = vtop->r & VT_VALMASK;
1123 if (v == VT_CMP) {
1124 /* fast case : can jump directly since flags are set */
1125 g(0x0f);
1126 t = psym((vtop->c.i - 16) ^ inv, t);
1127 } else if (v == VT_JMP || v == VT_JMPI) {
1128 /* && or || optimization */
1129 if ((v & 1) == inv) {
1130 /* insert vtop->c jump list in t */
1131 p = &vtop->c.i;
1132 while (*p != 0)
1133 p = (int *)(cur_text_section->data + *p);
1134 *p = t;
1135 t = vtop->c.i;
1136 } else {
1137 t = gjmp(t);
1138 gsym(vtop->c.i);
1140 } else {
1141 if (is_float(vtop->type.t) ||
1142 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1143 vpushi(0);
1144 gen_op(TOK_NE);
1146 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1147 /* constant jmp optimization */
1148 if ((vtop->c.i != 0) != inv)
1149 t = gjmp(t);
1150 } else {
1151 v = gv(RC_INT);
1152 o(0x85);
1153 o(0xc0 + v * 9);
1154 g(0x0f);
1155 t = psym(0x85 ^ inv, t);
1158 vtop--;
1159 return t;
1162 /* generate an integer binary operation */
1163 void gen_opi(int op)
1165 int r, fr, opc, c;
1167 switch(op) {
1168 case '+':
1169 case TOK_ADDC1: /* add with carry generation */
1170 opc = 0;
1171 gen_op8:
1172 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST &&
1173 !is64_type(vtop->type.t)) {
1174 /* constant case */
1175 vswap();
1176 r = gv(RC_INT);
1177 if (is64_type(vtop->type.t)) {
1178 o(0x48 | REX_BASE(r));
1180 vswap();
1181 c = vtop->c.i;
1182 if (c == (char)c) {
1183 /* XXX: generate inc and dec for smaller code ? */
1184 o(0x83);
1185 o(0xc0 | (opc << 3) | REG_VALUE(r));
1186 g(c);
1187 } else {
1188 o(0x81);
1189 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1191 } else {
1192 gv2(RC_INT, RC_INT);
1193 r = vtop[-1].r;
1194 fr = vtop[0].r;
1195 if (opc != 7 ||
1196 is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
1197 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
1198 o(0x48 | REX_BASE(r) | (REX_BASE(fr) << 2));
1200 o((opc << 3) | 0x01);
1201 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1203 vtop--;
1204 if (op >= TOK_ULT && op <= TOK_GT) {
1205 vtop->r = VT_CMP;
1206 vtop->c.i = op;
1208 break;
1209 case '-':
1210 case TOK_SUBC1: /* sub with carry generation */
1211 opc = 5;
1212 goto gen_op8;
1213 case TOK_ADDC2: /* add with carry use */
1214 opc = 2;
1215 goto gen_op8;
1216 case TOK_SUBC2: /* sub with carry use */
1217 opc = 3;
1218 goto gen_op8;
1219 case '&':
1220 opc = 4;
1221 goto gen_op8;
1222 case '^':
1223 opc = 6;
1224 goto gen_op8;
1225 case '|':
1226 opc = 1;
1227 goto gen_op8;
1228 case '*':
1229 gv2(RC_INT, RC_INT);
1230 r = vtop[-1].r;
1231 fr = vtop[0].r;
1232 if (is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
1233 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
1234 o(0x48 | REX_BASE(fr) | (REX_BASE(r) << 2));
1236 vtop--;
1237 o(0xaf0f); /* imul fr, r */
1238 o(0xc0 + fr + r * 8);
1239 break;
1240 case TOK_SHL:
1241 opc = 4;
1242 goto gen_shift;
1243 case TOK_SHR:
1244 opc = 5;
1245 goto gen_shift;
1246 case TOK_SAR:
1247 opc = 7;
1248 gen_shift:
1249 opc = 0xc0 | (opc << 3);
1250 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1251 /* constant case */
1252 vswap();
1253 r = gv(RC_INT);
1254 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1255 o(0x48 | REX_BASE(r));
1256 c = 0x3f;
1257 } else {
1258 c = 0x1f;
1260 vswap();
1261 c &= vtop->c.i;
1262 o(0xc1); /* shl/shr/sar $xxx, r */
1263 o(opc | r);
1264 g(c);
1265 } else {
1266 /* we generate the shift in ecx */
1267 gv2(RC_INT, RC_RCX);
1268 r = vtop[-1].r;
1269 if ((vtop[-1].type.t & VT_BTYPE) == VT_LLONG) {
1270 o(0x48 | REX_BASE(r));
1272 o(0xd3); /* shl/shr/sar %cl, r */
1273 o(opc | r);
1275 vtop--;
1276 break;
1277 case '/':
1278 case TOK_UDIV:
1279 case TOK_PDIV:
1280 case '%':
1281 case TOK_UMOD:
1282 case TOK_UMULL:
1283 /* first operand must be in eax */
1284 /* XXX: need better constraint for second operand */
1285 gv2(RC_RAX, RC_RCX);
1286 r = vtop[-1].r;
1287 fr = vtop[0].r;
1288 vtop--;
1289 save_reg(TREG_RDX);
1290 if (op == TOK_UMULL) {
1291 o(0xf7); /* mul fr */
1292 o(0xe0 + fr);
1293 vtop->r2 = TREG_RDX;
1294 r = TREG_RAX;
1295 } else {
1296 if (op == TOK_UDIV || op == TOK_UMOD) {
1297 if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
1298 o(0xd23148); /* xor %rdx, %rdx */
1299 o(0x48 + REX_BASE(fr));
1300 } else {
1301 o(0xd231); /* xor %edx, %edx */
1303 o(0xf7); /* div fr, %eax */
1304 o(0xf0 + fr);
1305 } else {
1306 if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
1307 o(0x9948); /* cqto */
1308 o(0x48 + REX_BASE(fr));
1309 } else {
1310 o(0x99); /* cltd */
1312 o(0xf7); /* idiv fr, %eax */
1313 o(0xf8 + fr);
1315 if (op == '%' || op == TOK_UMOD)
1316 r = TREG_RDX;
1317 else
1318 r = TREG_RAX;
1320 vtop->r = r;
1321 break;
1322 default:
1323 opc = 7;
1324 goto gen_op8;
/* Generate the operation 'op' on the two topmost value-stack entries.
   This entry point only forwards to gen_opi(), which itself emits the
   REX.W prefix when it sees a VT_LLONG operand. */
void gen_opl(int op)
{
    gen_opi(op);
}
1333 /* generate a floating point operation 'v = t1 op t2' instruction. The
1334 two operands are guaranted to have the same floating point type */
1335 /* XXX: need to use ST1 too */
1336 void gen_opf(int op)
1338 int a, ft, fc, swapped, r;
1339 int float_type =
1340 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1342 /* convert constants to memory references */
1343 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1344 vswap();
1345 gv(float_type);
1346 vswap();
1348 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1349 gv(float_type);
1351 /* must put at least one value in the floating point register */
1352 if ((vtop[-1].r & VT_LVAL) &&
1353 (vtop[0].r & VT_LVAL)) {
1354 vswap();
1355 gv(float_type);
1356 vswap();
1358 swapped = 0;
1359 /* swap the stack if needed so that t1 is the register and t2 is
1360 the memory reference */
1361 if (vtop[-1].r & VT_LVAL) {
1362 vswap();
1363 swapped = 1;
1365 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1366 if (op >= TOK_ULT && op <= TOK_GT) {
1367 /* load on stack second operand */
1368 load(TREG_ST0, vtop);
1369 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1370 if (op == TOK_GE || op == TOK_GT)
1371 swapped = !swapped;
1372 else if (op == TOK_EQ || op == TOK_NE)
1373 swapped = 0;
1374 if (swapped)
1375 o(0xc9d9); /* fxch %st(1) */
1376 o(0xe9da); /* fucompp */
1377 o(0xe0df); /* fnstsw %ax */
1378 if (op == TOK_EQ) {
1379 o(0x45e480); /* and $0x45, %ah */
1380 o(0x40fC80); /* cmp $0x40, %ah */
1381 } else if (op == TOK_NE) {
1382 o(0x45e480); /* and $0x45, %ah */
1383 o(0x40f480); /* xor $0x40, %ah */
1384 op = TOK_NE;
1385 } else if (op == TOK_GE || op == TOK_LE) {
1386 o(0x05c4f6); /* test $0x05, %ah */
1387 op = TOK_EQ;
1388 } else {
1389 o(0x45c4f6); /* test $0x45, %ah */
1390 op = TOK_EQ;
1392 vtop--;
1393 vtop->r = VT_CMP;
1394 vtop->c.i = op;
1395 } else {
1396 /* no memory reference possible for long double operations */
1397 load(TREG_ST0, vtop);
1398 swapped = !swapped;
1400 switch(op) {
1401 default:
1402 case '+':
1403 a = 0;
1404 break;
1405 case '-':
1406 a = 4;
1407 if (swapped)
1408 a++;
1409 break;
1410 case '*':
1411 a = 1;
1412 break;
1413 case '/':
1414 a = 6;
1415 if (swapped)
1416 a++;
1417 break;
1419 ft = vtop->type.t;
1420 fc = vtop->c.ul;
1421 o(0xde); /* fxxxp %st, %st(1) */
1422 o(0xc1 + (a << 3));
1423 vtop--;
1425 } else {
1426 if (op >= TOK_ULT && op <= TOK_GT) {
1427 /* if saved lvalue, then we must reload it */
1428 r = vtop->r;
1429 fc = vtop->c.ul;
1430 if ((r & VT_VALMASK) == VT_LLOCAL) {
1431 SValue v1;
1432 r = get_reg(RC_INT);
1433 v1.type.t = VT_INT;
1434 v1.r = VT_LOCAL | VT_LVAL;
1435 v1.c.ul = fc;
1436 load(r, &v1);
1437 fc = 0;
1440 if (op == TOK_EQ || op == TOK_NE) {
1441 swapped = 0;
1442 } else {
1443 if (op == TOK_LE || op == TOK_LT)
1444 swapped = !swapped;
1445 if (op == TOK_LE || op == TOK_GE) {
1446 op = 0x93; /* setae */
1447 } else {
1448 op = 0x97; /* seta */
1452 if (swapped) {
1453 o(0x7e0ff3); /* movq */
1454 gen_modrm(1, r, vtop->sym, fc);
1456 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1457 o(0x66);
1459 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1460 o(0xc8);
1461 } else {
1462 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1463 o(0x66);
1465 o(0x2e0f); /* ucomisd */
1466 gen_modrm(0, r, vtop->sym, fc);
1469 vtop--;
1470 vtop->r = VT_CMP;
1471 vtop->c.i = op;
1472 } else {
1473 /* no memory reference possible for long double operations */
1474 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1475 load(TREG_XMM0, vtop);
1476 swapped = !swapped;
1478 switch(op) {
1479 default:
1480 case '+':
1481 a = 0;
1482 break;
1483 case '-':
1484 a = 4;
1485 break;
1486 case '*':
1487 a = 1;
1488 break;
1489 case '/':
1490 a = 6;
1491 break;
1493 ft = vtop->type.t;
1494 fc = vtop->c.ul;
1495 if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1496 o(0xde); /* fxxxp %st, %st(1) */
1497 o(0xc1 + (a << 3));
1498 } else {
1499 /* if saved lvalue, then we must reload it */
1500 r = vtop->r;
1501 if ((r & VT_VALMASK) == VT_LLOCAL) {
1502 SValue v1;
1503 r = get_reg(RC_INT);
1504 v1.type.t = VT_INT;
1505 v1.r = VT_LOCAL | VT_LVAL;
1506 v1.c.ul = fc;
1507 load(r, &v1);
1508 fc = 0;
1510 if (swapped) {
1511 /* movq %xmm0,%xmm1 */
1512 o(0x7e0ff3);
1513 o(0xc8);
1514 load(TREG_XMM0, vtop);
1515 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1516 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1517 o(0xf2);
1518 } else {
1519 o(0xf3);
1521 o(0x0f);
1522 o(0x58 + a);
1523 o(0xc1);
1524 } else {
1525 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1526 o(0xf2);
1527 } else {
1528 o(0xf3);
1530 o(0x0f);
1531 o(0x58 + a);
1532 gen_modrm(0, r, vtop->sym, fc);
1535 vtop--;
1540 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1541 and 'long long' cases. */
1542 void gen_cvt_itof(int t)
1544 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1545 save_reg(TREG_ST0);
1546 gv(RC_INT);
1547 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1548 /* signed long long to float/double/long double (unsigned case
1549 is handled generically) */
1550 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1551 o(0x242cdf); /* fildll (%rsp) */
1552 o(0x08c48348); /* add $8, %rsp */
1553 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1554 (VT_INT | VT_UNSIGNED)) {
1555 /* unsigned int to float/double/long double */
1556 o(0x6a); /* push $0 */
1557 g(0x00);
1558 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1559 o(0x242cdf); /* fildll (%rsp) */
1560 o(0x10c48348); /* add $16, %rsp */
1561 } else {
1562 /* int to float/double/long double */
1563 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1564 o(0x2404db); /* fildl (%rsp) */
1565 o(0x08c48348); /* add $8, %rsp */
1567 vtop->r = TREG_ST0;
1568 } else {
1569 save_reg(TREG_XMM0);
1570 gv(RC_INT);
1571 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
1572 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1573 (VT_INT | VT_UNSIGNED) ||
1574 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1575 o(0x48); /* REX */
1577 o(0x2a0f);
1578 o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1579 vtop->r = TREG_XMM0;
1583 /* convert from one floating point type to another */
1584 void gen_cvt_ftof(int t)
1586 int ft, bt, tbt;
1588 ft = vtop->type.t;
1589 bt = ft & VT_BTYPE;
1590 tbt = t & VT_BTYPE;
1592 if (bt == VT_FLOAT) {
1593 gv(RC_FLOAT);
1594 if (tbt == VT_DOUBLE) {
1595 o(0xc0140f); /* unpcklps */
1596 o(0xc05a0f); /* cvtps2pd */
1597 } else if (tbt == VT_LDOUBLE) {
1598 /* movss %xmm0,-0x10(%rsp) */
1599 o(0x44110ff3);
1600 o(0xf024);
1601 o(0xf02444d9); /* flds -0x10(%rsp) */
1602 vtop->r = TREG_ST0;
1604 } else if (bt == VT_DOUBLE) {
1605 gv(RC_FLOAT);
1606 if (tbt == VT_FLOAT) {
1607 o(0xc0140f66); /* unpcklpd */
1608 o(0xc05a0f66); /* cvtpd2ps */
1609 } else if (tbt == VT_LDOUBLE) {
1610 /* movsd %xmm0,-0x10(%rsp) */
1611 o(0x44110ff2);
1612 o(0xf024);
1613 o(0xf02444dd); /* fldl -0x10(%rsp) */
1614 vtop->r = TREG_ST0;
1616 } else {
1617 gv(RC_ST0);
1618 if (tbt == VT_DOUBLE) {
1619 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1620 /* movsd -0x10(%rsp),%xmm0 */
1621 o(0x44100ff2);
1622 o(0xf024);
1623 vtop->r = TREG_XMM0;
1624 } else if (tbt == VT_FLOAT) {
1625 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1626 /* movss -0x10(%rsp),%xmm0 */
1627 o(0x44100ff3);
1628 o(0xf024);
1629 vtop->r = TREG_XMM0;
1634 /* convert fp to int 't' type */
1635 void gen_cvt_ftoi(int t)
1637 int ft, bt, size, r;
1638 ft = vtop->type.t;
1639 bt = ft & VT_BTYPE;
1640 if (bt == VT_LDOUBLE) {
1641 gen_cvt_ftof(VT_DOUBLE);
1642 bt = VT_DOUBLE;
1645 gv(RC_FLOAT);
1646 if (t != VT_INT)
1647 size = 8;
1648 else
1649 size = 4;
1651 r = get_reg(RC_INT);
1652 if (bt == VT_FLOAT) {
1653 o(0xf3);
1654 } else if (bt == VT_DOUBLE) {
1655 o(0xf2);
1656 } else {
1657 assert(0);
1659 if (size == 8) {
1660 o(0x48 + REX_BASE(r));
1662 o(0x2c0f); /* cvttss2si or cvttsd2si */
1663 o(0xc0 + (REG_VALUE(r) << 3));
1664 vtop->r = r;
1667 /* computed goto support */
1668 void ggoto(void)
1670 gcall_or_jmp(1);
1671 vtop--;
1674 /* end of x86-64 code generator */
1675 /*************************************************************/
1676 #endif /* ! TARGET_DEFS_ONLY */
1677 /******************************************************/