x86_64: Fix segfault for global data
[tinycc/miki.git] / x86_64-gen.c
blob0f86b3a95c5b72fbd7cd90e992b945577d87ee31
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
/* number of available registers */
#define NB_REGS         5
#define NB_ASM_REGS     8

/* Register classes. A register can belong to several classes. The
   classes must be sorted from more general to more precise (the gv2()
   code makes assumptions about this). */
#define RC_INT          0x0001  /* generic integer register */
#define RC_FLOAT        0x0002  /* generic float register */
#define RC_RAX          0x0004
#define RC_RCX          0x0008
#define RC_RDX          0x0010
#define RC_R8           0x0100
#define RC_R9           0x0200
#define RC_R10          0x0400
#define RC_R11          0x0800
#define RC_XMM0         0x0020
#define RC_ST0          0x0040  /* only for long double */

/* classes of the registers that carry function results */
#define RC_IRET         RC_RAX  /* function return: integer register */
#define RC_LRET         RC_RDX  /* function return: second integer register */
#define RC_FRET         RC_XMM0 /* function return: float register */
/* pretty names for the registers */
enum {
    TREG_RAX  = 0,
    TREG_RCX,           /* 1 */
    TREG_RDX,           /* 2 */
    TREG_XMM0,          /* 3 */
    TREG_ST0,           /* 4 */

    /* number 5 is not named here */
    TREG_RSI  = 6,
    TREG_RDI,           /* 7 */
    TREG_R8,            /* 8 */
    TREG_R9,            /* 9 */

    TREG_R10,           /* 10 */
    TREG_R11,           /* 11 */

    /* flag bit or-ed into a register number (see load() and
       gen_modrm_impl()) */
    TREG_MEM  = 0x10
};
/* bit 3 of a register number: the bit that goes into a REX prefix */
#define REX_BASE(reg)   (((reg) & 0x08) >> 3)
/* low three bits of a register number: the part encoded in modrm/opcode */
#define REG_VALUE(reg)  ((reg) & 0x07)
/* return registers for function */
#define REG_IRET        TREG_RAX    /* single word int return register */
#define REG_LRET        TREG_RDX    /* second word return register (for long long) */
#define REG_FRET        TREG_XMM0   /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE        8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE    16
#define LDOUBLE_ALIGN   8

/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN       8
86 ST_FUNC void gen_opl(int op);
87 ST_FUNC void gen_le64(int64_t c);
89 /******************************************************/
90 /* ELF defines */
92 #define EM_TCC_TARGET EM_X86_64
94 /* relocation type for 32 bit data relocation */
95 #define R_DATA_32 R_X86_64_32
96 #define R_DATA_PTR R_X86_64_64
97 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
98 #define R_COPY R_X86_64_COPY
100 #define ELF_START_ADDR 0x08048000
101 #define ELF_PAGE_SIZE 0x1000
103 /******************************************************/
104 #else /* ! TARGET_DEFS_ONLY */
105 /******************************************************/
106 #include "tcc.h"
107 #include <assert.h>
109 ST_DATA const int reg_classes[] = {
110 /* eax */ RC_INT | RC_RAX,
111 /* ecx */ RC_INT | RC_RCX,
112 /* edx */ RC_INT | RC_RDX,
113 /* xmm0 */ RC_FLOAT | RC_XMM0,
114 /* st0 */ RC_ST0,
118 RC_INT | RC_R8,
119 RC_INT | RC_R9,
120 RC_INT | RC_R10,
121 RC_INT | RC_R11
/* Value of 'ind' just after the FUNC_PROLOG_SIZE bytes reserved by
   gfunc_prolog(); gfunc_epilog() subtracts FUNC_PROLOG_SIZE from it to
   find where the prolog code has to be written. */
124 static unsigned long func_sub_sp_offset;
/* Operand of the 'ret n' emitted by gfunc_epilog(); when it is 0 a plain
   'ret' is emitted instead. */
125 static int func_ret_sub;
127 /* XXX: make it faster ? */
128 void g(int c)
130 int ind1;
131 ind1 = ind + 1;
132 if (ind1 > cur_text_section->data_allocated)
133 section_realloc(cur_text_section, ind1);
134 cur_text_section->data[ind] = c;
135 ind = ind1;
/* Emit the bytes of 'c' with g(), lowest byte first, stopping as soon as
   the remaining value is zero.  Consequently o(0) emits nothing. */
void o(unsigned int c)
{
    for (; c != 0; c >>= 8) {
        g(c);
    }
}
/* Emit the two low bytes of 'v', least significant byte first. */
void gen_le16(int v)
{
    int shift;

    for (shift = 0; shift < 16; shift += 8) {
        g(v >> shift);
    }
}
/* Emit 'c' as four bytes, least significant byte first. */
void gen_le32(int c)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        g(c >> shift);
    }
}
/* Emit 'c' as eight bytes, least significant byte first. */
void gen_le64(int64_t c)
{
    int shift;

    for (shift = 0; shift < 64; shift += 8) {
        g(c >> shift);
    }
}
172 void orex(int ll, int r, int r2, int b)
174 if ((r & VT_VALMASK) >= VT_CONST)
175 r = 0;
176 if ((r2 & VT_VALMASK) >= VT_CONST)
177 r2 = 0;
178 if (ll || REX_BASE(r) || REX_BASE(r2))
179 o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
180 o(b);
183 /* output a symbol and patch all calls to it */
184 void gsym_addr(int t, int a)
186 int n, *ptr;
187 while (t) {
188 ptr = (int *)(cur_text_section->data + t);
189 n = *ptr; /* next value */
190 *ptr = a - t - 4;
191 t = n;
195 void gsym(int t)
197 gsym_addr(t, ind);
/* psym emits an instruction whose data field is a reference to a symbol.
   It is simply another name for oad(). */
#define psym oad
204 static int is64_type(int t)
206 return ((t & VT_BTYPE) == VT_PTR ||
207 (t & VT_BTYPE) == VT_FUNC ||
208 (t & VT_BTYPE) == VT_LLONG);
211 static int is_sse_float(int t) {
212 int bt;
213 bt = t & VT_BTYPE;
214 return bt == VT_DOUBLE || bt == VT_FLOAT;
218 /* instruction + 4 bytes data. Return the address of the data */
219 ST_FUNC int oad(int c, int s)
221 int ind1;
223 o(c);
224 ind1 = ind + 4;
225 if (ind1 > cur_text_section->data_allocated)
226 section_realloc(cur_text_section, ind1);
227 *(int *)(cur_text_section->data + ind) = s;
228 s = ind;
229 ind = ind1;
230 return s;
233 ST_FUNC void gen_addr32(int r, Sym *sym, int c)
235 if (r & VT_SYM)
236 greloc(cur_text_section, sym, ind, R_X86_64_32);
237 gen_le32(c);
240 /* output constant with relocation if 'r & VT_SYM' is true */
241 ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
243 if (r & VT_SYM)
244 greloc(cur_text_section, sym, ind, R_X86_64_64);
245 gen_le64(c);
248 /* output constant with relocation if 'r & VT_SYM' is true */
249 ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
251 if (r & VT_SYM)
252 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
253 gen_le32(c-4);
256 /* output got address with relocation */
257 static void gen_gotpcrel(int r, Sym *sym, int c)
259 #ifndef TCC_TARGET_PE
260 Section *sr;
261 ElfW(Rela) *rel;
262 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
263 sr = cur_text_section->reloc;
264 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
265 rel->r_addend = -4;
266 #else
267 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
268 cur_text_section->data[ind-3],
269 cur_text_section->data[ind-2],
270 cur_text_section->data[ind-1]
272 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
273 #endif
274 gen_le32(0);
275 if (c) {
276 /* we use add c, %xxx for displacement */
277 orex(1, r, 0, 0x81);
278 o(0xc0 + REG_VALUE(r));
279 gen_le32(c);
283 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
285 op_reg = REG_VALUE(op_reg) << 3;
286 if ((r & VT_VALMASK) == VT_CONST) {
287 /* constant memory reference */
288 o(0x05 | op_reg);
289 if (is_got) {
290 gen_gotpcrel(r, sym, c);
291 } else {
292 gen_addrpc32(r, sym, c);
294 } else if ((r & VT_VALMASK) == VT_LOCAL) {
295 /* currently, we use only ebp as base */
296 if (c == (char)c) {
297 /* short reference */
298 o(0x45 | op_reg);
299 g(c);
300 } else {
301 oad(0x85 | op_reg, c);
303 } else if ((r & VT_VALMASK) >= TREG_MEM) {
304 if (c) {
305 g(0x80 | op_reg | REG_VALUE(r));
306 gen_le32(c);
307 } else {
308 g(0x00 | op_reg | REG_VALUE(r));
310 } else {
311 g(0x00 | op_reg | REG_VALUE(r));
315 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
316 opcode bits */
317 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
319 gen_modrm_impl(op_reg, r, sym, c, 0);
322 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
323 opcode bits */
324 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
326 int is_got;
327 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
328 orex(1, r, op_reg, opcode);
329 gen_modrm_impl(op_reg, r, sym, c, is_got);
333 /* load 'r' from value 'sv' */
/* Emits the code that brings the value described by 'sv' into register
   'r'.  Overview of the cases handled below:
   - (non-PE) an lvalue that is a non-static symbol: its address is first
     fetched through the GOT into a temporary register, then dereferenced
   - lvalues: the move opcode is chosen from the type (movd/movq for
     float/double, fldt for long double, sign/zero extending moves for
     char/short, plain mov otherwise)
   - constants: lea/mov relative to %rip for symbols, mov immediate
     otherwise
   - VT_LOCAL: lea of the frame slot
   - VT_CMP, VT_JMP, VT_JMPI: the flag or jump result is turned into 0/1
   - another register: register move, going through -0x10(%rsp) when
     converting between %xmm0 and %st0 */
334 void load(int r, SValue *sv)
336 int v, t, ft, fc, fr;
337 SValue v1;
339 #ifdef TCC_TARGET_PE
340 SValue v2;
341 sv = pe_getimport(sv, &v2);
342 #endif
344 fr = sv->r;
345 ft = sv->type.t;
346 fc = sv->c.ul;
348 #ifndef TCC_TARGET_PE
349 /* we use indirect access via got */
350 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
351 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
352 /* use the result register as a temporary register */
353 int tr = r | TREG_MEM;
354 if (is_float(ft)) {
355 /* we cannot use float registers as a temporary register */
356 tr = get_reg(RC_INT) | TREG_MEM;
358 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
360 /* load from the temporary register */
361 fr = tr | VT_LVAL;
363 #endif
365 v = fr & VT_VALMASK;
366 if (fr & VT_LVAL) {
367 int b, ll;
368 if (v == VT_LLOCAL) {
/* the local slot holds a pointer: load that pointer into an integer
   register first, then dereference it below */
369 v1.type.t = VT_PTR;
370 v1.r = VT_LOCAL | VT_LVAL;
371 v1.c.ul = fc;
372 fr = r;
373 if (!(reg_classes[fr] & RC_INT))
374 fr = get_reg(RC_INT);
375 load(fr, &v1);
377 ll = 0;
/* for the float cases 'r' is replaced by the value that goes into the
   reg field of the modrm byte */
378 if ((ft & VT_BTYPE) == VT_FLOAT) {
379 b = 0x6e0f66, r = 0; /* movd */
380 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
381 b = 0x7e0ff3, r = 0; /* movq */
382 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
383 b = 0xdb, r = 5; /* fldt */
384 } else if ((ft & VT_TYPE) == VT_BYTE) {
385 b = 0xbe0f; /* movsbl */
386 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
387 b = 0xb60f; /* movzbl */
388 } else if ((ft & VT_TYPE) == VT_SHORT) {
389 b = 0xbf0f; /* movswl */
390 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
391 b = 0xb70f; /* movzwl */
392 } else {
393 ll = is64_type(ft);
394 b = 0x8b;
396 if (ll) {
397 gen_modrm64(b, r, fr, sv->sym, fc);
398 } else {
399 orex(ll, fr, r, b);
400 gen_modrm(r, fr, sv->sym, fc);
402 } else {
403 if (v == VT_CONST) {
404 if (fr & VT_SYM) {
405 #ifdef TCC_TARGET_PE
406 orex(1,0,r,0x8d);
407 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
408 gen_addrpc32(fr, sv->sym, fc);
409 #else
/* static symbols are addressed directly, all others through the GOT */
410 if (sv->sym->type.t & VT_STATIC) {
411 orex(1,0,r,0x8d);
412 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
413 gen_addrpc32(fr, sv->sym, fc);
414 } else {
415 orex(1,0,r,0x8b);
416 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
417 gen_gotpcrel(r, sv->sym, fc);
419 #endif
420 } else if (is64_type(ft)) {
421 orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
422 gen_le64(sv->c.ull);
423 } else {
424 orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
425 gen_le32(fc);
427 } else if (v == VT_LOCAL) {
428 orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
429 gen_modrm(r, VT_LOCAL, sv->sym, fc);
430 } else if (v == VT_CMP) {
/* orex() with opcode 0 emits at most the REX prefix, because o(0)
   emits nothing */
431 orex(0,r,0,0);
432 oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
433 orex(0,r,0, 0x0f); /* setxx %br */
434 o(fc);
435 o(0xc0 + REG_VALUE(r));
436 } else if (v == VT_JMP || v == VT_JMPI) {
437 t = v & 1;
438 orex(0,r,0,0);
439 oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
440 o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
441 gsym(fc);
442 orex(0,r,0,0);
443 oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
444 } else if (v != r) {
445 if (r == TREG_XMM0) {
446 assert(v == TREG_ST0);
447 /* gen_cvt_ftof(VT_DOUBLE); */
448 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
449 /* movsd -0x10(%rsp),%xmm0 */
450 o(0x44100ff2);
451 o(0xf024);
452 } else if (r == TREG_ST0) {
453 assert(v == TREG_XMM0);
454 /* gen_cvt_ftof(VT_LDOUBLE); */
455 /* movsd %xmm0,-0x10(%rsp) */
456 o(0x44110ff2);
457 o(0xf024);
458 o(0xf02444dd); /* fldl -0x10(%rsp) */
459 } else {
460 orex(1,r,v, 0x89);
461 o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
467 /* store register 'r' in lvalue 'v' */
/* Emits the code that writes register 'r' to the location described by
   'v'.  On non-PE targets a symbol destination is reached through the
   GOT: its address is loaded into %r11 and the store then goes to
   (%r11).  The store opcode is selected from the basic type of 'v'. */
468 void store(int r, SValue *v)
470 int fr, bt, ft, fc;
/* opcode of a 64-bit store whose emission is deferred until the
   addressing mode is known; 0 when the opcode was already emitted */
471 int op64 = 0;
472 /* store the REX prefix in this variable when PIC is enabled */
473 int pic = 0;
475 #ifdef TCC_TARGET_PE
476 SValue v2;
477 v = pe_getimport(v, &v2);
478 #endif
480 ft = v->type.t;
481 fc = v->c.ul;
482 fr = v->r & VT_VALMASK;
483 bt = ft & VT_BTYPE;
485 #ifndef TCC_TARGET_PE
486 /* we need to access the variable via got */
487 if (fr == VT_CONST && (v->r & VT_SYM)) {
488 /* mov xx(%rip), %r11 */
489 o(0x1d8b4c);
490 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
491 pic = is64_type(bt) ? 0x49 : 0x41;
493 #endif
/* in the branches below o(pic) emits nothing when 'pic' is 0 */
495 /* XXX: incorrect if float reg to reg */
496 if (bt == VT_FLOAT) {
497 o(0x66);
498 o(pic);
499 o(0x7e0f); /* movd */
500 r = 0;
501 } else if (bt == VT_DOUBLE) {
502 o(0x66);
503 o(pic);
504 o(0xd60f); /* movq */
505 r = 0;
506 } else if (bt == VT_LDOUBLE) {
507 o(0xc0d9); /* fld %st(0) */
508 o(pic);
509 o(0xdb); /* fstpt */
510 r = 7;
511 } else {
512 if (bt == VT_SHORT)
513 o(0x66);
514 o(pic);
515 if (bt == VT_BYTE || bt == VT_BOOL)
516 orex(0, 0, r, 0x88);
517 else if (is64_type(bt))
518 op64 = 0x89;
519 else
520 orex(0, 0, r, 0x89);
522 if (pic) {
523 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
524 if (op64)
525 o(op64);
526 o(3 + (r << 3));
527 } else if (op64) {
528 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
529 gen_modrm64(op64, r, v->r, v->sym, fc);
530 } else if (fr != r) {
531 /* XXX: don't we really come here? */
/* NOTE(review): the o() call after abort() is unreachable if abort() is
   the standard library function */
532 abort();
533 o(0xc0 + fr + r * 8); /* mov r, fr */
535 } else {
536 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
537 gen_modrm(r, v->r, v->sym, fc);
538 } else if (fr != r) {
539 /* XXX: don't we really come here? */
540 abort();
541 o(0xc0 + fr + r * 8); /* mov r, fr */
546 /* 'is_jmp' is '1' if it is a jump */
547 static void gcall_or_jmp(int is_jmp)
549 int r;
550 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
551 /* constant case */
552 if (vtop->r & VT_SYM) {
553 /* relocation case */
554 greloc(cur_text_section, vtop->sym,
555 ind + 1, R_X86_64_PC32);
556 } else {
557 /* put an empty PC32 relocation */
558 put_elf_reloc(symtab_section, cur_text_section,
559 ind + 1, R_X86_64_PC32, 0);
561 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
562 } else {
563 /* otherwise, indirect call */
564 r = TREG_R11;
565 load(r, vtop);
566 o(0x41); /* REX */
567 o(0xff); /* call/jmp *r */
568 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
572 #ifdef TCC_TARGET_PE
574 #define REGN 4
575 static const uint8_t arg_regs[] = {
576 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
579 static int func_scratch;
581 /* Generate function call. The function address is pushed first, then
582 all the parameters in call order. This functions pops all the
583 parameters and the function address. */
/* Emit opcode 'b' (with a 64-bit REX prefix) followed by a modrm/SIB
   pair addressing 'd'(%rsp), using an 8-bit displacement when 'd' fits
   in a char and a 32-bit one otherwise.  The low bits of 'r' go into
   the reg field; when bit 0x100 of 'r' is set, 'r' does not contribute
   to the REX prefix. */
void gen_offs_sp(int b, int r, int d)
{
    const int rex_reg = (r & 0x100) ? 0 : r;
    const int reg_bits = REG_VALUE(r) << 3;

    orex(1, 0, rex_reg, b);
    if (d != (char)d) {
        o(0x2484 | reg_bits);
        gen_le32(d);
        return;
    }
    o(0x2444 | reg_bits);
    g(d);
}
/* PE variant: generate a call with 'nb_args' arguments taken from the
   value stack; the function address lies below them.
   First pass: struct arguments are copied and long double arguments are
   stored into a stack area located above the argument slots (offsets
   start at 'args_size').  'func_scratch' is raised to the size needed.
   Second pass: every argument gets one 8-byte slot; structs and long
   doubles are passed as the address of their copy, the first REGN
   arguments go to registers and the others to j*8(%rsp). */
597 void gfunc_call(int nb_args)
599 int size, align, r, args_size, i, d, j, bt, struct_size;
600 int nb_reg_args, gen_reg;
602 nb_reg_args = nb_args;
/* at least REGN slots are always reserved */
603 args_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;
605 /* for struct arguments, we need to call memcpy and the function
606 call breaks register passing arguments we are preparing.
607 So, we process arguments which will be passed by stack first. */
608 struct_size = args_size;
609 for(i = 0; i < nb_args; i++) {
610 SValue *sv = &vtop[-i];
611 bt = (sv->type.t & VT_BTYPE);
612 if (bt == VT_STRUCT) {
613 size = type_size(&sv->type, &align);
614 /* align to stack align size */
615 size = (size + 15) & ~15;
616 /* generate structure store */
617 r = get_reg(RC_INT);
618 gen_offs_sp(0x8d, r, struct_size);
619 struct_size += size;
621 /* generate memcpy call */
622 vset(&sv->type, r | VT_LVAL, 0);
623 vpushv(sv);
624 vstore();
625 --vtop;
627 } else if (bt == VT_LDOUBLE) {
629 gv(RC_ST0);
/* 0x107: reg field 7, bit 0x100 keeps it out of the REX prefix */
630 gen_offs_sp(0xdb, 0x107, struct_size);
631 struct_size += 16;
636 if (func_scratch < struct_size)
637 func_scratch = struct_size;
638 #if 1
639 for (i = 0; i < REGN; ++i)
640 save_reg(arg_regs[i]);
641 save_reg(TREG_RAX);
642 #endif
643 gen_reg = nb_reg_args;
644 struct_size = args_size;
646 for(i = 0; i < nb_args; i++) {
647 bt = (vtop->type.t & VT_BTYPE);
649 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
650 if (bt == VT_LDOUBLE)
651 size = 16;
652 else
653 size = type_size(&vtop->type, &align);
654 /* align to stack align size */
655 size = (size + 15) & ~15;
656 j = --gen_reg;
657 if (j >= REGN) {
/* lea of the copy into %rax, then store that address in the slot */
658 d = TREG_RAX;
659 gen_offs_sp(0x8d, d, struct_size);
660 gen_offs_sp(0x89, d, j*8);
661 } else {
662 d = arg_regs[j];
663 gen_offs_sp(0x8d, d, struct_size);
665 struct_size += size;
667 } else if (is_sse_float(vtop->type.t)) {
668 gv(RC_FLOAT); /* only one float register */
669 j = --gen_reg;
670 if (j >= REGN) {
671 /* movq %xmm0, j*8(%rsp) */
672 gen_offs_sp(0xd60f66, 0x100, j*8);
673 } else {
674 /* movaps %xmm0, %xmmN */
675 o(0x280f);
676 o(0xc0 + (j << 3));
677 d = arg_regs[j];
678 /* mov %xmm0, %rxx */
679 o(0x66);
680 orex(1,d,0, 0x7e0f);
681 o(0xc0 + REG_VALUE(d));
683 } else {
684 j = --gen_reg;
685 if (j >= REGN) {
686 r = gv(RC_INT);
687 gen_offs_sp(0x89, r, j*8);
688 } else {
689 d = arg_regs[j];
690 if (d < NB_REGS) {
691 gv(reg_classes[d] & ~RC_INT);
692 } else {
693 r = gv(RC_INT);
694 if (d != r) {
695 orex(1,d,r, 0x89);
696 o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8);
702 vtop--;
704 save_regs(0);
705 gcall_or_jmp(0);
/* pop the function address */
706 vtop--;
710 #define FUNC_PROLOG_SIZE 11
712 /* generate function prolog of type 't' */
713 void gfunc_prolog(CType *func_type)
715 int addr, reg_param_index, bt;
716 Sym *sym;
717 CType *type;
719 func_ret_sub = 0;
720 func_scratch = 0;
721 loc = 0;
723 addr = PTR_SIZE * 2;
724 ind += FUNC_PROLOG_SIZE;
725 func_sub_sp_offset = ind;
726 reg_param_index = 0;
728 sym = func_type->ref;
730 /* if the function returns a structure, then add an
731 implicit pointer parameter */
732 func_vt = sym->type;
733 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
734 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
735 reg_param_index++;
736 addr += PTR_SIZE;
739 /* define parameters */
740 while ((sym = sym->next) != NULL) {
741 type = &sym->type;
742 bt = type->t & VT_BTYPE;
743 if (reg_param_index < REGN) {
744 /* save arguments passed by register */
745 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
747 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
748 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
749 } else {
750 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
752 reg_param_index++;
753 addr += PTR_SIZE;
756 while (reg_param_index < REGN) {
757 if (func_type->ref->c == FUNC_ELLIPSIS)
758 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
759 reg_param_index++;
760 addr += PTR_SIZE;
764 /* generate function epilog */
765 void gfunc_epilog(void)
767 int v, saved_ind;
769 o(0xc9); /* leave */
770 if (func_ret_sub == 0) {
771 o(0xc3); /* ret */
772 } else {
773 o(0xc2); /* ret n */
774 g(func_ret_sub);
775 g(func_ret_sub >> 8);
778 saved_ind = ind;
779 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
780 /* align local size to word & save local variables */
781 v = (func_scratch + -loc + 15) & -16;
783 if (v >= 4096) {
784 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
785 oad(0xb8, v); /* mov stacksize, %eax */
786 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
787 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
788 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
789 } else {
790 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
791 o(0xec8148); /* sub rsp, stacksize */
792 gen_le32(v);
795 cur_text_section->data_offset = saved_ind;
796 pe_add_unwind_data(ind, saved_ind, v);
797 ind = cur_text_section->data_offset;
800 #else
/* Emit 'add $val, %rsp', with an 8-bit immediate when 'val' fits in a
   char and a 32-bit immediate otherwise. */
static void gadd_sp(int val)
{
    if (val != (char)val) {
        oad(0xc48148, val); /* add $xxx, %rsp */
        return;
    }
    o(0xc48348);
    g(val);
}
812 #define REGN 6
813 static const uint8_t arg_regs[REGN] = {
814 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
817 /* Generate function call. The function address is pushed first, then
818 all the parameters in call order. This function pops all the
819 parameters and the function address. */
/* Non-PE variant, in four steps:
   1. count the integer and SSE arguments and the bytes of stack
      arguments, and pad the stack so the total is a multiple of 16;
   2. push the stack arguments: structs, long doubles, SSE arguments
      from the ninth on, integer arguments beyond REGN;
   3. move the register arguments into place, using %r10/%r11 as
      stand-ins for %rdx/%rcx;
   4. copy %r10/%r11 to %rdx/%rcx, load %eax with the number of SSE
      registers used (8 at most), call, and release the stack space. */
820 void gfunc_call(int nb_args)
822 int size, align, r, args_size, i;
823 int nb_reg_args = 0;
824 int nb_sse_args = 0;
825 int sse_reg, gen_reg;
827 /* calculate the number of integer/float arguments */
828 args_size = 0;
829 for(i = 0; i < nb_args; i++) {
830 if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
831 args_size += type_size(&vtop[-i].type, &align);
832 args_size = (args_size + 7) & ~7;
833 } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
834 args_size += 16;
835 } else if (is_sse_float(vtop[-i].type.t)) {
836 nb_sse_args++;
837 if (nb_sse_args > 8) args_size += 8;
838 } else {
839 nb_reg_args++;
840 if (nb_reg_args > REGN) args_size += 8;
844 /* for struct arguments, we need to call memcpy and the function
845 call breaks register passing arguments we are preparing.
846 So, we process arguments which will be passed by stack first. */
847 gen_reg = nb_reg_args;
848 sse_reg = nb_sse_args;
850 /* adjust stack to align SSE boundary */
/* the assignment is intended: from here on args_size counts the bytes
   actually subtracted from %rsp, starting with the padding */
851 if (args_size &= 15) {
852 /* fetch cpu flag before the following sub will change the value */
853 if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
854 gv(RC_INT);
856 args_size = 16 - args_size;
857 o(0x48);
858 oad(0xec81, args_size); /* sub $xxx, %rsp */
861 for(i = 0; i < nb_args; i++) {
862 /* Swap argument to top, it will possibly be changed here,
863 and might use more temps. All arguments must remain on the
864 stack, so that get_reg can correctly evict some of them onto
865 stack. We could also use a vrott(nb_args) at the end
866 of this loop, but this seems faster. */
867 SValue tmp = vtop[0];
868 vtop[0] = vtop[-i];
869 vtop[-i] = tmp;
870 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
871 size = type_size(&vtop->type, &align);
872 /* align to stack align size */
873 size = (size + 7) & ~7;
874 /* allocate the necessary size on stack */
875 o(0x48);
876 oad(0xec81, size); /* sub $xxx, %rsp */
877 /* generate structure store */
878 r = get_reg(RC_INT);
879 orex(1, r, 0, 0x89); /* mov %rsp, r */
880 o(0xe0 + REG_VALUE(r));
881 vset(&vtop->type, r | VT_LVAL, 0);
882 vswap();
883 vstore();
884 args_size += size;
885 } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
886 gv(RC_ST0);
887 size = LDOUBLE_SIZE;
888 oad(0xec8148, size); /* sub $xxx, %rsp */
889 o(0x7cdb); /* fstpt 0(%rsp) */
890 g(0x24);
891 g(0x00);
892 args_size += size;
893 } else if (is_sse_float(vtop->type.t)) {
894 int j = --sse_reg;
895 if (j >= 8) {
896 gv(RC_FLOAT);
/* the push only makes room; the slot is overwritten by the movq */
897 o(0x50); /* push $rax */
898 /* movq %xmm0, (%rsp) */
899 o(0x04d60f66);
900 o(0x24);
901 args_size += 8;
903 } else {
904 int j = --gen_reg;
905 /* simple type */
906 /* XXX: implicit cast ? */
907 if (j >= REGN) {
908 r = gv(RC_INT);
909 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
910 args_size += 8;
914 /* And swap the argument back to its original position. */
915 tmp = vtop[0];
916 vtop[0] = vtop[-i];
917 vtop[-i] = tmp;
920 /* XXX This should be superfluous. */
921 save_regs(0); /* save used temporary registers */
923 /* then, we prepare register passing arguments.
924 Note that we cannot set RDX and RCX in this loop because gv()
925 may break these temporary registers. Let's use R10 and R11
926 instead of them */
927 gen_reg = nb_reg_args;
928 sse_reg = nb_sse_args;
929 for(i = 0; i < nb_args; i++) {
930 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
931 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
/* already placed on the stack by the previous loop */
932 } else if (is_sse_float(vtop->type.t)) {
933 int j = --sse_reg;
934 if (j < 8) {
935 gv(RC_FLOAT); /* only one float register */
936 /* movaps %xmm0, %xmmN */
937 o(0x280f);
938 o(0xc0 + (sse_reg << 3));
940 } else {
941 int j = --gen_reg;
942 /* simple type */
943 /* XXX: implicit cast ? */
944 if (j < REGN) {
945 int d = arg_regs[j];
946 r = gv(RC_INT);
947 if (j == 2 || j == 3)
948 /* j=2: r10, j=3: r11 */
949 d = j + 8;
950 orex(1,d,r,0x89); /* mov */
951 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
954 vtop--;
957 /* We shouldn't have many operands on the stack anymore, but the
958 call address itself is still there, and it might be in %eax
959 (or edx/ecx) currently, which the below writes would clobber.
960 So evict all remaining operands here. */
961 save_regs(0);
963 /* Copy R10 and R11 into RDX and RCX, respectively */
964 if (nb_reg_args > 2) {
965 o(0xd2894c); /* mov %r10, %rdx */
966 if (nb_reg_args > 3) {
967 o(0xd9894c); /* mov %r11, %rcx */
971 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
972 gcall_or_jmp(0);
973 if (args_size)
974 gadd_sp(args_size);
/* pop the function address */
975 vtop--;
979 #define FUNC_PROLOG_SIZE 11
981 static void push_arg_reg(int i) {
982 loc -= 8;
983 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
986 /* generate function prolog of type 't' */
/* Non-PE variant.  Reserves FUNC_PROLOG_SIZE bytes for the prolog
   itself (written later by gfunc_epilog()), then declares the
   parameters: register parameters are spilled to fresh local slots,
   the others are addressed at positive offsets from the frame pointer
   starting at PTR_SIZE * 2.  For variadic functions three 32-bit values
   and all argument registers are saved first. */
987 void gfunc_prolog(CType *func_type)
989 int i, addr, align, size;
990 int param_index, param_addr, reg_param_index, sse_param_index;
991 Sym *sym;
992 CType *type;
994 sym = func_type->ref;
995 addr = PTR_SIZE * 2;
996 loc = 0;
997 ind += FUNC_PROLOG_SIZE;
998 func_sub_sp_offset = ind;
999 func_ret_sub = 0;
1001 if (func_type->ref->c == FUNC_ELLIPSIS) {
1002 int seen_reg_num, seen_sse_num, seen_stack_size;
1003 seen_reg_num = seen_sse_num = 0;
1004 /* frame pointer and return address */
1005 seen_stack_size = PTR_SIZE * 2;
1006 /* count the number of seen parameters */
1007 sym = func_type->ref;
1008 while ((sym = sym->next) != NULL) {
1009 type = &sym->type;
1010 if (is_sse_float(type->t)) {
1011 if (seen_sse_num < 8) {
1012 seen_sse_num++;
1013 } else {
1014 seen_stack_size += 8;
1016 } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
1017 size = type_size(type, &align);
1018 size = (size + 7) & ~7;
1019 seen_stack_size += size;
1020 } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
1021 seen_stack_size += LDOUBLE_SIZE;
1022 } else {
1023 if (seen_reg_num < REGN) {
1024 seen_reg_num++;
1025 } else {
1026 seen_stack_size += 8;
/* three 32-bit values stored at -0x10, -0xc and -0x8(%rbp): bytes of
   integer registers used by named parameters, the matching offset for
   the SSE registers (48 + 16 per register) and the stack bytes used by
   named parameters; presumably read by the va_list support code --
   TODO confirm */
1031 loc -= 16;
1032 /* movl $0x????????, -0x10(%rbp) */
1033 o(0xf045c7);
1034 gen_le32(seen_reg_num * 8);
1035 /* movl $0x????????, -0xc(%rbp) */
1036 o(0xf445c7);
1037 gen_le32(seen_sse_num * 16 + 48);
1038 /* movl $0x????????, -0x8(%rbp) */
1039 o(0xf845c7);
1040 gen_le32(seen_stack_size);
1042 /* save all register passing arguments */
/* the SSE registers are stored in reverse order (xmm7 first), each in a
   16-byte slot whose upper 8 bytes are cleared */
1043 for (i = 0; i < 8; i++) {
1044 loc -= 16;
1045 o(0xd60f66); /* movq */
1046 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1047 /* movq $0, loc+8(%rbp) */
1048 o(0x85c748);
1049 gen_le32(loc + 8);
1050 gen_le32(0);
/* then the integer argument registers, also in reverse order */
1052 for (i = 0; i < REGN; i++) {
1053 push_arg_reg(REGN-1-i);
1057 sym = func_type->ref;
1058 param_index = 0;
1059 reg_param_index = 0;
1060 sse_param_index = 0;
1062 /* if the function returns a structure, then add an
1063 implicit pointer parameter */
1064 func_vt = sym->type;
1065 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
1066 push_arg_reg(reg_param_index);
1067 param_addr = loc;
/* frame offset of the saved result pointer */
1069 func_vc = loc;
1070 param_index++;
1071 reg_param_index++;
1073 /* define parameters */
1074 while ((sym = sym->next) != NULL) {
1075 type = &sym->type;
1076 size = type_size(type, &align);
1077 size = (size + 7) & ~7;
1078 if (is_sse_float(type->t)) {
1079 if (sse_param_index < 8) {
1080 /* save arguments passed by register */
1081 loc -= 8;
1082 o(0xd60f66); /* movq */
1083 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
1084 param_addr = loc;
1085 } else {
1086 param_addr = addr;
1087 addr += size;
1089 sse_param_index++;
1091 } else if ((type->t & VT_BTYPE) == VT_STRUCT ||
1092 (type->t & VT_BTYPE) == VT_LDOUBLE) {
/* structs and long doubles are always taken from the caller's stack */
1093 param_addr = addr;
1094 addr += size;
1095 } else {
1096 if (reg_param_index < REGN) {
1097 /* save arguments passed by register */
1098 push_arg_reg(reg_param_index);
1099 param_addr = loc;
1100 } else {
1101 param_addr = addr;
1102 addr += 8;
1104 reg_param_index++;
1106 sym_push(sym->v & ~SYM_FIELD, type,
1107 VT_LOCAL | VT_LVAL, param_addr);
1108 param_index++;
1112 /* generate function epilog */
1113 void gfunc_epilog(void)
1115 int v, saved_ind;
1117 o(0xc9); /* leave */
1118 if (func_ret_sub == 0) {
1119 o(0xc3); /* ret */
1120 } else {
1121 o(0xc2); /* ret n */
1122 g(func_ret_sub);
1123 g(func_ret_sub >> 8);
1125 /* align local size to word & save local variables */
1126 v = (-loc + 15) & -16;
1127 saved_ind = ind;
1128 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1129 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1130 o(0xec8148); /* sub rsp, stacksize */
1131 gen_le32(v);
1132 ind = saved_ind;
1135 #endif /* not PE */
/* generate a jump to a label */
/* Emits a jmp with a 32-bit operand holding 't' and returns the offset
   of that operand, which can later be resolved with gsym(). */
int gjmp(int t)
{
    const int operand = psym(0xe9, t);

    return operand;
}
1143 /* generate a jump to a fixed address */
1144 void gjmp_addr(int a)
1146 int r;
1147 r = a - ind - 2;
1148 if (r == (char)r) {
1149 g(0xeb);
1150 g(r);
1151 } else {
1152 oad(0xe9, a - ind - 5);
1156 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1157 int gtst(int inv, int t)
1159 int v, *p;
1161 v = vtop->r & VT_VALMASK;
1162 if (v == VT_CMP) {
1163 /* fast case : can jump directly since flags are set */
1164 g(0x0f);
1165 t = psym((vtop->c.i - 16) ^ inv, t);
1166 } else if (v == VT_JMP || v == VT_JMPI) {
1167 /* && or || optimization */
1168 if ((v & 1) == inv) {
1169 /* insert vtop->c jump list in t */
1170 p = &vtop->c.i;
1171 while (*p != 0)
1172 p = (int *)(cur_text_section->data + *p);
1173 *p = t;
1174 t = vtop->c.i;
1175 } else {
1176 t = gjmp(t);
1177 gsym(vtop->c.i);
1179 } else {
1180 if (is_float(vtop->type.t) ||
1181 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1182 vpushi(0);
1183 gen_op(TOK_NE);
1185 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1186 /* constant jmp optimization */
1187 if ((vtop->c.i != 0) != inv)
1188 t = gjmp(t);
1189 } else {
1190 v = gv(RC_INT);
1191 orex(0,v,v,0x85);
1192 o(0xc0 + REG_VALUE(v) * 9);
1193 g(0x0f);
1194 t = psym(0x85 ^ inv, t);
1197 vtop--;
1198 return t;
1201 /* generate an integer binary operation */
/* The operands are vtop[-1] (left) and vtop[0] (right); the result
   replaces them on the value stack.
   ll: the left operand has a 64-bit type, so a 64-bit REX prefix is used
   uu: unsigned operation (only used to pick the division form)
   cc: the right operand is a plain constant (no lvalue, no symbol)
   For the arithmetic/logic group 'opc' is the 3-bit operation number:
   0 add, 1 or, 2 adc, 3 sbb, 4 and, 5 sub, 6 xor, 7 for every other
   operator.  When 'op' lies in TOK_ULT..TOK_GT the result is left in the
   flags as VT_CMP. */
1202 void gen_opi(int op)
1204 int r, fr, opc, c;
1205 int ll, uu, cc;
1207 ll = is64_type(vtop[-1].type.t);
1208 uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
1209 cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
1211 switch(op) {
1212 case '+':
1213 case TOK_ADDC1: /* add with carry generation */
1214 opc = 0;
1215 gen_op8:
/* the immediate forms are only used when the constant fits in 32 bits */
1216 if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
1217 /* constant case */
1218 vswap();
1219 r = gv(RC_INT);
1220 vswap();
1221 c = vtop->c.i;
1222 if (c == (char)c) {
1223 /* XXX: generate inc and dec for smaller code ? */
1224 orex(ll, r, 0, 0x83);
1225 o(0xc0 | (opc << 3) | REG_VALUE(r));
1226 g(c);
1227 } else {
1228 orex(ll, r, 0, 0x81);
1229 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1231 } else {
1232 gv2(RC_INT, RC_INT);
1233 r = vtop[-1].r;
1234 fr = vtop[0].r;
1235 orex(ll, r, fr, (opc << 3) | 0x01);
1236 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1238 vtop--;
1239 if (op >= TOK_ULT && op <= TOK_GT) {
1240 vtop->r = VT_CMP;
1241 vtop->c.i = op;
1243 break;
1244 case '-':
1245 case TOK_SUBC1: /* sub with carry generation */
1246 opc = 5;
1247 goto gen_op8;
1248 case TOK_ADDC2: /* add with carry use */
1249 opc = 2;
1250 goto gen_op8;
1251 case TOK_SUBC2: /* sub with carry use */
1252 opc = 3;
1253 goto gen_op8;
1254 case '&':
1255 opc = 4;
1256 goto gen_op8;
1257 case '^':
1258 opc = 6;
1259 goto gen_op8;
1260 case '|':
1261 opc = 1;
1262 goto gen_op8;
1263 case '*':
1264 gv2(RC_INT, RC_INT);
1265 r = vtop[-1].r;
1266 fr = vtop[0].r;
1267 orex(ll, fr, r, 0xaf0f); /* imul fr, r */
1268 o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
1269 vtop--;
1270 break;
1271 case TOK_SHL:
1272 opc = 4;
1273 goto gen_shift;
1274 case TOK_SHR:
1275 opc = 5;
1276 goto gen_shift;
1277 case TOK_SAR:
1278 opc = 7;
1279 gen_shift:
/* from here on 'opc' is the complete modrm base for the shift */
1280 opc = 0xc0 | (opc << 3);
1281 if (cc) {
1282 /* constant case */
1283 vswap();
1284 r = gv(RC_INT);
1285 vswap();
1286 orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
1287 o(opc | REG_VALUE(r));
/* the count is reduced modulo the operand width */
1288 g(vtop->c.i & (ll ? 63 : 31));
1289 } else {
1290 /* we generate the shift in ecx */
1291 gv2(RC_INT, RC_RCX);
1292 r = vtop[-1].r;
1293 orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
1294 o(opc | REG_VALUE(r));
1296 vtop--;
1297 break;
1298 case TOK_UDIV:
1299 case TOK_UMOD:
1300 uu = 1;
1301 goto divmod;
1302 case '/':
1303 case '%':
1304 case TOK_PDIV:
1305 uu = 0;
1306 divmod:
1307 /* first operand must be in eax */
1308 /* XXX: need better constraint for second operand */
1309 gv2(RC_RAX, RC_RCX);
1310 r = vtop[-1].r;
1311 fr = vtop[0].r;
1312 vtop--;
1313 save_reg(TREG_RDX);
1314 orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1315 orex(ll, fr, 0, 0xf7); /* div fr, %eax */
1316 o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
/* the remainder is taken from rdx, the quotient from rax */
1317 if (op == '%' || op == TOK_UMOD)
1318 r = TREG_RDX;
1319 else
1320 r = TREG_RAX;
1321 vtop->r = r;
1322 break;
1323 default:
1324 opc = 7;
1325 goto gen_op8;
/* Generate a 'long long' integer operation. On this target the work is
   simply forwarded to gen_opi(), which receives the same operator. */
void gen_opl(int op)
{
    gen_opi(op);
}
1334 /* generate a floating point operation 'v = t1 op t2' instruction. The
1335 two operands are guaranted to have the same floating point type */
1336 /* XXX: need to use ST1 too */
1337 void gen_opf(int op)
1339 int a, ft, fc, swapped, r;
1340 int float_type =
1341 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1343 /* convert constants to memory references */
1344 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1345 vswap();
1346 gv(float_type);
1347 vswap();
1349 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1350 gv(float_type);
1352 /* must put at least one value in the floating point register */
1353 if ((vtop[-1].r & VT_LVAL) &&
1354 (vtop[0].r & VT_LVAL)) {
1355 vswap();
1356 gv(float_type);
1357 vswap();
1359 swapped = 0;
1360 /* swap the stack if needed so that t1 is the register and t2 is
1361 the memory reference */
1362 if (vtop[-1].r & VT_LVAL) {
1363 vswap();
1364 swapped = 1;
1366 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1367 if (op >= TOK_ULT && op <= TOK_GT) {
1368 /* load on stack second operand */
1369 load(TREG_ST0, vtop);
1370 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1371 if (op == TOK_GE || op == TOK_GT)
1372 swapped = !swapped;
1373 else if (op == TOK_EQ || op == TOK_NE)
1374 swapped = 0;
1375 if (swapped)
1376 o(0xc9d9); /* fxch %st(1) */
1377 o(0xe9da); /* fucompp */
1378 o(0xe0df); /* fnstsw %ax */
1379 if (op == TOK_EQ) {
1380 o(0x45e480); /* and $0x45, %ah */
1381 o(0x40fC80); /* cmp $0x40, %ah */
1382 } else if (op == TOK_NE) {
1383 o(0x45e480); /* and $0x45, %ah */
1384 o(0x40f480); /* xor $0x40, %ah */
1385 op = TOK_NE;
1386 } else if (op == TOK_GE || op == TOK_LE) {
1387 o(0x05c4f6); /* test $0x05, %ah */
1388 op = TOK_EQ;
1389 } else {
1390 o(0x45c4f6); /* test $0x45, %ah */
1391 op = TOK_EQ;
1393 vtop--;
1394 vtop->r = VT_CMP;
1395 vtop->c.i = op;
1396 } else {
1397 /* no memory reference possible for long double operations */
1398 load(TREG_ST0, vtop);
1399 swapped = !swapped;
1401 switch(op) {
1402 default:
1403 case '+':
1404 a = 0;
1405 break;
1406 case '-':
1407 a = 4;
1408 if (swapped)
1409 a++;
1410 break;
1411 case '*':
1412 a = 1;
1413 break;
1414 case '/':
1415 a = 6;
1416 if (swapped)
1417 a++;
1418 break;
1420 ft = vtop->type.t;
1421 fc = vtop->c.ul;
1422 o(0xde); /* fxxxp %st, %st(1) */
1423 o(0xc1 + (a << 3));
1424 vtop--;
1426 } else {
1427 if (op >= TOK_ULT && op <= TOK_GT) {
1428 /* if saved lvalue, then we must reload it */
1429 r = vtop->r;
1430 fc = vtop->c.ul;
1431 if ((r & VT_VALMASK) == VT_LLOCAL) {
1432 SValue v1;
1433 r = get_reg(RC_INT);
1434 v1.type.t = VT_INT;
1435 v1.r = VT_LOCAL | VT_LVAL;
1436 v1.c.ul = fc;
1437 load(r, &v1);
1438 fc = 0;
1441 if (op == TOK_EQ || op == TOK_NE) {
1442 swapped = 0;
1443 } else {
1444 if (op == TOK_LE || op == TOK_LT)
1445 swapped = !swapped;
1446 if (op == TOK_LE || op == TOK_GE) {
1447 op = 0x93; /* setae */
1448 } else {
1449 op = 0x97; /* seta */
1453 if (swapped) {
1454 o(0x7e0ff3); /* movq */
1455 gen_modrm(1, r, vtop->sym, fc);
1457 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1458 o(0x66);
1460 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1461 o(0xc8);
1462 } else {
1463 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1464 o(0x66);
1466 o(0x2e0f); /* ucomisd */
1467 gen_modrm(0, r, vtop->sym, fc);
1470 vtop--;
1471 vtop->r = VT_CMP;
1472 vtop->c.i = op;
1473 } else {
1474 /* no memory reference possible for long double operations */
1475 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1476 load(TREG_XMM0, vtop);
1477 swapped = !swapped;
1479 switch(op) {
1480 default:
1481 case '+':
1482 a = 0;
1483 break;
1484 case '-':
1485 a = 4;
1486 break;
1487 case '*':
1488 a = 1;
1489 break;
1490 case '/':
1491 a = 6;
1492 break;
1494 ft = vtop->type.t;
1495 fc = vtop->c.ul;
1496 if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1497 o(0xde); /* fxxxp %st, %st(1) */
1498 o(0xc1 + (a << 3));
1499 } else {
1500 /* if saved lvalue, then we must reload it */
1501 r = vtop->r;
1502 if ((r & VT_VALMASK) == VT_LLOCAL) {
1503 SValue v1;
1504 r = get_reg(RC_INT);
1505 v1.type.t = VT_INT;
1506 v1.r = VT_LOCAL | VT_LVAL;
1507 v1.c.ul = fc;
1508 load(r, &v1);
1509 fc = 0;
1511 if (swapped) {
1512 /* movq %xmm0,%xmm1 */
1513 o(0x7e0ff3);
1514 o(0xc8);
1515 load(TREG_XMM0, vtop);
1516 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1517 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1518 o(0xf2);
1519 } else {
1520 o(0xf3);
1522 o(0x0f);
1523 o(0x58 + a);
1524 o(0xc1);
1525 } else {
1526 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1527 o(0xf2);
1528 } else {
1529 o(0xf3);
1531 o(0x0f);
1532 o(0x58 + a);
1533 gen_modrm(0, r, vtop->sym, fc);
1536 vtop--;
1541 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1542 and 'long long' cases. */
1543 void gen_cvt_itof(int t)
1545 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1546 save_reg(TREG_ST0);
1547 gv(RC_INT);
1548 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1549 /* signed long long to float/double/long double (unsigned case
1550 is handled generically) */
1551 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1552 o(0x242cdf); /* fildll (%rsp) */
1553 o(0x08c48348); /* add $8, %rsp */
1554 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1555 (VT_INT | VT_UNSIGNED)) {
1556 /* unsigned int to float/double/long double */
1557 o(0x6a); /* push $0 */
1558 g(0x00);
1559 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1560 o(0x242cdf); /* fildll (%rsp) */
1561 o(0x10c48348); /* add $16, %rsp */
1562 } else {
1563 /* int to float/double/long double */
1564 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1565 o(0x2404db); /* fildl (%rsp) */
1566 o(0x08c48348); /* add $8, %rsp */
1568 vtop->r = TREG_ST0;
1569 } else {
1570 save_reg(TREG_XMM0);
1571 gv(RC_INT);
1572 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
1573 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1574 (VT_INT | VT_UNSIGNED) ||
1575 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1576 o(0x48); /* REX */
1578 o(0x2a0f);
1579 o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1580 vtop->r = TREG_XMM0;
1584 /* convert from one floating point type to another */
1585 void gen_cvt_ftof(int t)
1587 int ft, bt, tbt;
1589 ft = vtop->type.t;
1590 bt = ft & VT_BTYPE;
1591 tbt = t & VT_BTYPE;
1593 if (bt == VT_FLOAT) {
1594 gv(RC_FLOAT);
1595 if (tbt == VT_DOUBLE) {
1596 o(0xc0140f); /* unpcklps */
1597 o(0xc05a0f); /* cvtps2pd */
1598 } else if (tbt == VT_LDOUBLE) {
1599 /* movss %xmm0,-0x10(%rsp) */
1600 o(0x44110ff3);
1601 o(0xf024);
1602 o(0xf02444d9); /* flds -0x10(%rsp) */
1603 vtop->r = TREG_ST0;
1605 } else if (bt == VT_DOUBLE) {
1606 gv(RC_FLOAT);
1607 if (tbt == VT_FLOAT) {
1608 o(0xc0140f66); /* unpcklpd */
1609 o(0xc05a0f66); /* cvtpd2ps */
1610 } else if (tbt == VT_LDOUBLE) {
1611 /* movsd %xmm0,-0x10(%rsp) */
1612 o(0x44110ff2);
1613 o(0xf024);
1614 o(0xf02444dd); /* fldl -0x10(%rsp) */
1615 vtop->r = TREG_ST0;
1617 } else {
1618 gv(RC_ST0);
1619 if (tbt == VT_DOUBLE) {
1620 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1621 /* movsd -0x10(%rsp),%xmm0 */
1622 o(0x44100ff2);
1623 o(0xf024);
1624 vtop->r = TREG_XMM0;
1625 } else if (tbt == VT_FLOAT) {
1626 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1627 /* movss -0x10(%rsp),%xmm0 */
1628 o(0x44100ff3);
1629 o(0xf024);
1630 vtop->r = TREG_XMM0;
1635 /* convert fp to int 't' type */
1636 void gen_cvt_ftoi(int t)
1638 int ft, bt, size, r;
1639 ft = vtop->type.t;
1640 bt = ft & VT_BTYPE;
1641 if (bt == VT_LDOUBLE) {
1642 gen_cvt_ftof(VT_DOUBLE);
1643 bt = VT_DOUBLE;
1646 gv(RC_FLOAT);
1647 if (t != VT_INT)
1648 size = 8;
1649 else
1650 size = 4;
1652 r = get_reg(RC_INT);
1653 if (bt == VT_FLOAT) {
1654 o(0xf3);
1655 } else if (bt == VT_DOUBLE) {
1656 o(0xf2);
1657 } else {
1658 assert(0);
1660 orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
1661 o(0xc0 + (REG_VALUE(r) << 3));
1662 vtop->r = r;
1665 /* computed goto support */
1666 void ggoto(void)
1668 gcall_or_jmp(1);
1669 vtop--;
1672 /* end of x86-64 code generator */
1673 /*************************************************************/
1674 #endif /* ! TARGET_DEFS_ONLY */
1675 /******************************************************/