Sorted out CMake on x86-64 and fixed silly XMM# bug introduced when working on Win64...
[tinycc.git] / x86_64-gen.c
blob3e892e31d0509bd4717feb6954ff40a4b3718291
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
26 #define NB_REGS 24
27 #define NB_ASM_REGS 8
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which does
31 assumptions on it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
34 #define RC_RAX 0x0004
35 #define RC_RCX 0x0008
36 #define RC_RDX 0x0010
37 #define RC_ST0 0x0080 /* only for long double */
38 #define RC_R8 0x0100
39 #define RC_R9 0x0200
40 #define RC_R10 0x0400
41 #define RC_R11 0x0800
42 #define RC_XMM0 0x1000
43 #define RC_XMM1 0x2000
44 #define RC_XMM2 0x4000
45 #define RC_XMM3 0x8000
46 #define RC_XMM4 0x10000
47 #define RC_XMM5 0x20000
48 #define RC_XMM6 0x40000
49 #define RC_XMM7 0x80000
50 #define RC_IRET RC_RAX /* function return: integer register */
51 #define RC_LRET RC_RDX /* function return: second integer register */
52 #define RC_FRET RC_XMM0 /* function return: float register */
53 #define RC_QRET RC_XMM1 /* function return: second float register */
55 /* pretty names for the registers */
56 enum {
57 TREG_RAX = 0,
58 TREG_RCX = 1,
59 TREG_RDX = 2,
60 TREG_RSI = 6,
61 TREG_RDI = 7,
63 TREG_R8 = 8,
64 TREG_R9 = 9,
65 TREG_R10 = 10,
66 TREG_R11 = 11,
68 TREG_XMM0 = 16,
69 TREG_XMM1 = 17,
70 TREG_XMM2 = 18,
71 TREG_XMM3 = 19,
72 TREG_XMM4 = 20,
73 TREG_XMM5 = 21,
74 TREG_XMM6 = 22,
75 TREG_XMM7 = 23,
77 TREG_ST0 = 4, // SP slot won't be used
79 TREG_MEM = 0x20,
82 #define REX_BASE(reg) (((reg) >> 3) & 1)
83 #define REG_VALUE(reg) ((reg) & 7)
85 /* return registers for function */
86 #define REG_IRET TREG_RAX /* single word int return register */
87 #define REG_LRET TREG_RDX /* second word return register (for long long) */
88 #define REG_FRET TREG_XMM0 /* float return register */
89 #define REG_QRET TREG_XMM1 /* second float return register */
91 /* defined if function parameters must be evaluated in reverse order */
92 #define INVERT_FUNC_PARAMS
94 /* pointer size, in bytes */
95 #define PTR_SIZE 8
97 /* long double size and alignment, in bytes */
98 #define LDOUBLE_SIZE 16
99 #define LDOUBLE_ALIGN 8
100 /* maximum alignment (for aligned attribute support) */
101 #define MAX_ALIGN 8
103 /******************************************************/
104 /* ELF defines */
106 #define EM_TCC_TARGET EM_X86_64
108 /* relocation type for 32 bit data relocation */
109 #define R_DATA_32 R_X86_64_32
110 #define R_DATA_PTR R_X86_64_64
111 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
112 #define R_COPY R_X86_64_COPY
114 #define ELF_START_ADDR 0x08048000
115 #define ELF_PAGE_SIZE 0x1000
117 /******************************************************/
118 #else /* ! TARGET_DEFS_ONLY */
119 /******************************************************/
120 #include "tcc.h"
121 #include <assert.h>
/* Register-class table, indexed by TREG_* number (NB_REGS = 24 entries).
   A zero entry means the slot is not available for allocation (rbx, rsp,
   rbp, rsi, rdi and r12-r15 are never handed out by get_reg()).
   NOTE(review): the web extraction dropped the filler `0,` entries and the
   closing brace; they are restored here to match the 24-entry layout implied
   by the TREG_* numbering above. */
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    /* st0 */ RC_ST0,  /* index 4 doubles as TREG_ST0; SP slot unused */
    0,
    0,
    0,
    RC_R8,   /* r8-r11: allocatable but not generic RC_INT */
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 an xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7
};
/* code offset just past the function prolog; gfunc_epilog() rewinds to
   func_sub_sp_offset - FUNC_PROLOG_SIZE to patch in the final frame setup */
static unsigned long func_sub_sp_offset;
/* byte count for a "ret n" epilog (callee-cleanup), 0 for plain "ret" */
static int func_ret_sub;
156 /* XXX: make it faster ? */
157 void g(int c)
159 int ind1;
160 ind1 = ind + 1;
161 if (ind1 > cur_text_section->data_allocated)
162 section_realloc(cur_text_section, ind1);
163 cur_text_section->data[ind] = c;
164 ind = ind1;
/* Emit an opcode sequence, least-significant byte first.
   Note: a zero argument emits nothing at all (callers rely on this,
   e.g. orex() passing 0 as the opcode). */
void o(unsigned int c)
{
    for (; c != 0; c >>= 8)
        g(c);
}
/* Emit a 16-bit value, little-endian. */
void gen_le16(int v)
{
    g(v & 0xff);
    g((v >> 8) & 0xff);
}
/* Emit a 32-bit value, little-endian. */
void gen_le32(int c)
{
    int i;
    for (i = 0; i < 4; i++) {
        g(c & 0xff);
        c >>= 8;
    }
}
/* Emit a 64-bit value, little-endian. */
void gen_le64(int64_t c)
{
    int i;
    for (i = 0; i < 8; i++) {
        g((int)(c & 0xff));
        c >>= 8;
    }
}
/* Emit an optional REX prefix followed by opcode byte 'b'.
   'll' selects REX.W (64-bit operand size); 'r' contributes REX.B and
   'r2' contributes REX.R.  Value-kind codes (>= VT_CONST) in r/r2 are
   treated as "no register".  If no REX bits are needed, only 'b' is
   emitted (b == 0 then emits nothing, since o(0) is a no-op). */
void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
/* output a symbol and patch all calls to it */
/* Walk the chain of forward-jump displacement slots starting at 't'
   (each slot holds the offset of the next one) and patch each with the
   rel32 displacement to target address 'a'. */
void gsym_addr(int t, int a)
{
    int n, *ptr;
    while (t) {
        ptr = (int *)(cur_text_section->data + t);
        n = *ptr; /* next value */
        *ptr = a - t - 4; /* rel32 is relative to the end of the 4-byte field */
        t = n;
    }
}
/* Resolve jump chain 't' to the current output position. */
void gsym(int t)
{
    gsym_addr(t, ind);
}
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad
233 static int is64_type(int t)
235 return ((t & VT_BTYPE) == VT_PTR ||
236 (t & VT_BTYPE) == VT_FUNC ||
237 (t & VT_BTYPE) == VT_LLONG);
240 static int is_sse_float(int t) {
241 int bt;
242 bt = t & VT_BTYPE;
243 return bt == VT_DOUBLE || bt == VT_FLOAT;
/* instruction + 4 bytes data. Return the address of the data */
/* Emits opcode 'c' followed by 32-bit immediate 's'; the returned offset
   of the immediate is used to build jump-patch chains (see gsym_addr). */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
    s = ind; /* reuse 's' to return the offset of the immediate field */
    ind = ind1;
    return s;
}
/* output 32-bit constant, with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_32);
    gen_le32(c);
}
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    gen_le64(c);
}
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    /* -4 compensates for the PC-relative base being the end of this field */
    gen_le32(c-4);
}
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    Section *sr;
    ElfW(Rela) *rel;
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    /* patch the addend of the relocation just emitted by greloc() */
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
    rel->r_addend = -4;
#else
    /* PE has no GOT; this path is debug output + a plain PC32 reloc.
       NOTE(review): looks like leftover diagnostics — verify intent. */
    printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
           cur_text_section->data[ind-3],
           cur_text_section->data[ind-2],
           cur_text_section->data[ind-1]
           );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
/* Emit a ModRM byte (plus displacement/relocation) addressing 'r' as the
   r/m operand, with 'op_reg' in the reg field.  'r' may be VT_CONST
   (rip-relative, optionally via GOT), VT_LOCAL (rbp-based), or a register
   (optionally with TREG_MEM marking an indirect access). */
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference: mod=00, rm=101 => rip-relative */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference: disp8 form */
            o(0x45 | op_reg);
            g(c);
        } else {
            /* disp32 form */
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        /* indirect through a register */
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r)); /* mod=10: disp32 */
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r)); /* mod=00: no disp */
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
/* generate a modrm reference. 'op_reg' contains the addtionnal 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}
/* generate a modrm reference. 'op_reg' contains the addtionnal 3
   opcode bits */
/* 64-bit variant: emits REX.W + opcode first, and routes non-static
   symbol accesses through the GOT when op_reg carries TREG_MEM. */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
362 /* load 'r' from value 'sv' */
363 void load(int r, SValue *sv)
365 int v, t, ft, fc, fr;
366 SValue v1;
368 #ifdef TCC_TARGET_PE
369 SValue v2;
370 sv = pe_getimport(sv, &v2);
371 #endif
373 fr = sv->r;
374 ft = sv->type.t;
375 fc = sv->c.ul;
377 #ifndef TCC_TARGET_PE
378 /* we use indirect access via got */
379 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
380 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
381 /* use the result register as a temporal register */
382 int tr = r | TREG_MEM;
383 if (is_float(ft)) {
384 /* we cannot use float registers as a temporal register */
385 tr = get_reg(RC_INT) | TREG_MEM;
387 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
389 /* load from the temporal register */
390 fr = tr | VT_LVAL;
392 #endif
394 v = fr & VT_VALMASK;
395 if (fr & VT_LVAL) {
396 int b, ll;
397 if (v == VT_LLOCAL) {
398 v1.type.t = VT_PTR;
399 v1.r = VT_LOCAL | VT_LVAL;
400 v1.c.ul = fc;
401 fr = r;
402 if (!(reg_classes[fr] & RC_INT))
403 fr = get_reg(RC_INT);
404 load(fr, &v1);
406 ll = 0;
407 if ((ft & VT_BTYPE) == VT_FLOAT) {
408 b = 0x6e0f66;
409 r = REG_VALUE(r); /* movd */
410 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
411 b = 0x7e0ff3; /* movq */
412 r = REG_VALUE(r);
413 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
414 b = 0xdb, r = 5; /* fldt */
415 } else if ((ft & VT_TYPE) == VT_BYTE) {
416 b = 0xbe0f; /* movsbl */
417 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
418 b = 0xb60f; /* movzbl */
419 } else if ((ft & VT_TYPE) == VT_SHORT) {
420 b = 0xbf0f; /* movswl */
421 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
422 b = 0xb70f; /* movzwl */
423 } else {
424 assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
425 || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
426 || ((ft & VT_BTYPE) == VT_FUNC));
427 ll = is64_type(ft);
428 b = 0x8b;
430 if (ll) {
431 gen_modrm64(b, r, fr, sv->sym, fc);
432 } else {
433 orex(ll, fr, r, b);
434 gen_modrm(r, fr, sv->sym, fc);
436 } else {
437 if (v == VT_CONST) {
438 if (fr & VT_SYM) {
439 #ifdef TCC_TARGET_PE
440 orex(1,0,r,0x8d);
441 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
442 gen_addrpc32(fr, sv->sym, fc);
443 #else
444 if (sv->sym->type.t & VT_STATIC) {
445 orex(1,0,r,0x8d);
446 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
447 gen_addrpc32(fr, sv->sym, fc);
448 } else {
449 orex(1,0,r,0x8b);
450 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
451 gen_gotpcrel(r, sv->sym, fc);
453 #endif
454 } else if (is64_type(ft)) {
455 orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
456 gen_le64(sv->c.ull);
457 } else {
458 orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
459 gen_le32(fc);
461 } else if (v == VT_LOCAL) {
462 orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
463 gen_modrm(r, VT_LOCAL, sv->sym, fc);
464 } else if (v == VT_CMP) {
465 orex(0,r,0,0);
466 if ((fc & ~0x100) != TOK_NE)
467 oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
468 else
469 oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
470 if (fc & 0x100)
472 /* This was a float compare. If the parity bit is
473 set the result was unordered, meaning false for everything
474 except TOK_NE, and true for TOK_NE. */
475 fc &= ~0x100;
476 o(0x037a + (REX_BASE(r) << 8));
478 orex(0,r,0, 0x0f); /* setxx %br */
479 o(fc);
480 o(0xc0 + REG_VALUE(r));
481 } else if (v == VT_JMP || v == VT_JMPI) {
482 t = v & 1;
483 orex(0,r,0,0);
484 oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
485 o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
486 gsym(fc);
487 orex(0,r,0,0);
488 oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
489 } else if (v != r) {
490 if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
491 if (v == TREG_ST0) {
492 /* gen_cvt_ftof(VT_DOUBLE); */
493 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
494 /* movsd -0x10(%rsp),%xmmN */
495 o(0x100ff2);
496 o(0x44 + REG_VALUE(r)*8); /* %xmmN */
497 o(0xf024);
498 } else {
499 assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
500 if ((ft & VT_BTYPE) == VT_FLOAT) {
501 o(0x100ff3);
502 } else {
503 assert((ft & VT_BTYPE) == VT_DOUBLE);
504 o(0x100ff2);
506 o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
508 } else if (r == TREG_ST0) {
509 assert((v >= TREG_XMM0) || (v <= TREG_XMM7));
510 /* gen_cvt_ftof(VT_LDOUBLE); */
511 /* movsd %xmmN,-0x10(%rsp) */
512 o(0x110ff2);
513 o(0x44 + REG_VALUE(r)*8); /* %xmmN */
514 o(0xf024);
515 o(0xf02444dd); /* fldl -0x10(%rsp) */
516 } else {
517 orex(1,r,v, 0x89);
518 o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.ul;
    fr = v->r & VT_VALMASK;
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 : fetch the symbol's address into r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        /* remember which REX prefix the store through r11 will need */
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    /* select the store opcode by basic type; o(pic) emits the saved REX
       prefix when the GOT path above was taken (o(0) emits nothing) */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;   /* /7 opcode extension selects fstpt */
    } else {
        if (bt == VT_SHORT)
            o(0x66); /* operand-size override */
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89; /* defer: gen_modrm64 will emit REX.W + opcode */
        else
            orex(0, 0, r, 0x89);
    }

    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
        if (op64)
            o(op64);
        o(3 + (r << 3)); /* ModRM: reg=r, rm=011 (%r11 with REX.B) */
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
/* 'is_jmp' is '1' if it is a jump */
/* Emit a call (or tail-jump) to the function value on top of the value
   stack: rel32 call/jmp for constants, otherwise indirect through r11. */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym,
                   ind + 1, R_X86_64_PC32);
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        }
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
#ifdef TCC_TARGET_PE

/* Win64 calling convention: first four arguments in rcx/rdx/r8/r9 */
#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}

/* high-water mark of scratch stack space used by outgoing calls;
   folded into the frame size by gfunc_epilog() */
static int func_scratch;
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This functions pops all the
   parameters and the function address. */

/* Emit instruction 'b' with an rsp-relative operand at offset 'd',
   choosing the disp8 or disp32 ModRM form.  Bit 0x100 in 'r' means
   "no register operand" (the 3 bits are an opcode extension). */
void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3)); /* disp8 form */
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3)); /* disp32 form */
        gen_le32(d);
    }
}
664 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
665 ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
666 *ret_align = 1; // Never have to re-align return values for x86-64
667 int size, align;
668 size = type_size(vt, &align);
669 ret->ref = NULL;
670 if (size > 8) {
671 return 1;
672 } else if (size > 4) {
673 ret->t = VT_LLONG;
674 return 0;
675 } else if (size > 2) {
676 ret->t = VT_INT;
677 return 0;
678 } else if (size > 1) {
679 ret->t = VT_SHORT;
680 return 0;
681 } else {
682 ret->t = VT_BYTE;
683 return 0;
687 int gfunc_arg_size(CType *type) {
688 if (type->t & (VT_ARRAY|VT_BITFIELD))
689 return 8;
690 int align;
691 return type_size(type, &align);
/* Win64 function call: every argument gets an 8-byte slot (at least
   REGN slots of shadow space); arguments wider than 8 bytes are copied
   into scratch space above the slots and passed by reference. */
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    /* shadow space: at least REGN slots even for fewer arguments */
    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        --arg;

        SValue *sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store: lea scratch(%rsp), r */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            /* fstpt into the scratch area */
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    /* record the scratch high-water mark for gfunc_epilog() */
    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    /* second pass: move each argument (or a pointer to its scratch
       copy) into its register or stack slot, last argument first */
    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (size > 8) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                /* pass the address of the scratch copy on the stack */
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                /* pass the address in an argument register */
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx : Win64 varargs need the value
                       mirrored in the integer register too */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    /* small struct: reinterpret as the integer type
                       of the same size so gv() loads it directly */
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);
    vtop--;
}
809 #define FUNC_PROLOG_SIZE 11
/* generate function prolog of type 't' (Win64 variant) */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2; /* skip saved rbp + return address */
    /* reserve room for the frame-setup code, patched by gfunc_epilog() */
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        /* spill the hidden sret pointer into its home slot */
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            /* large argument: its home slot holds a pointer (VT_REF) */
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq xmmN to the home slot */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    /* for varargs, spill the remaining argument registers so va_arg
       can read them from consecutive home slots */
    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
/* generate function epilog (Win64 variant) */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    /* go back and fill in the frame-setup code reserved by the prolog */
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        /* large frames must touch each page: let __chkstk do it */
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
911 #else
/* Emit "add $val, %rsp", using the short imm8 encoding when it fits. */
static void gadd_sp(int val)
{
    if (val != (char)val) {
        oad(0xc48148, val); /* add $imm32, %rsp */
        return;
    }
    o(0xc48348);            /* add $imm8, %rsp */
    g(val);
}
/* System V x86-64 ABI classification of an argument/return eightbyte:
   which register file (or memory) carries it. */
typedef enum X86_64_Mode {
    x86_64_mode_none,     /* empty / void */
    x86_64_mode_memory,   /* passed on the stack / returned via sret */
    x86_64_mode_integer,  /* general-purpose registers */
    x86_64_mode_sse,      /* SSE (xmm) registers */
    x86_64_mode_x87       /* x87 stack (long double) */
} X86_64_Mode;
931 static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b) {
932 if (a == b)
933 return a;
934 else if (a == x86_64_mode_none)
935 return b;
936 else if (b == x86_64_mode_none)
937 return a;
938 else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
939 return x86_64_mode_memory;
940 else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
941 return x86_64_mode_integer;
942 else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
943 return x86_64_mode_memory;
944 else
945 return x86_64_mode_sse;
/* Classify a single type (recursing into struct fields) into its
   SysV ABI register class. */
static X86_64_Mode classify_x86_64_inner(CType *ty) {
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        // Detect union: two fields at the same offset
        if (f->next && (f->c == f->next->c))
            return x86_64_mode_memory;

        /* merge the classes of all fields */
        mode = x86_64_mode_none;
        for (; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }

    /* NOTE(review): unreachable for valid VT_BTYPE values, but with
       NDEBUG the function would fall off without a return — consider
       returning x86_64_mode_memory here. */
    assert(0);
}
/* Classify an argument/return type for the SysV ABI.  Returns the mode,
   stores the 8-byte-rounded size in '*psize', the number of registers
   consumed in '*reg_count', and (when 'ret' is non-NULL) the surrogate
   type gv() should use to load the value. */
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *reg_count) {
    X86_64_Mode mode;
    int size, align, ret_t;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        /* arrays/bitfields: treated as a single pointer-sized word */
        *psize = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;

        if (size > 16) {
            /* anything larger than two eightbytes goes to memory */
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            }
        }
    }

    /* NOTE(review): on the memory/none paths ret_t (and *reg_count) are
       left unset; callers appear to ignore 'ret' in those cases, but
       'ret->t' is still assigned an indeterminate value — verify. */
    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}
/* Map a type's ABI class onto the va_arg fetch strategy used by the
   runtime's stdarg implementation. */
ST_FUNC int classify_x86_64_va_arg(CType *ty) {
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}
1054 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
1055 int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
1056 int size, reg_count;
1057 *ret_align = 1; // Never have to re-align return values for x86-64
1058 return (classify_x86_64_arg(vt, ret, &size, &reg_count) == x86_64_mode_memory);
/* SysV calling convention: first six integer arguments */
#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

/* Stage rdx/rcx arguments in r10/r11 instead, because gv() may still
   allocate rdx/rcx while later arguments are evaluated; they are
   copied into place just before the call. */
static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This functions pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, i, j, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float arguments */
    args_size = 0;
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &reg_count);
        switch (mode) {
        case x86_64_mode_memory:
        case x86_64_mode_x87:
            args_size += size;
            break;

        case x86_64_mode_sse:
            nb_sse_args += reg_count;
            if (nb_sse_args > 8) args_size += size;
            break;

        case x86_64_mode_integer:
            nb_reg_args += reg_count;
            if (nb_reg_args > REGN) args_size += size;
            break;
        }
    }

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;

    /* adjust stack to align SSE boundary */
    if (args_size &= 15) {
        /* fetch cpu flag before the following sub will change the value */
        if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
            gv(RC_INT);

        args_size = 16 - args_size;
        o(0x48);
        oad(0xec81, args_size); /* sub $xxx, %rsp */
    }

    /* first pass: push every stack-passed argument, consuming the
       value stack as we go; register-passed ones are kept for pass 2 */
    for(i = 0; i < nb_args;) {
        /* Swap argument to top, it will possibly be changed here,
           and might use more temps. At the end of the loop we keep
           in on the stack and swap it back to its original position
           if it is a register. */
        SValue tmp = vtop[0];
        vtop[0] = vtop[-i];
        vtop[-i] = tmp;

        mode = classify_x86_64_arg(&vtop->type, NULL, &size, &reg_count);

        int arg_stored = 1;
        switch (vtop->type.t & VT_BTYPE) {
        case VT_STRUCT:
            if (mode == x86_64_mode_sse) {
                if (sse_reg > 8)
                    sse_reg -= reg_count;
                else
                    arg_stored = 0;
            } else if (mode == x86_64_mode_integer) {
                if (gen_reg > REGN)
                    gen_reg -= reg_count;
                else
                    arg_stored = 0;
            }

            if (arg_stored) {
                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore(); /* copy the struct into the freshly-made slot */
            args_size += size;
            }
            break;

        case VT_LDOUBLE:
            /* long double always goes on the stack via fstpt */
            gv(RC_ST0);
            size = LDOUBLE_SIZE;
            oad(0xec8148, size); /* sub $xxx, %rsp */
            o(0x7cdb); /* fstpt 0(%rsp) */
            g(0x24);
            g(0x00);
            args_size += size;
            break;

        case VT_FLOAT:
        case VT_DOUBLE:
            assert(mode == x86_64_mode_sse);
            if (sse_reg > 8) {
                --sse_reg;
                r = gv(RC_FLOAT);
                o(0x50); /* push $rax : just makes an 8-byte slot */
                /* movq %xmmN, (%rsp) */
                o(0xd60f66);
                o(0x04 + REG_VALUE(r)*8);
                o(0x24);
                args_size += size;
            } else {
                arg_stored = 0;
            }
            break;

        default:
            assert(mode == x86_64_mode_integer);
            /* simple type */
            /* XXX: implicit cast ? */
            if (gen_reg > REGN) {
                --gen_reg;
                r = gv(RC_INT);
                orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                args_size += size;
            } else {
                arg_stored = 0;
            }
            break;
        }

        /* And swap the argument back to it's original position.  */
        tmp = vtop[0];
        vtop[0] = vtop[-i];
        vtop[-i] = tmp;

        if (arg_stored) {
            vrotb(i+1);
            assert(vtop->type.t == tmp.type.t);
            vpop();
            --nb_args;
        } else {
            ++i;
        }
    }

    /* XXX This should be superfluous.  */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            gen_reg -= reg_count;
            r = gv(RC_INT);
            int d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                /* second eightbyte of a VT_QLONG pair lives in r2 */
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here.  */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    /* varargs protocol: %al holds the number of SSE registers used */
    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}
1295 #define FUNC_PROLOG_SIZE 11
1297 static void push_arg_reg(int i) {
1298 loc -= 8;
1299 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
1302 /* generate function prolog of type 't' */
1303 void gfunc_prolog(CType *func_type)
/* NOTE(review): this extract is missing lines (the embedded original line
   numbers are not contiguous -- mostly closing braces); code kept verbatim. */
1305 X86_64_Mode mode;
/* NOTE(review): 'align' is declared but not used in the visible code --
   confirm against the missing lines */
1306 int i, addr, align, size, reg_count;
1307 int param_addr, reg_param_index, sse_param_index;
1308 Sym *sym;
1309 CType *type;
1311 sym = func_type->ref;
/* stack-passed parameters start above the saved %rbp and return address */
1312 addr = PTR_SIZE * 2;
1313 loc = 0;
/* reserve room for the prolog instructions; they are emitted later by
   gfunc_epilog (which rewinds ind by FUNC_PROLOG_SIZE) once the final
   frame size is known */
1314 ind += FUNC_PROLOG_SIZE;
1315 func_sub_sp_offset = ind;
1316 func_ret_sub = 0;
/* variadic function: build a register save area and record the counts
   needed by the va_* machinery */
1318 if (func_type->ref->c == FUNC_ELLIPSIS) {
1319 int seen_reg_num, seen_sse_num, seen_stack_size;
1320 seen_reg_num = seen_sse_num = 0;
1321 /* frame pointer and return address */
1322 seen_stack_size = PTR_SIZE * 2;
1323 /* count the number of seen parameters */
1324 sym = func_type->ref;
1325 while ((sym = sym->next) != NULL) {
1326 type = &sym->type;
1327 mode = classify_x86_64_arg(type, NULL, &size, &reg_count);
1328 switch (mode) {
1329 default:
1330 seen_stack_size += size;
1331 break;
1333 case x86_64_mode_integer:
/* NOTE(review): the cap here is 8 although only REGN integer argument
   registers exist -- TODO confirm against classify_x86_64_arg / REGN */
1334 if (seen_reg_num + reg_count <= 8) {
1335 seen_reg_num += reg_count;
1336 } else {
1337 seen_reg_num = 8;
1338 seen_stack_size += size;
1340 break;
1342 case x86_64_mode_sse:
1343 if (seen_sse_num + reg_count <= 8) {
1344 seen_sse_num += reg_count;
1345 } else {
1346 seen_sse_num = 8;
1347 seen_stack_size += size;
1349 break;
/* store the counters in the frame at fixed offsets -0x10/-0xc/-0x8(%rbp);
   presumably these are the va_list gp_offset / fp_offset / overflow size
   fields -- TODO confirm against the va_start/va_arg helpers */
1353 loc -= 16;
1354 /* movl $0x????????, -0x10(%rbp) */
1355 o(0xf045c7);
1356 gen_le32(seen_reg_num * 8);
1357 /* movl $0x????????, -0xc(%rbp) */
1358 o(0xf445c7);
1359 gen_le32(seen_sse_num * 16 + 48);
1360 /* movl $0x????????, -0x8(%rbp) */
1361 o(0xf845c7);
1362 gen_le32(seen_stack_size);
1364 /* save all register passing arguments */
/* each SSE register gets a 16-byte slot: movq stores the low 8 bytes,
   the upper 8 bytes are explicitly zeroed */
1365 for (i = 0; i < 8; i++) {
1366 loc -= 16;
1367 o(0xd60f66); /* movq */
1368 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1369 /* movq $0, loc+8(%rbp) */
1370 o(0x85c748);
1371 gen_le32(loc + 8);
1372 gen_le32(0);
/* spill the integer argument registers, highest-numbered first */
1374 for (i = 0; i < REGN; i++) {
1375 push_arg_reg(REGN-1-i);
1379 sym = func_type->ref;
1380 reg_param_index = 0;
1381 sse_param_index = 0;
1383 /* if the function returns a structure, then add an
1384 implicit pointer parameter */
1385 func_vt = sym->type;
1386 mode = classify_x86_64_arg(&func_vt, NULL, &size, &reg_count);
1387 if (mode == x86_64_mode_memory) {
/* the hidden return-slot pointer arrives in the first integer argument
   register; spill it and remember where (func_vc) */
1388 push_arg_reg(reg_param_index);
1389 func_vc = loc;
1390 reg_param_index++;
1392 /* define parameters */
1393 while ((sym = sym->next) != NULL) {
1394 type = &sym->type;
1395 mode = classify_x86_64_arg(type, NULL, &size, &reg_count);
1396 switch (mode) {
1397 case x86_64_mode_sse:
1398 if (sse_param_index + reg_count <= 8) {
1399 /* save arguments passed by register */
1400 loc -= reg_count * 8;
1401 param_addr = loc;
1402 for (i = 0; i < reg_count; ++i) {
1403 o(0xd60f66); /* movq */
1404 gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
1405 ++sse_param_index;
1407 } else {
/* out of SSE registers: the argument lives in the caller's frame */
1408 param_addr = addr;
1409 addr += size;
1410 sse_param_index += reg_count;
1412 break;
1414 case x86_64_mode_memory:
1415 case x86_64_mode_x87:
/* always passed on the stack */
1416 param_addr = addr;
1417 addr += size;
1418 break;
1420 case x86_64_mode_integer: {
1421 if (reg_param_index + reg_count <= REGN) {
1422 /* save arguments passed by register */
1423 loc -= reg_count * 8;
1424 param_addr = loc;
1425 for (i = 0; i < reg_count; ++i) {
1426 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
1427 ++reg_param_index;
1429 } else {
1430 param_addr = addr;
1431 addr += size;
1432 reg_param_index += reg_count;
1434 break;
/* make the parameter visible to the compiler as a local lvalue */
1437 sym_push(sym->v & ~SYM_FIELD, type,
1438 VT_LOCAL | VT_LVAL, param_addr);
1442 /* generate function epilog */
1443 void gfunc_epilog(void)
1445 int v, saved_ind;
1447 o(0xc9); /* leave */
1448 if (func_ret_sub == 0) {
1449 o(0xc3); /* ret */
1450 } else {
1451 o(0xc2); /* ret n */
1452 g(func_ret_sub);
1453 g(func_ret_sub >> 8);
1455 /* align local size to word & save local variables */
1456 v = (-loc + 15) & -16;
1457 saved_ind = ind;
1458 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1459 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1460 o(0xec8148); /* sub rsp, stacksize */
1461 gen_le32(v);
1462 ind = saved_ind;
1465 #endif /* not PE */
/* Generate a jump to a label: emits 0xe9 (jmp rel32); the displacement
   and patch-list handling is delegated to psym(). */
int gjmp(int t)
{
    return psym(0xe9, t);
}
1473 /* generate a jump to a fixed address */
1474 void gjmp_addr(int a)
1476 int r;
1477 r = a - ind - 2;
1478 if (r == (char)r) {
1479 g(0xeb);
1480 g(r);
1481 } else {
1482 oad(0xe9, a - ind - 5);
1486 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1487 int gtst(int inv, int t)
/* NOTE(review): lines are missing from this extract (embedded original
   line numbers jump); code kept verbatim. */
1489 int v, *p;
1491 v = vtop->r & VT_VALMASK;
1492 if (v == VT_CMP) {
1493 /* fast case : can jump directly since flags are set */
/* bit 0x100 in c.i marks a float comparison (set by gen_opf) */
1494 if (vtop->c.i & 0x100)
1496 /* This was a float compare. If the parity flag is set
1497 the result was unordered. For anything except != this
1498 means false and we don't jump (anding both conditions).
1499 For != this means true (oring both).
1500 Take care about inverting the test. We need to jump
1501 to our target if the result was unordered and test wasn't NE,
1502 otherwise if unordered we don't want to jump. */
1503 vtop->c.i &= ~0x100;
1504 if (!inv == (vtop->c.i != TOK_NE))
1505 o(0x067a); /* jp +6 */
1506 else
1508 g(0x0f);
1509 t = psym(0x8a, t); /* jp t */
/* conditional near jump 0x0f 0x8?: condition code derived from the
   comparison token stored in c.i; xor with inv flips the test */
1512 g(0x0f);
1513 t = psym((vtop->c.i - 16) ^ inv, t);
1514 } else if (v == VT_JMP || v == VT_JMPI) {
1515 /* && or || optimization */
1516 if ((v & 1) == inv) {
1517 /* insert vtop->c jump list in t */
/* walk the in-code linked list of patch sites to its end */
1518 p = &vtop->c.i;
1519 while (*p != 0)
1520 p = (int *)(cur_text_section->data + *p);
1521 *p = t;
1522 t = vtop->c.i;
1523 } else {
1524 t = gjmp(t);
1525 gsym(vtop->c.i);
1527 } else {
/* general case: floats and long long are first reduced via "!= 0" */
1528 if (is_float(vtop->type.t) ||
1529 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1530 vpushi(0);
1531 gen_op(TOK_NE);
1533 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1534 /* constant jmp optimization */
1535 if ((vtop->c.i != 0) != inv)
1536 t = gjmp(t);
1537 } else {
/* test r,r then jnz/jz (0x85 ^ inv selects the condition code) */
1538 v = gv(RC_INT);
1539 orex(0,v,v,0x85);
1540 o(0xc0 + REG_VALUE(v) * 9);
1541 g(0x0f);
1542 t = psym(0x85 ^ inv, t);
1545 vtop--;
1546 return t;
1549 /* generate an integer binary operation */
1550 void gen_opi(int op)
/* NOTE(review): lines are missing from this extract (embedded original
   line numbers jump); code kept verbatim. */
1552 int r, fr, opc, c;
1553 int ll, uu, cc;
/* ll: 64-bit operation; uu: unsigned operands; cc: rhs is a constant */
1555 ll = is64_type(vtop[-1].type.t);
1556 uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
1557 cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
1559 switch(op) {
1560 case '+':
1561 case TOK_ADDC1: /* add with carry generation */
1562 opc = 0;
1563 gen_op8:
/* immediate form only when the constant fits in 32 bits */
1564 if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
1565 /* constant case */
1566 vswap();
1567 r = gv(RC_INT);
1568 vswap();
1569 c = vtop->c.i;
1570 if (c == (char)c) {
1571 /* XXX: generate inc and dec for smaller code ? */
/* 0x83 /opc: op r, imm8 (sign-extended) */
1572 orex(ll, r, 0, 0x83);
1573 o(0xc0 | (opc << 3) | REG_VALUE(r));
1574 g(c);
1575 } else {
/* 0x81 /opc: op r, imm32 */
1576 orex(ll, r, 0, 0x81);
1577 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1579 } else {
/* register-register form: (opc << 3) | 0x01 selects the ALU opcode */
1580 gv2(RC_INT, RC_INT);
1581 r = vtop[-1].r;
1582 fr = vtop[0].r;
1583 orex(ll, r, fr, (opc << 3) | 0x01);
1584 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1586 vtop--;
/* comparisons leave their result in the flags */
1587 if (op >= TOK_ULT && op <= TOK_GT) {
1588 vtop->r = VT_CMP;
1589 vtop->c.i = op;
1591 break;
1592 case '-':
1593 case TOK_SUBC1: /* sub with carry generation */
1594 opc = 5;
1595 goto gen_op8;
1596 case TOK_ADDC2: /* add with carry use */
1597 opc = 2;
1598 goto gen_op8;
1599 case TOK_SUBC2: /* sub with carry use */
1600 opc = 3;
1601 goto gen_op8;
1602 case '&':
1603 opc = 4;
1604 goto gen_op8;
1605 case '^':
1606 opc = 6;
1607 goto gen_op8;
1608 case '|':
1609 opc = 1;
1610 goto gen_op8;
1611 case '*':
1612 gv2(RC_INT, RC_INT);
1613 r = vtop[-1].r;
1614 fr = vtop[0].r;
1615 orex(ll, fr, r, 0xaf0f); /* imul fr, r */
1616 o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
1617 vtop--;
1618 break;
1619 case TOK_SHL:
1620 opc = 4;
1621 goto gen_shift;
1622 case TOK_SHR:
1623 opc = 5;
1624 goto gen_shift;
1625 case TOK_SAR:
1626 opc = 7;
1627 gen_shift:
1628 opc = 0xc0 | (opc << 3);
1629 if (cc) {
1630 /* constant case */
/* mask the shift count to the operand width (63 or 31) */
1631 vswap();
1632 r = gv(RC_INT);
1633 vswap();
1634 orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
1635 o(opc | REG_VALUE(r));
1636 g(vtop->c.i & (ll ? 63 : 31));
1637 } else {
1638 /* we generate the shift in ecx */
1639 gv2(RC_INT, RC_RCX);
1640 r = vtop[-1].r;
1641 orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
1642 o(opc | REG_VALUE(r));
1644 vtop--;
1645 break;
1646 case TOK_UDIV:
1647 case TOK_UMOD:
1648 uu = 1;
1649 goto divmod;
1650 case '/':
1651 case '%':
1652 case TOK_PDIV:
1653 uu = 0;
1654 divmod:
1655 /* first operand must be in eax */
1656 /* XXX: need better constraint for second operand */
1657 gv2(RC_RAX, RC_RCX);
1658 r = vtop[-1].r;
1659 fr = vtop[0].r;
1660 vtop--;
/* rdx is clobbered: it receives the sign extension / remainder */
1661 save_reg(TREG_RDX);
1662 orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1663 orex(ll, fr, 0, 0xf7); /* div fr, %eax */
1664 o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
/* quotient lands in rax, remainder in rdx */
1665 if (op == '%' || op == TOK_UMOD)
1666 r = TREG_RDX;
1667 else
1668 r = TREG_RAX;
1669 vtop->r = r;
1670 break;
1671 default:
/* any remaining token is treated as a comparison (cmp = /7) */
1672 opc = 7;
1673 goto gen_op8;
/* Generate a long long operation: on x86-64 the 64-bit case is handled
   directly by gen_opi (which checks is64_type itself). */
void gen_opl(int op)
{
    gen_opi(op);
}
1682 /* generate a floating point operation 'v = t1 op t2' instruction. The
1683 two operands are guaranted to have the same floating point type */
1684 /* XXX: need to use ST1 too */
1685 void gen_opf(int op)
/* NOTE(review): lines are missing from this extract (embedded original
   line numbers jump); code kept verbatim.  Long doubles go through the
   x87 stack, float/double through SSE. */
1687 int a, ft, fc, swapped, r;
1688 int float_type =
1689 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1691 /* convert constants to memory references */
1692 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1693 vswap();
1694 gv(float_type);
1695 vswap();
1697 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1698 gv(float_type);
1700 /* must put at least one value in the floating point register */
1701 if ((vtop[-1].r & VT_LVAL) &&
1702 (vtop[0].r & VT_LVAL)) {
1703 vswap();
1704 gv(float_type);
1705 vswap();
1707 swapped = 0;
1708 /* swap the stack if needed so that t1 is the register and t2 is
1709 the memory reference */
1710 if (vtop[-1].r & VT_LVAL) {
1711 vswap();
1712 swapped = 1;
1714 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1715 if (op >= TOK_ULT && op <= TOK_GT) {
1716 /* load on stack second operand */
1717 load(TREG_ST0, vtop);
1718 save_reg(TREG_RAX); /* eax is used by FP comparison code */
/* canonicalize the comparison so only one fucompp order is needed */
1719 if (op == TOK_GE || op == TOK_GT)
1720 swapped = !swapped;
1721 else if (op == TOK_EQ || op == TOK_NE)
1722 swapped = 0;
1723 if (swapped)
1724 o(0xc9d9); /* fxch %st(1) */
1725 o(0xe9da); /* fucompp */
1726 o(0xe0df); /* fnstsw %ax */
1727 if (op == TOK_EQ) {
1728 o(0x45e480); /* and $0x45, %ah */
1729 o(0x40fC80); /* cmp $0x40, %ah */
1730 } else if (op == TOK_NE) {
1731 o(0x45e480); /* and $0x45, %ah */
1732 o(0x40f480); /* xor $0x40, %ah */
1733 op = TOK_NE;
1734 } else if (op == TOK_GE || op == TOK_LE) {
1735 o(0x05c4f6); /* test $0x05, %ah */
1736 op = TOK_EQ;
1737 } else {
1738 o(0x45c4f6); /* test $0x45, %ah */
1739 op = TOK_EQ;
/* result is now in the flags: mark the stack entry as VT_CMP */
1741 vtop--;
1742 vtop->r = VT_CMP;
1743 vtop->c.i = op;
1744 } else {
1745 /* no memory reference possible for long double operations */
1746 load(TREG_ST0, vtop);
1747 swapped = !swapped;
/* select the x87 arithmetic sub-opcode; '-' and '/' have reversed
   variants (a+1) when the operands were swapped */
1749 switch(op) {
1750 default:
1751 case '+':
1752 a = 0;
1753 break;
1754 case '-':
1755 a = 4;
1756 if (swapped)
1757 a++;
1758 break;
1759 case '*':
1760 a = 1;
1761 break;
1762 case '/':
1763 a = 6;
1764 if (swapped)
1765 a++;
1766 break;
1768 ft = vtop->type.t;
1769 fc = vtop->c.ul;
1770 o(0xde); /* fxxxp %st, %st(1) */
1771 o(0xc1 + (a << 3));
1772 vtop--;
1774 } else {
/* SSE path (float/double) */
1775 if (op >= TOK_ULT && op <= TOK_GT) {
1776 /* if saved lvalue, then we must reload it */
1777 r = vtop->r;
1778 fc = vtop->c.ul;
1779 if ((r & VT_VALMASK) == VT_LLOCAL) {
1780 SValue v1;
1781 r = get_reg(RC_INT);
1782 v1.type.t = VT_PTR;
1783 v1.r = VT_LOCAL | VT_LVAL;
1784 v1.c.ul = fc;
1785 load(r, &v1);
1786 fc = 0;
/* map the comparison to a condition code usable after ucomisd */
1789 if (op == TOK_EQ || op == TOK_NE) {
1790 swapped = 0;
1791 } else {
1792 if (op == TOK_LE || op == TOK_LT)
1793 swapped = !swapped;
1794 if (op == TOK_LE || op == TOK_GE) {
1795 op = 0x93; /* setae */
1796 } else {
1797 op = 0x97; /* seta */
1801 if (swapped) {
1802 gv(RC_FLOAT);
1803 vswap();
1805 assert(!(vtop[-1].r & VT_LVAL));
/* 0x66 prefix turns ucomiss into ucomisd for doubles */
1807 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1808 o(0x66);
1809 o(0x2e0f); /* ucomisd */
1811 if (vtop->r & VT_LVAL) {
1812 gen_modrm(vtop[-1].r, r, vtop->sym, fc);
1813 } else {
1814 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
/* 0x100 flags a float compare for gtst (parity handling) */
1817 vtop--;
1818 vtop->r = VT_CMP;
1819 vtop->c.i = op | 0x100;
1820 } else {
1821 assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
/* addss/subss/mulss/divss opcode offset (0x58 + a) */
1822 switch(op) {
1823 default:
1824 case '+':
1825 a = 0;
1826 break;
1827 case '-':
1828 a = 4;
1829 break;
1830 case '*':
1831 a = 1;
1832 break;
1833 case '/':
1834 a = 6;
1835 break;
1837 ft = vtop->type.t;
1838 fc = vtop->c.ul;
1839 assert((ft & VT_BTYPE) != VT_LDOUBLE);
1841 r = vtop->r;
1842 /* if saved lvalue, then we must reload it */
1843 if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
1844 SValue v1;
1845 r = get_reg(RC_INT);
1846 v1.type.t = VT_PTR;
1847 v1.r = VT_LOCAL | VT_LVAL;
1848 v1.c.ul = fc;
1849 load(r, &v1);
1850 fc = 0;
1853 assert(!(vtop[-1].r & VT_LVAL));
1854 if (swapped) {
1855 assert(vtop->r & VT_LVAL);
1856 gv(RC_FLOAT);
1857 vswap();
/* 0xf2 = double (sd), 0xf3 = float (ss) operand-size prefix */
1860 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1861 o(0xf2);
1862 } else {
1863 o(0xf3);
1865 o(0x0f);
1866 o(0x58 + a);
1868 if (vtop->r & VT_LVAL) {
1869 gen_modrm(vtop[-1].r, r, vtop->sym, fc);
1870 } else {
1871 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
1874 vtop--;
1879 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1880 and 'long long' cases. */
1881 void gen_cvt_itof(int t)
1883 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1884 save_reg(TREG_ST0);
1885 gv(RC_INT);
1886 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1887 /* signed long long to float/double/long double (unsigned case
1888 is handled generically) */
1889 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1890 o(0x242cdf); /* fildll (%rsp) */
1891 o(0x08c48348); /* add $8, %rsp */
1892 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1893 (VT_INT | VT_UNSIGNED)) {
1894 /* unsigned int to float/double/long double */
1895 o(0x6a); /* push $0 */
1896 g(0x00);
1897 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1898 o(0x242cdf); /* fildll (%rsp) */
1899 o(0x10c48348); /* add $16, %rsp */
1900 } else {
1901 /* int to float/double/long double */
1902 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1903 o(0x2404db); /* fildl (%rsp) */
1904 o(0x08c48348); /* add $8, %rsp */
1906 vtop->r = TREG_ST0;
1907 } else {
1908 int r = get_reg(RC_FLOAT);
1909 gv(RC_INT);
1910 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
1911 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1912 (VT_INT | VT_UNSIGNED) ||
1913 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1914 o(0x48); /* REX */
1916 o(0x2a0f);
1917 o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
1918 vtop->r = r;
1922 /* convert from one floating point type to another */
1923 void gen_cvt_ftof(int t)
1925 int ft, bt, tbt;
1927 ft = vtop->type.t;
1928 bt = ft & VT_BTYPE;
1929 tbt = t & VT_BTYPE;
1931 if (bt == VT_FLOAT) {
1932 gv(RC_FLOAT);
1933 if (tbt == VT_DOUBLE) {
1934 o(0x140f); /* unpcklps */
1935 o(0xc0 + REG_VALUE(vtop->r)*9);
1936 o(0x5a0f); /* cvtps2pd */
1937 o(0xc0 + REG_VALUE(vtop->r)*9);
1938 } else if (tbt == VT_LDOUBLE) {
1939 save_reg(RC_ST0);
1940 /* movss %xmm0,-0x10(%rsp) */
1941 o(0x110ff3);
1942 o(0x44 + REG_VALUE(vtop->r)*8);
1943 o(0xf024);
1944 o(0xf02444d9); /* flds -0x10(%rsp) */
1945 vtop->r = TREG_ST0;
1947 } else if (bt == VT_DOUBLE) {
1948 gv(RC_FLOAT);
1949 if (tbt == VT_FLOAT) {
1950 o(0x140f66); /* unpcklpd */
1951 o(0xc0 + REG_VALUE(vtop->r)*9);
1952 o(0x5a0f66); /* cvtpd2ps */
1953 o(0xc0 + REG_VALUE(vtop->r)*9);
1954 } else if (tbt == VT_LDOUBLE) {
1955 save_reg(RC_ST0);
1956 /* movsd %xmm0,-0x10(%rsp) */
1957 o(0x110ff2);
1958 o(0x44 + REG_VALUE(vtop->r)*8);
1959 o(0xf024);
1960 o(0xf02444dd); /* fldl -0x10(%rsp) */
1961 vtop->r = TREG_ST0;
1963 } else {
1964 gv(RC_ST0);
1965 int r = get_reg(RC_FLOAT);
1966 if (tbt == VT_DOUBLE) {
1967 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1968 /* movsd -0x10(%rsp),%xmm0 */
1969 o(0x100ff2);
1970 o(0x44 + REG_VALUE(r)*8);
1971 o(0xf024);
1972 vtop->r = r;
1973 } else if (tbt == VT_FLOAT) {
1974 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1975 /* movss -0x10(%rsp),%xmm0 */
1976 o(0x100ff3);
1977 o(0x44 + REG_VALUE(r)*8);
1978 o(0xf024);
1979 vtop->r = r;
1984 /* convert fp to int 't' type */
1985 void gen_cvt_ftoi(int t)
1987 int ft, bt, size, r;
1988 ft = vtop->type.t;
1989 bt = ft & VT_BTYPE;
1990 if (bt == VT_LDOUBLE) {
1991 gen_cvt_ftof(VT_DOUBLE);
1992 bt = VT_DOUBLE;
1995 gv(RC_FLOAT);
1996 if (t != VT_INT)
1997 size = 8;
1998 else
1999 size = 4;
2001 r = get_reg(RC_INT);
2002 if (bt == VT_FLOAT) {
2003 o(0xf3);
2004 } else if (bt == VT_DOUBLE) {
2005 o(0xf2);
2006 } else {
2007 assert(0);
2009 orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
2010 o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
2011 vtop->r = r;
2014 /* computed goto support */
2015 void ggoto(void)
2017 gcall_or_jmp(1);
2018 vtop--;
2021 /* end of x86-64 code generator */
2022 /*************************************************************/
2023 #endif /* ! TARGET_DEFS_ONLY */
2024 /******************************************************/