Got test1-3 working on x86-64.
[tinycc.git] / x86_64-gen.c
blob318384beeae3d66dd21017d7e1b7c1e3a82af0a5
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
/* number of available registers */
#define NB_REGS         5
#define NB_ASM_REGS     8

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x0020
#define RC_ST0     0x0040 /* only for long double */
#define RC_IRET    RC_RAX /* function return: integer register */
#define RC_LRET    RC_RDX /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */

/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_XMM0 = 3,
    TREG_ST0 = 4,

    TREG_RSI = 6,
    TREG_RDI = 7,
    TREG_R8 = 8,
    TREG_R9 = 9,

    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_MEM = 0x10,
};

/* REX.B/REX.R bit for a register number (1 for r8..r15) */
#define REX_BASE(reg) (((reg) >> 3) & 1)
/* low 3 bits of a register number, as encoded in ModRM */
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 8
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE  0x1000
101 #else /* ! TARGET_DEFS_ONLY */
102 /******************************************************/
103 #include "tcc.h"
104 #include <assert.h>
106 ST_DATA const int reg_classes[NB_REGS+7] = {
107 /* eax */ RC_INT | RC_RAX,
108 /* ecx */ RC_INT | RC_RCX,
109 /* edx */ RC_INT | RC_RDX,
110 /* xmm0 */ RC_FLOAT | RC_XMM0,
111 /* st0 */ RC_ST0,
115 RC_INT | RC_R8,
116 RC_INT | RC_R9,
117 RC_INT | RC_R10,
118 RC_INT | RC_R11
/* code offset just after the prolog placeholder; gfunc_epilog() rewinds
   'ind' to func_sub_sp_offset - FUNC_PROLOG_SIZE to patch the frame setup */
static unsigned long func_sub_sp_offset;
/* byte count for a "ret n" epilog (callee-pops conventions); 0 => plain ret */
static int func_ret_sub;
124 /* XXX: make it faster ? */
125 void g(int c)
127 int ind1;
128 ind1 = ind + 1;
129 if (ind1 > cur_text_section->data_allocated)
130 section_realloc(cur_text_section, ind1);
131 cur_text_section->data[ind] = c;
132 ind = ind1;
/* emit the bytes of 'c' least-significant first, stopping at the first
   all-zero remainder: o(0) emits nothing at all, a property callers rely
   on (e.g. store() passes pic == 0 when no REX prefix is wanted) */
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
/* emit a 16 bit little-endian value */
void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}
/* emit a 32 bit little-endian value */
void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}
/* emit a 64 bit little-endian value */
void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
169 void orex(int ll, int r, int r2, int b)
171 if ((r & VT_VALMASK) >= VT_CONST)
172 r = 0;
173 if ((r2 & VT_VALMASK) >= VT_CONST)
174 r2 = 0;
175 if (ll || REX_BASE(r) || REX_BASE(r2))
176 o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
177 o(b);
180 /* output a symbol and patch all calls to it */
181 void gsym_addr(int t, int a)
183 int n, *ptr;
184 while (t) {
185 ptr = (int *)(cur_text_section->data + t);
186 n = *ptr; /* next value */
187 *ptr = a - t - 4;
188 t = n;
192 void gsym(int t)
194 gsym_addr(t, ind);
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad
201 static int is64_type(int t)
203 return ((t & VT_BTYPE) == VT_PTR ||
204 (t & VT_BTYPE) == VT_FUNC ||
205 (t & VT_BTYPE) == VT_LLONG);
208 static int is_sse_float(int t) {
209 int bt;
210 bt = t & VT_BTYPE;
211 return bt == VT_DOUBLE || bt == VT_FLOAT;
215 /* instruction + 4 bytes data. Return the address of the data */
216 ST_FUNC int oad(int c, int s)
218 int ind1;
220 o(c);
221 ind1 = ind + 4;
222 if (ind1 > cur_text_section->data_allocated)
223 section_realloc(cur_text_section, ind1);
224 *(int *)(cur_text_section->data + ind) = s;
225 s = ind;
226 ind = ind1;
227 return s;
230 ST_FUNC void gen_addr32(int r, Sym *sym, int c)
232 if (r & VT_SYM)
233 greloc(cur_text_section, sym, ind, R_X86_64_32);
234 gen_le32(c);
237 /* output constant with relocation if 'r & VT_SYM' is true */
238 ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
240 if (r & VT_SYM)
241 greloc(cur_text_section, sym, ind, R_X86_64_64);
242 gen_le64(c);
245 /* output constant with relocation if 'r & VT_SYM' is true */
246 ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
248 if (r & VT_SYM)
249 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
250 gen_le32(c-4);
253 /* output got address with relocation */
254 static void gen_gotpcrel(int r, Sym *sym, int c)
256 #ifndef TCC_TARGET_PE
257 Section *sr;
258 ElfW(Rela) *rel;
259 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
260 sr = cur_text_section->reloc;
261 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
262 rel->r_addend = -4;
263 #else
264 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
265 cur_text_section->data[ind-3],
266 cur_text_section->data[ind-2],
267 cur_text_section->data[ind-1]
269 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
270 #endif
271 gen_le32(0);
272 if (c) {
273 /* we use add c, %xxx for displacement */
274 orex(1, r, 0, 0x81);
275 o(0xc0 + REG_VALUE(r));
276 gen_le32(c);
280 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
282 op_reg = REG_VALUE(op_reg) << 3;
283 if ((r & VT_VALMASK) == VT_CONST) {
284 /* constant memory reference */
285 o(0x05 | op_reg);
286 if (is_got) {
287 gen_gotpcrel(r, sym, c);
288 } else {
289 gen_addrpc32(r, sym, c);
291 } else if ((r & VT_VALMASK) == VT_LOCAL) {
292 /* currently, we use only ebp as base */
293 if (c == (char)c) {
294 /* short reference */
295 o(0x45 | op_reg);
296 g(c);
297 } else {
298 oad(0x85 | op_reg, c);
300 } else if ((r & VT_VALMASK) >= TREG_MEM) {
301 if (c) {
302 g(0x80 | op_reg | REG_VALUE(r));
303 gen_le32(c);
304 } else {
305 g(0x00 | op_reg | REG_VALUE(r));
307 } else {
308 g(0x00 | op_reg | REG_VALUE(r));
312 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
313 opcode bits */
314 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
316 gen_modrm_impl(op_reg, r, sym, c, 0);
319 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
320 opcode bits */
321 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
323 int is_got;
324 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
325 orex(1, r, op_reg, opcode);
326 gen_modrm_impl(op_reg, r, sym, c, is_got);
/* NOTE(review): this listing came through a web viewer that dropped all
   brace-only/blank lines and prefixed each line with its original line
   number. The body below is kept byte-identical to the extraction;
   restore from upstream tinycc before compiling. */
330 /* load 'r' from value 'sv' */
331 void load(int r, SValue *sv)
333 int v, t, ft, fc, fr;
334 SValue v1;
336 #ifdef TCC_TARGET_PE
337 SValue v2;
338 sv = pe_getimport(sv, &v2);
339 #endif
341 fr = sv->r;
342 ft = sv->type.t;
343 fc = sv->c.ul;
345 #ifndef TCC_TARGET_PE
346 /* we use indirect access via got */
347 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
348 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
349 /* use the result register as a temporal register */
350 int tr = r | TREG_MEM;
351 if (is_float(ft)) {
352 /* we cannot use float registers as a temporal register */
353 tr = get_reg(RC_INT) | TREG_MEM;
355 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
357 /* load from the temporal register */
358 fr = tr | VT_LVAL;
360 #endif
362 v = fr & VT_VALMASK;
363 if (fr & VT_LVAL) {
364 int b, ll;
/* VT_LLOCAL: the address itself lives in a stack slot — load the
   pointer into an integer register first, then load through it */
365 if (v == VT_LLOCAL) {
366 v1.type.t = VT_PTR;
367 v1.r = VT_LOCAL | VT_LVAL;
368 v1.c.ul = fc;
369 fr = r;
370 if (!(reg_classes[fr] & RC_INT))
371 fr = get_reg(RC_INT);
372 load(fr, &v1);
/* pick the load opcode by source type; narrow integers are widened */
374 ll = 0;
375 if ((ft & VT_BTYPE) == VT_FLOAT) {
376 b = 0x6e0f66, r = 0; /* movd */
377 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
378 b = 0x7e0ff3, r = 0; /* movq */
379 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
380 b = 0xdb, r = 5; /* fldt */
381 } else if ((ft & VT_TYPE) == VT_BYTE) {
382 b = 0xbe0f; /* movsbl */
383 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
384 b = 0xb60f; /* movzbl */
385 } else if ((ft & VT_TYPE) == VT_SHORT) {
386 b = 0xbf0f; /* movswl */
387 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
388 b = 0xb70f; /* movzwl */
389 } else {
390 ll = is64_type(ft);
391 b = 0x8b;
393 if (ll) {
394 gen_modrm64(b, r, fr, sv->sym, fc);
395 } else {
396 orex(ll, fr, r, b);
397 gen_modrm(r, fr, sv->sym, fc);
399 } else {
/* not an lvalue: materialize a constant, address or condition */
400 if (v == VT_CONST) {
401 if (fr & VT_SYM) {
402 #ifdef TCC_TARGET_PE
403 orex(1,0,r,0x8d);
404 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
405 gen_addrpc32(fr, sv->sym, fc);
406 #else
407 if (sv->sym->type.t & VT_STATIC) {
408 orex(1,0,r,0x8d);
409 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
410 gen_addrpc32(fr, sv->sym, fc);
411 } else {
412 orex(1,0,r,0x8b);
413 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
414 gen_gotpcrel(r, sv->sym, fc);
416 #endif
417 } else if (is64_type(ft)) {
418 orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
419 gen_le64(sv->c.ull);
420 } else {
421 orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
422 gen_le32(fc);
424 } else if (v == VT_LOCAL) {
425 orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
426 gen_modrm(r, VT_LOCAL, sv->sym, fc);
427 } else if (v == VT_CMP) {
/* turn the CPU flags left by a comparison into a 0/1 value */
428 orex(0,r,0,0);
429 if ((fc & ~0x100) != TOK_NE)
430 oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
431 else
432 oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
433 if (fc & 0x100)
435 /* This was a float compare. If the parity bit is
436 set the result was unordered, meaning false for everything
437 except TOK_NE, and true for TOK_NE. */
438 fc &= ~0x100;
439 o(0x037a + (REX_BASE(r) << 8));
441 orex(0,r,0, 0x0f); /* setxx %br */
442 o(fc);
443 o(0xc0 + REG_VALUE(r));
444 } else if (v == VT_JMP || v == VT_JMPI) {
/* pending jump chain: emit "mov $t; jmp over; <chain target>; mov $!t" */
445 t = v & 1;
446 orex(0,r,0,0);
447 oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
448 o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
449 gsym(fc);
450 orex(0,r,0,0);
451 oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
452 } else if (v != r) {
/* register-to-register move; ST0<->XMM0 goes through a stack slot */
453 if (r == TREG_XMM0) {
454 assert(v == TREG_ST0);
455 /* gen_cvt_ftof(VT_DOUBLE); */
456 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
457 /* movsd -0x10(%rsp),%xmm0 */
458 o(0x44100ff2);
459 o(0xf024);
460 } else if (r == TREG_ST0) {
461 assert(v == TREG_XMM0);
462 /* gen_cvt_ftof(VT_LDOUBLE); */
463 /* movsd %xmm0,-0x10(%rsp) */
464 o(0x44110ff2);
465 o(0xf024);
466 o(0xf02444dd); /* fldl -0x10(%rsp) */
467 } else {
468 orex(1,r,v, 0x89);
469 o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
/* NOTE(review): extraction-mangled listing (brace-only lines dropped,
   original line numbers prefixed); kept byte-identical — restore from
   upstream tinycc before compiling. */
475 /* store register 'r' in lvalue 'v' */
476 void store(int r, SValue *v)
478 int fr, bt, ft, fc;
479 int op64 = 0;
480 /* store the REX prefix in this variable when PIC is enabled */
481 int pic = 0;
483 #ifdef TCC_TARGET_PE
484 SValue v2;
485 v = pe_getimport(v, &v2);
486 #endif
488 ft = v->type.t;
489 fc = v->c.ul;
490 fr = v->r & VT_VALMASK;
491 bt = ft & VT_BTYPE;
493 #ifndef TCC_TARGET_PE
494 /* we need to access the variable via got */
495 if (fr == VT_CONST && (v->r & VT_SYM)) {
496 /* mov xx(%rip), %r11 */
497 o(0x1d8b4c);
498 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
/* pic becomes the REX prefix byte for the later store; note o(0)
   emits nothing, so o(pic) is a no-op when PIC is not in effect */
499 pic = is64_type(bt) ? 0x49 : 0x41;
501 #endif
503 /* XXX: incorrect if float reg to reg */
504 if (bt == VT_FLOAT) {
505 o(0x66);
506 o(pic);
507 o(0x7e0f); /* movd */
508 r = 0;
509 } else if (bt == VT_DOUBLE) {
510 o(0x66);
511 o(pic);
512 o(0xd60f); /* movq */
513 r = 0;
514 } else if (bt == VT_LDOUBLE) {
515 o(0xc0d9); /* fld %st(0) */
516 o(pic);
517 o(0xdb); /* fstpt */
518 r = 7;
519 } else {
520 if (bt == VT_SHORT)
521 o(0x66);
522 o(pic);
523 if (bt == VT_BYTE || bt == VT_BOOL)
524 orex(0, 0, r, 0x88);
525 else if (is64_type(bt))
526 op64 = 0x89;
527 else
528 orex(0, 0, r, 0x89);
530 if (pic) {
531 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
532 if (op64)
533 o(op64);
534 o(3 + (r << 3));
535 } else if (op64) {
536 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
537 gen_modrm64(op64, r, v->r, v->sym, fc);
538 } else if (fr != r) {
539 /* XXX: don't we really come here? */
540 abort();
541 o(0xc0 + fr + r * 8); /* mov r, fr */
543 } else {
544 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
545 gen_modrm(r, v->r, v->sym, fc);
546 } else if (fr != r) {
547 /* XXX: don't we really come here? */
548 abort();
549 o(0xc0 + fr + r * 8); /* mov r, fr */
554 /* 'is_jmp' is '1' if it is a jump */
555 static void gcall_or_jmp(int is_jmp)
557 int r;
558 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
559 /* constant case */
560 if (vtop->r & VT_SYM) {
561 /* relocation case */
562 greloc(cur_text_section, vtop->sym,
563 ind + 1, R_X86_64_PC32);
564 } else {
565 /* put an empty PC32 relocation */
566 put_elf_reloc(symtab_section, cur_text_section,
567 ind + 1, R_X86_64_PC32, 0);
569 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
570 } else {
571 /* otherwise, indirect call */
572 r = TREG_R11;
573 load(r, vtop);
574 o(0x41); /* REX */
575 o(0xff); /* call/jmp *r */
576 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
580 #ifdef TCC_TARGET_PE
582 #define REGN 4
583 static const uint8_t arg_regs[] = {
584 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
587 static int func_scratch;
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This functions pops all the
   parameters and the function address. */

/* emit opcode 'b' with an %rsp-relative operand at displacement 'd';
   bit 0x100 of 'r' suppresses the REX.B contribution of the register */
void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
605 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
606 ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
607 *ret_align = 1; // Never have to re-align return values for x86-64
608 return 1;
/* NOTE(review): Win64 variant; extraction-mangled listing (brace-only
   lines dropped, original line numbers prefixed); kept byte-identical —
   restore from upstream tinycc before compiling. */
611 void gfunc_call(int nb_args)
613 int size, align, r, args_size, i, d, j, bt, struct_size;
614 int nb_reg_args, gen_reg;
616 nb_reg_args = nb_args;
617 args_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;
619 /* for struct arguments, we need to call memcpy and the function
620 call breaks register passing arguments we are preparing.
621 So, we process arguments which will be passed by stack first. */
622 struct_size = args_size;
623 for(i = 0; i < nb_args; i++) {
624 SValue *sv = &vtop[-i];
625 bt = (sv->type.t & VT_BTYPE);
626 if (bt == VT_STRUCT) {
627 size = type_size(&sv->type, &align);
628 /* align to stack align size */
629 size = (size + 15) & ~15;
630 /* generate structure store */
631 r = get_reg(RC_INT);
632 gen_offs_sp(0x8d, r, struct_size);
633 struct_size += size;
635 /* generate memcpy call */
636 vset(&sv->type, r | VT_LVAL, 0);
637 vpushv(sv);
638 vstore();
639 --vtop;
641 } else if (bt == VT_LDOUBLE) {
643 gv(RC_ST0);
644 gen_offs_sp(0xdb, 0x107, struct_size);
645 struct_size += 16;
/* remember how much scratch space this call needed so the prolog can
   reserve it */
650 if (func_scratch < struct_size)
651 func_scratch = struct_size;
652 #if 1
653 for (i = 0; i < REGN; ++i)
654 save_reg(arg_regs[i]);
655 save_reg(TREG_RAX);
656 #endif
657 gen_reg = nb_reg_args;
658 struct_size = args_size;
/* second pass: place each argument in its register or stack slot */
660 for(i = 0; i < nb_args; i++) {
661 bt = (vtop->type.t & VT_BTYPE);
663 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
664 if (bt == VT_LDOUBLE)
665 size = 16;
666 else
667 size = type_size(&vtop->type, &align);
668 /* align to stack align size */
669 size = (size + 15) & ~15;
670 j = --gen_reg;
671 if (j >= REGN) {
672 d = TREG_RAX;
673 gen_offs_sp(0x8d, d, struct_size);
674 gen_offs_sp(0x89, d, j*8);
675 } else {
676 d = arg_regs[j];
677 gen_offs_sp(0x8d, d, struct_size);
679 struct_size += size;
681 } else if (is_sse_float(vtop->type.t)) {
682 gv(RC_FLOAT); /* only one float register */
683 j = --gen_reg;
684 if (j >= REGN) {
685 /* movq %xmm0, j*8(%rsp) */
686 gen_offs_sp(0xd60f66, 0x100, j*8);
687 } else {
688 /* movaps %xmm0, %xmmN */
689 o(0x280f);
690 o(0xc0 + (j << 3));
691 d = arg_regs[j];
692 /* mov %xmm0, %rxx */
693 o(0x66);
694 orex(1,d,0, 0x7e0f);
695 o(0xc0 + REG_VALUE(d));
697 } else {
698 j = --gen_reg;
699 if (j >= REGN) {
700 r = gv(RC_INT);
701 gen_offs_sp(0x89, r, j*8);
702 } else {
703 d = arg_regs[j];
704 if (d < NB_REGS) {
705 gv(reg_classes[d] & ~RC_INT);
706 } else {
707 r = gv(RC_INT);
708 if (d != r) {
709 orex(1,d,r, 0x89);
710 o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8);
716 vtop--;
718 save_regs(0);
719 gcall_or_jmp(0);
720 vtop--;
/* NOTE(review): Win64 variant; extraction-mangled listing (brace-only
   lines dropped, original line numbers prefixed); kept byte-identical —
   restore from upstream tinycc before compiling. */
724 #define FUNC_PROLOG_SIZE 11
726 /* generate function prolog of type 't' */
727 void gfunc_prolog(CType *func_type)
729 int addr, reg_param_index, bt;
730 Sym *sym;
731 CType *type;
733 func_ret_sub = 0;
734 func_scratch = 0;
735 loc = 0;
737 addr = PTR_SIZE * 2;
/* reserve FUNC_PROLOG_SIZE bytes; gfunc_epilog() comes back and fills
   them in once the frame size is known */
738 ind += FUNC_PROLOG_SIZE;
739 func_sub_sp_offset = ind;
740 reg_param_index = 0;
742 sym = func_type->ref;
744 /* if the function returns a structure, then add an
745 implicit pointer parameter */
746 func_vt = sym->type;
747 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
748 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
749 reg_param_index++;
750 addr += PTR_SIZE;
753 /* define parameters */
754 while ((sym = sym->next) != NULL) {
755 type = &sym->type;
756 bt = type->t & VT_BTYPE;
757 if (reg_param_index < REGN) {
758 /* save arguments passed by register */
759 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
761 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
762 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
763 } else {
764 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
766 reg_param_index++;
767 addr += PTR_SIZE;
/* for variadic functions, spill the remaining register args to their
   home slots so va_arg can walk them in memory */
770 while (reg_param_index < REGN) {
771 if (func_type->ref->c == FUNC_ELLIPSIS)
772 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
773 reg_param_index++;
774 addr += PTR_SIZE;
778 /* generate function epilog */
779 void gfunc_epilog(void)
781 int v, saved_ind;
783 o(0xc9); /* leave */
784 if (func_ret_sub == 0) {
785 o(0xc3); /* ret */
786 } else {
787 o(0xc2); /* ret n */
788 g(func_ret_sub);
789 g(func_ret_sub >> 8);
792 saved_ind = ind;
793 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
794 /* align local size to word & save local variables */
795 v = (func_scratch + -loc + 15) & -16;
797 if (v >= 4096) {
798 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
799 oad(0xb8, v); /* mov stacksize, %eax */
800 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
801 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
802 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
803 } else {
804 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
805 o(0xec8148); /* sub rsp, stacksize */
806 gen_le32(v);
809 cur_text_section->data_offset = saved_ind;
810 pe_add_unwind_data(ind, saved_ind, v);
811 ind = cur_text_section->data_offset;
814 #else
/* add 'val' to %rsp, using the short imm8 encoding when it fits */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
/* System V AMD64 ABI parameter/return classification classes */
typedef enum X86_64_Mode {
  x86_64_mode_none,
  x86_64_mode_memory,
  x86_64_mode_integer,
  x86_64_mode_sse,
  x86_64_mode_x87
} X86_64_Mode;
834 static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b) {
835 if (a == b)
836 return a;
837 else if (a == x86_64_mode_none)
838 return b;
839 else if (b == x86_64_mode_none)
840 return a;
841 else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
842 return x86_64_mode_memory;
843 else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
844 return x86_64_mode_integer;
845 else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
846 return x86_64_mode_memory;
847 else
848 return x86_64_mode_sse;
851 static X86_64_Mode classify_x86_64_inner(CType *ty) {
852 X86_64_Mode mode;
853 Sym *f;
855 switch (ty->t & VT_BTYPE) {
856 case VT_VOID: return x86_64_mode_none;
858 case VT_INT:
859 case VT_BYTE:
860 case VT_SHORT:
861 case VT_LLONG:
862 case VT_BOOL:
863 case VT_PTR:
864 case VT_ENUM: return x86_64_mode_integer;
866 case VT_FLOAT:
867 case VT_DOUBLE: return x86_64_mode_sse;
869 case VT_LDOUBLE: return x86_64_mode_x87;
871 case VT_STRUCT:
872 f = ty->ref;
874 // Detect union
875 if (f->next && (f->c == f->next->c))
876 return x86_64_mode_memory;
878 mode = x86_64_mode_none;
879 for (; f; f = f->next)
880 mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));
882 return mode;
/* NOTE(review): extraction-mangled listing (brace-only lines dropped,
   original line numbers prefixed); kept byte-identical — restore from
   upstream tinycc before compiling.
   NOTE(review): ret_t appears to be left unset when mode is
   x86_64_mode_memory/x87-with-ret or none, yet is copied into ret->t
   below when 'ret' is non-NULL — looks like a possible uninitialized
   read; TODO confirm callers never pass 'ret' for those modes. */
886 static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *reg_count) {
887 X86_64_Mode mode;
888 int size, align, ret_t;
890 if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
891 *psize = 8;
892 *reg_count = 1;
893 ret_t = ty->t;
894 mode = x86_64_mode_integer;
895 } else {
896 size = type_size(ty, &align);
/* round the footprint up to whole 8-byte words */
897 *psize = (size + 7) & ~7;
899 if (size > 16) {
900 mode = x86_64_mode_memory;
901 } else {
902 mode = classify_x86_64_inner(ty);
903 switch (mode) {
904 case x86_64_mode_integer:
905 if (size > 8) {
906 *reg_count = 2;
907 ret_t = VT_QLONG;
908 } else {
909 *reg_count = 1;
910 ret_t = (size > 4) ? VT_LLONG : VT_INT;
912 break;
914 case x86_64_mode_x87:
915 *reg_count = 1;
916 ret_t = VT_LDOUBLE;
917 break;
919 case x86_64_mode_sse:
920 if (size > 8) {
921 *reg_count = 2;
922 ret_t = VT_QFLOAT;
923 } else {
924 *reg_count = 1;
925 ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
927 break;
932 if (ret) {
933 ret->ref = NULL;
934 ret->t = ret_t;
937 return mode;
940 ST_FUNC int classify_x86_64_va_arg(CType *ty) {
941 /* This definition must be synced with stdarg.h */
942 enum __va_arg_type {
943 __va_gen_reg, __va_float_reg, __va_stack
945 int size, reg_count;
946 X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &reg_count);
947 switch (mode) {
948 default: return __va_stack;
949 case x86_64_mode_integer: return __va_gen_reg;
950 case x86_64_mode_sse: return __va_float_reg;
954 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
955 int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
956 int size, reg_count;
957 *ret_align = 1; // Never have to re-align return values for x86-64
958 return (classify_x86_64_arg(vt, ret, &size, &reg_count) == x86_64_mode_memory);
961 #define REGN 6
962 static const uint8_t arg_regs[REGN] = {
963 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
/* NOTE(review): SysV variant; extraction-mangled listing (brace-only
   lines dropped, original line numbers prefixed); kept byte-identical —
   restore from upstream tinycc before compiling. */
966 /* Generate function call. The function address is pushed first, then
967 all the parameters in call order. This functions pops all the
968 parameters and the function address. */
969 void gfunc_call(int nb_args)
971 X86_64_Mode mode;
972 CType type;
973 int size, align, r, args_size, i, j, reg_count;
974 int nb_reg_args = 0;
975 int nb_sse_args = 0;
976 int sse_reg, gen_reg;
978 /* calculate the number of integer/float arguments */
979 args_size = 0;
980 for(i = 0; i < nb_args; i++) {
981 mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &reg_count);
982 switch (mode) {
983 case x86_64_mode_memory:
984 case x86_64_mode_x87:
985 args_size += size;
986 break;
988 case x86_64_mode_sse:
989 nb_sse_args += reg_count;
990 if (nb_sse_args > 8) args_size += size;
991 break;
993 case x86_64_mode_integer:
994 nb_reg_args += reg_count;
995 if (nb_reg_args > REGN) args_size += size;
996 break;
1000 /* for struct arguments, we need to call memcpy and the function
1001 call breaks register passing arguments we are preparing.
1002 So, we process arguments which will be passed by stack first. */
1003 gen_reg = nb_reg_args;
1004 sse_reg = nb_sse_args;
1006 /* adjust stack to align SSE boundary */
1007 if (args_size &= 15) {
1008 /* fetch cpu flag before the following sub will change the value */
1009 if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
1010 gv(RC_INT);
1012 args_size = 16 - args_size;
1013 o(0x48);
1014 oad(0xec81, args_size); /* sub $xxx, %rsp */
/* first pass: push all arguments that overflow onto the stack */
1017 for(i = 0; i < nb_args; i++) {
1018 /* Swap argument to top, it will possibly be changed here,
1019 and might use more temps. All arguments must remain on the
1020 stack, so that get_reg can correctly evict some of them onto
1021 stack. We could use also use a vrott(nb_args) at the end
1022 of this loop, but this seems faster. */
1023 SValue tmp = vtop[0];
1024 vtop[0] = vtop[-i];
1025 vtop[-i] = tmp;
1026 mode = classify_x86_64_arg(&vtop->type, NULL, &size, &reg_count);
1027 switch (mode) {
1028 case x86_64_mode_memory:
1029 /* allocate the necessary size on stack */
1030 o(0x48);
1031 oad(0xec81, size); /* sub $xxx, %rsp */
1032 /* generate structure store */
1033 r = get_reg(RC_INT);
1034 orex(1, r, 0, 0x89); /* mov %rsp, r */
1035 o(0xe0 + REG_VALUE(r));
1036 vset(&vtop->type, r | VT_LVAL, 0);
1037 vswap();
1038 vstore();
1039 args_size += size;
1040 break;
1042 case x86_64_mode_x87:
1043 gv(RC_ST0);
1044 size = LDOUBLE_SIZE;
1045 oad(0xec8148, size); /* sub $xxx, %rsp */
1046 o(0x7cdb); /* fstpt 0(%rsp) */
1047 g(0x24);
1048 g(0x00);
1049 args_size += size;
1050 break;
1052 case x86_64_mode_sse:
1053 if (sse_reg > 8) {
1054 gv(RC_FLOAT);
1055 o(0x50); /* push $rax */
1056 /* movq %xmm0, (%rsp) */
1057 o(0x04d60f66);
1058 o(0x24);
1059 args_size += size;
1061 sse_reg -= reg_count;
1062 break;
1064 case x86_64_mode_integer:
1065 /* simple type */
1066 /* XXX: implicit cast ? */
1067 if (gen_reg > REGN) {
1068 r = gv(RC_INT);
1069 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
1070 args_size += size;
1072 gen_reg -= reg_count;
1073 break;
1076 /* And swap the argument back to it's original position. */
1077 tmp = vtop[0];
1078 vtop[0] = vtop[-i];
1079 vtop[-i] = tmp;
1082 /* XXX This should be superfluous. */
1083 save_regs(0); /* save used temporary registers */
1085 /* then, we prepare register passing arguments.
1086 Note that we cannot set RDX and RCX in this loop because gv()
1087 may break these temporary registers. Let's use R10 and R11
1088 instead of them */
1089 gen_reg = nb_reg_args;
1090 sse_reg = nb_sse_args;
1091 for(i = 0; i < nb_args; i++) {
1092 mode = classify_x86_64_arg(&vtop->type, &type, &size, &reg_count);
1093 /* Alter stack entry type so that gv() knows how to treat it */
1094 vtop->type = type;
1095 switch (mode) {
1096 default:
1097 break;
1099 case x86_64_mode_sse:
1100 if (sse_reg > 8) {
1101 sse_reg -= reg_count;
1102 } else {
1103 for (j = 0; j < reg_count; ++j) {
1104 --sse_reg;
1105 gv(RC_FLOAT); /* only one float register */
1106 /* movaps %xmm0, %xmmN */
1107 o(0x280f);
1108 o(0xc0 + (sse_reg << 3));
1111 break;
1113 case x86_64_mode_integer:
1114 /* simple type */
1115 /* XXX: implicit cast ? */
1116 if (gen_reg > 8) {
1117 gen_reg -= reg_count;
1118 } else {
1119 for (j = 0; j < reg_count; ++j) {
1120 --gen_reg;
1121 int d = arg_regs[gen_reg];
1122 r = gv(RC_INT);
1123 if (gen_reg == 2 || gen_reg == 3)
1124 /* gen_reg=2: r10, gen_reg=3: r11 */
1125 d = gen_reg + 8;
1126 orex(1,d,r,0x89); /* mov */
1127 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
1130 break;
1132 vtop--;
1135 /* We shouldn't have many operands on the stack anymore, but the
1136 call address itself is still there, and it might be in %eax
1137 (or edx/ecx) currently, which the below writes would clobber.
1138 So evict all remaining operands here. */
1139 save_regs(0);
1141 /* Copy R10 and R11 into RDX and RCX, respectively */
1142 if (nb_reg_args > 2) {
1143 o(0xd2894c); /* mov %r10, %rdx */
1144 if (nb_reg_args > 3) {
1145 o(0xd9894c); /* mov %r11, %rcx */
/* %al must hold the number of vector registers used by a variadic call */
1149 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
1150 gcall_or_jmp(0);
1151 if (args_size)
1152 gadd_sp(args_size);
1153 vtop--;
1157 #define FUNC_PROLOG_SIZE 11
1159 static void push_arg_reg(int i) {
1160 loc -= 8;
1161 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
/* NOTE(review): SysV variant; extraction-mangled listing (brace-only
   lines dropped, original line numbers prefixed); kept byte-identical —
   restore from upstream tinycc before compiling. */
1164 /* generate function prolog of type 't' */
1165 void gfunc_prolog(CType *func_type)
1167 X86_64_Mode mode;
1168 int i, addr, align, size, reg_count;
1169 int param_index, param_addr, reg_param_index, sse_param_index;
1170 Sym *sym;
1171 CType *type;
1173 sym = func_type->ref;
1174 addr = PTR_SIZE * 2;
1175 loc = 0;
/* reserve FUNC_PROLOG_SIZE bytes; gfunc_epilog() patches them later */
1176 ind += FUNC_PROLOG_SIZE;
1177 func_sub_sp_offset = ind;
1178 func_ret_sub = 0;
/* variadic functions: build the register save area and the three
   va_list counters (gp_offset, fp_offset, overflow size) */
1180 if (func_type->ref->c == FUNC_ELLIPSIS) {
1181 int seen_reg_num, seen_sse_num, seen_stack_size;
1182 seen_reg_num = seen_sse_num = 0;
1183 /* frame pointer and return address */
1184 seen_stack_size = PTR_SIZE * 2;
1185 /* count the number of seen parameters */
1186 sym = func_type->ref;
1187 while ((sym = sym->next) != NULL) {
1188 type = &sym->type;
1189 mode = classify_x86_64_arg(type, NULL, &size, &reg_count);
1190 switch (mode) {
1191 default:
1192 seen_stack_size += size;
1193 break;
1195 case x86_64_mode_integer:
1196 if (seen_reg_num + reg_count <= 8) {
1197 seen_reg_num += reg_count;
1198 } else {
1199 seen_reg_num = 8;
1200 seen_stack_size += size;
1202 break;
1204 case x86_64_mode_sse:
1205 if (seen_sse_num + reg_count <= 8) {
1206 seen_sse_num += reg_count;
1207 } else {
1208 seen_sse_num = 8;
1209 seen_stack_size += size;
1211 break;
1215 loc -= 16;
1216 /* movl $0x????????, -0x10(%rbp) */
1217 o(0xf045c7);
1218 gen_le32(seen_reg_num * 8);
1219 /* movl $0x????????, -0xc(%rbp) */
1220 o(0xf445c7);
1221 gen_le32(seen_sse_num * 16 + 48);
1222 /* movl $0x????????, -0x8(%rbp) */
1223 o(0xf845c7);
1224 gen_le32(seen_stack_size);
1226 /* save all register passing arguments */
1227 for (i = 0; i < 8; i++) {
1228 loc -= 16;
1229 o(0xd60f66); /* movq */
1230 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1231 /* movq $0, loc+8(%rbp) */
1232 o(0x85c748);
1233 gen_le32(loc + 8);
1234 gen_le32(0);
1236 for (i = 0; i < REGN; i++) {
1237 push_arg_reg(REGN-1-i);
1241 sym = func_type->ref;
1242 param_index = 0;
1243 reg_param_index = 0;
1244 sse_param_index = 0;
1246 /* if the function returns a structure, then add an
1247 implicit pointer parameter */
1248 func_vt = sym->type;
1249 mode = classify_x86_64_arg(&func_vt, NULL, &size, &reg_count);
1250 if (mode == x86_64_mode_memory) {
1251 push_arg_reg(reg_param_index);
1252 param_addr = loc;
1254 func_vc = loc;
1255 param_index++;
1256 reg_param_index++;
1258 /* define parameters */
1259 while ((sym = sym->next) != NULL) {
1260 type = &sym->type;
1261 mode = classify_x86_64_arg(type, NULL, &size, &reg_count);
1262 switch (mode) {
1263 case x86_64_mode_sse:
1264 if (sse_param_index + reg_count <= 8) {
1265 /* save arguments passed by register */
1266 for (i = 0; i < reg_count; ++i) {
1267 loc -= 8;
1268 o(0xd60f66); /* movq */
1269 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
1270 ++sse_param_index;
1272 param_addr = loc;
1273 } else {
1274 param_addr = addr;
1275 addr += size;
1276 sse_param_index += reg_count;
1278 break;
1280 case x86_64_mode_memory:
1281 case x86_64_mode_x87:
1282 param_addr = addr;
1283 addr += size;
1284 break;
1286 case x86_64_mode_integer: {
1287 if (reg_param_index + reg_count <= REGN) {
1288 /* save arguments passed by register */
1289 for (i = 0; i < reg_count; ++i) {
1290 push_arg_reg(reg_param_index);
1291 ++reg_param_index;
1293 param_addr = loc;
1294 } else {
1295 param_addr = addr;
1296 addr += size;
1297 reg_param_index += reg_count;
1299 break;
1302 sym_push(sym->v & ~SYM_FIELD, type,
1303 VT_LOCAL | VT_LVAL, param_addr);
1304 param_index++;
1308 /* generate function epilog */
1309 void gfunc_epilog(void)
1311 int v, saved_ind;
1313 o(0xc9); /* leave */
1314 if (func_ret_sub == 0) {
1315 o(0xc3); /* ret */
1316 } else {
1317 o(0xc2); /* ret n */
1318 g(func_ret_sub);
1319 g(func_ret_sub >> 8);
1321 /* align local size to word & save local variables */
1322 v = (-loc + 15) & -16;
1323 saved_ind = ind;
1324 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1325 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1326 o(0xec8148); /* sub rsp, stacksize */
1327 gen_le32(v);
1328 ind = saved_ind;
1331 #endif /* not PE */
/* generate a jump to a label */
int gjmp(int t)
{
    /* jmp rel32 (opcode 0xe9); psym chains the fixup into label list 't' */
    int chained = psym(0xe9, t);
    return chained;
}
1339 /* generate a jump to a fixed address */
1340 void gjmp_addr(int a)
1342 int r;
1343 r = a - ind - 2;
1344 if (r == (char)r) {
1345 g(0xeb);
1346 g(r);
1347 } else {
1348 oad(0xe9, a - ind - 5);
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
{
    int v, *p;

    v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100) {
            /* This was a float compare. If the parity flag is set
               the result was unordered. For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test. We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump. */
            vtop->c.i &= ~0x100;
            if (!inv == (vtop->c.i != TOK_NE))
                o(0x067a); /* jp +6 : skip over the following jcc */
            else {
                g(0x0f);
                t = psym(0x8a, t); /* jp t */
            }
        }
        /* jcc rel32: 0x0f 0x8?; condition derived from the comparison
           token in vtop->c.i, xor'ed with inv to flip the sense */
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t: walk the chain of fixup
               offsets stored inside the code section itself */
            p = &vtop->c.i;
            while (*p != 0)
                p = (int *)(cur_text_section->data + *p);
            *p = t;
            t = vtop->c.i;
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    } else {
        if (is_float(vtop->type.t) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* reduce float/long long tests to an integer != 0 compare */
            vpushi(0);
            gen_op(TOK_NE);
        }
        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
            /* constant jmp optimization */
            if ((vtop->c.i != 0) != inv)
                t = gjmp(t);
        } else {
            /* test r,r ; jne/je t */
            v = gv(RC_INT);
            orex(0, v, v, 0x85);
            o(0xc0 + REG_VALUE(v) * 9);
            g(0x0f);
            t = psym(0x85 ^ inv, t);
        }
    }
    vtop--;
    return t;
}
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);  /* 64-bit operands: orex adds REX.W */
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;  /* unsigned operands */
    /* cc: second operand is a plain constant (no lvalue, no symbol) */
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        /* the constant path requires the value to fit in an imm32 */
        if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                /* 0x83 /opc: op $imm8, r */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                /* 0x81 /opc: op $imm32, r */
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            /* reg-reg form: opcode (opc<<3)|0x01, modrm reg,reg */
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* comparison: result lives in the flags */
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            /* shift count is masked to the operand width (63 or 31) */
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX); /* div/idiv clobber rdx */
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        /* quotient in rax, remainder in rdx */
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7; /* cmp */
        goto gen_op8;
    }
}
/* generate a long long operation: on x86-64 the plain integer path
   already handles 64-bit operands, so simply defer to gen_opi */
void gen_opl(int op)
{
    gen_opi(op);
}
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranted to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    /* long double goes through the x87 stack, float/double through SSE */
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            /* normalize the operand order for the x87 compare */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            o(0xe9da); /* fucompp */
            o(0xe0df); /* fnstsw %ax */
            /* turn the x87 status bits in %ah into ZF for a jz/jnz test */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;
            /* 'a' selects the x87 arithmetic sub-opcode; the reversed
               forms (a+1) are used when the operands were swapped */
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.ul;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.ul = fc;
                load(r, &v1);
                fc = 0;
            }

            /* map the comparison token to a setcc opcode for the
               unordered-compare result */
            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                /* load the memory operand into %xmm1 first, then
                   compare register vs register */
                o(0x7e0ff3); /* movq */
                gen_modrm(1, r, vtop->sym, fc);
                if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
                    o(0x66); /* 0x66 prefix selects the double variant */
                }
                o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
                o(0xc8);
            } else {
                if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
                    o(0x66);
                }
                o(0x2e0f); /* ucomisd */
                gen_modrm(0, r, vtop->sym, fc);
            }

            vtop--;
            vtop->r = VT_CMP;
            /* 0x100 flags this as a float compare for gtst's parity
               handling */
            vtop->c.i = op | 0x100;
        } else {
            /* no memory reference possible for long double operations */
            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                load(TREG_XMM0, vtop);
                swapped = !swapped;
            }
            /* 'a' selects the SSE arithmetic opcode offset (0x58 + a) */
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.ul;
            if ((ft & VT_BTYPE) == VT_LDOUBLE) {
                o(0xde); /* fxxxp %st, %st(1) */
                o(0xc1 + (a << 3));
            } else {
                /* if saved lvalue, then we must reload it */
                r = vtop->r;
                if ((r & VT_VALMASK) == VT_LLOCAL) {
                    SValue v1;
                    r = get_reg(RC_INT);
                    v1.type.t = VT_PTR;
                    v1.r = VT_LOCAL | VT_LVAL;
                    v1.c.ul = fc;
                    load(r, &v1);
                    fc = 0;
                }
                if (swapped) {
                    /* movq %xmm0,%xmm1 */
                    o(0x7e0ff3);
                    o(0xc8);
                    load(TREG_XMM0, vtop);
                    /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
                    if ((ft & VT_BTYPE) == VT_DOUBLE) {
                        o(0xf2); /* double variant prefix */
                    } else {
                        o(0xf3); /* float variant prefix */
                    }
                    o(0x0f);
                    o(0x58 + a);
                    o(0xc1);
                } else {
                    if ((ft & VT_BTYPE) == VT_DOUBLE) {
                        o(0xf2);
                    } else {
                        o(0xf3);
                    }
                    o(0x0f);
                    o(0x58 + a);
                    gen_modrm(0, r, vtop->sym, fc);
                }
            }
            vtop--;
        }
    }
}
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        /* long double target: go through the x87 fild instructions,
           pushing the integer on the machine stack first */
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double: push a zero
               high word so fildll sees a non-negative 64-bit value */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        /* float/double target: cvtsi2ss/cvtsi2sd into %xmm0 */
        save_reg(TREG_XMM0);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT)); /* f2=sd, f3=ss prefix */
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* use the 64-bit source form for unsigned int / long long */
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
        vtop->r = TREG_XMM0;
    }
}
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;  /* source basic type */
    tbt = t & VT_BTYPE;  /* target basic type */

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xc0140f); /* unpcklps */
            o(0xc05a0f); /* cvtps2pd */
        } else if (tbt == VT_LDOUBLE) {
            /* SSE -> x87: spill below the stack pointer and reload */
            /* movss %xmm0,-0x10(%rsp) */
            o(0x44110ff3);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0xc0140f66); /* unpcklpd */
            o(0xc05a0f66); /* cvtpd2ps */
        } else if (tbt == VT_LDOUBLE) {
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x44110ff2);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        /* source is long double on the x87 stack */
        gv(RC_ST0);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x44100ff2);
            o(0xf024);
            vtop->r = TREG_XMM0;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x44100ff3);
            o(0xf024);
            vtop->r = TREG_XMM0;
        }
    }
}
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        /* no direct x87 -> integer truncation here: narrow to double
           first, then use the SSE conversion below */
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    /* 8-byte destination for anything wider than int */
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3); /* single-precision prefix */
    } else if (bt == VT_DOUBLE) {
        o(0xf2); /* double-precision prefix */
    } else {
        assert(0); /* long double was converted away above */
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + (REG_VALUE(r) << 3));
    vtop->r = r;
}
1879 /* computed goto support */
1880 void ggoto(void)
1882 gcall_or_jmp(1);
1883 vtop--;
1886 /* end of x86-64 code generator */
1887 /*************************************************************/
1888 #endif /* ! TARGET_DEFS_ONLY */
1889 /******************************************************/