x86-64 ABI fixes.
[tinycc.git] / x86_64-gen.c
blobd9873424e04db42956a90e2c4b0b600327522313
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
26 #define NB_REGS 5
27 #define NB_ASM_REGS 8
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which does
31 assumptions on it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
34 #define RC_RAX 0x0004
35 #define RC_RCX 0x0008
36 #define RC_RDX 0x0010
37 #define RC_R8 0x0100
38 #define RC_R9 0x0200
39 #define RC_R10 0x0400
40 #define RC_R11 0x0800
41 #define RC_XMM0 0x0020
42 #define RC_ST0 0x0040 /* only for long double */
43 #define RC_IRET RC_RAX /* function return: integer register */
44 #define RC_LRET RC_RDX /* function return: second integer register */
45 #define RC_FRET RC_XMM0 /* function return: float register */
47 /* pretty names for the registers */
/* pretty names for the registers; values 0..4 are the allocatable
   registers indexing reg_classes[], 6..11 are argument/scratch
   registers known to the backend but not to the generic allocator */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_XMM0 = 3,
    TREG_ST0 = 4,    /* x87 top of stack, used only for long double */

    TREG_RSI = 6,
    TREG_RDI = 7,
    TREG_R8  = 8,
    TREG_R9  = 9,

    TREG_R10 = 10,
    TREG_R11 = 11,

    /* flag OR-ed into a register number: value is in memory, addressed
       through that register (see load()/gen_modrm_impl()) */
    TREG_MEM = 0x10,
};
66 #define REX_BASE(reg) (((reg) >> 3) & 1)
67 #define REG_VALUE(reg) ((reg) & 7)
69 /* return registers for function */
70 #define REG_IRET TREG_RAX /* single word int return register */
71 #define REG_LRET TREG_RDX /* second word return register (for long long) */
72 #define REG_FRET TREG_XMM0 /* float return register */
74 /* defined if function parameters must be evaluated in reverse order */
75 #define INVERT_FUNC_PARAMS
77 /* pointer size, in bytes */
78 #define PTR_SIZE 8
80 /* long double size and alignment, in bytes */
81 #define LDOUBLE_SIZE 16
82 #define LDOUBLE_ALIGN 8
83 /* maximum alignment (for aligned attribute support) */
84 #define MAX_ALIGN 8
86 /******************************************************/
87 /* ELF defines */
89 #define EM_TCC_TARGET EM_X86_64
91 /* relocation type for 32 bit data relocation */
92 #define R_DATA_32 R_X86_64_32
93 #define R_DATA_PTR R_X86_64_64
94 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
95 #define R_COPY R_X86_64_COPY
97 #define ELF_START_ADDR 0x08048000
98 #define ELF_PAGE_SIZE 0x1000
100 /******************************************************/
101 #else /* ! TARGET_DEFS_ONLY */
102 /******************************************************/
103 #include "tcc.h"
104 #include <assert.h>
/* register class of each machine register, indexed by TREG_* value;
   entries 5-7 (st0 shadow, rsi, rdi) carry no class and are never
   picked by get_reg() */
ST_DATA const int reg_classes[NB_REGS+7] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* st0 */ RC_ST0,
    0,
    0,
    0,
    RC_INT | RC_R8,
    RC_INT | RC_R9,
    RC_INT | RC_R10,
    RC_INT | RC_R11
};
121 static unsigned long func_sub_sp_offset;
122 static int func_ret_sub;
124 /* XXX: make it faster ? */
125 void g(int c)
127 int ind1;
128 ind1 = ind + 1;
129 if (ind1 > cur_text_section->data_allocated)
130 section_realloc(cur_text_section, ind1);
131 cur_text_section->data[ind] = c;
132 ind = ind1;
/* emit the low-order bytes of 'c' little-endian, stopping at the
   first all-zero remainder; note o(0) therefore emits nothing */
void o(unsigned int c)
{
    for (; c; c >>= 8)
        g(c);
}
/* emit a 16-bit little-endian value */
void gen_le16(int v)
{
    int i;
    for (i = 0; i < 2; i++)
        g(v >> (8 * i));
}
/* emit a 32-bit little-endian value */
void gen_le32(int c)
{
    int i;
    for (i = 0; i < 4; i++)
        g(c >> (8 * i));
}
/* emit a 64-bit little-endian value */
void gen_le64(int64_t c)
{
    int i;
    for (i = 0; i < 8; i++)
        g(c >> (8 * i));
}
/* Emit an optional REX prefix, then opcode byte 'b'.
   'll' non-zero requests REX.W (64-bit operand size); 'r' supplies
   REX.B and 'r2' REX.R.  Values that are not machine registers
   (>= VT_CONST after masking) contribute no REX bits.  The prefix is
   emitted only when at least one bit is needed. */
void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
180 /* output a symbol and patch all calls to it */
/* output a symbol and patch all calls to it: 't' heads a chain of
   32-bit placeholders in the code (each holds the offset of the next
   link); replace every link with the PC-relative displacement to
   address 'a' */
void gsym_addr(int t, int a)
{
    int n, *ptr;
    while (t) {
        ptr = (int *)(cur_text_section->data + t);
        n = *ptr; /* next value */
        *ptr = a - t - 4;  /* disp is relative to end of the 4-byte field */
        t = n;
    }
}
/* resolve the jump chain 't' to the current output position */
void gsym(int t)
{
    gsym_addr(t, ind);
}
197 /* psym is used to put an instruction with a data field which is a
198 reference to a symbol. It is in fact the same as oad ! */
199 #define psym oad
201 static int is64_type(int t)
203 return ((t & VT_BTYPE) == VT_PTR ||
204 (t & VT_BTYPE) == VT_FUNC ||
205 (t & VT_BTYPE) == VT_LLONG);
208 static int is_sse_float(int t) {
209 int bt;
210 bt = t & VT_BTYPE;
211 return bt == VT_DOUBLE || bt == VT_FLOAT;
215 /* instruction + 4 bytes data. Return the address of the data */
/* instruction + 4 bytes data. Return the address of the data so the
   caller can later patch it (see gsym_addr / psym) */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;  /* the 32-bit operand */
    s = ind;   /* reuse 's' to return the operand's offset */
    ind = ind1;
    return s;
}
/* output 32-bit constant 'c', with an absolute R_X86_64_32 relocation
   against 'sym' when 'r & VT_SYM' is set */
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_32);
    gen_le32(c);
}
237 /* output constant with relocation if 'r & VT_SYM' is true */
/* output 64-bit constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    gen_le64(c);
}
245 /* output constant with relocation if 'r & VT_SYM' is true */
/* output PC-relative constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    gen_le32(c-4);  /* -4: displacement is measured from the end of the field */
}
253 /* output got address with relocation */
/* output got address with relocation: emits a 32-bit GOTPCREL-relocated
   field (the caller has already emitted the opcode/modrm that consumes
   it), then, if 'c' is non-zero, an 'add $c, r' to apply the
   displacement after the indirection */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    Section *sr;
    ElfW(Rela) *rel;
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    /* patch the addend of the relocation just added: -4 because the
       displacement is relative to the end of the 4-byte field */
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
    rel->r_addend = -4;
#else
    /* PE has no GOT; debug trace of the bytes preceding the field,
       then fall back to a plain PC32 relocation */
    printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
           cur_text_section->data[ind-3],
           cur_text_section->data[ind-2],
           cur_text_section->data[ind-1]
           );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
/* Emit the ModRM byte (plus displacement / relocation) for one operand.
   'op_reg' fills the reg field; the addressing form is chosen from 'r':
     VT_CONST      -> RIP-relative reference to 'sym' (via GOT if is_got)
     VT_LOCAL      -> rbp-relative with 8- or 32-bit displacement 'c'
     >= TREG_MEM   -> indirect through register r, disp32 when c != 0
     other reg     -> indirect through register r, no displacement */
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
/* thin wrapper over gen_modrm_impl for the non-GOT case */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}
/* generate a 64-bit (REX.W) opcode + modrm reference. 'op_reg' contains
   the additional 3 opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    /* go through the GOT only for a TREG_MEM temporary referencing a
       non-static symbol; when op_reg has no TREG_MEM bit (e.g. the
       sym == NULL calls from the prologs) short-circuit evaluation
       avoids dereferencing sym */
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
330 /* load 'r' from value 'sv' */
/* load 'r' from value 'sv': handles lvalues (memory of every basic
   type), constants (with or without symbol/GOT indirection), local
   addresses, comparison flags (VT_CMP) and jump chains (VT_JMP*),
   and register-to-register moves including x87 <-> SSE transfers */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t;
    fc = sv->c.ul;

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporal register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporal register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        /* mov sym@GOTPCREL(%rip), tr */
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporal register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            /* double indirection: first load the pointer stored at the
               local slot, then load through it */
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.ul = fc;
            fr = r;
            if (!(reg_classes[fr] & RC_INT))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        /* pick the load opcode from the value type; for the SSE/x87
           forms 'r' is re-purposed as the fixed reg field */
        ll = 0;
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66, r = 0; /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3, r = 0; /* movq */
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE) {
            b = 0xbe0f; /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f; /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f; /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f; /* movzwl */
        } else {
            ll = is64_type(ft);
            b = 0x8b;  /* plain mov */
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.ull);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            /* materialize comparison flags with setcc; fc holds the
               condition token, bit 0x100 marks a float compare */
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100) {
                /* This was a float compare. If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE. */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8));  /* jp over the setcc */
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            /* resolve a pending jump chain into 0/1 in r */
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if (r == TREG_XMM0) {
                assert(v == TREG_ST0);
                /* gen_cvt_ftof(VT_DOUBLE); */
                o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                /* movsd -0x10(%rsp),%xmm0 */
                o(0x44100ff2);
                o(0xf024);
            } else if (r == TREG_ST0) {
                assert(v == TREG_XMM0);
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmm0,-0x10(%rsp) */
                o(0x44110ff2);
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
475 /* store register 'r' in lvalue 'v' */
/* store register 'r' in lvalue 'v'; on ELF targets a store to a
   non-static global goes through the GOT: the address is first loaded
   into %r11 and 'pic' holds the REX prefix to re-emit for the store */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.ul;
    fr = v->r & VT_VALMASK;
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    /* note: o(pic) emits nothing when pic == 0 */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = 0;
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = 0;
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;  /* defer: needs REX.W via gen_modrm64 */
        else
            orex(0, 0, r, 0x89);
    }

    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
554 /* 'is_jmp' is '1' if it is a jump */
/* generate a call or jump to the function designated by vtop.
   'is_jmp' is '1' if it is a jump (opcode 0xe9 instead of 0xe8;
   indirect form uses /4 instead of /2) */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym,
                   ind + 1, R_X86_64_PC32);
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        }
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;   /* r11 is caller-saved and never an argument register */
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
580 #ifdef TCC_TARGET_PE
582 #define REGN 4
583 static const uint8_t arg_regs[] = {
584 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
587 static int func_scratch;
589 /* Generate function call. The function address is pushed first, then
590 all the parameters in call order. This functions pops all the
591 parameters and the function address. */
/* emit opcode 'b' with an rsp-relative memory operand at offset 'd';
   'r' supplies the reg field; flag 0x100 in 'r' suppresses the REX
   bits (used for xmm0 / pure-memory forms) */
void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        /* disp8 form */
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        /* disp32 form */
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
605 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
    *ret_align = 1; // Never have to re-align return values for x86-64
    /* Win64 variant: aggregates are always returned via hidden pointer */
    return 1;
}
/* Win64 calling convention: every argument occupies one 8-byte slot,
   the first four go in rcx/rdx/r8/r9 (xmm0-3 for floats); structs and
   long doubles are copied to a scratch area below the outgoing slots
   and passed by pointer.  Two passes: first materialize struct/ldouble
   copies (which may call memcpy), then fill registers/stack slots. */
void gfunc_call(int nb_args)
{
    int size, align, r, args_size, i, d, j, bt, struct_size;
    int nb_reg_args, gen_reg;

    nb_reg_args = nb_args;
    /* always reserve at least the 4-slot register home area */
    args_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        if (bt == VT_STRUCT) {
            size = type_size(&sv->type, &align);
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);  /* lea struct_size(%rsp), r */
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);  /* fstpt to scratch */
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;
#if 1
    for (i = 0; i < REGN; ++i)
        save_reg(arg_regs[i]);
    save_reg(TREG_RAX);
#endif
    gen_reg = nb_reg_args;
    struct_size = args_size;

    /* second pass: assign each argument (last pushed first) to its
       register or stack slot */
    for(i = 0; i < nb_args; i++) {
        bt = (vtop->type.t & VT_BTYPE);

        if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
            if (bt == VT_LDOUBLE)
                size = 16;
            else
                size = type_size(&vtop->type, &align);
            /* align to stack align size */
            size = (size + 15) & ~15;
            j = --gen_reg;
            if (j >= REGN) {
                /* pass pointer to the scratch copy on the stack */
                d = TREG_RAX;
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, j*8);
            } else {
                /* pass pointer to the scratch copy in a register */
                d = arg_regs[j];
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else if (is_sse_float(vtop->type.t)) {
            gv(RC_FLOAT); /* only one float register */
            j = --gen_reg;
            if (j >= REGN) {
                /* movq %xmm0, j*8(%rsp) */
                gen_offs_sp(0xd60f66, 0x100, j*8);
            } else {
                /* movaps %xmm0, %xmmN */
                o(0x280f);
                o(0xc0 + (j << 3));
                d = arg_regs[j];
                /* mov %xmm0, %rxx -- varargs callees read the GP register */
                o(0x66);
                orex(1,d,0, 0x7e0f);
                o(0xc0 + REG_VALUE(d));
            }
        } else {
            j = --gen_reg;
            if (j >= REGN) {
                r = gv(RC_INT);
                gen_offs_sp(0x89, r, j*8);
            } else {
                d = arg_regs[j];
                if (d < NB_REGS) {
                    /* evaluate directly into the target register class */
                    gv(reg_classes[d] & ~RC_INT);
                } else {
                    r = gv(RC_INT);
                    if (d != r) {
                        orex(1,d,r, 0x89);
                        o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8);
                    }
                }
            }
        }
        vtop--;
    }
    save_regs(0);
    gcall_or_jmp(0);
    vtop--;
}
724 #define FUNC_PROLOG_SIZE 11
726 /* generate function prolog of type 't' */
/* generate function prolog of type 't' (Win64 variant): spill register
   arguments to their stack home slots and declare the parameters; the
   frame setup itself is patched in later by gfunc_epilog */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2;   /* skip saved rbp + return address */
    ind += FUNC_PROLOG_SIZE;   /* leave room for the prolog, written in epilog */
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        reg_param_index++;
        addr += PTR_SIZE;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        if (reg_param_index < REGN) {
            /* save arguments passed by register into the home area */
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        }
        if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
            /* passed by reference */
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        reg_param_index++;
        addr += PTR_SIZE;
    }

    /* for varargs, spill the remaining register slots too */
    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS)
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        reg_param_index++;
        addr += PTR_SIZE;
    }
}
778 /* generate function epilog */
/* generate function epilog (Win64 variant), then go back and write the
   deferred prolog now that the final frame size is known; large frames
   probe the stack through __chkstk */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
814 #else
/* add 'val' to %rsp (used to pop outgoing call arguments) */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);   /* add $imm8, %rsp */
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
/* System V x86-64 ABI argument classes (subset used by this backend) */
typedef enum X86_64_Mode {
  x86_64_mode_none,
  x86_64_mode_memory,
  x86_64_mode_integer,
  x86_64_mode_sse,
  x86_64_mode_x87
} X86_64_Mode;

/* Merge the classes of two struct fields, per the ABI's class-merging
   rules: NONE is the identity, MEMORY dominates everything, INTEGER
   dominates the register classes, and mixing x87 with SSE forces the
   aggregate to memory. */
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b) {
    if (a == b)
        return a;
    if (a == x86_64_mode_none)
        return b;
    if (b == x86_64_mode_none)
        return a;
    if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    return x86_64_mode_sse;
}
/* Classify a single type (recursing into struct fields) into an ABI
   class.  NOTE(review): falls off the end for basic types not listed
   in the switch — callers appear to only pass classifiable types;
   confirm no other btype can reach here. */
static X86_64_Mode classify_x86_64_inner(CType *ty) {
    X86_64_Mode mode;
    Sym *f;

    if (ty->t & VT_BITFIELD)
        return x86_64_mode_memory;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        // Detect union: two fields with the same offset
        if (f->next && (f->c == f->next->c))
            return x86_64_mode_memory;

        /* merge the classes of all fields */
        mode = x86_64_mode_none;
        for (; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }
}
/* Classify how a value of type 'ty' is passed under the System V
   x86-64 ABI.  '*psize' receives the size rounded up to a multiple of
   8; '*reg_count' (if non-NULL) the number of registers consumed:
   eightbytes for the integer class, 1 for sse/x87/memory, 0 for none. */
static X86_64_Mode classify_x86_64_arg(CType *ty, int *psize, int *reg_count) {
    X86_64_Mode mode;
    int size, align;

    if (ty->t & VT_ARRAY) {
        /* arrays are passed as a pointer: one integer eightbyte */
        *psize = 8;
        *reg_count = 1;
        return x86_64_mode_integer;
    }

    size = type_size(ty, &align);
    size = (size + 7) & ~7;   /* round up to eightbyte multiple */
    *psize = size;
    if (size > 16)
        return x86_64_mode_memory;  /* more than two eightbytes: memory */

    mode = classify_x86_64_inner(ty);
    if (reg_count) {
        if (mode == x86_64_mode_integer)
            *reg_count = size / 8;
        else if (mode == x86_64_mode_none)
            *reg_count = 0;
        else
            *reg_count = 1;
    }
    return mode;
}
/* As classify_x86_64_arg, but also synthesize in '*ret' a scalar type
   that gv() can use to move the value: QLONG/LLONG/INT for the integer
   class, QFLOAT/DOUBLE/FLOAT for sse, LDOUBLE for x87.
   NOTE(review): for the memory/none classes ret->t is left unset —
   callers must not consult *ret in those cases. */
static X86_64_Mode classify_x86_64_arg_type(CType *vt, CType *ret, int *psize, int *reg_count) {
    X86_64_Mode mode;
    int size;

    ret->ref = NULL;

    mode = classify_x86_64_arg(vt, &size, reg_count);
    *psize = size;
    switch (mode) {
    case x86_64_mode_integer:
        if (size > 8)
            ret->t = VT_QLONG;
        else if (size > 4)
            ret->t = VT_LLONG;
        else
            ret->t = VT_INT;
        break;

    case x86_64_mode_x87:
        ret->t = VT_LDOUBLE;
        break;

    case x86_64_mode_sse:
        if (size > 8)
            ret->t = VT_QFLOAT;
        else if (size > 4)
            ret->t = VT_DOUBLE;
        else
            ret->t = VT_FLOAT;
        break;
    }

    return mode;
}
952 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
/* Return 1 if this function returns via an sret pointer, 0 otherwise
   (SysV: only MEMORY-class return values need the hidden pointer) */
int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
    int size, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    return (classify_x86_64_arg_type(vt, ret, &size, &reg_count) == x86_64_mode_memory);
}
959 #define REGN 6
960 static const uint8_t arg_regs[REGN] = {
961 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
964 /* Generate function call. The function address is pushed first, then
965 all the parameters in call order. This functions pops all the
966 parameters and the function address. */
967 void gfunc_call(int nb_args)
969 X86_64_Mode mode;
970 CType type;
971 int size, align, r, args_size, i, j, reg_count;
972 int nb_reg_args = 0;
973 int nb_sse_args = 0;
974 int sse_reg, gen_reg;
976 /* calculate the number of integer/float arguments */
977 args_size = 0;
978 for(i = 0; i < nb_args; i++) {
979 mode = classify_x86_64_arg(&vtop[-i].type, &size, &reg_count);
980 switch (mode) {
981 case x86_64_mode_memory:
982 case x86_64_mode_x87:
983 args_size += size;
984 break;
986 case x86_64_mode_sse:
987 nb_sse_args += reg_count;
988 if (nb_sse_args > 8) args_size += size;
989 break;
991 case x86_64_mode_integer:
992 nb_reg_args += reg_count;
993 if (nb_reg_args > REGN) args_size += size;
994 break;
998 /* for struct arguments, we need to call memcpy and the function
999 call breaks register passing arguments we are preparing.
1000 So, we process arguments which will be passed by stack first. */
1001 gen_reg = nb_reg_args;
1002 sse_reg = nb_sse_args;
1004 /* adjust stack to align SSE boundary */
1005 if (args_size &= 15) {
1006 /* fetch cpu flag before the following sub will change the value */
1007 if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
1008 gv(RC_INT);
1010 args_size = 16 - args_size;
1011 o(0x48);
1012 oad(0xec81, args_size); /* sub $xxx, %rsp */
1015 for(i = 0; i < nb_args; i++) {
1016 /* Swap argument to top, it will possibly be changed here,
1017 and might use more temps. All arguments must remain on the
1018 stack, so that get_reg can correctly evict some of them onto
1019 stack. We could use also use a vrott(nb_args) at the end
1020 of this loop, but this seems faster. */
1021 SValue tmp = vtop[0];
1022 vtop[0] = vtop[-i];
1023 vtop[-i] = tmp;
1024 mode = classify_x86_64_arg(&vtop->type, &size, &reg_count);
1025 switch (mode) {
1026 case x86_64_mode_memory:
1027 /* allocate the necessary size on stack */
1028 o(0x48);
1029 oad(0xec81, size); /* sub $xxx, %rsp */
1030 /* generate structure store */
1031 r = get_reg(RC_INT);
1032 orex(1, r, 0, 0x89); /* mov %rsp, r */
1033 o(0xe0 + REG_VALUE(r));
1034 vset(&vtop->type, r | VT_LVAL, 0);
1035 vswap();
1036 vstore();
1037 args_size += size;
1038 break;
1040 case x86_64_mode_x87:
1041 gv(RC_ST0);
1042 size = LDOUBLE_SIZE;
1043 oad(0xec8148, size); /* sub $xxx, %rsp */
1044 o(0x7cdb); /* fstpt 0(%rsp) */
1045 g(0x24);
1046 g(0x00);
1047 args_size += size;
1048 break;
1050 case x86_64_mode_sse:
1051 if (sse_reg > 8) {
1052 gv(RC_FLOAT);
1053 o(0x50); /* push $rax */
1054 /* movq %xmm0, (%rsp) */
1055 o(0x04d60f66);
1056 o(0x24);
1057 args_size += size;
1059 sse_reg -= reg_count;
1060 break;
1062 case x86_64_mode_integer:
1063 /* simple type */
1064 /* XXX: implicit cast ? */
1065 if (gen_reg > REGN) {
1066 r = gv(RC_INT);
1067 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
1068 args_size += size;
1070 gen_reg -= reg_count;
1071 break;
1074 /* And swap the argument back to it's original position. */
1075 tmp = vtop[0];
1076 vtop[0] = vtop[-i];
1077 vtop[-i] = tmp;
1080 /* XXX This should be superfluous. */
1081 save_regs(0); /* save used temporary registers */
1083 /* then, we prepare register passing arguments.
1084 Note that we cannot set RDX and RCX in this loop because gv()
1085 may break these temporary registers. Let's use R10 and R11
1086 instead of them */
1087 gen_reg = nb_reg_args;
1088 sse_reg = nb_sse_args;
1089 for(i = 0; i < nb_args; i++) {
1090 mode = classify_x86_64_arg_type(&vtop->type, &type, &size, &reg_count);
1091 /* Alter stack entry type so that gv() knows how to treat it */
1092 vtop->type = type;
1093 switch (mode) {
1094 default:
1095 break;
1097 case x86_64_mode_sse:
1098 if (sse_reg > 8) {
1099 sse_reg -= reg_count;
1100 } else {
1101 for (j = 0; j < reg_count; ++j) {
1102 --sse_reg;
1103 gv(RC_FLOAT); /* only one float register */
1104 /* movaps %xmm0, %xmmN */
1105 o(0x280f);
1106 o(0xc0 + (sse_reg << 3));
1109 break;
1111 case x86_64_mode_integer:
1112 /* simple type */
1113 /* XXX: implicit cast ? */
1114 if (gen_reg > 8) {
1115 gen_reg -= reg_count;
1116 } else {
1117 for (j = 0; j < reg_count; ++j) {
1118 --gen_reg;
1119 int d = arg_regs[gen_reg];
1120 r = gv(RC_INT);
1121 if (gen_reg == 2 || gen_reg == 3)
1122 /* gen_reg=2: r10, gen_reg=3: r11 */
1123 d = gen_reg + 8;
1124 orex(1,d,r,0x89); /* mov */
1125 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
1128 break;
1130 vtop--;
1133 /* We shouldn't have many operands on the stack anymore, but the
1134 call address itself is still there, and it might be in %eax
1135 (or edx/ecx) currently, which the below writes would clobber.
1136 So evict all remaining operands here. */
1137 save_regs(0);
1139 /* Copy R10 and R11 into RDX and RCX, respectively */
1140 if (nb_reg_args > 2) {
1141 o(0xd2894c); /* mov %r10, %rdx */
1142 if (nb_reg_args > 3) {
1143 o(0xd9894c); /* mov %r11, %rcx */
1147 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
1148 gcall_or_jmp(0);
1149 if (args_size)
1150 gadd_sp(args_size);
1151 vtop--;
1155 #define FUNC_PROLOG_SIZE 11
/* spill integer argument register arg_regs[i] into a fresh 8-byte
   local slot (loc is the downward-growing local offset) */
static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
1162 /* generate function prolog of type 't' */
/* generate function prolog of type 't' (System V variant): for
   variadic functions build the register save area expected by
   va_start, then spill register parameters to locals and declare
   every parameter symbol */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_index, param_addr, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;   /* skip saved rbp + return address */
    loc = 0;
    ind += FUNC_PROLOG_SIZE;   /* room for push/mov/sub, written in epilog */
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            if (is_sse_float(type->t)) {
                if (seen_sse_num < 8) {
                    seen_sse_num++;
                } else {
                    seen_stack_size += 8;
                }
            } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
                size = type_size(type, &align);
                size = (size + 7) & ~7;
                seen_stack_size += size;
            } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
                seen_stack_size += LDOUBLE_SIZE;
            } else {
                if (seen_reg_num < REGN) {
                    seen_reg_num++;
                } else {
                    seen_stack_size += 8;
                }
            }
        }

        /* store the va_list bookkeeping words:
           gp_offset, fp_offset, overflow offset */
        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            o(0xd60f66); /* movq %xmm(7-i), loc(%rbp) */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    param_index = 0;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, &size, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        param_addr = loc;

        func_vc = loc;
        param_index++;
        reg_param_index++;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, &size, &reg_count);
        /* NOTE(review): no case for x86_64_mode_none — a void-typed
           parameter would leave param_addr unset; presumably cannot
           occur here */
        switch (mode) {
        case x86_64_mode_sse:
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                for (i = 0; i < reg_count; ++i) {
                    loc -= 8;
                    o(0xd60f66); /* movq %xmmN, loc(%rbp) */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
                    ++sse_param_index;
                }
                param_addr = loc;
            } else {
                /* passed on the caller's stack */
                param_addr = addr;
                addr += size;
                sse_param_index += reg_count;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                for (i = 0; i < reg_count; ++i) {
                    push_arg_reg(reg_param_index);
                    ++reg_param_index;
                }
                param_addr = loc;
            } else {
                /* passed on the caller's stack */
                param_addr = addr;
                addr += size;
                reg_param_index += reg_count;
            }
            break;
        }
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
        param_index++;
    }
}
1301 /* generate function epilog */
1302 void gfunc_epilog(void)
1304 int v, saved_ind;
1306 o(0xc9); /* leave */
1307 if (func_ret_sub == 0) {
1308 o(0xc3); /* ret */
1309 } else {
1310 o(0xc2); /* ret n */
1311 g(func_ret_sub);
1312 g(func_ret_sub >> 8);
1314 /* align local size to word & save local variables */
1315 v = (-loc + 15) & -16;
1316 saved_ind = ind;
1317 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1318 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1319 o(0xec8148); /* sub rsp, stacksize */
1320 gen_le32(v);
1321 ind = saved_ind;
1324 #endif /* not PE */
/* generate a jump to a label; returns the head of the relocation
   chain threaded through the emitted rel32 displacement */
int gjmp(int t)
{
    return psym(0xe9, t); /* jmp rel32 */
}
1332 /* generate a jump to a fixed address */
1333 void gjmp_addr(int a)
1335 int r;
1336 r = a - ind - 2;
1337 if (r == (char)r) {
1338 g(0xeb);
1339 g(r);
1340 } else {
1341 oad(0xe9, a - ind - 5);
1345 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1346 int gtst(int inv, int t)
1348 int v, *p;
1350 v = vtop->r & VT_VALMASK;
1351 if (v == VT_CMP) {
1352 /* fast case : can jump directly since flags are set */
1353 if (vtop->c.i & 0x100)
1355 /* This was a float compare. If the parity flag is set
1356 the result was unordered. For anything except != this
1357 means false and we don't jump (anding both conditions).
1358 For != this means true (oring both).
1359 Take care about inverting the test. We need to jump
1360 to our target if the result was unordered and test wasn't NE,
1361 otherwise if unordered we don't want to jump. */
1362 vtop->c.i &= ~0x100;
1363 if (!inv == (vtop->c.i != TOK_NE))
1364 o(0x067a); /* jp +6 */
1365 else
1367 g(0x0f);
1368 t = psym(0x8a, t); /* jp t */
1371 g(0x0f);
1372 t = psym((vtop->c.i - 16) ^ inv, t);
1373 } else if (v == VT_JMP || v == VT_JMPI) {
1374 /* && or || optimization */
1375 if ((v & 1) == inv) {
1376 /* insert vtop->c jump list in t */
1377 p = &vtop->c.i;
1378 while (*p != 0)
1379 p = (int *)(cur_text_section->data + *p);
1380 *p = t;
1381 t = vtop->c.i;
1382 } else {
1383 t = gjmp(t);
1384 gsym(vtop->c.i);
1386 } else {
1387 if (is_float(vtop->type.t) ||
1388 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1389 vpushi(0);
1390 gen_op(TOK_NE);
1392 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1393 /* constant jmp optimization */
1394 if ((vtop->c.i != 0) != inv)
1395 t = gjmp(t);
1396 } else {
1397 v = gv(RC_INT);
1398 orex(0,v,v,0x85);
1399 o(0xc0 + REG_VALUE(v) * 9);
1400 g(0x0f);
1401 t = psym(0x85 ^ inv, t);
1404 vtop--;
1405 return t;
1408 /* generate an integer binary operation */
1409 void gen_opi(int op)
1411 int r, fr, opc, c;
1412 int ll, uu, cc;
1414 ll = is64_type(vtop[-1].type.t);
1415 uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
1416 cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
1418 switch(op) {
1419 case '+':
1420 case TOK_ADDC1: /* add with carry generation */
1421 opc = 0;
1422 gen_op8:
1423 if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
1424 /* constant case */
1425 vswap();
1426 r = gv(RC_INT);
1427 vswap();
1428 c = vtop->c.i;
1429 if (c == (char)c) {
1430 /* XXX: generate inc and dec for smaller code ? */
1431 orex(ll, r, 0, 0x83);
1432 o(0xc0 | (opc << 3) | REG_VALUE(r));
1433 g(c);
1434 } else {
1435 orex(ll, r, 0, 0x81);
1436 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1438 } else {
1439 gv2(RC_INT, RC_INT);
1440 r = vtop[-1].r;
1441 fr = vtop[0].r;
1442 orex(ll, r, fr, (opc << 3) | 0x01);
1443 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1445 vtop--;
1446 if (op >= TOK_ULT && op <= TOK_GT) {
1447 vtop->r = VT_CMP;
1448 vtop->c.i = op;
1450 break;
1451 case '-':
1452 case TOK_SUBC1: /* sub with carry generation */
1453 opc = 5;
1454 goto gen_op8;
1455 case TOK_ADDC2: /* add with carry use */
1456 opc = 2;
1457 goto gen_op8;
1458 case TOK_SUBC2: /* sub with carry use */
1459 opc = 3;
1460 goto gen_op8;
1461 case '&':
1462 opc = 4;
1463 goto gen_op8;
1464 case '^':
1465 opc = 6;
1466 goto gen_op8;
1467 case '|':
1468 opc = 1;
1469 goto gen_op8;
1470 case '*':
1471 gv2(RC_INT, RC_INT);
1472 r = vtop[-1].r;
1473 fr = vtop[0].r;
1474 orex(ll, fr, r, 0xaf0f); /* imul fr, r */
1475 o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
1476 vtop--;
1477 break;
1478 case TOK_SHL:
1479 opc = 4;
1480 goto gen_shift;
1481 case TOK_SHR:
1482 opc = 5;
1483 goto gen_shift;
1484 case TOK_SAR:
1485 opc = 7;
1486 gen_shift:
1487 opc = 0xc0 | (opc << 3);
1488 if (cc) {
1489 /* constant case */
1490 vswap();
1491 r = gv(RC_INT);
1492 vswap();
1493 orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
1494 o(opc | REG_VALUE(r));
1495 g(vtop->c.i & (ll ? 63 : 31));
1496 } else {
1497 /* we generate the shift in ecx */
1498 gv2(RC_INT, RC_RCX);
1499 r = vtop[-1].r;
1500 orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
1501 o(opc | REG_VALUE(r));
1503 vtop--;
1504 break;
1505 case TOK_UDIV:
1506 case TOK_UMOD:
1507 uu = 1;
1508 goto divmod;
1509 case '/':
1510 case '%':
1511 case TOK_PDIV:
1512 uu = 0;
1513 divmod:
1514 /* first operand must be in eax */
1515 /* XXX: need better constraint for second operand */
1516 gv2(RC_RAX, RC_RCX);
1517 r = vtop[-1].r;
1518 fr = vtop[0].r;
1519 vtop--;
1520 save_reg(TREG_RDX);
1521 orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1522 orex(ll, fr, 0, 0xf7); /* div fr, %eax */
1523 o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
1524 if (op == '%' || op == TOK_UMOD)
1525 r = TREG_RDX;
1526 else
1527 r = TREG_RAX;
1528 vtop->r = r;
1529 break;
1530 default:
1531 opc = 7;
1532 goto gen_op8;
void gen_opl(int op)
{
    /* 64-bit integers fit in a single register on x86-64, so long long
       operations are handled exactly like int ones */
    gen_opi(op);
}
1541 /* generate a floating point operation 'v = t1 op t2' instruction. The
1542 two operands are guaranted to have the same floating point type */
1543 /* XXX: need to use ST1 too */
1544 void gen_opf(int op)
1546 int a, ft, fc, swapped, r;
1547 int float_type =
1548 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1550 /* convert constants to memory references */
1551 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1552 vswap();
1553 gv(float_type);
1554 vswap();
1556 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1557 gv(float_type);
1559 /* must put at least one value in the floating point register */
1560 if ((vtop[-1].r & VT_LVAL) &&
1561 (vtop[0].r & VT_LVAL)) {
1562 vswap();
1563 gv(float_type);
1564 vswap();
1566 swapped = 0;
1567 /* swap the stack if needed so that t1 is the register and t2 is
1568 the memory reference */
1569 if (vtop[-1].r & VT_LVAL) {
1570 vswap();
1571 swapped = 1;
1573 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1574 if (op >= TOK_ULT && op <= TOK_GT) {
1575 /* load on stack second operand */
1576 load(TREG_ST0, vtop);
1577 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1578 if (op == TOK_GE || op == TOK_GT)
1579 swapped = !swapped;
1580 else if (op == TOK_EQ || op == TOK_NE)
1581 swapped = 0;
1582 if (swapped)
1583 o(0xc9d9); /* fxch %st(1) */
1584 o(0xe9da); /* fucompp */
1585 o(0xe0df); /* fnstsw %ax */
1586 if (op == TOK_EQ) {
1587 o(0x45e480); /* and $0x45, %ah */
1588 o(0x40fC80); /* cmp $0x40, %ah */
1589 } else if (op == TOK_NE) {
1590 o(0x45e480); /* and $0x45, %ah */
1591 o(0x40f480); /* xor $0x40, %ah */
1592 op = TOK_NE;
1593 } else if (op == TOK_GE || op == TOK_LE) {
1594 o(0x05c4f6); /* test $0x05, %ah */
1595 op = TOK_EQ;
1596 } else {
1597 o(0x45c4f6); /* test $0x45, %ah */
1598 op = TOK_EQ;
1600 vtop--;
1601 vtop->r = VT_CMP;
1602 vtop->c.i = op;
1603 } else {
1604 /* no memory reference possible for long double operations */
1605 load(TREG_ST0, vtop);
1606 swapped = !swapped;
1608 switch(op) {
1609 default:
1610 case '+':
1611 a = 0;
1612 break;
1613 case '-':
1614 a = 4;
1615 if (swapped)
1616 a++;
1617 break;
1618 case '*':
1619 a = 1;
1620 break;
1621 case '/':
1622 a = 6;
1623 if (swapped)
1624 a++;
1625 break;
1627 ft = vtop->type.t;
1628 fc = vtop->c.ul;
1629 o(0xde); /* fxxxp %st, %st(1) */
1630 o(0xc1 + (a << 3));
1631 vtop--;
1633 } else {
1634 if (op >= TOK_ULT && op <= TOK_GT) {
1635 /* if saved lvalue, then we must reload it */
1636 r = vtop->r;
1637 fc = vtop->c.ul;
1638 if ((r & VT_VALMASK) == VT_LLOCAL) {
1639 SValue v1;
1640 r = get_reg(RC_INT);
1641 v1.type.t = VT_PTR;
1642 v1.r = VT_LOCAL | VT_LVAL;
1643 v1.c.ul = fc;
1644 load(r, &v1);
1645 fc = 0;
1648 if (op == TOK_EQ || op == TOK_NE) {
1649 swapped = 0;
1650 } else {
1651 if (op == TOK_LE || op == TOK_LT)
1652 swapped = !swapped;
1653 if (op == TOK_LE || op == TOK_GE) {
1654 op = 0x93; /* setae */
1655 } else {
1656 op = 0x97; /* seta */
1660 if (swapped) {
1661 o(0x7e0ff3); /* movq */
1662 gen_modrm(1, r, vtop->sym, fc);
1664 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1665 o(0x66);
1667 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1668 o(0xc8);
1669 } else {
1670 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1671 o(0x66);
1673 o(0x2e0f); /* ucomisd */
1674 gen_modrm(0, r, vtop->sym, fc);
1677 vtop--;
1678 vtop->r = VT_CMP;
1679 vtop->c.i = op | 0x100;
1680 } else {
1681 /* no memory reference possible for long double operations */
1682 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1683 load(TREG_XMM0, vtop);
1684 swapped = !swapped;
1686 switch(op) {
1687 default:
1688 case '+':
1689 a = 0;
1690 break;
1691 case '-':
1692 a = 4;
1693 break;
1694 case '*':
1695 a = 1;
1696 break;
1697 case '/':
1698 a = 6;
1699 break;
1701 ft = vtop->type.t;
1702 fc = vtop->c.ul;
1703 if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1704 o(0xde); /* fxxxp %st, %st(1) */
1705 o(0xc1 + (a << 3));
1706 } else {
1707 /* if saved lvalue, then we must reload it */
1708 r = vtop->r;
1709 if ((r & VT_VALMASK) == VT_LLOCAL) {
1710 SValue v1;
1711 r = get_reg(RC_INT);
1712 v1.type.t = VT_PTR;
1713 v1.r = VT_LOCAL | VT_LVAL;
1714 v1.c.ul = fc;
1715 load(r, &v1);
1716 fc = 0;
1718 if (swapped) {
1719 /* movq %xmm0,%xmm1 */
1720 o(0x7e0ff3);
1721 o(0xc8);
1722 load(TREG_XMM0, vtop);
1723 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1724 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1725 o(0xf2);
1726 } else {
1727 o(0xf3);
1729 o(0x0f);
1730 o(0x58 + a);
1731 o(0xc1);
1732 } else {
1733 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1734 o(0xf2);
1735 } else {
1736 o(0xf3);
1738 o(0x0f);
1739 o(0x58 + a);
1740 gen_modrm(0, r, vtop->sym, fc);
1743 vtop--;
1748 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1749 and 'long long' cases. */
1750 void gen_cvt_itof(int t)
1752 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1753 save_reg(TREG_ST0);
1754 gv(RC_INT);
1755 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1756 /* signed long long to float/double/long double (unsigned case
1757 is handled generically) */
1758 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1759 o(0x242cdf); /* fildll (%rsp) */
1760 o(0x08c48348); /* add $8, %rsp */
1761 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1762 (VT_INT | VT_UNSIGNED)) {
1763 /* unsigned int to float/double/long double */
1764 o(0x6a); /* push $0 */
1765 g(0x00);
1766 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1767 o(0x242cdf); /* fildll (%rsp) */
1768 o(0x10c48348); /* add $16, %rsp */
1769 } else {
1770 /* int to float/double/long double */
1771 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1772 o(0x2404db); /* fildl (%rsp) */
1773 o(0x08c48348); /* add $8, %rsp */
1775 vtop->r = TREG_ST0;
1776 } else {
1777 save_reg(TREG_XMM0);
1778 gv(RC_INT);
1779 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
1780 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1781 (VT_INT | VT_UNSIGNED) ||
1782 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1783 o(0x48); /* REX */
1785 o(0x2a0f);
1786 o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1787 vtop->r = TREG_XMM0;
1791 /* convert from one floating point type to another */
1792 void gen_cvt_ftof(int t)
1794 int ft, bt, tbt;
1796 ft = vtop->type.t;
1797 bt = ft & VT_BTYPE;
1798 tbt = t & VT_BTYPE;
1800 if (bt == VT_FLOAT) {
1801 gv(RC_FLOAT);
1802 if (tbt == VT_DOUBLE) {
1803 o(0xc0140f); /* unpcklps */
1804 o(0xc05a0f); /* cvtps2pd */
1805 } else if (tbt == VT_LDOUBLE) {
1806 /* movss %xmm0,-0x10(%rsp) */
1807 o(0x44110ff3);
1808 o(0xf024);
1809 o(0xf02444d9); /* flds -0x10(%rsp) */
1810 vtop->r = TREG_ST0;
1812 } else if (bt == VT_DOUBLE) {
1813 gv(RC_FLOAT);
1814 if (tbt == VT_FLOAT) {
1815 o(0xc0140f66); /* unpcklpd */
1816 o(0xc05a0f66); /* cvtpd2ps */
1817 } else if (tbt == VT_LDOUBLE) {
1818 /* movsd %xmm0,-0x10(%rsp) */
1819 o(0x44110ff2);
1820 o(0xf024);
1821 o(0xf02444dd); /* fldl -0x10(%rsp) */
1822 vtop->r = TREG_ST0;
1824 } else {
1825 gv(RC_ST0);
1826 if (tbt == VT_DOUBLE) {
1827 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1828 /* movsd -0x10(%rsp),%xmm0 */
1829 o(0x44100ff2);
1830 o(0xf024);
1831 vtop->r = TREG_XMM0;
1832 } else if (tbt == VT_FLOAT) {
1833 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1834 /* movss -0x10(%rsp),%xmm0 */
1835 o(0x44100ff3);
1836 o(0xf024);
1837 vtop->r = TREG_XMM0;
1842 /* convert fp to int 't' type */
1843 void gen_cvt_ftoi(int t)
1845 int ft, bt, size, r;
1846 ft = vtop->type.t;
1847 bt = ft & VT_BTYPE;
1848 if (bt == VT_LDOUBLE) {
1849 gen_cvt_ftof(VT_DOUBLE);
1850 bt = VT_DOUBLE;
1853 gv(RC_FLOAT);
1854 if (t != VT_INT)
1855 size = 8;
1856 else
1857 size = 4;
1859 r = get_reg(RC_INT);
1860 if (bt == VT_FLOAT) {
1861 o(0xf3);
1862 } else if (bt == VT_DOUBLE) {
1863 o(0xf2);
1864 } else {
1865 assert(0);
1867 orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
1868 o(0xc0 + (REG_VALUE(r) << 3));
1869 vtop->r = r;
1872 /* computed goto support */
1873 void ggoto(void)
1875 gcall_or_jmp(1);
1876 vtop--;
1879 /* end of x86-64 code generator */
1880 /*************************************************************/
1881 #endif /* ! TARGET_DEFS_ONLY */
1882 /******************************************************/