Update README,add x86_64/arm,FreeBSD/OSX etc.
[tinycc.git] / x86_64-gen.c
blobf85cd01be7f9b3f2a257dfec4f8949c2475b459e
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
/* number of available registers */
#define NB_REGS         5
#define NB_ASM_REGS     8

/* A register can belong to several classes. The class bits must be
   ordered from more general to more precise (gv2() makes assumptions
   about that ordering). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_XMM0    0x0020
#define RC_ST0     0x0040 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_LRET    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */

/* pretty names for the registers (number 5 is not named here) */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_XMM0 = 3,
    TREG_ST0 = 4,

    TREG_RSI = 6,
    TREG_RDI = 7,
    TREG_R8 = 8,
    TREG_R9 = 9,

    TREG_R10 = 10,
    TREG_R11 = 11,

    /* flag bit or'ed into a register number (see the 'tr' temporaries in load()) */
    TREG_MEM = 0x10,
};

/* bit 3 of a register number: the part that goes into a REX prefix */
#define REX_BASE(reg) (((reg) >> 3) & 1)
/* low three bits of a register number: the part that goes into modrm/opcode */
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX  /* single word int return register */
#define REG_LRET TREG_RDX  /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 8
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE  0x1000
100 /******************************************************/
101 #else /* ! TARGET_DEFS_ONLY */
102 /******************************************************/
103 #include "tcc.h"
104 #include <assert.h>
106 ST_DATA const int reg_classes[NB_REGS+7] = {
107 /* eax */ RC_INT | RC_RAX,
108 /* ecx */ RC_INT | RC_RCX,
109 /* edx */ RC_INT | RC_RDX,
110 /* xmm0 */ RC_FLOAT | RC_XMM0,
111 /* st0 */ RC_ST0,
115 RC_INT | RC_R8,
116 RC_INT | RC_R9,
117 RC_INT | RC_R10,
118 RC_INT | RC_R11
/* text-section offset just past the bytes reserved for the current
   function's prolog (set by gfunc_prolog, read by gfunc_epilog) */
static unsigned long func_sub_sp_offset;
/* operand of the 'ret n' emitted by gfunc_epilog; 0 selects a plain 'ret' */
static int func_ret_sub;
124 /* XXX: make it faster ? */
125 void g(int c)
127 int ind1;
128 ind1 = ind + 1;
129 if (ind1 > cur_text_section->data_allocated)
130 section_realloc(cur_text_section, ind1);
131 cur_text_section->data[ind] = c;
132 ind = ind1;
/* Emit the bytes of 'c' through g(), least significant first, stopping
   as soon as the remaining value is zero. Consequently o(0) emits
   nothing and leading zero bytes of 'c' are never written. */
void o(unsigned int c)
{
    for (; c != 0; c >>= 8)
        g(c);
}
/* Emit 'v' as two bytes, low byte first. */
void gen_le16(int v)
{
    int shift;

    for (shift = 0; shift < 16; shift += 8)
        g(v >> shift);
}
/* Emit 'c' as four bytes, low byte first. */
void gen_le32(int c)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8)
        g(c >> shift);
}
/* Emit 'c' as eight bytes, low byte first. */
void gen_le64(int64_t c)
{
    int shift;

    for (shift = 0; shift < 64; shift += 8)
        g(c >> shift);
}
169 void orex(int ll, int r, int r2, int b)
171 if ((r & VT_VALMASK) >= VT_CONST)
172 r = 0;
173 if ((r2 & VT_VALMASK) >= VT_CONST)
174 r2 = 0;
175 if (ll || REX_BASE(r) || REX_BASE(r2))
176 o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
177 o(b);
180 /* output a symbol and patch all calls to it */
181 void gsym_addr(int t, int a)
183 int n, *ptr;
184 while (t) {
185 ptr = (int *)(cur_text_section->data + t);
186 n = *ptr; /* next value */
187 *ptr = a - t - 4;
188 t = n;
192 void gsym(int t)
194 gsym_addr(t, ind);
/* psym emits an instruction whose 4-byte data field is a reference to a
   symbol (a pending jump chain). It is exactly the same operation as oad(). */
#define psym oad
201 static int is64_type(int t)
203 return ((t & VT_BTYPE) == VT_PTR ||
204 (t & VT_BTYPE) == VT_FUNC ||
205 (t & VT_BTYPE) == VT_LLONG);
208 static int is_sse_float(int t) {
209 int bt;
210 bt = t & VT_BTYPE;
211 return bt == VT_DOUBLE || bt == VT_FLOAT;
215 /* instruction + 4 bytes data. Return the address of the data */
216 ST_FUNC int oad(int c, int s)
218 int ind1;
220 o(c);
221 ind1 = ind + 4;
222 if (ind1 > cur_text_section->data_allocated)
223 section_realloc(cur_text_section, ind1);
224 *(int *)(cur_text_section->data + ind) = s;
225 s = ind;
226 ind = ind1;
227 return s;
230 ST_FUNC void gen_addr32(int r, Sym *sym, int c)
232 if (r & VT_SYM)
233 greloc(cur_text_section, sym, ind, R_X86_64_32);
234 gen_le32(c);
237 /* output constant with relocation if 'r & VT_SYM' is true */
238 ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
240 if (r & VT_SYM)
241 greloc(cur_text_section, sym, ind, R_X86_64_64);
242 gen_le64(c);
245 /* output constant with relocation if 'r & VT_SYM' is true */
246 ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
248 if (r & VT_SYM)
249 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
250 gen_le32(c-4);
253 /* output got address with relocation */
254 static void gen_gotpcrel(int r, Sym *sym, int c)
256 #ifndef TCC_TARGET_PE
257 Section *sr;
258 ElfW(Rela) *rel;
259 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
260 sr = cur_text_section->reloc;
261 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
262 rel->r_addend = -4;
263 #else
264 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
265 cur_text_section->data[ind-3],
266 cur_text_section->data[ind-2],
267 cur_text_section->data[ind-1]
269 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
270 #endif
271 gen_le32(0);
272 if (c) {
273 /* we use add c, %xxx for displacement */
274 orex(1, r, 0, 0x81);
275 o(0xc0 + REG_VALUE(r));
276 gen_le32(c);
280 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
282 op_reg = REG_VALUE(op_reg) << 3;
283 if ((r & VT_VALMASK) == VT_CONST) {
284 /* constant memory reference */
285 o(0x05 | op_reg);
286 if (is_got) {
287 gen_gotpcrel(r, sym, c);
288 } else {
289 gen_addrpc32(r, sym, c);
291 } else if ((r & VT_VALMASK) == VT_LOCAL) {
292 /* currently, we use only ebp as base */
293 if (c == (char)c) {
294 /* short reference */
295 o(0x45 | op_reg);
296 g(c);
297 } else {
298 oad(0x85 | op_reg, c);
300 } else if ((r & VT_VALMASK) >= TREG_MEM) {
301 if (c) {
302 g(0x80 | op_reg | REG_VALUE(r));
303 gen_le32(c);
304 } else {
305 g(0x00 | op_reg | REG_VALUE(r));
307 } else {
308 g(0x00 | op_reg | REG_VALUE(r));
312 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
313 opcode bits */
314 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
316 gen_modrm_impl(op_reg, r, sym, c, 0);
319 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
320 opcode bits */
321 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
323 int is_got;
324 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
325 orex(1, r, op_reg, opcode);
326 gen_modrm_impl(op_reg, r, sym, c, is_got);
/* Emit code that loads the value described by 'sv' into register 'r'.
   Cases handled, in order:
   - non-PE, non-static symbol lvalue: its address is first fetched
     through the GOT into a temporary register;
   - lvalues: a move from memory whose opcode is chosen from the type
     (movd/movq for float/double, fldt for long double, sign/zero
     extending moves for char/short, plain mov otherwise);
   - constants and symbol addresses (mov immediate, lea or GOT load);
   - VT_LOCAL addresses (lea off the frame pointer);
   - VT_CMP flags, materialised as 0/1 with setcc;
   - VT_JMP/VT_JMPI pending jump chains, materialised as 0/1;
   - another register, including ST0 <-> XMM0 transfers through the
     stack at -0x10(%rsp). */
330 /* load 'r' from value 'sv' */
331 void load(int r, SValue *sv)
333 int v, t, ft, fc, fr;
334 SValue v1;
336 #ifdef TCC_TARGET_PE
337 SValue v2;
338 sv = pe_getimport(sv, &v2);
339 #endif
341 fr = sv->r;
342 ft = sv->type.t;
343 fc = sv->c.ul;
345 #ifndef TCC_TARGET_PE
346 /* we use indirect access via got */
347 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
348 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
349 /* use the result register as a temporary register */
350 int tr = r | TREG_MEM;
351 if (is_float(ft)) {
352 /* we cannot use float registers as a temporary register */
353 tr = get_reg(RC_INT) | TREG_MEM;
355 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
357 /* load from the temporary register */
358 fr = tr | VT_LVAL;
360 #endif
362 v = fr & VT_VALMASK;
363 if (fr & VT_LVAL) {
364 int b, ll;
/* VT_LLOCAL: first load the pointer kept in the local slot at offset fc
   into an integer register, which then serves as the base register */
365 if (v == VT_LLOCAL) {
366 v1.type.t = VT_PTR;
367 v1.r = VT_LOCAL | VT_LVAL;
368 v1.c.ul = fc;
369 fr = r;
370 if (!(reg_classes[fr] & RC_INT))
371 fr = get_reg(RC_INT);
372 load(fr, &v1);
374 ll = 0;
375 if ((ft & VT_BTYPE) == VT_FLOAT) {
376 b = 0x6e0f66, r = 0; /* movd */
377 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
378 b = 0x7e0ff3, r = 0; /* movq */
379 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
380 b = 0xdb, r = 5; /* fldt */
381 } else if ((ft & VT_TYPE) == VT_BYTE) {
382 b = 0xbe0f; /* movsbl */
383 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
384 b = 0xb60f; /* movzbl */
385 } else if ((ft & VT_TYPE) == VT_SHORT) {
386 b = 0xbf0f; /* movswl */
387 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
388 b = 0xb70f; /* movzwl */
389 } else {
390 ll = is64_type(ft);
391 b = 0x8b;
393 if (ll) {
394 gen_modrm64(b, r, fr, sv->sym, fc);
395 } else {
396 orex(ll, fr, r, b);
397 gen_modrm(r, fr, sv->sym, fc);
399 } else {
400 if (v == VT_CONST) {
401 if (fr & VT_SYM) {
402 #ifdef TCC_TARGET_PE
403 orex(1,0,r,0x8d);
404 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
405 gen_addrpc32(fr, sv->sym, fc);
406 #else
407 if (sv->sym->type.t & VT_STATIC) {
408 orex(1,0,r,0x8d);
409 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
410 gen_addrpc32(fr, sv->sym, fc);
411 } else {
412 orex(1,0,r,0x8b);
413 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
414 gen_gotpcrel(r, sv->sym, fc);
416 #endif
417 } else if (is64_type(ft)) {
418 orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
419 gen_le64(sv->c.ull);
420 } else {
421 orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
422 gen_le32(fc);
424 } else if (v == VT_LOCAL) {
425 orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
426 gen_modrm(r, VT_LOCAL, sv->sym, fc);
427 } else if (v == VT_CMP) {
/* opcode 0 makes orex() emit at most the REX prefix (o(0) writes nothing) */
428 orex(0,r,0,0);
429 if ((fc & ~0x100) != TOK_NE)
430 oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
431 else
432 oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
433 if (fc & 0x100)
435 /* This was a float compare. If the parity bit is
436 set the result was unordered, meaning false for everything
437 except TOK_NE, and true for TOK_NE. */
438 fc &= ~0x100;
/* jp over the following setcc; the jump distance grows by one when the
   setcc needs a REX prefix */
439 o(0x037a + (REX_BASE(r) << 8));
441 orex(0,r,0, 0x0f); /* setxx %br */
442 o(fc);
443 o(0xc0 + REG_VALUE(r));
444 } else if (v == VT_JMP || v == VT_JMPI) {
445 t = v & 1;
446 orex(0,r,0,0);
447 oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
448 o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
449 gsym(fc);
450 orex(0,r,0,0);
451 oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
452 } else if (v != r) {
453 if (r == TREG_XMM0) {
454 assert(v == TREG_ST0);
455 /* gen_cvt_ftof(VT_DOUBLE); */
456 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
457 /* movsd -0x10(%rsp),%xmm0 */
458 o(0x44100ff2);
459 o(0xf024);
460 } else if (r == TREG_ST0) {
461 assert(v == TREG_XMM0);
462 /* gen_cvt_ftof(VT_LDOUBLE); */
463 /* movsd %xmm0,-0x10(%rsp) */
464 o(0x44110ff2);
465 o(0xf024);
466 o(0xf02444dd); /* fldl -0x10(%rsp) */
467 } else {
468 orex(1,r,v, 0x89);
469 o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
/* Emit code that stores register 'r' into the lvalue described by 'v'.
   The store opcode is selected from the basic type of 'v' (movd, movq,
   fstpt, or an 8/16/32/64-bit mov). On non-PE targets a symbol
   destination is reached through the GOT: its address is loaded into
   %r11 first and the store then goes through (%r11); 'pic' holds the
   REX prefix byte used for that form and is 0 otherwise, in which case
   o(pic) emits nothing. */
475 /* store register 'r' in lvalue 'v' */
476 void store(int r, SValue *v)
478 int fr, bt, ft, fc;
479 int op64 = 0;
480 /* store the REX prefix in this variable when PIC is enabled */
481 int pic = 0;
483 #ifdef TCC_TARGET_PE
484 SValue v2;
485 v = pe_getimport(v, &v2);
486 #endif
488 ft = v->type.t;
489 fc = v->c.ul;
490 fr = v->r & VT_VALMASK;
491 bt = ft & VT_BTYPE;
493 #ifndef TCC_TARGET_PE
494 /* we need to access the variable via got */
495 if (fr == VT_CONST && (v->r & VT_SYM)) {
496 /* mov xx(%rip), %r11 */
497 o(0x1d8b4c);
498 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
499 pic = is64_type(bt) ? 0x49 : 0x41;
501 #endif
503 /* XXX: incorrect if float reg to reg */
504 if (bt == VT_FLOAT) {
505 o(0x66);
506 o(pic);
507 o(0x7e0f); /* movd */
508 r = 0;
509 } else if (bt == VT_DOUBLE) {
510 o(0x66);
511 o(pic);
512 o(0xd60f); /* movq */
513 r = 0;
514 } else if (bt == VT_LDOUBLE) {
515 o(0xc0d9); /* fld %st(0) */
516 o(pic);
517 o(0xdb); /* fstpt */
518 r = 7;
519 } else {
520 if (bt == VT_SHORT)
521 o(0x66);
522 o(pic);
523 if (bt == VT_BYTE || bt == VT_BOOL)
524 orex(0, 0, r, 0x88);
525 else if (is64_type(bt))
/* the 64-bit opcode is only remembered here; it is emitted below, either
   directly (pic) or by gen_modrm64() together with its REX prefix */
526 op64 = 0x89;
527 else
528 orex(0, 0, r, 0x89);
530 if (pic) {
531 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
532 if (op64)
533 o(op64);
534 o(3 + (r << 3));
535 } else if (op64) {
536 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
537 gen_modrm64(op64, r, v->r, v->sym, fc);
538 } else if (fr != r) {
539 /* XXX: don't we really come here? */
/* abort() comes first, so the register-to-register move below is never emitted */
540 abort();
541 o(0xc0 + fr + r * 8); /* mov r, fr */
543 } else {
544 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
545 gen_modrm(r, v->r, v->sym, fc);
546 } else if (fr != r) {
547 /* XXX: don't we really come here? */
/* abort() comes first, so the register-to-register move below is never emitted */
548 abort();
549 o(0xc0 + fr + r * 8); /* mov r, fr */
554 /* 'is_jmp' is '1' if it is a jump */
555 static void gcall_or_jmp(int is_jmp)
557 int r;
558 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
559 /* constant case */
560 if (vtop->r & VT_SYM) {
561 /* relocation case */
562 greloc(cur_text_section, vtop->sym,
563 ind + 1, R_X86_64_PC32);
564 } else {
565 /* put an empty PC32 relocation */
566 put_elf_reloc(symtab_section, cur_text_section,
567 ind + 1, R_X86_64_PC32, 0);
569 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
570 } else {
571 /* otherwise, indirect call */
572 r = TREG_R11;
573 load(r, vtop);
574 o(0x41); /* REX */
575 o(0xff); /* call/jmp *r */
576 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
580 #ifdef TCC_TARGET_PE
582 #define REGN 4
583 static const uint8_t arg_regs[] = {
584 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
587 static int func_scratch;
589 /* Generate function call. The function address is pushed first, then
590 all the parameters in call order. This functions pops all the
591 parameters and the function address. */
/* Emit opcode 'b' with a 64-bit REX prefix and the memory operand d(%rsp).
   The low three bits of 'r' fill the modrm 'reg' field; when bit 0x100 is
   set in 'r' it is not treated as a register for the REX prefix. An 8-bit
   displacement is used when 'd' fits in a char, a 32-bit one otherwise. */
void gen_offs_sp(int b, int r, int d)
{
    const int reg_field = REG_VALUE(r) << 3;

    orex(1, 0, r & 0x100 ? 0 : r, b);
    if (d != (char)d) {
        o(0x2484 | reg_field);
        gen_le32(d);
        return;
    }
    o(0x2444 | reg_field);
    g(d);
}
/* Generate a function call (TCC_TARGET_PE variant). The value stack
   holds the function address below its 'nb_args' arguments; all of them
   are popped.
   Pass 1 walks the arguments without popping them and stores struct and
   long double arguments into stack space starting at args_size(%rsp);
   func_scratch is raised to the total area this call needs.
   Pass 2 pops the arguments one by one: argument j goes into arg_regs[j]
   when j < REGN, otherwise into its slot at j*8(%rsp). For structs and
   long doubles the address of the copy made in pass 1 is passed. */
605 void gfunc_call(int nb_args)
607 int size, align, r, args_size, i, d, j, bt, struct_size;
608 int nb_reg_args, gen_reg;
610 nb_reg_args = nb_args;
/* at least REGN slots are always reserved */
611 args_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;
613 /* for struct arguments, we need to call memcpy and the function
614 call breaks register passing arguments we are preparing.
615 So, we process arguments which will be passed by stack first. */
616 struct_size = args_size;
617 for(i = 0; i < nb_args; i++) {
618 SValue *sv = &vtop[-i];
619 bt = (sv->type.t & VT_BTYPE);
620 if (bt == VT_STRUCT) {
621 size = type_size(&sv->type, &align);
622 /* align to stack align size */
623 size = (size + 15) & ~15;
624 /* generate structure store */
625 r = get_reg(RC_INT);
626 gen_offs_sp(0x8d, r, struct_size);
627 struct_size += size;
629 /* generate memcpy call */
630 vset(&sv->type, r | VT_LVAL, 0);
631 vpushv(sv);
632 vstore();
633 --vtop;
635 } else if (bt == VT_LDOUBLE) {
637 gv(RC_ST0);
638 gen_offs_sp(0xdb, 0x107, struct_size);
639 struct_size += 16;
644 if (func_scratch < struct_size)
645 func_scratch = struct_size;
646 #if 1
647 for (i = 0; i < REGN; ++i)
648 save_reg(arg_regs[i]);
649 save_reg(TREG_RAX);
650 #endif
651 gen_reg = nb_reg_args;
652 struct_size = args_size;
654 for(i = 0; i < nb_args; i++) {
655 bt = (vtop->type.t & VT_BTYPE);
657 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
658 if (bt == VT_LDOUBLE)
659 size = 16;
660 else
661 size = type_size(&vtop->type, &align);
662 /* align to stack align size */
663 size = (size + 15) & ~15;
664 j = --gen_reg;
665 if (j >= REGN) {
666 d = TREG_RAX;
667 gen_offs_sp(0x8d, d, struct_size);
668 gen_offs_sp(0x89, d, j*8);
669 } else {
670 d = arg_regs[j];
671 gen_offs_sp(0x8d, d, struct_size);
673 struct_size += size;
675 } else if (is_sse_float(vtop->type.t)) {
676 gv(RC_FLOAT); /* only one float register */
677 j = --gen_reg;
678 if (j >= REGN) {
679 /* movq %xmm0, j*8(%rsp) */
680 gen_offs_sp(0xd60f66, 0x100, j*8);
681 } else {
682 /* movaps %xmm0, %xmmN */
683 o(0x280f);
684 o(0xc0 + (j << 3));
685 d = arg_regs[j];
686 /* mov %xmm0, %rxx */
687 o(0x66);
688 orex(1,d,0, 0x7e0f);
689 o(0xc0 + REG_VALUE(d));
691 } else {
692 j = --gen_reg;
693 if (j >= REGN) {
694 r = gv(RC_INT);
695 gen_offs_sp(0x89, r, j*8);
696 } else {
697 d = arg_regs[j];
698 if (d < NB_REGS) {
699 gv(reg_classes[d] & ~RC_INT);
700 } else {
701 r = gv(RC_INT);
702 if (d != r) {
703 orex(1,d,r, 0x89);
704 o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8);
710 vtop--;
712 save_regs(0);
713 gcall_or_jmp(0);
/* pop the function address */
714 vtop--;
/* number of bytes reserved at function entry; gfunc_epilog() fills them in */
718 #define FUNC_PROLOG_SIZE 11
/* TCC_TARGET_PE variant. Resets the per-function state, skips
   FUNC_PROLOG_SIZE bytes for the prolog that gfunc_epilog() writes later,
   and declares the parameters of 'func_type' as locals. Parameter slots
   start at offset PTR_SIZE * 2 from the frame pointer and advance by
   PTR_SIZE each; the first REGN parameters are stored from arg_regs[]
   into their slots. Struct and long double parameters are declared with
   VT_REF. For a FUNC_ELLIPSIS function the remaining argument registers
   are stored as well. */
720 /* generate function prolog of type 't' */
721 void gfunc_prolog(CType *func_type)
723 int addr, reg_param_index, bt;
724 Sym *sym;
725 CType *type;
727 func_ret_sub = 0;
728 func_scratch = 0;
729 loc = 0;
731 addr = PTR_SIZE * 2;
732 ind += FUNC_PROLOG_SIZE;
733 func_sub_sp_offset = ind;
734 reg_param_index = 0;
736 sym = func_type->ref;
738 /* if the function returns a structure, then add an
739 implicit pointer parameter */
740 func_vt = sym->type;
741 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
742 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
743 reg_param_index++;
744 addr += PTR_SIZE;
747 /* define parameters */
748 while ((sym = sym->next) != NULL) {
749 type = &sym->type;
750 bt = type->t & VT_BTYPE;
751 if (reg_param_index < REGN) {
752 /* save arguments passed by register */
753 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
755 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
756 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
757 } else {
758 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
760 reg_param_index++;
761 addr += PTR_SIZE;
764 while (reg_param_index < REGN) {
765 if (func_type->ref->c == FUNC_ELLIPSIS)
766 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
767 reg_param_index++;
768 addr += PTR_SIZE;
772 /* generate function epilog */
773 void gfunc_epilog(void)
775 int v, saved_ind;
777 o(0xc9); /* leave */
778 if (func_ret_sub == 0) {
779 o(0xc3); /* ret */
780 } else {
781 o(0xc2); /* ret n */
782 g(func_ret_sub);
783 g(func_ret_sub >> 8);
786 saved_ind = ind;
787 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
788 /* align local size to word & save local variables */
789 v = (func_scratch + -loc + 15) & -16;
791 if (v >= 4096) {
792 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
793 oad(0xb8, v); /* mov stacksize, %eax */
794 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
795 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
796 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
797 } else {
798 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
799 o(0xec8148); /* sub rsp, stacksize */
800 gen_le32(v);
803 cur_text_section->data_offset = saved_ind;
804 pe_add_unwind_data(ind, saved_ind, v);
805 ind = cur_text_section->data_offset;
808 #else
/* Emit 'add $val, %rsp', using the 8-bit immediate form when 'val'
   fits in a char and the 32-bit immediate form otherwise. */
static void gadd_sp(int val)
{
    if (val != (char)val) {
        oad(0xc48148, val); /* add $xxx, %rsp */
        return;
    }
    o(0xc48348);
    g(val);
}
820 #define REGN 6
821 static const uint8_t arg_regs[REGN] = {
822 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
/* Non-PE variant. Works in three phases:
   1. count integer and SSE arguments and the stack bytes needed, and pad
      the stack so that it stays 16-byte aligned;
   2. push every stack-passed argument (structs, long doubles, and the
      integer/SSE arguments beyond the 6th/8th), leaving all entries on
      the value stack;
   3. pop the arguments and move the register-passed ones into place;
      values destined for %rdx/%rcx are parked in %r10/%r11 until all
      gv() calls are done.
   %eax is set to min(nb_sse_args, 8) before the call, and the pushed
   bytes are released from %rsp afterwards. */
825 /* Generate function call. The function address is pushed first, then
826 all the parameters in call order. This functions pops all the
827 parameters and the function address. */
828 void gfunc_call(int nb_args)
830 int size, align, r, args_size, i;
831 int nb_reg_args = 0;
832 int nb_sse_args = 0;
833 int sse_reg, gen_reg;
835 /* calculate the number of integer/float arguments */
836 args_size = 0;
837 for(i = 0; i < nb_args; i++) {
838 if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
839 args_size += type_size(&vtop[-i].type, &align);
840 args_size = (args_size + 7) & ~7;
841 } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
842 args_size += 16;
843 } else if (is_sse_float(vtop[-i].type.t)) {
844 nb_sse_args++;
845 if (nb_sse_args > 8) args_size += 8;
846 } else {
847 nb_reg_args++;
848 if (nb_reg_args > REGN) args_size += 8;
852 /* for struct arguments, we need to call memcpy and the function
853 call breaks register passing arguments we are preparing.
854 So, we process arguments which will be passed by stack first. */
855 gen_reg = nb_reg_args;
856 sse_reg = nb_sse_args;
858 /* adjust stack to align SSE boundary */
/* note: the condition also reduces args_size to its remainder modulo 16 */
859 if (args_size &= 15) {
860 /* fetch cpu flag before the following sub will change the value */
861 if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
862 gv(RC_INT);
864 args_size = 16 - args_size;
865 o(0x48);
866 oad(0xec81, args_size); /* sub $xxx, %rsp */
869 for(i = 0; i < nb_args; i++) {
870 /* Swap argument to top, it will possibly be changed here,
871 and might use more temps. All arguments must remain on the
872 stack, so that get_reg can correctly evict some of them onto
873 stack. We could use also use a vrott(nb_args) at the end
874 of this loop, but this seems faster. */
875 SValue tmp = vtop[0];
876 vtop[0] = vtop[-i];
877 vtop[-i] = tmp;
878 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
879 size = type_size(&vtop->type, &align);
880 /* align to stack align size */
881 size = (size + 7) & ~7;
882 /* allocate the necessary size on stack */
883 o(0x48);
884 oad(0xec81, size); /* sub $xxx, %rsp */
885 /* generate structure store */
886 r = get_reg(RC_INT);
887 orex(1, r, 0, 0x89); /* mov %rsp, r */
888 o(0xe0 + REG_VALUE(r));
889 vset(&vtop->type, r | VT_LVAL, 0);
890 vswap();
891 vstore();
892 args_size += size;
893 } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
894 gv(RC_ST0);
895 size = LDOUBLE_SIZE;
896 oad(0xec8148, size); /* sub $xxx, %rsp */
897 o(0x7cdb); /* fstpt 0(%rsp) */
898 g(0x24);
899 g(0x00);
900 args_size += size;
901 } else if (is_sse_float(vtop->type.t)) {
902 int j = --sse_reg;
903 if (j >= 8) {
904 gv(RC_FLOAT);
905 o(0x50); /* push $rax */
906 /* movq %xmm0, (%rsp) */
907 o(0x04d60f66);
908 o(0x24);
909 args_size += 8;
911 } else {
912 int j = --gen_reg;
913 /* simple type */
914 /* XXX: implicit cast ? */
915 if (j >= REGN) {
916 r = gv(RC_INT);
917 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
918 args_size += 8;
922 /* And swap the argument back to its original position. */
923 tmp = vtop[0];
924 vtop[0] = vtop[-i];
925 vtop[-i] = tmp;
928 /* XXX This should be superfluous. */
929 save_regs(0); /* save used temporary registers */
931 /* then, we prepare register passing arguments.
932 Note that we cannot set RDX and RCX in this loop because gv()
933 may break these temporary registers. Let's use R10 and R11
934 instead of them */
935 gen_reg = nb_reg_args;
936 sse_reg = nb_sse_args;
937 for(i = 0; i < nb_args; i++) {
938 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
939 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
940 } else if (is_sse_float(vtop->type.t)) {
941 int j = --sse_reg;
942 if (j < 8) {
943 gv(RC_FLOAT); /* only one float register */
944 /* movaps %xmm0, %xmmN */
945 o(0x280f);
946 o(0xc0 + (sse_reg << 3));
948 } else {
949 int j = --gen_reg;
950 /* simple type */
951 /* XXX: implicit cast ? */
952 if (j < REGN) {
953 int d = arg_regs[j];
954 r = gv(RC_INT);
955 if (j == 2 || j == 3)
956 /* j=2: r10, j=3: r11 */
957 d = j + 8;
958 orex(1,d,r,0x89); /* mov */
959 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
962 vtop--;
965 /* We shouldn't have many operands on the stack anymore, but the
966 call address itself is still there, and it might be in %eax
967 (or edx/ecx) currently, which the below writes would clobber.
968 So evict all remaining operands here. */
969 save_regs(0);
971 /* Copy R10 and R11 into RDX and RCX, respectively */
972 if (nb_reg_args > 2) {
973 o(0xd2894c); /* mov %r10, %rdx */
974 if (nb_reg_args > 3) {
975 o(0xd9894c); /* mov %r11, %rcx */
979 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
980 gcall_or_jmp(0);
981 if (args_size)
982 gadd_sp(args_size);
/* pop the function address */
983 vtop--;
987 #define FUNC_PROLOG_SIZE 11
989 static void push_arg_reg(int i) {
990 loc -= 8;
991 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
/* Non-PE variant. Skips FUNC_PROLOG_SIZE bytes for the prolog that
   gfunc_epilog() writes later, then:
   - for a FUNC_ELLIPSIS function, counts how many integer/SSE registers
     and stack bytes the named parameters use, stores those three values
     at -0x10, -0xc and -0x8 off the frame pointer, and saves all eight
     SSE and all REGN integer argument registers to the frame;
   - declares every parameter as a local: register-passed parameters are
     stored to a fresh local slot, stack-passed ones (structs, long
     doubles, overflow arguments) are addressed at positive offsets
     starting at PTR_SIZE * 2.
   When the function returns a struct, the first argument register
   carries the implicit result pointer and its slot is kept in func_vc. */
994 /* generate function prolog of type 't' */
995 void gfunc_prolog(CType *func_type)
997 int i, addr, align, size;
998 int param_index, param_addr, reg_param_index, sse_param_index;
999 Sym *sym;
1000 CType *type;
1002 sym = func_type->ref;
1003 addr = PTR_SIZE * 2;
1004 loc = 0;
1005 ind += FUNC_PROLOG_SIZE;
1006 func_sub_sp_offset = ind;
1007 func_ret_sub = 0;
1009 if (func_type->ref->c == FUNC_ELLIPSIS) {
1010 int seen_reg_num, seen_sse_num, seen_stack_size;
1011 seen_reg_num = seen_sse_num = 0;
1012 /* frame pointer and return address */
1013 seen_stack_size = PTR_SIZE * 2;
1014 /* count the number of seen parameters */
1015 sym = func_type->ref;
1016 while ((sym = sym->next) != NULL) {
1017 type = &sym->type;
1018 if (is_sse_float(type->t)) {
1019 if (seen_sse_num < 8) {
1020 seen_sse_num++;
1021 } else {
1022 seen_stack_size += 8;
1024 } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
1025 size = type_size(type, &align);
1026 size = (size + 7) & ~7;
1027 seen_stack_size += size;
1028 } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
1029 seen_stack_size += LDOUBLE_SIZE;
1030 } else {
1031 if (seen_reg_num < REGN) {
1032 seen_reg_num++;
1033 } else {
1034 seen_stack_size += 8;
1039 loc -= 16;
1040 /* movl $0x????????, -0x10(%rbp) */
1041 o(0xf045c7);
1042 gen_le32(seen_reg_num * 8);
1043 /* movl $0x????????, -0xc(%rbp) */
1044 o(0xf445c7);
1045 gen_le32(seen_sse_num * 16 + 48);
1046 /* movl $0x????????, -0x8(%rbp) */
1047 o(0xf845c7);
1048 gen_le32(seen_stack_size);
1050 /* save all register passing arguments */
/* SSE registers are saved from xmm7 down to xmm0, 16 bytes apart, with
   the upper 8 bytes of each slot zeroed */
1051 for (i = 0; i < 8; i++) {
1052 loc -= 16;
1053 o(0xd60f66); /* movq */
1054 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1055 /* movq $0, loc+8(%rbp) */
1056 o(0x85c748);
1057 gen_le32(loc + 8);
1058 gen_le32(0);
1060 for (i = 0; i < REGN; i++) {
1061 push_arg_reg(REGN-1-i);
1065 sym = func_type->ref;
1066 param_index = 0;
1067 reg_param_index = 0;
1068 sse_param_index = 0;
1070 /* if the function returns a structure, then add an
1071 implicit pointer parameter */
1072 func_vt = sym->type;
1073 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
1074 push_arg_reg(reg_param_index);
1075 param_addr = loc;
1077 func_vc = loc;
1078 param_index++;
1079 reg_param_index++;
1081 /* define parameters */
1082 while ((sym = sym->next) != NULL) {
1083 type = &sym->type;
1084 size = type_size(type, &align);
1085 size = (size + 7) & ~7;
1086 if (is_sse_float(type->t)) {
1087 if (sse_param_index < 8) {
1088 /* save arguments passed by register */
1089 loc -= 8;
1090 o(0xd60f66); /* movq */
1091 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
1092 param_addr = loc;
1093 } else {
1094 param_addr = addr;
1095 addr += size;
1097 sse_param_index++;
1099 } else if ((type->t & VT_BTYPE) == VT_STRUCT ||
1100 (type->t & VT_BTYPE) == VT_LDOUBLE) {
1101 param_addr = addr;
1102 addr += size;
1103 } else {
1104 if (reg_param_index < REGN) {
1105 /* save arguments passed by register */
1106 push_arg_reg(reg_param_index);
1107 param_addr = loc;
1108 } else {
1109 param_addr = addr;
1110 addr += 8;
1112 reg_param_index++;
1114 sym_push(sym->v & ~SYM_FIELD, type,
1115 VT_LOCAL | VT_LVAL, param_addr);
1116 param_index++;
1120 /* generate function epilog */
1121 void gfunc_epilog(void)
1123 int v, saved_ind;
1125 o(0xc9); /* leave */
1126 if (func_ret_sub == 0) {
1127 o(0xc3); /* ret */
1128 } else {
1129 o(0xc2); /* ret n */
1130 g(func_ret_sub);
1131 g(func_ret_sub >> 8);
1133 /* align local size to word & save local variables */
1134 v = (-loc + 15) & -16;
1135 saved_ind = ind;
1136 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1137 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1138 o(0xec8148); /* sub rsp, stacksize */
1139 gen_le32(v);
1140 ind = saved_ind;
1143 #endif /* not PE */
/* Emit an unconditional 32-bit jump whose displacement field initially
   holds 't' (the previous head of a pending jump chain). Returns the
   offset of that field, i.e. the new head of the chain. */
int gjmp(int t)
{
    int field = psym(0xe9, t);

    return field;
}
1151 /* generate a jump to a fixed address */
1152 void gjmp_addr(int a)
1154 int r;
1155 r = a - ind - 2;
1156 if (r == (char)r) {
1157 g(0xeb);
1158 g(r);
1159 } else {
1160 oad(0xe9, a - ind - 5);
/* Emit a conditional jump on the top value-stack entry and pop it.
   't' is the head of the pending jump chain to extend; the (possibly
   new) head is returned. With inv == 0 the jump is taken when the value
   is true, with inv == 1 when it is false.
   - VT_CMP: jump directly on the flags; float compares (bit 0x100 set in
     the condition) additionally test the parity flag for unordered results.
   - VT_JMP / VT_JMPI: merge or resolve the entry's own jump chain.
   - otherwise: float and long long values are first compared against 0;
     constants become an unconditional jump or nothing; anything else is
     tested with 'test v, v'. */
1164 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1165 int gtst(int inv, int t)
1167 int v, *p;
1169 v = vtop->r & VT_VALMASK;
1170 if (v == VT_CMP) {
1171 /* fast case : can jump directly since flags are set */
1172 if (vtop->c.i & 0x100)
1174 /* This was a float compare. If the parity flag is set
1175 the result was unordered. For anything except != this
1176 means false and we don't jump (anding both conditions).
1177 For != this means true (oring both).
1178 Take care about inverting the test. We need to jump
1179 to our target if the result was unordered and test wasn't NE,
1180 otherwise if unordered we don't want to jump. */
1181 vtop->c.i &= ~0x100;
1182 if (!inv == (vtop->c.i != TOK_NE))
1183 o(0x067a); /* jp +6 */
1184 else
1186 g(0x0f);
1187 t = psym(0x8a, t); /* jp t */
1190 g(0x0f);
1191 t = psym((vtop->c.i - 16) ^ inv, t);
1192 } else if (v == VT_JMP || v == VT_JMPI) {
1193 /* && or || optimization */
1194 if ((v & 1) == inv) {
1195 /* insert vtop->c jump list in t */
/* walk to the end of the entry's chain and append 't' there */
1196 p = &vtop->c.i;
1197 while (*p != 0)
1198 p = (int *)(cur_text_section->data + *p);
1199 *p = t;
1200 t = vtop->c.i;
1201 } else {
1202 t = gjmp(t);
1203 gsym(vtop->c.i);
1205 } else {
1206 if (is_float(vtop->type.t) ||
1207 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1208 vpushi(0);
1209 gen_op(TOK_NE);
1211 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1212 /* constant jmp optimization */
1213 if ((vtop->c.i != 0) != inv)
1214 t = gjmp(t);
1215 } else {
1216 v = gv(RC_INT);
1217 orex(0,v,v,0x85);
1218 o(0xc0 + REG_VALUE(v) * 9);
1219 g(0x0f);
1220 t = psym(0x85 ^ inv, t);
1223 vtop--;
1224 return t;
/* Emit the integer operation 'op' on the two top value-stack entries
   (vtop[-1] op vtop[0]) and leave the result in their place (one entry
   is popped). 'll' selects 64-bit operand size from the type of vtop[-1].
   - add/sub/and/or/xor, their carry variants and the default case share
     the gen_op8 code with 'opc' as the operation selector; a constant
     right operand that fits is encoded as an immediate. The default case
     uses opc 7 (presumably cmp); for ops in TOK_ULT..TOK_GT the result
     is recorded as VT_CMP with the operator in c.i.
   - '*' uses imul on two registers.
   - shifts use an immediate count or a count in %cl.
   - division and modulo put the dividend in %rax, save %rdx, and take
     the result from %rax (quotient) or %rdx (remainder). */
1227 /* generate an integer binary operation */
1228 void gen_opi(int op)
1230 int r, fr, opc, c;
1231 int ll, uu, cc;
1233 ll = is64_type(vtop[-1].type.t);
1234 uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
1235 cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
1237 switch(op) {
1238 case '+':
1239 case TOK_ADDC1: /* add with carry generation */
1240 opc = 0;
1241 gen_op8:
/* immediate form only when the constant survives truncation to int */
1242 if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
1243 /* constant case */
1244 vswap();
1245 r = gv(RC_INT);
1246 vswap();
1247 c = vtop->c.i;
1248 if (c == (char)c) {
1249 /* XXX: generate inc and dec for smaller code ? */
1250 orex(ll, r, 0, 0x83);
1251 o(0xc0 | (opc << 3) | REG_VALUE(r));
1252 g(c);
1253 } else {
1254 orex(ll, r, 0, 0x81);
1255 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1257 } else {
1258 gv2(RC_INT, RC_INT);
1259 r = vtop[-1].r;
1260 fr = vtop[0].r;
1261 orex(ll, r, fr, (opc << 3) | 0x01);
1262 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1264 vtop--;
1265 if (op >= TOK_ULT && op <= TOK_GT) {
1266 vtop->r = VT_CMP;
1267 vtop->c.i = op;
1269 break;
1270 case '-':
1271 case TOK_SUBC1: /* sub with carry generation */
1272 opc = 5;
1273 goto gen_op8;
1274 case TOK_ADDC2: /* add with carry use */
1275 opc = 2;
1276 goto gen_op8;
1277 case TOK_SUBC2: /* sub with carry use */
1278 opc = 3;
1279 goto gen_op8;
1280 case '&':
1281 opc = 4;
1282 goto gen_op8;
1283 case '^':
1284 opc = 6;
1285 goto gen_op8;
1286 case '|':
1287 opc = 1;
1288 goto gen_op8;
1289 case '*':
1290 gv2(RC_INT, RC_INT);
1291 r = vtop[-1].r;
1292 fr = vtop[0].r;
1293 orex(ll, fr, r, 0xaf0f); /* imul fr, r */
1294 o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
1295 vtop--;
1296 break;
1297 case TOK_SHL:
1298 opc = 4;
1299 goto gen_shift;
1300 case TOK_SHR:
1301 opc = 5;
1302 goto gen_shift;
1303 case TOK_SAR:
1304 opc = 7;
1305 gen_shift:
1306 opc = 0xc0 | (opc << 3);
1307 if (cc) {
1308 /* constant case */
1309 vswap();
1310 r = gv(RC_INT);
1311 vswap();
1312 orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
1313 o(opc | REG_VALUE(r));
/* the count is masked to the operand width */
1314 g(vtop->c.i & (ll ? 63 : 31));
1315 } else {
1316 /* we generate the shift in ecx */
1317 gv2(RC_INT, RC_RCX);
1318 r = vtop[-1].r;
1319 orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
1320 o(opc | REG_VALUE(r));
1322 vtop--;
1323 break;
1324 case TOK_UDIV:
1325 case TOK_UMOD:
1326 uu = 1;
1327 goto divmod;
1328 case '/':
1329 case '%':
1330 case TOK_PDIV:
1331 uu = 0;
1332 divmod:
1333 /* first operand must be in eax */
1334 /* XXX: need better constraint for second operand */
1335 gv2(RC_RAX, RC_RCX);
1336 r = vtop[-1].r;
1337 fr = vtop[0].r;
1338 vtop--;
1339 save_reg(TREG_RDX);
1340 orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1341 orex(ll, fr, 0, 0xf7); /* div fr, %eax */
1342 o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
1343 if (op == '%' || op == TOK_UMOD)
1344 r = TREG_RDX;
1345 else
1346 r = TREG_RAX;
1347 vtop->r = r;
1348 break;
1349 default:
1350 opc = 7;
1351 goto gen_op8;
/* Generate a 'long long' integer operation.  Nothing special is needed
   here: the work is forwarded unchanged to gen_opi(). */
void gen_opl(int op)
{
    gen_opi(op);
}
1360 /* generate a floating point operation 'v = t1 op t2' instruction. The
1361 two operands are guaranted to have the same floating point type */
1362 /* XXX: need to use ST1 too */
1363 void gen_opf(int op)
1365 int a, ft, fc, swapped, r;
1366 int float_type =
1367 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1369 /* convert constants to memory references */
1370 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1371 vswap();
1372 gv(float_type);
1373 vswap();
1375 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1376 gv(float_type);
1378 /* must put at least one value in the floating point register */
1379 if ((vtop[-1].r & VT_LVAL) &&
1380 (vtop[0].r & VT_LVAL)) {
1381 vswap();
1382 gv(float_type);
1383 vswap();
1385 swapped = 0;
1386 /* swap the stack if needed so that t1 is the register and t2 is
1387 the memory reference */
1388 if (vtop[-1].r & VT_LVAL) {
1389 vswap();
1390 swapped = 1;
1392 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1393 if (op >= TOK_ULT && op <= TOK_GT) {
1394 /* load on stack second operand */
1395 load(TREG_ST0, vtop);
1396 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1397 if (op == TOK_GE || op == TOK_GT)
1398 swapped = !swapped;
1399 else if (op == TOK_EQ || op == TOK_NE)
1400 swapped = 0;
1401 if (swapped)
1402 o(0xc9d9); /* fxch %st(1) */
1403 o(0xe9da); /* fucompp */
1404 o(0xe0df); /* fnstsw %ax */
1405 if (op == TOK_EQ) {
1406 o(0x45e480); /* and $0x45, %ah */
1407 o(0x40fC80); /* cmp $0x40, %ah */
1408 } else if (op == TOK_NE) {
1409 o(0x45e480); /* and $0x45, %ah */
1410 o(0x40f480); /* xor $0x40, %ah */
1411 op = TOK_NE;
1412 } else if (op == TOK_GE || op == TOK_LE) {
1413 o(0x05c4f6); /* test $0x05, %ah */
1414 op = TOK_EQ;
1415 } else {
1416 o(0x45c4f6); /* test $0x45, %ah */
1417 op = TOK_EQ;
1419 vtop--;
1420 vtop->r = VT_CMP;
1421 vtop->c.i = op;
1422 } else {
1423 /* no memory reference possible for long double operations */
1424 load(TREG_ST0, vtop);
1425 swapped = !swapped;
1427 switch(op) {
1428 default:
1429 case '+':
1430 a = 0;
1431 break;
1432 case '-':
1433 a = 4;
1434 if (swapped)
1435 a++;
1436 break;
1437 case '*':
1438 a = 1;
1439 break;
1440 case '/':
1441 a = 6;
1442 if (swapped)
1443 a++;
1444 break;
1446 ft = vtop->type.t;
1447 fc = vtop->c.ul;
1448 o(0xde); /* fxxxp %st, %st(1) */
1449 o(0xc1 + (a << 3));
1450 vtop--;
1452 } else {
1453 if (op >= TOK_ULT && op <= TOK_GT) {
1454 /* if saved lvalue, then we must reload it */
1455 r = vtop->r;
1456 fc = vtop->c.ul;
1457 if ((r & VT_VALMASK) == VT_LLOCAL) {
1458 SValue v1;
1459 r = get_reg(RC_INT);
1460 v1.type.t = VT_PTR;
1461 v1.r = VT_LOCAL | VT_LVAL;
1462 v1.c.ul = fc;
1463 load(r, &v1);
1464 fc = 0;
1467 if (op == TOK_EQ || op == TOK_NE) {
1468 swapped = 0;
1469 } else {
1470 if (op == TOK_LE || op == TOK_LT)
1471 swapped = !swapped;
1472 if (op == TOK_LE || op == TOK_GE) {
1473 op = 0x93; /* setae */
1474 } else {
1475 op = 0x97; /* seta */
1479 if (swapped) {
1480 o(0x7e0ff3); /* movq */
1481 gen_modrm(1, r, vtop->sym, fc);
1483 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1484 o(0x66);
1486 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1487 o(0xc8);
1488 } else {
1489 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1490 o(0x66);
1492 o(0x2e0f); /* ucomisd */
1493 gen_modrm(0, r, vtop->sym, fc);
1496 vtop--;
1497 vtop->r = VT_CMP;
1498 vtop->c.i = op | 0x100;
1499 } else {
1500 /* no memory reference possible for long double operations */
1501 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1502 load(TREG_XMM0, vtop);
1503 swapped = !swapped;
1505 switch(op) {
1506 default:
1507 case '+':
1508 a = 0;
1509 break;
1510 case '-':
1511 a = 4;
1512 break;
1513 case '*':
1514 a = 1;
1515 break;
1516 case '/':
1517 a = 6;
1518 break;
1520 ft = vtop->type.t;
1521 fc = vtop->c.ul;
1522 if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1523 o(0xde); /* fxxxp %st, %st(1) */
1524 o(0xc1 + (a << 3));
1525 } else {
1526 /* if saved lvalue, then we must reload it */
1527 r = vtop->r;
1528 if ((r & VT_VALMASK) == VT_LLOCAL) {
1529 SValue v1;
1530 r = get_reg(RC_INT);
1531 v1.type.t = VT_PTR;
1532 v1.r = VT_LOCAL | VT_LVAL;
1533 v1.c.ul = fc;
1534 load(r, &v1);
1535 fc = 0;
1537 if (swapped) {
1538 /* movq %xmm0,%xmm1 */
1539 o(0x7e0ff3);
1540 o(0xc8);
1541 load(TREG_XMM0, vtop);
1542 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1543 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1544 o(0xf2);
1545 } else {
1546 o(0xf3);
1548 o(0x0f);
1549 o(0x58 + a);
1550 o(0xc1);
1551 } else {
1552 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1553 o(0xf2);
1554 } else {
1555 o(0xf3);
1557 o(0x0f);
1558 o(0x58 + a);
1559 gen_modrm(0, r, vtop->sym, fc);
1562 vtop--;
1567 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1568 and 'long long' cases. */
1569 void gen_cvt_itof(int t)
1571 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1572 save_reg(TREG_ST0);
1573 gv(RC_INT);
1574 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1575 /* signed long long to float/double/long double (unsigned case
1576 is handled generically) */
1577 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1578 o(0x242cdf); /* fildll (%rsp) */
1579 o(0x08c48348); /* add $8, %rsp */
1580 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1581 (VT_INT | VT_UNSIGNED)) {
1582 /* unsigned int to float/double/long double */
1583 o(0x6a); /* push $0 */
1584 g(0x00);
1585 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1586 o(0x242cdf); /* fildll (%rsp) */
1587 o(0x10c48348); /* add $16, %rsp */
1588 } else {
1589 /* int to float/double/long double */
1590 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1591 o(0x2404db); /* fildl (%rsp) */
1592 o(0x08c48348); /* add $8, %rsp */
1594 vtop->r = TREG_ST0;
1595 } else {
1596 save_reg(TREG_XMM0);
1597 gv(RC_INT);
1598 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
1599 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1600 (VT_INT | VT_UNSIGNED) ||
1601 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1602 o(0x48); /* REX */
1604 o(0x2a0f);
1605 o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1606 vtop->r = TREG_XMM0;
1610 /* convert from one floating point type to another */
1611 void gen_cvt_ftof(int t)
1613 int ft, bt, tbt;
1615 ft = vtop->type.t;
1616 bt = ft & VT_BTYPE;
1617 tbt = t & VT_BTYPE;
1619 if (bt == VT_FLOAT) {
1620 gv(RC_FLOAT);
1621 if (tbt == VT_DOUBLE) {
1622 o(0xc0140f); /* unpcklps */
1623 o(0xc05a0f); /* cvtps2pd */
1624 } else if (tbt == VT_LDOUBLE) {
1625 /* movss %xmm0,-0x10(%rsp) */
1626 o(0x44110ff3);
1627 o(0xf024);
1628 o(0xf02444d9); /* flds -0x10(%rsp) */
1629 vtop->r = TREG_ST0;
1631 } else if (bt == VT_DOUBLE) {
1632 gv(RC_FLOAT);
1633 if (tbt == VT_FLOAT) {
1634 o(0xc0140f66); /* unpcklpd */
1635 o(0xc05a0f66); /* cvtpd2ps */
1636 } else if (tbt == VT_LDOUBLE) {
1637 /* movsd %xmm0,-0x10(%rsp) */
1638 o(0x44110ff2);
1639 o(0xf024);
1640 o(0xf02444dd); /* fldl -0x10(%rsp) */
1641 vtop->r = TREG_ST0;
1643 } else {
1644 gv(RC_ST0);
1645 if (tbt == VT_DOUBLE) {
1646 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1647 /* movsd -0x10(%rsp),%xmm0 */
1648 o(0x44100ff2);
1649 o(0xf024);
1650 vtop->r = TREG_XMM0;
1651 } else if (tbt == VT_FLOAT) {
1652 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1653 /* movss -0x10(%rsp),%xmm0 */
1654 o(0x44100ff3);
1655 o(0xf024);
1656 vtop->r = TREG_XMM0;
1661 /* convert fp to int 't' type */
1662 void gen_cvt_ftoi(int t)
1664 int ft, bt, size, r;
1665 ft = vtop->type.t;
1666 bt = ft & VT_BTYPE;
1667 if (bt == VT_LDOUBLE) {
1668 gen_cvt_ftof(VT_DOUBLE);
1669 bt = VT_DOUBLE;
1672 gv(RC_FLOAT);
1673 if (t != VT_INT)
1674 size = 8;
1675 else
1676 size = 4;
1678 r = get_reg(RC_INT);
1679 if (bt == VT_FLOAT) {
1680 o(0xf3);
1681 } else if (bt == VT_DOUBLE) {
1682 o(0xf2);
1683 } else {
1684 assert(0);
1686 orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
1687 o(0xc0 + (REG_VALUE(r) << 3));
1688 vtop->r = r;
1691 /* computed goto support */
1692 void ggoto(void)
1694 gcall_or_jmp(1);
1695 vtop--;
1698 /* end of x86-64 code generator */
1699 /*************************************************************/
1700 #endif /* ! TARGET_DEFS_ONLY */
1701 /******************************************************/