Fixed problems with XMM1 use on Linux/x86-64.
[tinycc.git] / x86_64-gen.c
blob0ba928c0d0efb09c7bfbd3fb30a28824ac774596
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         18
#define NB_ASM_REGS      8

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x0020
#define RC_XMM1    0x0040
#define RC_ST0     0x0080 /* only for long double */
#define RC_IRET    RC_RAX /* function return: integer register */
#define RC_LRET    RC_RDX /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */

/* pretty names for the registers; the numeric value of an integer or
   XMM register is its hardware encoding (bit 3 selects the REX
   extension, see REX_BASE/REG_VALUE below) */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,

    TREG_ST0 = 4, // SP slot won't be used

    TREG_MEM = 0x20, /* flag: value lives at the address held in the low bits' register */
};

/* REX.B/REX.R bit for a register number, and its low 3 ModRM bits */
#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 8
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE  0x1000
105 /******************************************************/
106 #else /* ! TARGET_DEFS_ONLY */
107 /******************************************************/
108 #include "tcc.h"
109 #include <assert.h>
/* Allocation classes for each of the NB_REGS (18) registers, indexed by
   TREG_* value.  Zero entries (indices 3, 5-7, 12-15) are registers the
   allocator must never hand out: rbx/rsp/rbp, rsi/rdi (used for argument
   passing), and r12-r15.  NOTE(review): r8-r11 deliberately lack RC_INT
   so they are only picked when requested explicitly — presumably because
   they are clobbered while staging call arguments; confirm against
   gfunc_call. */
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    /* st0 */ RC_ST0,
    0,
    0,
    0,
    /*RC_INT |*/ RC_R8,
    /*RC_INT |*/ RC_R9,
    /*RC_INT |*/ RC_R10,
    /*RC_INT |*/ RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
};

/* offset in the text section of the current function's "sub $x,%rsp" */
static unsigned long func_sub_sp_offset;
/* bytes popped by "ret n" in the epilog (0 means plain "ret") */
static int func_ret_sub;
135 /* XXX: make it faster ? */
/* Emit one byte 'c' (low 8 bits) into the current text section at 'ind',
   growing the section if needed. */
/* XXX: make it faster ? */
void g(int c)
{
    int ind1;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
    /* Debug trap: abort if the last four bytes form f2 0f 58 01
       ("addsd (%rcx),%xmm0").  NOTE(review): appears to be a temporary
       guard related to the XMM1/float register fix — presumably this
       exact encoding indicated a miscompile; confirm before keeping. */
    assert((ind < 4) || (cur_text_section->data[ind-4] != ('\362'&0xFF)) || (cur_text_section->data[ind-3] != '\017')
           || (cur_text_section->data[ind-2] != 'X') || (cur_text_section->data[ind-1] != '\001'));
}
/* Emit the bytes of 'c' in little-endian order, stopping at the first
   zero byte.  Consequence: o(0) emits nothing, and multi-byte opcodes
   containing an embedded 0x00 byte cannot be emitted with o() — use g()
   for those.  Callers rely on the o(0)-is-a-no-op property (e.g. the
   'pic' prefix in store()). */
void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
/* Emit 'v' as a 16-bit little-endian value (2 bytes, low byte first). */
void gen_le16(int v)
{
    int shift;
    for (shift = 0; shift < 16; shift += 8)
        g(v >> shift);
}
/* Emit 'c' as a 32-bit little-endian value (4 bytes, low byte first). */
void gen_le32(int c)
{
    int shift;
    for (shift = 0; shift < 32; shift += 8)
        g(c >> shift);
}
/* Emit 'c' as a 64-bit little-endian value (8 bytes, low byte first). */
void gen_le64(int64_t c)
{
    int shift;
    for (shift = 0; shift < 64; shift += 8)
        g(c >> shift);
}
/* Emit a REX prefix (if needed) followed by opcode byte 'b'.
   'll' requests REX.W (64-bit operand size); 'r' contributes REX.B and
   'r2' contributes REX.R.  Values of r/r2 that are not real registers
   (VT_CONST and above in the VT_VALMASK field) are treated as "no
   register".  If no REX bit is set, only the opcode is emitted. */
void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
/* output a symbol and patch all calls to it */
/* 't' is the head of a chain of 32-bit patch slots threaded through the
   code (each slot holds the offset of the next one, 0 terminates).
   Each slot is rewritten with the PC-relative displacement to 'a'
   (the -4 accounts for the displacement being relative to the end of
   the 4-byte field). */
void gsym_addr(int t, int a)
{
    int n, *ptr;
    while (t) {
        ptr = (int *)(cur_text_section->data + t);
        n = *ptr; /* next value */
        *ptr = a - t - 4;
        t = n;
    }
}
/* Resolve the patch chain 't' to the current output position. */
void gsym(int t)
{
    gsym_addr(t, ind);
}

/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad
214 static int is64_type(int t)
216 return ((t & VT_BTYPE) == VT_PTR ||
217 (t & VT_BTYPE) == VT_FUNC ||
218 (t & VT_BTYPE) == VT_LLONG);
221 static int is_sse_float(int t) {
222 int bt;
223 bt = t & VT_BTYPE;
224 return bt == VT_DOUBLE || bt == VT_FLOAT;
/* instruction + 4 bytes data. Return the address of the data */
/* Emits opcode 'c' via o(), then a raw 32-bit immediate 's'.  The
   returned offset of the immediate is what gsym_addr() later patches,
   so jump/call chains are built on top of this. */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    *(int *)(cur_text_section->data + ind) = s;
    s = ind;
    ind = ind1;
    return s;
}
/* Emit a 32-bit absolute value, with an R_X86_64_32 relocation against
   'sym' when 'r & VT_SYM' is set. */
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_32);
    gen_le32(c);
}
/* output constant with relocation if 'r & VT_SYM' is true */
/* 64-bit variant: emits an R_X86_64_64 absolute relocation. */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    gen_le64(c);
}
/* output constant with relocation if 'r & VT_SYM' is true */
/* PC-relative 32-bit: the -4 compensates for the displacement being
   measured from the end of the 4-byte field. */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    gen_le32(c-4);
}
/* output got address with relocation */
/* Emits the 32-bit GOT-relative displacement for 'sym' (relocated with
   R_X86_64_GOTPCREL; the addend -4 again accounts for the field size).
   If a constant offset 'c' is also required, it is applied afterwards
   with an "add $c, %r" on the register that received the address. */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    Section *sr;
    ElfW(Rela) *rel;
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
    sr = cur_text_section->reloc;
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
    rel->r_addend = -4;
#else
    /* NOTE(review): PE has no GOT; this debug printf + plain PC32
       relocation looks like an unfinished fallback path. */
    printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
        cur_text_section->data[ind-3],
        cur_text_section->data[ind-2],
        cur_text_section->data[ind-1]
        );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
/* Emit the ModRM byte (and addressing bytes) for operand 'r' with the
   /digit or register field 'op_reg'.  Handles: RIP-relative constant
   references (optionally through the GOT), %rbp-relative locals with
   disp8/disp32 selection, and plain register-indirect addressing for
   TREG_MEM-style temporaries. */
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg); /* mod=00, rm=101: RIP-relative disp32 */
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg); /* mod=01: disp8(%rbp) */
            g(c);
        } else {
            oad(0x85 | op_reg, c); /* mod=10: disp32(%rbp) */
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        /* indirect through the register named in the low bits of r */
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r)); /* disp32(reg) */
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r)); /* (reg) */
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
/* generate a modrm reference. 'op_reg' contains the addtionnal 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}
/* generate a modrm reference. 'op_reg' contains the addtionnal 3
   opcode bits */
/* 64-bit form: emits REX.W + 'opcode' first.  GOT indirection is used
   when op_reg carries the TREG_MEM flag and the symbol is not static.
   NOTE(review): 'sym' is dereferenced unconditionally here — callers
   presumably never pass NULL with TREG_MEM set; confirm. */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
/* load 'r' from value 'sv' */
/* Materializes the stack value 'sv' into register 'r': dereferences
   lvalues with the size/signedness-appropriate mov, loads constants
   (immediate, symbol address, GOT slot), turns comparison flags or
   pending jump chains into 0/1, and moves between register files
   (GP <-> XMM <-> ST0, the latter bounced through -0x10(%rsp)). */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t;
    fc = sv->c.ul;

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporal register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporal register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporal register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            /* double indirection: first load the pointer stored in the
               local slot, then dereference it below */
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.ul = fc;
            fr = r;
            if (!(reg_classes[fr] & RC_INT))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        /* pick the load opcode from the operand type */
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b; /* plain mov */
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    /* non-static symbol: take the address from the GOT */
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.ull);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            /* flags -> 0/1: preset r, then setcc its low byte */
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100)
            {
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE. */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8)); /* jp +3: skip the setcc */
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            /* pending jump chain -> 0/1 */
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r == TREG_XMM0) || (r == TREG_XMM1)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v == TREG_XMM0) || (v == TREG_XMM1));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3); /* movss */
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2); /* movsd */
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v == TREG_XMM0) || (v == TREG_XMM1));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
/* store register 'r' in lvalue 'v' */
/* The store opcode is chosen from the basic type (movd/movq for SSE,
   fstpt for long double, mov with optional 0x66/REX prefixes for
   integers).  On ELF, non-static global stores go through the GOT:
   the address is fetched into %r11 and 'pic' holds the REX byte that
   redirects the store to (%r11); o(0) being a no-op makes the o(pic)
   calls harmless when PIC is off. */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.ul;
    fr = v->r & VT_VALMASK;
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;   /* /7 is the fstpt opcode extension */
    } else {
        if (bt == VT_SHORT)
            o(0x66); /* operand-size prefix */
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89; /* REX.W mov is emitted later via gen_modrm64 */
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
        if (op64)
            o(op64);
        o(3 + (r << 3)); /* ModRM: mod=00, rm=011 -> (%r11) with REX.B */
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
/* 'is_jmp' is '1' if it is a jump */
/* Emits a direct call/jmp (rel32, with a PC32 relocation when the
   target is a symbol) when the address on top of the value stack is a
   constant, otherwise loads the target into %r11 and emits an
   indirect "call/jmp *%r11". */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym,
                   ind + 1, R_X86_64_PC32);
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        }
        oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
#ifdef TCC_TARGET_PE

/* Win64 calling convention: the first four arguments travel in
   rcx/rdx/r8/r9, and every argument has a reserved stack slot. */
#define REGN 4
static const uint8_t arg_regs[] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* largest argument/scratch area needed by any call in the current
   function; reserved once in the prolog (see gfunc_epilog) */
static int func_scratch;

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This functions pops all the
   parameters and the function address. */
/* Emit "op reg, d(%rsp)" choosing disp8/disp32 as 'd' fits.
   Bit 0x100 in 'r' means "no REX from the register field"
   (used for opcode-extension /digit operands). */
void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3)); /* ModRM+SIB: disp8(%rsp) */
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3)); /* ModRM+SIB: disp32(%rsp) */
        gen_le32(d);
    }
}
/* Return 1 if this function returns via an sret pointer, 0 otherwise */
/* Win64: every aggregate return goes through a hidden pointer. */
ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
    *ret_align = 1; // Never have to re-align return values for x86-64
    return 1;
}
/* Win64 gfunc_call: every argument gets an 8-byte slot; structs and
   long doubles are copied to a scratch area above the slots and passed
   by address.  Pass 1 spills struct/long-double payloads (memcpy/x87
   stores would clobber argument registers), pass 2 fills the slots and
   the first four registers. */
void gfunc_call(int nb_args)
{
    int size, align, r, args_size, i, d, j, bt, struct_size;
    int nb_reg_args, gen_reg;

    nb_reg_args = nb_args;
    /* always reserve at least the 4 shadow slots required by Win64 */
    args_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        if (bt == VT_STRUCT) {
            size = type_size(&sv->type, &align);
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size); /* lea struct_size(%rsp), r */
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size); /* fstpt: /7, no REX reg */
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;
#if 1
    /* free the argument registers before filling them */
    for (i = 0; i < REGN; ++i)
        save_reg(arg_regs[i]);
    save_reg(TREG_RAX);
#endif
    gen_reg = nb_reg_args;
    struct_size = args_size;

    /* arguments are consumed right-to-left (INVERT_FUNC_PARAMS) */
    for(i = 0; i < nb_args; i++) {
        bt = (vtop->type.t & VT_BTYPE);

        if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
            /* pass the address of the copy made in pass 1 */
            if (bt == VT_LDOUBLE)
                size = 16;
            else
                size = type_size(&vtop->type, &align);
            /* align to stack align size */
            size = (size + 15) & ~15;
            j = --gen_reg;
            if (j >= REGN) {
                d = TREG_RAX;
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, j*8);
            } else {
                d = arg_regs[j];
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else if (is_sse_float(vtop->type.t)) {
            gv(RC_XMM0); /* only one float register */
            j = --gen_reg;
            if (j >= REGN) {
                /* movq %xmm0, j*8(%rsp) */
                gen_offs_sp(0xd60f66, 0x100, j*8);
            } else {
                /* Win64 varargs expect FP args duplicated in the GP reg */
                /* movaps %xmm0, %xmmN */
                o(0x280f);
                o(0xc0 + (j << 3));
                d = arg_regs[j];
                /* mov %xmm0, %rxx */
                o(0x66);
                orex(1,d,0, 0x7e0f);
                o(0xc0 + REG_VALUE(d));
            }
        } else {
            j = --gen_reg;
            if (j >= REGN) {
                r = gv(RC_INT);
                gen_offs_sp(0x89, r, j*8);
            } else {
                d = arg_regs[j];
                if (d < NB_REGS) {
                    /* evaluate directly into the target register class */
                    gv(reg_classes[d] & ~RC_INT);
                } else {
                    r = gv(RC_INT);
                    if (d != r) {
                        orex(1,d,r, 0x89);
                        o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8);
                    }
                }
            }
        }
        vtop--;
    }
    save_regs(0);
    gcall_or_jmp(0);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
/* Win64: leaves FUNC_PROLOG_SIZE bytes for the frame setup emitted later
   by gfunc_epilog (once the final stack size is known), and spills
   register arguments into their caller-provided shadow slots. */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2; /* skip saved %rbp and return address */
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        reg_param_index++;
        addr += PTR_SIZE;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        if (reg_param_index < REGN) {
            /* save arguments passed by register */
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        }
        if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
            /* large values arrive by reference (VT_REF) */
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        reg_param_index++;
        addr += PTR_SIZE;
    }

    /* for varargs, spill the remaining register args so va_arg can walk
       a contiguous area */
    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS)
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        reg_param_index++;
        addr += PTR_SIZE;
    }
}
/* generate function epilog */
/* Emits leave/ret, then back-patches the FUNC_PROLOG_SIZE bytes left by
   gfunc_prolog with the real frame setup (calling __chkstk for frames
   >= 4K so Win64 guard pages are touched in order). */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
844 #else
/* Emit "add $val, %rsp", using the sign-extended imm8 encoding when
   'val' fits in one byte. */
static void gadd_sp(int val)
{
    if (val != (char)val) {
        oad(0xc48148, val); /* add $imm32, %rsp */
        return;
    }
    o(0xc48348); /* add $imm8, %rsp */
    g(val);
}
/* SysV x86-64 argument classes (a reduced version of the psABI's
   NONE/MEMORY/INTEGER/SSE/X87 classification). */
typedef enum X86_64_Mode {
  x86_64_mode_none,
  x86_64_mode_memory,
  x86_64_mode_integer,
  x86_64_mode_sse,
  x86_64_mode_x87
} X86_64_Mode;

/* Merge the classes of two struct components into the class of the
   containing eightbyte, following the psABI merge rules. */
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b) {
    /* identical classes merge to themselves */
    if (a == b)
        return a;

    /* NONE is the identity element */
    if (a == x86_64_mode_none)
        return b;
    if (b == x86_64_mode_none)
        return a;

    /* MEMORY dominates everything */
    if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;

    /* INTEGER dominates the FP classes */
    if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;

    /* mixing X87 with SSE forces the value to memory */
    if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;

    return x86_64_mode_sse;
}
/* Classify a single C type into an X86_64_Mode, recursing through
   struct members and merging their classes.  NOTE(review): the union
   check only compares the first two members' offsets — it presumably
   approximates "overlapping members => memory"; confirm it matches the
   psABI for all unions.  Also note control falls off the end (after
   assert) for unexpected basic types. */
static X86_64_Mode classify_x86_64_inner(CType *ty) {
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        // Detect union
        if (f->next && (f->c == f->next->c))
            return x86_64_mode_memory;

        /* merge the classes of all members */
        mode = x86_64_mode_none;
        for (; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }

    assert(0);
}
919 static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *reg_count) {
920 X86_64_Mode mode;
921 int size, align, ret_t;
923 if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
924 *psize = 8;
925 *reg_count = 1;
926 ret_t = ty->t;
927 mode = x86_64_mode_integer;
928 } else {
929 size = type_size(ty, &align);
930 *psize = (size + 7) & ~7;
932 if (size > 16) {
933 mode = x86_64_mode_memory;
934 } else {
935 mode = classify_x86_64_inner(ty);
936 switch (mode) {
937 case x86_64_mode_integer:
938 if (size > 8) {
939 *reg_count = 2;
940 ret_t = VT_QLONG;
941 } else {
942 *reg_count = 1;
943 ret_t = (size > 4) ? VT_LLONG : VT_INT;
945 break;
947 case x86_64_mode_x87:
948 *reg_count = 1;
949 ret_t = VT_LDOUBLE;
950 break;
952 case x86_64_mode_sse:
953 if (size > 8) {
954 *reg_count = 2;
955 ret_t = VT_QFLOAT;
956 } else {
957 *reg_count = 1;
958 ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
960 break;
965 if (ret) {
966 ret->ref = NULL;
967 ret->t = ret_t;
970 return mode;
/* Map a type's ABI class to the small integer codes that the runtime
   va_arg implementation understands. */
ST_FUNC int classify_x86_64_va_arg(CType *ty) {
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}
/* Return 1 if this function returns via an sret pointer, 0 otherwise */
/* SysV: only memory-class values use the hidden return pointer; 'ret'
   is filled with the register-shaped type for the other classes. */
int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
    int size, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    return (classify_x86_64_arg(vt, ret, &size, &reg_count) == x86_64_mode_memory);
}
/* SysV AMD64: the first six integer arguments go in these registers */
#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

/* Register an argument slot is staged in while arguments are being
   prepared: rdx/rcx (slots 2 and 3) go via r10/r11 because later gv()
   calls may clobber rdx/rcx; they are copied into place just before
   the call (see gfunc_call). */
static int arg_prepare_reg(int idx) {
  if (idx == 2 || idx == 3)
      /* idx=2: r10, idx=3: r11 */
      return idx + 8;
  else
      return arg_regs[idx];
}
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This functions pops all the
   parameters and the function address. */
/* SysV: pass 0 counts how many GP/SSE registers each argument needs
   and how much stack overflow arguments occupy; pass 1 pushes the
   stack-resident arguments (structs, x87 values, overflow scalars);
   pass 2 loads the register-passed arguments, staging rdx/rcx through
   r10/r11 (see arg_prepare_reg).  %eax receives the number of vector
   registers used, as required for varargs callees. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, i, j, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float arguments */
    args_size = 0;
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &reg_count);
        switch (mode) {
        case x86_64_mode_memory:
        case x86_64_mode_x87:
            args_size += size;
            break;

        case x86_64_mode_sse:
            nb_sse_args += reg_count;
            if (nb_sse_args > 8) args_size += size;
            break;

        case x86_64_mode_integer:
            nb_reg_args += reg_count;
            if (nb_reg_args > REGN) args_size += size;
            break;
        }
    }

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;

    /* adjust stack to align SSE boundary */
    if (args_size &= 15) {
        /* fetch cpu flag before the following sub will change the value */
        if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
            gv(RC_INT);

        args_size = 16 - args_size;
        o(0x48);
        oad(0xec81, args_size); /* sub $xxx, %rsp */
    }

    for(i = 0; i < nb_args; i++) {
        /* Swap argument to top, it will possibly be changed here,
           and might use more temps. All arguments must remain on the
           stack, so that get_reg can correctly evict some of them onto
           stack.  We could use also use a vrott(nb_args) at the end
           of this loop, but this seems faster. */
        SValue tmp = vtop[0];
        vtop[0] = vtop[-i];
        vtop[-i] = tmp;

        mode = classify_x86_64_arg(&vtop->type, NULL, &size, &reg_count);
        switch (mode) {
        case x86_64_mode_memory:
            /* allocate the necessary size on stack */
            o(0x48);
            oad(0xec81, size); /* sub $xxx, %rsp */
            /* generate structure store */
            r = get_reg(RC_INT);
            orex(1, r, 0, 0x89); /* mov %rsp, r */
            o(0xe0 + REG_VALUE(r));
            vset(&vtop->type, r | VT_LVAL, 0);
            vswap();
            vstore();
            args_size += size;
            break;

        case x86_64_mode_x87:
            gv(RC_ST0);
            size = LDOUBLE_SIZE;
            oad(0xec8148, size); /* sub $xxx, %rsp */
            o(0x7cdb); /* fstpt 0(%rsp) */
            g(0x24);
            g(0x00);
            args_size += size;
            break;

        case x86_64_mode_sse:
            if (sse_reg > 8) {
                /* SSE overflow argument: spill to the stack */
                r = gv(RC_FLOAT);
                o(0x50); /* push $rax */
                /* movq %xmm0, (%rsp) */
                o(0xd60f66);
                o(0x04 + REG_VALUE(r)*8);
                o(0x24);
                args_size += size;
            }
            sse_reg -= reg_count;
            break;

        case x86_64_mode_integer:
            /* simple type */
            /* XXX: implicit cast ? */
            if (gen_reg > REGN) {
                r = gv(RC_INT);
                orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                args_size += size;
            }
            gen_reg -= reg_count;
            break;
        }

        /* And swap the argument back to it's original position.  */
        tmp = vtop[0];
        vtop[0] = vtop[-i];
        vtop[-i] = tmp;
    }

    /* XXX This should be superfluous.  */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        switch (mode) {
        default:
            break;

        case x86_64_mode_sse:
            sse_reg -= reg_count;
            if (sse_reg + reg_count <= 8) {
                gv(RC_FRET); /* only one float register */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    if (reg_count == 2) {
                        /* movaps %xmm1, %xmmN */
                        o(0x280f);
                        o(0xc1 + ((sse_reg+1) << 3));
                    }
                }
            }
            break;

        case x86_64_mode_integer:
            /* simple type */
            /* XXX: implicit cast ? */
            gen_reg -= reg_count;
            if (gen_reg + reg_count <= REGN) {
                r = gv((reg_count == 1) ? RC_INT : RC_IRET);
                int d = arg_prepare_reg(gen_reg);
                orex(1,d,r,0x89); /* mov */
                o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                if (reg_count == 2) {
                    /* Second word of two-word value should always be in rdx
                       this case is handled via RC_IRET */
                    assert(vtop->r2 == TREG_RDX);
                    d = arg_prepare_reg(gen_reg+1);
                    orex(1,d,vtop->r2,0x89); /* mov */
                    o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
                }
            }
            break;
        }
        vtop--;
    }

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here.  */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

/* Spill argument register arg_regs[i] into a fresh 8-byte local slot. */
static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
1210 /* generate function prolog of type 't' */
/* Emits (by reserving FUNC_PROLOG_SIZE bytes that gfunc_epilog patches
   later) the entry code for a function: for variadic functions it builds
   the register save area and the gp_offset/fp_offset/overflow bookkeeping
   words used by va_start/va_arg, then classifies each parameter per the
   System V x86-64 ABI and records where its value lives (spilled register
   slot or caller stack). */
1211 void gfunc_prolog(CType *func_type)
1213     X86_64_Mode mode;
1214     int i, addr, align, size, reg_count;
1215     int param_index, param_addr, reg_param_index, sse_param_index;
1216     Sym *sym;
1217     CType *type;
1219     sym = func_type->ref;
     /* 'addr' walks the caller-pushed stack arguments: saved %rbp +
        return address sit below them. */
1220     addr = PTR_SIZE * 2;
1221     loc = 0;
1222     ind += FUNC_PROLOG_SIZE;
1223     func_sub_sp_offset = ind;
1224     func_ret_sub = 0;
1226     if (func_type->ref->c == FUNC_ELLIPSIS) {
1227         int seen_reg_num, seen_sse_num, seen_stack_size;
1228         seen_reg_num = seen_sse_num = 0;
1229         /* frame pointer and return address */
1230         seen_stack_size = PTR_SIZE * 2;
1231         /* count the number of seen parameters */
1232         sym = func_type->ref;
1233         while ((sym = sym->next) != NULL) {
1234             type = &sym->type;
1235             mode = classify_x86_64_arg(type, NULL, &size, &reg_count);
1236             switch (mode) {
1237             default:
1238                 seen_stack_size += size;
1239                 break;
1241             case x86_64_mode_integer:
             /* NOTE(review): the cap here is 8, while the non-variadic
                path below uses REGN for integer registers — confirm the
                intended limit matches the register save area layout. */
1242                 if (seen_reg_num + reg_count <= 8) {
1243                     seen_reg_num += reg_count;
1244                 } else {
1245                     seen_reg_num = 8;
1246                     seen_stack_size += size;
1248                 break;
1250             case x86_64_mode_sse:
1251                 if (seen_sse_num + reg_count <= 8) {
1252                     seen_sse_num += reg_count;
1253                 } else {
1254                     seen_sse_num = 8;
1255                     seen_stack_size += size;
1257                 break;
     /* Store the three words va_start needs at fixed %rbp offsets:
        gp_offset (-0x10), fp_offset (-0xc), overflow area size (-0x8). */
1261         loc -= 16;
1262         /* movl $0x????????, -0x10(%rbp) */
1263         o(0xf045c7);
1264         gen_le32(seen_reg_num * 8);
1265         /* movl $0x????????, -0xc(%rbp) */
1266         o(0xf445c7);
1267         gen_le32(seen_sse_num * 16 + 48);
1268         /* movl $0x????????, -0x8(%rbp) */
1269         o(0xf845c7);
1270         gen_le32(seen_stack_size);
1272         /* save all register passing arguments */
     /* First the 8 SSE registers, highest-numbered first (7 - i), each
        in a 16-byte slot with the upper 8 bytes zeroed... */
1273         for (i = 0; i < 8; i++) {
1274             loc -= 16;
1275             o(0xd60f66); /* movq */
1276             gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1277             /* movq $0, loc+8(%rbp) */
1278             o(0x85c748);
1279             gen_le32(loc + 8);
1280             gen_le32(0);
     /* ...then the REGN integer argument registers, also in reverse. */
1282         for (i = 0; i < REGN; i++) {
1283             push_arg_reg(REGN-1-i);
1287     sym = func_type->ref;
1288     param_index = 0;
1289     reg_param_index = 0;
1290     sse_param_index = 0;
1292     /* if the function returns a structure, then add an
1293        implicit pointer parameter */
1294     func_vt = sym->type;
1295     mode = classify_x86_64_arg(&func_vt, NULL, &size, &reg_count);
1296     if (mode == x86_64_mode_memory) {
     /* Hidden return-slot pointer arrives in the first integer
        register; spill it and remember its frame slot in func_vc. */
1297         push_arg_reg(reg_param_index);
1298         param_addr = loc;
1300         func_vc = loc;
1301         param_index++;
1302         reg_param_index++;
1304     /* define parameters */
1305     while ((sym = sym->next) != NULL) {
1306         type = &sym->type;
1307         mode = classify_x86_64_arg(type, NULL, &size, &reg_count);
1308         switch (mode) {
1309         case x86_64_mode_sse:
1310             if (sse_param_index + reg_count <= 8) {
1311                 /* save arguments passed by register */
1312                 loc -= reg_count * 8;
1313                 param_addr = loc;
1314                 for (i = 0; i < reg_count; ++i) {
1315                     o(0xd60f66); /* movq */
1316                     gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
1317                     ++sse_param_index;
1319             } else {
                 /* Out of SSE registers: the argument lives in the
                    caller's stack area. */
1320                 param_addr = addr;
1321                 addr += size;
1322                 sse_param_index += reg_count;
1324             break;
1326         case x86_64_mode_memory:
1327         case x86_64_mode_x87:
             /* Always passed in memory. */
1328             param_addr = addr;
1329             addr += size;
1330             break;
1332         case x86_64_mode_integer: {
1333             if (reg_param_index + reg_count <= REGN) {
1334                 /* save arguments passed by register */
1335                 loc -= reg_count * 8;
1336                 param_addr = loc;
1337                 for (i = 0; i < reg_count; ++i) {
1338                     gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
1339                     ++reg_param_index;
1341             } else {
1342                 param_addr = addr;
1343                 addr += size;
1344                 reg_param_index += reg_count;
1346             break;
     /* Make the parameter visible to the compiler as an lvalue at its
        frame/stack address. */
1349         sym_push(sym->v & ~SYM_FIELD, type,
1350                  VT_LOCAL | VT_LVAL, param_addr);
1351         param_index++;
1355 /* generate function epilog */
/* Emits leave/ret, then rewinds 'ind' to the FUNC_PROLOG_SIZE bytes that
   gfunc_prolog reserved and writes the real prolog there, now that the
   final frame size (-loc, rounded up to 16) is known. */
1356 void gfunc_epilog(void)
1358     int v, saved_ind;
1360     o(0xc9); /* leave */
1361     if (func_ret_sub == 0) {
1362         o(0xc3); /* ret */
1363     } else {
     /* ret $n: callee pops func_ret_sub bytes of arguments. */
1364         o(0xc2); /* ret n */
1365         g(func_ret_sub);
1366         g(func_ret_sub >> 8);
1368     /* align local size to word & save local variables */
1369     v = (-loc + 15) & -16;
1370     saved_ind = ind;
     /* Back-patch the prolog placeholder. */
1371     ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1372     o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1373     o(0xec8148); /* sub rsp, stacksize */
1374     gen_le32(v);
1375     ind = saved_ind;
1378 #endif /* not PE */
1380 /* generate a jump to a label */
/* Emits jmp rel32 (0xe9); returns the updated forward-reference chain
   via psym, to be resolved later by gsym. */
1381 int gjmp(int t)
1383     return psym(0xe9, t);
1386 /* generate a jump to a fixed address */
/* Uses the 2-byte short form (jmp rel8) when the displacement fits in a
   signed byte, otherwise the 5-byte jmp rel32. */
1387 void gjmp_addr(int a)
1389     int r;
1390     r = a - ind - 2;
1391     if (r == (char)r) {
1392         g(0xeb);
1393         g(r);
1394     } else {
1395         oad(0xe9, a - ind - 5);
1399 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1400 int gtst(int inv, int t)
1402     int v, *p;
1404     v = vtop->r & VT_VALMASK;
1405     if (v == VT_CMP) {
1406         /* fast case : can jump directly since flags are set */
     /* Bit 0x100 in c.i marks a preceding SSE float compare, whose
        parity flag encodes "unordered" (NaN). */
1407         if (vtop->c.i & 0x100)
1409             /* This was a float compare.  If the parity flag is set
1410                the result was unordered.  For anything except != this
1411                means false and we don't jump (anding both conditions).
1412                For != this means true (oring both).
1413                Take care about inverting the test.  We need to jump
1414                to our target if the result was unordered and test wasn't NE,
1415                otherwise if unordered we don't want to jump.  */
1416             vtop->c.i &= ~0x100;
1417             if (!inv == (vtop->c.i != TOK_NE))
1418                 o(0x067a); /* jp +6 */
1419             else
1421                 g(0x0f);
1422                 t = psym(0x8a, t); /* jp t */
     /* Conditional jump: TOK_* compare tokens map to Jcc opcodes via
        (c.i - 16); XOR with inv flips the condition. */
1425         g(0x0f);
1426         t = psym((vtop->c.i - 16) ^ inv, t);
1427     } else if (v == VT_JMP || v == VT_JMPI) {
1428         /* && or || optimization */
1429         if ((v & 1) == inv) {
1430             /* insert vtop->c jump list in t */
1431             p = &vtop->c.i;
1432             while (*p != 0)
1433                 p = (int *)(cur_text_section->data + *p);
1434             *p = t;
1435             t = vtop->c.i;
1436         } else {
1437             t = gjmp(t);
1438             gsym(vtop->c.i);
1440     } else {
     /* General case: materialize a 0/!=0 comparison first for float
        and long long values. */
1441         if (is_float(vtop->type.t) ||
1442             (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1443             vpushi(0);
1444             gen_op(TOK_NE);
1446         if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1447             /* constant jmp optimization */
1448             if ((vtop->c.i != 0) != inv)
1449                 t = gjmp(t);
1450         } else {
1451             v = gv(RC_INT);
         /* test r,r then jne/je t. */
1452             orex(0,v,v,0x85);
1453             o(0xc0 + REG_VALUE(v) * 9);
1454             g(0x0f);
1455             t = psym(0x85 ^ inv, t);
1458     vtop--;
1459     return t;
1462 /* generate an integer binary operation */
/* Operands are vtop[-1] (op) vtop[0]; the result replaces both on the
   value stack.  Comparison ops leave VT_CMP with the token in c.i. */
1463 void gen_opi(int op)
1465     int r, fr, opc, c;
1466     int ll, uu, cc;
     /* ll: 64-bit operation (needs REX.W via orex); uu: unsigned;
        cc: second operand is a plain constant. */
1468     ll = is64_type(vtop[-1].type.t);
1469     uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
1470     cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
1472     switch(op) {
1473     case '+':
1474     case TOK_ADDC1: /* add with carry generation */
1475         opc = 0;
1476     gen_op8:
     /* Shared path for all /opc-encoded ALU ops (add/or/adc/sbb/and/
        sub/xor/cmp). */
1477         if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
1478             /* constant case */
1479             vswap();
1480             r = gv(RC_INT);
1481             vswap();
1482             c = vtop->c.i;
1483             if (c == (char)c) {
1484                 /* XXX: generate inc and dec for smaller code ? */
             /* 0x83: imm8 sign-extended form. */
1485                 orex(ll, r, 0, 0x83);
1486                 o(0xc0 | (opc << 3) | REG_VALUE(r));
1487                 g(c);
1488             } else {
             /* 0x81: imm32 form. */
1489                 orex(ll, r, 0, 0x81);
1490                 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1492         } else {
1493             gv2(RC_INT, RC_INT);
1494             r = vtop[-1].r;
1495             fr = vtop[0].r;
1496             orex(ll, r, fr, (opc << 3) | 0x01);
1497             o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1499         vtop--;
1500         if (op >= TOK_ULT && op <= TOK_GT) {
         /* Comparison: result is in the flags only. */
1501             vtop->r = VT_CMP;
1502             vtop->c.i = op;
1504         break;
1505     case '-':
1506     case TOK_SUBC1: /* sub with carry generation */
1507         opc = 5;
1508         goto gen_op8;
1509     case TOK_ADDC2: /* add with carry use */
1510         opc = 2;
1511         goto gen_op8;
1512     case TOK_SUBC2: /* sub with carry use */
1513         opc = 3;
1514         goto gen_op8;
1515     case '&':
1516         opc = 4;
1517         goto gen_op8;
1518     case '^':
1519         opc = 6;
1520         goto gen_op8;
1521     case '|':
1522         opc = 1;
1523         goto gen_op8;
1524     case '*':
1525         gv2(RC_INT, RC_INT);
1526         r = vtop[-1].r;
1527         fr = vtop[0].r;
1528         orex(ll, fr, r, 0xaf0f); /* imul fr, r */
1529         o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
1530         vtop--;
1531         break;
1532     case TOK_SHL:
1533         opc = 4;
1534         goto gen_shift;
1535     case TOK_SHR:
1536         opc = 5;
1537         goto gen_shift;
1538     case TOK_SAR:
1539         opc = 7;
1540     gen_shift:
1541         opc = 0xc0 | (opc << 3);
1542         if (cc) {
1543             /* constant case */
1544             vswap();
1545             r = gv(RC_INT);
1546             vswap();
1547             orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
1548             o(opc | REG_VALUE(r));
         /* Shift count masked to operand width, as the hardware does. */
1549             g(vtop->c.i & (ll ? 63 : 31));
1550         } else {
1551             /* we generate the shift in ecx */
1552             gv2(RC_INT, RC_RCX);
1553             r = vtop[-1].r;
1554             orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
1555             o(opc | REG_VALUE(r));
1557         vtop--;
1558         break;
1559     case TOK_UDIV:
1560     case TOK_UMOD:
1561         uu = 1;
1562         goto divmod;
1563     case '/':
1564     case '%':
1565     case TOK_PDIV:
1566         uu = 0;
1567     divmod:
1568         /* first operand must be in eax */
1569         /* XXX: need better constraint for second operand */
1570         gv2(RC_RAX, RC_RCX);
1571         r = vtop[-1].r;
1572         fr = vtop[0].r;
1573         vtop--;
     /* div/idiv clobbers %rdx (remainder / high half). */
1574         save_reg(TREG_RDX);
1575         orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1576         orex(ll, fr, 0, 0xf7); /* div fr, %eax */
1577         o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
1578         if (op == '%' || op == TOK_UMOD)
1579             r = TREG_RDX;
1580         else
1581             r = TREG_RAX;
1582         vtop->r = r;
1583         break;
1584     default:
     /* Any remaining token (e.g. comparisons) goes through cmp (/7). */
1585         opc = 7;
1586         goto gen_op8;
/* 64-bit integer binary operation: on x86-64 the regular integer path
   already handles 64-bit operands, so just delegate to gen_opi. */
1590 void gen_opl(int op)
1592     gen_opi(op);
1595 /* generate a floating point operation 'v = t1 op t2' instruction. The
1596    two operands are guaranted to have the same floating point type */
1597 /* XXX: need to use ST1 too */
/* long double uses the x87 stack (RC_ST0); float/double use SSE
   (RC_FLOAT).  Comparisons leave VT_CMP; arithmetic leaves the result
   in the register operand. */
1598 void gen_opf(int op)
1600     int a, ft, fc, swapped, r;
1601     int float_type =
1602         (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1604     /* convert constants to memory references */
1605     if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1606         vswap();
1607         gv(float_type);
1608         vswap();
1610     if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1611         gv(float_type);
1613     /* must put at least one value in the floating point register */
1614     if ((vtop[-1].r & VT_LVAL) &&
1615         (vtop[0].r & VT_LVAL)) {
1616         vswap();
1617         gv(float_type);
1618         vswap();
1620     swapped = 0;
1621     /* swap the stack if needed so that t1 is the register and t2 is
1622        the memory reference */
1623     if (vtop[-1].r & VT_LVAL) {
1624         vswap();
1625         swapped = 1;
1627     if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1628         if (op >= TOK_ULT && op <= TOK_GT) {
1629             /* load on stack second operand */
1630             load(TREG_ST0, vtop);
1631             save_reg(TREG_RAX); /* eax is used by FP comparison code */
1632             if (op == TOK_GE || op == TOK_GT)
1633                 swapped = !swapped;
1634             else if (op == TOK_EQ || op == TOK_NE)
1635                 swapped = 0;
1636             if (swapped)
1637                 o(0xc9d9); /* fxch %st(1) */
1638             o(0xe9da); /* fucompp */
1639             o(0xe0df); /* fnstsw %ax */
         /* Decode the x87 status word (C0/C2/C3 land in %ah) into an
            integer compare token. */
1640             if (op == TOK_EQ) {
1641                 o(0x45e480); /* and $0x45, %ah */
1642                 o(0x40fC80); /* cmp $0x40, %ah */
1643             } else if (op == TOK_NE) {
1644                 o(0x45e480); /* and $0x45, %ah */
1645                 o(0x40f480); /* xor $0x40, %ah */
1646                 op = TOK_NE;
1647             } else if (op == TOK_GE || op == TOK_LE) {
1648                 o(0x05c4f6); /* test $0x05, %ah */
1649                 op = TOK_EQ;
1650             } else {
1651                 o(0x45c4f6); /* test $0x45, %ah */
1652                 op = TOK_EQ;
1654             vtop--;
1655             vtop->r = VT_CMP;
1656             vtop->c.i = op;
1657         } else {
1658             /* no memory reference possible for long double operations */
1659             load(TREG_ST0, vtop);
1660             swapped = !swapped;
1662             switch(op) {
1663             default:
1664             case '+':
1665                 a = 0;
1666                 break;
1667             case '-':
1668                 a = 4;
             /* fsubrp vs fsubp depending on operand order. */
1669                 if (swapped)
1670                     a++;
1671                 break;
1672             case '*':
1673                 a = 1;
1674                 break;
1675             case '/':
1676                 a = 6;
1677                 if (swapped)
1678                     a++;
1679                 break;
1681             ft = vtop->type.t;
1682             fc = vtop->c.ul;
1683             o(0xde); /* fxxxp %st, %st(1) */
1684             o(0xc1 + (a << 3));
1685             vtop--;
1687     } else {
1688         if (op >= TOK_ULT && op <= TOK_GT) {
1689             /* if saved lvalue, then we must reload it */
1690             r = vtop->r;
1691             fc = vtop->c.ul;
1692             if ((r & VT_VALMASK) == VT_LLOCAL) {
1693                 SValue v1;
1694                 r = get_reg(RC_INT);
1695                 v1.type.t = VT_PTR;
1696                 v1.r = VT_LOCAL | VT_LVAL;
1697                 v1.c.ul = fc;
1698                 load(r, &v1);
1699                 fc = 0;
         /* ucomis* only sets CF/ZF/PF, so <,<= are rewritten into
            >,>= with swapped operands, then seta/setae semantics. */
1702             if (op == TOK_EQ || op == TOK_NE) {
1703                 swapped = 0;
1704             } else {
1705                 if (op == TOK_LE || op == TOK_LT)
1706                     swapped = !swapped;
1707                 if (op == TOK_LE || op == TOK_GE) {
1708                     op = 0x93; /* setae */
1709                 } else {
1710                     op = 0x97; /* seta */
1714             if (swapped) {
1715                 gv(RC_FLOAT);
1716                 vswap();
1718             assert(!(vtop[-1].r & VT_LVAL));
1720             if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1721                 o(0x66);
1722             o(0x2e0f); /* ucomisd */
1724             if (vtop->r & VT_LVAL) {
1725                 gen_modrm(vtop[-1].r, r, vtop->sym, fc);
1726             } else {
1727                 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
1730             vtop--;
1731             vtop->r = VT_CMP;
         /* 0x100 flags this as a float compare for gtst's NaN/parity
            handling. */
1732             vtop->c.i = op | 0x100;
1733         } else {
1734             assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
1735             switch(op) {
1736             default:
1737             case '+':
1738                 a = 0;
1739                 break;
1740             case '-':
1741                 a = 4;
1742                 break;
1743             case '*':
1744                 a = 1;
1745                 break;
1746             case '/':
1747                 a = 6;
1748                 break;
1750             ft = vtop->type.t;
1751             fc = vtop->c.ul;
1752             assert((ft & VT_BTYPE) != VT_LDOUBLE);
1754             r = vtop->r;
1755             /* if saved lvalue, then we must reload it */
1756             if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
1757                 SValue v1;
1758                 r = get_reg(RC_INT);
1759                 v1.type.t = VT_PTR;
1760                 v1.r = VT_LOCAL | VT_LVAL;
1761                 v1.c.ul = fc;
1762                 load(r, &v1);
1763                 fc = 0;
1766             assert(!(vtop[-1].r & VT_LVAL));
1767             if (swapped) {
1768                 assert(vtop->r & VT_LVAL);
1769                 gv(RC_FLOAT);
1770                 vswap();
         /* f2/f3 prefix selects sd/ss; 0x58+a is adds*/subs*/muls*/divs*. */
1773             if ((ft & VT_BTYPE) == VT_DOUBLE) {
1774                 o(0xf2);
1775             } else {
1776                 o(0xf3);
1778             o(0x0f);
1779             o(0x58 + a);
1781             if (vtop->r & VT_LVAL) {
1782                 gen_modrm(vtop[-1].r, r, vtop->sym, fc);
1783             } else {
1784                 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
1787             vtop--;
1792 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1793    and 'long long' cases. */
/* long double target: push the integer and use x87 fild; float/double
   target: use SSE cvtsi2ss/cvtsi2sd (REX.W widens the source for
   unsigned int and long long). */
1794 void gen_cvt_itof(int t)
1796     if ((t & VT_BTYPE) == VT_LDOUBLE) {
1797         save_reg(TREG_ST0);
1798         gv(RC_INT);
1799         if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1800             /* signed long long to float/double/long double (unsigned case
1801                is handled generically) */
1802             o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1803             o(0x242cdf); /* fildll (%rsp) */
1804             o(0x08c48348); /* add $8, %rsp */
1805         } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1806                    (VT_INT | VT_UNSIGNED)) {
1807             /* unsigned int to float/double/long double */
         /* Zero-extend by pushing a 0 above the value, then convert as
            a 64-bit integer. */
1808             o(0x6a); /* push $0 */
1809             g(0x00);
1810             o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1811             o(0x242cdf); /* fildll (%rsp) */
1812             o(0x10c48348); /* add $16, %rsp */
1813         } else {
1814             /* int to float/double/long double */
1815             o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1816             o(0x2404db); /* fildl (%rsp) */
1817             o(0x08c48348); /* add $8, %rsp */
1819         vtop->r = TREG_ST0;
1820     } else {
1821         int r = get_reg(RC_FLOAT);
1822         gv(RC_INT);
     /* 0xf2 = cvtsi2sd (double), 0xf3 = cvtsi2ss (float). */
1823         o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
1824         if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1825             (VT_INT | VT_UNSIGNED) ||
1826             (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1827             o(0x48); /* REX */
1829         o(0x2a0f);
1830         o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
1831         vtop->r = r;
1835 /* convert from one floating point type to another */
/* float<->double use SSE cvtps2pd/cvtpd2ps in place; conversions to or
   from long double bounce through a scratch slot at -0x10(%rsp) between
   SSE and the x87 stack. */
1836 void gen_cvt_ftof(int t)
1838     int ft, bt, tbt;
1840     ft = vtop->type.t;
1841     bt = ft & VT_BTYPE;
1842     tbt = t & VT_BTYPE;
1844     if (bt == VT_FLOAT) {
1845         gv(RC_FLOAT);
1846         if (tbt == VT_DOUBLE) {
         /* *9 encodes the same register as both source and dest. */
1847             o(0x140f); /* unpcklps */
1848             o(0xc0 + REG_VALUE(vtop->r)*9);
1849             o(0x5a0f); /* cvtps2pd */
1850             o(0xc0 + REG_VALUE(vtop->r)*9);
1851         } else if (tbt == VT_LDOUBLE) {
         /* NOTE(review): save_reg is passed the register class RC_ST0
            here rather than the register TREG_ST0 — verify. */
1852             save_reg(RC_ST0);
1853             /* movss %xmm0,-0x10(%rsp) */
1854             o(0x110ff3);
1855             o(0x44 + REG_VALUE(vtop->r)*8);
1856             o(0xf024);
1857             o(0xf02444d9); /* flds -0x10(%rsp) */
1858             vtop->r = TREG_ST0;
1860     } else if (bt == VT_DOUBLE) {
1861         gv(RC_FLOAT);
1862         if (tbt == VT_FLOAT) {
1863             o(0x140f66); /* unpcklpd */
1864             o(0xc0 + REG_VALUE(vtop->r)*9);
1865             o(0x5a0f66); /* cvtpd2ps */
1866             o(0xc0 + REG_VALUE(vtop->r)*9);
1867         } else if (tbt == VT_LDOUBLE) {
1868             save_reg(RC_ST0);
1869             /* movsd %xmm0,-0x10(%rsp) */
1870             o(0x110ff2);
1871             o(0x44 + REG_VALUE(vtop->r)*8);
1872             o(0xf024);
1873             o(0xf02444dd); /* fldl -0x10(%rsp) */
1874             vtop->r = TREG_ST0;
1876     } else {
     /* Source is long double on the x87 stack. */
1877         gv(RC_ST0);
1878         int r = get_reg(RC_FLOAT);
1879         if (tbt == VT_DOUBLE) {
1880             o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1881             /* movsd -0x10(%rsp),%xmm0 */
1882             o(0x100ff2);
1883             o(0x44 + REG_VALUE(r)*8);
1884             o(0xf024);
1885             vtop->r = r;
1886         } else if (tbt == VT_FLOAT) {
1887             o(0xf0245cd9); /* fstps -0x10(%rsp) */
1888             /* movss -0x10(%rsp),%xmm0 */
1889             o(0x100ff3);
1890             o(0x44 + REG_VALUE(r)*8);
1891             o(0xf024);
1892             vtop->r = r;
1897 /* convert fp to int 't' type */
/* long double is first lowered to double, then cvttss2si/cvttsd2si
   (truncating conversion) moves the value into an integer register;
   REX.W selects a 64-bit result for non-int targets. */
1898 void gen_cvt_ftoi(int t)
1900     int ft, bt, size, r;
1901     ft = vtop->type.t;
1902     bt = ft & VT_BTYPE;
1903     if (bt == VT_LDOUBLE) {
1904         gen_cvt_ftof(VT_DOUBLE);
1905         bt = VT_DOUBLE;
1908     gv(RC_FLOAT);
1909     if (t != VT_INT)
1910         size = 8;
1911     else
1912         size = 4;
1914     r = get_reg(RC_INT);
1915     if (bt == VT_FLOAT) {
1916         o(0xf3);
1917     } else if (bt == VT_DOUBLE) {
1918         o(0xf2);
1919     } else {
1920         assert(0);
1922     orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
1923     o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
1924     vtop->r = r;
1927 /* computed goto support */
/* Emits an indirect jump (gcall_or_jmp with is_jmp=1) to the address on
   top of the value stack, then pops it. */
1928 void ggoto(void)
1930     gcall_or_jmp(1);
1931     vtop--;
1934 /* end of x86-64 code generator */
1935 /*************************************************************/
1936 #endif /* ! TARGET_DEFS_ONLY */
1937 /******************************************************/