4b9d28d7b70f652a161f38645b3352721b02810d
[tinycc.git] / x86_64-gen.c
blob4b9d28d7b70f652a161f38645b3352721b02810d
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
26 #define NB_REGS 24
27 #define NB_ASM_REGS 8
29 /* a register can belong to several classes. The classes must be
30 sorted from more general to more precise (see gv2() code which does
31 assumptions on it). */
32 #define RC_INT 0x0001 /* generic integer register */
33 #define RC_FLOAT 0x0002 /* generic float register */
34 #define RC_RAX 0x0004
35 #define RC_RCX 0x0008
36 #define RC_RDX 0x0010
37 #define RC_ST0 0x0080 /* only for long double */
38 #define RC_R8 0x0100
39 #define RC_R9 0x0200
40 #define RC_R10 0x0400
41 #define RC_R11 0x0800
42 #define RC_XMM0 0x1000
43 #define RC_XMM1 0x2000
44 #define RC_XMM2 0x4000
45 #define RC_XMM3 0x8000
46 #define RC_XMM4 0x10000
47 #define RC_XMM5 0x20000
48 #define RC_XMM6 0x40000
49 #define RC_XMM7 0x80000
50 #define RC_IRET RC_RAX /* function return: integer register */
51 #define RC_LRET RC_RDX /* function return: second integer register */
52 #define RC_FRET RC_XMM0 /* function return: float register */
53 #define RC_QRET RC_XMM1 /* function return: second float register */
55 /* pretty names for the registers */
56 enum {
57 TREG_RAX = 0,
58 TREG_RCX = 1,
59 TREG_RDX = 2,
60 TREG_RSI = 6,
61 TREG_RDI = 7,
63 TREG_R8 = 8,
64 TREG_R9 = 9,
65 TREG_R10 = 10,
66 TREG_R11 = 11,
68 TREG_XMM0 = 16,
69 TREG_XMM1 = 17,
70 TREG_XMM2 = 18,
71 TREG_XMM3 = 19,
72 TREG_XMM4 = 20,
73 TREG_XMM5 = 21,
74 TREG_XMM6 = 22,
75 TREG_XMM7 = 23,
77 TREG_ST0 = 4, // SP slot won't be used
79 TREG_MEM = 0x20,
82 #define REX_BASE(reg) (((reg) >> 3) & 1)
83 #define REG_VALUE(reg) ((reg) & 7)
85 /* return registers for function */
86 #define REG_IRET TREG_RAX /* single word int return register */
87 #define REG_LRET TREG_RDX /* second word return register (for long long) */
88 #define REG_FRET TREG_XMM0 /* float return register */
89 #define REG_QRET TREG_XMM1 /* second float return register */
91 /* defined if function parameters must be evaluated in reverse order */
92 #define INVERT_FUNC_PARAMS
94 /* pointer size, in bytes */
95 #define PTR_SIZE 8
97 /* long double size and alignment, in bytes */
98 #define LDOUBLE_SIZE 16
99 #define LDOUBLE_ALIGN 8
100 /* maximum alignment (for aligned attribute support) */
101 #define MAX_ALIGN 8
103 /******************************************************/
104 /* ELF defines */
106 #define EM_TCC_TARGET EM_X86_64
108 /* relocation type for 32 bit data relocation */
109 #define R_DATA_32 R_X86_64_32
110 #define R_DATA_PTR R_X86_64_64
111 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
112 #define R_COPY R_X86_64_COPY
114 #define ELF_START_ADDR 0x08048000
115 #define ELF_PAGE_SIZE 0x1000
117 /******************************************************/
118 #else /* ! TARGET_DEFS_ONLY */
119 /******************************************************/
120 #include "tcc.h"
121 #include <assert.h>
123 ST_DATA const int reg_classes[NB_REGS] = {
124 /* eax */ RC_INT | RC_RAX,
125 /* ecx */ RC_INT | RC_RCX,
126 /* edx */ RC_INT | RC_RDX,
128 /* st0 */ RC_ST0,
132 RC_R8,
133 RC_R9,
134 RC_R10,
135 RC_R11,
140 /* xmm0 */ RC_FLOAT | RC_XMM0,
141 /* xmm1 */ RC_FLOAT | RC_XMM1,
142 /* xmm2 */ RC_FLOAT | RC_XMM2,
143 /* xmm3 */ RC_FLOAT | RC_XMM3,
144 /* xmm4 */ RC_FLOAT | RC_XMM4,
145 /* xmm5 */ RC_FLOAT | RC_XMM5,
146 /* xmm6 */ RC_FLOAT | RC_XMM6,
147 /* xmm7 */ RC_FLOAT | RC_XMM7,
/* text-section offset of the current function's prolog patch site,
   filled in after the body is generated (see gfunc_epilog) */
static unsigned long func_sub_sp_offset;
/* byte count for a 'ret n' epilog; 0 means a plain 'ret' */
static int func_ret_sub;
153 /* XXX: make it faster ? */
154 void g(int c)
156 int ind1;
157 ind1 = ind + 1;
158 if (ind1 > cur_text_section->data_allocated)
159 section_realloc(cur_text_section, ind1);
160 cur_text_section->data[ind] = c;
161 ind = ind1;
/* Emit an opcode, low byte first, stopping at the first all-zero
   remainder.  Note: o(0) deliberately emits nothing; callers rely on
   this (e.g. an optional prefix that may be 0). */
void o(unsigned int c)
{
    for (; c; c >>= 8)
        g(c);
}
/* Emit a 16-bit little-endian value. */
void gen_le16(int v)
{
    g(v & 0xff);
    g((v >> 8) & 0xff);
}
/* Emit a 32-bit little-endian value. */
void gen_le32(int c)
{
    unsigned int u = c;
    int k;
    for (k = 0; k < 4; k++) {
        g(u);          /* g() keeps only the low byte */
        u >>= 8;
    }
}
/* Emit a 64-bit little-endian value. */
void gen_le64(int64_t c)
{
    uint64_t u = c;
    int k;
    for (k = 0; k < 8; k++) {
        g((int)u);     /* g() keeps only the low byte */
        u >>= 8;
    }
}
198 void orex(int ll, int r, int r2, int b)
200 if ((r & VT_VALMASK) >= VT_CONST)
201 r = 0;
202 if ((r2 & VT_VALMASK) >= VT_CONST)
203 r2 = 0;
204 if (ll || REX_BASE(r) || REX_BASE(r2))
205 o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
206 o(b);
209 /* output a symbol and patch all calls to it */
210 void gsym_addr(int t, int a)
212 int n, *ptr;
213 while (t) {
214 ptr = (int *)(cur_text_section->data + t);
215 n = *ptr; /* next value */
216 *ptr = a - t - 4;
217 t = n;
221 void gsym(int t)
223 gsym_addr(t, ind);
226 /* psym is used to put an instruction with a data field which is a
227 reference to a symbol. It is in fact the same as oad ! */
228 #define psym oad
230 static int is64_type(int t)
232 return ((t & VT_BTYPE) == VT_PTR ||
233 (t & VT_BTYPE) == VT_FUNC ||
234 (t & VT_BTYPE) == VT_LLONG);
237 static int is_sse_float(int t) {
238 int bt;
239 bt = t & VT_BTYPE;
240 return bt == VT_DOUBLE || bt == VT_FLOAT;
244 /* instruction + 4 bytes data. Return the address of the data */
245 ST_FUNC int oad(int c, int s)
247 int ind1;
249 o(c);
250 ind1 = ind + 4;
251 if (ind1 > cur_text_section->data_allocated)
252 section_realloc(cur_text_section, ind1);
253 *(int *)(cur_text_section->data + ind) = s;
254 s = ind;
255 ind = ind1;
256 return s;
259 ST_FUNC void gen_addr32(int r, Sym *sym, int c)
261 if (r & VT_SYM)
262 greloc(cur_text_section, sym, ind, R_X86_64_32);
263 gen_le32(c);
266 /* output constant with relocation if 'r & VT_SYM' is true */
267 ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
269 if (r & VT_SYM)
270 greloc(cur_text_section, sym, ind, R_X86_64_64);
271 gen_le64(c);
274 /* output constant with relocation if 'r & VT_SYM' is true */
275 ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
277 if (r & VT_SYM)
278 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
279 gen_le32(c-4);
282 /* output got address with relocation */
283 static void gen_gotpcrel(int r, Sym *sym, int c)
285 #ifndef TCC_TARGET_PE
286 Section *sr;
287 ElfW(Rela) *rel;
288 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
289 sr = cur_text_section->reloc;
290 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
291 rel->r_addend = -4;
292 #else
293 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r,
294 cur_text_section->data[ind-3],
295 cur_text_section->data[ind-2],
296 cur_text_section->data[ind-1]
298 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
299 #endif
300 gen_le32(0);
301 if (c) {
302 /* we use add c, %xxx for displacement */
303 orex(1, r, 0, 0x81);
304 o(0xc0 + REG_VALUE(r));
305 gen_le32(c);
309 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
311 op_reg = REG_VALUE(op_reg) << 3;
312 if ((r & VT_VALMASK) == VT_CONST) {
313 /* constant memory reference */
314 o(0x05 | op_reg);
315 if (is_got) {
316 gen_gotpcrel(r, sym, c);
317 } else {
318 gen_addrpc32(r, sym, c);
320 } else if ((r & VT_VALMASK) == VT_LOCAL) {
321 /* currently, we use only ebp as base */
322 if (c == (char)c) {
323 /* short reference */
324 o(0x45 | op_reg);
325 g(c);
326 } else {
327 oad(0x85 | op_reg, c);
329 } else if ((r & VT_VALMASK) >= TREG_MEM) {
330 if (c) {
331 g(0x80 | op_reg | REG_VALUE(r));
332 gen_le32(c);
333 } else {
334 g(0x00 | op_reg | REG_VALUE(r));
336 } else {
337 g(0x00 | op_reg | REG_VALUE(r));
341 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
342 opcode bits */
343 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
345 gen_modrm_impl(op_reg, r, sym, c, 0);
348 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
349 opcode bits */
350 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
352 int is_got;
353 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
354 orex(1, r, op_reg, opcode);
355 gen_modrm_impl(op_reg, r, sym, c, is_got);
359 /* load 'r' from value 'sv' */
360 void load(int r, SValue *sv)
362 int v, t, ft, fc, fr;
363 SValue v1;
365 #ifdef TCC_TARGET_PE
366 SValue v2;
367 sv = pe_getimport(sv, &v2);
368 #endif
370 fr = sv->r;
371 ft = sv->type.t;
372 fc = sv->c.ul;
374 #ifndef TCC_TARGET_PE
375 /* we use indirect access via got */
376 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
377 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
378 /* use the result register as a temporal register */
379 int tr = r | TREG_MEM;
380 if (is_float(ft)) {
381 /* we cannot use float registers as a temporal register */
382 tr = get_reg(RC_INT) | TREG_MEM;
384 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
386 /* load from the temporal register */
387 fr = tr | VT_LVAL;
389 #endif
391 v = fr & VT_VALMASK;
392 if (fr & VT_LVAL) {
393 int b, ll;
394 if (v == VT_LLOCAL) {
395 v1.type.t = VT_PTR;
396 v1.r = VT_LOCAL | VT_LVAL;
397 v1.c.ul = fc;
398 fr = r;
399 if (!(reg_classes[fr] & RC_INT))
400 fr = get_reg(RC_INT);
401 load(fr, &v1);
403 ll = 0;
404 if ((ft & VT_BTYPE) == VT_FLOAT) {
405 b = 0x6e0f66;
406 r = REG_VALUE(r); /* movd */
407 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
408 b = 0x7e0ff3; /* movq */
409 r = REG_VALUE(r);
410 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
411 b = 0xdb, r = 5; /* fldt */
412 } else if ((ft & VT_TYPE) == VT_BYTE) {
413 b = 0xbe0f; /* movsbl */
414 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
415 b = 0xb60f; /* movzbl */
416 } else if ((ft & VT_TYPE) == VT_SHORT) {
417 b = 0xbf0f; /* movswl */
418 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
419 b = 0xb70f; /* movzwl */
420 } else {
421 assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
422 || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
423 || ((ft & VT_BTYPE) == VT_FUNC));
424 ll = is64_type(ft);
425 b = 0x8b;
427 if (ll) {
428 gen_modrm64(b, r, fr, sv->sym, fc);
429 } else {
430 orex(ll, fr, r, b);
431 gen_modrm(r, fr, sv->sym, fc);
433 } else {
434 if (v == VT_CONST) {
435 if (fr & VT_SYM) {
436 #ifdef TCC_TARGET_PE
437 orex(1,0,r,0x8d);
438 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
439 gen_addrpc32(fr, sv->sym, fc);
440 #else
441 if (sv->sym->type.t & VT_STATIC) {
442 orex(1,0,r,0x8d);
443 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
444 gen_addrpc32(fr, sv->sym, fc);
445 } else {
446 orex(1,0,r,0x8b);
447 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
448 gen_gotpcrel(r, sv->sym, fc);
450 #endif
451 } else if (is64_type(ft)) {
452 orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
453 gen_le64(sv->c.ull);
454 } else {
455 orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
456 gen_le32(fc);
458 } else if (v == VT_LOCAL) {
459 orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
460 gen_modrm(r, VT_LOCAL, sv->sym, fc);
461 } else if (v == VT_CMP) {
462 orex(0,r,0,0);
463 if ((fc & ~0x100) != TOK_NE)
464 oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
465 else
466 oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
467 if (fc & 0x100)
469 /* This was a float compare. If the parity bit is
470 set the result was unordered, meaning false for everything
471 except TOK_NE, and true for TOK_NE. */
472 fc &= ~0x100;
473 o(0x037a + (REX_BASE(r) << 8));
475 orex(0,r,0, 0x0f); /* setxx %br */
476 o(fc);
477 o(0xc0 + REG_VALUE(r));
478 } else if (v == VT_JMP || v == VT_JMPI) {
479 t = v & 1;
480 orex(0,r,0,0);
481 oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
482 o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
483 gsym(fc);
484 orex(0,r,0,0);
485 oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
486 } else if (v != r) {
487 if ((r == TREG_XMM0) || (r == TREG_XMM1)) {
488 if (v == TREG_ST0) {
489 /* gen_cvt_ftof(VT_DOUBLE); */
490 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
491 /* movsd -0x10(%rsp),%xmmN */
492 o(0x100ff2);
493 o(0x44 + REG_VALUE(r)*8); /* %xmmN */
494 o(0xf024);
495 } else {
496 assert((v == TREG_XMM0) || (v == TREG_XMM1));
497 if ((ft & VT_BTYPE) == VT_FLOAT) {
498 o(0x100ff3);
499 } else {
500 assert((ft & VT_BTYPE) == VT_DOUBLE);
501 o(0x100ff2);
503 o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
505 } else if (r == TREG_ST0) {
506 assert((v == TREG_XMM0) || (v == TREG_XMM1));
507 /* gen_cvt_ftof(VT_LDOUBLE); */
508 /* movsd %xmmN,-0x10(%rsp) */
509 o(0x110ff2);
510 o(0x44 + REG_VALUE(r)*8); /* %xmmN */
511 o(0xf024);
512 o(0xf02444dd); /* fldl -0x10(%rsp) */
513 } else {
514 orex(1,r,v, 0x89);
515 o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
521 /* store register 'r' in lvalue 'v' */
522 void store(int r, SValue *v)
524 int fr, bt, ft, fc;
525 int op64 = 0;
526 /* store the REX prefix in this variable when PIC is enabled */
527 int pic = 0;
529 #ifdef TCC_TARGET_PE
530 SValue v2;
531 v = pe_getimport(v, &v2);
532 #endif
534 ft = v->type.t;
535 fc = v->c.ul;
536 fr = v->r & VT_VALMASK;
537 bt = ft & VT_BTYPE;
539 #ifndef TCC_TARGET_PE
540 /* we need to access the variable via got */
541 if (fr == VT_CONST && (v->r & VT_SYM)) {
542 /* mov xx(%rip), %r11 */
543 o(0x1d8b4c);
544 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
545 pic = is64_type(bt) ? 0x49 : 0x41;
547 #endif
549 /* XXX: incorrect if float reg to reg */
550 if (bt == VT_FLOAT) {
551 o(0x66);
552 o(pic);
553 o(0x7e0f); /* movd */
554 r = REG_VALUE(r);
555 } else if (bt == VT_DOUBLE) {
556 o(0x66);
557 o(pic);
558 o(0xd60f); /* movq */
559 r = REG_VALUE(r);
560 } else if (bt == VT_LDOUBLE) {
561 o(0xc0d9); /* fld %st(0) */
562 o(pic);
563 o(0xdb); /* fstpt */
564 r = 7;
565 } else {
566 if (bt == VT_SHORT)
567 o(0x66);
568 o(pic);
569 if (bt == VT_BYTE || bt == VT_BOOL)
570 orex(0, 0, r, 0x88);
571 else if (is64_type(bt))
572 op64 = 0x89;
573 else
574 orex(0, 0, r, 0x89);
576 if (pic) {
577 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
578 if (op64)
579 o(op64);
580 o(3 + (r << 3));
581 } else if (op64) {
582 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
583 gen_modrm64(op64, r, v->r, v->sym, fc);
584 } else if (fr != r) {
585 /* XXX: don't we really come here? */
586 abort();
587 o(0xc0 + fr + r * 8); /* mov r, fr */
589 } else {
590 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
591 gen_modrm(r, v->r, v->sym, fc);
592 } else if (fr != r) {
593 /* XXX: don't we really come here? */
594 abort();
595 o(0xc0 + fr + r * 8); /* mov r, fr */
600 /* 'is_jmp' is '1' if it is a jump */
601 static void gcall_or_jmp(int is_jmp)
603 int r;
604 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
605 /* constant case */
606 if (vtop->r & VT_SYM) {
607 /* relocation case */
608 greloc(cur_text_section, vtop->sym,
609 ind + 1, R_X86_64_PC32);
610 } else {
611 /* put an empty PC32 relocation */
612 put_elf_reloc(symtab_section, cur_text_section,
613 ind + 1, R_X86_64_PC32, 0);
615 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
616 } else {
617 /* otherwise, indirect call */
618 r = TREG_R11;
619 load(r, vtop);
620 o(0x41); /* REX */
621 o(0xff); /* call/jmp *r */
622 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
626 #ifdef TCC_TARGET_PE
628 #define REGN 4
629 static const uint8_t arg_regs[] = {
630 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
633 static int func_scratch;
635 /* Generate function call. The function address is pushed first, then
636 all the parameters in call order. This functions pops all the
637 parameters and the function address. */
/* Emit instruction 'b' addressing d(%rsp); 'r' supplies the register
   field (a value with bit 0x100 set means "no REX from r"). */
void gen_offs_sp(int b, int r, int d)
{
    orex(1, 0, r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        /* disp8 form */
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        /* disp32 form */
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
651 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
652 ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
653 *ret_align = 1; // Never have to re-align return values for x86-64
654 return 1;
657 void gfunc_call(int nb_args)
659 int size, align, r, args_size, i, d, j, bt, struct_size;
660 int nb_reg_args, gen_reg;
662 nb_reg_args = nb_args;
663 args_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;
665 /* for struct arguments, we need to call memcpy and the function
666 call breaks register passing arguments we are preparing.
667 So, we process arguments which will be passed by stack first. */
668 struct_size = args_size;
669 for(i = 0; i < nb_args; i++) {
670 SValue *sv = &vtop[-i];
671 bt = (sv->type.t & VT_BTYPE);
672 if (bt == VT_STRUCT) {
673 size = type_size(&sv->type, &align);
674 /* align to stack align size */
675 size = (size + 15) & ~15;
676 /* generate structure store */
677 r = get_reg(RC_INT);
678 gen_offs_sp(0x8d, r, struct_size);
679 struct_size += size;
681 /* generate memcpy call */
682 vset(&sv->type, r | VT_LVAL, 0);
683 vpushv(sv);
684 vstore();
685 --vtop;
687 } else if (bt == VT_LDOUBLE) {
689 gv(RC_ST0);
690 gen_offs_sp(0xdb, 0x107, struct_size);
691 struct_size += 16;
696 if (func_scratch < struct_size)
697 func_scratch = struct_size;
698 #if 1
699 for (i = 0; i < REGN; ++i)
700 save_reg(arg_regs[i]);
701 save_reg(TREG_RAX);
702 #endif
703 gen_reg = nb_reg_args;
704 struct_size = args_size;
706 for(i = 0; i < nb_args; i++) {
707 bt = (vtop->type.t & VT_BTYPE);
709 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
710 if (bt == VT_LDOUBLE)
711 size = 16;
712 else
713 size = type_size(&vtop->type, &align);
714 /* align to stack align size */
715 size = (size + 15) & ~15;
716 j = --gen_reg;
717 if (j >= REGN) {
718 d = TREG_RAX;
719 gen_offs_sp(0x8d, d, struct_size);
720 gen_offs_sp(0x89, d, j*8);
721 } else {
722 d = arg_regs[j];
723 gen_offs_sp(0x8d, d, struct_size);
725 struct_size += size;
727 } else if (is_sse_float(vtop->type.t)) {
728 gv(RC_XMM0); /* only one float register */
729 j = --gen_reg;
730 if (j >= REGN) {
731 /* movq %xmm0, j*8(%rsp) */
732 gen_offs_sp(0xd60f66, 0x100, j*8);
733 } else {
734 /* movaps %xmm0, %xmmN */
735 o(0x280f);
736 o(0xc0 + (j << 3));
737 d = arg_regs[j];
738 /* mov %xmm0, %rxx */
739 o(0x66);
740 orex(1,d,0, 0x7e0f);
741 o(0xc0 + REG_VALUE(d));
743 } else {
744 j = --gen_reg;
745 if (j >= REGN) {
746 r = gv(RC_INT);
747 gen_offs_sp(0x89, r, j*8);
748 } else {
749 d = arg_regs[j];
750 if (d < NB_REGS) {
751 gv(reg_classes[d] & ~RC_INT);
752 } else {
753 r = gv(RC_INT);
754 if (d != r) {
755 orex(1,d,r, 0x89);
756 o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8);
762 vtop--;
764 save_regs(0);
765 gcall_or_jmp(0);
766 vtop--;
770 #define FUNC_PROLOG_SIZE 11
772 /* generate function prolog of type 't' */
773 void gfunc_prolog(CType *func_type)
775 int addr, reg_param_index, bt;
776 Sym *sym;
777 CType *type;
779 func_ret_sub = 0;
780 func_scratch = 0;
781 loc = 0;
783 addr = PTR_SIZE * 2;
784 ind += FUNC_PROLOG_SIZE;
785 func_sub_sp_offset = ind;
786 reg_param_index = 0;
788 sym = func_type->ref;
790 /* if the function returns a structure, then add an
791 implicit pointer parameter */
792 func_vt = sym->type;
793 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
794 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
795 reg_param_index++;
796 addr += PTR_SIZE;
799 /* define parameters */
800 while ((sym = sym->next) != NULL) {
801 type = &sym->type;
802 bt = type->t & VT_BTYPE;
803 if (reg_param_index < REGN) {
804 /* save arguments passed by register */
805 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
807 if (bt == VT_STRUCT || bt == VT_LDOUBLE) {
808 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
809 } else {
810 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
812 reg_param_index++;
813 addr += PTR_SIZE;
816 while (reg_param_index < REGN) {
817 if (func_type->ref->c == FUNC_ELLIPSIS)
818 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
819 reg_param_index++;
820 addr += PTR_SIZE;
824 /* generate function epilog */
825 void gfunc_epilog(void)
827 int v, saved_ind;
829 o(0xc9); /* leave */
830 if (func_ret_sub == 0) {
831 o(0xc3); /* ret */
832 } else {
833 o(0xc2); /* ret n */
834 g(func_ret_sub);
835 g(func_ret_sub >> 8);
838 saved_ind = ind;
839 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
840 /* align local size to word & save local variables */
841 v = (func_scratch + -loc + 15) & -16;
843 if (v >= 4096) {
844 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
845 oad(0xb8, v); /* mov stacksize, %eax */
846 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
847 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
848 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
849 } else {
850 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
851 o(0xec8148); /* sub rsp, stacksize */
852 gen_le32(v);
855 cur_text_section->data_offset = saved_ind;
856 pe_add_unwind_data(ind, saved_ind, v);
857 ind = cur_text_section->data_offset;
860 #else
/* Emit 'add $val, %rsp', using the short imm8 form when possible. */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
/* SysV AMD64 ABI parameter classes (a reduced form of the ABI's
   eightbyte classification). */
typedef enum X86_64_Mode {
  x86_64_mode_none,
  x86_64_mode_memory,
  x86_64_mode_integer,
  x86_64_mode_sse,
  x86_64_mode_x87
} X86_64_Mode;

/* Combine the classes of two struct fields per the SysV merge rules:
   MEMORY wins over everything, INTEGER over SSE/X87, and mixing X87
   with any other non-trivial class forces MEMORY. */
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    if (a == x86_64_mode_none)
        return b;
    if (b == x86_64_mode_none)
        return a;
    if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    return x86_64_mode_sse;
}
897 static X86_64_Mode classify_x86_64_inner(CType *ty) {
898 X86_64_Mode mode;
899 Sym *f;
901 switch (ty->t & VT_BTYPE) {
902 case VT_VOID: return x86_64_mode_none;
904 case VT_INT:
905 case VT_BYTE:
906 case VT_SHORT:
907 case VT_LLONG:
908 case VT_BOOL:
909 case VT_PTR:
910 case VT_FUNC:
911 case VT_ENUM: return x86_64_mode_integer;
913 case VT_FLOAT:
914 case VT_DOUBLE: return x86_64_mode_sse;
916 case VT_LDOUBLE: return x86_64_mode_x87;
918 case VT_STRUCT:
919 f = ty->ref;
921 // Detect union
922 if (f->next && (f->c == f->next->c))
923 return x86_64_mode_memory;
925 mode = x86_64_mode_none;
926 for (; f; f = f->next)
927 mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));
929 return mode;
932 assert(0);
935 static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *reg_count) {
936 X86_64_Mode mode;
937 int size, align, ret_t;
939 if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
940 *psize = 8;
941 *reg_count = 1;
942 ret_t = ty->t;
943 mode = x86_64_mode_integer;
944 } else {
945 size = type_size(ty, &align);
946 *psize = (size + 7) & ~7;
948 if (size > 16) {
949 mode = x86_64_mode_memory;
950 } else {
951 mode = classify_x86_64_inner(ty);
952 switch (mode) {
953 case x86_64_mode_integer:
954 if (size > 8) {
955 *reg_count = 2;
956 ret_t = VT_QLONG;
957 } else {
958 *reg_count = 1;
959 ret_t = (size > 4) ? VT_LLONG : VT_INT;
961 break;
963 case x86_64_mode_x87:
964 *reg_count = 1;
965 ret_t = VT_LDOUBLE;
966 break;
968 case x86_64_mode_sse:
969 if (size > 8) {
970 *reg_count = 2;
971 ret_t = VT_QFLOAT;
972 } else {
973 *reg_count = 1;
974 ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
976 break;
981 if (ret) {
982 ret->ref = NULL;
983 ret->t = ret_t;
986 return mode;
989 ST_FUNC int classify_x86_64_va_arg(CType *ty) {
990 /* This definition must be synced with stdarg.h */
991 enum __va_arg_type {
992 __va_gen_reg, __va_float_reg, __va_stack
994 int size, reg_count;
995 X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &reg_count);
996 switch (mode) {
997 default: return __va_stack;
998 case x86_64_mode_integer: return __va_gen_reg;
999 case x86_64_mode_sse: return __va_float_reg;
1003 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
1004 int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
1005 int size, reg_count;
1006 *ret_align = 1; // Never have to re-align return values for x86-64
1007 return (classify_x86_64_arg(vt, ret, &size, &reg_count) == x86_64_mode_memory);
1010 #define REGN 6
1011 static const uint8_t arg_regs[REGN] = {
1012 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
1015 static int arg_prepare_reg(int idx) {
1016 if (idx == 2 || idx == 3)
1017 /* idx=2: r10, idx=3: r11 */
1018 return idx + 8;
1019 else
1020 return arg_regs[idx];
1023 /* Generate function call. The function address is pushed first, then
1024 all the parameters in call order. This functions pops all the
1025 parameters and the function address. */
1026 void gfunc_call(int nb_args)
1028 X86_64_Mode mode;
1029 CType type;
1030 int size, align, r, args_size, i, j, reg_count;
1031 int nb_reg_args = 0;
1032 int nb_sse_args = 0;
1033 int sse_reg, gen_reg;
1035 /* calculate the number of integer/float arguments */
1036 args_size = 0;
1037 for(i = 0; i < nb_args; i++) {
1038 mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &reg_count);
1039 switch (mode) {
1040 case x86_64_mode_memory:
1041 case x86_64_mode_x87:
1042 args_size += size;
1043 break;
1045 case x86_64_mode_sse:
1046 nb_sse_args += reg_count;
1047 if (nb_sse_args > 8) args_size += size;
1048 break;
1050 case x86_64_mode_integer:
1051 nb_reg_args += reg_count;
1052 if (nb_reg_args > REGN) args_size += size;
1053 break;
1057 /* for struct arguments, we need to call memcpy and the function
1058 call breaks register passing arguments we are preparing.
1059 So, we process arguments which will be passed by stack first. */
1060 gen_reg = nb_reg_args;
1061 sse_reg = nb_sse_args;
1063 /* adjust stack to align SSE boundary */
1064 if (args_size &= 15) {
1065 /* fetch cpu flag before the following sub will change the value */
1066 if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
1067 gv(RC_INT);
1069 args_size = 16 - args_size;
1070 o(0x48);
1071 oad(0xec81, args_size); /* sub $xxx, %rsp */
1074 for(i = 0; i < nb_args;) {
1075 /* Swap argument to top, it will possibly be changed here,
1076 and might use more temps. At the end of the loop we keep
1077 in on the stack and swap it back to its original position
1078 if it is a register. */
1079 SValue tmp = vtop[0];
1080 vtop[0] = vtop[-i];
1081 vtop[-i] = tmp;
1083 mode = classify_x86_64_arg(&vtop->type, NULL, &size, &reg_count);
1085 int arg_stored = 1;
1086 switch (vtop->type.t & VT_BTYPE) {
1087 case VT_STRUCT:
1088 if (mode == x86_64_mode_sse) {
1089 if (sse_reg > 8)
1090 sse_reg -= reg_count;
1091 else
1092 arg_stored = 0;
1093 } else if (mode == x86_64_mode_integer) {
1094 if (gen_reg > REGN)
1095 gen_reg -= reg_count;
1096 else
1097 arg_stored = 0;
1100 if (arg_stored) {
1101 /* allocate the necessary size on stack */
1102 o(0x48);
1103 oad(0xec81, size); /* sub $xxx, %rsp */
1104 /* generate structure store */
1105 r = get_reg(RC_INT);
1106 orex(1, r, 0, 0x89); /* mov %rsp, r */
1107 o(0xe0 + REG_VALUE(r));
1108 vset(&vtop->type, r | VT_LVAL, 0);
1109 vswap();
1110 vstore();
1111 args_size += size;
1113 break;
1115 case VT_LDOUBLE:
1116 gv(RC_ST0);
1117 size = LDOUBLE_SIZE;
1118 oad(0xec8148, size); /* sub $xxx, %rsp */
1119 o(0x7cdb); /* fstpt 0(%rsp) */
1120 g(0x24);
1121 g(0x00);
1122 args_size += size;
1123 break;
1125 case VT_FLOAT:
1126 case VT_DOUBLE:
1127 assert(mode == x86_64_mode_sse);
1128 if (sse_reg > 8) {
1129 --sse_reg;
1130 r = gv(RC_FLOAT);
1131 o(0x50); /* push $rax */
1132 /* movq %xmm0, (%rsp) */
1133 o(0xd60f66);
1134 o(0x04 + REG_VALUE(r)*8);
1135 o(0x24);
1136 args_size += size;
1137 } else {
1138 arg_stored = 0;
1140 break;
1142 default:
1143 assert(mode == x86_64_mode_integer);
1144 /* simple type */
1145 /* XXX: implicit cast ? */
1146 if (gen_reg > REGN) {
1147 --gen_reg;
1148 r = gv(RC_INT);
1149 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
1150 args_size += size;
1151 } else {
1152 arg_stored = 0;
1154 break;
1157 /* And swap the argument back to it's original position. */
1158 tmp = vtop[0];
1159 vtop[0] = vtop[-i];
1160 vtop[-i] = tmp;
1162 if (arg_stored) {
1163 vrotb(i+1);
1164 assert(vtop->type.t == tmp.type.t);
1165 vpop();
1166 --nb_args;
1167 } else {
1168 ++i;
1172 /* XXX This should be superfluous. */
1173 save_regs(0); /* save used temporary registers */
1175 /* then, we prepare register passing arguments.
1176 Note that we cannot set RDX and RCX in this loop because gv()
1177 may break these temporary registers. Let's use R10 and R11
1178 instead of them */
1179 assert(gen_reg <= REGN);
1180 assert(sse_reg <= 8);
1181 for(i = 0; i < nb_args; i++) {
1182 mode = classify_x86_64_arg(&vtop->type, &type, &size, &reg_count);
1183 /* Alter stack entry type so that gv() knows how to treat it */
1184 vtop->type = type;
1185 if (mode == x86_64_mode_sse) {
1186 if (reg_count == 2) {
1187 sse_reg -= 2;
1188 gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
1189 if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
1190 /* movaps %xmm0, %xmmN */
1191 o(0x280f);
1192 o(0xc0 + (sse_reg << 3));
1193 /* movaps %xmm1, %xmmN */
1194 o(0x280f);
1195 o(0xc1 + ((sse_reg+1) << 3));
1197 } else {
1198 assert(reg_count == 1);
1199 --sse_reg;
1200 /* Load directly to register */
1201 gv(RC_XMM0 << sse_reg);
1203 } else if (mode == x86_64_mode_integer) {
1204 /* simple type */
1205 /* XXX: implicit cast ? */
1206 gen_reg -= reg_count;
1207 r = gv(RC_INT);
1208 int d = arg_prepare_reg(gen_reg);
1209 orex(1,d,r,0x89); /* mov */
1210 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
1211 if (reg_count == 2) {
1212 /* Second word of two-word value should always be in rdx
1213 this case is handled via RC_IRET */
1214 d = arg_prepare_reg(gen_reg+1);
1215 orex(1,d,vtop->r2,0x89); /* mov */
1216 o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
1219 vtop--;
1221 assert(gen_reg == 0);
1222 assert(sse_reg == 0);
1224 /* We shouldn't have many operands on the stack anymore, but the
1225 call address itself is still there, and it might be in %eax
1226 (or edx/ecx) currently, which the below writes would clobber.
1227 So evict all remaining operands here. */
1228 save_regs(0);
1230 /* Copy R10 and R11 into RDX and RCX, respectively */
1231 if (nb_reg_args > 2) {
1232 o(0xd2894c); /* mov %r10, %rdx */
1233 if (nb_reg_args > 3) {
1234 o(0xd9894c); /* mov %r11, %rcx */
1238 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
1239 gcall_or_jmp(0);
1240 if (args_size)
1241 gadd_sp(args_size);
1242 vtop--;
1246 #define FUNC_PROLOG_SIZE 11
1248 static void push_arg_reg(int i) {
1249 loc -= 8;
1250 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
1253 /* generate function prolog of type 't' */
1254 void gfunc_prolog(CType *func_type)
1256 X86_64_Mode mode;
1257 int i, addr, align, size, reg_count;
1258 int param_index, param_addr, reg_param_index, sse_param_index;
1259 Sym *sym;
1260 CType *type;
1262 sym = func_type->ref;
1263 addr = PTR_SIZE * 2;
1264 loc = 0;
1265 ind += FUNC_PROLOG_SIZE;
1266 func_sub_sp_offset = ind;
1267 func_ret_sub = 0;
1269 if (func_type->ref->c == FUNC_ELLIPSIS) {
1270 int seen_reg_num, seen_sse_num, seen_stack_size;
1271 seen_reg_num = seen_sse_num = 0;
1272 /* frame pointer and return address */
1273 seen_stack_size = PTR_SIZE * 2;
1274 /* count the number of seen parameters */
1275 sym = func_type->ref;
1276 while ((sym = sym->next) != NULL) {
1277 type = &sym->type;
1278 mode = classify_x86_64_arg(type, NULL, &size, &reg_count);
1279 switch (mode) {
1280 default:
1281 seen_stack_size += size;
1282 break;
1284 case x86_64_mode_integer:
1285 if (seen_reg_num + reg_count <= 8) {
1286 seen_reg_num += reg_count;
1287 } else {
1288 seen_reg_num = 8;
1289 seen_stack_size += size;
1291 break;
1293 case x86_64_mode_sse:
1294 if (seen_sse_num + reg_count <= 8) {
1295 seen_sse_num += reg_count;
1296 } else {
1297 seen_sse_num = 8;
1298 seen_stack_size += size;
1300 break;
1304 loc -= 16;
1305 /* movl $0x????????, -0x10(%rbp) */
1306 o(0xf045c7);
1307 gen_le32(seen_reg_num * 8);
1308 /* movl $0x????????, -0xc(%rbp) */
1309 o(0xf445c7);
1310 gen_le32(seen_sse_num * 16 + 48);
1311 /* movl $0x????????, -0x8(%rbp) */
1312 o(0xf845c7);
1313 gen_le32(seen_stack_size);
1315 /* save all register passing arguments */
1316 for (i = 0; i < 8; i++) {
1317 loc -= 16;
1318 o(0xd60f66); /* movq */
1319 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1320 /* movq $0, loc+8(%rbp) */
1321 o(0x85c748);
1322 gen_le32(loc + 8);
1323 gen_le32(0);
1325 for (i = 0; i < REGN; i++) {
1326 push_arg_reg(REGN-1-i);
1330 sym = func_type->ref;
1331 param_index = 0;
1332 reg_param_index = 0;
1333 sse_param_index = 0;
1335 /* if the function returns a structure, then add an
1336 implicit pointer parameter */
1337 func_vt = sym->type;
1338 mode = classify_x86_64_arg(&func_vt, NULL, &size, &reg_count);
1339 if (mode == x86_64_mode_memory) {
1340 push_arg_reg(reg_param_index);
1341 param_addr = loc;
1343 func_vc = loc;
1344 param_index++;
1345 reg_param_index++;
1347 /* define parameters */
1348 while ((sym = sym->next) != NULL) {
1349 type = &sym->type;
1350 mode = classify_x86_64_arg(type, NULL, &size, &reg_count);
1351 switch (mode) {
1352 case x86_64_mode_sse:
1353 if (sse_param_index + reg_count <= 8) {
1354 /* save arguments passed by register */
1355 loc -= reg_count * 8;
1356 param_addr = loc;
1357 for (i = 0; i < reg_count; ++i) {
1358 o(0xd60f66); /* movq */
1359 gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
1360 ++sse_param_index;
1362 } else {
1363 param_addr = addr;
1364 addr += size;
1365 sse_param_index += reg_count;
1367 break;
1369 case x86_64_mode_memory:
1370 case x86_64_mode_x87:
1371 param_addr = addr;
1372 addr += size;
1373 break;
1375 case x86_64_mode_integer: {
1376 if (reg_param_index + reg_count <= REGN) {
1377 /* save arguments passed by register */
1378 loc -= reg_count * 8;
1379 param_addr = loc;
1380 for (i = 0; i < reg_count; ++i) {
1381 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
1382 ++reg_param_index;
1384 } else {
1385 param_addr = addr;
1386 addr += size;
1387 reg_param_index += reg_count;
1389 break;
1392 sym_push(sym->v & ~SYM_FIELD, type,
1393 VT_LOCAL | VT_LVAL, param_addr);
1394 param_index++;
1398 /* generate function epilog */
1399 void gfunc_epilog(void)
1401 int v, saved_ind;
1403 o(0xc9); /* leave */
1404 if (func_ret_sub == 0) {
1405 o(0xc3); /* ret */
1406 } else {
1407 o(0xc2); /* ret n */
1408 g(func_ret_sub);
1409 g(func_ret_sub >> 8);
1411 /* align local size to word & save local variables */
1412 v = (-loc + 15) & -16;
1413 saved_ind = ind;
1414 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1415 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1416 o(0xec8148); /* sub rsp, stacksize */
1417 gen_le32(v);
1418 ind = saved_ind;
1421 #endif /* not PE */
/* generate a jump to a label; returns the (possibly new) head of the
   forward-reference chain for 't' */
int gjmp(int t)
{
    return psym(0xe9, t); /* jmp rel32 */
}
1429 /* generate a jump to a fixed address */
1430 void gjmp_addr(int a)
1432 int r;
1433 r = a - ind - 2;
1434 if (r == (char)r) {
1435 g(0xeb);
1436 g(r);
1437 } else {
1438 oad(0xe9, a - ind - 5);
1442 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1443 int gtst(int inv, int t)
1445 int v, *p;
1447 v = vtop->r & VT_VALMASK;
1448 if (v == VT_CMP) {
1449 /* fast case : can jump directly since flags are set */
1450 if (vtop->c.i & 0x100)
1452 /* This was a float compare. If the parity flag is set
1453 the result was unordered. For anything except != this
1454 means false and we don't jump (anding both conditions).
1455 For != this means true (oring both).
1456 Take care about inverting the test. We need to jump
1457 to our target if the result was unordered and test wasn't NE,
1458 otherwise if unordered we don't want to jump. */
1459 vtop->c.i &= ~0x100;
1460 if (!inv == (vtop->c.i != TOK_NE))
1461 o(0x067a); /* jp +6 */
1462 else
1464 g(0x0f);
1465 t = psym(0x8a, t); /* jp t */
1468 g(0x0f);
1469 t = psym((vtop->c.i - 16) ^ inv, t);
1470 } else if (v == VT_JMP || v == VT_JMPI) {
1471 /* && or || optimization */
1472 if ((v & 1) == inv) {
1473 /* insert vtop->c jump list in t */
1474 p = &vtop->c.i;
1475 while (*p != 0)
1476 p = (int *)(cur_text_section->data + *p);
1477 *p = t;
1478 t = vtop->c.i;
1479 } else {
1480 t = gjmp(t);
1481 gsym(vtop->c.i);
1483 } else {
1484 if (is_float(vtop->type.t) ||
1485 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1486 vpushi(0);
1487 gen_op(TOK_NE);
1489 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1490 /* constant jmp optimization */
1491 if ((vtop->c.i != 0) != inv)
1492 t = gjmp(t);
1493 } else {
1494 v = gv(RC_INT);
1495 orex(0,v,v,0x85);
1496 o(0xc0 + REG_VALUE(v) * 9);
1497 g(0x0f);
1498 t = psym(0x85 ^ inv, t);
1501 vtop--;
1502 return t;
1505 /* generate an integer binary operation */
1506 void gen_opi(int op)
1508 int r, fr, opc, c;
1509 int ll, uu, cc;
1511 ll = is64_type(vtop[-1].type.t);
1512 uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
1513 cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
1515 switch(op) {
1516 case '+':
1517 case TOK_ADDC1: /* add with carry generation */
1518 opc = 0;
1519 gen_op8:
1520 if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
1521 /* constant case */
1522 vswap();
1523 r = gv(RC_INT);
1524 vswap();
1525 c = vtop->c.i;
1526 if (c == (char)c) {
1527 /* XXX: generate inc and dec for smaller code ? */
1528 orex(ll, r, 0, 0x83);
1529 o(0xc0 | (opc << 3) | REG_VALUE(r));
1530 g(c);
1531 } else {
1532 orex(ll, r, 0, 0x81);
1533 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1535 } else {
1536 gv2(RC_INT, RC_INT);
1537 r = vtop[-1].r;
1538 fr = vtop[0].r;
1539 orex(ll, r, fr, (opc << 3) | 0x01);
1540 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1542 vtop--;
1543 if (op >= TOK_ULT && op <= TOK_GT) {
1544 vtop->r = VT_CMP;
1545 vtop->c.i = op;
1547 break;
1548 case '-':
1549 case TOK_SUBC1: /* sub with carry generation */
1550 opc = 5;
1551 goto gen_op8;
1552 case TOK_ADDC2: /* add with carry use */
1553 opc = 2;
1554 goto gen_op8;
1555 case TOK_SUBC2: /* sub with carry use */
1556 opc = 3;
1557 goto gen_op8;
1558 case '&':
1559 opc = 4;
1560 goto gen_op8;
1561 case '^':
1562 opc = 6;
1563 goto gen_op8;
1564 case '|':
1565 opc = 1;
1566 goto gen_op8;
1567 case '*':
1568 gv2(RC_INT, RC_INT);
1569 r = vtop[-1].r;
1570 fr = vtop[0].r;
1571 orex(ll, fr, r, 0xaf0f); /* imul fr, r */
1572 o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
1573 vtop--;
1574 break;
1575 case TOK_SHL:
1576 opc = 4;
1577 goto gen_shift;
1578 case TOK_SHR:
1579 opc = 5;
1580 goto gen_shift;
1581 case TOK_SAR:
1582 opc = 7;
1583 gen_shift:
1584 opc = 0xc0 | (opc << 3);
1585 if (cc) {
1586 /* constant case */
1587 vswap();
1588 r = gv(RC_INT);
1589 vswap();
1590 orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
1591 o(opc | REG_VALUE(r));
1592 g(vtop->c.i & (ll ? 63 : 31));
1593 } else {
1594 /* we generate the shift in ecx */
1595 gv2(RC_INT, RC_RCX);
1596 r = vtop[-1].r;
1597 orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
1598 o(opc | REG_VALUE(r));
1600 vtop--;
1601 break;
1602 case TOK_UDIV:
1603 case TOK_UMOD:
1604 uu = 1;
1605 goto divmod;
1606 case '/':
1607 case '%':
1608 case TOK_PDIV:
1609 uu = 0;
1610 divmod:
1611 /* first operand must be in eax */
1612 /* XXX: need better constraint for second operand */
1613 gv2(RC_RAX, RC_RCX);
1614 r = vtop[-1].r;
1615 fr = vtop[0].r;
1616 vtop--;
1617 save_reg(TREG_RDX);
1618 orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1619 orex(ll, fr, 0, 0xf7); /* div fr, %eax */
1620 o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
1621 if (op == '%' || op == TOK_UMOD)
1622 r = TREG_RDX;
1623 else
1624 r = TREG_RAX;
1625 vtop->r = r;
1626 break;
1627 default:
1628 opc = 7;
1629 goto gen_op8;
/* 64-bit integer operation: on x86-64 gen_opi already handles 64-bit
   operands (REX prefixes via orex), so simply delegate */
void gen_opl(int op)
{
    gen_opi(op);
}
1638 /* generate a floating point operation 'v = t1 op t2' instruction. The
1639 two operands are guaranted to have the same floating point type */
1640 /* XXX: need to use ST1 too */
1641 void gen_opf(int op)
1643 int a, ft, fc, swapped, r;
1644 int float_type =
1645 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1647 /* convert constants to memory references */
1648 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1649 vswap();
1650 gv(float_type);
1651 vswap();
1653 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1654 gv(float_type);
1656 /* must put at least one value in the floating point register */
1657 if ((vtop[-1].r & VT_LVAL) &&
1658 (vtop[0].r & VT_LVAL)) {
1659 vswap();
1660 gv(float_type);
1661 vswap();
1663 swapped = 0;
1664 /* swap the stack if needed so that t1 is the register and t2 is
1665 the memory reference */
1666 if (vtop[-1].r & VT_LVAL) {
1667 vswap();
1668 swapped = 1;
1670 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1671 if (op >= TOK_ULT && op <= TOK_GT) {
1672 /* load on stack second operand */
1673 load(TREG_ST0, vtop);
1674 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1675 if (op == TOK_GE || op == TOK_GT)
1676 swapped = !swapped;
1677 else if (op == TOK_EQ || op == TOK_NE)
1678 swapped = 0;
1679 if (swapped)
1680 o(0xc9d9); /* fxch %st(1) */
1681 o(0xe9da); /* fucompp */
1682 o(0xe0df); /* fnstsw %ax */
1683 if (op == TOK_EQ) {
1684 o(0x45e480); /* and $0x45, %ah */
1685 o(0x40fC80); /* cmp $0x40, %ah */
1686 } else if (op == TOK_NE) {
1687 o(0x45e480); /* and $0x45, %ah */
1688 o(0x40f480); /* xor $0x40, %ah */
1689 op = TOK_NE;
1690 } else if (op == TOK_GE || op == TOK_LE) {
1691 o(0x05c4f6); /* test $0x05, %ah */
1692 op = TOK_EQ;
1693 } else {
1694 o(0x45c4f6); /* test $0x45, %ah */
1695 op = TOK_EQ;
1697 vtop--;
1698 vtop->r = VT_CMP;
1699 vtop->c.i = op;
1700 } else {
1701 /* no memory reference possible for long double operations */
1702 load(TREG_ST0, vtop);
1703 swapped = !swapped;
1705 switch(op) {
1706 default:
1707 case '+':
1708 a = 0;
1709 break;
1710 case '-':
1711 a = 4;
1712 if (swapped)
1713 a++;
1714 break;
1715 case '*':
1716 a = 1;
1717 break;
1718 case '/':
1719 a = 6;
1720 if (swapped)
1721 a++;
1722 break;
1724 ft = vtop->type.t;
1725 fc = vtop->c.ul;
1726 o(0xde); /* fxxxp %st, %st(1) */
1727 o(0xc1 + (a << 3));
1728 vtop--;
1730 } else {
1731 if (op >= TOK_ULT && op <= TOK_GT) {
1732 /* if saved lvalue, then we must reload it */
1733 r = vtop->r;
1734 fc = vtop->c.ul;
1735 if ((r & VT_VALMASK) == VT_LLOCAL) {
1736 SValue v1;
1737 r = get_reg(RC_INT);
1738 v1.type.t = VT_PTR;
1739 v1.r = VT_LOCAL | VT_LVAL;
1740 v1.c.ul = fc;
1741 load(r, &v1);
1742 fc = 0;
1745 if (op == TOK_EQ || op == TOK_NE) {
1746 swapped = 0;
1747 } else {
1748 if (op == TOK_LE || op == TOK_LT)
1749 swapped = !swapped;
1750 if (op == TOK_LE || op == TOK_GE) {
1751 op = 0x93; /* setae */
1752 } else {
1753 op = 0x97; /* seta */
1757 if (swapped) {
1758 gv(RC_FLOAT);
1759 vswap();
1761 assert(!(vtop[-1].r & VT_LVAL));
1763 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1764 o(0x66);
1765 o(0x2e0f); /* ucomisd */
1767 if (vtop->r & VT_LVAL) {
1768 gen_modrm(vtop[-1].r, r, vtop->sym, fc);
1769 } else {
1770 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
1773 vtop--;
1774 vtop->r = VT_CMP;
1775 vtop->c.i = op | 0x100;
1776 } else {
1777 assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
1778 switch(op) {
1779 default:
1780 case '+':
1781 a = 0;
1782 break;
1783 case '-':
1784 a = 4;
1785 break;
1786 case '*':
1787 a = 1;
1788 break;
1789 case '/':
1790 a = 6;
1791 break;
1793 ft = vtop->type.t;
1794 fc = vtop->c.ul;
1795 assert((ft & VT_BTYPE) != VT_LDOUBLE);
1797 r = vtop->r;
1798 /* if saved lvalue, then we must reload it */
1799 if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
1800 SValue v1;
1801 r = get_reg(RC_INT);
1802 v1.type.t = VT_PTR;
1803 v1.r = VT_LOCAL | VT_LVAL;
1804 v1.c.ul = fc;
1805 load(r, &v1);
1806 fc = 0;
1809 assert(!(vtop[-1].r & VT_LVAL));
1810 if (swapped) {
1811 assert(vtop->r & VT_LVAL);
1812 gv(RC_FLOAT);
1813 vswap();
1816 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1817 o(0xf2);
1818 } else {
1819 o(0xf3);
1821 o(0x0f);
1822 o(0x58 + a);
1824 if (vtop->r & VT_LVAL) {
1825 gen_modrm(vtop[-1].r, r, vtop->sym, fc);
1826 } else {
1827 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
1830 vtop--;
1835 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1836 and 'long long' cases. */
1837 void gen_cvt_itof(int t)
1839 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1840 save_reg(TREG_ST0);
1841 gv(RC_INT);
1842 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1843 /* signed long long to float/double/long double (unsigned case
1844 is handled generically) */
1845 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1846 o(0x242cdf); /* fildll (%rsp) */
1847 o(0x08c48348); /* add $8, %rsp */
1848 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1849 (VT_INT | VT_UNSIGNED)) {
1850 /* unsigned int to float/double/long double */
1851 o(0x6a); /* push $0 */
1852 g(0x00);
1853 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1854 o(0x242cdf); /* fildll (%rsp) */
1855 o(0x10c48348); /* add $16, %rsp */
1856 } else {
1857 /* int to float/double/long double */
1858 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1859 o(0x2404db); /* fildl (%rsp) */
1860 o(0x08c48348); /* add $8, %rsp */
1862 vtop->r = TREG_ST0;
1863 } else {
1864 int r = get_reg(RC_FLOAT);
1865 gv(RC_INT);
1866 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
1867 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1868 (VT_INT | VT_UNSIGNED) ||
1869 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1870 o(0x48); /* REX */
1872 o(0x2a0f);
1873 o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
1874 vtop->r = r;
1878 /* convert from one floating point type to another */
1879 void gen_cvt_ftof(int t)
1881 int ft, bt, tbt;
1883 ft = vtop->type.t;
1884 bt = ft & VT_BTYPE;
1885 tbt = t & VT_BTYPE;
1887 if (bt == VT_FLOAT) {
1888 gv(RC_FLOAT);
1889 if (tbt == VT_DOUBLE) {
1890 o(0x140f); /* unpcklps */
1891 o(0xc0 + REG_VALUE(vtop->r)*9);
1892 o(0x5a0f); /* cvtps2pd */
1893 o(0xc0 + REG_VALUE(vtop->r)*9);
1894 } else if (tbt == VT_LDOUBLE) {
1895 save_reg(RC_ST0);
1896 /* movss %xmm0,-0x10(%rsp) */
1897 o(0x110ff3);
1898 o(0x44 + REG_VALUE(vtop->r)*8);
1899 o(0xf024);
1900 o(0xf02444d9); /* flds -0x10(%rsp) */
1901 vtop->r = TREG_ST0;
1903 } else if (bt == VT_DOUBLE) {
1904 gv(RC_FLOAT);
1905 if (tbt == VT_FLOAT) {
1906 o(0x140f66); /* unpcklpd */
1907 o(0xc0 + REG_VALUE(vtop->r)*9);
1908 o(0x5a0f66); /* cvtpd2ps */
1909 o(0xc0 + REG_VALUE(vtop->r)*9);
1910 } else if (tbt == VT_LDOUBLE) {
1911 save_reg(RC_ST0);
1912 /* movsd %xmm0,-0x10(%rsp) */
1913 o(0x110ff2);
1914 o(0x44 + REG_VALUE(vtop->r)*8);
1915 o(0xf024);
1916 o(0xf02444dd); /* fldl -0x10(%rsp) */
1917 vtop->r = TREG_ST0;
1919 } else {
1920 gv(RC_ST0);
1921 int r = get_reg(RC_FLOAT);
1922 if (tbt == VT_DOUBLE) {
1923 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1924 /* movsd -0x10(%rsp),%xmm0 */
1925 o(0x100ff2);
1926 o(0x44 + REG_VALUE(r)*8);
1927 o(0xf024);
1928 vtop->r = r;
1929 } else if (tbt == VT_FLOAT) {
1930 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1931 /* movss -0x10(%rsp),%xmm0 */
1932 o(0x100ff3);
1933 o(0x44 + REG_VALUE(r)*8);
1934 o(0xf024);
1935 vtop->r = r;
1940 /* convert fp to int 't' type */
1941 void gen_cvt_ftoi(int t)
1943 int ft, bt, size, r;
1944 ft = vtop->type.t;
1945 bt = ft & VT_BTYPE;
1946 if (bt == VT_LDOUBLE) {
1947 gen_cvt_ftof(VT_DOUBLE);
1948 bt = VT_DOUBLE;
1951 gv(RC_FLOAT);
1952 if (t != VT_INT)
1953 size = 8;
1954 else
1955 size = 4;
1957 r = get_reg(RC_INT);
1958 if (bt == VT_FLOAT) {
1959 o(0xf3);
1960 } else if (bt == VT_DOUBLE) {
1961 o(0xf2);
1962 } else {
1963 assert(0);
1965 orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
1966 o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
1967 vtop->r = r;
1970 /* computed goto support */
1971 void ggoto(void)
1973 gcall_or_jmp(1);
1974 vtop--;
1977 /* end of x86-64 code generator */
1978 /*************************************************************/
1979 #endif /* ! TARGET_DEFS_ONLY */
1980 /******************************************************/