fix-mixed-struct (patch by Pip Cet)
[tinycc.git] / x86_64-gen.c
blob558987749cab2abc407b91bcc4feb9db3f655bed
1 /*
2 * x86-64 code generator for TCC
3 *
4 * Copyright (c) 2008 Shinichiro Hamaji
5 *
6 * Based on i386-gen.c by Fabrice Bellard
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
23 #ifdef TARGET_DEFS_ONLY
25 /* number of available registers */
26 #define NB_REGS 25
27 #define NB_ASM_REGS 8
28 #define REG_ARGS_MAX 2 /* at most 2 registers used for each argument */
30 #ifdef TCC_TARGET_PE
31 typedef int RegArgs;
32 #else
33 /* This struct stores the struct offsets at which %rax, %rdx, %xmm0, and
34 * %xmm1 are to be stored.
35 *
36 * struct { long long l; double x; }: ireg = { 0, -1 } freg = { 8, -1 }
37 * struct { double x; long long l; }: ireg = { 8, -1 } freg = { 0, -1 }
38 * struct { long long l; long long l2; }: ireg = { 0, 8 } freg = { -1, -1 }
39 * struct { double x; double x2; }: ireg = { -1, -1 } freg = { 0, 8 }
40 */
41 typedef struct {
42 int ireg[REG_ARGS_MAX];
43 int freg[REG_ARGS_MAX];
44 } RegArgs;
45 #endif
/* Register class bit masks. Each register's entry in reg_classes[] is an
   OR of these values. */
47 /* a register can belong to several classes. The classes must be
48 sorted from more general to more precise (see gv2() code which
49 makes assumptions about it). */
50 #define RC_INT 0x0001 /* generic integer register */
51 #define RC_FLOAT 0x0002 /* generic float register */
/* The remaining classes each use a distinct single bit; the values
   0x0020 and 0x0040 are not assigned in this list. */
52 #define RC_RAX 0x0004
53 #define RC_RCX 0x0008
54 #define RC_RDX 0x0010
55 #define RC_ST0 0x0080 /* only for long double */
56 #define RC_R8 0x0100
57 #define RC_R9 0x0200
58 #define RC_R10 0x0400
59 #define RC_R11 0x0800
60 #define RC_XMM0 0x1000
61 #define RC_XMM1 0x2000
62 #define RC_XMM2 0x4000
63 #define RC_XMM3 0x8000
64 #define RC_XMM4 0x10000
65 #define RC_XMM5 0x20000
66 #define RC_XMM6 0x40000
67 #define RC_XMM7 0x80000
/* Aliases naming the classes of the registers used for return values. */
68 #define RC_IRET RC_RAX /* function return: integer register */
69 #define RC_LRET RC_RDX /* function return: second integer register */
70 #define RC_FRET RC_XMM0 /* function return: float register */
71 #define RC_QRET RC_XMM1 /* function return: second float register */
73 /* pretty names for the registers */
/* The numeric values double as indices into reg_classes[]. REG_VALUE()
   keeps the low three bits of a value and REX_BASE() extracts bit 3. */
74 enum {
75 TREG_RAX = 0,
76 TREG_RCX = 1,
77 TREG_RDX = 2,
78 TREG_RSP = 4,
79 TREG_RSI = 6,
80 TREG_RDI = 7,
82 TREG_R8 = 8,
83 TREG_R9 = 9,
84 TREG_R10 = 10,
85 TREG_R11 = 11,
87 TREG_XMM0 = 16,
88 TREG_XMM1 = 17,
89 TREG_XMM2 = 18,
90 TREG_XMM3 = 19,
91 TREG_XMM4 = 20,
92 TREG_XMM5 = 21,
93 TREG_XMM6 = 22,
94 TREG_XMM7 = 23,
96 TREG_ST0 = 24,
/* Not a register: a flag OR-ed into a register number (see load() and
   gen_modrm_impl()) to request addressing through that register. */
98 TREG_MEM = 0x20,
99 };
101 #define REX_BASE(reg) (((reg) >> 3) & 1)
102 #define REG_VALUE(reg) ((reg) & 7)
104 /* return registers for function */
105 #define REG_IRET TREG_RAX /* single word int return register */
106 #define REG_LRET TREG_RDX /* second word return register (for long long) */
107 #define REG_FRET TREG_XMM0 /* float return register */
108 #define REG_QRET TREG_XMM1 /* second float return register */
110 /* defined if function parameters must be evaluated in reverse order */
111 #define INVERT_FUNC_PARAMS
113 /* pointer size, in bytes */
114 #define PTR_SIZE 8
116 /* long double size and alignment, in bytes */
117 #define LDOUBLE_SIZE 16
118 #define LDOUBLE_ALIGN 16
119 /* maximum alignment (for aligned attribute support) */
120 #define MAX_ALIGN 16
122 /******************************************************/
123 /* ELF defines */
125 #define EM_TCC_TARGET EM_X86_64
127 /* relocation type for 32 bit data relocation */
128 #define R_DATA_32 R_X86_64_32
129 #define R_DATA_PTR R_X86_64_64
130 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
131 #define R_COPY R_X86_64_COPY
133 #define ELF_START_ADDR 0x400000
134 #define ELF_PAGE_SIZE 0x200000
136 /******************************************************/
137 #else /* ! TARGET_DEFS_ONLY */
138 /******************************************************/
139 #include "tcc.h"
140 #include <assert.h>
142 ST_DATA const int reg_classes[NB_REGS] = {
143 /* eax */ RC_INT | RC_RAX,
144 /* ecx */ RC_INT | RC_RCX,
145 /* edx */ RC_INT | RC_RDX,
146 0,
147 0,
148 0,
149 0,
150 0,
151 RC_R8,
152 RC_R9,
153 RC_R10,
154 RC_R11,
155 0,
156 0,
157 0,
158 0,
159 /* xmm0 */ RC_FLOAT | RC_XMM0,
160 /* xmm1 */ RC_FLOAT | RC_XMM1,
161 /* xmm2 */ RC_FLOAT | RC_XMM2,
162 /* xmm3 */ RC_FLOAT | RC_XMM3,
163 /* xmm4 */ RC_FLOAT | RC_XMM4,
164 /* xmm5 */ RC_FLOAT | RC_XMM5,
165 /* xmm6 an xmm7 are included so gv() can be used on them,
166 but they are not tagged with RC_FLOAT because they are
167 callee saved on Windows */
168 RC_XMM6,
169 RC_XMM7,
170 /* st0 */ RC_ST0
171 };
173 static unsigned long func_sub_sp_offset;
174 static int func_ret_sub;
176 /* XXX: make it faster ? */
177 void g(int c)
179 int ind1;
180 ind1 = ind + 1;
181 if (ind1 > cur_text_section->data_allocated)
182 section_realloc(cur_text_section, ind1);
183 cur_text_section->data[ind] = c;
184 ind = ind1;
/* Emit the bytes of 'c' starting with the least significant one and
   stopping as soon as the remaining value is zero (so o(0) emits
   nothing). */
void o(unsigned int c)
{
    for (; c != 0; c >>= 8) {
        g(c);
    }
}
/* Emit the low 16 bits of 'v' as two bytes, least significant first. */
void gen_le16(int v)
{
    int shift;

    for (shift = 0; shift < 16; shift += 8) {
        g(v >> shift);
    }
}
/* Emit 'c' as four bytes, least significant first. */
void gen_le32(int c)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        g(c >> shift);
    }
}
/* Emit 'c' as eight bytes, least significant first. */
void gen_le64(int64_t c)
{
    int shift;

    for (shift = 0; shift < 64; shift += 8) {
        g(c >> shift);
    }
}
221 void orex(int ll, int r, int r2, int b)
223 if ((r & VT_VALMASK) >= VT_CONST)
224 r = 0;
225 if ((r2 & VT_VALMASK) >= VT_CONST)
226 r2 = 0;
227 if (ll || REX_BASE(r) || REX_BASE(r2))
228 o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
229 o(b);
232 /* output a symbol and patch all calls to it */
233 void gsym_addr(int t, int a)
235 int n, *ptr;
236 while (t) {
237 ptr = (int *)(cur_text_section->data + t);
238 n = *ptr; /* next value */
239 *ptr = a - t - 4;
240 t = n;
244 void gsym(int t)
246 gsym_addr(t, ind);
249 /* psym is used to put an instruction with a data field which is a
250 reference to a symbol. It is in fact the same as oad ! */
251 #define psym oad
253 static int is64_type(int t)
255 return ((t & VT_BTYPE) == VT_PTR ||
256 (t & VT_BTYPE) == VT_FUNC ||
257 (t & VT_BTYPE) == VT_LLONG);
260 /* instruction + 4 bytes data. Return the address of the data */
261 ST_FUNC int oad(int c, int s)
263 int ind1;
265 o(c);
266 ind1 = ind + 4;
267 if (ind1 > cur_text_section->data_allocated)
268 section_realloc(cur_text_section, ind1);
269 *(int *)(cur_text_section->data + ind) = s;
270 s = ind;
271 ind = ind1;
272 return s;
275 ST_FUNC void gen_addr32(int r, Sym *sym, int c)
277 if (r & VT_SYM)
278 greloc(cur_text_section, sym, ind, R_X86_64_32);
279 gen_le32(c);
282 /* output constant with relocation if 'r & VT_SYM' is true */
283 ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
285 if (r & VT_SYM)
286 greloc(cur_text_section, sym, ind, R_X86_64_64);
287 gen_le64(c);
290 /* output constant with relocation if 'r & VT_SYM' is true */
291 ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
293 if (r & VT_SYM)
294 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
295 gen_le32(c-4);
298 /* output got address with relocation */
299 static void gen_gotpcrel(int r, Sym *sym, int c)
301 #ifndef TCC_TARGET_PE
302 Section *sr;
303 ElfW(Rela) *rel;
304 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
305 sr = cur_text_section->reloc;
306 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
307 rel->r_addend = -4;
308 #else
309 tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
310 get_tok_str(sym->v, NULL), c, r,
311 cur_text_section->data[ind-3],
312 cur_text_section->data[ind-2],
313 cur_text_section->data[ind-1]
314 );
315 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
316 #endif
317 gen_le32(0);
318 if (c) {
319 /* we use add c, %xxx for displacement */
320 orex(1, r, 0, 0x81);
321 o(0xc0 + REG_VALUE(r));
322 gen_le32(c);
326 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
328 op_reg = REG_VALUE(op_reg) << 3;
329 if ((r & VT_VALMASK) == VT_CONST) {
330 /* constant memory reference */
331 o(0x05 | op_reg);
332 if (is_got) {
333 gen_gotpcrel(r, sym, c);
334 } else {
335 gen_addrpc32(r, sym, c);
337 } else if ((r & VT_VALMASK) == VT_LOCAL) {
338 /* currently, we use only ebp as base */
339 if (c == (char)c) {
340 /* short reference */
341 o(0x45 | op_reg);
342 g(c);
343 } else {
344 oad(0x85 | op_reg, c);
346 } else if ((r & VT_VALMASK) >= TREG_MEM) {
347 if (c) {
348 g(0x80 | op_reg | REG_VALUE(r));
349 gen_le32(c);
350 } else {
351 g(0x00 | op_reg | REG_VALUE(r));
353 } else {
354 g(0x00 | op_reg | REG_VALUE(r));
358 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
359 opcode bits */
360 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
362 gen_modrm_impl(op_reg, r, sym, c, 0);
365 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
366 opcode bits */
367 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
369 int is_got;
370 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
371 orex(1, r, op_reg, opcode);
372 gen_modrm_impl(op_reg, r, sym, c, is_got);
376 /* load 'r' from value 'sv' */
377 void load(int r, SValue *sv)
379 int v, t, ft, fc, fr;
380 SValue v1;
382 #ifdef TCC_TARGET_PE
383 SValue v2;
384 sv = pe_getimport(sv, &v2);
385 #endif
387 fr = sv->r;
388 ft = sv->type.t & ~VT_DEFSIGN;
389 fc = sv->c.ul;
391 #ifndef TCC_TARGET_PE
392 /* we use indirect access via got */
393 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
394 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
395 /* use the result register as a temporal register */
396 int tr = r | TREG_MEM;
397 if (is_float(ft)) {
398 /* we cannot use float registers as a temporal register */
399 tr = get_reg(RC_INT) | TREG_MEM;
401 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
403 /* load from the temporal register */
404 fr = tr | VT_LVAL;
406 #endif
408 v = fr & VT_VALMASK;
409 if (fr & VT_LVAL) {
410 int b, ll;
411 if (v == VT_LLOCAL) {
412 v1.type.t = VT_PTR;
413 v1.r = VT_LOCAL | VT_LVAL;
414 v1.c.ul = fc;
415 fr = r;
416 if (!(reg_classes[fr] & (RC_INT|RC_R11)))
417 fr = get_reg(RC_INT);
418 load(fr, &v1);
420 ll = 0;
421 if ((ft & VT_BTYPE) == VT_FLOAT) {
422 b = 0x6e0f66;
423 r = REG_VALUE(r); /* movd */
424 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
425 b = 0x7e0ff3; /* movq */
426 r = REG_VALUE(r);
427 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
428 b = 0xdb, r = 5; /* fldt */
429 } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
430 b = 0xbe0f; /* movsbl */
431 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
432 b = 0xb60f; /* movzbl */
433 } else if ((ft & VT_TYPE) == VT_SHORT) {
434 b = 0xbf0f; /* movswl */
435 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
436 b = 0xb70f; /* movzwl */
437 } else {
438 assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
439 || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
440 || ((ft & VT_BTYPE) == VT_FUNC));
441 ll = is64_type(ft);
442 b = 0x8b;
444 if (ll) {
445 gen_modrm64(b, r, fr, sv->sym, fc);
446 } else {
447 orex(ll, fr, r, b);
448 gen_modrm(r, fr, sv->sym, fc);
450 } else {
451 if (v == VT_CONST) {
452 if (fr & VT_SYM) {
453 #ifdef TCC_TARGET_PE
454 orex(1,0,r,0x8d);
455 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
456 gen_addrpc32(fr, sv->sym, fc);
457 #else
458 if (sv->sym->type.t & VT_STATIC) {
459 orex(1,0,r,0x8d);
460 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
461 gen_addrpc32(fr, sv->sym, fc);
462 } else {
463 orex(1,0,r,0x8b);
464 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
465 gen_gotpcrel(r, sv->sym, fc);
467 #endif
468 } else if (is64_type(ft)) {
469 orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
470 gen_le64(sv->c.ull);
471 } else {
472 orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
473 gen_le32(fc);
475 } else if (v == VT_LOCAL) {
476 orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
477 gen_modrm(r, VT_LOCAL, sv->sym, fc);
478 } else if (v == VT_CMP) {
479 orex(0,r,0,0);
480 if ((fc & ~0x100) != TOK_NE)
481 oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
482 else
483 oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
484 if (fc & 0x100)
486 /* This was a float compare. If the parity bit is
487 set the result was unordered, meaning false for everything
488 except TOK_NE, and true for TOK_NE. */
489 fc &= ~0x100;
490 o(0x037a + (REX_BASE(r) << 8));
492 orex(0,r,0, 0x0f); /* setxx %br */
493 o(fc);
494 o(0xc0 + REG_VALUE(r));
495 } else if (v == VT_JMP || v == VT_JMPI) {
496 t = v & 1;
497 orex(0,r,0,0);
498 oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
499 o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
500 gsym(fc);
501 orex(0,r,0,0);
502 oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
503 } else if (v != r) {
504 if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
505 if (v == TREG_ST0) {
506 /* gen_cvt_ftof(VT_DOUBLE); */
507 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
508 /* movsd -0x10(%rsp),%xmmN */
509 o(0x100ff2);
510 o(0x44 + REG_VALUE(r)*8); /* %xmmN */
511 o(0xf024);
512 } else {
513 assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
514 if ((ft & VT_BTYPE) == VT_FLOAT) {
515 o(0x100ff3);
516 } else {
517 assert((ft & VT_BTYPE) == VT_DOUBLE);
518 o(0x100ff2);
520 o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
522 } else if (r == TREG_ST0) {
523 assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
524 /* gen_cvt_ftof(VT_LDOUBLE); */
525 /* movsd %xmmN,-0x10(%rsp) */
526 o(0x110ff2);
527 o(0x44 + REG_VALUE(r)*8); /* %xmmN */
528 o(0xf024);
529 o(0xf02444dd); /* fldl -0x10(%rsp) */
530 } else {
531 orex(1,r,v, 0x89);
532 o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
538 /* store register 'r' in lvalue 'v' */
539 void store(int r, SValue *v)
541 int fr, bt, ft, fc;
542 int op64 = 0;
543 /* store the REX prefix in this variable when PIC is enabled */
544 int pic = 0;
546 #ifdef TCC_TARGET_PE
547 SValue v2;
548 v = pe_getimport(v, &v2);
549 #endif
551 ft = v->type.t;
552 fc = v->c.ul;
553 fr = v->r & VT_VALMASK;
554 bt = ft & VT_BTYPE;
556 #ifndef TCC_TARGET_PE
557 /* we need to access the variable via got */
558 if (fr == VT_CONST && (v->r & VT_SYM)) {
559 /* mov xx(%rip), %r11 */
560 o(0x1d8b4c);
561 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
562 pic = is64_type(bt) ? 0x49 : 0x41;
564 #endif
566 /* XXX: incorrect if float reg to reg */
567 if (bt == VT_FLOAT) {
568 o(0x66);
569 o(pic);
570 o(0x7e0f); /* movd */
571 r = REG_VALUE(r);
572 } else if (bt == VT_DOUBLE) {
573 o(0x66);
574 o(pic);
575 o(0xd60f); /* movq */
576 r = REG_VALUE(r);
577 } else if (bt == VT_LDOUBLE) {
578 o(0xc0d9); /* fld %st(0) */
579 o(pic);
580 o(0xdb); /* fstpt */
581 r = 7;
582 } else {
583 if (bt == VT_SHORT)
584 o(0x66);
585 o(pic);
586 if (bt == VT_BYTE || bt == VT_BOOL)
587 orex(0, 0, r, 0x88);
588 else if (is64_type(bt))
589 op64 = 0x89;
590 else
591 orex(0, 0, r, 0x89);
593 if (pic) {
594 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
595 if (op64)
596 o(op64);
597 o(3 + (r << 3));
598 } else if (op64) {
599 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
600 gen_modrm64(op64, r, v->r, v->sym, fc);
601 } else if (fr != r) {
602 /* XXX: don't we really come here? */
603 abort();
604 o(0xc0 + fr + r * 8); /* mov r, fr */
606 } else {
607 if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
608 gen_modrm(r, v->r, v->sym, fc);
609 } else if (fr != r) {
610 /* XXX: don't we really come here? */
611 abort();
612 o(0xc0 + fr + r * 8); /* mov r, fr */
617 /* 'is_jmp' is '1' if it is a jump */
618 static void gcall_or_jmp(int is_jmp)
620 int r;
621 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
622 ((vtop->r & VT_SYM) || (vtop->c.ll-4) == (int)(vtop->c.ll-4))) {
623 /* constant case */
624 if (vtop->r & VT_SYM) {
625 /* relocation case */
626 #ifdef TCC_TARGET_PE
627 greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32);
628 #else
629 greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32);
630 #endif
631 } else {
632 /* put an empty PC32 relocation */
633 put_elf_reloc(symtab_section, cur_text_section,
634 ind + 1, R_X86_64_PC32, 0);
636 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
637 } else {
638 /* otherwise, indirect call */
639 r = TREG_R11;
640 load(r, vtop);
641 o(0x41); /* REX */
642 o(0xff); /* call/jmp *r */
643 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
647 #if defined(CONFIG_TCC_BCHECK)
648 #ifndef TCC_TARGET_PE
649 static addr_t func_bound_offset;
650 static unsigned long func_bound_ind;
651 #endif
653 static void gen_static_call(int v)
655 Sym *sym = external_global_sym(v, &func_old_type, 0);
656 oad(0xe8, -4);
657 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
660 /* generate a bounded pointer addition */
661 ST_FUNC void gen_bounded_ptr_add(void)
663 /* save all temporary registers */
664 save_regs(0);
666 /* prepare fast x86_64 function call */
667 gv(RC_RAX);
668 o(0xc68948); // mov %rax,%rsi ## second arg in %rsi, this must be size
669 vtop--;
671 gv(RC_RAX);
672 o(0xc78948); // mov %rax,%rdi ## first arg in %rdi, this must be ptr
673 vtop--;
675 /* do a fast function call */
676 gen_static_call(TOK___bound_ptr_add);
678 /* returned pointer is in rax */
679 vtop++;
680 vtop->r = TREG_RAX | VT_BOUNDED;
683 /* relocation offset of the bounding function call point */
684 vtop->c.ull = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
687 /* patch pointer addition in vtop so that pointer dereferencing is
688 also tested */
689 ST_FUNC void gen_bounded_ptr_deref(void)
691 addr_t func;
692 int size, align;
693 ElfW(Rela) *rel;
694 Sym *sym;
696 size = 0;
697 /* XXX: put that code in generic part of tcc */
698 if (!is_float(vtop->type.t)) {
699 if (vtop->r & VT_LVAL_BYTE)
700 size = 1;
701 else if (vtop->r & VT_LVAL_SHORT)
702 size = 2;
704 if (!size)
705 size = type_size(&vtop->type, &align);
706 switch(size) {
707 case 1: func = TOK___bound_ptr_indir1; break;
708 case 2: func = TOK___bound_ptr_indir2; break;
709 case 4: func = TOK___bound_ptr_indir4; break;
710 case 8: func = TOK___bound_ptr_indir8; break;
711 case 12: func = TOK___bound_ptr_indir12; break;
712 case 16: func = TOK___bound_ptr_indir16; break;
713 default:
714 tcc_error("unhandled size when dereferencing bounded pointer");
715 func = 0;
716 break;
719 sym = external_global_sym(func, &func_old_type, 0);
720 if (!sym->c)
721 put_extern_sym(sym, NULL, 0, 0);
723 /* patch relocation */
724 /* XXX: find a better solution ? */
726 rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.ull);
727 rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
729 #endif
731 #ifdef TCC_TARGET_PE
733 #define REGN 4
734 static const uint8_t arg_regs[REGN] = {
735 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
736 };
738 /* Prepare arguments in R10 and R11 rather than RCX and RDX
739 because gv() will not ever use these */
740 static int arg_prepare_reg(int idx) {
741 if (idx == 0 || idx == 1)
742 /* idx=0: r10, idx=1: r11 */
743 return idx + 10;
744 else
745 return arg_regs[idx];
748 static int func_scratch;
750 /* Generate function call. The function address is pushed first, then
751 all the parameters in call order. This functions pops all the
752 parameters and the function address. */
/* Emit REX.W-prefixed opcode 'b' with operand d(%rsp) and the low three
   bits of 'r' in the ModRM reg field. When bit 0x100 is set in 'r' the
   value does not contribute a REX extension bit. An 8-bit displacement
   is used when 'd' fits in a char, a 32-bit one otherwise. */
void gen_offs_sp(int b, int r, int d)
{
    const int rex_reg = (r & 0x100) ? 0 : r;
    const int reg_bits = REG_VALUE(r) << 3;

    orex(1, 0, rex_reg, b);
    if (d == (char)d) {
        o(0x2444 | reg_bits);
        g(d);
        return;
    }
    o(0x2484 | reg_bits);
    gen_le32(d);
}
766 ST_FUNC int regargs_nregs(RegArgs *args)
768 return *args;
771 /* Return the number of registers needed to return the struct, or 0 if
772 returning via struct pointer. */
773 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize, RegArgs *args)
775 int size, align;
776 *regsize = 8;
777 *ret_align = 1; // Never have to re-align return values for x86-64
778 size = type_size(vt, &align);
779 ret->ref = NULL;
780 if (size > 8) {
781 *args = 0;
782 } else if (size > 4) {
783 ret->t = VT_LLONG;
784 *args = 1;
785 } else if (size > 2) {
786 ret->t = VT_INT;
787 *args = 1;
788 } else if (size > 1) {
789 ret->t = VT_SHORT;
790 *args = 1;
791 } else {
792 ret->t = VT_BYTE;
793 *args = 1;
796 return *args != 0;
799 static int is_sse_float(int t) {
800 int bt;
801 bt = t & VT_BTYPE;
802 return bt == VT_DOUBLE || bt == VT_FLOAT;
805 int gfunc_arg_size(CType *type) {
806 int align;
807 if (type->t & (VT_ARRAY|VT_BITFIELD))
808 return 8;
809 return type_size(type, &align);
/* TCC_TARGET_PE variant of gfunc_call(): generates the code passing the
   'nb_args' values on top of the value stack to the function located
   below them, then emits the call through gcall_or_jmp(0). All
   arguments and the function value are popped. */
812 void gfunc_call(int nb_args)
814 int size, r, args_size, i, d, bt, struct_size;
815 int arg;
/* every call reserves at least REGN argument slots of PTR_SIZE bytes */
817 args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
818 arg = nb_args;
820 /* for struct arguments, we need to call memcpy and the function
821 call breaks register passing arguments we are preparing.
822 So, we process arguments which will be passed by stack first. */
/* first pass: values larger than 8 bytes are copied into the area that
   starts right after the argument slots; struct_size is the running
   offset of that area from %rsp */
823 struct_size = args_size;
824 for(i = 0; i < nb_args; i++) {
825 SValue *sv;
/* NOTE(review): 'arg' is decremented in this loop but not read before
   it is reset below */
827 --arg;
828 sv = &vtop[-i];
829 bt = (sv->type.t & VT_BTYPE);
830 size = gfunc_arg_size(&sv->type);
832 if (size <= 8)
833 continue; /* arguments smaller than 8 bytes passed in registers or on stack */
835 if (bt == VT_STRUCT) {
836 /* align to stack align size */
837 size = (size + 15) & ~15;
838 /* generate structure store */
839 r = get_reg(RC_INT);
/* opcode 0x8d (lea): r = address of the copy at struct_size(%rsp) */
840 gen_offs_sp(0x8d, r, struct_size);
841 struct_size += size;
843 /* generate memcpy call */
844 vset(&sv->type, r | VT_LVAL, 0);
845 vpushv(sv);
846 vstore();
847 --vtop;
848 } else if (bt == VT_LDOUBLE) {
/* NOTE(review): gv() operates on vtop, not on 'sv' -- confirm this is
   intended when the long double is not the topmost argument */
849 gv(RC_ST0);
/* opcode 0xdb with reg field 7 (the same encoding store() labels
   "fstpt"); bit 0x100 keeps the value out of the REX prefix */
850 gen_offs_sp(0xdb, 0x107, struct_size);
851 struct_size += 16;
/* func_scratch keeps the largest outgoing area seen so far; the epilog
   adds it to the frame size */
855 if (func_scratch < struct_size)
856 func_scratch = struct_size;
858 arg = nb_args;
859 struct_size = args_size;
/* second pass: walk the arguments from last to first, popping each one;
   'arg' is the index of the argument currently on top of the stack */
861 for(i = 0; i < nb_args; i++) {
862 --arg;
863 bt = (vtop->type.t & VT_BTYPE);
865 size = gfunc_arg_size(&vtop->type);
866 if (size > 8) {
/* large value: pass the address of the copy made in the first pass */
867 /* align to stack align size */
868 size = (size + 15) & ~15;
869 if (arg >= REGN) {
870 d = get_reg(RC_INT);
871 gen_offs_sp(0x8d, d, struct_size);
872 gen_offs_sp(0x89, d, arg*8);
873 } else {
874 d = arg_prepare_reg(arg);
875 gen_offs_sp(0x8d, d, struct_size);
877 struct_size += size;
878 } else {
879 if (is_sse_float(vtop->type.t)) {
880 gv(RC_XMM0); /* only use one float register */
881 if (arg >= REGN) {
882 /* movq %xmm0, j*8(%rsp) */
883 gen_offs_sp(0xd60f66, 0x100, arg*8);
884 } else {
/* register argument: the value is placed both in %xmm<arg> and in the
   integer register prepared for the same position */
885 /* movaps %xmm0, %xmmN */
886 o(0x280f);
887 o(0xc0 + (arg << 3));
888 d = arg_prepare_reg(arg);
889 /* mov %xmm0, %rxx */
890 o(0x66);
891 orex(1,d,0, 0x7e0f);
892 o(0xc0 + REG_VALUE(d));
894 } else {
895 if (bt == VT_STRUCT) {
/* small struct: retype it as the smallest integer type that holds it so
   that gv() can load it into a register */
896 vtop->type.ref = NULL;
897 vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
898 : size > 1 ? VT_SHORT : VT_BYTE;
901 r = gv(RC_INT);
902 if (arg >= REGN) {
903 gen_offs_sp(0x89, r, arg*8);
904 } else {
905 d = arg_prepare_reg(arg);
906 orex(1,d,r,0x89); /* mov */
907 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
911 vtop--;
913 save_regs(0);
915 /* Copy R10 and R11 into RCX and RDX, respectively */
916 if (nb_args > 0) {
917 o(0xd1894c); /* mov %r10, %rcx */
918 if (nb_args > 1) {
919 o(0xda894c); /* mov %r11, %rdx */
923 gcall_or_jmp(0);
/* pop the function value */
924 vtop--;
928 #define FUNC_PROLOG_SIZE 11
930 /* generate function prolog of type 't' */
931 void gfunc_prolog(CType *func_type)
933 int addr, reg_param_index, bt, size;
934 Sym *sym;
935 CType *type;
937 func_ret_sub = 0;
938 func_scratch = 0;
939 loc = 0;
941 addr = PTR_SIZE * 2;
942 ind += FUNC_PROLOG_SIZE;
943 func_sub_sp_offset = ind;
944 reg_param_index = 0;
946 sym = func_type->ref;
948 /* if the function returns a structure, then add an
949 implicit pointer parameter */
950 func_vt = sym->type;
951 func_var = (sym->c == FUNC_ELLIPSIS);
952 size = gfunc_arg_size(&func_vt);
953 if (size > 8) {
954 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
955 func_vc = addr;
956 reg_param_index++;
957 addr += 8;
960 /* define parameters */
961 while ((sym = sym->next) != NULL) {
962 type = &sym->type;
963 bt = type->t & VT_BTYPE;
964 size = gfunc_arg_size(type);
965 if (size > 8) {
966 if (reg_param_index < REGN) {
967 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
969 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
970 } else {
971 if (reg_param_index < REGN) {
972 /* save arguments passed by register */
973 if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
974 o(0xd60f66); /* movq */
975 gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
976 } else {
977 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
980 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
982 addr += 8;
983 reg_param_index++;
986 while (reg_param_index < REGN) {
987 if (func_type->ref->c == FUNC_ELLIPSIS) {
988 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
989 addr += 8;
991 reg_param_index++;
995 /* generate function epilog */
996 void gfunc_epilog(void)
998 int v, saved_ind;
1000 o(0xc9); /* leave */
1001 if (func_ret_sub == 0) {
1002 o(0xc3); /* ret */
1003 } else {
1004 o(0xc2); /* ret n */
1005 g(func_ret_sub);
1006 g(func_ret_sub >> 8);
1009 saved_ind = ind;
1010 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1011 /* align local size to word & save local variables */
1012 v = (func_scratch + -loc + 15) & -16;
1014 if (v >= 4096) {
1015 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
1016 oad(0xb8, v); /* mov stacksize, %eax */
1017 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
1018 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
1019 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
1020 } else {
1021 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1022 o(0xec8148); /* sub rsp, stacksize */
1023 gen_le32(v);
1026 cur_text_section->data_offset = saved_ind;
1027 pe_add_unwind_data(ind, saved_ind, v);
1028 ind = cur_text_section->data_offset;
1031 #else
/* Emit "add $val, %rsp", using the 8-bit immediate form when 'val'
   fits in a char. */
static void gadd_sp(int val)
{
    const int fits_imm8 = (val == (char)val);

    if (!fits_imm8) {
        oad(0xc48148, val); /* add $xxx, %rsp */
        return;
    }
    o(0xc48348);
    g(val);
}
/* Classes an argument (or one of its eightbytes) can be assigned. */
typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_sse,
    x86_64_mode_x87
} X86_64_Mode;

/* Combine the classes of two fields sharing the same eightbyte:
   equal classes stay as they are, 'none' yields to the other class,
   'memory' wins over everything, then 'integer'; x87 mixed with
   anything else becomes 'memory'; what remains is 'sse'. */
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    if (a == x86_64_mode_none)
        return b;
    if (b == x86_64_mode_none)
        return a;

    if (a == x86_64_mode_memory || b == x86_64_mode_memory)
        return x86_64_mode_memory;
    if (a == x86_64_mode_integer || b == x86_64_mode_integer)
        return x86_64_mode_integer;
    if (a == x86_64_mode_x87 || b == x86_64_mode_x87)
        return x86_64_mode_memory;

    return x86_64_mode_sse;
}
1069 /* classify the x86 eightbytes from byte index start to byte index
1070 * end, at offset offset from the root struct */
1071 static X86_64_Mode classify_x86_64_inner(CType *ty, int offset, int start, int end)
1073 X86_64_Mode mode;
1074 Sym *f;
1076 switch (ty->t & VT_BTYPE) {
1077 case VT_VOID: return x86_64_mode_none;
1079 case VT_INT:
1080 case VT_BYTE:
1081 case VT_SHORT:
1082 case VT_LLONG:
1083 case VT_BOOL:
1084 case VT_PTR:
1085 case VT_FUNC:
1086 case VT_ENUM: return x86_64_mode_integer;
1088 case VT_FLOAT:
1089 case VT_DOUBLE: return x86_64_mode_sse;
1091 case VT_LDOUBLE: return x86_64_mode_x87;
1093 case VT_STRUCT:
1094 f = ty->ref;
1096 mode = x86_64_mode_none;
1097 while ((f = f->next) != NULL) {
1098 if (f->c + offset >= start && f->c + offset < end)
1099 mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type, f->c + offset, start, end));
1102 return mode;
1105 assert(0);
1108 static X86_64_Mode classify_x86_64_arg_eightbyte(CType *ty, int offset)
1110 X86_64_Mode mode;
1112 assert((ty->t & VT_BTYPE) == VT_STRUCT);
1114 mode = classify_x86_64_inner(ty, 0, offset, offset + 8);
1116 return mode;
1119 static void regargs_init(RegArgs *args)
1121 int i;
1122 for(i=0; i<REG_ARGS_MAX; i++) {
1123 args->ireg[i] = -1;
1124 args->freg[i] = -1;
1128 static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, RegArgs *args)
1130 X86_64_Mode mode = x86_64_mode_none;
1131 int size, align, ret_t = 0;
1132 int ireg = 0, freg = 0;
1134 if (args)
1135 regargs_init(args);
1137 if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
1138 *psize = 8;
1139 *palign = 8;
1140 if (args)
1141 args->ireg[ireg++] = 0;
1142 ret_t = ty->t;
1143 mode = x86_64_mode_integer;
1144 } else {
1145 size = type_size(ty, &align);
1146 *psize = (size + 7) & ~7;
1147 *palign = (align + 7) & ~7;
1149 if (size > 16) {
1150 mode = x86_64_mode_memory;
1151 } else {
1152 int start;
1154 for(start=0; start < size; start += 8) {
1155 if ((ty->t & VT_BTYPE) == VT_STRUCT) {
1156 mode = classify_x86_64_arg_eightbyte(ty, start);
1157 } else {
1158 mode = classify_x86_64_inner(ty, 0, 0, size);
1161 if (mode == x86_64_mode_integer) {
1162 if (args)
1163 args->ireg[ireg++] = start;
1164 ret_t = (size > 4) ? VT_LLONG : VT_INT;
1165 } else if (mode == x86_64_mode_sse) {
1166 if (args)
1167 args->freg[freg++] = start;
1168 ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
1169 } else {
1170 ret_t = VT_LDOUBLE;
1176 if (ret) {
1177 ret->ref = NULL;
1178 ret->t = ret_t;
1181 return mode;
1184 ST_FUNC int classify_x86_64_va_arg(CType *ty)
1186 /* This definition must be synced with stdarg.h */
1187 enum __va_arg_type {
1188 __va_gen_reg, __va_float_reg, __va_stack
1189 };
1190 int size, align;
1191 X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, NULL);
1192 switch (mode) {
1193 default: return __va_stack;
1194 case x86_64_mode_integer: return __va_gen_reg;
1195 case x86_64_mode_sse: return __va_float_reg;
1199 static int regargs_iregs(RegArgs *args)
1201 int i;
1202 int ret = 0;
1203 for(i=0; i<REG_ARGS_MAX; i++) {
1204 if(args->ireg[i] != -1)
1205 ret++;
1208 return ret;
1211 static int regargs_fregs(RegArgs *args)
1213 int i;
1214 int ret = 0;
1215 for(i=0; i<REG_ARGS_MAX; i++) {
1216 if(args->freg[i] != -1)
1217 ret++;
1220 return ret;
1223 /* Count the total number of registers used by args */
1224 ST_FUNC int regargs_nregs(RegArgs *args)
1226 int i;
1227 int ret = 0;
1228 for(i=0; i<REG_ARGS_MAX; i++) {
1229 if(args->ireg[i] != -1)
1230 ret++;
1232 if(args->freg[i] != -1)
1233 ret++;
1236 return ret;
1239 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize, RegArgs *args)
1241 int size, align;
1242 X86_64_Mode mode;
1243 *ret_align = 1; // Never have to re-align return values for x86-64
1244 *regsize = 8;
1246 mode = classify_x86_64_arg(vt, ret, &size, &align, args);
1248 return mode != x86_64_mode_memory &&
1249 mode != x86_64_mode_none;
1252 #define REGN 6
1253 static const uint8_t arg_regs[REGN] = {
1254 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
1255 };
1257 static int arg_prepare_reg(int idx) {
1258 if (idx == 2 || idx == 3)
1259 /* idx=2: r10, idx=3: r11 */
1260 return idx + 8;
1261 else
1262 return arg_regs[idx];
/* gfunc_call(nb_args): emit a call for the nb_args arguments on the value
   stack (vtop[0] .. vtop[-(nb_args-1)]) plus the function address below them.
   Phases, as they appear in the body:
     1. classify every argument into reg_args[]; an argument that would
        exceed 8 SSE or REGN integer registers has its entry passed to
        regargs_init() (presumably resetting it to "no registers" - confirm
        against its definition) so that it is treated as a stack argument;
     2. push the stack arguments run by run, padding each run so that it ends
        16-byte aligned, and remove them from the value stack and reg_args[];
     3. load the remaining arguments into their registers, staging the RDX
        and RCX slots in R10/R11;
     4. move R10/R11 into place, load %eax with the SSE register count
        (capped at 8), emit the call and release args_size bytes of stack. */
1265 /* Generate function call. The function address is pushed first, then
1266 all the parameters in call order. This functions pops all the
1267 parameters and the function address. */
1268 void gfunc_call(int nb_args)
1270 X86_64_Mode mode;
1271 CType type;
1272 int size, align, r, args_size, stack_adjust, run_start, run_end, i;
1273 int nb_reg_args = 0;
1274 int nb_sse_args = 0;
1275 int sse_reg = 0, gen_reg = 0;
1276 RegArgs *reg_args = alloca(nb_args * sizeof *reg_args);
1278 /* calculate the number of integer/float register arguments */
1279 for(i = nb_args - 1; i >= 0; i--) {
1280 int fregs, iregs;
1281 mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_args[i]);
1282 fregs = regargs_fregs(&reg_args[i]);
1283 iregs = regargs_iregs(&reg_args[i]);
/* note: the totals below are bumped even when the argument ends up on the stack */
1285 nb_sse_args += fregs;
1286 nb_reg_args += iregs;
1288 if (sse_reg + fregs > 8 || gen_reg + iregs > REGN) {
1289 regargs_init(&reg_args[i]);
1290 } else {
1291 sse_reg += fregs;
1292 gen_reg += iregs;
1296 /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
1297 and ended by a 16-byte aligned argument. This is because, from the point of view of
1298 the callee, argument alignment is computed from the bottom up. */
1299 /* for struct arguments, we need to call memcpy and the function
1300 call breaks register passing arguments we are preparing.
1301 So, we process arguments which will be passed by stack first. */
1302 gen_reg = nb_reg_args;
1303 sse_reg = nb_sse_args;
1304 run_start = 0;
1305 args_size = 0;
1306 while (run_start != nb_args) {
1307 int run_gen_reg = gen_reg, run_sse_reg = sse_reg;
1309 run_end = nb_args;
1310 stack_adjust = 0;
/* find the end of this run (first 16-byte aligned stack argument) and add up
   the sizes of the stack arguments that precede it */
1311 for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
1312 int stack = regargs_nregs(&reg_args[i]) == 0;
1313 classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, NULL);
1315 if (stack) {
1316 if (align == 16)
1317 run_end = i;
1318 else
1319 stack_adjust += size;
1323 gen_reg = run_gen_reg;
1324 sse_reg = run_sse_reg;
1326 /* adjust stack to align SSE boundary */
1327 if (stack_adjust &= 15) {
1328 /* fetch cpu flag before the following sub will change the value */
1329 if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
1330 gv(RC_INT);
1332 stack_adjust = 16 - stack_adjust;
1333 o(0x48);
1334 oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
1335 args_size += stack_adjust;
/* push every stack argument of the run; i only advances past register
   arguments because a pushed argument is removed from the value stack */
1338 for(i = run_start; i < run_end;) {
1339 int arg_stored = regargs_nregs(&reg_args[i]) == 0;
1340 SValue tmp;
1341 RegArgs args;
1343 if (!arg_stored) {
1344 ++i;
1345 continue;
1348 /* Swap argument to top, it will possibly be changed here,
1349 and might use more temps. At the end of the loop we keep
1350 in on the stack and swap it back to its original position
1351 if it is a register. */
1352 tmp = vtop[0];
1353 vtop[0] = vtop[-i];
1354 vtop[-i] = tmp;
1356 classify_x86_64_arg(&vtop->type, NULL, &size, &align, &args);
1358 switch (vtop->type.t & VT_BTYPE) {
1359 case VT_STRUCT:
1360 /* allocate the necessary size on stack */
1361 o(0x48);
1362 oad(0xec81, size); /* sub $xxx, %rsp */
1363 /* generate structure store */
1364 r = get_reg(RC_INT);
1365 orex(1, r, 0, 0x89); /* mov %rsp, r */
1366 o(0xe0 + REG_VALUE(r));
1367 vset(&vtop->type, r | VT_LVAL, 0);
1368 vswap();
1369 vstore();
1370 args_size += size;
1371 break;
/* long double has 16-byte alignment, so it is expected to end a run and be
   handled by the loop further down rather than here */
1373 case VT_LDOUBLE:
1374 assert(0);
1375 break;
1377 case VT_FLOAT:
1378 case VT_DOUBLE:
1379 r = gv(RC_FLOAT);
1380 o(0x50); /* push $rax */
1381 /* movq %xmmN, (%rsp) */
1382 o(0xd60f66);
1383 o(0x04 + REG_VALUE(r)*8);
1384 o(0x24);
1385 args_size += size;
1386 break;
1388 default:
1389 /* simple type */
1390 /* XXX: implicit cast ? */
1391 --gen_reg;
1392 r = gv(RC_INT);
1393 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
1394 args_size += size;
1395 break;
1398 /* And swap the argument back to its original position. */
1399 tmp = vtop[0];
1400 vtop[0] = vtop[-i];
1401 vtop[-i] = tmp;
1403 vrotb(i+1);
1404 assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
1405 vpop();
/* keep reg_args[] index-aligned with the value stack after the pop */
1406 memmove(reg_args + i, reg_args + i + 1, (nb_args - i - 1) * sizeof *reg_args);
1407 --nb_args;
1408 --run_end;
1411 /* handle 16 byte aligned arguments at end of run */
1412 run_start = i = run_end;
1413 while (i < nb_args) {
1414 /* Rotate argument to top since it will always be popped */
1415 mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, NULL);
1416 if (align != 16)
1417 break;
1419 vrotb(i+1);
1421 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1422 gv(RC_ST0);
1423 oad(0xec8148, size); /* sub $xxx, %rsp */
1424 o(0x7cdb); /* fstpt 0(%rsp) */
1425 g(0x24);
1426 g(0x00);
1427 args_size += size;
1428 } else {
1429 assert(mode == x86_64_mode_memory);
1431 /* allocate the necessary size on stack */
1432 o(0x48);
1433 oad(0xec81, size); /* sub $xxx, %rsp */
1434 /* generate structure store */
1435 r = get_reg(RC_INT);
1436 orex(1, r, 0, 0x89); /* mov %rsp, r */
1437 o(0xe0 + REG_VALUE(r));
1438 vset(&vtop->type, r | VT_LVAL, 0);
1439 vswap();
1440 vstore();
1441 args_size += size;
1444 vpop();
1445 memmove(reg_args + i, reg_args + i + 1, (nb_args - i - 1) * sizeof *reg_args);
1446 --nb_args;
1450 /* XXX This should be superfluous. */
1451 save_regs(0); /* save used temporary registers */
1453 /* recalculate the number of register arguments there actually
1454 * are. This is slow but more obviously correct than using the
1455 * old counts. */
1456 gen_reg = 0;
1457 sse_reg = 0;
1458 for(i = 0; i < nb_args; i++) {
1459 gen_reg += regargs_iregs(&reg_args[i]);
1460 sse_reg += regargs_fregs(&reg_args[i]);
1463 /* then, we prepare register passing arguments.
1464 Note that we cannot set RDX and RCX in this loop because gv()
1465 may break these temporary registers. Let's use R10 and R11
1466 instead of them */
1467 assert(gen_reg <= REGN);
1468 assert(sse_reg <= 8);
/* arguments are consumed from the top of the value stack, so the register
   counters run downwards from the totals computed above */
1469 for(i = 0; i < nb_args; i++) {
1470 RegArgs args;
1472 args = reg_args[i];
1474 /* Alter stack entry type so that gv() knows how to treat it */
1475 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
1476 int k;
/* load each SSE-classified piece of the struct from its recorded byte offset */
1478 for(k=REG_ARGS_MAX-1; k>=0; k--) {
1479 if (args.freg[k] == -1)
1480 continue;
1482 sse_reg--;
1483 assert(sse_reg >= 0);
1485 vdup();
1486 vtop->type.t = VT_DOUBLE;
1487 vtop->c.ull += args.freg[k];
1488 gv(RC_XMM0 << sse_reg);
1489 vpop();
/* then each integer-classified piece, moved into its staging register */
1491 for(k=REG_ARGS_MAX-1; k>=0; k--) {
1492 int d;
1493 if (args.ireg[k] == -1)
1494 continue;
1496 gen_reg--;
1498 vdup();
1499 vtop->type.t = VT_LLONG;
1500 vtop->c.ull += args.ireg[k];
1501 r = gv(RC_INT);
1502 d = arg_prepare_reg(gen_reg);
1503 orex(1,d,r,0x89); /* mov */
1504 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
1505 vpop();
1507 } else {
1508 /* XXX is it really necessary to set vtop->type? */
1509 classify_x86_64_arg(&vtop->type, &type, &size, &align, NULL);
1510 vtop->type = type;
1511 if (args.freg[0] != -1) {
1512 --sse_reg;
1513 /* Load directly to register */
1514 gv(RC_XMM0 << sse_reg);
1515 } else if (args.ireg[0] != -1) {
1516 int d;
1517 /* simple type */
1518 /* XXX: implicit cast ? */
1519 gen_reg--;
1520 r = gv(RC_INT);
1521 d = arg_prepare_reg(gen_reg);
1522 orex(1,d,r,0x89); /* mov */
1523 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
1524 } else {
1525 assert(0);
1528 vtop--;
1530 assert(gen_reg == 0);
1531 assert(sse_reg == 0);
1533 /* We shouldn't have many operands on the stack anymore, but the
1534 call address itself is still there, and it might be in %eax
1535 (or edx/ecx) currently, which the below writes would clobber.
1536 So evict all remaining operands here. */
1537 save_regs(0);
1539 /* Copy R10 and R11 into RDX and RCX, respectively */
1540 if (nb_reg_args > 2) {
1541 o(0xd2894c); /* mov %r10, %rdx */
1542 if (nb_reg_args > 3) {
1543 o(0xd9894c); /* mov %r11, %rcx */
/* %eax receives the SSE register count, capped at 8 */
1547 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
1548 gcall_or_jmp(0);
/* release everything that was pushed (arguments plus alignment padding) */
1549 if (args_size)
1550 gadd_sp(args_size);
1551 vtop--;
1555 #define FUNC_PROLOG_SIZE 11
1557 static void push_arg_reg(int i) {
1558 loc -= 8;
1559 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
1562 /* generate function prolog of type 't' */
1563 void gfunc_prolog(CType *func_type)
1565 X86_64_Mode mode;
1566 int i, addr, align, size;
1567 int param_addr = 0, reg_param_index, sse_param_index;
1568 Sym *sym;
1569 CType *type;
1571 sym = func_type->ref;
1572 addr = PTR_SIZE * 2;
1573 loc = 0;
1574 ind += FUNC_PROLOG_SIZE;
1575 func_sub_sp_offset = ind;
1576 func_ret_sub = 0;
1578 if (func_type->ref->c == FUNC_ELLIPSIS) {
1579 int seen_reg_num, seen_sse_num, seen_stack_size;
1580 seen_reg_num = seen_sse_num = 0;
1581 /* frame pointer and return address */
1582 seen_stack_size = PTR_SIZE * 2;
1583 /* count the number of seen parameters */
1584 sym = func_type->ref;
1585 while ((sym = sym->next) != NULL) {
1586 RegArgs args;
1588 type = &sym->type;
1589 mode = classify_x86_64_arg(type, NULL, &size, &align, &args);
1591 switch (mode) {
1592 default:
1593 stack_arg:
1594 seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
1595 break;
1597 case x86_64_mode_integer:
1598 case x86_64_mode_sse: {
1599 int stack = 0;
1601 seen_sse_num += regargs_fregs(&args);
1602 seen_reg_num += regargs_iregs(&args);
1604 if (seen_reg_num > 8) {
1605 seen_reg_num = 8;
1606 stack = 1;
1608 if (seen_sse_num > 8) {
1609 seen_sse_num = 8;
1610 stack = 1;
1613 if (stack)
1614 goto stack_arg;
1615 break;
1620 loc -= 16;
1621 /* movl $0x????????, -0x10(%rbp) */
1622 o(0xf045c7);
1623 gen_le32(seen_reg_num * 8);
1624 /* movl $0x????????, -0xc(%rbp) */
1625 o(0xf445c7);
1626 gen_le32(seen_sse_num * 16 + 48);
1627 /* movl $0x????????, -0x8(%rbp) */
1628 o(0xf845c7);
1629 gen_le32(seen_stack_size);
1631 /* save all register passing arguments */
1632 for (i = 0; i < 8; i++) {
1633 loc -= 16;
1634 o(0xd60f66); /* movq */
1635 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
1636 /* movq $0, loc+8(%rbp) */
1637 o(0x85c748);
1638 gen_le32(loc + 8);
1639 gen_le32(0);
1641 for (i = 0; i < REGN; i++) {
1642 push_arg_reg(REGN-1-i);
1646 sym = func_type->ref;
1647 reg_param_index = 0;
1648 sse_param_index = 0;
1650 /* if the function returns a structure, then add an
1651 implicit pointer parameter */
1652 func_vt = sym->type;
1653 mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, NULL);
1654 if (mode == x86_64_mode_memory) {
1655 push_arg_reg(reg_param_index);
1656 func_vc = loc;
1657 reg_param_index++;
1659 /* define parameters */
1660 while ((sym = sym->next) != NULL) {
1661 RegArgs args;
1662 int reg_count_integer = 0;
1663 int reg_count_sse = 0;
1664 int arg_stored = 1;
1666 type = &sym->type;
1667 mode = classify_x86_64_arg(type, NULL, &size, &align, &args);
1668 reg_count_integer = regargs_iregs(&args);
1669 reg_count_sse = regargs_fregs(&args);
1671 switch (mode) {
1672 case x86_64_mode_integer:
1673 case x86_64_mode_sse:
1674 if (reg_count_integer || reg_count_sse) {
1675 if ((reg_count_sse == 0 || sse_param_index + reg_count_sse <= 8) &&
1676 (reg_count_integer == 0 || reg_param_index + reg_count_integer <= REGN)) {
1677 /* argument fits into registers */
1678 arg_stored = 0;
1682 if (!arg_stored) {
1683 /* save arguments passed by register */
1684 loc -= (reg_count_sse + reg_count_integer) * 8;
1685 param_addr = loc;
1686 for (i = 0; i < reg_count_sse; ++i) {
1687 o(0xd60f66); /* movq */
1688 gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + args.freg[i]);
1689 ++sse_param_index;
1691 for (i = 0; i < reg_count_integer; ++i) {
1692 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + args.ireg[i]);
1693 ++reg_param_index;
1695 } else {
1696 addr = (addr + align - 1) & -align;
1697 param_addr = addr;
1698 addr += size;
1700 break;
1702 case x86_64_mode_memory:
1703 case x86_64_mode_x87:
1704 addr = (addr + align - 1) & -align;
1705 param_addr = addr;
1706 addr += size;
1707 break;
1708 default: break; /* nothing to be done for x86_64_mode_none */
1710 sym_push(sym->v & ~SYM_FIELD, type,
1711 VT_LOCAL | VT_LVAL, param_addr);
1714 #ifdef CONFIG_TCC_BCHECK
1715 /* leave some room for bound checking code */
1716 if (tcc_state->do_bounds_check) {
1717 func_bound_offset = lbounds_section->data_offset;
1718 func_bound_ind = ind;
1719 oad(0xb8, 0); /* lbound section pointer */
1720 o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
1721 oad(0xb8, 0); /* call to function */
1723 #endif
/* gfunc_epilog: emit the function exit sequence and then go back and write
   the real prologue into the FUNC_PROLOG_SIZE bytes that gfunc_prolog left
   free.  With bounds checking enabled and local bound entries present, it
   also terminates the bounds table, patches the placeholder emitted by
   gfunc_prolog with a call to __bound_local_new, and emits a call to
   __bound_local_delete before leaving. */
1726 /* generate function epilog */
1727 void gfunc_epilog(void)
1729 int v, saved_ind;
1731 #ifdef CONFIG_TCC_BCHECK
/* only when this function actually added entries to lbounds_section */
1732 if (tcc_state->do_bounds_check
1733 && func_bound_offset != lbounds_section->data_offset)
1735 addr_t saved_ind;
1736 addr_t *bounds_ptr;
1737 Sym *sym_data;
1739 /* add end of table info */
1740 bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
1741 *bounds_ptr = 0;
1743 /* generate bound local allocation */
1744 sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
1745 func_bound_offset, lbounds_section->data_offset);
/* temporarily rewind the output position to the placeholder recorded in
   func_bound_ind, patch it, then return to the current position */
1746 saved_ind = ind;
1747 ind = func_bound_ind;
1748 greloc(cur_text_section, sym_data, ind + 1, R_386_32);
1749 ind = ind + 5 + 3;
1750 gen_static_call(TOK___bound_local_new);
1751 ind = saved_ind;
1753 /* generate bound check local freeing */
1754 o(0x5250); /* save returned value, if any */
1755 greloc(cur_text_section, sym_data, ind + 1, R_386_32);
1756 oad(0xb8, 0); /* mov xxx, %rax */
1757 o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
1758 gen_static_call(TOK___bound_local_delete);
1759 o(0x585a); /* restore returned value, if any */
1761 #endif
1762 o(0xc9); /* leave */
1763 if (func_ret_sub == 0) {
1764 o(0xc3); /* ret */
1765 } else {
1766 o(0xc2); /* ret n */
/* 16-bit immediate, low byte first */
1767 g(func_ret_sub);
1768 g(func_ret_sub >> 8);
1770 /* align local size to word & save local variables */
/* v = size of the locals (-loc) rounded up to a multiple of 16 */
1771 v = (-loc + 15) & -16;
1772 saved_ind = ind;
/* write the prologue into the space reserved at function entry:
   4 + 3 + 4 = 11 bytes = FUNC_PROLOG_SIZE */
1773 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
1774 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
1775 o(0xec8148); /* sub rsp, stacksize */
1776 gen_le32(v);
1777 ind = saved_ind;
1780 #endif /* not PE */
/* Generate an unconditional jump (opcode 0xe9) to label 't'.
   The result of psym() is handed back to the caller unchanged. */
int gjmp(int t)
{
    int chain;

    chain = psym(0xe9, t);
    return chain;
}
1788 /* generate a jump to a fixed address */
1789 void gjmp_addr(int a)
1791 int r;
1792 r = a - ind - 2;
1793 if (r == (char)r) {
1794 g(0xeb);
1795 g(r);
1796 } else {
1797 oad(0xe9, a - ind - 5);
/* gtst(inv, t): turn the value on top of the value stack into a conditional
   jump that is added to jump list 't'; returns the updated list.  Only two
   kinds of operand are handled here: a pending comparison (VT_CMP) and an
   existing jump list (VT_JMP / VT_JMPI); any other operand just falls
   through to the pop.  The operand is always popped. */
1801 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1802 int gtst(int inv, int t)
1804 int v, *p;
1806 v = vtop->r & VT_VALMASK;
1807 if (v == VT_CMP) {
1808 /* fast case : can jump directly since flags are set */
/* bit 0x100 in c.i marks a float comparison (gen_opf sets it) */
1809 if (vtop->c.i & 0x100)
1811 /* This was a float compare. If the parity flag is set
1812 the result was unordered. For anything except != this
1813 means false and we don't jump (anding both conditions).
1814 For != this means true (oring both).
1815 Take care about inverting the test. We need to jump
1816 to our target if the result was unordered and test wasn't NE,
1817 otherwise if unordered we don't want to jump. */
1818 vtop->c.i &= ~0x100;
1819 if (!inv == (vtop->c.i != TOK_NE))
/* short jp over the 6-byte conditional jump emitted below */
1820 o(0x067a); /* jp +6 */
1821 else
1823 g(0x0f);
1824 t = psym(0x8a, t); /* jp t */
/* 0x0f followed by the condition byte derived from the comparison token;
   xor with inv flips the condition */
1827 g(0x0f);
1828 t = psym((vtop->c.i - 16) ^ inv, t);
1829 } else if (v == VT_JMP || v == VT_JMPI) {
1830 /* && or || optimization */
1831 if ((v & 1) == inv) {
1832 /* insert vtop->c jump list in t */
/* walk to the end of the operand's jump list, whose links are stored in the
   code bytes themselves, and append t there */
1833 p = &vtop->c.i;
1834 while (*p != 0)
1835 p = (int *)(cur_text_section->data + *p);
1836 *p = t;
1837 t = vtop->c.i;
1838 } else {
1839 t = gjmp(t);
1840 gsym(vtop->c.i);
1843 vtop--;
1844 return t;
/* gen_opi(op): emit code for the integer operation 'op' on the two topmost
   value-stack entries (vtop[-1] op vtop[0]) and leave the single result on
   the stack.  'll' selects 64-bit operand size (from the left operand's
   type), 'uu' records whether the left operand is unsigned, and 'cc' is set
   when the right operand is a plain constant.  Operators that are not listed
   explicitly (the comparisons) go through the default case, which emits a
   cmp and leaves a VT_CMP result. */
1847 /* generate an integer binary operation */
1848 void gen_opi(int op)
1850 int r, fr, opc, c;
1851 int ll, uu, cc;
1853 ll = is64_type(vtop[-1].type.t);
1854 uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
1855 cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;
1857 switch(op) {
1858 case '+':
1859 case TOK_ADDC1: /* add with carry generation */
1860 opc = 0;
/* shared tail for the two-operand ALU group; opc selects the operation
   (0 add, 1 or, 2 adc, 3 sbb, 4 and, 5 sub, 6 xor, 7 cmp) */
1861 gen_op8:
/* immediate form only when the constant fits in 32 bits */
1862 if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) {
1863 /* constant case */
1864 vswap();
1865 r = gv(RC_INT);
1866 vswap();
1867 c = vtop->c.i;
/* NOTE(review): this 8-bit range test relies on plain char being signed on
   the host compiler - TODO confirm for hosts where char is unsigned */
1868 if (c == (char)c) {
1869 /* XXX: generate inc and dec for smaller code ? */
1870 orex(ll, r, 0, 0x83);
1871 o(0xc0 | (opc << 3) | REG_VALUE(r));
1872 g(c);
1873 } else {
1874 orex(ll, r, 0, 0x81);
1875 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
1877 } else {
1878 gv2(RC_INT, RC_INT);
1879 r = vtop[-1].r;
1880 fr = vtop[0].r;
1881 orex(ll, r, fr, (opc << 3) | 0x01);
1882 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
1884 vtop--;
/* for comparisons the result is the flags, tagged with the comparison token */
1885 if (op >= TOK_ULT && op <= TOK_GT) {
1886 vtop->r = VT_CMP;
1887 vtop->c.i = op;
1889 break;
1890 case '-':
1891 case TOK_SUBC1: /* sub with carry generation */
1892 opc = 5;
1893 goto gen_op8;
1894 case TOK_ADDC2: /* add with carry use */
1895 opc = 2;
1896 goto gen_op8;
1897 case TOK_SUBC2: /* sub with carry use */
1898 opc = 3;
1899 goto gen_op8;
1900 case '&':
1901 opc = 4;
1902 goto gen_op8;
1903 case '^':
1904 opc = 6;
1905 goto gen_op8;
1906 case '|':
1907 opc = 1;
1908 goto gen_op8;
1909 case '*':
1910 gv2(RC_INT, RC_INT);
1911 r = vtop[-1].r;
1912 fr = vtop[0].r;
1913 orex(ll, fr, r, 0xaf0f); /* imul fr, r */
1914 o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
1915 vtop--;
1916 break;
1917 case TOK_SHL:
1918 opc = 4;
1919 goto gen_shift;
1920 case TOK_SHR:
1921 opc = 5;
1922 goto gen_shift;
1923 case TOK_SAR:
1924 opc = 7;
1925 gen_shift:
1926 opc = 0xc0 | (opc << 3);
1927 if (cc) {
1928 /* constant case */
1929 vswap();
1930 r = gv(RC_INT);
1931 vswap();
1932 orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
1933 o(opc | REG_VALUE(r));
/* shift count is masked to the operand width */
1934 g(vtop->c.i & (ll ? 63 : 31));
1935 } else {
1936 /* we generate the shift in ecx */
1937 gv2(RC_INT, RC_RCX);
1938 r = vtop[-1].r;
1939 orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
1940 o(opc | REG_VALUE(r));
1942 vtop--;
1943 break;
1944 case TOK_UDIV:
1945 case TOK_UMOD:
1946 uu = 1;
1947 goto divmod;
1948 case '/':
1949 case '%':
1950 case TOK_PDIV:
1951 uu = 0;
1952 divmod:
1953 /* first operand must be in eax */
1954 /* XXX: need better constraint for second operand */
1955 gv2(RC_RAX, RC_RCX);
1956 r = vtop[-1].r;
1957 fr = vtop[0].r;
1958 vtop--;
/* the division writes rdx, so spill whatever currently lives there */
1959 save_reg(TREG_RDX);
1960 orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
1961 orex(ll, fr, 0, 0xf7); /* div fr, %eax */
1962 o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
/* remainder is taken from rdx, quotient from rax */
1963 if (op == '%' || op == TOK_UMOD)
1964 r = TREG_RDX;
1965 else
1966 r = TREG_RAX;
1967 vtop->r = r;
1968 break;
1969 default:
1970 opc = 7;
1971 goto gen_op8;
/* Generate a 'long long' binary operation.  On this target it is simply
   forwarded to gen_opi(), which handles 64-bit operands itself. */
void gen_opl(int op)
{
    gen_opi(op);
}
/* gen_opf(op): emit code for the floating point operation 'op' on the two
   topmost value-stack entries.  long double operands go through the x87
   stack (RC_ST0); float and double use SSE registers (RC_FLOAT).
   Comparisons leave a VT_CMP result; arithmetic leaves the result in the
   register of the left operand.  'swapped' tracks whether the two operands
   were exchanged on the value stack relative to source order. */
1980 /* generate a floating point operation 'v = t1 op t2' instruction. The
1981 two operands are guaranted to have the same floating point type */
1982 /* XXX: need to use ST1 too */
1983 void gen_opf(int op)
1985 int a, ft, fc, swapped, r;
1986 int float_type =
1987 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1989 /* convert constants to memory references */
1990 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1991 vswap();
1992 gv(float_type);
1993 vswap();
1995 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1996 gv(float_type);
1998 /* must put at least one value in the floating point register */
1999 if ((vtop[-1].r & VT_LVAL) &&
2000 (vtop[0].r & VT_LVAL)) {
2001 vswap();
2002 gv(float_type);
2003 vswap();
2005 swapped = 0;
2006 /* swap the stack if needed so that t1 is the register and t2 is
2007 the memory reference */
2008 if (vtop[-1].r & VT_LVAL) {
2009 vswap();
2010 swapped = 1;
/* ---- x87 path: long double ---- */
2012 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
2013 if (op >= TOK_ULT && op <= TOK_GT) {
2014 /* load on stack second operand */
2015 load(TREG_ST0, vtop);
2016 save_reg(TREG_RAX); /* eax is used by FP comparison code */
2017 if (op == TOK_GE || op == TOK_GT)
2018 swapped = !swapped;
2019 else if (op == TOK_EQ || op == TOK_NE)
2020 swapped = 0;
2021 if (swapped)
2022 o(0xc9d9); /* fxch %st(1) */
2023 if (op == TOK_EQ || op == TOK_NE)
2024 o(0xe9da); /* fucompp */
2025 else
2026 o(0xd9de); /* fcompp */
2027 o(0xe0df); /* fnstsw %ax */
/* test the status-word bits now in %ah, then rewrite 'op' to the integer
   condition (EQ/NE) that the following jump should use */
2028 if (op == TOK_EQ) {
2029 o(0x45e480); /* and $0x45, %ah */
2030 o(0x40fC80); /* cmp $0x40, %ah */
2031 } else if (op == TOK_NE) {
2032 o(0x45e480); /* and $0x45, %ah */
2033 o(0x40f480); /* xor $0x40, %ah */
2034 op = TOK_NE;
2035 } else if (op == TOK_GE || op == TOK_LE) {
2036 o(0x05c4f6); /* test $0x05, %ah */
2037 op = TOK_EQ;
2038 } else {
2039 o(0x45c4f6); /* test $0x45, %ah */
2040 op = TOK_EQ;
2042 vtop--;
2043 vtop->r = VT_CMP;
2044 vtop->c.i = op;
2045 } else {
2046 /* no memory reference possible for long double operations */
2047 load(TREG_ST0, vtop);
2048 swapped = !swapped;
/* 'a' selects the x87 arithmetic operation; for the non-commutative ones
   (- and /) it is bumped by one when the operands are in swapped order */
2050 switch(op) {
2051 default:
2052 case '+':
2053 a = 0;
2054 break;
2055 case '-':
2056 a = 4;
2057 if (swapped)
2058 a++;
2059 break;
2060 case '*':
2061 a = 1;
2062 break;
2063 case '/':
2064 a = 6;
2065 if (swapped)
2066 a++;
2067 break;
2069 ft = vtop->type.t;
2070 fc = vtop->c.ul;
2071 o(0xde); /* fxxxp %st, %st(1) */
2072 o(0xc1 + (a << 3));
2073 vtop--;
/* ---- SSE path: float and double ---- */
2075 } else {
2076 if (op >= TOK_ULT && op <= TOK_GT) {
2077 /* if saved lvalue, then we must reload it */
2078 r = vtop->r;
2079 fc = vtop->c.ul;
2080 if ((r & VT_VALMASK) == VT_LLOCAL) {
2081 SValue v1;
2082 r = get_reg(RC_INT);
2083 v1.type.t = VT_PTR;
2084 v1.r = VT_LOCAL | VT_LVAL;
2085 v1.c.ul = fc;
2086 load(r, &v1);
2087 fc = 0;
/* ordered comparisons are expressed as "above" / "above or equal" after
   comisd; LT/LE get there by exchanging the operands */
2090 if (op == TOK_EQ || op == TOK_NE) {
2091 swapped = 0;
2092 } else {
2093 if (op == TOK_LE || op == TOK_LT)
2094 swapped = !swapped;
2095 if (op == TOK_LE || op == TOK_GE) {
2096 op = 0x93; /* setae */
2097 } else {
2098 op = 0x97; /* seta */
2102 if (swapped) {
2103 gv(RC_FLOAT);
2104 vswap();
2106 assert(!(vtop[-1].r & VT_LVAL));
/* 0x66 prefix selects the double-precision form */
2108 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
2109 o(0x66);
2110 if (op == TOK_EQ || op == TOK_NE)
2111 o(0x2e0f); /* ucomisd */
2112 else
2113 o(0x2f0f); /* comisd */
2115 if (vtop->r & VT_LVAL) {
2116 gen_modrm(vtop[-1].r, r, vtop->sym, fc);
2117 } else {
2118 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
2121 vtop--;
2122 vtop->r = VT_CMP;
/* 0x100 marks the result as a float comparison for gtst() */
2123 vtop->c.i = op | 0x100;
2124 } else {
2125 assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
/* 'a' is added to opcode 0x58 below to pick the SSE arithmetic instruction */
2126 switch(op) {
2127 default:
2128 case '+':
2129 a = 0;
2130 break;
2131 case '-':
2132 a = 4;
2133 break;
2134 case '*':
2135 a = 1;
2136 break;
2137 case '/':
2138 a = 6;
2139 break;
2141 ft = vtop->type.t;
2142 fc = vtop->c.ul;
2143 assert((ft & VT_BTYPE) != VT_LDOUBLE);
2145 r = vtop->r;
2146 /* if saved lvalue, then we must reload it */
2147 if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
2148 SValue v1;
2149 r = get_reg(RC_INT);
2150 v1.type.t = VT_PTR;
2151 v1.r = VT_LOCAL | VT_LVAL;
2152 v1.c.ul = fc;
2153 load(r, &v1);
2154 fc = 0;
2157 assert(!(vtop[-1].r & VT_LVAL));
/* operands were exchanged earlier: load the memory operand into a register
   and swap back so the operation is applied in source order */
2158 if (swapped) {
2159 assert(vtop->r & VT_LVAL);
2160 gv(RC_FLOAT);
2161 vswap();
/* prefix 0xf2 = scalar double, 0xf3 = scalar single */
2164 if ((ft & VT_BTYPE) == VT_DOUBLE) {
2165 o(0xf2);
2166 } else {
2167 o(0xf3);
2169 o(0x0f);
2170 o(0x58 + a);
2172 if (vtop->r & VT_LVAL) {
2173 gen_modrm(vtop[-1].r, r, vtop->sym, fc);
2174 } else {
2175 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
2178 vtop--;
2183 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
2184 and 'long long' cases. */
2185 void gen_cvt_itof(int t)
2187 if ((t & VT_BTYPE) == VT_LDOUBLE) {
2188 save_reg(TREG_ST0);
2189 gv(RC_INT);
2190 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
2191 /* signed long long to float/double/long double (unsigned case
2192 is handled generically) */
2193 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
2194 o(0x242cdf); /* fildll (%rsp) */
2195 o(0x08c48348); /* add $8, %rsp */
2196 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
2197 (VT_INT | VT_UNSIGNED)) {
2198 /* unsigned int to float/double/long double */
2199 o(0x6a); /* push $0 */
2200 g(0x00);
2201 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
2202 o(0x242cdf); /* fildll (%rsp) */
2203 o(0x10c48348); /* add $16, %rsp */
2204 } else {
2205 /* int to float/double/long double */
2206 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
2207 o(0x2404db); /* fildl (%rsp) */
2208 o(0x08c48348); /* add $8, %rsp */
2210 vtop->r = TREG_ST0;
2211 } else {
2212 int r = get_reg(RC_FLOAT);
2213 gv(RC_INT);
2214 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
2215 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
2216 (VT_INT | VT_UNSIGNED) ||
2217 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
2218 o(0x48); /* REX */
2220 o(0x2a0f);
2221 o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
2222 vtop->r = r;
2226 /* convert from one floating point type to another */
2227 void gen_cvt_ftof(int t)
2229 int ft, bt, tbt;
2231 ft = vtop->type.t;
2232 bt = ft & VT_BTYPE;
2233 tbt = t & VT_BTYPE;
2235 if (bt == VT_FLOAT) {
2236 gv(RC_FLOAT);
2237 if (tbt == VT_DOUBLE) {
2238 o(0x140f); /* unpcklps */
2239 o(0xc0 + REG_VALUE(vtop->r)*9);
2240 o(0x5a0f); /* cvtps2pd */
2241 o(0xc0 + REG_VALUE(vtop->r)*9);
2242 } else if (tbt == VT_LDOUBLE) {
2243 save_reg(RC_ST0);
2244 /* movss %xmm0,-0x10(%rsp) */
2245 o(0x110ff3);
2246 o(0x44 + REG_VALUE(vtop->r)*8);
2247 o(0xf024);
2248 o(0xf02444d9); /* flds -0x10(%rsp) */
2249 vtop->r = TREG_ST0;
2251 } else if (bt == VT_DOUBLE) {
2252 gv(RC_FLOAT);
2253 if (tbt == VT_FLOAT) {
2254 o(0x140f66); /* unpcklpd */
2255 o(0xc0 + REG_VALUE(vtop->r)*9);
2256 o(0x5a0f66); /* cvtpd2ps */
2257 o(0xc0 + REG_VALUE(vtop->r)*9);
2258 } else if (tbt == VT_LDOUBLE) {
2259 save_reg(RC_ST0);
2260 /* movsd %xmm0,-0x10(%rsp) */
2261 o(0x110ff2);
2262 o(0x44 + REG_VALUE(vtop->r)*8);
2263 o(0xf024);
2264 o(0xf02444dd); /* fldl -0x10(%rsp) */
2265 vtop->r = TREG_ST0;
2267 } else {
2268 int r;
2269 gv(RC_ST0);
2270 r = get_reg(RC_FLOAT);
2271 if (tbt == VT_DOUBLE) {
2272 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
2273 /* movsd -0x10(%rsp),%xmm0 */
2274 o(0x100ff2);
2275 o(0x44 + REG_VALUE(r)*8);
2276 o(0xf024);
2277 vtop->r = r;
2278 } else if (tbt == VT_FLOAT) {
2279 o(0xf0245cd9); /* fstps -0x10(%rsp) */
2280 /* movss -0x10(%rsp),%xmm0 */
2281 o(0x100ff3);
2282 o(0x44 + REG_VALUE(r)*8);
2283 o(0xf024);
2284 vtop->r = r;
2289 /* convert fp to int 't' type */
2290 void gen_cvt_ftoi(int t)
2292 int ft, bt, size, r;
2293 ft = vtop->type.t;
2294 bt = ft & VT_BTYPE;
2295 if (bt == VT_LDOUBLE) {
2296 gen_cvt_ftof(VT_DOUBLE);
2297 bt = VT_DOUBLE;
2300 gv(RC_FLOAT);
2301 if (t != VT_INT)
2302 size = 8;
2303 else
2304 size = 4;
2306 r = get_reg(RC_INT);
2307 if (bt == VT_FLOAT) {
2308 o(0xf3);
2309 } else if (bt == VT_DOUBLE) {
2310 o(0xf2);
2311 } else {
2312 assert(0);
2314 orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
2315 o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
2316 vtop->r = r;
2319 /* computed goto support */
2320 void ggoto(void)
2322 gcall_or_jmp(1);
2323 vtop--;
2326 /* Save the stack pointer onto the stack and return the location of its address */
2327 ST_FUNC void gen_vla_sp_save(int addr) {
2328 /* mov %rsp,addr(%rbp)*/
2329 gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
2332 /* Restore the SP from a location on the stack */
2333 ST_FUNC void gen_vla_sp_restore(int addr) {
2334 gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
2337 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2338 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2339 #ifdef TCC_TARGET_PE
2340 /* alloca does more than just adjust %rsp on Windows */
2341 vpush_global_sym(&func_old_type, TOK_alloca);
2342 vswap(); /* Move alloca ref past allocation size */
2343 gfunc_call(1);
2344 vset(type, REG_IRET, 0);
2345 #else
2346 int r = gv(RC_INT); /* allocation size */
2347 /* sub r,%rsp */
2348 o(0x2b48);
2349 o(0xe0 | REG_VALUE(r));
2350 /* We align to 16 bytes rather than align */
2351 /* and ~15, %rsp */
2352 o(0xf0e48348);
2353 vpop();
2354 #endif
2358 /* end of x86-64 code generator */
2359 /*************************************************************/
2360 #endif /* ! TARGET_DEFS_ONLY */
2361 /******************************************************/