/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         25
#define NB_ASM_REGS     16
#define CONFIG_TCC_ASM

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which
   makes assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_ST0     0x0080 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_IRE2    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_FRE2    RC_XMM1 /* function return: second float register */

/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_ST0 = 24,

    TREG_MEM = 0x20
};

#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_IRE2 TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_FRE2 TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16

/* define if return values need to be extended explicitly
   at caller side (for interfacing with non-TCC compilers) */
#define PROMOTE_RET
/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#define USING_GLOBALS
#include "tcc.h"
#include <assert.h>

ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};

static unsigned long func_sub_sp_offset;
static int func_ret_sub;

#if defined(CONFIG_TCC_BCHECK)
static addr_t func_bound_offset;
static unsigned long func_bound_ind;
ST_DATA int func_bound_add_epilog;
#endif

#ifdef TCC_TARGET_PE
static int func_scratch, func_alloca;
#endif

/* XXX: make it faster ? */
ST_FUNC void g(int c)
{
    int ind1;
    if (nocode_wanted)
        return;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}

ST_FUNC void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}

ST_FUNC void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

ST_FUNC void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

ST_FUNC void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
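
/* Editor's note: orex() emits an optional REX prefix before opcode 'b'.
   A REX byte is 0100WRXB: W (bit 3) selects 64-bit operand size, R (bit 2)
   extends the ModRM reg field, B (bit 0) extends the ModRM rm/base field
   (the X bit for SIB indexes is never needed here).  'r' feeds B and 'r2'
   feeds R; pseudo registers (>= VT_CONST) are treated as 0.  For example
   orex(1, TREG_R8, TREG_RAX, 0x8b) emits 0x49 0x8b, i.e. REX.W+B followed
   by the mov opcode. */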
static void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
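
/* Editor's note: forward jumps are chained through their own displacement
   fields: each unresolved 32-bit slot holds the offset of the previous slot
   in the chain, terminated by 0.  gsym_addr() walks that list and rewrites
   every slot with the pc-relative displacement to 'a' (a - t - 4).  A
   negative 'a' instead stores the absolute value -a, which the PE epilog
   uses to patch constants (the alloca scratch size) rather than jump
   targets. */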
/* output a symbol and patch all calls to it */
ST_FUNC void gsym_addr(int t, int a)
{
    while (t) {
        unsigned char *ptr = cur_text_section->data + t;
        uint32_t n = read32le(ptr); /* next value */
        write32le(ptr, a < 0 ? -a : a - t - 4);
        t = n;
    }
}

static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}

/* instruction + 4 bytes data. Return the address of the data */
static int oad(int c, int s)
{
    int t;
    if (nocode_wanted)
        return s;
    o(c);
    t = ind;
    gen_le32(s);
    return t;
}

/* generate jmp to a label */
#define gjmp2(instr,lbl) oad(instr,lbl)

ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_32S, c), c=0;
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_64, c), c=0;
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_PC32, c-4), c=4;
    gen_le32(c-4);
}

/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifdef TCC_TARGET_PE
    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
        get_tok_str(sym->v, NULL), c, r,
        cur_text_section->data[ind-3],
        cur_text_section->data[ind-2],
        cur_text_section->data[ind-1]
        );
#endif
    greloca(cur_text_section, sym, ind, R_X86_64_GOTPCREL, -4);
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
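
/* Editor's note: a ModRM byte is laid out as mod(7:6) reg(5:3) rm(2:0).
   The encodings used below: 0x04 plus SIB byte 0x25 selects an absolute
   [disp32] address, 0x05 (mod=00, rm=101) is rip-relative disp32 in 64-bit
   mode, and 0x45/0x85 are [rbp+disp8]/[rbp+disp32] for locals. */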
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        if (!(r & VT_SYM)) {
            /* Absolute memory reference */
            o(0x04 | op_reg); /* [sib] | destreg */
            oad(0x25, c);     /* disp32 */
        } else {
            o(0x05 | op_reg); /* (%rip)+disp32 | destreg */
            if (is_got) {
                gen_gotpcrel(r, sym, c);
            } else {
                gen_addrpc32(r, sym, c);
            }
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}

/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.i;
    if (fc != sv->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in load");

    ft &= ~(VT_VOLATILE | VT_CONSTANT);

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.i = fc;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        if (fc != sv->c.i) {
            /* If the addend doesn't fit into a 32bit signed value
               we must use a 64bit move.  We've checked above
               that this doesn't have a sym associated. */
            v1.type.t = VT_LLONG;
            v1.r = VT_CONST;
            v1.c.i = sv->c.i;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
            fc = 0;
        }
        ll = 0;
        /* Like GCC we can load from small enough properly sized
           structs and unions as well.
           XXX maybe move to generic operand handling, but should
           occur only with asm, so tccasm.c might also be a better place */
        if ((ft & VT_BTYPE) == VT_STRUCT) {
            int align;
            switch (type_size(&sv->type, &align)) {
            case 1: ft = VT_BYTE; break;
            case 2: ft = VT_SHORT; break;
            case 4: ft = VT_INT; break;
            case 8: ft = VT_LLONG; break;
            default:
                tcc_error("invalid aggregate type for register load");
                break;
            }
        }
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else if ((ft & VT_TYPE) == (VT_VOID)) {
            /* Can happen with zero size structs */
            return;
        } else {
            assert(((ft & VT_BTYPE) == VT_INT)
                   || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR)
                   || ((ft & VT_BTYPE) == VT_FUNC)
                );
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.i);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            if (fc & 0x100) {
                v = vtop->cmp_r;
                fc &= ~0x100;
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE. */
                orex(0, r, 0, 0xb0 + REG_VALUE(r)); /* mov $0/1,%al */
                g(v ^ fc ^ (v == TOK_NE));
                o(0x037a + (REX_BASE(r) << 8));
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
            orex(0,r,0, 0x0f);
            o(0xc0b6 + REG_VALUE(r) * 0x900); /* movzbl %al, %eax */
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(is64_type(ft), r, v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}

/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    fr = v->r & VT_VALMASK;
    ft = v->type.t;
    fc = v->c.i;
    if (fc != v->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in store");
    ft &= ~(VT_VOLATILE | VT_CONSTANT);
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.i);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, etc. */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            orex(1, fr, r, op64);
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
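
/* Editor's note: direct calls use the 5-byte e8/e9 rel32 form with a
   PLT32 (ELF) or PC32 (PE) relocation, provided the addend fits in 32
   bits.  Otherwise the target is loaded into %r11 and reached with
   ff /2 (call) or ff /4 (jmp): r11 is call-clobbered and carries no
   argument in either calling convention, so it is always safe here. */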
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
        ((vtop->r & VT_SYM) && (vtop->c.i-4) == (int)(vtop->c.i-4))) {
        /* constant symbolic case -> simple relocation */
#ifdef TCC_TARGET_PE
        greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32, (int)(vtop->c.i-4));
#else
        greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32, (int)(vtop->c.i-4));
#endif
        oad(0xe8 + is_jmp, 0); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}

#if defined(CONFIG_TCC_BCHECK)

static void gen_bounds_call(int v)
{
    Sym *sym = external_global_sym(v, &func_old_type);
    oad(0xe8, 0);
#ifdef TCC_TARGET_PE
    greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
#else
    greloca(cur_text_section, sym, ind-4, R_X86_64_PLT32, -4);
#endif
}

/* generate a bounded pointer addition */
ST_FUNC void gen_bounded_ptr_add(void)
{
    vpush_global_sym(&func_old_type, TOK___bound_ptr_add);
    vrott(3);
    gfunc_call(2);
    vpushi(0);
    /* returned pointer is in rax */
    vtop->r = TREG_RAX | VT_BOUNDED;
    if (nocode_wanted)
        return;
    /* relocation offset of the bounding function call point */
    vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
}

/* patch pointer addition in vtop so that pointer dereferencing is
   also tested */
ST_FUNC void gen_bounded_ptr_deref(void)
{
    addr_t func;
    int size, align;
    ElfW(Rela) *rel;
    Sym *sym;

    if (nocode_wanted)
        return;

    size = type_size(&vtop->type, &align);
    switch(size) {
    case  1: func = TOK___bound_ptr_indir1; break;
    case  2: func = TOK___bound_ptr_indir2; break;
    case  4: func = TOK___bound_ptr_indir4; break;
    case  8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
    default:
        /* may happen with struct member access */
        return;
        //tcc_error("unhandled size when dereferencing bounded pointer");
        //func = 0;
        //break;
    }
    sym = external_global_sym(func, &func_old_type);
    if (!sym->c)
        put_extern_sym(sym, NULL, 0, 0);
    /* patch relocation */
    /* XXX: find a better solution ? */
    rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.i);
    rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
}

#ifdef TCC_TARGET_PE
# define TREG_FASTCALL_1 TREG_RCX
#else
# define TREG_FASTCALL_1 TREG_RDI
#endif

static void gen_bounds_prolog(void)
{
    /* leave some room for bound checking code */
    func_bound_offset = lbounds_section->data_offset;
    func_bound_ind = ind;
    func_bound_add_epilog = 0;
    o(0xb848 + TREG_FASTCALL_1 * 0x100); /* lbound section pointer */
    gen_le64 (0);
    oad(0xb8, 0); /* call to function */
}
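
/* Editor's note: the prolog above is a placeholder, a 10-byte movabs $0
   into the first argument register followed by a 5-byte "mov $0,%eax".
   If any local bounds were actually registered, gen_bounds_epilog()
   rewinds 'ind' to func_bound_ind, attaches an R_X86_64_64 relocation
   for the lbounds table to the movabs immediate (at ind + 2), and
   overwrites the second placeholder with a real call to
   __bound_local_new. */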
static void gen_bounds_epilog(void)
{
    addr_t saved_ind;
    addr_t *bounds_ptr;
    Sym *sym_data;
    int offset_modified = func_bound_offset != lbounds_section->data_offset;

    if (!offset_modified && !func_bound_add_epilog)
        return;

    /* add end of table info */
    bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
    *bounds_ptr = 0;

    sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                           func_bound_offset, lbounds_section->data_offset);

    /* generate bound local allocation */
    if (offset_modified) {
        saved_ind = ind;
        ind = func_bound_ind;
        greloca(cur_text_section, sym_data, ind + 2, R_X86_64_64, 0);
        ind = ind + 10;
        gen_bounds_call(TOK___bound_local_new);
        ind = saved_ind;
    }

    /* generate bound check local freeing */
    o(0x5250); /* save returned value, if any */
    greloca(cur_text_section, sym_data, ind + 2, R_X86_64_64, 0);
    o(0xb848 + TREG_FASTCALL_1 * 0x100); /* mov xxx, %rcx/di */
    gen_le64 (0);
    gen_bounds_call(TOK___bound_local_delete);
    o(0x585a); /* restore returned value, if any */
}
#endif

#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

static void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
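
/* Editor's note: Win64 passes an aggregate by value only if its size is
   a power of two no larger than 8 (1, 2, 4 or 8 bytes); anything else is
   passed by reference to a caller-made copy.  '!(size & (size - 1))' is
   the usual power-of-two test. */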
static int using_regs(int size)
{
    return !(size > 8 || (size & (size - 1)));
}

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    size = type_size(vt, &align);
    if (!using_regs(size))
        return 0;
    if (size == 8)
        ret->t = VT_LLONG;
    else if (size == 4)
        ret->t = VT_INT;
    else if (size == 2)
        ret->t = VT_SHORT;
    else
        ret->t = VT_BYTE;
    ret->ref = NULL;
    return 1;
}

static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}

static int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}

void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gbound_args(nb_args);
#endif

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (using_regs(size))
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (!using_regs(size)) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                if (tcc_state->nosse)
                    tcc_error("SSE disabled");
                if (arg >= REGN) {
                    gv(RC_XMM0);
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* Load directly to xmmN register */
                    gv(RC_XMM0 << arg);
                    d = arg_prepare_reg(arg);
                    /* mov %xmmN, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + arg*8 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);
    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);

    if ((vtop->r & VT_SYM) && vtop->sym->v == TOK_alloca) {
        /* need to add the "func_scratch" area after alloca */
        o(0x48); func_alloca = oad(0x05, func_alloca); /* add $NN, %rax */
#ifdef CONFIG_TCC_BCHECK
        if (tcc_state->do_bounds_check)
            gen_bounds_call(TOK___bound_alloca_nr); /* new region */
#endif
    }
    vtop--;
}

#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(Sym *func_sym)
{
    CType *func_type = &func_sym->type;
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 32;
    func_alloca = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    size = gfunc_arg_size(&func_vt);
    if (!using_regs(size)) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (!using_regs(size)) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type,
                     VT_LLOCAL | VT_LVAL, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    if (tcc_state->nosse)
                        tcc_error("SSE disabled");
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type,
                     VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_var) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_prolog();
#endif
}

/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    /* align local size to word & save local variables */
    func_scratch = (func_scratch + 15) & -16;
    loc = (loc & -16) - func_scratch;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_epilog();
#endif

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    v = -loc;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */
        greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148);   /* sub rsp, stacksize */
        gen_le32(v);
    }

    /* add the "func_scratch" area after each alloca seen */
    gsym_addr(func_alloca, -func_scratch);

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}

#else
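
/* Editor's note: gadd_sp() picks the short form when the amount fits in
   a signed byte: 48 83 c4 ib is "add $imm8,%rsp", 48 81 c4 id is
   "add $imm32,%rsp" (o() emits its argument low byte first). */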
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}

typedef enum X86_64_Mode {
  x86_64_mode_none,
  x86_64_mode_memory,
  x86_64_mode_integer,
  x86_64_mode_sse,
  x86_64_mode_x87
} X86_64_Mode;
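
/* Editor's note: this is a reduced form of the SysV AMD64 classification.
   Instead of classifying each eightbyte separately, tcc merges the classes
   of all struct members into a single mode: memory dominates, then
   integer, and x87 mixed with sse goes to memory.  For example
   struct { float x; int n; } merges sse with integer and is therefore
   passed in one general purpose register. */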
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}

static X86_64_Mode classify_x86_64_inner(CType *ty)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
        return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        mode = x86_64_mode_none;
        for (f = f->next; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }
    assert(0);
    return 0;
}
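
/* Editor's note: classify_x86_64_arg() below turns a C type into a
   passing mode plus a register count and a synthetic TCC type.  For
   instance struct { double a, b; } is 16 bytes and classifies as sse
   with reg_count 2 and VT_QFLOAT (xmm0/xmm1), while any aggregate larger
   than 16 bytes is x86_64_mode_memory and is passed in memory. */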
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
{
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    if (size > 4)
                        ret_t = VT_LLONG;
                    else if (size > 2)
                        ret_t = VT_INT;
                    else if (size > 1)
                        ret_t = VT_SHORT;
                    else
                        ret_t = VT_BYTE;
                    if ((ty->t & VT_BTYPE) == VT_STRUCT || (ty->t & VT_UNSIGNED))
                        ret_t |= VT_UNSIGNED;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none */
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}

ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
}

#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, i, reg_count, k;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;
    char *onstack = tcc_malloc((nb_args + 1) * sizeof (char));

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gbound_args(nb_args);
#endif

    /* calculate the number of integer/float register arguments, remember
       arguments to be passed via stack (in onstack[]), and also remember
       if we have to align the stack pointer to 16 (onstack[i] == 2).  Needs
       to be done in a left-to-right pass over arguments. */
    stack_adjust = 0;
    for(i = nb_args - 1; i >= 0; i--) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse && nb_sse_args + reg_count <= 8) {
            nb_sse_args += reg_count;
            onstack[i] = 0;
        } else if (mode == x86_64_mode_integer && nb_reg_args + reg_count <= REGN) {
            nb_reg_args += reg_count;
            onstack[i] = 0;
        } else if (mode == x86_64_mode_none) {
            onstack[i] = 0;
        } else {
            if (align == 16 && (stack_adjust &= 15)) {
                onstack[i] = 2;
                stack_adjust = 0;
            } else
                onstack[i] = 1;
            stack_adjust += size;
        }
    }

    if (nb_sse_args && tcc_state->nosse)
        tcc_error("SSE disabled but floating point arguments passed");

    /* fetch cpu flag before generating any code */
    if ((vtop->r & VT_VALMASK) == VT_CMP)
        gv(RC_INT);

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    args_size = 0;
    stack_adjust &= 15;
    for (i = k = 0; i < nb_args;) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (!onstack[i + k]) {
            ++i;
            continue;
        }
        /* Possibly adjust stack to align SSE boundary.  We're processing
           args from right to left while allocating happens left to right
           (stack grows down), so the adjustment needs to happen _after_
           an argument that requires it. */
        if (stack_adjust) {
            o(0x50); /* push %rax; aka sub $8,%rsp */
            args_size += 8;
            stack_adjust = 0;
        }
        if (onstack[i + k] == 2)
            stack_adjust = 1;

        vrotb(i+1);

        switch (vtop->type.t & VT_BTYPE) {
        case VT_STRUCT:
            /* allocate the necessary size on stack */
            o(0x48);
            oad(0xec81, size); /* sub $xxx, %rsp */
            /* generate structure store */
            r = get_reg(RC_INT);
            orex(1, r, 0, 0x89); /* mov %rsp, r */
            o(0xe0 + REG_VALUE(r));
            vset(&vtop->type, r | VT_LVAL, 0);
            vswap();
            vstore();
            break;

        case VT_LDOUBLE:
            gv(RC_ST0);
            oad(0xec8148, size); /* sub $xxx, %rsp */
            o(0x7cdb); /* fstpt 0(%rsp) */
            g(0x24);
            g(0x00);
            break;

        case VT_FLOAT:
        case VT_DOUBLE:
            assert(mode == x86_64_mode_sse);
            r = gv(RC_FLOAT);
            o(0x50); /* push $rax */
            /* movq %xmmN, (%rsp) */
            o(0xd60f66);
            o(0x04 + REG_VALUE(r)*8);
            o(0x24);
            break;

        default:
            assert(mode == x86_64_mode_integer);
            /* simple type */
            /* XXX: implicit cast ? */
            r = gv(RC_INT);
            orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
            break;
        }
        args_size += size;

        vpop();
        --nb_args;
        k++;
    }

    tcc_free(onstack);

    /* XXX This should be superfluous. */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            int d;
            gen_reg -= reg_count;
            r = gv(RC_INT);
            d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here. */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }
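
    /* Editor's note: the SysV ABI requires %al to hold an upper bound on
       the number of vector registers used in a call to an unprototyped
       or variadic function, so the callee's prologue can tell whether
       the xmm save area is live; hence the mov below. */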
    if (vtop->type.ref->f.func_type != FUNC_NEW) /* implies FUNC_OLD or FUNC_ELLIPSIS */
        oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}

#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}

/* generate function prolog of type 't' */
void gfunc_prolog(Sym *func_sym)
{
    CType *func_type = &func_sym->type;
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_var) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count > REGN)
                    goto stack_arg;
                seen_reg_num += reg_count;
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count > 8)
                    goto stack_arg;
                seen_sse_num += reg_count;
                break;
            }
        }

        loc -= 24;
        /* movl $0x????????, -0x18(%rbp) */
        o(0xe845c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0x14(%rbp) */
        o(0xec45c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* leaq $0x????????, %r11 */
        o(0x9d8d4c);
        gen_le32(seen_stack_size);
        /* movq %r11, -0x10(%rbp) */
        o(0xf05d894c);
        /* leaq $-192(%rbp), %r11 */
        o(0x9d8d4c);
        gen_le32(-176 - 24);
        /* movq %r11, -0x8(%rbp) */
        o(0xf85d894c);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            if (!tcc_state->nosse) {
                o(0xd60f66); /* movq */
                gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            }
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (tcc_state->nosse)
                tcc_error("SSE disabled but floating point arguments used");
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;
        }
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_prolog();
#endif
}

/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_epilog();
#endif
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
    o(0xec8148);   /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}

#endif /* not PE */

ST_FUNC void gen_fill_nops(int bytes)
{
    while (bytes--)
        g(0x90);
}

/* generate a jump to a label */
int gjmp(int t)
{
    return gjmp2(0xe9, t);
}

/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}

ST_FUNC int gjmp_append(int n, int t)
{
    void *p;
    /* insert vtop->c jump list in t */
    if (n) {
        uint32_t n1 = n, n2;
        while ((n2 = read32le(p = cur_text_section->data + n1)))
            n1 = n2;
        write32le(p, t);
        t = n;
    }
    return t;
}

ST_FUNC int gjmp_cond(int op, int t)
{
    if (op & 0x100) {
        /* This was a float compare.  If the parity flag is set
           the result was unordered.  For anything except != this
           means false and we don't jump (anding both conditions).
           For != this means true (oring both).
           Take care about inverting the test.  We need to jump
           to our target if the result was unordered and test wasn't NE,
           otherwise if unordered we don't want to jump. */
        int v = vtop->cmp_r;
        op &= ~0x100;
        if (op ^ v ^ (v != TOK_NE))
            o(0x067a); /* jp +6 */
        else {
            g(0x0f);
            t = gjmp2(0x8a, t); /* jp t */
        }
    }
    g(0x0f);
    t = gjmp2(op - 16, t);
    return t;
}

/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.i == vtop->c.i)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT)
            vset_VT_CMP(op);
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}
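
/* Editor's note: 64-bit integer operations share the 32-bit code paths;
   gen_opi() derives 'll' from the operand type and orex() then emits the
   REX.W prefix, so gen_opl() can simply forward. */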
void gen_opl(int op)
{
    gen_opi(op);
}

/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vset_VT_CMP(op);
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.i;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
                vtop->r = r = r | VT_LVAL;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vset_VT_CMP(op | 0x100);
            vtop->cmp_r = op;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
                vtop->r = r = r | VT_LVAL;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}

/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}

/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}

/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
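
/* Editor's note: movslq is 48 63 /r (movsxd r64, r/m32).  Hardcoding the
   0x48 REX.W prefix below appears safe because gv(RC_INT) only ever hands
   out rax, rcx or rdx (r8-r11 are not tagged RC_INT in reg_classes), so
   no REX.R/B bits are needed. */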
// Generate sign extension from 32 to 64 bits:
ST_FUNC void gen_cvt_sxtw(void)
{
    int r = gv(RC_INT);
    /* x86_64 specific: movslq */
    o(0x6348);
    o(0xc0 + (REG_VALUE(r) << 3) + REG_VALUE(r));
}

/* char/short to int conversion */
ST_FUNC void gen_cvt_csti(int t)
{
    int r, sz, xl, ll;
    r = gv(RC_INT);
    sz = !(t & VT_UNSIGNED);
    xl = (t & VT_BTYPE) == VT_SHORT;
    ll = (vtop->type.t & VT_BTYPE) == VT_LLONG;
    orex(ll, r, 0, 0xc0b60f /* mov[sz] %a[xl], %eax */
        | (sz << 3 | xl) << 8
        | (REG_VALUE(r) << 3 | REG_VALUE(r)) << 16
        );
}

/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}

/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}

#ifdef TCC_TARGET_PE
/* Save result of gen_vla_alloc onto the stack */
ST_FUNC void gen_vla_result(int addr) {
    /* mov %rax,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RAX, VT_LOCAL, NULL, addr);
}
#endif

/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
    int use_call = 0;

#if defined(CONFIG_TCC_BCHECK)
    use_call = tcc_state->do_bounds_check;
#endif
#ifdef TCC_TARGET_PE /* alloca does more than just adjust %rsp on Windows */
    use_call = 1;
#endif
    if (use_call) {
        vpush_global_sym(&func_old_type, TOK_alloca);
        vswap(); /* Move alloca ref past allocation size */
        gfunc_call(1);
    } else {
        int r;
        r = gv(RC_INT); /* allocation size */
        /* sub r,%rsp */
        o(0x2b48);
        o(0xe0 | REG_VALUE(r));
        /* we always align to 16 bytes rather than to the requested 'align' */
        /* and ~15, %rsp */
        o(0xf0e48348);
        vpop();
    }
}

/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/