/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         25
#define NB_ASM_REGS     16
#define CONFIG_TCC_ASM

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which
   makes assumptions about it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_ST0     0x0080 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_IRE2    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_FRE2    RC_XMM1 /* function return: second float register */

/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_ST0 = 24,

    TREG_MEM = 0x20
};

#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_IRE2 TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_FRE2 TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16

/* define if return values need to be extended explicitly
   on the caller side (for interfacing with non-TCC compilers) */
#define PROMOTE_RET
/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#define USING_GLOBALS
#include "tcc.h"
#include <assert.h>

ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};

static unsigned long func_sub_sp_offset;
static int func_ret_sub;

#if defined(CONFIG_TCC_BCHECK)
static addr_t func_bound_offset;
static unsigned long func_bound_ind;
static int func_bound_add_epilog;
#endif

#ifdef TCC_TARGET_PE
static int func_scratch, func_alloca;
#endif

/* XXX: make it faster ? */
ST_FUNC void g(int c)
{
    int ind1;
    if (nocode_wanted)
        return;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}

ST_FUNC void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}

ST_FUNC void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

ST_FUNC void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

ST_FUNC void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}

static void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
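
/* Note: the REX prefix emitted above is the byte 0100WRXB: bit 3 (W,
   from 'll') selects a 64-bit operand size, bit 2 (R, from 'r2')
   extends the ModRM reg field, and bit 0 (B, from 'r') extends the
   ModRM r/m field; the X bit (SIB index) is never needed here.  For
   example "mov %rax,%r8" is encoded 49 89 c0: REX.W+REX.B (0x49),
   opcode 0x89, ModRM 0xc0. */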

/* output a symbol and patch all calls to it */
ST_FUNC void gsym_addr(int t, int a)
{
    while (t) {
        unsigned char *ptr = cur_text_section->data + t;
        uint32_t n = read32le(ptr); /* next value */
        write32le(ptr, a < 0 ? -a : a - t - 4);
        t = n;
    }
}
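
/* Note: pending forward jumps are chained through their own 4-byte
   displacement slots: each unpatched site stores the offset of the
   previous one, with 0 terminating the list.  gsym_addr() walks that
   list and replaces every link with the final pc-relative displacement
   'a - t - 4' (the -4 accounting for the displacement bytes
   themselves); a negative 'a' instead patches in the absolute value
   -a, which the PE alloca handling uses to fill in constants. */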

static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}

/* instruction + 4 bytes data. Return the address of the data */
static int oad(int c, int s)
{
    int t;
    if (nocode_wanted)
        return s;
    o(c);
    t = ind;
    gen_le32(s);
    return t;
}

/* generate jmp to a label */
#define gjmp2(instr,lbl) oad(instr,lbl)

ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_32S, c), c=0;
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_64, c), c=0;
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_PC32, c-4), c=4;
    gen_le32(c-4);
}

/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifdef TCC_TARGET_PE
    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
        get_tok_str(sym->v, NULL), c, r,
        cur_text_section->data[ind-3],
        cur_text_section->data[ind-2],
        cur_text_section->data[ind-1]
        );
#endif
    greloca(cur_text_section, sym, ind, R_X86_64_GOTPCREL, -4);
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}

static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        if (!(r & VT_SYM)) {
            /* Absolute memory reference */
            o(0x04 | op_reg); /* [sib] | destreg */
            oad(0x25, c);     /* disp32 */
        } else {
            o(0x05 | op_reg); /* (%rip)+disp32 | destreg */
            if (is_got) {
                gen_gotpcrel(r, sym, c);
            } else {
                gen_addrpc32(r, sym, c);
            }
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
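
/* Note on the ModRM forms used above: a ModRM byte is mod(2)|reg(3)|rm(3).
   With mod=00, rm=101 (0x05|reg<<3) selects RIP-relative disp32 in
   64-bit mode, and rm=100 (0x04|reg<<3) selects a following SIB byte;
   the 0x25 SIB then gives an absolute disp32 address.  mod=01 and
   mod=10 with rm=101 (0x45/0x85) address disp8/disp32 off %rbp, the
   only base register used for locals here. */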

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}

/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.i;
    if (fc != sv->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in load");

    ft &= ~(VT_VOLATILE | VT_CONSTANT);

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as temporary registers */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.i = fc;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        if (fc != sv->c.i) {
            /* If the addend doesn't fit into a 32bit signed
               we must use a 64bit move.  We've checked above
               that this doesn't have a sym associated. */
            v1.type.t = VT_LLONG;
            v1.r = VT_CONST;
            v1.c.i = sv->c.i;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
            fc = 0;
        }
        ll = 0;
        /* Like GCC we can load from small enough properly sized
           structs and unions as well.
           XXX maybe move to generic operand handling, but should
           occur only with asm, so tccasm.c might also be a better place */
        if ((ft & VT_BTYPE) == VT_STRUCT) {
            int align;
            switch (type_size(&sv->type, &align)) {
                case 1: ft = VT_BYTE; break;
                case 2: ft = VT_SHORT; break;
                case 4: ft = VT_INT; break;
                case 8: ft = VT_LLONG; break;
                default:
                    tcc_error("invalid aggregate type for register load");
                    break;
            }
        }
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT)
                   || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR)
                   || ((ft & VT_BTYPE) == VT_FUNC)
                );
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.i);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            if (fc & 0x100)
              {
                v = vtop->cmp_r;
                fc &= ~0x100;
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE. */
                orex(0, r, 0, 0xb0 + REG_VALUE(r)); /* mov $0/1,%al */
                g(v ^ fc ^ (v == TOK_NE));
                o(0x037a + (REX_BASE(r) << 8));
              }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
            orex(0,r,0, 0x0f);
            o(0xc0b6 + REG_VALUE(r) * 0x900); /* movzbl %al, %eax */
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(is64_type(ft), r, v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}

/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    fr = v->r & VT_VALMASK;
    ft = v->type.t;
    fc = v->c.i;
    if (fc != v->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in store");
    ft &= ~(VT_VOLATILE | VT_CONSTANT);
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.i);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, etc. */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            orex(1, fr, r, op64);
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}

/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
        ((vtop->r & VT_SYM) && (vtop->c.i-4) == (int)(vtop->c.i-4))) {
        /* constant symbolic case -> simple relocation */
#ifdef TCC_TARGET_PE
        greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32, (int)(vtop->c.i-4));
#else
        greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32, (int)(vtop->c.i-4));
#endif
        oad(0xe8 + is_jmp, 0); /* call/jmp im */
#ifdef CONFIG_TCC_BCHECK
        if (tcc_state->do_bounds_check &&
            (vtop->sym->v == TOK_alloca ||
             vtop->sym->v == TOK_setjmp ||
             vtop->sym->v == TOK__setjmp
#ifndef TCC_TARGET_PE
             || vtop->sym->v == TOK_sigsetjmp
             || vtop->sym->v == TOK___sigsetjmp
#endif
            ))
            func_bound_add_epilog = 1;
#endif
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
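
/* Note: the direct form is E8 (call rel32) or E9 (jmp rel32), hence
   '0xe8 + is_jmp'.  The indirect form goes through %r11, a
   caller-saved register that carries no arguments on either ABI:
   REX.B 0x41, opcode FF, then ModRM /2 (0xd0+reg, call) or /4
   (0xe0+reg, jmp) -- 'is_jmp << 4' selects between the two. */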

#if defined(CONFIG_TCC_BCHECK)

static void gen_bounds_call(int v)
{
    Sym *sym = external_global_sym(v, &func_old_type);
    oad(0xe8, 0);
#ifdef TCC_TARGET_PE
    greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
#else
    greloca(cur_text_section, sym, ind-4, R_X86_64_PLT32, -4);
#endif
}

/* generate a bounded pointer addition */
ST_FUNC void gen_bounded_ptr_add(void)
{
    vpush_global_sym(&func_old_type, TOK___bound_ptr_add);
    vrott(3);
    gfunc_call(2);
    vpushi(0);
    /* returned pointer is in rax */
    vtop->r = TREG_RAX | VT_BOUNDED;
    if (nocode_wanted)
        return;
    /* relocation offset of the bounding function call point */
    vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
}

/* patch pointer addition in vtop so that pointer dereferencing is
   also tested */
ST_FUNC void gen_bounded_ptr_deref(void)
{
    addr_t func;
    int size, align;
    ElfW(Rela) *rel;
    Sym *sym;

    if (nocode_wanted)
        return;

    size = type_size(&vtop->type, &align);
    switch(size) {
    case  1: func = TOK___bound_ptr_indir1; break;
    case  2: func = TOK___bound_ptr_indir2; break;
    case  4: func = TOK___bound_ptr_indir4; break;
    case  8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
    default:
        /* may happen with struct member access */
        return;
        //tcc_error("unhandled size when dereferencing bounded pointer");
        //func = 0;
        //break;
    }
    sym = external_global_sym(func, &func_old_type);
    if (!sym->c)
        put_extern_sym(sym, NULL, 0, 0);
    /* patch relocation */
    /* XXX: find a better solution ? */
    rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.i);
    rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
}

#ifdef TCC_TARGET_PE
# define TREG_FASTCALL_1 TREG_RCX
#else
# define TREG_FASTCALL_1 TREG_RDI
#endif

static void gen_bounds_prolog(void)
{
    /* leave some room for bound checking code */
    func_bound_offset = lbounds_section->data_offset;
    func_bound_ind = ind;
    func_bound_add_epilog = 0;
    o(0xb848 + TREG_FASTCALL_1 * 0x100); /*lbound section pointer */
    gen_le64 (0);
    oad(0xb8, 0); /* call to function */
}

static void gen_bounds_epilog(void)
{
    addr_t saved_ind;
    addr_t *bounds_ptr;
    Sym *sym_data;
    int offset_modified = func_bound_offset != lbounds_section->data_offset;

    if (!offset_modified && !func_bound_add_epilog)
        return;

    /* add end of table info */
    bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
    *bounds_ptr = 0;

    sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                           func_bound_offset, lbounds_section->data_offset);

    /* generate bound local allocation */
    if (offset_modified) {
        saved_ind = ind;
        ind = func_bound_ind;
        greloca(cur_text_section, sym_data, ind + 2, R_X86_64_64, 0);
        ind = ind + 10;
        gen_bounds_call(TOK___bound_local_new);
        ind = saved_ind;
    }

    /* generate bound check local freeing */
    o(0x5250); /* save returned value, if any */
    greloca(cur_text_section, sym_data, ind + 2, R_X86_64_64, 0);
    o(0xb848 + TREG_FASTCALL_1 * 0x100); /* mov xxx, %rcx/di */
    gen_le64 (0);
    gen_bounds_call(TOK___bound_local_delete);
    o(0x585a); /* restore returned value, if any */
}
#endif

#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

static void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}

static int using_regs(int size)
{
    return !(size > 8 || (size & (size - 1)));
}
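
/* Note: per the Windows x64 calling convention only arguments of size
   1, 2, 4 or 8 bytes are passed by value (in a register or a stack
   slot); anything else is passed as a pointer to a caller-made copy,
   which is what this "power of two no larger than 8" test captures. */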

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    size = type_size(vt, &align);
    if (!using_regs(size))
        return 0;
    if (size == 8)
        ret->t = VT_LLONG;
    else if (size == 4)
        ret->t = VT_INT;
    else if (size == 2)
        ret->t = VT_SHORT;
    else
        ret->t = VT_BYTE;
    ret->ref = NULL;
    return 1;
}

static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}

static int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}

void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gbound_args(nb_args);
#endif

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (using_regs(size))
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (!using_regs(size)) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                if (tcc_state->nosse)
                    tcc_error("SSE disabled");
                if (arg >= REGN) {
                    gv(RC_XMM0);
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* Load directly to xmmN register */
                    gv(RC_XMM0 << arg);
                    d = arg_prepare_reg(arg);
                    /* mov %xmmN, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + arg*8 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);
    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);

    if ((vtop->r & VT_SYM) && vtop->sym->v == TOK_alloca) {
        /* need to add the "func_scratch" area after alloca */
        o(0x48); func_alloca = oad(0x05, func_alloca); /* add $NN, %rax */
#ifdef CONFIG_TCC_BCHECK
        if (tcc_state->do_bounds_check)
            gen_bounds_call(TOK___bound_alloca_nr); /* new region */
#endif
    }
    vtop--;
}

#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(Sym *func_sym)
{
    CType *func_type = &func_sym->type;
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 32;
    func_alloca = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    size = gfunc_arg_size(&func_vt);
    if (!using_regs(size)) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (!using_regs(size)) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type,
                     VT_LLOCAL | VT_LVAL, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    if (tcc_state->nosse)
                        tcc_error("SSE disabled");
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type,
                     VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_var) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_prolog();
#endif
}

/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    /* align local size to word & save local variables */
    func_scratch = (func_scratch + 15) & -16;
    loc = (loc & -16) - func_scratch;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_epilog();
#endif

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    v = -loc;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */
        greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    /* add the "func_scratch" area after each alloca seen */
    gsym_addr(func_alloca, -func_scratch);

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}

#else

static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}

typedef enum X86_64_Mode {
  x86_64_mode_none,
  x86_64_mode_memory,
  x86_64_mode_integer,
  x86_64_mode_sse,
  x86_64_mode_x87
} X86_64_Mode;

static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
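
/* Note: these are the SysV AMD64 class-merging rules: MEMORY wins over
   everything, otherwise INTEGER wins, an x87 class combined with SSE
   goes to MEMORY, and only a pure SSE combination stays SSE.  tcc
   applies them to the whole aggregate rather than per eightbyte, which
   is a simplification relative to the ABI document. */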

static X86_64_Mode classify_x86_64_inner(CType *ty)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
        return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        mode = x86_64_mode_none;
        for (f = f->next; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }
    assert(0);
    return 0;
}
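
/* Note: as a worked example of this code's behavior, struct { double d;
   long l; } (16 bytes) merges sse with integer and so classifies as
   integer here, travelling in two GPRs as VT_QLONG; struct { long a, b,
   c; } (24 bytes) exceeds 16 bytes, classifies as memory, and is passed
   on the stack and returned via a hidden pointer. */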

static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
{
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    if (size > 4)
                        ret_t = VT_LLONG;
                    else if (size > 2)
                        ret_t = VT_INT;
                    else if (size > 1)
                        ret_t = VT_SHORT;
                    else
                        ret_t = VT_BYTE;
                    if ((ty->t & VT_BTYPE) == VT_STRUCT || (ty->t & VT_UNSIGNED))
                        ret_t |= VT_UNSIGNED;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}

ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
}
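
/* Note: a two-eightbyte integer return (VT_QLONG) comes back in
   RAX:RDX (REG_IRET/REG_IRE2), and a two-eightbyte SSE return
   (VT_QFLOAT) in XMM0:XMM1 (REG_FRET/REG_FRE2), matching the RC_IRE2
   and RC_FRE2 register classes defined at the top of this file. */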

#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;
    char _onstack[nb_args ? nb_args : 1], *onstack = _onstack;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gbound_args(nb_args);
#endif

    /* calculate the number of integer/float register arguments, remember
       arguments to be passed via stack (in onstack[]), and also remember
       if we have to align the stack pointer to 16 (onstack[i] == 2).  Needs
       to be done in a left-to-right pass over arguments. */
    stack_adjust = 0;
    for(i = nb_args - 1; i >= 0; i--) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse && nb_sse_args + reg_count <= 8) {
            nb_sse_args += reg_count;
            onstack[i] = 0;
        } else if (mode == x86_64_mode_integer && nb_reg_args + reg_count <= REGN) {
            nb_reg_args += reg_count;
            onstack[i] = 0;
        } else if (mode == x86_64_mode_none) {
            onstack[i] = 0;
        } else {
            if (align == 16 && (stack_adjust &= 15)) {
                onstack[i] = 2;
                stack_adjust = 0;
            } else
                onstack[i] = 1;
            stack_adjust += size;
        }
    }
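
    /* Note: onstack[i] == 2 marks a 16-byte-aligned stack argument (a
       long double, for example) that would land on a misaligned slot;
       the push loop below then emits one padding push before it.
       'stack_adjust' tracks the running size mod 16 of the stack
       arguments for this purpose. */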

    if (nb_sse_args && tcc_state->nosse)
        tcc_error("SSE disabled but floating point arguments passed");

    /* fetch cpu flag before generating any code */
    if ((vtop->r & VT_VALMASK) == VT_CMP)
        gv(RC_INT);

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    args_size = 0;
    stack_adjust &= 15;
    for (i = 0; i < nb_args;) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (!onstack[i]) {
            ++i;
            continue;
        }
        /* Possibly adjust stack to align SSE boundary.  We're processing
           args from right to left while allocating happens left to right
           (stack grows down), so the adjustment needs to happen _after_
           an argument that requires it. */
        if (stack_adjust) {
            o(0x50); /* push %rax; aka sub $8,%rsp */
            args_size += 8;
            stack_adjust = 0;
        }
        if (onstack[i] == 2)
            stack_adjust = 1;

        vrotb(i+1);

        switch (vtop->type.t & VT_BTYPE) {
        case VT_STRUCT:
            /* allocate the necessary size on stack */
            o(0x48);
            oad(0xec81, size); /* sub $xxx, %rsp */
            /* generate structure store */
            r = get_reg(RC_INT);
            orex(1, r, 0, 0x89); /* mov %rsp, r */
            o(0xe0 + REG_VALUE(r));
            vset(&vtop->type, r | VT_LVAL, 0);
            vswap();
            vstore();
            break;

        case VT_LDOUBLE:
            gv(RC_ST0);
            oad(0xec8148, size); /* sub $xxx, %rsp */
            o(0x7cdb); /* fstpt 0(%rsp) */
            g(0x24);
            g(0x00);
            break;

        case VT_FLOAT:
        case VT_DOUBLE:
            assert(mode == x86_64_mode_sse);
            r = gv(RC_FLOAT);
            o(0x50); /* push $rax */
            /* movq %xmmN, (%rsp) */
            o(0xd60f66);
            o(0x04 + REG_VALUE(r)*8);
            o(0x24);
            break;

        default:
            assert(mode == x86_64_mode_integer);
            /* simple type */
            /* XXX: implicit cast ? */
            r = gv(RC_INT);
            orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
            break;
        }
        args_size += size;

        vpop();
        --nb_args;
        onstack++;
    }

    /* XXX This should be superfluous. */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            int d;
            gen_reg -= reg_count;
            r = gv(RC_INT);
            d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here. */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    if (vtop->type.ref->f.func_type != FUNC_NEW) /* implies FUNC_OLD or FUNC_ELLIPSIS */
        oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}
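
/* Note: the 'mov $n,%eax' above implements the SysV variadic-call
   convention: for unprototyped or vararg callees, %al must carry an
   upper bound on the number of vector registers used for arguments,
   which the callee's va_start prologue uses to decide how many xmm
   registers to spill. */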

#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}

/* generate function prolog of type 't' */
void gfunc_prolog(Sym *func_sym)
{
    CType *func_type = &func_sym->type;
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_var) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count > REGN)
                    goto stack_arg;
                seen_reg_num += reg_count;
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count > 8)
                    goto stack_arg;
                seen_sse_num += reg_count;
                break;
            }
        }

        loc -= 24;
        /* movl $0x????????, -0x18(%rbp) */
        o(0xe845c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0x14(%rbp) */
        o(0xec45c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* leaq $0x????????, %r11 */
        o(0x9d8d4c);
        gen_le32(seen_stack_size);
        /* movq %r11, -0x10(%rbp) */
        o(0xf05d894c);
        /* leaq $-192(%rbp), %r11 */
        o(0x9d8d4c);
        gen_le32(-176 - 24);
        /* movq %r11, -0x8(%rbp) */
        o(0xf85d894c);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            if (!tcc_state->nosse) {
                o(0xd60f66); /* movq */
                gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            }
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }
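
    /* Note: the four stores above initialize the va_list fields in the
       order gp_offset, fp_offset, overflow_arg_area, reg_save_area, and
       the loops spill the 8 xmm and 6 integer argument registers into a
       176-byte register save area (16 bytes per xmm slot, 8 per GPR),
       matching the layout assumed by the va_arg support in stdarg.h. */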

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (tcc_state->nosse)
                tcc_error("SSE disabled but floating point arguments used");
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;
        }
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_prolog();
#endif
}

/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_epilog();
#endif
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}

#endif /* not PE */

ST_FUNC void gen_fill_nops(int bytes)
{
    while (bytes--)
        g(0x90);
}

/* generate a jump to a label */
int gjmp(int t)
{
    return gjmp2(0xe9, t);
}

/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}

ST_FUNC int gjmp_append(int n, int t)
{
    void *p;
    /* insert vtop->c jump list in t */
    if (n) {
        uint32_t n1 = n, n2;
        while ((n2 = read32le(p = cur_text_section->data + n1)))
            n1 = n2;
        write32le(p, t);
        t = n;
    }
    return t;
}

ST_FUNC int gjmp_cond(int op, int t)
{
    if (op & 0x100)
      {
        /* This was a float compare.  If the parity flag is set
           the result was unordered.  For anything except != this
           means false and we don't jump (anding both conditions).
           For != this means true (oring both).
           Take care about inverting the test.  We need to jump
           to our target if the result was unordered and test wasn't NE,
           otherwise if unordered we don't want to jump. */
        int v = vtop->cmp_r;
        op &= ~0x100;
        if (op ^ v ^ (v != TOK_NE))
          o(0x067a); /* jp +6 */
        else
          {
            g(0x0f);
            t = gjmp2(0x8a, t); /* jp t */
          }
      }
    g(0x0f);
    t = gjmp2(op - 16, t);
    return t;
}

/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.i == vtop->c.i)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT)
            vset_VT_CMP(op);
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}
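
/* Note: gen_op8 relies on the x86 ALU group-1 encoding, where the
   operation is a 3-bit opcode extension: 0=add 1=or 2=adc 3=sbb 4=and
   5=sub 6=xor 7=cmp.  Opcode 0x83 takes a sign-extended imm8 and 0x81
   an imm32; in the register/register case the same code shifted into
   bits 3-5 of the opcode ((opc << 3) | 0x01: 0x01 add, 0x09 or,
   0x29 sub, ...) is used instead. */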

void gen_opl(int op)
{
    gen_opi(op);
}

/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vset_VT_CMP(op);
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.i;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
                vtop->r = r = r | VT_LVAL;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vset_VT_CMP(op | 0x100);
            vtop->cmp_r = op;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
                vtop->r = r = r | VT_LVAL;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }
            vtop--;
        }
    }
}
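
/* Note: 0x58+a maps directly onto the SSE scalar arithmetic opcodes:
   0x58 addss/sd (a=0), 0x59 mulss/sd (a=1), 0x5c subss/sd (a=4),
   0x5e divss/sd (a=6), with the F3/F2 prefix selecting float vs.
   double; the same 'a' values index the x87 DE-group fxxxp
   instructions in the long double path above. */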

/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
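
/* Note: there is no unsigned 32-bit convert instruction, so unsigned
   int is handled by widening: 32-bit operations already zero-extend
   into the full 64-bit register, so a 64-bit signed conversion of that
   register (fildll, or cvtsi2sd/ss with REX.W) always yields the exact
   non-negative value. */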

/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}

/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
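
/* Note: the double-t in cvttss2si/cvttsd2si means truncation toward
   zero, which is exactly C's float-to-integer semantics; the REX.W bit
   (size == 8) selects a 64-bit destination register. */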

// Generate sign extension from 32 to 64 bits:
ST_FUNC void gen_cvt_sxtw(void)
{
    int r = gv(RC_INT);
    /* x86_64 specific: movslq */
    o(0x6348);
    o(0xc0 + (REG_VALUE(r) << 3) + REG_VALUE(r));
}

/* char/short to int conversion */
ST_FUNC void gen_cvt_csti(int t)
{
    int r, sz, xl, ll;
    r = gv(RC_INT);
    sz = !(t & VT_UNSIGNED);
    xl = (t & VT_BTYPE) == VT_SHORT;
    ll = (vtop->type.t & VT_BTYPE) == VT_LLONG;
    orex(ll, r, 0, 0xc0b60f /* mov[sz] %a[xl], %eax */
        | (sz << 3 | xl) << 8
        | (REG_VALUE(r) << 3 | REG_VALUE(r)) << 16
        );
}
2229 void ggoto(void)
2231 gcall_or_jmp(1);
2232 vtop--;

/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}

#ifdef TCC_TARGET_PE
/* Save result of gen_vla_alloc onto the stack */
ST_FUNC void gen_vla_result(int addr) {
    /* mov %rax,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RAX, VT_LOCAL, NULL, addr);
}
#endif

/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
    int use_call = 0;

#if defined(CONFIG_TCC_BCHECK)
    use_call = tcc_state->do_bounds_check;
#endif
#ifdef TCC_TARGET_PE  /* alloca does more than just adjust %rsp on Windows */
    use_call = 1;
#endif
    if (use_call)
    {
        vpush_global_sym(&func_old_type, TOK_alloca);
        vswap(); /* Move alloca ref past allocation size */
        gfunc_call(1);
    }
    else {
        int r;
        r = gv(RC_INT); /* allocation size */
        /* sub r,%rsp */
        o(0x2b48);
        o(0xe0 | REG_VALUE(r));
        /* We align to 16 bytes rather than to the requested 'align' */
        /* and ~15, %rsp */
        o(0xf0e48348);
        vpop();
    }
}

/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/