/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         25
#define NB_ASM_REGS     16

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which
   makes assumptions about it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_ST0     0x0080 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX /* function return: integer register */
#define RC_LRET    RC_RDX /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */
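
/* A precise class names one specific register; asking the register
   allocator for that class pins a value there.  E.g. gen_opi() below
   uses gv2(RC_INT, RC_RCX) so that a variable shift count lands in
   %cl as shl/shr/sar require, and the RC_IRET/RC_FRET aliases pin
   return values to %rax/%xmm0 as the calling conventions demand. */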
/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_ST0 = 24,

    TREG_MEM = 0x20
};
#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16
/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#include "tcc.h"
#include <assert.h>
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};
static unsigned long func_sub_sp_offset;
static int func_ret_sub;
/* XXX: make it faster ? */
ST_FUNC void g(int c)
{
    int ind1;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}
ST_FUNC void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
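
/* Note the convention used throughout this file: o() emits the bytes
   of its argument low byte first and stops at the first zero byte, so
   multi-byte opcodes are written "reversed" as integer literals.  For
   example o(0xc48348) emits 48 83 c4, the start of "add $imm8, %rsp".
   A consequence is that a zero byte can never be emitted via o();
   those go through g(), gen_le32() and friends instead. */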
ST_FUNC void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

ST_FUNC void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

ST_FUNC void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}
static void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
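
/* orex() emits the REX prefix 0100WRXB when one is needed: 'll' sets
   REX.W (64-bit operand size), the high bit of 'r' (the modrm r/m
   operand) maps to REX.B and the high bit of 'r2' (the modrm reg
   field) maps to REX.R, then the low opcode byte 'b' follows.  For
   example orex(1, TREG_R8, 0, 0x89) emits 49 89, i.e. REX.W+REX.B for
   a 64-bit mov whose r/m operand is %r8. */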
/* output a symbol and patch all calls to it */
ST_FUNC void gsym_addr(int t, int a)
{
    while (t) {
        unsigned char *ptr = cur_text_section->data + t;
        uint32_t n = read32le(ptr); /* next value */
        write32le(ptr, a - t - 4);
        t = n;
    }
}

void gsym(int t)
{
    gsym_addr(t, ind);
}
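
/* Pending forward jumps to a not-yet-defined label form a singly
   linked list threaded through their own 32-bit displacement fields:
   each field temporarily holds the offset of the previous jump in the
   chain (0 terminates it).  gsym_addr() walks that list and replaces
   each link with the real PC-relative displacement a - t - 4, measured
   from the end of the 4-byte field. */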
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad

static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}
/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    write32le(cur_text_section->data + ind, s);
    s = ind;
    ind = ind1;
    return s;
}
ST_FUNC void gen_addr32(int r, Sym *sym, long c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_32S, c), c=0;
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_64, c), c=0;
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, long c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_PC32, c-4), c=4;
    gen_le32(c-4);
}
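
/* The -4 bias in gen_addrpc32() exists because %rip-relative
   displacements are measured from the end of the instruction, i.e.
   from just past the 4-byte field emitted here, while the
   R_X86_64_PC32 relocation is resolved against the field's own
   address. */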
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    greloca(cur_text_section, sym, ind, R_X86_64_GOTPCREL, -4);
#else
    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
        get_tok_str(sym->v, NULL), c, r,
        cur_text_section->data[ind-3],
        cur_text_section->data[ind-2],
        cur_text_section->data[ind-1]
        );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
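
/* The bytes built above are plain x86 ModRM: the two top bits select
   the addressing mode (00 = no displacement, 01 = disp8, 10 = disp32),
   bits 5..3 carry 'op_reg' (a register number or an opcode extension)
   and bits 2..0 the base register.  E.g. for a local at -8(%rbp) the
   VT_LOCAL branch emits 0x45 | op_reg<<3 followed by the disp8 0xf8,
   i.e. mod=01, r/m=101 (%rbp). */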
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.i;
    if (fc != sv->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in load");

    ft &= ~(VT_VOLATILE | VT_CONSTANT);

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif
    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.i = fc;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        /* Like GCC we can load from small enough properly sized
           structs and unions as well.
           XXX maybe move to generic operand handling, but should
           occur only with asm, so tccasm.c might also be a better place */
        if ((ft & VT_BTYPE) == VT_STRUCT) {
            int align;
            switch (type_size(&sv->type, &align)) {
                case 1: ft = VT_BYTE; break;
                case 2: ft = VT_SHORT; break;
                case 4: ft = VT_INT; break;
                case 8: ft = VT_LLONG; break;
                default:
                    tcc_error("invalid aggregate type for register load");
                    break;
            }
        }
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.i);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100) {
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE.  */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8));
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    fr = v->r & VT_VALMASK;
    ft = v->type.t;
    fc = v->c.i;
    if (fc != v->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in store");
    ft &= ~(VT_VOLATILE | VT_CONSTANT);
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.i);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif
    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, etc. */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: can we really get here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: can we really get here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
        ((vtop->r & VT_SYM) || (vtop->c.i-4) == (int)(vtop->c.i-4))) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
#ifdef TCC_TARGET_PE
            greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32, (int)(vtop->c.i-4));
#else
            greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32, (int)(vtop->c.i-4));
#endif
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloca(symtab_section, cur_text_section,
                           ind + 1, R_X86_64_PC32, 0, (int)(vtop->c.i-4));
        }
        oad(0xe8 + is_jmp, 0); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
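
/* Direct calls use the 5-byte e8/e9 rel32 form, which only reaches
   targets within +/-2GB; anything else goes indirectly through %r11,
   a register that is caller-saved and never carries arguments in
   either supported ABI.  In the modrm byte, 0xd0+reg encodes
   "call *reg" (ff /2) and adding is_jmp<<4 turns it into 0xe0+reg,
   "jmp *reg" (ff /4). */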
#if defined(CONFIG_TCC_BCHECK)
#ifndef TCC_TARGET_PE
static addr_t func_bound_offset;
static unsigned long func_bound_ind;
#endif

static void gen_static_call(int v)
{
    Sym *sym = external_global_sym(v, &func_old_type, 0);
    oad(0xe8, 0);
    greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
}

/* generate a bounded pointer addition */
ST_FUNC void gen_bounded_ptr_add(void)
{
    /* save all temporary registers */
    save_regs(0);

    /* prepare fast x86_64 function call */
    gv(RC_RAX);
    o(0xc68948); // mov  %rax,%rsi ## second arg in %rsi, this must be size
    vtop--;

    gv(RC_RAX);
    o(0xc78948); // mov  %rax,%rdi ## first arg in %rdi, this must be ptr
    vtop--;

    /* do a fast function call */
    gen_static_call(TOK___bound_ptr_add);

    /* returned pointer is in rax */
    vtop++;
    vtop->r = TREG_RAX | VT_BOUNDED;

    /* relocation offset of the bounding function call point */
    vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
}
/* patch pointer addition in vtop so that pointer dereferencing is
   also tested */
ST_FUNC void gen_bounded_ptr_deref(void)
{
    addr_t func;
    int size, align;
    ElfW(Rela) *rel;
    Sym *sym;

    size = 0;
    /* XXX: put that code in generic part of tcc */
    if (!is_float(vtop->type.t)) {
        if (vtop->r & VT_LVAL_BYTE)
            size = 1;
        else if (vtop->r & VT_LVAL_SHORT)
            size = 2;
    }
    if (!size)
        size = type_size(&vtop->type, &align);
    switch(size) {
    case  1: func = TOK___bound_ptr_indir1; break;
    case  2: func = TOK___bound_ptr_indir2; break;
    case  4: func = TOK___bound_ptr_indir4; break;
    case  8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
    default:
        tcc_error("unhandled size when dereferencing bounded pointer");
        func = 0;
        break;
    }

    sym = external_global_sym(func, &func_old_type, 0);
    if (!sym->c)
        put_extern_sym(sym, NULL, 0, 0);

    /* patch relocation */
    /* XXX: find a better solution ? */

    rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.i);
    rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
}
#endif
#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}
static int func_scratch;

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
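
/* In gen_offs_sp() the 0x100 bit of 'r' marks values whose low three
   bits are an opcode extension or a fixed xmm register rather than a
   general register, so no REX.B bit may be derived from them: callers
   below pass 0x107 for "fstpt off(%rsp)" (db /7) and 0x100 for
   "movq %xmm0, off(%rsp)".  The 0x2444/0x2484 words are the modrm+SIB
   pair for disp8/disp32 addressing off %rsp. */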
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align;
    *regsize = 8;
    *ret_align = 1; // Never have to re-align return values for x86-64
    size = type_size(vt, &align);
    ret->ref = NULL;
    if (size > 8) {
        return 0;
    } else if (size > 4) {
        ret->t = VT_LLONG;
        return 1;
    } else if (size > 2) {
        ret->t = VT_INT;
        return 1;
    } else if (size > 1) {
        ret->t = VT_SHORT;
        return 1;
    } else {
        ret->t = VT_BYTE;
        return 1;
    }
}
static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}

int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;
    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (size > 8) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                if (tcc_state->nosse)
                    tcc_error("SSE disabled");
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);
    /* other compilers don't clear the upper bits when returning char/short */
    bt = vtop->type.ref->type.t & (VT_BTYPE | VT_UNSIGNED);
    if (bt == (VT_BYTE | VT_UNSIGNED))
        o(0xc0b60f);  /* movzbl %al, %eax */
    else if (bt == VT_BYTE)
        o(0xc0be0f); /* movsbl %al, %eax */
    else if (bt == VT_SHORT)
        o(0x98); /* cwtl */
    else if (bt == (VT_SHORT | VT_UNSIGNED))
        o(0xc0b70f);  /* movzwl %ax, %eax */
#if 0 /* handled in gen_cast() */
    else if (bt == VT_INT)
        o(0x9848); /* cltq */
    else if (bt == (VT_INT | VT_UNSIGNED))
        o(0xc089); /* mov %eax,%eax */
#endif
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    func_var = (sym->c == FUNC_ELLIPSIS);
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    if (tcc_state->nosse)
                        tcc_error("SSE disabled");
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */
        greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
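
/* The prolog is actually emitted here, in the epilog, by rewinding
   'ind' by FUNC_PROLOG_SIZE: only after compiling the body is the
   final frame size (locals plus func_scratch) known.  The 11 reserved
   bytes hold exactly "push %rbp; mov %rsp,%rbp; sub $imm32,%rsp"
   (1 + 3 + 7 bytes), or the equivalent mov/call __chkstk sequence
   padded with a nop for frames of 4096 bytes and up. */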
#else

static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
typedef enum X86_64_Mode {
  x86_64_mode_none,
  x86_64_mode_memory,
  x86_64_mode_integer,
  x86_64_mode_sse,
  x86_64_mode_x87
} X86_64_Mode;
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
static X86_64_Mode classify_x86_64_inner(CType *ty)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        mode = x86_64_mode_none;
        for (f = f->next; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }
    assert(0);
    return 0;
}
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
{
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}
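
/* This is a simplified form of the SysV AMD64 classification: the ABI
   classifies each eightbyte of an aggregate separately, while the code
   above merges all field classes into a single mode for the whole
   argument.  For example struct { long a; long b; } (16 bytes, mode
   integer, reg_count 2) becomes VT_QLONG and travels in two GPRs; a
   mixed struct { double d; long l; } also merges to integer here,
   where the full ABI algorithm would put d in an SSE register.
   Aggregates larger than 16 bytes, or a long double mixed with any
   other class, degrade to memory. */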
ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
}
#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, run_start, run_end, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float register arguments */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse)
            nb_sse_args += reg_count;
        else if (mode == x86_64_mode_integer)
            nb_reg_args += reg_count;
    }

    if (nb_sse_args && tcc_state->nosse)
        tcc_error("SSE disabled but floating point arguments passed");

    /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
       and ended by a 16-byte aligned argument. This is because, from the point of view of
       the callee, argument alignment is computed from the bottom up. */
    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    run_start = 0;
    args_size = 0;
    while (run_start != nb_args) {
        int run_gen_reg = gen_reg, run_sse_reg = sse_reg;

        run_end = nb_args;
        stack_adjust = 0;
        for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            switch (mode) {
            case x86_64_mode_memory:
            case x86_64_mode_x87:
            stack_arg:
                if (align == 16)
                    run_end = i;
                else
                    stack_adjust += size;
                break;

            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8) goto stack_arg;
                break;

            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN) goto stack_arg;
                break;
            default: break; /* nothing to be done for x86_64_mode_none */
            }
        }

        gen_reg = run_gen_reg;
        sse_reg = run_sse_reg;

        /* adjust stack to align SSE boundary */
        if (stack_adjust &= 15) {
            /* fetch cpu flag before the following sub will change the value */
            if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
                gv(RC_INT);

            stack_adjust = 16 - stack_adjust;
            o(0x48);
            oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
            args_size += stack_adjust;
        }
        for(i = run_start; i < run_end;) {
            /* Swap argument to top, it will possibly be changed here,
               and might use more temps. At the end of the loop we keep
               it on the stack and swap it back to its original position
               if it is a register. */
            SValue tmp = vtop[0];
            int arg_stored = 1;

            vtop[0] = vtop[-i];
            vtop[-i] = tmp;
            mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, &reg_count);

            switch (vtop->type.t & VT_BTYPE) {
            case VT_STRUCT:
                if (mode == x86_64_mode_sse) {
                    if (sse_reg > 8)
                        sse_reg -= reg_count;
                    else
                        arg_stored = 0;
                } else if (mode == x86_64_mode_integer) {
                    if (gen_reg > REGN)
                        gen_reg -= reg_count;
                    else
                        arg_stored = 0;
                }

                if (arg_stored) {
                    /* allocate the necessary size on stack */
                    o(0x48);
                    oad(0xec81, size); /* sub $xxx, %rsp */
                    /* generate structure store */
                    r = get_reg(RC_INT);
                    orex(1, r, 0, 0x89); /* mov %rsp, r */
                    o(0xe0 + REG_VALUE(r));
                    vset(&vtop->type, r | VT_LVAL, 0);
                    vswap();
                    vstore();
                    args_size += size;
                }
                break;

            case VT_LDOUBLE:
                assert(0);
                break;

            case VT_FLOAT:
            case VT_DOUBLE:
                assert(mode == x86_64_mode_sse);
                if (sse_reg > 8) {
                    --sse_reg;
                    r = gv(RC_FLOAT);
                    o(0x50); /* push %rax */
                    /* movq %xmmN, (%rsp) */
                    o(0xd60f66);
                    o(0x04 + REG_VALUE(r)*8);
                    o(0x24);
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;

            default:
                assert(mode == x86_64_mode_integer);
                /* simple type */
                /* XXX: implicit cast ? */
                if (gen_reg > REGN) {
                    --gen_reg;
                    r = gv(RC_INT);
                    orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;
            }

            /* And swap the argument back to its original position. */
            tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            if (arg_stored) {
                vrotb(i+1);
                assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
                vpop();
                --nb_args;
                --run_end;
            } else {
                ++i;
            }
        }
        /* handle 16 byte aligned arguments at end of run */
        run_start = i = run_end;
        while (i < nb_args) {
            /* Rotate argument to top since it will always be popped */
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            if (align != 16)
                break;

            vrotb(i+1);

            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                gv(RC_ST0);
                oad(0xec8148, size); /* sub $xxx, %rsp */
                o(0x7cdb); /* fstpt 0(%rsp) */
                g(0x24);
                g(0x00);
                args_size += size;
            } else {
                assert(mode == x86_64_mode_memory);

                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore();
                args_size += size;
            }

            vpop();
            --nb_args;
        }
    }
    /* XXX This should be superfluous. */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            int d;
            gen_reg -= reg_count;
            r = gv(RC_INT);
            d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here. */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    if (vtop->type.ref->c != FUNC_NEW) /* implies FUNC_OLD or FUNC_ELLIPSIS */
        oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}
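
/* The "mov nb_sse_args, %eax" above implements the SysV rule that for
   unprototyped and variadic callees %al must hold an upper bound on
   the number of vector registers used to pass arguments; callees
   built by other compilers consult it to decide how much of the XMM
   register save area they need to spill. */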
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count <= 8) {
                    seen_reg_num += reg_count;
                } else {
                    seen_reg_num = 8;
                    goto stack_arg;
                }
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count <= 8) {
                    seen_sse_num += reg_count;
                } else {
                    seen_sse_num = 8;
                    goto stack_arg;
                }
                break;
            }
        }

        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);
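
        /* These three stores seed the va_list bookkeeping that tcc's
           stdarg support reads back: an initial gp_offset (8 bytes per
           integer register already consumed by named parameters), an
           fp_offset biased by 48 (the six 8-byte GPR slots that
           precede the 16-byte XMM slots in the register save area),
           and the offset of the first stack-passed argument.  The
           layout has to stay in sync with the va_arg code and with
           classify_x86_64_va_arg() above. */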
        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            if (!tcc_state->nosse) {
                o(0xd60f66); /* movq */
                gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            }
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (tcc_state->nosse)
                tcc_error("SSE disabled but floating point arguments used");
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;
        }
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }

#ifdef CONFIG_TCC_BCHECK
    /* leave some room for bound checking code */
    if (tcc_state->do_bounds_check) {
        func_bound_offset = lbounds_section->data_offset;
        func_bound_ind = ind;
        oad(0xb8, 0); /* lbound section pointer */
        o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
        oad(0xb8, 0); /* call to function */
    }
#endif
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check
        && func_bound_offset != lbounds_section->data_offset)
    {
        addr_t saved_ind;
        addr_t *bounds_ptr;
        Sym *sym_data;

        /* add end of table info */
        bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
        *bounds_ptr = 0;

        /* generate bound local allocation */
        sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                               func_bound_offset, lbounds_section->data_offset);
        saved_ind = ind;
        ind = func_bound_ind;
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);
        ind = ind + 5 + 3;
        gen_static_call(TOK___bound_local_new);
        ind = saved_ind;

        /* generate bound check local freeing */
        o(0x5250); /* save returned value, if any */
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);
        oad(0xb8, 0); /* mov xxx, %rax */
        o(0xc78948); /* mov %rax,%rdi # first arg in %rdi, this must be ptr */
        gen_static_call(TOK___bound_local_delete);
        o(0x585a); /* restore returned value, if any */
    }
#endif
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}

#endif /* not PE */
/* generate a jump to a label */
int gjmp(int t)
{
    return psym(0xe9, t);
}

/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}
ST_FUNC void gtst_addr(int inv, int a)
{
    inv ^= (vtop--)->c.i;
    a -= ind + 2;
    if (a == (char)a) {
        g(inv - 32);
        g(a);
    } else {
        g(0x0f);
        oad(inv - 16, a - 4);
    }
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
ST_FUNC int gtst(int inv, int t)
{
    int v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100) {
            /* This was a float compare.  If the parity flag is set
               the result was unordered.  For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test.  We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump.  */
            vtop->c.i &= ~0x100;
            if (inv == (vtop->c.i == TOK_NE)) {
                o(0x067a); /* jp +6 */
            } else {
                g(0x0f);
                t = psym(0x8a, t); /* jp t */
            }
        }
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            uint32_t n1, n = vtop->c.i;
            if (n) {
                while ((n1 = read32le(cur_text_section->data + n)))
                    n = n1;
                write32le(cur_text_section->data + n, t);
                t = vtop->c.i;
            }
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    }
    vtop--;
    return t;
}
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.i == vtop->c.i)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}
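
/* The 'opc' values in gen_opi() are the x86 ALU group numbers
   (add=0, or=1, adc=2, sbb=3, and=4, sub=5, xor=6, cmp=7): the
   register-register form is encoded as (opc << 3) | 0x01 (01=add,
   29=sub, 39=cmp, ...) and the immediate forms reuse opc as the
   /digit extension of opcodes 0x83 (imm8) and 0x81 (imm32).  The
   default case falls through with opc=7, turning every comparison
   operator into a cmp whose condition code is stashed in vtop->c.i
   for gtst()/load() to consume. */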
void gen_opl(int op)
{
    gen_opi(op);
}
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.i;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
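
/* In the SSE path the f2/f3 prefix picks cvtsi2sd vs cvtsi2ss (0f 2a),
   and unsigned int sources get the REX.W form: since a 32-bit load
   already zero-extended the value to 64 bits, converting it as a
   signed 64-bit integer produces the correct unsigned result.  The
   x87 path relies on the same trick with fildll, a signed 64-bit
   load. */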
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}
/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}
/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b48);
    o(0xe0 | REG_VALUE(r));
    /* We align to 16 bytes rather than 'align' */
    /* and ~15, %rsp */
    o(0xf0e48348);
    vpop();
#endif
}
/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/