/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         25
#define NB_ASM_REGS     16
#define CONFIG_TCC_ASM
/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (the gv2() code makes
   assumptions about this ordering). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_ST0     0x0080 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4   0x10000
#define RC_XMM5   0x20000
#define RC_XMM6   0x40000
#define RC_XMM7   0x80000
#define RC_IRET RC_RAX  /* function return: integer register */
#define RC_LRET RC_RDX  /* function return: second integer register */
#define RC_FRET RC_XMM0 /* function return: float register */
#define RC_QRET RC_XMM1 /* function return: second float register */
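
/* E.g. a value constrained to RC_INT may land in any of rax/rcx/rdx
   (the only registers tagged RC_INT below), while a single-register
   class such as RC_RAX or RC_RCX pins the value to one specific
   register, as required for division (which the ISA ties to rax/rdx)
   or for shift counts (which must be in cl). */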

/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_ST0 = 24,

    TREG_MEM = 0x20
};
#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16

/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#include "tcc.h"
#include <assert.h>

ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};

static unsigned long func_sub_sp_offset;
static int func_ret_sub;

/* XXX: make it faster ? */
ST_FUNC void g(int c)
{
    int ind1;
    if (nocode_wanted)
        return;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}

ST_FUNC void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}

ST_FUNC void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

ST_FUNC void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

ST_FUNC void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}

static void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
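
/* A REX prefix has the form 0100WRXB: W (bit 3) selects 64-bit operand
   size, R (bit 2) extends the ModRM "reg" field and B (bit 0) extends
   the ModRM "r/m" (or opcode register) field. Here 'r' feeds B and
   'r2' feeds R, so for example orex(1, TREG_R10, TREG_RAX, 0x89)
   emits 0x49 (REX.W|REX.B) followed by the 0x89 mov opcode. */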

/* output a symbol and patch all calls to it */
ST_FUNC void gsym_addr(int t, int a)
{
    while (t) {
        unsigned char *ptr = cur_text_section->data + t;
        uint32_t n = read32le(ptr); /* next value */
        write32le(ptr, a - t - 4);
        t = n;
    }
}

void gsym(int t)
{
    gsym_addr(t, ind);
}
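
/* Unresolved forward jumps form a linked list threaded through the
   code itself: each pending 32-bit displacement slot holds the offset
   of the previous pending jump to the same label, and gsym_addr()
   walks that chain, rewriting every slot into a real pc-relative
   displacement once the target address is known. */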

static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}

/* instruction + 4 bytes data. Return the address of the data */
static int oad(int c, int s)
{
    int t;
    if (nocode_wanted)
        return s;
    o(c);
    t = ind;
    gen_le32(s);
    return t;
}

/* generate jmp to a label */
#define gjmp2(instr,lbl) oad(instr,lbl)

ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_32S, c), c=0;
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_64, c), c=0;
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_PC32, c-4), c=4;
    gen_le32(c-4);
}

/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifdef TCC_TARGET_PE
    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
        get_tok_str(sym->v, NULL), c, r,
        cur_text_section->data[ind-3],
        cur_text_section->data[ind-2],
        cur_text_section->data[ind-1]
        );
#endif
    greloca(cur_text_section, sym, ind, R_X86_64_GOTPCREL, -4);
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}

static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only %rbp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}

/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.i;
    if (fc != sv->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in load");

    ft &= ~(VT_VOLATILE | VT_CONSTANT);

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.i = fc;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        /* Like GCC we can load from small enough properly sized
           structs and unions as well.
           XXX maybe move to generic operand handling, but should
           occur only with asm, so tccasm.c might also be a better place */
        if ((ft & VT_BTYPE) == VT_STRUCT) {
            int align;
            switch (type_size(&sv->type, &align)) {
                case 1: ft = VT_BYTE; break;
                case 2: ft = VT_SHORT; break;
                case 4: ft = VT_INT; break;
                case 8: ft = VT_LLONG; break;
                default:
                    tcc_error("invalid aggregate type for register load");
                    break;
            }
        }
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT)
                   || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR)
                   || ((ft & VT_BTYPE) == VT_FUNC)
                   );
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.i);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%rbp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100)
              {
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE.  */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8));
              }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}

/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    fr = v->r & VT_VALMASK;
    ft = v->type.t;
    fc = v->c.i;
    if (fc != v->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in store");
    ft &= ~(VT_VOLATILE | VT_CONSTANT);
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.i);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, etc. */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: do we ever really get here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: do we ever really get here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}

/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
        ((vtop->r & VT_SYM) || (vtop->c.i-4) == (int)(vtop->c.i-4))) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
#ifdef TCC_TARGET_PE
            greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32, (int)(vtop->c.i-4));
#else
            greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32, (int)(vtop->c.i-4));
#endif
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloca(symtab_section, cur_text_section,
                           ind + 1, R_X86_64_PC32, 0, (int)(vtop->c.i-4));
        }
        oad(0xe8 + is_jmp, 0); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}

#if defined(CONFIG_TCC_BCHECK)
#ifndef TCC_TARGET_PE
static addr_t func_bound_offset;
static unsigned long func_bound_ind;
#endif

static void gen_static_call(int v)
{
    Sym *sym = external_global_sym(v, &func_old_type, 0);
    oad(0xe8, 0);
    greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
}

/* generate a bounded pointer addition */
ST_FUNC void gen_bounded_ptr_add(void)
{
    /* save all temporary registers */
    save_regs(0);

    /* prepare fast x86_64 function call */
    gv(RC_RAX);
    o(0xc68948); // mov %rax,%rsi ## second arg in %rsi, this must be size
    vtop--;

    gv(RC_RAX);
    o(0xc78948); // mov %rax,%rdi ## first arg in %rdi, this must be ptr
    vtop--;

    /* do a fast function call */
    gen_static_call(TOK___bound_ptr_add);

    /* returned pointer is in rax */
    vtop++;
    vtop->r = TREG_RAX | VT_BOUNDED;

    /* relocation offset of the bounding function call point */
    vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
}
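
/* The call emitted by gen_static_call() above appends a relocation as
   the last entry of the text section's reloc table; its offset is
   stashed in vtop->c.i so that gen_bounded_ptr_deref() below can later
   retarget the same call site to the size-specific __bound_ptr_indirN
   checker once the access size is known. */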

/* patch pointer addition in vtop so that pointer dereferencing is
   also tested */
ST_FUNC void gen_bounded_ptr_deref(void)
{
    addr_t func;
    int size, align;
    ElfW(Rela) *rel;
    Sym *sym;

    size = 0;
    /* XXX: put that code in generic part of tcc */
    if (!is_float(vtop->type.t)) {
        if (vtop->r & VT_LVAL_BYTE)
            size = 1;
        else if (vtop->r & VT_LVAL_SHORT)
            size = 2;
    }
    if (!size)
        size = type_size(&vtop->type, &align);
    switch(size) {
    case  1: func = TOK___bound_ptr_indir1; break;
    case  2: func = TOK___bound_ptr_indir2; break;
    case  4: func = TOK___bound_ptr_indir4; break;
    case  8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
    default:
        tcc_error("unhandled size when dereferencing bounded pointer");
        func = 0;
        break;
    }

    sym = external_global_sym(func, &func_old_type, 0);
    if (!sym->c)
        put_extern_sym(sym, NULL, 0, 0);

    /* patch relocation */
    /* XXX: find a better solution ? */

    rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.i);
    rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
}
#endif

#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will never use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}

static int func_scratch, func_alloca;

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

static void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}

static int using_regs(int size)
{
    return !(size > 8 || (size & (size - 1)));
}
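
/* True iff size is 1, 2, 4 or 8: the Win64 ABI passes and returns such
   aggregates by value in a single register, anything else (e.g. a
   3-byte or 16-byte struct) by reference through a hidden pointer. */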

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    size = type_size(vt, &align);
    if (!using_regs(size))
        return 0;
    if (size == 8)
        ret->t = VT_LLONG;
    else if (size == 4)
        ret->t = VT_INT;
    else if (size == 2)
        ret->t = VT_SHORT;
    else
        ret->t = VT_BYTE;
    ret->ref = NULL;
    return 1;
}
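
/* For example a struct { int a, b; } (size 8) comes back in %rax,
   retyped as VT_LLONG above, whereas a 12-byte struct is returned
   through the hidden pointer the caller passes in %rcx. */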

static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}

static int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}

void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* For struct arguments we need to call memcpy, and that call would
       clobber the register arguments being prepared, so process the
       arguments that will be passed on the stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (using_regs(size))
            continue; /* arguments of size 1, 2, 4 or 8 are passed in registers or directly on the stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (!using_regs(size)) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                if (tcc_state->nosse)
                    tcc_error("SSE disabled");
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);

    if ((vtop->r & VT_SYM) && vtop->sym->v == TOK_alloca) {
        /* need to add the "func_scratch" area after alloca */
        o(0x0548), gen_le32(func_alloca), func_alloca = ind - 4;
    }

    /* other compilers don't clear the upper bits when returning char/short */
    bt = vtop->type.ref->type.t & (VT_BTYPE | VT_UNSIGNED);
    if (bt == (VT_BYTE | VT_UNSIGNED))
        o(0xc0b60f);  /* movzbl %al, %eax */
    else if (bt == VT_BYTE)
        o(0xc0be0f); /* movsbl %al, %eax */
    else if (bt == VT_SHORT)
        o(0x98); /* cwtl */
    else if (bt == (VT_SHORT | VT_UNSIGNED))
        o(0xc0b70f);  /* movzwl %ax, %eax */
#if 0 /* handled in gen_cast() */
    else if (bt == VT_INT)
        o(0x9848); /* cltq */
    else if (bt == (VT_INT | VT_UNSIGNED))
        o(0xc089);  /* mov %eax,%eax */
#endif
    vtop--;
}

#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    func_alloca = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    func_var = (sym->f.func_type == FUNC_ELLIPSIS);
    size = gfunc_arg_size(&func_vt);
    if (!using_regs(size)) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (!using_regs(size)) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LLOCAL | VT_LVAL, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    if (tcc_state->nosse)
                        tcc_error("SSE disabled");
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_type->ref->f.func_type == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}

/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */
        greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    /* add the "func_scratch" area after each alloca seen */
    while (func_alloca) {
        unsigned char *ptr = cur_text_section->data + func_alloca;
        func_alloca = read32le(ptr);
        write32le(ptr, func_scratch);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}

#else

static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}

typedef enum X86_64_Mode {
  x86_64_mode_none,
  x86_64_mode_memory,
  x86_64_mode_integer,
  x86_64_mode_sse,
  x86_64_mode_x87
} X86_64_Mode;

static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
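
/* These are the SysV AMD64 classification merge rules: MEMORY wins over
   everything, INTEGER wins over SSE, and x87 mixed with anything else
   forces MEMORY. E.g. struct { int a; float b; } merges INTEGER with
   SSE and therefore travels in a general-purpose register. */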

static X86_64_Mode classify_x86_64_inner(CType *ty)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
        return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        mode = x86_64_mode_none;
        for (f = f->next; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }

    assert(0);
    return 0;
}

static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
{
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none */
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}

ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
}

#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}
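
/* Slots 2 and 3 (%rdx and %rcx) are redirected to %r10/%r11 because
   gv() may still clobber %rdx/%rcx while evaluating later arguments
   (shift counts live in %rcx, division uses %rdx); the values are
   moved into their final registers just before the call below. */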

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;
    char _onstack[nb_args], *onstack = _onstack;

    /* calculate the number of integer/float register arguments, remember
       arguments to be passed via stack (in onstack[]), and also remember
       if we have to align the stack pointer to 16 (onstack[i] == 2).  Needs
       to be done in a left-to-right pass over arguments.  */
    stack_adjust = 0;
    for(i = nb_args - 1; i >= 0; i--) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse && nb_sse_args + reg_count <= 8) {
            nb_sse_args += reg_count;
            onstack[i] = 0;
        } else if (mode == x86_64_mode_integer && nb_reg_args + reg_count <= REGN) {
            nb_reg_args += reg_count;
            onstack[i] = 0;
        } else if (mode == x86_64_mode_none) {
            onstack[i] = 0;
        } else {
            if (align == 16 && (stack_adjust &= 15)) {
                onstack[i] = 2;
                stack_adjust = 0;
            } else
                onstack[i] = 1;
            stack_adjust += size;
        }
    }

    if (nb_sse_args && tcc_state->nosse)
        tcc_error("SSE disabled but floating point arguments passed");

    /* fetch cpu flag before generating any code */
    if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
        gv(RC_INT);

    /* For struct arguments we need to call memcpy, and that call would
       clobber the register arguments being prepared, so process the
       arguments that will be passed on the stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    args_size = 0;
    stack_adjust &= 15;
    for (i = 0; i < nb_args;) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (!onstack[i]) {
            ++i;
            continue;
        }
        /* Possibly adjust stack to align SSE boundary.  We're processing
           args from right to left while allocating happens left to right
           (stack grows down), so the adjustment needs to happen _after_
           an argument that requires it.  */
        if (stack_adjust) {
            o(0x50); /* push %rax; aka sub $8,%rsp */
            args_size += 8;
            stack_adjust = 0;
        }
        if (onstack[i] == 2)
            stack_adjust = 1;

        vrotb(i+1);

        switch (vtop->type.t & VT_BTYPE) {
        case VT_STRUCT:
            /* allocate the necessary size on stack */
            o(0x48);
            oad(0xec81, size); /* sub $xxx, %rsp */
            /* generate structure store */
            r = get_reg(RC_INT);
            orex(1, r, 0, 0x89); /* mov %rsp, r */
            o(0xe0 + REG_VALUE(r));
            vset(&vtop->type, r | VT_LVAL, 0);
            vswap();
            vstore();
            break;

        case VT_LDOUBLE:
            gv(RC_ST0);
            oad(0xec8148, size); /* sub $xxx, %rsp */
            o(0x7cdb); /* fstpt 0(%rsp) */
            g(0x24);
            g(0x00);
            break;

        case VT_FLOAT:
        case VT_DOUBLE:
            assert(mode == x86_64_mode_sse);
            r = gv(RC_FLOAT);
            o(0x50); /* push $rax */
            /* movq %xmmN, (%rsp) */
            o(0xd60f66);
            o(0x04 + REG_VALUE(r)*8);
            o(0x24);
            break;

        default:
            assert(mode == x86_64_mode_integer);
            /* simple type */
            /* XXX: implicit cast ? */
            r = gv(RC_INT);
            orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
            break;
        }
        args_size += size;

        vpop();
        --nb_args;
        onstack++;
    }

    /* XXX This should be superfluous.  */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may clobber these temporary registers, so we use R10 and R11
       instead */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            int d;
            gen_reg -= reg_count;
            r = gv(RC_INT);
            d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here.  */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    if (vtop->type.ref->f.func_type != FUNC_NEW) /* implies FUNC_OLD or FUNC_ELLIPSIS */
        oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}

#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (sym->f.func_type == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count > REGN)
                    goto stack_arg;
                seen_reg_num += reg_count;
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count > 8)
                    goto stack_arg;
                seen_sse_num += reg_count;
                break;
            }
        }

        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            if (!tcc_state->nosse) {
                o(0xd60f66); /* movq */
                gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            }
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (tcc_state->nosse)
                tcc_error("SSE disabled but floating point arguments used");
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;
        }
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }

#ifdef CONFIG_TCC_BCHECK
    /* leave some room for bound checking code */
    if (tcc_state->do_bounds_check) {
        func_bound_offset = lbounds_section->data_offset;
        func_bound_ind = ind;
        oad(0xb8, 0); /* lbound section pointer */
        o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
        oad(0xb8, 0); /* call to function */
    }
#endif
}

/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check
        && func_bound_offset != lbounds_section->data_offset)
    {
        addr_t saved_ind;
        addr_t *bounds_ptr;
        Sym *sym_data;

        /* add end of table info */
        bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
        *bounds_ptr = 0;

        /* generate bound local allocation */
        sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                               func_bound_offset, lbounds_section->data_offset);
        saved_ind = ind;
        ind = func_bound_ind;
        greloca(cur_text_section, sym_data, ind + 1, R_X86_64_64, 0);
        ind = ind + 5 + 3;
        gen_static_call(TOK___bound_local_new);
        ind = saved_ind;

        /* generate bound check local freeing */
        o(0x5250); /* save returned value, if any */
        greloca(cur_text_section, sym_data, ind + 1, R_X86_64_64, 0);
        oad(0xb8, 0); /* mov xxx, %rax */
        o(0xc78948); /* mov %rax,%rdi # first arg in %rdi, this must be ptr */
        gen_static_call(TOK___bound_local_delete);
        o(0x585a); /* restore returned value, if any */
    }
#endif
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}

#endif /* not PE */

/* generate a jump to a label */
int gjmp(int t)
{
    return gjmp2(0xe9, t);
}

/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}

ST_FUNC void gtst_addr(int inv, int a)
{
    int v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        inv ^= (vtop--)->c.i;
        a -= ind + 2;
        if (a == (char)a) {
            g(inv - 32);
            g(a);
        } else {
            g(0x0f);
            oad(inv - 16, a - 4);
        }
    } else if ((v & ~1) == VT_JMP) {
        if ((v & 1) != inv) {
            gjmp_addr(a);
            gsym(vtop->c.i);
        } else {
            gsym(vtop->c.i);
            o(0x05eb);
            gjmp_addr(a);
        }
        vtop--;
    }
}

/* generate a test. set 'inv' to invert test. Stack entry is popped */
ST_FUNC int gtst(int inv, int t)
{
    int v = vtop->r & VT_VALMASK;

    if (nocode_wanted) {
        ;
    } else if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100)
          {
            /* This was a float compare.  If the parity flag is set
               the result was unordered.  For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test.  We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump.  */
            vtop->c.i &= ~0x100;
            if (inv == (vtop->c.i == TOK_NE))
                o(0x067a); /* jp +6 */
            else
              {
                g(0x0f);
                t = gjmp2(0x8a, t); /* jp t */
              }
          }
        g(0x0f);
        t = gjmp2((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            uint32_t n1, n = vtop->c.i;
            if (n) {
                while ((n1 = read32le(cur_text_section->data + n)))
                    n = n1;
                write32le(cur_text_section->data + n, t);
                t = vtop->c.i;
            }
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    }
    vtop--;
    return t;
}

/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.i == vtop->c.i)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}

void gen_opl(int op)
{
    gen_opi(op);
}

/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fc80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.i;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}

/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}

/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(TREG_ST0);
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(TREG_ST0);
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}

/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}

/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}

/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp) */
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b48);
    o(0xe0 | REG_VALUE(r));
    /* We align to 16 bytes rather than align */
    /* and ~15, %rsp */
    o(0xf0e48348);
    vpop();
#endif
}


/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/