Add address of GOT + 8 in PLT + 16 and fix PLT0
[tinycc.git] / x86_64-gen.c
blob a33bb480e67087b5a35f45e7a71bb92f4e22904c

/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         25
#define NB_ASM_REGS     8

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_ST0     0x0080 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_LRET    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */

/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_ST0 = 24,

    TREG_MEM = 0x20
};

#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX  /* single word int return register */
#define REG_LRET TREG_RDX  /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x400000
#define ELF_PAGE_SIZE  0x200000

/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#include "tcc.h"
#include <assert.h>

ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};

static unsigned long func_sub_sp_offset;
static int func_ret_sub;

/* XXX: make it faster ? */
ST_FUNC void g(int c)
{
    int ind1;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}

ST_FUNC void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}

ST_FUNC void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

ST_FUNC void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

ST_FUNC void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}

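/* Note: on x86-64 the REX prefix byte is 0100WRXB. W (bit 3) selects a
   64-bit operand size, R (bit 2) extends the ModRM reg field and B
   (bit 0) extends the ModRM r/m field, which is how r8-r15 are reached.
   orex() below folds this in front of opcode 'b': 'll' supplies W, 'r'
   supplies B and 'r2' supplies R; pseudo register values (VT_CONST and
   above) are treated as 0 so no stray extension bits are emitted. */
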
static void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}

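/* Note: forward jumps that cannot be resolved yet are chained through
   their own 32-bit displacement fields: each pending site stores the
   offset of the previous one, with 0 terminating the list. gsym_addr()
   walks the chain starting at 't' and patches every site with the
   PC-relative displacement to 'a' (a - t - 4, i.e. relative to the end
   of the 4-byte field). */
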
/* output a symbol and patch all calls to it */
ST_FUNC void gsym_addr(int t, int a)
{
    while (t) {
        unsigned char *ptr = cur_text_section->data + t;
        uint32_t n = read32le(ptr); /* next value */
        write32le(ptr, a - t - 4);
        t = n;
    }
}

void gsym(int t)
{
    gsym_addr(t, ind);
}

/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad

static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}

/* instruction + 4 bytes data. Return the address of the data */
ST_FUNC int oad(int c, int s)
{
    int ind1;

    o(c);
    ind1 = ind + 4;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    write32le(cur_text_section->data + ind, s);
    s = ind;
    ind = ind1;
    return s;
}

ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_32, c), c=0;
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_64, c), c=0;
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_PC32, c-4), c=4;
    gen_le32(c-4);
}

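/* Note: for non-static symbols the final address may live in another
   module, so it is fetched from the GOT with a RIP-relative access;
   R_X86_64_GOTPCREL asks the linker to resolve to the symbol's GOT
   slot. The -4 addend accounts for RIP pointing just past the 4-byte
   displacement field when the instruction executes. */
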
/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifndef TCC_TARGET_PE
    greloca(cur_text_section, sym, ind, R_X86_64_GOTPCREL, -4);
#else
    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
              get_tok_str(sym->v, NULL), c, r,
              cur_text_section->data[ind-3],
              cur_text_section->data[ind-2],
              cur_text_section->data[ind-1]
              );
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
#endif
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}

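/* Note: the ModRM byte layout is mod(2) | reg(3) | r/m(3). Encodings
   used below: 0x05 is mod 00 with r/m 101, which on x86-64 means
   disp32(%rip); 0x45 and 0x85 are disp8(%rbp) and disp32(%rbp) for
   locals; mod 10/00 with a plain register base covers TREG_MEM-style
   indirect references. */
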
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}

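/* Note: load() dispatches on the shape of the source value. Lvalues
   pick a move opcode from the operand type (movd/movq for SSE, fldt
   for long double, sign- or zero-extending byte/short loads, plain mov
   otherwise); non-lvalues materialize constants, lea a local's address,
   turn CPU flags (VT_CMP) into 0/1 with setcc, resolve pending jump
   chains (VT_JMP/VT_JMPI), or move between registers. */
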
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.i;

    ft &= ~(VT_VOLATILE | VT_CONSTANT);

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.i = fc;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.i);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            orex(0,r,0,0);
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100)
              {
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE.  */
                fc &= ~0x100;
                o(0x037a + (REX_BASE(r) << 8));
              }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}

/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.i;
    fr = v->r & VT_VALMASK;
    ft &= ~(VT_VOLATILE | VT_CONSTANT);
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.i);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, etc. */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}

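/* Note: direct calls use the 5-byte e8/e9 rel32 form, which only
   reaches +/- 2GB; the (vtop->c.i-4) == (int)(vtop->c.i-4) test below
   checks that a constant target fits. Anything else is loaded into
   %r11 (a register used for argument passing in neither ABI) and
   called or jumped to indirectly via ff /2 or ff /4. */
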
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
        ((vtop->r & VT_SYM) || (vtop->c.i-4) == (int)(vtop->c.i-4))) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
#ifdef TCC_TARGET_PE
            greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32, (int)(vtop->c.i-4));
#else
            greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32, (int)(vtop->c.i-4));
#endif
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloca(symtab_section, cur_text_section,
                           ind + 1, R_X86_64_PC32, 0, (int)(vtop->c.i-4));
        }
        oad(0xe8 + is_jmp, 0); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}

#if defined(CONFIG_TCC_BCHECK)
#ifndef TCC_TARGET_PE
static addr_t func_bound_offset;
static unsigned long func_bound_ind;
#endif

static void gen_static_call(int v)
{
    Sym *sym = external_global_sym(v, &func_old_type, 0);
    oad(0xe8, 0);
    greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
}

/* generate a bounded pointer addition */
ST_FUNC void gen_bounded_ptr_add(void)
{
    /* save all temporary registers */
    save_regs(0);

    /* prepare fast x86_64 function call */
    gv(RC_RAX);
    o(0xc68948); // mov  %rax,%rsi ## second arg in %rsi, this must be size
    vtop--;

    gv(RC_RAX);
    o(0xc78948); // mov  %rax,%rdi ## first arg in %rdi, this must be ptr
    vtop--;

    /* do a fast function call */
    gen_static_call(TOK___bound_ptr_add);

    /* returned pointer is in rax */
    vtop++;
    vtop->r = TREG_RAX | VT_BOUNDED;

    /* relocation offset of the bounding function call point */
    vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
}

/* patch pointer addition in vtop so that pointer dereferencing is
   also tested */
ST_FUNC void gen_bounded_ptr_deref(void)
{
    addr_t func;
    int size, align;
    ElfW(Rela) *rel;
    Sym *sym;

    size = 0;
    /* XXX: put that code in generic part of tcc */
    if (!is_float(vtop->type.t)) {
        if (vtop->r & VT_LVAL_BYTE)
            size = 1;
        else if (vtop->r & VT_LVAL_SHORT)
            size = 2;
    }
    if (!size)
        size = type_size(&vtop->type, &align);
    switch(size) {
    case  1: func = TOK___bound_ptr_indir1; break;
    case  2: func = TOK___bound_ptr_indir2; break;
    case  4: func = TOK___bound_ptr_indir4; break;
    case  8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
    default:
        tcc_error("unhandled size when dereferencing bounded pointer");
        func = 0;
        break;
    }

    sym = external_global_sym(func, &func_old_type, 0);
    if (!sym->c)
        put_extern_sym(sym, NULL, 0, 0);

    /* patch relocation */
    /* XXX: find a better solution ? */

    rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.i);
    rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
}
#endif

#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}

static int func_scratch;

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align;
    *regsize = 8;
    *ret_align = 1; // Never have to re-align return values for x86-64
    size = type_size(vt, &align);
    ret->ref = NULL;
    if (size > 8) {
        return 0;
    } else if (size > 4) {
        ret->t = VT_LLONG;
        return 1;
    } else if (size > 2) {
        ret->t = VT_INT;
        return 1;
    } else if (size > 1) {
        ret->t = VT_SHORT;
        return 1;
    } else {
        ret->t = VT_BYTE;
        return 1;
    }
}

static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}

int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}

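/* Note: on Win64 the caller always reserves home space on the stack
   for at least the four register parameters, hence
   args_size = max(nb_args, REGN) * 8 below. Aggregates larger than
   8 bytes never travel in registers; a copy is placed in the caller's
   scratch area and its address is passed instead, which is why
   stack-consuming arguments are processed before register setup. */
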
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (size > 8) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);
    /* other compilers don't clear the upper bits when returning char/short */
    bt = vtop->type.ref->type.t & (VT_BTYPE | VT_UNSIGNED);
    if (bt == (VT_BYTE | VT_UNSIGNED))
        o(0xc0b60f);  /* movzbl %al, %eax */
    else if (bt == VT_BYTE)
        o(0xc0be0f); /* movsbl %al, %eax */
    else if (bt == VT_SHORT)
        o(0x98); /* cwtl */
    else if (bt == (VT_SHORT | VT_UNSIGNED))
        o(0xc0b70f);  /* movzwl %ax, %eax */
#if 0 /* handled in gen_cast() */
    else if (bt == VT_INT)
        o(0x9848); /* cltq */
    else if (bt == (VT_INT | VT_UNSIGNED))
        o(0xc089);  /* mov %eax,%eax */
#endif
    vtop--;
}

#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    func_var = (sym->c == FUNC_ELLIPSIS);
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}

/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */
        greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}

#else

static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}

typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_sse,
    x86_64_mode_x87
} X86_64_Mode;

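/* Note: SysV AMD64 parameter classification splits a small aggregate
   into eightbytes and classifies each as INTEGER, SSE, X87 or MEMORY;
   the merge below mirrors the ABI rules (MEMORY wins, INTEGER beats
   SSE, an x87 field inside a struct forces MEMORY). Anything larger
   than 16 bytes goes straight to memory in classify_x86_64_arg(). */
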
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}

static X86_64_Mode classify_x86_64_inner(CType *ty)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        mode = x86_64_mode_none;
        for (f = f->next; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }
    assert(0);
    return 0;
}

static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
{
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}

ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
}

#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}

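/* Note: arguments are evaluated in reverse order (INVERT_FUNC_PARAMS),
   and args_size counts every byte pushed so gadd_sp() can pop the lot
   after the call. RDX and RCX double as argument registers, so values
   headed there are staged in R10/R11 first and only copied into place
   right before the call, once gv() can no longer clobber them. */
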
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, run_start, run_end, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float register arguments */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse)
            nb_sse_args += reg_count;
        else if (mode == x86_64_mode_integer)
            nb_reg_args += reg_count;
    }

    /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
       and ended by a 16-byte aligned argument. This is because, from the point of view of
       the callee, argument alignment is computed from the bottom up. */
    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    run_start = 0;
    args_size = 0;
    while (run_start != nb_args) {
        int run_gen_reg = gen_reg, run_sse_reg = sse_reg;

        run_end = nb_args;
        stack_adjust = 0;
        for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            switch (mode) {
            case x86_64_mode_memory:
            case x86_64_mode_x87:
            stack_arg:
                if (align == 16)
                    run_end = i;
                else
                    stack_adjust += size;
                break;

            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8) goto stack_arg;
                break;

            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN) goto stack_arg;
                break;
            default: break; /* nothing to be done for x86_64_mode_none */
            }
        }

        gen_reg = run_gen_reg;
        sse_reg = run_sse_reg;

        /* adjust stack to align SSE boundary */
        if (stack_adjust &= 15) {
            /* fetch cpu flag before the following sub will change the value */
            if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
                gv(RC_INT);

            stack_adjust = 16 - stack_adjust;
            o(0x48);
            oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
            args_size += stack_adjust;
        }

        for(i = run_start; i < run_end;) {
            /* Swap argument to top, it will possibly be changed here,
               and might use more temps. At the end of the loop we keep
               it on the stack and swap it back to its original position
               if it is a register. */
            SValue tmp = vtop[0];
            int arg_stored = 1;

            vtop[0] = vtop[-i];
            vtop[-i] = tmp;
            mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, &reg_count);

            switch (vtop->type.t & VT_BTYPE) {
            case VT_STRUCT:
                if (mode == x86_64_mode_sse) {
                    if (sse_reg > 8)
                        sse_reg -= reg_count;
                    else
                        arg_stored = 0;
                } else if (mode == x86_64_mode_integer) {
                    if (gen_reg > REGN)
                        gen_reg -= reg_count;
                    else
                        arg_stored = 0;
                }

                if (arg_stored) {
                    /* allocate the necessary size on stack */
                    o(0x48);
                    oad(0xec81, size); /* sub $xxx, %rsp */
                    /* generate structure store */
                    r = get_reg(RC_INT);
                    orex(1, r, 0, 0x89); /* mov %rsp, r */
                    o(0xe0 + REG_VALUE(r));
                    vset(&vtop->type, r | VT_LVAL, 0);
                    vswap();
                    vstore();
                    args_size += size;
                }
                break;

            case VT_LDOUBLE:
                assert(0);
                break;

            case VT_FLOAT:
            case VT_DOUBLE:
                assert(mode == x86_64_mode_sse);
                if (sse_reg > 8) {
                    --sse_reg;
                    r = gv(RC_FLOAT);
                    o(0x50); /* push $rax */
                    /* movq %xmmN, (%rsp) */
                    o(0xd60f66);
                    o(0x04 + REG_VALUE(r)*8);
                    o(0x24);
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;

            default:
                assert(mode == x86_64_mode_integer);
                /* simple type */
                /* XXX: implicit cast ? */
                if (gen_reg > REGN) {
                    --gen_reg;
                    r = gv(RC_INT);
                    orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;
            }

            /* And swap the argument back to its original position.  */
            tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            if (arg_stored) {
                vrotb(i+1);
                assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
                vpop();
                --nb_args;
                --run_end;
            } else {
                ++i;
            }
        }

        /* handle 16 byte aligned arguments at end of run */
        run_start = i = run_end;
        while (i < nb_args) {
            /* Rotate argument to top since it will always be popped */
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            if (align != 16)
                break;

            vrotb(i+1);

            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                gv(RC_ST0);
                oad(0xec8148, size); /* sub $xxx, %rsp */
                o(0x7cdb); /* fstpt 0(%rsp) */
                g(0x24);
                g(0x00);
                args_size += size;
            } else {
                assert(mode == x86_64_mode_memory);

                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore();
                args_size += size;
            }

            vpop();
            --nb_args;
        }
    }

    /* XXX This should be superfluous.  */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            int d;
            gen_reg -= reg_count;
            r = gv(RC_INT);
            d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here.  */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    if (vtop->type.ref->c != FUNC_NEW) /* implies FUNC_OLD or FUNC_ELLIPSIS */
        oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}

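/* Note: the prolog is emitted last. gfunc_prolog() only skips
   FUNC_PROLOG_SIZE bytes (11 = push %rbp (1) + mov %rsp,%rbp (3) +
   sub $imm32,%rsp (7)) and gfunc_epilog() comes back to fill them in
   once the final frame size is known. */
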
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count <= 8) {
                    seen_reg_num += reg_count;
                } else {
                    seen_reg_num = 8;
                    goto stack_arg;
                }
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count <= 8) {
                    seen_sse_num += reg_count;
                } else {
                    seen_sse_num = 8;
                    goto stack_arg;
                }
                break;
            }
        }

        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            o(0xd60f66); /* movq */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;
        }
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }

#ifdef CONFIG_TCC_BCHECK
    /* leave some room for bound checking code */
    if (tcc_state->do_bounds_check) {
        func_bound_offset = lbounds_section->data_offset;
        func_bound_ind = ind;
        oad(0xb8, 0); /* lbound section pointer */
        o(0xc78948);  /* mov  %rax,%rdi ## first arg in %rdi, this must be ptr */
        oad(0xb8, 0); /* call to function */
    }
#endif
}

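/* Note: with bounds checking enabled the prolog above left two
   mov $0,%eax placeholders behind (recorded in func_bound_ind); the
   epilog rewrites them with the lbounds section pointer and a call to
   __bound_local_new, and brackets the return with the matching
   __bound_local_delete call. */
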
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check
        && func_bound_offset != lbounds_section->data_offset)
    {
        addr_t saved_ind;
        addr_t *bounds_ptr;
        Sym *sym_data;

        /* add end of table info */
        bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
        *bounds_ptr = 0;

        /* generate bound local allocation */
        sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                               func_bound_offset, lbounds_section->data_offset);
        saved_ind = ind;
        ind = func_bound_ind;
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);
        ind = ind + 5 + 3;
        gen_static_call(TOK___bound_local_new);
        ind = saved_ind;

        /* generate bound check local freeing */
        o(0x5250); /* save returned value, if any */
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);
        oad(0xb8, 0); /* mov xxx, %rax */
        o(0xc78948);  /* mov %rax,%rdi # first arg in %rdi, this must be ptr */
        gen_static_call(TOK___bound_local_delete);
        o(0x585a); /* restore returned value, if any */
    }
#endif
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}

#endif /* not PE */

/* generate a jump to a label */
int gjmp(int t)
{
    return psym(0xe9, t);
}

/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}

ST_FUNC void gtst_addr(int inv, int a)
{
    inv ^= (vtop--)->c.i;
    a -= ind + 2;
    if (a == (char)a) {
        g(inv - 32);
        g(a);
    } else {
        g(0x0f);
        oad(inv - 16, a - 4);
    }
}

/* generate a test. set 'inv' to invert test. Stack entry is popped */
ST_FUNC int gtst(int inv, int t)
{
    int v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100)
          {
            /* This was a float compare.  If the parity flag is set
               the result was unordered.  For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test.  We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump.  */
            vtop->c.i &= ~0x100;
            if (inv == (vtop->c.i == TOK_NE))
              o(0x067a);  /* jp +6 */
            else
              {
                g(0x0f);
                t = psym(0x8a, t); /* jp t */
              }
          }
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            uint32_t n1, n = vtop->c.i;
            if (n) {
                while ((n1 = read32le(cur_text_section->data + n)))
                    n = n1;
                write32le(cur_text_section->data + n, t);
                t = vtop->c.i;
            }
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    }
    vtop--;
    return t;
}

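/* Note: the 'opc' values below are the x86 group-1 /digit extensions
   that select the ALU operation in the ModRM reg field: add=0, or=1,
   adc=2, sbb=3, and=4, sub=5, xor=6, cmp=7; shifts use the group-2
   digits shl=4, shr=5, sar=7. */
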
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.i == vtop->c.i)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}

void gen_opl(int op)
{
    gen_opi(op);
}

/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.i;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}

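/* Note: cvtsi2ss/cvtsi2sd (f3/f2 0f 2a) convert from a signed integer.
   Unsigned 32-bit sources are handled by widening: with REX.W the
   source is treated as a 64-bit quantity whose upper half is zero, so
   the signed conversion still yields the right value. */
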
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}

/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(RC_ST0);
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}

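/* Note: cvttss2si/cvttsd2si (f3/f2 0f 2c) convert with truncation
   toward zero, matching C's float-to-integer semantics; a long double
   input is first narrowed to double since there is no direct
   x87-to-GPR truncating form used here. */
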
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}

/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}

/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b48);
    o(0xe0 | REG_VALUE(r));
    /* We align to 16 bytes rather than align */
    /* and ~15, %rsp */
    o(0xf0e48348);
    vpop();
#endif
}

/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/